---
layout: default
title: "Chat Memory"
parent: "Paradigm"
nav_order: 5
---

# Chat Memory

Multi-turn conversations require memory management to maintain context while avoiding overwhelming the LLM.

### 1. Naive Approach: Full History

Sending the full chat history on every turn is simple, but as the conversation grows the prompt can exceed the context window and overwhelm the LLM.

```python
class ChatNode(Node):
    def prep(self, shared):
        # Initialize the conversation history on the first turn
        if "history" not in shared:
            shared["history"] = []
        user_input = input("You: ")
        return shared["history"], user_input

    def exec(self, inputs):
        history, user_input = inputs
        # Send the entire history plus the new user message to the LLM
        messages = [{"role": "system", "content": "You are a helpful assistant"}]
        for h in history:
            messages.append(h)
        messages.append({"role": "user", "content": user_input})
        response = call_llm(messages)
        return response

    def post(self, shared, prep_res, exec_res):
        # Append both sides of the exchange to the ever-growing history
        shared["history"].append({"role": "user", "content": prep_res[1]})
        shared["history"].append({"role": "assistant", "content": exec_res})
        return "continue"

chat = ChatNode()
chat - "continue" >> chat  # loop back to itself for the next turn
flow = Flow(start=chat)
```
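
Both examples on this page assume a `call_llm` helper that wraps your model provider; it is not defined here. A minimal sketch, assuming the OpenAI Python SDK and a hypothetical model choice, written to accept either a message list (as in `ChatNode`) or a plain prompt string (as in `GenerateResponse` below):

```python
# Hypothetical helper assumed by the examples on this page (not part of the flow).
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def call_llm(prompt_or_messages):
    # Accept either a full message list or a plain prompt string
    if isinstance(prompt_or_messages, str):
        messages = [{"role": "user", "content": prompt_or_messages}]
    else:
        messages = prompt_or_messages
    response = client.chat.completions.create(
        model="gpt-4o",  # assumption: swap in whichever model you use
        messages=messages,
    )
    return response.choices[0].message.content
```

The nodes only depend on a text-in/text-out contract, so any provider can be swapped in behind this function.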

### 2. Improved Memory Management

We can:
1. Recursively summarize the conversation to maintain an overview.
2. Use [vector search](./tool.md) to retrieve relevant past exchanges for details.

```python
import numpy as np

class HandleInput(Node):
    def prep(self, shared):
        # Initialize memory structures on the first turn
        if "history" not in shared:
            shared["history"] = []
            shared["summary"] = ""
            shared["memory_index"] = None
            shared["memories"] = []

        user_input = input("You: ")
        query_embedding = get_embedding(user_input)

        # Retrieve the most relevant past exchanges via vector search
        relevant_memories = []
        if shared["memory_index"] is not None:
            indices, _ = search_index(shared["memory_index"], query_embedding, top_k=2)
            relevant_memories = [shared["memories"][i[0]] for i in indices]

        shared["current_input"] = {
            "summary": shared["summary"],
            "relevant": relevant_memories,
            "input": user_input
        }

class GenerateResponse(Node):
    def prep(self, shared):
        return shared["current_input"]

    def exec(self, context):
        # Combine the running summary, retrieved memories, and the new input
        prompt = f"""Context:
Summary: {context['summary']}
Relevant past: {context['relevant']}
User: {context['input']}

Response:"""
        return call_llm(prompt)

    def post(self, shared, prep_res, exec_res):
        shared["history"].append({"role": "user", "content": prep_res["input"]})
        shared["history"].append({"role": "assistant", "content": exec_res})

class UpdateMemory(Node):
    def prep(self, shared):
        return shared["current_input"]["input"]

    def exec(self, user_input):
        # Embed the new user input so it can be retrieved in later turns
        return get_embedding(user_input)

    def post(self, shared, prep_res, exec_res):
        shared["memories"].append(prep_res)
        if shared["memory_index"] is None:
            shared["memory_index"] = create_index([exec_res])
        else:
            shared["memory_index"].add(np.array([exec_res]))

class UpdateSummary(Node):
    def prep(self, shared):
        # Only the most recent exchanges feed the recursive summary
        if shared["history"]:
            return shared["history"][-10:]
        return None

    def exec(self, recent_history):
        if recent_history:
            return call_llm(f"Summarize this conversation:\n{recent_history}")
        return ""

    def post(self, shared, prep_res, exec_res):
        if exec_res:
            shared["summary"] = exec_res

# Connect nodes
input_node = HandleInput()
response_node = GenerateResponse()
memory_node = UpdateMemory()
summary_node = UpdateSummary()

input_node >> response_node >> memory_node >> summary_node >> input_node

chat_flow = Flow(start=input_node)
```
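
`get_embedding`, `create_index`, and `search_index` are assumed utilities rather than part of the flow above. A minimal sketch, assuming OpenAI embeddings and a FAISS flat index; the model name and the `(indices, distances)` return shape are illustrative assumptions chosen to match how the helpers are called in `HandleInput` and `UpdateMemory`:

```python
# Hypothetical utility functions assumed by the example above.
import faiss
import numpy as np
from openai import OpenAI

client = OpenAI()

def get_embedding(text):
    # Assumption: OpenAI's text-embedding-3-small; any embedding model works
    response = client.embeddings.create(model="text-embedding-3-small", input=text)
    return np.array(response.data[0].embedding, dtype=np.float32)

def create_index(embeddings):
    # Flat L2 index: exact search, no training, fine for a handful of memories
    dim = len(embeddings[0])
    index = faiss.IndexFlatL2(dim)
    index.add(np.array(embeddings, dtype=np.float32))
    return index

def search_index(index, query_embedding, top_k=5):
    # Never ask for more neighbours than the index holds (FAISS pads with -1)
    k = min(top_k, index.ntotal)
    distances, indices = index.search(np.array([query_embedding], dtype=np.float32), k)
    # Transpose so each row is a single [position], matching the usage above
    return indices.T, distances.T
```

See [vector search](./tool.md) for more on building and querying indexes; any embedding model and vector store can stand in for these helpers.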