diff --git a/docs/design_pattern/agent.md b/docs/design_pattern/agent.md
index 2aaade1..a503a3a 100644
--- a/docs/design_pattern/agent.md
+++ b/docs/design_pattern/agent.md
@@ -2,7 +2,7 @@
 layout: default
 title: "Agent"
 parent: "Design Pattern"
-nav_order: 6
+nav_order: 1
 ---
 
 # Agent
diff --git a/docs/design_pattern/mapreduce.md b/docs/design_pattern/mapreduce.md
index 61dce0c..b8a2975 100644
--- a/docs/design_pattern/mapreduce.md
+++ b/docs/design_pattern/mapreduce.md
@@ -2,7 +2,7 @@
 layout: default
 title: "Map Reduce"
 parent: "Design Pattern"
-nav_order: 3
+nav_order: 4
 ---
 
 # Map Reduce
diff --git a/docs/design_pattern/memory.md b/docs/design_pattern/memory.md
deleted file mode 100644
index 36b23eb..0000000
--- a/docs/design_pattern/memory.md
+++ /dev/null
@@ -1,125 +0,0 @@
----
-layout: default
-title: "Chat Memory"
-parent: "Design Pattern"
-nav_order: 5
----
-
-# Chat Memory
-
-Multi-turn conversations require memory management to maintain context while avoiding overwhelming the LLM.
-
-### 1. Naive Approach: Full History
-
-Sending the full chat history may overwhelm LLMs.
-
-```python
-class ChatNode(Node):
-    def prep(self, shared):
-        if "history" not in shared:
-            shared["history"] = []
-        user_input = input("You: ")
-        return shared["history"], user_input
-
-    def exec(self, inputs):
-        history, user_input = inputs
-        messages = [{"role": "system", "content": "You are a helpful assistant"}]
-        for h in history:
-            messages.append(h)
-        messages.append({"role": "user", "content": user_input})
-        response = call_llm(messages)
-        return response
-
-    def post(self, shared, prep_res, exec_res):
-        shared["history"].append({"role": "user", "content": prep_res[1]})
-        shared["history"].append({"role": "assistant", "content": exec_res})
-        return "continue"
-
-chat = ChatNode()
-chat - "continue" >> chat
-flow = Flow(start=chat)
-```
-
-### 2. Improved Memory Management
-
-We can:
-1. Limit the chat history to the most recent 4.
-2. Use [vector search](./tool.md) to retrieve relevant exchanges beyond the last 4.
-
-```python
-################################
-# Node A: Retrieve user input & relevant messages
-################################
-class ChatRetrieve(Node):
-    def prep(self, s):
-        s.setdefault("history", [])
-        s.setdefault("memory_index", None)
-        user_input = input("You: ")
-        return user_input
-
-    def exec(self, user_input):
-        emb = get_embedding(user_input)
-        relevant = []
-        if len(shared["history"]) > 8 and shared["memory_index"]:
-            idx, _ = search_index(shared["memory_index"], emb, top_k=2)
-            relevant = [shared["history"][i[0]] for i in idx]
-        return (user_input, relevant)
-
-    def post(self, s, p, r):
-        user_input, relevant = r
-        s["user_input"] = user_input
-        s["relevant"] = relevant
-        return "continue"
-
-################################
-# Node B: Call LLM, update history + index
-################################
-class ChatReply(Node):
-    def prep(self, s):
-        user_input = s["user_input"]
-        recent = s["history"][-8:]
-        relevant = s.get("relevant", [])
-        return user_input, recent, relevant
-
-    def exec(self, inputs):
-        user_input, recent, relevant = inputs
-        msgs = [{"role":"system","content":"You are a helpful assistant."}]
-        if relevant:
-            msgs.append({"role":"system","content":f"Relevant: {relevant}"})
-        msgs.extend(recent)
-        msgs.append({"role":"user","content":user_input})
-        ans = call_llm(msgs)
-        return ans
-
-    def post(self, s, pre, ans):
-        user_input, _, _ = pre
-        s["history"].append({"role":"user","content":user_input})
-        s["history"].append({"role":"assistant","content":ans})
-
-        # Manage memory index
-        if len(s["history"]) == 8:
-            embs = []
-            for i in range(0, 8, 2):
-                text = s["history"][i]["content"] + " " + s["history"][i+1]["content"]
-                embs.append(get_embedding(text))
-            s["memory_index"] = create_index(embs)
-        elif len(s["history"]) > 8:
-            text = s["history"][-2]["content"] + " " + s["history"][-1]["content"]
-            new_emb = np.array([get_embedding(text)]).astype('float32')
-            s["memory_index"].add(new_emb)
-
-        print(f"Assistant: {ans}")
-        return "continue"
-
-################################
-# Flow wiring
-################################
-retrieve = ChatRetrieve()
-reply = ChatReply()
-retrieve - "continue" >> reply
-reply - "continue" >> retrieve
-
-flow = Flow(start=retrieve)
-shared = {}
-flow.run(shared)
-```
\ No newline at end of file
diff --git a/docs/design_pattern/multi_agent.md b/docs/design_pattern/multi_agent.md
index d427586..f0a7bcb 100644
--- a/docs/design_pattern/multi_agent.md
+++ b/docs/design_pattern/multi_agent.md
@@ -2,7 +2,7 @@
 layout: default
 title: "(Advanced) Multi-Agents"
 parent: "Design Pattern"
-nav_order: 7
+nav_order: 6
 ---
 
 # (Advanced) Multi-Agents
diff --git a/docs/design_pattern/rag.md b/docs/design_pattern/rag.md
index bd502ca..a2629e4 100644
--- a/docs/design_pattern/rag.md
+++ b/docs/design_pattern/rag.md
@@ -2,7 +2,7 @@
 layout: default
 title: "RAG"
 parent: "Design Pattern"
-nav_order: 4
+nav_order: 3
 ---
 
 # RAG (Retrieval Augmented Generation)
diff --git a/docs/design_pattern/structure.md b/docs/design_pattern/structure.md
index 91d6083..82b9c93 100644
--- a/docs/design_pattern/structure.md
+++ b/docs/design_pattern/structure.md
@@ -2,7 +2,7 @@
 layout: default
 title: "Structured Output"
 parent: "Design Pattern"
-nav_order: 1
+nav_order: 5
 ---
 
 # Structured Output
diff --git a/docs/design_pattern/workflow.md b/docs/design_pattern/workflow.md
index 62b4436..476dfb4 100644
--- a/docs/design_pattern/workflow.md
+++ b/docs/design_pattern/workflow.md
@@ -7,7 +7,7 @@
 nav_order: 2
 ---
 
 # Workflow
 
-Many real-world tasks are too complex for one LLM call. The solution is to decompose them into a [chain](../core_abstraction/flow.md) of multiple Nodes.
+Many real-world tasks are too complex for one LLM call. The solution is **Task Decomposition**: decompose them into a [chain](../core_abstraction/flow.md) of multiple Nodes.
 
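Reviewer note: to make the **Task Decomposition** sentence added to `workflow.md` concrete, here is a minimal sketch of such a Node chain. It reuses the `Node`/`Flow` API and the `call_llm` helper that appear in the deleted `memory.md` above; the node names and prompts are illustrative assumptions, not part of this diff.

```python
# A minimal two-step chain in the same style as the memory.md code above.
# Assumes the Node/Flow base classes and a call_llm helper are available;
# node names and prompts are illustrative only.
class GenerateOutline(Node):
    def prep(self, shared):
        return shared["topic"]

    def exec(self, topic):
        return call_llm(f"Write a short outline for an article about {topic}.")

    def post(self, shared, prep_res, exec_res):
        shared["outline"] = exec_res
        return "continue"

class WriteDraft(Node):
    def prep(self, shared):
        return shared["outline"]

    def exec(self, outline):
        return call_llm(f"Write a draft that follows this outline:\n{outline}")

    def post(self, shared, prep_res, exec_res):
        shared["draft"] = exec_res
        # No successor is wired beyond this node, so the flow ends here.

outline = GenerateOutline()
draft = WriteDraft()
outline - "continue" >> draft  # same wiring syntax as the memory.md example

flow = Flow(start=outline)
shared = {"topic": "LLM workflows"}
flow.run(shared)
print(shared["draft"])
```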
diff --git a/docs/index.md b/docs/index.md index 9098353..b80145c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -34,12 +34,11 @@ We model the LLM workflow as a **Graph + Shared Store**: From there, it’s easy to implement popular design patterns: -- [Structured Output](./design_pattern/structure.md) formats outputs consistently. -- [Workflow](./design_pattern/workflow.md) chains multiple tasks into pipelines. -- [Map Reduce](./design_pattern/mapreduce.md) splits data tasks into Map and Reduce steps. -- [RAG](./design_pattern/rag.md) integrates data retrieval with generation. - [Agent](./design_pattern/agent.md) autonomously makes decisions. -- [(Optional) Chat Memory](./design_pattern/memory.md) preserves conversation context. +- [Workflow](./design_pattern/workflow.md) chains multiple tasks into pipelines. +- [RAG](./design_pattern/rag.md) integrates data retrieval with generation. +- [Map Reduce](./design_pattern/mapreduce.md) splits data tasks into Map and Reduce steps. +- [Structured Output](./design_pattern/structure.md) formats outputs consistently. - [(Advanced) Multi-Agents](./design_pattern/multi_agent.md) coordinate multiple agents. ## Utility Function
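Reviewer note on the deleted `memory.md`: its `ChatRetrieve.exec` read `shared["history"]` and `shared["memory_index"]` directly, but `exec` only receives what `prep` returns, so `shared` was undefined in that scope. If the example is ever restored, a corrected sketch would pass that state through `prep`, as below (same assumed `get_embedding` and `search_index` helpers as the original):

```python
# Corrected version of the deleted ChatRetrieve node: prep hands
# everything exec needs, so exec never reaches for the shared store.
class ChatRetrieve(Node):
    def prep(self, s):
        s.setdefault("history", [])
        s.setdefault("memory_index", None)
        user_input = input("You: ")
        # Pass history and index along instead of reading globals in exec.
        return user_input, s["history"], s["memory_index"]

    def exec(self, inputs):
        user_input, history, memory_index = inputs
        emb = get_embedding(user_input)
        relevant = []
        if len(history) > 8 and memory_index:
            idx, _ = search_index(memory_index, emb, top_k=2)
            relevant = [history[i[0]] for i in idx]
        return user_input, relevant

    def post(self, s, p, r):
        user_input, relevant = r
        s["user_input"] = user_input
        s["relevant"] = relevant
        return "continue"
```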