From e6fe74bdda5170641e4d3e503875c085b4939931 Mon Sep 17 00:00:00 2001
From: zachary62
Date: Thu, 2 Jan 2025 18:38:18 +0000
Subject: [PATCH] agent example

---
 docs/agent.md  |  92 +++++++++++++++++++++++++++++++++++
 docs/index.md  |   4 +-
 docs/memory.md | 127 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 221 insertions(+), 2 deletions(-)
 create mode 100644 docs/agent.md
 create mode 100644 docs/memory.md

diff --git a/docs/agent.md b/docs/agent.md
new file mode 100644
index 0000000..6694862
--- /dev/null
+++ b/docs/agent.md
@@ -0,0 +1,92 @@
---
layout: default
title: "Agent"
parent: "Paradigm"
nav_order: 6
---

# Agent

For many tasks, we need agents that take dynamic, recursive actions based on the inputs they receive.
You can build these agents as **Nodes** connected by *Actions* in a directed graph using [Flow](./flow.md).


### Example: Search Agent

This agent:
1. Decides whether to search or answer
2. If it searches, loops back to decide whether more searching is needed
3. Answers once enough context has been gathered

```python
import yaml

class DecideAction(Node):
    def prep(self, shared):
        context = shared.get("context", "No previous search")
        query = shared["query"]
        return query, context

    def exec(self, inputs):
        query, context = inputs
        prompt = f"""
Given input: {query}
Previous search results: {context}
Should I: 1) Search web for more info 2) Answer with current knowledge
Output in yaml:
```yaml
action: search/answer
reason: why this action
search_term: search phrase if action is search
```"""
        resp = call_llm(prompt)
        yaml_str = resp.split("```yaml")[1].split("```")[0].strip()
        result = yaml.safe_load(yaml_str)

        # Validate the LLM's structured output before acting on it
        assert isinstance(result, dict)
        assert "action" in result
        assert "reason" in result
        assert result["action"] in ["search", "answer"]
        if result["action"] == "search":
            assert "search_term" in result

        return result

    def post(self, shared, prep_res, exec_res):
        if exec_res["action"] == "search":
            shared["search_term"] = exec_res["search_term"]
        return exec_res["action"]

class SearchWeb(Node):
    def prep(self, shared):
        return shared["search_term"]

    def exec(self, search_term):
        return search_web(search_term)

    def post(self, shared, prep_res, exec_res):
        prev_searches = shared.get("context", [])
        shared["context"] = prev_searches + [
            {"term": shared["search_term"], "result": exec_res}
        ]
        return "decide"

class DirectAnswer(Node):
    def prep(self, shared):
        return shared["query"], shared.get("context", "")

    def exec(self, inputs):
        query, context = inputs
        return call_llm(f"Context: {context}\nAnswer: {query}")

    def post(self, shared, prep_res, exec_res):
        shared["answer"] = exec_res  # keep the final answer in the shared store

# Connect nodes
decide = DecideAction()
search = SearchWeb()
answer = DirectAnswer()

decide - "search" >> search
decide - "answer" >> answer
search - "decide" >> decide  # Loop back

flow = Flow(start=decide)
flow.run({"query": "Who won the Nobel Prize in Physics 2024?"})
```
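
The example assumes `call_llm` and `search_web` utility functions defined elsewhere (e.g., in a `utils.py`). A minimal sketch, assuming the OpenAI Python client and the `duckduckgo_search` package as illustrative choices (any LLM or search provider would work), could look like:

```python
# utils.py -- illustrative helpers; swap in whichever LLM/search provider you use
from openai import OpenAI
from duckduckgo_search import DDGS

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

def call_llm(prompt):
    # Accept either a plain string or a list of chat messages
    messages = prompt if isinstance(prompt, list) else [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(model="gpt-4o-mini", messages=messages)
    return response.choices[0].message.content

def search_web(search_term, max_results=5):
    # Return a plain-text digest of the top search hits
    with DDGS() as ddgs:
        results = ddgs.text(search_term, max_results=max_results)
    return "\n".join(f"{r['title']}: {r['body']}" for r in results)
```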

diff --git a/docs/index.md b/docs/index.md
index 834e225..a65f3bf 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -54,8 +54,8 @@ We model the LLM workflow as a **Nested Directed Graph**:
 - [Task Decomposition](./decomp.md)
 - [Map Reduce](./mapreduce.md)
 - [RAG](./rag.md)
-- Chat Memory
-- Agent
+- [Chat Memory](./memory.md)
+- [Agent](./agent.md)
 - Multi-Agent
 - Evaluation

diff --git a/docs/memory.md b/docs/memory.md
new file mode 100644
index 0000000..5e01324
--- /dev/null
+++ b/docs/memory.md
@@ -0,0 +1,127 @@
---
layout: default
title: "Chat Memory"
parent: "Paradigm"
nav_order: 5
---

# Chat Memory

Multi-turn conversations require memory management to maintain context while avoiding overwhelming the LLM.

### 1. Naive Approach: Full History

Sending the full chat history on every turn is simple, but long conversations can exceed the context window and overwhelm the LLM.

```python
class ChatNode(Node):
    def prep(self, shared):
        if "history" not in shared:
            shared["history"] = []
        user_input = input("You: ")
        return shared["history"], user_input

    def exec(self, inputs):
        history, user_input = inputs
        messages = [{"role": "system", "content": "You are a helpful assistant"}]
        for h in history:
            messages.append(h)
        messages.append({"role": "user", "content": user_input})
        response = call_llm(messages)
        return response

    def post(self, shared, prep_res, exec_res):
        shared["history"].append({"role": "user", "content": prep_res[1]})
        shared["history"].append({"role": "assistant", "content": exec_res})
        return "continue"

chat = ChatNode()
chat - "continue" >> chat
flow = Flow(start=chat)
```

### 2. Improved Memory Management

We can instead:
1. Recursively summarize the conversation to keep a compact overview.
2. Use [vector search](./tool.md) to retrieve the most relevant past exchanges for details.

```python
import numpy as np

class HandleInput(Node):
    def prep(self, shared):
        if "history" not in shared:
            shared["history"] = []
            shared["summary"] = ""
            shared["memory_index"] = None
            shared["memories"] = []

        user_input = input("You: ")
        query_embedding = get_embedding(user_input)

        # Retrieve the most relevant past exchanges, if any have been indexed yet
        relevant_memories = []
        if shared["memory_index"] is not None:
            indices, _ = search_index(shared["memory_index"], query_embedding, top_k=2)
            relevant_memories = [shared["memories"][i[0]] for i in indices]

        shared["current_input"] = {
            "summary": shared["summary"],
            "relevant": relevant_memories,
            "input": user_input
        }

class GenerateResponse(Node):
    def prep(self, shared):
        return shared["current_input"]

    def exec(self, context):
        prompt = f"""Context:
Summary: {context['summary']}
Relevant past: {context['relevant']}
User: {context['input']}

Response:"""
        return call_llm(prompt)

    def post(self, shared, prep_res, exec_res):
        shared["history"].append({"role": "user", "content": prep_res["input"]})
        shared["history"].append({"role": "assistant", "content": exec_res})

class UpdateMemory(Node):
    def prep(self, shared):
        return shared["current_input"]["input"]

    def exec(self, user_input):
        return get_embedding(user_input)

    def post(self, shared, prep_res, exec_res):
        # Store the raw text and add its embedding to the vector index
        shared["memories"].append(prep_res)
        if shared["memory_index"] is None:
            shared["memory_index"] = create_index([exec_res])
        else:
            shared["memory_index"].add(np.array([exec_res]))

class UpdateSummary(Node):
    def prep(self, shared):
        if shared["history"]:
            return shared["history"][-10:]
        return None

    def exec(self, recent_history):
        if recent_history:
            return call_llm(f"Summarize this conversation:\n{recent_history}")
        return ""

    def post(self, shared, prep_res, exec_res):
        if exec_res:
            shared["summary"] = exec_res

# Connect nodes
input_node = HandleInput()
response_node = GenerateResponse()
memory_node = UpdateMemory()
summary_node = UpdateSummary()

input_node >> response_node >> memory_node >> summary_node >> input_node

chat_flow = Flow(start=input_node)
```
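
This example additionally assumes `get_embedding`, `create_index`, and `search_index` helpers for the vector store. A minimal sketch using FAISS and the OpenAI embeddings API as illustrative choices (any embedding model and vector index would work), shaped to match how `HandleInput` and `UpdateMemory` call them, could look like:

```python
# vector utilities -- illustrative sketch; adapt to your embedding model and index
import faiss
import numpy as np
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

def get_embedding(text):
    # Embed a single string into a float32 vector
    resp = client.embeddings.create(model="text-embedding-3-small", input=text)
    return np.array(resp.data[0].embedding, dtype=np.float32)

def create_index(embeddings):
    # Build a flat L2 index from an initial list of embeddings
    dim = len(embeddings[0])
    index = faiss.IndexFlatL2(dim)
    index.add(np.array(embeddings, dtype=np.float32))
    return index

def search_index(index, query_embedding, top_k=5):
    # Return (indices, distances) as lists of single-element rows,
    # matching the `i[0]` access pattern used in HandleInput above
    top_k = min(top_k, index.ntotal)
    D, I = index.search(np.array([query_embedding], dtype=np.float32), top_k)
    return [[int(i)] for i in I[0]], [[float(d)] for d in D[0]]
```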