add chat

commit 556130d4f4 (parent 9567331fab)

README.md
@@ -0,0 +1,91 @@
# Retrieval Augmented Generation (RAG)

This project demonstrates a simplified RAG system that retrieves relevant documents based on user queries.

## Features

- Simple vector-based document retrieval
- Two-stage pipeline (offline indexing, online querying)
- FAISS-powered similarity search

## Getting Started

1. Install the required dependencies:

```bash
pip install -r requirements.txt
```

2. Run the application with a sample query:

```bash
python main.py --"Large Language Model"
```

3. Or run without arguments to use the default query:

```bash
python main.py
```

## API Key
By default, the demo uses a dummy embedding based on character frequencies. To use real OpenAI embeddings:

1. Edit `nodes.py` to replace the dummy `get_embedding` with `get_openai_embedding`:
```python
# Change this line:
query_embedding = get_embedding(query)
# To this:
query_embedding = get_openai_embedding(query)

# And also change this line:
return get_embedding(text)
# To this:
return get_openai_embedding(text)
```

2. Make sure your OpenAI API key is set:

```bash
export OPENAI_API_KEY="your-api-key-here"
```

## How It Works

The magic happens through a two-stage pipeline implemented with PocketFlow:

```mermaid
graph TD
    subgraph OfflineFlow[Offline Document Indexing]
        EmbedDocs[EmbedDocumentsNode] --> CreateIndex[CreateIndexNode]
    end

    subgraph OnlineFlow[Online Query Processing]
        EmbedQuery[EmbedQueryNode] --> RetrieveDoc[RetrieveDocumentNode]
    end
```

Here's what each part does, with a short sketch of driving the two flows after the list:

1. **EmbedDocumentsNode**: Converts documents into vector representations
2. **CreateIndexNode**: Creates a searchable FAISS index from the embeddings
3. **EmbedQueryNode**: Converts the user query into the same vector space
4. **RetrieveDocumentNode**: Finds the most similar document using vector search
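
Both flows read and write a single shared store; `main.py` wires them together. A minimal sketch of driving the pipeline, assuming the flows exported by `flow.py`:

```python
from flow import offline_flow, online_flow

# One shared store carries data between the two stages
shared = {
    "texts": ["PocketFlow is a 100-line Large Language Model Framework."],
    "query": "Large Language Model",
}

offline_flow.run(shared)   # EmbedDocumentsNode -> CreateIndexNode
online_flow.run(shared)    # EmbedQueryNode -> RetrieveDocumentNode

print(shared["retrieved_document"]["text"])
```
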
## Example Output

```
✅ Created 5 document embeddings
🔍 Creating search index...
✅ Index created with 5 vectors
🔍 Embedding query: Large Language Model
🔎 Searching for relevant documents...
📄 Retrieved document (index: 3, distance: 0.3296)
📄 Most relevant text: "PocketFlow is a 100-line Large Language Model Framework."
```

## Files

- [`main.py`](./main.py): Main entry point for running the RAG demonstration
- [`flow.py`](./flow.py): Configures the flows that connect the nodes
- [`nodes.py`](./nodes.py): Defines the nodes for document processing and retrieval
- [`utils.py`](./utils.py): Utility functions, including the embedding function
flow.py
@@ -0,0 +1,22 @@
from pocketflow import Flow
from nodes import EmbedDocumentsNode, CreateIndexNode, EmbedQueryNode, RetrieveDocumentNode

def get_offline_flow():
    # Create offline flow for document indexing
    embed_docs_node = EmbedDocumentsNode()
    create_index_node = CreateIndexNode()
    embed_docs_node >> create_index_node
    offline_flow = Flow(start=embed_docs_node)
    return offline_flow

def get_online_flow():
    # Create online flow for document retrieval
    embed_query_node = EmbedQueryNode()
    retrieve_doc_node = RetrieveDocumentNode()
    embed_query_node >> retrieve_doc_node
    online_flow = Flow(start=embed_query_node)
    return online_flow

# Initialize flows
offline_flow = get_offline_flow()
online_flow = get_online_flow()
main.py
@@ -0,0 +1,55 @@
import sys
from flow import offline_flow, online_flow

def run_rag_demo():
    """
    Run a demonstration of the RAG system.

    This function:
    1. Indexes a set of sample documents (offline flow)
    2. Takes a query from the command line
    3. Retrieves the most relevant document (online flow)
    """

    # Sample texts - corpus of documents to search
    texts = [
        "The quick brown fox jumps over the lazy dog.",
        "Machine learning is a subset of artificial intelligence.",
        "Python is a popular programming language for data science.",
        "PocketFlow is a 100-line Large Language Model Framework.",
        "The weather is sunny and warm today.",
    ]

    print("=" * 50)
    print("PocketFlow RAG Document Retrieval")
    print("=" * 50)

    # Default query
    default_query = "Large Language Model"

    # Get query from the command line if provided with a -- prefix,
    # e.g. python main.py --"Large Language Model"
    query = default_query
    for arg in sys.argv[1:]:
        if arg.startswith("--"):
            query = arg[2:]
            break

    # Single shared store for both flows
    shared = {
        "texts": texts,
        "embeddings": None,
        "index": None,
        "query": query,
        "query_embedding": None,
        "retrieved_document": None
    }

    # Run the offline flow (document indexing)
    offline_flow.run(shared)

    # Run the online flow to retrieve the most relevant document
    online_flow.run(shared)

if __name__ == "__main__":
    run_rag_demo()
nodes.py
@@ -0,0 +1,95 @@
from pocketflow import Node, BatchNode
import numpy as np
import faiss
from utils import get_embedding, get_openai_embedding

# Nodes for the offline flow
class EmbedDocumentsNode(BatchNode):
    def prep(self, shared):
        """Read texts from shared store and return as an iterable"""
        return shared["texts"]

    def exec(self, text):
        """Embed a single text"""
        return get_embedding(text)

    def post(self, shared, prep_res, exec_res_list):
        """Store embeddings in the shared store"""
        embeddings = np.array(exec_res_list, dtype=np.float32)
        shared["embeddings"] = embeddings
        print(f"✅ Created {len(embeddings)} document embeddings")
        return "default"

class CreateIndexNode(Node):
    def prep(self, shared):
        """Get embeddings from shared store"""
        return shared["embeddings"]

    def exec(self, embeddings):
        """Create FAISS index and add embeddings"""
        print("🔍 Creating search index...")
        dimension = embeddings.shape[1]

        # Create a flat L2 index
        index = faiss.IndexFlatL2(dimension)

        # Add the embeddings to the index
        index.add(embeddings)

        return index

    def post(self, shared, prep_res, exec_res):
        """Store the index in shared store"""
        shared["index"] = exec_res
        print(f"✅ Index created with {exec_res.ntotal} vectors")
        return "default"

# Nodes for the online flow
class EmbedQueryNode(Node):
    def prep(self, shared):
        """Get query from shared store"""
        return shared["query"]

    def exec(self, query):
        """Embed the query"""
        print(f"🔍 Embedding query: {query}")
        query_embedding = get_embedding(query)
        return np.array([query_embedding], dtype=np.float32)

    def post(self, shared, prep_res, exec_res):
        """Store query embedding in shared store"""
        shared["query_embedding"] = exec_res
        return "default"

class RetrieveDocumentNode(Node):
    def prep(self, shared):
        """Get query embedding, index, and texts from shared store"""
        return shared["query_embedding"], shared["index"], shared["texts"]

    def exec(self, inputs):
        """Search the index for similar documents"""
        print("🔎 Searching for relevant documents...")
        query_embedding, index, texts = inputs

        # Search for the most similar document
        distances, indices = index.search(query_embedding, k=1)
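        # faiss returns 2D arrays of shape (num_queries, k), so with a
        # single query and k=1 the best hit lives at position [0][0]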

        # Get the index of the most similar document
        best_idx = indices[0][0]
        distance = distances[0][0]

        # Get the corresponding text
        most_relevant_text = texts[best_idx]

        return {
            "text": most_relevant_text,
            "index": best_idx,
            "distance": distance
        }

    def post(self, shared, prep_res, exec_res):
        """Store retrieved document in shared store"""
        shared["retrieved_document"] = exec_res
        print(f"📄 Retrieved document (index: {exec_res['index']}, distance: {exec_res['distance']:.4f})")
        print(f"📄 Most relevant text: \"{exec_res['text']}\"")
        return "default"
requirements.txt
@@ -0,0 +1,4 @@
pocketflow>=0.0.5
numpy>=1.20.0
faiss-cpu>=1.7.0
openai>=1.0.0
utils.py
@@ -0,0 +1,79 @@
import os
import numpy as np
from openai import OpenAI

def get_embedding(text):
    """
    A simple embedding function that converts text to a vector.

    In a real application, you would use a proper embedding model like OpenAI,
    Hugging Face, or other embedding services. For this example, we'll use a
    simple approach based on character frequencies for demonstration purposes.
    """
    # Create a simple embedding (128-dimensional) based on character frequencies
    # This is just for demonstration - not a real embedding algorithm!
    embedding = np.zeros(128, dtype=np.float32)

    # Generate a deterministic but distributed embedding based on character frequency
    for char in text:
        # Use modulo to distribute values across the embedding dimensions
        pos = ord(char) % 128
        embedding[pos] += 1.0

    # Normalize the embedding
    norm = np.linalg.norm(embedding)
    if norm > 0:
        embedding = embedding / norm

    return embedding

def get_openai_embedding(text):
    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "YOUR_API_KEY"))

    response = client.embeddings.create(
        model="text-embedding-ada-002",
        input=text
    )

    # Extract the embedding vector from the response
    embedding = response.data[0].embedding

    # Convert to a numpy array for consistency with the other embedding function
    return np.array(embedding, dtype=np.float32)

if __name__ == "__main__":
    # Test the embedding function
    text1 = "The quick brown fox jumps over the lazy dog."
    text2 = "Python is a popular programming language for data science."

    emb1 = get_embedding(text1)
    emb2 = get_embedding(text2)

    print(f"Embedding 1 shape: {emb1.shape}")
    print(f"Embedding 2 shape: {emb2.shape}")

    # Calculate similarity (dot product of unit vectors = cosine similarity)
    similarity = np.dot(emb1, emb2)
    print(f"Similarity between texts: {similarity:.4f}")

    # Compare with a different text
    text3 = "Machine learning is a subset of artificial intelligence."
    emb3 = get_embedding(text3)
    similarity13 = np.dot(emb1, emb3)
    similarity23 = np.dot(emb2, emb3)

    print(f"Similarity between text1 and text3: {similarity13:.4f}")
    print(f"Similarity between text2 and text3: {similarity23:.4f}")

    # These simple comparisons should show higher similarity
    # between related concepts (text2 and text3) than between
    # unrelated texts (text1 and text3)
    # Uncomment to test OpenAI embeddings (requires an API key):
    # print("\nTesting OpenAI embeddings (requires API key):")
    # oai_emb1 = get_openai_embedding(text1)
    # oai_emb2 = get_openai_embedding(text2)
    # print(f"OpenAI Embedding 1 shape: {oai_emb1.shape}")
    # oai_similarity = np.dot(oai_emb1, oai_emb2)
    # print(f"OpenAI similarity between texts: {oai_similarity:.4f}")
README.md
@@ -0,0 +1,44 @@
# Simple PocketFlow Chat

A basic chat application using PocketFlow with OpenAI's GPT-4o model.

## Features

- Conversational chat interface in the terminal
- Maintains full conversation history for context
- Simple implementation demonstrating PocketFlow's node and flow concepts

## Run It

1. Make sure your OpenAI API key is set:

```bash
export OPENAI_API_KEY="your-api-key-here"
```

Alternatively, you can edit the `utils.py` file to include your API key directly.

2. Install requirements and run the application:

```bash
pip install -r requirements.txt
python main.py
```

## How It Works

```mermaid
flowchart LR
    chat[ChatNode] -->|continue| chat
```

The chat application uses a single `ChatNode` with a self-loop that:

- Takes user input in the `prep` method
- Sends the complete conversation history to GPT-4o
- Adds the response to the conversation history
- Loops back to continue the chat until the user types 'exit' (wiring sketched below)
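
The self-loop takes only a couple of lines of PocketFlow wiring; a minimal sketch of what `main.py` sets up, assuming the `ChatNode` defined there:

```python
from pocketflow import Flow

chat_node = ChatNode()
chat_node - "continue" >> chat_node  # post() returning "continue" loops back

flow = Flow(start=chat_node)
flow.run({})  # shared store starts empty; ChatNode initializes "messages"
```
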
## Files

- `main.py`: Implementation of the ChatNode and chat flow
- `utils.py`: Simple wrapper for calling the OpenAI API
main.py
@@ -0,0 +1,55 @@
from pocketflow import Node, Flow
from utils import call_llm

class ChatNode(Node):
    def prep(self, shared):
        # Initialize messages if this is the first run
        if "messages" not in shared:
            shared["messages"] = []
            print("Welcome to the chat! Type 'exit' to end the conversation.")

        # Get user input
        user_input = input("\nYou: ")

        # Check if user wants to exit
        if user_input.lower() == 'exit':
            return None

        # Add user message to history
        shared["messages"].append({"role": "user", "content": user_input})

        # Return all messages for the LLM
        return shared["messages"]

    def exec(self, messages):
        if messages is None:
            return None

        # Call LLM with the entire conversation history
        response = call_llm(messages)
        return response

    def post(self, shared, prep_res, exec_res):
        if prep_res is None or exec_res is None:
            print("\nGoodbye!")
            return None  # End the conversation

        # Print the assistant's response
        print(f"\nAssistant: {exec_res}")

        # Add assistant message to history
        shared["messages"].append({"role": "assistant", "content": exec_res})

        # Loop back to continue the conversation
        return "continue"

# Create the flow with self-loop
chat_node = ChatNode()
chat_node - "continue" >> chat_node  # Loop back to continue conversation

flow = Flow(start=chat_node)

# Start the chat
if __name__ == "__main__":
    shared = {}
    flow.run(shared)
requirements.txt
@@ -0,0 +1,2 @@
pocketflow>=0.0.1
openai>=1.0.0
utils.py
@@ -0,0 +1,21 @@
from openai import OpenAI
import os

def call_llm(messages):
    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-api-key"))

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        temperature=0.7
    )

    return response.choices[0].message.content

if __name__ == "__main__":
    # Test the LLM call
    messages = [{"role": "user", "content": "In a few words, what's the meaning of life?"}]
    response = call_llm(messages)
    print(f"Prompt: {messages[0]['content']}")
    print(f"Response: {response}")