diff --git a/cookbook/pocketflow-chat-memory/README.md b/cookbook/pocketflow-chat-memory/README.md
new file mode 100644
index 0000000..3383f88
--- /dev/null
+++ b/cookbook/pocketflow-chat-memory/README.md
@@ -0,0 +1,91 @@
+# Retrieval Augmented Generation (RAG)
+
+This project demonstrates a simplified RAG system that retrieves relevant documents based on user queries.
+
+## Features
+
+- Simple vector-based document retrieval
+- Two-stage pipeline (offline indexing, online querying)
+- FAISS-powered similarity search
+
+## Getting Started
+
+1. Install the required dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+
+2. Run the application with a sample query:
+
+```bash
+python main.py --"Large Language Model"
+```
+
+3. Or run without arguments to use the default query:
+
+```bash
+python main.py
+```
+
+## API Key
+
+By default, the demo uses a dummy embedding based on character frequencies. To use real OpenAI embeddings:
+
+1. Edit `nodes.py` to replace the dummy `get_embedding` with `get_openai_embedding`:
+```python
+# Change this line:
+query_embedding = get_embedding(query)
+# To this:
+query_embedding = get_openai_embedding(query)
+
+# And also change this line:
+return get_embedding(text)
+# To this:
+return get_openai_embedding(text)
+```
+
+2. Make sure your OpenAI API key is set:
+```bash
+export OPENAI_API_KEY="your-api-key-here"
+```
+
+## How It Works
+
+The magic happens through a two-stage pipeline implemented with PocketFlow:
+
+```mermaid
+graph TD
+    subgraph OfflineFlow[Offline Document Indexing]
+        EmbedDocs[EmbedDocumentsNode] --> CreateIndex[CreateIndexNode]
+    end
+
+    subgraph OnlineFlow[Online Query Processing]
+        EmbedQuery[EmbedQueryNode] --> RetrieveDoc[RetrieveDocumentNode]
+    end
+```
+
+Here's what each part does:
+1. **EmbedDocumentsNode**: Converts documents into vector representations
+2. **CreateIndexNode**: Creates a searchable FAISS index from embeddings
+3. **EmbedQueryNode**: Converts the user query into the same vector space
+4. **RetrieveDocumentNode**: Finds the most similar document using vector search
+
+## Example Output
+
+```
+✅ Created 5 document embeddings
+🔍 Creating search index...
+✅ Index created with 5 vectors
+🔍 Embedding query: Large Language Model
+🔎 Searching for relevant documents...
+📄 Retrieved document (index: 3, distance: 0.3296)
+📄 Most relevant text: "PocketFlow is a 100-line Large Language Model Framework."
+``` + +## Files + +- [`main.py`](./main.py): Main entry point for running the RAG demonstration +- [`flow.py`](./flow.py): Configures the flows that connect the nodes +- [`nodes.py`](./nodes.py): Defines the nodes for document processing and retrieval +- [`utils.py`](./utils.py): Utility functions including the embedding function diff --git a/cookbook/pocketflow-chat-memory/flow.py b/cookbook/pocketflow-chat-memory/flow.py new file mode 100644 index 0000000..a022237 --- /dev/null +++ b/cookbook/pocketflow-chat-memory/flow.py @@ -0,0 +1,22 @@ +from pocketflow import Flow +from nodes import EmbedDocumentsNode, CreateIndexNode, EmbedQueryNode, RetrieveDocumentNode + +def get_offline_flow(): + # Create offline flow for document indexing + embed_docs_node = EmbedDocumentsNode() + create_index_node = CreateIndexNode() + embed_docs_node >> create_index_node + offline_flow = Flow(start=embed_docs_node) + return offline_flow + +def get_online_flow(): + # Create online flow for document retrieval + embed_query_node = EmbedQueryNode() + retrieve_doc_node = RetrieveDocumentNode() + embed_query_node >> retrieve_doc_node + online_flow = Flow(start=embed_query_node) + return online_flow + +# Initialize flows +offline_flow = get_offline_flow() +online_flow = get_online_flow() \ No newline at end of file diff --git a/cookbook/pocketflow-chat-memory/main.py b/cookbook/pocketflow-chat-memory/main.py new file mode 100644 index 0000000..ecb4028 --- /dev/null +++ b/cookbook/pocketflow-chat-memory/main.py @@ -0,0 +1,55 @@ +import sys +from flow import offline_flow, online_flow + +def run_rag_demo(): + """ + Run a demonstration of the RAG system. + + This function: + 1. Indexes a set of sample documents (offline flow) + 2. Takes a query from the command line + 3. Retrieves the most relevant document (online flow) + """ + + # Sample texts - corpus of documents to search + texts = [ + "The quick brown fox jumps over the lazy dog.", + "Machine learning is a subset of artificial intelligence.", + "Python is a popular programming language for data science.", + "PocketFlow is a 100-line Large Language Model Framework.", + "The weather is sunny and warm today.", + ] + + print("=" * 50) + print("PocketFlow RAG Document Retrieval") + print("=" * 50) + + # Default query + default_query = "Large Language Model" + + # Get query from command line if provided with -- + query = default_query + for arg in sys.argv[1:]: + if arg.startswith("--"): + query = arg[2:] + break + + # Single shared store for both flows + shared = { + "texts": texts, + "embeddings": None, + "index": None, + "query": query, + "query_embedding": None, + "retrieved_document": None + } + + # Initialize and run the offline flow (document indexing) + offline_flow.run(shared) + + # Run the online flow to retrieve the most relevant document + online_flow.run(shared) + + +if __name__ == "__main__": + run_rag_demo() \ No newline at end of file diff --git a/cookbook/pocketflow-chat-memory/nodes.py b/cookbook/pocketflow-chat-memory/nodes.py new file mode 100644 index 0000000..d0e5519 --- /dev/null +++ b/cookbook/pocketflow-chat-memory/nodes.py @@ -0,0 +1,95 @@ +from pocketflow import Node, Flow, BatchNode +import numpy as np +import faiss +from utils import get_embedding, get_openai_embedding + +# Nodes for the offline flow +class EmbedDocumentsNode(BatchNode): + def prep(self, shared): + """Read texts from shared store and return as an iterable""" + return shared["texts"] + + def exec(self, text): + """Embed a single text""" + return get_embedding(text) + + def 
post(self, shared, prep_res, exec_res_list): + """Store embeddings in the shared store""" + embeddings = np.array(exec_res_list, dtype=np.float32) + shared["embeddings"] = embeddings + print(f"✅ Created {len(embeddings)} document embeddings") + return "default" + +class CreateIndexNode(Node): + def prep(self, shared): + """Get embeddings from shared store""" + return shared["embeddings"] + + def exec(self, embeddings): + """Create FAISS index and add embeddings""" + print("🔍 Creating search index...") + dimension = embeddings.shape[1] + + # Create a flat L2 index + index = faiss.IndexFlatL2(dimension) + + # Add the embeddings to the index + index.add(embeddings) + + return index + + def post(self, shared, prep_res, exec_res): + """Store the index in shared store""" + shared["index"] = exec_res + print(f"✅ Index created with {exec_res.ntotal} vectors") + return "default" + +# Nodes for the online flow +class EmbedQueryNode(Node): + def prep(self, shared): + """Get query from shared store""" + return shared["query"] + + def exec(self, query): + """Embed the query""" + print(f"🔍 Embedding query: {query}") + query_embedding = get_embedding(query) + return np.array([query_embedding], dtype=np.float32) + + def post(self, shared, prep_res, exec_res): + """Store query embedding in shared store""" + shared["query_embedding"] = exec_res + return "default" + +class RetrieveDocumentNode(Node): + def prep(self, shared): + """Get query embedding, index, and texts from shared store""" + return shared["query_embedding"], shared["index"], shared["texts"] + + def exec(self, inputs): + """Search the index for similar documents""" + print("🔎 Searching for relevant documents...") + query_embedding, index, texts = inputs + + # Search for the most similar document + distances, indices = index.search(query_embedding, k=1) + + # Get the index of the most similar document + best_idx = indices[0][0] + distance = distances[0][0] + + # Get the corresponding text + most_relevant_text = texts[best_idx] + + return { + "text": most_relevant_text, + "index": best_idx, + "distance": distance + } + + def post(self, shared, prep_res, exec_res): + """Store retrieved document in shared store""" + shared["retrieved_document"] = exec_res + print(f"📄 Retrieved document (index: {exec_res['index']}, distance: {exec_res['distance']:.4f})") + print(f"📄 Most relevant text: \"{exec_res['text']}\"") + return "default" \ No newline at end of file diff --git a/cookbook/pocketflow-chat-memory/requirements.txt b/cookbook/pocketflow-chat-memory/requirements.txt new file mode 100644 index 0000000..abb8e35 --- /dev/null +++ b/cookbook/pocketflow-chat-memory/requirements.txt @@ -0,0 +1,4 @@ +pocketflow>=0.0.5 +numpy>=1.20.0 +faiss-cpu>=1.7.0 +openai>=1.0.0 \ No newline at end of file diff --git a/cookbook/pocketflow-chat-memory/utils.py b/cookbook/pocketflow-chat-memory/utils.py new file mode 100644 index 0000000..4ced69b --- /dev/null +++ b/cookbook/pocketflow-chat-memory/utils.py @@ -0,0 +1,79 @@ +import os +import numpy as np +from openai import OpenAI + +def get_embedding(text): + """ + A simple embedding function that converts text to vector. + + In a real application, you would use a proper embedding model like OpenAI, + Hugging Face, or other embedding services. For this example, we'll use a + simple approach based on character frequencies for demonstration purposes. + """ + # Create a simple embedding (128-dimensional) based on character frequencies + # This is just for demonstration - not a real embedding algorithm! 
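+    # In short: each character increments one of 128 buckets chosen by ord(char) % 128,
+    # and the vector is L2-normalized at the end, so the FAISS L2 distances used for
+    # retrieval rank documents the same way cosine similarity would.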
+    embedding = np.zeros(128, dtype=np.float32)
+
+    # Generate a deterministic but distributed embedding based on character frequency
+    for char in text:
+        # Use modulo to distribute values across the embedding dimensions
+        pos = ord(char) % 128
+        embedding[pos] += 1.0
+
+    # Normalize the embedding
+    norm = np.linalg.norm(embedding)
+    if norm > 0:
+        embedding = embedding / norm
+
+    return embedding
+
+def get_openai_embedding(text):
+    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "YOUR_API_KEY"))
+
+    response = client.embeddings.create(
+        model="text-embedding-ada-002",
+        input=text
+    )
+
+    # Extract the embedding vector from the response
+    embedding = response.data[0].embedding
+
+    # Convert to numpy array for consistency with other embedding functions
+    return np.array(embedding, dtype=np.float32)
+
+
+if __name__ == "__main__":
+    # Test the embedding function
+    text1 = "The quick brown fox jumps over the lazy dog."
+    text2 = "Python is a popular programming language for data science."
+
+    emb1 = get_embedding(text1)
+    emb2 = get_embedding(text2)
+
+    print(f"Embedding 1 shape: {emb1.shape}")
+    print(f"Embedding 2 shape: {emb2.shape}")
+
+    # Calculate similarity (dot product)
+    similarity = np.dot(emb1, emb2)
+    print(f"Similarity between texts: {similarity:.4f}")
+
+    # Compare with a different text
+    text3 = "Machine learning is a subset of artificial intelligence."
+    emb3 = get_embedding(text3)
+    similarity13 = np.dot(emb1, emb3)
+    similarity23 = np.dot(emb2, emb3)
+
+    print(f"Similarity between text1 and text3: {similarity13:.4f}")
+    print(f"Similarity between text2 and text3: {similarity23:.4f}")
+
+    # These simple comparisons should show higher similarity
+    # between related concepts (text2 and text3) than between
+    # unrelated texts (text1 and text3)
+
+    # Uncomment to test OpenAI embeddings (requires API key)
+    # print("\nTesting OpenAI embeddings (requires API key):")
+    # oai_emb1 = get_openai_embedding(text1)
+    # oai_emb2 = get_openai_embedding(text2)
+    # print(f"OpenAI Embedding 1 shape: {oai_emb1.shape}")
+    # oai_similarity = np.dot(oai_emb1, oai_emb2)
+    # print(f"OpenAI similarity between texts: {oai_similarity:.4f}")
\ No newline at end of file
diff --git a/cookbook/pocketflow-chat/README.md b/cookbook/pocketflow-chat/README.md
new file mode 100644
index 0000000..50e4dbc
--- /dev/null
+++ b/cookbook/pocketflow-chat/README.md
@@ -0,0 +1,44 @@
+# Simple PocketFlow Chat
+
+A basic chat application using PocketFlow with OpenAI's GPT-4o model.
+
+## Features
+
+- Conversational chat interface in the terminal
+- Maintains full conversation history for context
+- Simple implementation demonstrating PocketFlow's node and flow concepts
+
+## Run It
+
+1. Make sure your OpenAI API key is set:
+   ```bash
+   export OPENAI_API_KEY="your-api-key-here"
+   ```
+   Alternatively, you can edit the `utils.py` file to include your API key directly.
+
+2. 
Install requirements and run the application: + ```bash + pip install -r requirements.txt + python main.py + ``` + +## How It Works + +```mermaid +flowchart LR + chat[ChatNode] -->|continue| chat +``` + +The chat application uses: +- A single `ChatNode` with a self-loop that: + - Takes user input in the `prep` method + - Sends the complete conversation history to GPT-4o + - Adds responses to the conversation history + - Loops back to continue the chat until the user types 'exit' + + +## Files + +- `main.py`: Implementation of the ChatNode and chat flow +- `utils.py`: Simple wrapper for calling the OpenAI API + \ No newline at end of file diff --git a/cookbook/pocketflow-chat/main.py b/cookbook/pocketflow-chat/main.py new file mode 100644 index 0000000..126c106 --- /dev/null +++ b/cookbook/pocketflow-chat/main.py @@ -0,0 +1,55 @@ +from pocketflow import Node, Flow +from utils import call_llm + +class ChatNode(Node): + def prep(self, shared): + # Initialize messages if this is the first run + if "messages" not in shared: + shared["messages"] = [] + print("Welcome to the chat! Type 'exit' to end the conversation.") + + # Get user input + user_input = input("\nYou: ") + + # Check if user wants to exit + if user_input.lower() == 'exit': + return None + + # Add user message to history + shared["messages"].append({"role": "user", "content": user_input}) + + # Return all messages for the LLM + return shared["messages"] + + def exec(self, messages): + if messages is None: + return None + + # Call LLM with the entire conversation history + response = call_llm(messages) + return response + + def post(self, shared, prep_res, exec_res): + if prep_res is None or exec_res is None: + print("\nGoodbye!") + return None # End the conversation + + # Print the assistant's response + print(f"\nAssistant: {exec_res}") + + # Add assistant message to history + shared["messages"].append({"role": "assistant", "content": exec_res}) + + # Loop back to continue the conversation + return "continue" + +# Create the flow with self-loop +chat_node = ChatNode() +chat_node - "continue" >> chat_node # Loop back to continue conversation + +flow = Flow(start=chat_node) + +# Start the chat +if __name__ == "__main__": + shared = {} + flow.run(shared) diff --git a/cookbook/pocketflow-chat/requirements.txt b/cookbook/pocketflow-chat/requirements.txt new file mode 100644 index 0000000..87eaabc --- /dev/null +++ b/cookbook/pocketflow-chat/requirements.txt @@ -0,0 +1,2 @@ +pocketflow>=0.0.1 +openai>=1.0.0 \ No newline at end of file diff --git a/cookbook/pocketflow-chat/utils.py b/cookbook/pocketflow-chat/utils.py new file mode 100644 index 0000000..ffced04 --- /dev/null +++ b/cookbook/pocketflow-chat/utils.py @@ -0,0 +1,21 @@ +from openai import OpenAI +import os + +def call_llm(messages): + client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-api-key")) + + response = client.chat.completions.create( + model="gpt-4o", + messages=messages, + temperature=0.7 + ) + + return response.choices[0].message.content + +if __name__ == "__main__": + # Test the LLM call + messages = [{"role": "user", "content": "In a few words, what's the meaning of life?"}] + response = call_llm(messages) + print(f"Prompt: {messages[0]['content']}") + print(f"Response: {response}") +
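For reference, here is a minimal sketch (not part of the diff above) of the message-history format that `ChatNode` accumulates and `call_llm` forwards to the OpenAI API; the example contents are illustrative:

```python
from utils import call_llm  # the wrapper added in cookbook/pocketflow-chat/utils.py

# ChatNode appends one {"role": "user"} and one {"role": "assistant"} entry per turn,
# so every call sees the full conversation so far.
messages = [
    {"role": "user", "content": "Hi, what is PocketFlow?"},
    {"role": "assistant", "content": "A 100-line LLM framework."},
    {"role": "user", "content": "Summarize that in one sentence."},
]
print(call_llm(messages))  # requires OPENAI_API_KEY to be set
```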