From cd03a9b2a289dfacc3d064c04d1c88e2d069c2da Mon Sep 17 00:00:00 2001
From: zachary62
Date: Thu, 2 Jan 2025 05:10:32 +0000
Subject: [PATCH] exp docs

---
 docs/async.md            |  61 --------------
 docs/batch.md            | 107 ------------------------
 docs/communication.md    | 138 ------------------------------
 docs/core_abstraction.md |   6 --
 docs/flow.md             | 171 ---------------------------------------
 docs/llm.md              |  69 ----------------
 docs/node.md             |  95 ----------------------
 docs/paradigm.md         |   6 --
 docs/parallel.md         |  54 -------------
 docs/preparation.md      |   6 --
 docs/structure.md        | 111 -------------------------
 docs/tool.md             | 165 ------------------------------------
 12 files changed, 989 deletions(-)
 delete mode 100644 docs/async.md
 delete mode 100644 docs/batch.md
 delete mode 100644 docs/communication.md
 delete mode 100644 docs/core_abstraction.md
 delete mode 100644 docs/flow.md
 delete mode 100644 docs/llm.md
 delete mode 100644 docs/node.md
 delete mode 100644 docs/paradigm.md
 delete mode 100644 docs/parallel.md
 delete mode 100644 docs/preparation.md
 delete mode 100644 docs/structure.md
 delete mode 100644 docs/tool.md

diff --git a/docs/async.md b/docs/async.md
deleted file mode 100644
index c9d265a..0000000
--- a/docs/async.md
+++ /dev/null
@@ -1,61 +0,0 @@
---
layout: default
title: "(Advanced) Async"
parent: "Core Abstraction"
nav_order: 5
---

# (Advanced) Async

**Async** Nodes implement `prep_async()`, `exec_async()`, `exec_fallback_async()`, and/or `post_async()`. This is useful for:

1. **prep_async()**
   - For *fetching/reading data (files, APIs, DB)* in an I/O-friendly way.

2. **exec_async()**
   - Typically used for async LLM calls.

3. **post_async()**
   - For *awaiting user feedback*, *coordinating across multiple agents*, or any additional async steps after `exec_async()`.

**Note**: `AsyncNode` must be wrapped in `AsyncFlow`. `AsyncFlow` can also include regular (sync) nodes.

### Example

```python
import asyncio

class SummarizeThenVerify(AsyncNode):
    async def prep_async(self, shared):
        # Example: read a file asynchronously
        doc_text = await read_file_async(shared["doc_path"])
        return doc_text

    async def exec_async(self, prep_res):
        # Example: async LLM call
        summary = await call_llm_async(f"Summarize: {prep_res}")
        return summary

    async def post_async(self, shared, prep_res, exec_res):
        # Example: wait for user feedback
        decision = await gather_user_feedback(exec_res)
        if decision == "approve":
            shared["summary"] = exec_res
            return "approve"
        return "deny"

summarize_node = SummarizeThenVerify()
final_node = Finalize()

# Define transitions
summarize_node - "approve" >> final_node
summarize_node - "deny" >> summarize_node  # retry

flow = AsyncFlow(start=summarize_node)

async def main():
    shared = {"doc_path": "document.txt"}
    await flow.run_async(shared)
    print("Final Summary:", shared.get("summary"))

asyncio.run(main())
```
\ No newline at end of file
diff --git a/docs/batch.md b/docs/batch.md
deleted file mode 100644
index b263845..0000000
--- a/docs/batch.md
+++ /dev/null
@@ -1,107 +0,0 @@
---
layout: default
title: "Batch"
parent: "Core Abstraction"
nav_order: 4
---

# Batch

**Batch** makes it easier to handle large inputs in one Node or **rerun** a Flow multiple times. Handy for:
- **Chunk-based** processing (e.g., splitting large texts).
- **Multi-file** processing.
- **Iterating** over lists of params (e.g., user queries, documents, URLs).

## 1. BatchNode
A **BatchNode** extends `Node` but changes `prep()`, `exec()`, and `post()`:

- **`prep(shared)`**: returns an **iterable** (e.g., list, generator).
- **`exec(item)`**: called **once** per item in that iterable.
- **`post(shared, prep_res, exec_res_list)`**: after all items are processed, receives a **list** of results (`exec_res_list`) and returns an **Action**.

### Example: Summarize a Large File

```python
class MapSummaries(BatchNode):
    def prep(self, shared):
        # Suppose we have a big file; chunk it
        content = shared["data"].get("large_text.txt", "")
        chunk_size = 10000
        chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)]
        return chunks

    def exec(self, chunk):
        prompt = f"Summarize this chunk in 10 words: {chunk}"
        summary = call_llm(prompt)
        return summary

    def post(self, shared, prep_res, exec_res_list):
        combined = "\n".join(exec_res_list)
        shared["summary"]["large_text.txt"] = combined
        return "default"

map_summaries = MapSummaries()
flow = Flow(start=map_summaries)
flow.run(shared)
```

---

## 2. BatchFlow

A **BatchFlow** runs a **Flow** multiple times, each time with different `params`. Think of it as a loop that replays the Flow for each parameter set.

### Example: Summarize Many Files

```python
class SummarizeAllFiles(BatchFlow):
    def prep(self, shared):
        # Return a list of param dicts (one per file)
        filenames = list(shared["data"].keys())  # e.g., ["file1.txt", "file2.txt", ...]
        return [{"filename": fn} for fn in filenames]

# Suppose we have a per-file Flow (e.g., load_file >> summarize >> reduce):
summarize_file = Flow(start=load_file)

# Wrap that flow into a BatchFlow:
summarize_all_files = SummarizeAllFiles(start=summarize_file)
summarize_all_files.run(shared)
```

### Under the Hood
1. `prep(shared)` returns a list of param dicts—e.g., `[{"filename": "file1.txt"}, {"filename": "file2.txt"}, ...]`.
2. The **BatchFlow** loops through each dict. For each one:
   - It merges the dict with the BatchFlow’s own `params`.
   - It calls `flow.run(shared)` using the merged result.
3. This means the sub-Flow is run **repeatedly**, once for every param dict (see the sketch below).
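Conceptually, the loop looks something like this—an illustrative sketch only, **not** the framework's actual implementation (the `params` and `start` attribute names are assumptions):

```python
# Rough sketch of the BatchFlow loop described above (not the real source).
def run_batch_flow(batch_flow, shared):
    for param_dict in batch_flow.prep(shared):        # e.g., [{"filename": "file1.txt"}, ...]
        merged = {**batch_flow.params, **param_dict}  # merge with the BatchFlow's own params
        batch_flow.start.set_params(merged)           # hand the merged params to the sub-Flow
        batch_flow.start.run(shared)                  # rerun the sub-Flow for this param set
```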
---

## 3. Nested or Multi-Level Batches

You can nest a **BatchFlow** in another **BatchFlow**. For instance:
- **Outer** batch: returns a list of directory param dicts (e.g., `{"directory": "/pathA"}`, `{"directory": "/pathB"}`, ...).
- **Inner** batch: returns a list of per-file param dicts.

At each level, **BatchFlow** merges its own param dict with the parent’s. By the time you reach the **innermost** node, the final `params` is the merged result of **all** parents in the chain. This way, a nested structure can keep track of the entire context (e.g., directory + file name) at once.

```python
import os

class FileBatchFlow(BatchFlow):
    def prep(self, shared):
        directory = self.params["directory"]
        files = [f for f in os.listdir(directory) if f.endswith(".txt")]
        return [{"filename": f} for f in files]

class DirectoryBatchFlow(BatchFlow):
    def prep(self, shared):
        directories = ["/path/to/dirA", "/path/to/dirB"]
        return [{"directory": d} for d in directories]

inner_flow = FileBatchFlow(start=MapSummaries())
outer_flow = DirectoryBatchFlow(start=inner_flow)
```
\ No newline at end of file
diff --git a/docs/communication.md b/docs/communication.md
deleted file mode 100644
index ebba2e2..0000000
--- a/docs/communication.md
+++ /dev/null
@@ -1,138 +0,0 @@
---
layout: default
title: "Communication"
parent: "Core Abstraction"
nav_order: 3
---

# Communication

Nodes and Flows **communicate** in two ways:

1. **Shared Store** – A global data structure (often an in-memory dict) that all nodes can read from and write to. Every Node’s `prep()` and `post()` methods receive the **same** `shared` store.
2. **Params** – Each node and Flow has a `params` dict assigned by the **parent Flow**. Params mostly serve as identifiers, letting each node/flow know what task it’s assigned.

If you know memory management, the **Shared Store** is like a **heap** shared across function calls, while **Params** are like a **stack** assigned by parent function calls.

> **Why not use other communication models like Message Passing?** *Message passing* works well for simple DAGs, but with *nested graphs* (Flows containing Flows, repeated or cyclic calls), routing messages becomes hard to maintain. A shared store keeps the design simple and easy to reason about.
{: .note }

---

## 1. Shared Store

### Overview

A shared store is typically an in-memory dictionary, like:
```python
shared = {"data": {}, "summary": {}, "config": {...}, ...}
```

It can also contain local file handles, DB connections, or a combination of these for persistence. We recommend deciding on the data structure or DB schema first, based on your app's requirements.

### Example

```python
class LoadData(Node):
    def prep(self, shared):
        # Suppose we read from disk or an API
        shared["data"]["my_file.txt"] = "Some text content"
        return None

class Summarize(Node):
    def prep(self, shared):
        # We can read what LoadData wrote
        content = shared["data"].get("my_file.txt", "")
        return content

    def exec(self, prep_res):
        prompt = f"Summarize: {prep_res}"
        summary = call_llm(prompt)
        return summary

    def post(self, shared, prep_res, exec_res):
        shared["summary"]["my_file.txt"] = exec_res
        return "default"

load_data = LoadData()
summarize = Summarize()
load_data >> summarize
flow = Flow(start=load_data)

shared = {"data": {}, "summary": {}}  # pre-create the keys the nodes write into
flow.run(shared)
```

Here:
- `LoadData` writes to `shared["data"]`.
- `Summarize` reads from the same location.

No special data-passing—just the same `shared` object.

---

## 2. Params

**Params** let you store *per-Node* or *per-Flow* config that doesn't need to live in the shared store. They are:
- **Immutable** during a Node’s run cycle (i.e., they don’t change mid-`prep`, `exec`, `post`).
- **Set** via `set_params()`.
- **Cleared** and updated each time a parent Flow runs the node.

> Only set params on the uppermost Flow; params set lower down will be overwritten by the parent Flow. If you need to set child node params, see [Batch](./batch.md).
{: .warning }

Typically, **Params** are identifiers (e.g., file name, page number).
Use them to fetch the task you assigned or write to a specific part of the shared store.

### Example

```python
# 1) Create a Node that uses params
class SummarizeFile(Node):
    def prep(self, shared):
        # Access the node's param
        filename = self.params["filename"]
        return shared["data"].get(filename, "")

    def exec(self, prep_res):
        prompt = f"Summarize: {prep_res}"
        return call_llm(prompt)

    def post(self, shared, prep_res, exec_res):
        filename = self.params["filename"]
        shared["summary"][filename] = exec_res
        return "default"

# 2) Create the Node
node = SummarizeFile()

# 3) Set Node params directly (for testing)
node.set_params({"filename": "doc1.txt"})
node.run(shared)

# 4) Create Flow
flow = Flow(start=node)

# 5) Set Flow params (overwrites node params)
flow.set_params({"filename": "doc2.txt"})
flow.run(shared)  # The node summarizes doc2, not doc1
```

---

## 3. Shared Store vs. Params

Think of the **Shared Store** like a heap and **Params** like a stack.

- **Shared Store**:
  - Public, global.
  - You can design and populate it ahead of time, e.g., with the input to process.
  - Great for data results, large content, or anything multiple nodes need.
  - Keep it tidy—structure it carefully (like a mini schema).

- **Params**:
  - Local, ephemeral.
  - Passed in by parent Flows. You should only set them on the uppermost Flow.
  - Perfect for small values like filenames or numeric IDs.
  - Do **not** persist across different nodes and are reset on each run.
\ No newline at end of file
diff --git a/docs/core_abstraction.md b/docs/core_abstraction.md
deleted file mode 100644
index 6043574..0000000
--- a/docs/core_abstraction.md
+++ /dev/null
@@ -1,6 +0,0 @@
---
layout: default
title: "Core Abstraction"
nav_order: 2
has_children: true
---
\ No newline at end of file
diff --git a/docs/flow.md b/docs/flow.md
deleted file mode 100644
index 6ac6208..0000000
--- a/docs/flow.md
+++ /dev/null
@@ -1,171 +0,0 @@
---
layout: default
title: "Flow"
parent: "Core Abstraction"
nav_order: 2
---

# Flow

A **Flow** orchestrates how Nodes connect and run, based on **Actions** returned from each Node’s `post()` method. You can chain Nodes in a sequence or create branching logic depending on the **Action** string.

## 1. Action-based Transitions

Each Node's `post(shared, prep_res, exec_res)` method returns an **Action** string. By default, if `post()` doesn't explicitly return anything, we treat that as `"default"`.

You define transitions with the syntax:

1. Basic default transition: `node_a >> node_b`
   This means if `node_a.post()` returns `"default"` (or `None`), go to `node_b`.
   (Equivalent to `node_a - "default" >> node_b`)

2. Named action transition: `node_a - "action_name" >> node_b`
   This means if `node_a.post()` returns `"action_name"`, go to `node_b`.

It’s possible to create loops, branching, or multi-step flows.

## 2. Creating a Flow

A **Flow** begins with a **start** node (or flow). You call `Flow(start=some_node)` to specify the entry point. When you call `flow.run(shared)`, it executes the first node, looks at the Action returned from its `post()`, follows the corresponding transition, and continues until there’s no next node or you explicitly stop.

### Example: Simple Sequence

Here’s a minimal flow of two nodes in a chain:

```python
node_a >> node_b
flow = Flow(start=node_a)
flow.run(shared)
```

- When you run the flow, it executes `node_a`.
- Suppose `node_a.post()` returns `"default"`.
- The flow then sees that the `"default"` Action is linked to `node_b` and runs `node_b`.
- If `node_b.post()` returns `"default"` but we didn’t define `node_b >> something_else`, the flow ends there.

### Example: Branching & Looping

Here's a simple expense approval flow that demonstrates branching and looping. The `ReviewExpense` node can return three possible Actions:

- `"approved"`: expense is approved, move to payment processing
- `"needs_revision"`: expense needs changes, send back for revision
- `"rejected"`: expense is denied, finish the process

We can wire them like this:

```python
# Define the flow connections
review - "approved" >> payment        # If approved, process payment
review - "needs_revision" >> revise   # If needs changes, go to revision
review - "rejected" >> finish         # If rejected, finish the process

revise >> review   # After revision, go back for another review
payment >> finish  # After payment, finish the process

flow = Flow(start=review)
```

Let's see how it flows:

1. If `review.post()` returns `"approved"`, the expense moves to the `payment` node.
2. If `review.post()` returns `"needs_revision"`, it goes to the `revise` node, which then loops back to `review`.
3. If `review.post()` returns `"rejected"`, it moves to the `finish` node and stops.

```mermaid
flowchart TD
    review[Review Expense] -->|approved| payment[Process Payment]
    review -->|needs_revision| revise[Revise Report]
    review -->|rejected| finish[Finish Process]

    revise --> review
    payment --> finish
```

### Running Individual Nodes vs. Running a Flow

- `node.run(shared)`: Just runs that node alone (calls `prep()`, `exec()`, `post()`) and returns an Action.
- `flow.run(shared)`: Executes from the start node, follows Actions to the next node, and so on until the flow can’t continue (no successor for the returned Action).

> `node.run(shared)` **does not** proceed automatically to the successor and may run with incorrect parameters.
> This is mainly for debugging or testing a single node.
> Always use `flow.run(...)` in production to ensure the full pipeline runs correctly.
{: .warning }

## 3. Nested Flows

A **Flow** can act like a Node, which enables powerful composition patterns. This means you can:

1. Use a Flow as a Node within another Flow's transitions.
2. Combine multiple smaller Flows into a larger Flow for reuse.
3. A Node's `params` will be the merged result of **all** parents' `params`.

### Basic Flow Nesting

Here's how to connect a flow to another node:

```python
# Create a sub-flow
node_a >> node_b
subflow = Flow(start=node_a)

# Connect it to another node
subflow >> node_c

# Create the parent flow
parent_flow = Flow(start=subflow)
```

When `parent_flow.run()` executes:
1. It starts `subflow`.
2. `subflow` runs through its nodes (`node_a` then `node_b`).
3. After `subflow` completes, execution continues to `node_c`.
### Example: Order Processing Pipeline

Here's a practical example that breaks down order processing into nested flows:

```python
# Payment processing sub-flow
validate_payment >> process_payment >> payment_confirmation
payment_flow = Flow(start=validate_payment)

# Inventory sub-flow
check_stock >> reserve_items >> update_inventory
inventory_flow = Flow(start=check_stock)

# Shipping sub-flow
create_label >> assign_carrier >> schedule_pickup
shipping_flow = Flow(start=create_label)

# Connect the flows into a main order pipeline
payment_flow >> inventory_flow >> shipping_flow

# Create the master flow
order_pipeline = Flow(start=payment_flow)

# Run the entire pipeline
order_pipeline.run(shared_data)
```

This creates a clean separation of concerns while maintaining a clear execution path:

```mermaid
flowchart LR
    subgraph order_pipeline[Order Pipeline]
        subgraph paymentFlow["Payment Flow"]
            A[Validate Payment] --> B[Process Payment] --> C[Payment Confirmation]
        end

        subgraph inventoryFlow["Inventory Flow"]
            D[Check Stock] --> E[Reserve Items] --> F[Update Inventory]
        end

        subgraph shippingFlow["Shipping Flow"]
            G[Create Label] --> H[Assign Carrier] --> I[Schedule Pickup]
        end

        paymentFlow --> inventoryFlow
        inventoryFlow --> shippingFlow
    end
```
diff --git a/docs/llm.md b/docs/llm.md
deleted file mode 100644
index 789cd06..0000000
--- a/docs/llm.md
+++ /dev/null
@@ -1,69 +0,0 @@
---
layout: default
title: "LLM Wrapper"
parent: "Preparation"
nav_order: 1
---

# LLM Wrappers

We **don't** provide built-in LLM wrappers. Instead, please implement your own, for example by asking an assistant like ChatGPT or Claude. If you ask ChatGPT to "implement a `call_llm` function that takes a prompt and returns the LLM response," you should get something like:

```python
def call_llm(prompt):
    from openai import OpenAI
    # Set the OpenAI API key (use environment variables, etc.)
    client = OpenAI(api_key="YOUR_API_KEY_HERE")
    r = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}]
    )
    return r.choices[0].message.content

# Example usage
call_llm("How are you?")
```

## Improvements

Feel free to enhance your `call_llm` function as needed. Here are some examples:

- Handle chat history:

```python
def call_llm(messages):
    from openai import OpenAI
    client = OpenAI(api_key="YOUR_API_KEY_HERE")
    r = client.chat.completions.create(
        model="gpt-4",
        messages=messages
    )
    return r.choices[0].message.content
```

- Add in-memory caching:

```python
from functools import lru_cache

@lru_cache(maxsize=1000)
def call_llm(prompt):
    # Your implementation here
    pass
```

- Enable logging:

```python
def call_llm(prompt):
    import logging
    logging.info(f"Prompt: {prompt}")
    response = ...  # Your implementation here
    logging.info(f"Response: {response}")
    return response
```

## Why Not Provide Built-in LLM Wrappers?

I believe it is a **bad practice** to provide LLM-specific implementations in a general framework:
- **LLM APIs change frequently**. Hardcoding them makes maintenance a nightmare.
- You may need **flexibility** to switch vendors, use fine-tuned models, or deploy local LLMs.
- You may need **optimizations** like prompt caching, request batching, or response streaming.
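Building on that last bullet, here is a minimal retry-with-backoff sketch you might wrap around your own `call_llm`. The attempt count, delays, and the broad `except Exception` are placeholder assumptions—catch your provider's actual rate-limit error class instead:

```python
import time

def call_llm_with_retry(prompt, max_attempts=3, base_delay=1.0):
    for attempt in range(max_attempts):
        try:
            return call_llm(prompt)  # the basic wrapper sketched above
        except Exception:            # ideally: your provider's rate-limit error
            if attempt == max_attempts - 1:
                raise                # out of attempts; surface the error
            time.sleep(base_delay * (2 ** attempt))  # back off: 1s, 2s, 4s, ...
```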
diff --git a/docs/node.md b/docs/node.md
deleted file mode 100644
index aeaa61e..0000000
--- a/docs/node.md
+++ /dev/null
@@ -1,95 +0,0 @@
---
layout: default
title: "Node"
parent: "Core Abstraction"
nav_order: 1
---

# Node

A **Node** is the smallest building block of Mini LLM Flow. Each Node has 3 steps:

1. **`prep(shared)`**
   - Reads and preprocesses data from the `shared` store for LLMs.
   - Examples: *query DB, read files, or serialize data into a string*.
   - Returns `prep_res`, which will be passed to both `exec()` and `post()`.

2. **`exec(prep_res)`**
   - The main execution step, where the LLM is called.
   - Optionally has built-in retry and error handling (see below).
   - ⚠️ If retries are enabled, ensure the implementation is idempotent.
   - Returns `exec_res`, which is passed to `post()`.

3. **`post(shared, prep_res, exec_res)`**
   - Writes results back to the `shared` store or decides the next action.
   - Examples: *finalize outputs, trigger next steps, or log results*.
   - Returns a **string** to specify the next action (treated as `"default"` if nothing or `None` is returned).

> All 3 steps are optional. For example, you might only need to run the prep step without calling the LLM.
{: .note }

## Fault Tolerance & Retries

Nodes in Mini LLM Flow can **retry** execution if `exec()` raises an exception. You control this via two parameters when you create the Node:

- `max_retries` (int): How many times to try running `exec()` in total. The default is `1`, which means **no** retry.
- `wait` (int): The time to wait (in **seconds**) before each retry attempt. By default, `wait=0` (i.e., no waiting). Increasing this is helpful when you encounter rate limits or quota errors from your LLM provider and need to back off.

```python
my_node = SummarizeFile(max_retries=3, wait=10)
```

When an exception occurs in `exec()`, the Node automatically retries until:

- It succeeds, or
- The Node has already retried `max_retries - 1` times and fails on the last attempt.

### Graceful Fallback

If you want to **gracefully handle** the error rather than raising it, you can override:

```python
def exec_fallback(self, shared, prep_res, exc):
    raise exc
```

By default, it just re-raises `exc`. But you can return a fallback result instead. That fallback result becomes the `exec_res` passed to `post()`.

## Example

```python
class SummarizeFile(Node):
    def prep(self, shared):
        filename = self.params["filename"]
        return shared["data"][filename]

    def exec(self, prep_res):
        if not prep_res:
            raise ValueError("Empty file content!")
        prompt = f"Summarize this text in 10 words: {prep_res}"
        summary = call_llm(prompt)  # might fail
        return summary

    def exec_fallback(self, shared, prep_res, exc):
        # Provide a simple fallback instead of crashing
        return "There was an error processing your request."

    def post(self, shared, prep_res, exec_res):
        filename = self.params["filename"]
        shared["summary"][filename] = exec_res
        # Return "default" by not returning anything

summarize_node = SummarizeFile(max_retries=3)

# Run the node standalone for testing (calls prep->exec->post).
# If exec() fails, it retries up to 3 times before calling exec_fallback().
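# NOTE: `shared` below is a hypothetical minimal store for this walkthrough;
# its "data"/"summary" layout is assumed from prep() and post() above.
shared = {"data": {"test_file.txt": "Sample text to summarize."}, "summary": {}}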
summarize_node.set_params({"filename": "test_file.txt"})
action_result = summarize_node.run(shared)

print("Action returned:", action_result)  # Usually "default"
print("Summary stored:", shared["summary"].get("test_file.txt"))
```

diff --git a/docs/paradigm.md b/docs/paradigm.md
deleted file mode 100644
index ec5072f..0000000
--- a/docs/paradigm.md
+++ /dev/null
@@ -1,6 +0,0 @@
---
layout: default
title: "Paradigm"
nav_order: 4
has_children: true
---
\ No newline at end of file
diff --git a/docs/parallel.md b/docs/parallel.md
deleted file mode 100644
index f822fbe..0000000
--- a/docs/parallel.md
+++ /dev/null
@@ -1,54 +0,0 @@
---
layout: default
title: "(Advanced) Parallel"
parent: "Core Abstraction"
nav_order: 6
---

# (Advanced) Parallel

**Parallel** Nodes and Flows let you run multiple **Async** Nodes and Flows **concurrently**—for example, summarizing multiple texts at once. This can improve performance by overlapping I/O and compute.

## AsyncParallelBatchNode

Like **AsyncBatchNode**, but runs `exec_async()` in **parallel**:

```python
class ParallelSummaries(AsyncParallelBatchNode):
    async def prep_async(self, shared):
        # e.g., multiple texts
        return shared["texts"]

    async def exec_async(self, text):
        prompt = f"Summarize: {text}"
        return await call_llm_async(prompt)

    async def post_async(self, shared, prep_res, exec_res_list):
        shared["summary"] = "\n\n".join(exec_res_list)
        return "default"

node = ParallelSummaries()
flow = AsyncFlow(start=node)
```

## AsyncParallelBatchFlow

The parallel version of **BatchFlow**. Each iteration of the sub-flow runs **concurrently**, using different parameters:

```python
class SummarizeMultipleFiles(AsyncParallelBatchFlow):
    async def prep_async(self, shared):
        return [{"filename": f} for f in shared["files"]]

sub_flow = AsyncFlow(start=LoadAndSummarizeFile())
parallel_flow = SummarizeMultipleFiles(start=sub_flow)
await parallel_flow.run_async(shared)
```

## Best Practices

- **Ensure Tasks Are Independent**: If each item depends on the output of a previous item, **do not** parallelize.

- **Beware of Rate Limits**: Parallel calls can **quickly** trigger rate limits on LLM services. You may need a **throttling** mechanism such as semaphores or sleep intervals (see the sketch after this list).

- **Consider Single-Node Batch APIs**: Some LLMs offer a **batch inference** API where you can send multiple prompts in a single call. This is more complex to implement but can be more efficient than launching many parallel requests, and it mitigates rate limits.
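A minimal throttling sketch using `asyncio.Semaphore`—the limit of 5 and the `call_llm_async` helper are assumptions to adapt to your provider:

```python
import asyncio

sem = asyncio.Semaphore(5)  # assumed cap: at most 5 concurrent LLM calls

async def throttled_llm_call(prompt):
    # Parallel exec_async() calls funnel through here, so no more than
    # 5 requests are ever in flight at once.
    async with sem:
        return await call_llm_async(prompt)
```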
diff --git a/docs/preparation.md b/docs/preparation.md
deleted file mode 100644
index 9e8cbf9..0000000
--- a/docs/preparation.md
+++ /dev/null
@@ -1,6 +0,0 @@
---
layout: default
title: "Preparation"
nav_order: 3
has_children: true
---
\ No newline at end of file
diff --git a/docs/structure.md b/docs/structure.md
deleted file mode 100644
index cc2b123..0000000
--- a/docs/structure.md
+++ /dev/null
@@ -1,111 +0,0 @@
---
layout: default
title: "Structured Output"
parent: "Paradigm"
nav_order: 1
---

# Structured Output

In many use cases, you may want the LLM to output a specific structure, such as a list or a dictionary with predefined keys.

There are several approaches to achieve structured output:
- **Prompting** the LLM to strictly return a defined structure.
- Using LLMs that natively support **schema enforcement**.
- **Post-processing** the LLM’s response to extract structured content.

In practice, **Prompting** is simple and reliable for modern LLMs.

### Example Use Cases

- Extracting Key Information

```yaml
product:
  name: Widget Pro
  price: 199.99
  description: |
    A high-quality widget designed for professionals.
    Recommended for advanced users.
```

- Summarizing Documents into Bullet Points

```yaml
summary:
  - This product is easy to use.
  - It is cost-effective.
  - Suitable for all skill levels.
```

- Generating Configuration Files

```yaml
server:
  host: 127.0.0.1
  port: 8080
  ssl: true
```

## Prompt Engineering

When prompting the LLM to produce **structured** output:
1. **Wrap** the structure in code fences (e.g., ` ```yaml`).
2. **Validate** that all required fields exist (and let the `Node` handle retries).

### Example Text Summarization

```python
class SummarizeNode(Node):
    def exec(self, prep_res):
        # Suppose `prep_res` is the text to summarize.
        prompt = f"""
Please summarize the following text as YAML, with exactly 3 bullet points

{prep_res}

Now, output:
```yaml
summary:
  - bullet 1
  - bullet 2
  - bullet 3
```"""
        response = call_llm(prompt)
        yaml_str = response.split("```yaml")[1].split("```")[0].strip()

        import yaml
        structured_result = yaml.safe_load(yaml_str)

        assert "summary" in structured_result
        assert isinstance(structured_result["summary"], list)

        return structured_result
```

### Why YAML instead of JSON?

Current LLMs struggle with escaping. YAML is easier for strings, since they don’t always need quotes.

**In JSON**

```json
{
  "dialogue": "Alice said: \"Hello Bob.\\nHow are you?\\nI am good.\""
}
```

- Every double quote inside the string must be escaped with `\"`.
- Each newline in the dialogue must be represented as `\n`.

**In YAML**

```yaml
dialogue: |
  Alice said: "Hello Bob.
  How are you?
  I am good."
```

- No need to escape interior quotes—just place the entire text under a block literal (`|`).
- Newlines are naturally preserved without needing `\n`.
\ No newline at end of file
diff --git a/docs/tool.md b/docs/tool.md
deleted file mode 100644
index a0256b0..0000000
--- a/docs/tool.md
+++ /dev/null
@@ -1,165 +0,0 @@
---
layout: default
title: "Tool"
parent: "Preparation"
nav_order: 2
---

# Tool

Similar to LLM wrappers, we **don't** provide built-in tools. Here, we recommend some *minimal* (and incomplete) implementations of commonly used tools. These examples can serve as a starting point for your own tooling.

---

## 1. Embedding Calls

```python
def get_embedding(text):
    import openai
    # Set your API key elsewhere, e.g., environment variables
    r = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=text
    )
    return r["data"][0]["embedding"]
```

---

## 2. Vector Database (Faiss)

```python
import faiss
import numpy as np

def create_index(embeddings):
    dim = len(embeddings[0])
    index = faiss.IndexFlatL2(dim)
    index.add(np.array(embeddings).astype('float32'))
    return index

def search_index(index, query_embedding, top_k=5):
    D, I = index.search(
        np.array([query_embedding]).astype('float32'),
        top_k
    )
    return I, D
```

---

## 3. Local Database

```python
import sqlite3

def execute_sql(query):
    conn = sqlite3.connect("mydb.db")
    cursor = conn.cursor()
    cursor.execute(query)
    result = cursor.fetchall()
    conn.commit()
    conn.close()
    return result
```

---

## 4. Python Function Execution
```python
def run_code(code_str):
    env = {}
    exec(code_str, env)
    return env
```

---

## 5. PDF Extraction

```python
def extract_text_from_pdf(file_path):
    import PyPDF2
    with open(file_path, "rb") as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        text = ""
        for page in reader.pages:
            text += page.extract_text()
    return text
```

---

## 6. Web Crawling

```python
def crawl_web(url):
    import requests
    from bs4 import BeautifulSoup
    html = requests.get(url).text
    soup = BeautifulSoup(html, "html.parser")
    return soup.title.string, soup.get_text()
```

---

## 7. Basic Search (SerpAPI example)

```python
def search_google(query):
    import requests
    params = {
        "engine": "google",
        "q": query,
        "api_key": "YOUR_API_KEY"
    }
    r = requests.get("https://serpapi.com/search", params=params)
    return r.json()
```

---

## 8. Audio Transcription (OpenAI Whisper)

```python
def transcribe_audio(file_path):
    import openai
    with open(file_path, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript["text"]
```

---

## 9. Text-to-Speech (TTS)

```python
def text_to_speech(text):
    import pyttsx3
    engine = pyttsx3.init()
    engine.say(text)
    engine.runAndWait()
```

---

## 10. Sending Email

```python
def send_email(to_address, subject, body, from_address, password):
    import smtplib
    from email.mime.text import MIMEText

    msg = MIMEText(body)
    msg["Subject"] = subject
    msg["From"] = from_address
    msg["To"] = to_address

    with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
        server.login(from_address, password)
        server.sendmail(from_address, [to_address], msg.as_string())
```
\ No newline at end of file