update cursor rule files
This commit is contained in:
parent
b561a10c76
commit
f6c4b06db8
|
|
@ -50,30 +50,75 @@ flow.run(shared)
|
||||||
|
|
||||||
A **BatchFlow** runs a **Flow** multiple times, each time with different `params`. Think of it as a loop that replays the Flow for each parameter set.
|
A **BatchFlow** runs a **Flow** multiple times, each time with different `params`. Think of it as a loop that replays the Flow for each parameter set.
|
||||||
|
|
||||||
|
### Key Differences from BatchNode
|
||||||
|
|
||||||
|
**Important**: Unlike BatchNode, which processes items and modifies the shared store:
|
||||||
|
|
||||||
|
1. BatchFlow returns **parameters to pass to the child Flow**, not data to process
|
||||||
|
2. These parameters are accessed in child nodes via `self.params`, not from the shared store
|
||||||
|
3. Each child Flow runs independently with a different set of parameters
|
||||||
|
4. Child nodes can be regular Nodes, not BatchNodes (the batching happens at the Flow level)
|
||||||
|
|
||||||
### Example: Summarize Many Files
|
### Example: Summarize Many Files
|
||||||
|
|
||||||
```python
|
```python
|
||||||
class SummarizeAllFiles(BatchFlow):
|
class SummarizeAllFiles(BatchFlow):
|
||||||
def prep(self, shared):
|
def prep(self, shared):
|
||||||
# Return a list of param dicts (one per file)
|
# IMPORTANT: Return a list of param dictionaries (not data for processing)
|
||||||
filenames = list(shared["data"].keys()) # e.g., ["file1.txt", "file2.txt", ...]
|
filenames = list(shared["data"].keys()) # e.g., ["file1.txt", "file2.txt", ...]
|
||||||
return [{"filename": fn} for fn in filenames]
|
return [{"filename": fn} for fn in filenames]
|
||||||
|
|
||||||
# Suppose we have a per-file Flow (e.g., load_file >> summarize >> reduce):
|
# Child node that accesses filename from params, not shared store
|
||||||
summarize_file = SummarizeFile(start=load_file)
|
class LoadFile(Node):
|
||||||
|
def prep(self, shared):
|
||||||
|
# Access filename from params (not from shared)
|
||||||
|
filename = self.params["filename"] # Important! Use self.params, not shared
|
||||||
|
return filename
|
||||||
|
|
||||||
|
def exec(self, filename):
|
||||||
|
with open(filename, 'r') as f:
|
||||||
|
return f.read()
|
||||||
|
|
||||||
|
def post(self, shared, prep_res, exec_res):
|
||||||
|
# Store file content in shared
|
||||||
|
shared["current_file_content"] = exec_res
|
||||||
|
return "default"
|
||||||
|
|
||||||
# Wrap that flow into a BatchFlow:
|
# Summarize node that works on the currently loaded file
|
||||||
|
class Summarize(Node):
|
||||||
|
def prep(self, shared):
|
||||||
|
return shared["current_file_content"]
|
||||||
|
|
||||||
|
def exec(self, content):
|
||||||
|
prompt = f"Summarize this file in 50 words: {content}"
|
||||||
|
return call_llm(prompt)
|
||||||
|
|
||||||
|
def post(self, shared, prep_res, exec_res):
|
||||||
|
# Store summary in shared, indexed by current filename
|
||||||
|
filename = self.params["filename"] # Again, using params
|
||||||
|
if "summaries" not in shared:
|
||||||
|
shared["summaries"] = {}
|
||||||
|
shared["summaries"][filename] = exec_res
|
||||||
|
return "default"
|
||||||
|
|
||||||
|
# Create a per-file flow
|
||||||
|
load_file = LoadFile()
|
||||||
|
summarize = Summarize()
|
||||||
|
load_file >> summarize
|
||||||
|
summarize_file = Flow(start=load_file)
|
||||||
|
|
||||||
|
# Wrap in a BatchFlow to process all files
|
||||||
summarize_all_files = SummarizeAllFiles(start=summarize_file)
|
summarize_all_files = SummarizeAllFiles(start=summarize_file)
|
||||||
summarize_all_files.run(shared)
|
summarize_all_files.run(shared)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Under the Hood
|
### Under the Hood
|
||||||
1. `prep(shared)` returns a list of param dicts—e.g., `[{filename: "file1.txt"}, {filename: "file2.txt"}, ...]`.
|
1. `prep(shared)` in the BatchFlow returns a list of param dicts—e.g., `[{"filename": "file1.txt"}, {"filename": "file2.txt"}, ...]`.
|
||||||
2. The **BatchFlow** loops through each dict. For each one:
|
2. The **BatchFlow** loops through each dict. For each one:
|
||||||
- It merges the dict with the BatchFlow’s own `params`.
|
- It merges the dict with the BatchFlow's own `params` (if any): `{**batch_flow.params, **dict_from_prep}`
|
||||||
- It calls `flow.run(shared)` using the merged result.
|
- It calls `flow.run(shared)` using the merged parameters
|
||||||
3. This means the sub-Flow is run **repeatedly**, once for every param dict.
|
- **IMPORTANT**: These parameters are passed to the child Flow's nodes via `self.params`, NOT via the shared store
|
||||||
|
3. This means the sub-Flow is run **repeatedly**, once for every param dict, with each node in the flow accessing the parameters via `self.params`.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -89,6 +134,7 @@ At each level, **BatchFlow** merges its own param dict with the parent’s. By t
|
||||||
|
|
||||||
class FileBatchFlow(BatchFlow):
|
class FileBatchFlow(BatchFlow):
|
||||||
def prep(self, shared):
|
def prep(self, shared):
|
||||||
|
# Access directory from params (set by parent)
|
||||||
directory = self.params["directory"]
|
directory = self.params["directory"]
|
||||||
# e.g., files = ["file1.txt", "file2.txt", ...]
|
# e.g., files = ["file1.txt", "file2.txt", ...]
|
||||||
files = [f for f in os.listdir(directory) if f.endswith(".txt")]
|
files = [f for f in os.listdir(directory) if f.endswith(".txt")]
|
||||||
|
|
@ -99,7 +145,31 @@ class DirectoryBatchFlow(BatchFlow):
|
||||||
directories = [ "/path/to/dirA", "/path/to/dirB"]
|
directories = [ "/path/to/dirA", "/path/to/dirB"]
|
||||||
return [{"directory": d} for d in directories]
|
return [{"directory": d} for d in directories]
|
||||||
|
|
||||||
# MapSummaries have params like {"directory": "/path/to/dirA", "filename": "file1.txt"}
|
# The actual processing node
|
||||||
inner_flow = FileBatchFlow(start=MapSummaries())
|
class ProcessFile(Node):
|
||||||
|
def prep(self, shared):
|
||||||
|
# Access both directory and filename from params
|
||||||
|
directory = self.params["directory"] # From outer batch
|
||||||
|
filename = self.params["filename"] # From inner batch
|
||||||
|
full_path = os.path.join(directory, filename)
|
||||||
|
return full_path
|
||||||
|
|
||||||
|
def exec(self, full_path):
|
||||||
|
# Process the file...
|
||||||
|
return f"Processed {full_path}"
|
||||||
|
|
||||||
|
def post(self, shared, prep_res, exec_res):
|
||||||
|
# Store results, perhaps indexed by path
|
||||||
|
if "results" not in shared:
|
||||||
|
shared["results"] = {}
|
||||||
|
shared["results"][prep_res] = exec_res
|
||||||
|
return "default"
|
||||||
|
|
||||||
|
# Set up the nested batch structure
|
||||||
|
process_node = ProcessFile()
|
||||||
|
inner_flow = FileBatchFlow(start=process_node)
|
||||||
outer_flow = DirectoryBatchFlow(start=inner_flow)
|
outer_flow = DirectoryBatchFlow(start=inner_flow)
|
||||||
|
|
||||||
|
# Run it
|
||||||
|
outer_flow.run(shared)
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -13,10 +13,10 @@ Nodes and Flows **communicate** in 2 ways:
|
||||||
- Great for data results, large content, or anything multiple nodes need.
|
- Great for data results, large content, or anything multiple nodes need.
|
||||||
- You shall design the data structure and populate it ahead.
|
- You shall design the data structure and populate it ahead.
|
||||||
|
|
||||||
- > **Separation of Concerns:** Use `Shared Store` for almost all cases to separate *Data Schema* from *Compute Logic*! This approach is both flexible and easy to manage, resulting in more maintainable code. `Params` is more a syntax sugar for [Batch](mdc:batch.md).
|
- > **Separation of Concerns:** Use `Shared Store` for almost all cases to separate *Data Schema* from *Compute Logic*! This approach is both flexible and easy to manage, resulting in more maintainable code. `Params` is more a syntax sugar for [Batch](mdc:./batch.md).
|
||||||
{: .best-practice }
|
{: .best-practice }
|
||||||
|
|
||||||
2. **Params (only for [Batch](mdc:batch.md))**
|
2. **Params (only for [Batch](mdc:./batch.md))**
|
||||||
- Each node has a local, ephemeral `params` dict passed in by the **parent Flow**, used as an identifier for tasks. Parameter keys and values shall be **immutable**.
|
- Each node has a local, ephemeral `params` dict passed in by the **parent Flow**, used as an identifier for tasks. Parameter keys and values shall be **immutable**.
|
||||||
- Good for identifiers like filenames or numeric IDs, in Batch mode.
|
- Good for identifiers like filenames or numeric IDs, in Batch mode.
|
||||||
|
|
||||||
|
|
@ -84,7 +84,7 @@ Here:
|
||||||
|
|
||||||
> Only set the uppermost Flow params because others will be overwritten by the parent Flow.
|
> Only set the uppermost Flow params because others will be overwritten by the parent Flow.
|
||||||
>
|
>
|
||||||
> If you need to set child node params, see [Batch](mdc:batch.md).
|
> If you need to set child node params, see [Batch](mdc:./batch.md).
|
||||||
{: .warning }
|
{: .warning }
|
||||||
|
|
||||||
Typically, **Params** are identifiers (e.g., file name, page number). Use them to fetch the task you assigned or write to a specific part of the shared store.
|
Typically, **Params** are identifiers (e.g., file name, page number). Use them to fetch the task you assigned or write to a specific part of the shared store.
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ Agent is a powerful design pattern in which nodes can take dynamic actions based
|
||||||
## Implement Agent with Graph
|
## Implement Agent with Graph
|
||||||
|
|
||||||
1. **Context and Action:** Implement nodes that supply context and perform actions.
|
1. **Context and Action:** Implement nodes that supply context and perform actions.
|
||||||
2. **Branching:** Use branching to connect each action node to an agent node. Use action to allow the agent to direct the [flow](mdc:../core_abstraction/flow.md) between nodes—and potentially loop back for multi-step.
|
2. **Branching:** Use branching to connect each action node to an agent node. Use action to allow the agent to direct the [flow](../core_abstraction/flow.md) between nodes—and potentially loop back for multi-step.
|
||||||
3. **Agent Node:** Provide a prompt to decide action—for example:
|
3. **Agent Node:** Provide a prompt to decide action—for example:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
|
@ -48,7 +48,7 @@ parameters:
|
||||||
|
|
||||||
The core of building **high-performance** and **reliable** agents boils down to:
|
The core of building **high-performance** and **reliable** agents boils down to:
|
||||||
|
|
||||||
1. **Context Management:** Provide *relevant, minimal context.* For example, rather than including an entire chat history, retrieve the most relevant via [RAG](mdc:rag.md). Even with larger context windows, LLMs still fall victim to ["lost in the middle"](mdc:https:/arxiv.org/abs/2307.03172), overlooking mid-prompt content.
|
1. **Context Management:** Provide *relevant, minimal context.* For example, rather than including an entire chat history, retrieve the most relevant via [RAG](mdc:./rag.md). Even with larger context windows, LLMs still fall victim to ["lost in the middle"](https://arxiv.org/abs/2307.03172), overlooking mid-prompt content.
|
||||||
|
|
||||||
2. **Action Space:** Provide *a well-structured and unambiguous* set of actions—avoiding overlap like separate `read_databases` or `read_csvs`. Instead, import CSVs into the database.
|
2. **Action Space:** Provide *a well-structured and unambiguous* set of actions—avoiding overlap like separate `read_databases` or `read_csvs`. Instead, import CSVs into the database.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ and there is a logical way to break the task into smaller, ideally independent p
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
You first break down the task using [BatchNode](mdc:../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase.
|
You first break down the task using [BatchNode](../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase.
|
||||||
|
|
||||||
### Example: Document Summarization
|
### Example: Document Summarization
|
||||||
|
|
||||||
|
|
@ -65,5 +65,5 @@ print("Individual Summaries:", shared["file_summaries"])
|
||||||
print("\nFinal Summary:\n", shared["all_files_summary"])
|
print("\nFinal Summary:\n", shared["all_files_summary"])
|
||||||
```
|
```
|
||||||
|
|
||||||
> **Performance Tip**: The example above works sequentially. You can speed up the map phase by running it in parallel. See [(Advanced) Parallel](mdc:../core_abstraction/parallel.md) for more details.
|
> **Performance Tip**: The example above works sequentially. You can speed up the map phase by running it in parallel. See [(Advanced) Parallel](../core_abstraction/parallel.md) for more details.
|
||||||
{: .note }
|
{: .note }
|
||||||
|
|
@ -5,7 +5,7 @@ alwaysApply: false
|
||||||
---
|
---
|
||||||
# (Advanced) Multi-Agents
|
# (Advanced) Multi-Agents
|
||||||
|
|
||||||
Multiple [Agents](mdc:flow.md) can work together by handling subtasks and communicating the progress.
|
Multiple [Agents](mdc:./agent.md) can work together by handling subtasks and communicating the progress.
|
||||||
Communication between agents is typically implemented using message queues in shared storage.
|
Communication between agents is typically implemented using message queues in shared storage.
|
||||||
|
|
||||||
> Most of the time, you don't need Multi-Agents. Start with a simple solution first.
|
> Most of the time, you don't need Multi-Agents. Start with a simple solution first.
|
||||||
|
|
|
||||||
|
|
@ -16,9 +16,9 @@ For certain LLM tasks like answering questions, providing relevant context is es
|
||||||
## Stage 1: Offline Indexing
|
## Stage 1: Offline Indexing
|
||||||
|
|
||||||
We create three Nodes:
|
We create three Nodes:
|
||||||
1. `ChunkDocs` – [chunks](mdc:../utility_function/chunking.md) raw text.
|
1. `ChunkDocs` – [chunks](../utility_function/chunking.md) raw text.
|
||||||
2. `EmbedDocs` – [embeds](mdc:../utility_function/embedding.md) each chunk.
|
2. `EmbedDocs` – [embeds](../utility_function/embedding.md) each chunk.
|
||||||
3. `StoreIndex` – stores embeddings into a [vector database](mdc:../utility_function/vector.md).
|
3. `StoreIndex` – stores embeddings into a [vector database](../utility_function/vector.md).
|
||||||
|
|
||||||
```python
|
```python
|
||||||
class ChunkDocs(BatchNode):
|
class ChunkDocs(BatchNode):
|
||||||
|
|
|
||||||
|
|
@ -81,7 +81,7 @@ summary:
|
||||||
return structured_result
|
return structured_result
|
||||||
```
|
```
|
||||||
|
|
||||||
> Besides using `assert` statements, another popular way to validate schemas is [Pydantic](mdc:https:/github.com/pydantic/pydantic)
|
> Besides using `assert` statements, another popular way to validate schemas is [Pydantic](https://github.com/pydantic/pydantic)
|
||||||
{: .note }
|
{: .note }
|
||||||
|
|
||||||
### Why YAML instead of JSON?
|
### Why YAML instead of JSON?
|
||||||
|
|
|
||||||
|
|
@ -5,14 +5,14 @@ alwaysApply: false
|
||||||
---
|
---
|
||||||
# Workflow
|
# Workflow
|
||||||
|
|
||||||
Many real-world tasks are too complex for one LLM call. The solution is to **Task Decomposition**: decompose them into a [chain](mdc:../core_abstraction/flow.md) of multiple Nodes.
|
Many real-world tasks are too complex for one LLM call. The solution is to **Task Decomposition**: decompose them into a [chain](../core_abstraction/flow.md) of multiple Nodes.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
> - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*.
|
> - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*.
|
||||||
> - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*.
|
> - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*.
|
||||||
>
|
>
|
||||||
> You usually need multiple *iterations* to find the *sweet spot*. If the task has too many *edge cases*, consider using [Agents](mdc:agent.md).
|
> You usually need multiple *iterations* to find the *sweet spot*. If the task has too many *edge cases*, consider using [Agents](mdc:./agent.md).
|
||||||
{: .best-practice }
|
{: .best-practice }
|
||||||
|
|
||||||
### Example: Article Writing
|
### Example: Article Writing
|
||||||
|
|
@ -46,4 +46,4 @@ shared = {"topic": "AI Safety"}
|
||||||
writing_flow.run(shared)
|
writing_flow.run(shared)
|
||||||
```
|
```
|
||||||
|
|
||||||
For *dynamic cases*, consider using [Agents](mdc:agent.md).
|
For *dynamic cases*, consider using [Agents](mdc:./agent.md).
|
||||||
|
|
@ -3,9 +3,19 @@ description: Guidelines for using PocketFlow, Agentic Coding
|
||||||
globs: **/*.py
|
globs: **/*.py
|
||||||
alwaysApply: true
|
alwaysApply: true
|
||||||
---
|
---
|
||||||
|
# DOCUMENTATION FIRST POLICY
|
||||||
|
|
||||||
|
**CRITICAL INSTRUCTION**: When implementing a Pocket Flow app:
|
||||||
|
|
||||||
|
1. **ALWAYS REQUEST MDC FILES FIRST** - Before writing any code, request and review all relevant MDC documentation files. This doc provides an explanation of the documents.
|
||||||
|
2. **UNDERSTAND THE FRAMEWORK** - Gain comprehensive understanding of the Pocket Flow framework from documentation
|
||||||
|
3. **AVOID ASSUMPTION-DRIVEN DEVELOPMENT** - Do not base your implementation on assumptions or guesswork. Even if the human didn't explicitly mention pocket flow in their request, if the code you are editing is using pocket flow, you should request relevant docs to help you understand best practice as well before editing.
|
||||||
|
|
||||||
|
**VERIFICATION**: Begin each implementation with a brief summary of the documentation you've reviewed to inform your approach.
|
||||||
|
|
||||||
# Agentic Coding: Humans Design, Agents code!
|
# Agentic Coding: Humans Design, Agents code!
|
||||||
|
|
||||||
> If you are an AI agents involved in building LLM Systems, read this guide **VERY, VERY** carefully! This is the most important chapter in the entire document. Throughout development, you should always (1) start with a small and simple solution, (2) design at a high level (`docs/design.md`) before implementation, and (3) frequently ask humans for feedback and clarification.
|
> If you are an AI agent involved in building LLM Systems, read this guide **VERY, VERY** carefully! This is the most important chapter in the entire document. Throughout development, you should always (1) start with a small and simple solution, (2) design at a high level (`docs/design.md`) before implementation, and (3) frequently ask humans for feedback and clarification.
|
||||||
{: .warning }
|
{: .warning }
|
||||||
|
|
||||||
## Agentic Coding Steps
|
## Agentic Coding Steps
|
||||||
|
|
@ -225,3 +235,62 @@ my_project/
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
# Pocket Flow
|
||||||
|
|
||||||
|
A [100-line](https://github.com/the-pocket/PocketFlow/blob/main/pocketflow/__init__.py) minimalist LLM framework for *Agents, Task Decomposition, RAG, etc*.
|
||||||
|
|
||||||
|
- **Lightweight**: Just the core graph abstraction in 100 lines. ZERO dependencies and ZERO vendor lock-in.
|
||||||
|
- **Expressive**: Everything you love from larger frameworks—([Multi-])[Agents], [Workflow], [RAG], and more.
|
||||||
|
- **Agentic-Coding**: Intuitive enough for AI agents to help humans build complex LLM applications.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Core Abstraction
|
||||||
|
|
||||||
|
We model the LLM workflow as a **Graph + Shared Store**:
|
||||||
|
|
||||||
|
- [Node] handles simple (LLM) tasks.
|
||||||
|
- [Flow] connects nodes through **Actions** (labeled edges).
|
||||||
|
- [Shared Store] enables communication between nodes within flows.
|
||||||
|
- [Batch] nodes/flows allow for data-intensive tasks.
|
||||||
|
- [Async] nodes/flows allow waiting for asynchronous tasks.
|
||||||
|
- [(Advanced) Parallel] nodes/flows handle I/O-bound tasks.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Design Pattern
|
||||||
|
|
||||||
|
From there, it’s easy to implement popular design patterns:
|
||||||
|
|
||||||
|
- [Agent] autonomously makes decisions.
|
||||||
|
- [Workflow] chains multiple tasks into pipelines.
|
||||||
|
- [RAG] integrates data retrieval with generation.
|
||||||
|
- [Map Reduce] splits data tasks into Map and Reduce steps.
|
||||||
|
- [Structured Output] formats outputs consistently.
|
||||||
|
- [(Advanced) Multi-Agents] coordinate multiple agents.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Utility Function
|
||||||
|
|
||||||
|
We **do not** provide built-in utilities. Instead, we offer *examples*—please *implement your own*:
|
||||||
|
|
||||||
|
- [LLM Wrapper]
|
||||||
|
- [Viz and Debug]
|
||||||
|
- [Web Search]
|
||||||
|
- [Chunking]
|
||||||
|
- [Embedding]
|
||||||
|
- [Vector Databases]
|
||||||
|
- [Text-to-Speech]
|
||||||
|
|
||||||
|
**Why not built-in?**: I believe it's *bad practice* to build vendor-specific APIs into a general framework:
|
||||||
|
- *API Volatility*: Frequent changes lead to heavy maintenance for hardcoded APIs.
|
||||||
|
- *Flexibility*: You may want to switch vendors, use fine-tuned models, or run them locally.
|
||||||
|
- *Optimizations*: Prompt caching, batching, and streaming are easier without vendor lock-in.
|
||||||
|
|
||||||
|
## Ready to build your Apps?
|
||||||
|
|
||||||
|
Check out [Agentic Coding Guidance], the fastest way to develop LLM projects with Pocket Flow!
|
||||||
|
|
@ -1,62 +0,0 @@
|
||||||
---
|
|
||||||
description: Guidelines for using PocketFlow, a minimalist LLM framework
|
|
||||||
globs: **/*.py
|
|
||||||
alwaysApply: true
|
|
||||||
---
|
|
||||||
# Pocket Flow
|
|
||||||
|
|
||||||
A [100-line](mdc:https:/github.com/the-pocket/PocketFlow/blob/main/pocketflow/__init__.py) minimalist LLM framework for *Agents, Task Decomposition, RAG, etc*.
|
|
||||||
|
|
||||||
- **Lightweight**: Just the core graph abstraction in 100 lines. ZERO dependencies, and vendor lock-in.
|
|
||||||
- **Expressive**: Everything you love from larger frameworks—([Multi-])[Agents], [Workflow], [RAG], and more.
|
|
||||||
- **Agentic-Coding**: Intuitive enough for AI agents to help humans build complex LLM applications.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Core Abstraction
|
|
||||||
|
|
||||||
We model the LLM workflow as a **Graph + Shared Store**:
|
|
||||||
|
|
||||||
- [Node] handles simple (LLM) tasks.
|
|
||||||
- [Flow] connects nodes through **Actions** (labeled edges).
|
|
||||||
- [Shared Store] enables communication between nodes within flows.
|
|
||||||
- [Batch] nodes/flows allow for data-intensive tasks.
|
|
||||||
- [Async] nodes/flows allow waiting for asynchronous tasks.
|
|
||||||
- [(Advanced) Parallel] nodes/flows handle I/O-bound tasks.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Design Pattern
|
|
||||||
|
|
||||||
From there, it’s easy to implement popular design patterns:
|
|
||||||
|
|
||||||
- [Agent] autonomously makes decisions.
|
|
||||||
- [Workflow] chains multiple tasks into pipelines.
|
|
||||||
- [RAG] integrates data retrieval with generation.
|
|
||||||
- [Map Reduce] splits data tasks into Map and Reduce steps.
|
|
||||||
- [Structured Output] formats outputs consistently.
|
|
||||||
- [(Advanced) Multi-Agents] coordinate multiple agents.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Utility Function
|
|
||||||
|
|
||||||
We **do not** provide built-in utilities. Instead, we offer *examples*—please *implement your own*:
|
|
||||||
|
|
||||||
- [LLM Wrapper]
|
|
||||||
- [Viz and Debug]
|
|
||||||
- [Web Search]
|
|
||||||
- [Chunking]
|
|
||||||
- [Embedding]
|
|
||||||
- [Vector Databases]
|
|
||||||
- [Text-to-Speech]
|
|
||||||
|
|
||||||
**Why not built-in?**: I believe it's a *bad practice* for vendor-specific APIs in a general framework:
|
|
||||||
- *API Volatility*: Frequent changes lead to heavy maintenance for hardcoded APIs.
|
|
||||||
- *Flexibility*: You may want to switch vendors, use fine-tuned models, or run them locally.
|
|
||||||
- *Optimizations*: Prompt caching, batching, and streaming are easier without vendor lock-in.
|
|
||||||
|
|
||||||
## Ready to build your Apps?
|
|
||||||
|
|
||||||
Check out [Agentic Coding Guidance], the fastest way to develop LLM projects with Pocket Flow!
|
|
||||||
|
|
@ -26,11 +26,11 @@ def build_mermaid(start):
|
||||||
return parent and link(parent, get_id(node))
|
return parent and link(parent, get_id(node))
|
||||||
visited.add(node)
|
visited.add(node)
|
||||||
if isinstance(node, Flow):
|
if isinstance(node, Flow):
|
||||||
node.start and parent and link(parent, get_id(node.start))
|
node.start_node and parent and link(parent, get_id(node.start_node))
|
||||||
lines.append(f"\n subgraph sub_flow_{get_id(node)}[{type(node).__name__}]")
|
lines.append(f"\n subgraph sub_flow_{get_id(node)}[{type(node).__name__}]")
|
||||||
node.start and walk(node.start)
|
node.start_node and walk(node.start_node)
|
||||||
for nxt in node.successors.values():
|
for nxt in node.successors.values():
|
||||||
node.start and walk(nxt, get_id(node.start)) or (parent and link(parent, get_id(nxt))) or walk(nxt)
|
node.start_node and walk(nxt, get_id(node.start_node)) or (parent and link(parent, get_id(nxt))) or walk(nxt)
|
||||||
lines.append(" end\n")
|
lines.append(" end\n")
|
||||||
else:
|
else:
|
||||||
lines.append(f" {(nid := get_id(node))}['{type(node).__name__}']")
|
lines.append(f" {(nid := get_id(node))}['{type(node).__name__}']")
|
||||||
|
|
|
||||||
|
|
@ -101,6 +101,10 @@ def get_mdc_description(md_file, frontmatter, heading):
|
||||||
else:
|
else:
|
||||||
subsection = heading
|
subsection = heading
|
||||||
|
|
||||||
|
# For the combined guide and index
|
||||||
|
if Path(md_file).name == "guide.md":
|
||||||
|
return "Guidelines for using PocketFlow, Agentic Coding"
|
||||||
|
|
||||||
# For index.md at root level, use a different format
|
# For index.md at root level, use a different format
|
||||||
if Path(md_file).name == "index.md" and section == "":
|
if Path(md_file).name == "index.md" and section == "":
|
||||||
return "Guidelines for using PocketFlow, a minimalist LLM framework"
|
return "Guidelines for using PocketFlow, a minimalist LLM framework"
|
||||||
|
|
@ -137,6 +141,20 @@ def process_markdown_content(content, remove_local_refs=False):
|
||||||
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
def get_documentation_first_policy():
|
||||||
|
"""Return the DOCUMENTATION FIRST POLICY text to be included in the guide"""
|
||||||
|
return """# DOCUMENTATION FIRST POLICY
|
||||||
|
|
||||||
|
**CRITICAL INSTRUCTION**: When implementing a Pocket Flow app:
|
||||||
|
|
||||||
|
1. **ALWAYS REQUEST MDC FILES FIRST** - Before writing any code, request and review all relevant MDC documentation files. This doc provides an explanation of the documents.
|
||||||
|
2. **UNDERSTAND THE FRAMEWORK** - Gain comprehensive understanding of the Pocket Flow framework from documentation
|
||||||
|
3. **AVOID ASSUMPTION-DRIVEN DEVELOPMENT** - Do not base your implementation on assumptions or guesswork. Even if the human didn't explicitly mention pocket flow in their request, if the code you are editing is using pocket flow, you should request relevant docs to help you understand best practice as well before editing.
|
||||||
|
|
||||||
|
**VERIFICATION**: Begin each implementation with a brief summary of the documentation you've reviewed to inform your approach.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def generate_mdc_header(md_file, description, always_apply=False):
|
def generate_mdc_header(md_file, description, always_apply=False):
|
||||||
"""Generate MDC file header with appropriate frontmatter"""
|
"""Generate MDC file header with appropriate frontmatter"""
|
||||||
# Determine if we should include globs
|
# Determine if we should include globs
|
||||||
|
|
@ -163,13 +181,64 @@ def has_substantive_content(content):
|
||||||
# If there's almost nothing left after cleaning, consider it empty
|
# If there's almost nothing left after cleaning, consider it empty
|
||||||
return len(cleaned_content) > 20 # Arbitrary threshold, adjust as needed
|
return len(cleaned_content) > 20 # Arbitrary threshold, adjust as needed
|
||||||
|
|
||||||
|
def create_combined_guide(docs_dir, rules_dir):
|
||||||
|
"""Create a combined guide that includes both the guide and index content"""
|
||||||
|
docs_path = Path(docs_dir)
|
||||||
|
rules_path = Path(rules_dir)
|
||||||
|
|
||||||
|
guide_file = docs_path / "guide.md"
|
||||||
|
index_file = docs_path / "index.md"
|
||||||
|
|
||||||
|
if not guide_file.exists() or not index_file.exists():
|
||||||
|
print("Warning: guide.md or index.md not found, skipping combined guide creation")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Get guide content and index content
|
||||||
|
with open(guide_file, 'r', encoding='utf-8') as f:
|
||||||
|
guide_content = f.read()
|
||||||
|
|
||||||
|
with open(index_file, 'r', encoding='utf-8') as f:
|
||||||
|
index_content = f.read()
|
||||||
|
|
||||||
|
# Process the content
|
||||||
|
processed_guide = process_markdown_content(guide_content, remove_local_refs=True)
|
||||||
|
processed_index = process_markdown_content(index_content, remove_local_refs=True)
|
||||||
|
|
||||||
|
# Get the documentation first policy
|
||||||
|
doc_first_policy = get_documentation_first_policy()
|
||||||
|
|
||||||
|
# Combine the content with the documentation first policy at the beginning
|
||||||
|
combined_content = doc_first_policy + processed_guide + "\n\n" + processed_index
|
||||||
|
|
||||||
|
# Generate the MDC header
|
||||||
|
description = "Guidelines for using PocketFlow, Agentic Coding"
|
||||||
|
mdc_header = generate_mdc_header(guide_file, description, always_apply=True)
|
||||||
|
|
||||||
|
# Combine header and processed content
|
||||||
|
mdc_content = mdc_header + combined_content
|
||||||
|
|
||||||
|
# Create the output path with the new filename
|
||||||
|
output_path = rules_path / "guide_for_pocketflow.mdc"
|
||||||
|
|
||||||
|
# Write the MDC file
|
||||||
|
with open(output_path, 'w', encoding='utf-8') as f:
|
||||||
|
f.write(mdc_content)
|
||||||
|
|
||||||
|
print(f"Created combined guide MDC file: {output_path}")
|
||||||
|
return True
|
||||||
|
|
||||||
def convert_md_to_mdc(md_file, output_dir, docs_dir, special_treatment=False):
|
def convert_md_to_mdc(md_file, output_dir, docs_dir, special_treatment=False):
|
||||||
"""Convert a markdown file to MDC format and save to the output directory"""
|
"""Convert a markdown file to MDC format and save to the output directory"""
|
||||||
try:
|
try:
|
||||||
print(f"Processing: {md_file}")
|
print(f"Processing: {md_file}")
|
||||||
|
|
||||||
# Skip empty index.md files in subfolders
|
# Skip guide.md and index.md as they'll be handled separately
|
||||||
file_name = Path(md_file).name
|
file_name = Path(md_file).name
|
||||||
|
if file_name in ["guide.md", "index.md"]:
|
||||||
|
print(f"Skipping {file_name} for individual processing - it will be included in the combined guide")
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Skip empty index.md files in subfolders
|
||||||
parent_dir = Path(md_file).parent.name
|
parent_dir = Path(md_file).parent.name
|
||||||
|
|
||||||
# Check if this is an index.md in a subfolder (not the main index.md)
|
# Check if this is an index.md in a subfolder (not the main index.md)
|
||||||
|
|
@ -194,14 +263,11 @@ def convert_md_to_mdc(md_file, output_dir, docs_dir, special_treatment=False):
|
||||||
with open(md_file, 'r', encoding='utf-8') as f:
|
with open(md_file, 'r', encoding='utf-8') as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
|
|
||||||
# Check if this file should have special treatment (index.md or guide.md)
|
|
||||||
is_special = special_treatment or Path(md_file).name == "guide.md"
|
|
||||||
|
|
||||||
# Process the content
|
# Process the content
|
||||||
processed_content = process_markdown_content(content, remove_local_refs=is_special)
|
processed_content = process_markdown_content(content, remove_local_refs=special_treatment)
|
||||||
|
|
||||||
# Generate the MDC header
|
# Generate the MDC header
|
||||||
mdc_header = generate_mdc_header(md_file, description, always_apply=is_special)
|
mdc_header = generate_mdc_header(md_file, description, always_apply=special_treatment)
|
||||||
|
|
||||||
# Combine header and processed content
|
# Combine header and processed content
|
||||||
mdc_content = mdc_header + processed_content
|
mdc_content = mdc_header + processed_content
|
||||||
|
|
@ -255,15 +321,8 @@ def generate_mdc_files(docs_dir, rules_dir):
|
||||||
# Create the rules directory if it doesn't exist
|
# Create the rules directory if it doesn't exist
|
||||||
rules_path.mkdir(parents=True, exist_ok=True)
|
rules_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Process the main index.md file first
|
# Create the combined guide file first (includes both guide.md and index.md)
|
||||||
index_file = docs_path / "index.md"
|
create_combined_guide(docs_dir, rules_dir)
|
||||||
if index_file.exists():
|
|
||||||
convert_md_to_mdc(index_file, rules_path, docs_dir, special_treatment=True)
|
|
||||||
|
|
||||||
# Process guide.md file with special treatment (if it exists)
|
|
||||||
guide_file = docs_path / "guide.md"
|
|
||||||
if guide_file.exists():
|
|
||||||
convert_md_to_mdc(guide_file, rules_path, docs_dir, special_treatment=True)
|
|
||||||
|
|
||||||
# Process all other markdown files
|
# Process all other markdown files
|
||||||
success_count = 0
|
success_count = 0
|
||||||
|
|
@ -272,8 +331,8 @@ def generate_mdc_files(docs_dir, rules_dir):
|
||||||
# Find all markdown files
|
# Find all markdown files
|
||||||
md_files = list(docs_path.glob("**/*.md"))
|
md_files = list(docs_path.glob("**/*.md"))
|
||||||
|
|
||||||
# Skip the main index.md and guide.md files as we've already processed them
|
# Skip the main index.md and guide.md files as we've already processed them in create_combined_guide
|
||||||
md_files = [f for f in md_files if f != index_file and f != guide_file]
|
md_files = [f for f in md_files if f.name != "index.md" and f.name != "guide.md"]
|
||||||
|
|
||||||
# Process each markdown file
|
# Process each markdown file
|
||||||
for md_file in md_files:
|
for md_file in md_files:
|
||||||
|
|
@ -282,8 +341,8 @@ def generate_mdc_files(docs_dir, rules_dir):
|
||||||
else:
|
else:
|
||||||
failure_count += 1
|
failure_count += 1
|
||||||
|
|
||||||
print(f"\nProcessed {len(md_files) + 2} markdown files:")
|
print(f"\nProcessed {len(md_files) + 1} markdown files:") # +1 for the combined guide
|
||||||
print(f" - Successfully converted: {success_count + 2}")
|
print(f" - Successfully converted: {success_count + 1}") # +1 for the combined guide
|
||||||
print(f" - Failed conversions: {failure_count}")
|
print(f" - Failed conversions: {failure_count}")
|
||||||
|
|
||||||
return success_count > 0 and failure_count == 0
|
return success_count > 0 and failure_count == 0
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue