update doc
This commit is contained in:
parent
cb9339f6de
commit
95c61db64b
Binary file not shown.
|
After Width: | Height: | Size: 72 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 62 KiB |
|
|
@ -12,27 +12,59 @@ MapReduce is a design pattern suitable when you have either:
|
||||||
- Large output data (e.g., multiple forms to fill)
|
- Large output data (e.g., multiple forms to fill)
|
||||||
|
|
||||||
and there is a logical way to break the task into smaller, ideally independent parts.
|
and there is a logical way to break the task into smaller, ideally independent parts.
|
||||||
|
|
||||||
|
<div align="center">
|
||||||
|
<img src="https://github.com/the-pocket/PocketFlow/raw/main/assets/mapreduce.png?raw=true" width="400"/>
|
||||||
|
</div>
|
||||||
|
|
||||||
You first break down the task using [BatchNode](../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase.
|
You first break down the task using [BatchNode](../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase.
|
||||||
|
|
||||||
### Example: Document Summarization
|
### Example: Document Summarization
|
||||||
|
|
||||||
```python
|
```python
|
||||||
class MapSummaries(BatchNode):
|
class SummarizeAllFiles(BatchNode):
|
||||||
def prep(self, shared): return [shared["text"][i:i+10000] for i in range(0, len(shared["text"]), 10000)]
|
def prep(self, shared):
|
||||||
def exec(self, chunk): return call_llm(f"Summarize this chunk: {chunk}")
|
files_dict = shared["files"] # e.g. 10 files
|
||||||
def post(self, shared, prep_res, exec_res_list): shared["summaries"] = exec_res_list
|
return list(files_dict.items()) # [("file1.txt", "aaa..."), ("file2.txt", "bbb..."), ...]
|
||||||
|
|
||||||
class ReduceSummaries(Node):
|
def exec(self, one_file):
|
||||||
def prep(self, shared): return shared["summaries"]
|
filename, file_content = one_file
|
||||||
def exec(self, summaries): return call_llm(f"Combine these summaries: {summaries}")
|
summary_text = call_llm(f"Summarize the following file:\n{file_content}")
|
||||||
def post(self, shared, prep_res, exec_res): shared["final_summary"] = exec_res
|
return (filename, summary_text)
|
||||||
|
|
||||||
# Connect nodes
|
def post(self, shared, prep_res, exec_res_list):
|
||||||
map_node = MapSummaries()
|
shared["file_summaries"] = dict(exec_res_list)
|
||||||
reduce_node = ReduceSummaries()
|
|
||||||
map_node >> reduce_node
|
|
||||||
|
|
||||||
# Create flow
|
class CombineSummaries(Node):
|
||||||
summarize_flow = Flow(start=map_node)
|
def prep(self, shared):
|
||||||
summarize_flow.run(shared)
|
return shared["file_summaries"]
|
||||||
|
|
||||||
|
def exec(self, file_summaries):
|
||||||
|
# format each entry as "<filename> summary:\n<summary>\n", then join the entries with "\n---\n"
|
||||||
|
text_list = []
|
||||||
|
for fname, summ in file_summaries.items():
|
||||||
|
text_list.append(f"{fname} summary:\n{summ}\n")
|
||||||
|
big_text = "\n---\n".join(text_list)
|
||||||
|
|
||||||
|
return call_llm(f"Combine these file summaries into one final summary:\n{big_text}")
|
||||||
|
|
||||||
|
def post(self, shared, prep_res, final_summary):
|
||||||
|
shared["all_files_summary"] = final_summary
|
||||||
|
|
||||||
|
batch_node = SummarizeAllFiles()
|
||||||
|
combine_node = CombineSummaries()
|
||||||
|
batch_node >> combine_node
|
||||||
|
|
||||||
|
flow = Flow(start=batch_node)
|
||||||
|
|
||||||
|
shared = {
|
||||||
|
"files": {
|
||||||
|
"file1.txt": "Alice was beginning to get very tired of sitting by her sister...",
|
||||||
|
"file2.txt": "Some other interesting text ...",
|
||||||
|
# ...
|
||||||
|
}
|
||||||
|
}
|
||||||
|
flow.run(shared)
|
||||||
|
print("Individual Summaries:", shared["file_summaries"])
|
||||||
|
print("\nFinal Summary:\n", shared["all_files_summary"])
|
||||||
```
|
```
|
||||||
|
|
@ -9,6 +9,10 @@ nav_order: 2
|
||||||
|
|
||||||
Many real-world tasks are too complex for one LLM call. The solution is to decompose them into a [chain](../core_abstraction/flow.md) of multiple Nodes.
|
Many real-world tasks are too complex for one LLM call. The solution is to decompose them into a [chain](../core_abstraction/flow.md) of multiple Nodes.
|
||||||
|
|
||||||
|
<div align="center">
|
||||||
|
<img src="https://github.com/the-pocket/PocketFlow/raw/main/assets/workflow.png?raw=true" width="400"/>
|
||||||
|
</div>
|
||||||
|
|
||||||
> - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*.
|
> - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*.
|
||||||
> - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*.
|
> - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*.
|
||||||
>
|
>
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue