This commit is contained in:
zachary62 2024-12-27 17:42:28 +00:00
parent 877a1b728b
commit 194a97a31d
1 changed files with 47 additions and 32 deletions

View File

@ -2,28 +2,9 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"No relevant file found: the question has no relevant file because while some files discuss startups, none specifically address how to find or generate startup ideas\n",
"No question asked\n"
]
},
{
"data": {
"text/plain": [
"'default'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# Example App for text summarization & QA using minillmflow\n", "# Example App for text summarization & QA using minillmflow\n",
"from minillmflow import Node, BatchNode, Flow, BatchFlow, AsyncNode, AsyncFlow, BatchAsyncFlow\n", "from minillmflow import Node, BatchNode, Flow, BatchFlow, AsyncNode, AsyncFlow, BatchAsyncFlow\n",
@ -37,8 +18,15 @@
" model=\"gpt-4\",\n", " model=\"gpt-4\",\n",
" messages=[{\"role\": \"user\", \"content\": prompt}]\n", " messages=[{\"role\": \"user\", \"content\": prompt}]\n",
" )\n", " )\n",
" return r.choices[0].message.content\n", " return r.choices[0].message.content"
"\n", ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 2) Create a shared store (dict) for Node/Flow data exchange.\n", "# 2) Create a shared store (dict) for Node/Flow data exchange.\n",
"# This can be replaced with a DB or other storage.\n", "# This can be replaced with a DB or other storage.\n",
"# Design the structure / schema based on the app requirements.\n", "# Design the structure / schema based on the app requirements.\n",
@ -60,8 +48,15 @@
"\n", "\n",
"load_data = LoadData()\n", "load_data = LoadData()\n",
"# Run the data-loading node once\n", "# Run the data-loading node once\n",
"load_data.run(shared)\n", "load_data.run(shared)"
"\n", ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 4) Create a Node that summarizes a single file using the LLM.\n", "# 4) Create a Node that summarizes a single file using the LLM.\n",
"class SummarizeFile(Node):\n", "class SummarizeFile(Node):\n",
" def prep(self, shared):\n", " def prep(self, shared):\n",
@ -81,8 +76,15 @@
"# For testing, we set params directly on the node.\n", "# For testing, we set params directly on the node.\n",
"# In real usage, you'd set them in a Flow or BatchFlow.\n", "# In real usage, you'd set them in a Flow or BatchFlow.\n",
"summarize_file.set_params({\"filename\":\"addiction.txt\"})\n", "summarize_file.set_params({\"filename\":\"addiction.txt\"})\n",
"summarize_file.run(shared)\n", "summarize_file.run(shared)"
"\n", ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 5) If data is large, we can apply a map-reduce pattern:\n", "# 5) If data is large, we can apply a map-reduce pattern:\n",
"# - MapSummaries(BatchNode) => chunk the file and summarize each chunk\n", "# - MapSummaries(BatchNode) => chunk the file and summarize each chunk\n",
"# - ReduceSummaries(Node) => combine those chunk-level summaries\n", "# - ReduceSummaries(Node) => combine those chunk-level summaries\n",
@ -135,8 +137,15 @@
"file_summary_flow = Flow(start=map_summaries)\n", "file_summary_flow = Flow(start=map_summaries)\n",
"# When a flow params is set, it will recursively set its params to all nodes in the flow\n", "# When a flow params is set, it will recursively set its params to all nodes in the flow\n",
"file_summary_flow.set_params({\"filename\":\"before.txt\"})\n", "file_summary_flow.set_params({\"filename\":\"before.txt\"})\n",
"file_summary_flow.run(shared)\n", "file_summary_flow.run(shared)"
"\n", ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 7) Summarize all files using a BatchFlow that reruns file_summary_flow for each file\n", "# 7) Summarize all files using a BatchFlow that reruns file_summary_flow for each file\n",
"class SummarizeAllFiles(BatchFlow):\n", "class SummarizeAllFiles(BatchFlow):\n",
" def prep(self, shared):\n", " def prep(self, shared):\n",
@ -147,9 +156,15 @@
" return [{\"filename\":filename} for filename in shared['data']]\n", " return [{\"filename\":filename} for filename in shared['data']]\n",
"\n", "\n",
"summarize_all_files = SummarizeAllFiles(start=file_summary_flow)\n", "summarize_all_files = SummarizeAllFiles(start=file_summary_flow)\n",
"summarize_all_files.run(shared)\n", "summarize_all_files.run(shared)"
"\n", ]
"\n", },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 8) QA Agent: Find the most relevant file based on summary with actions\n", "# 8) QA Agent: Find the most relevant file based on summary with actions\n",
"# if no question is asked:\n", "# if no question is asked:\n",
"# (a) end: terminate the flow \n", "# (a) end: terminate the flow \n",