diff --git a/docs/essay.md b/docs/essay.md
new file mode 100644
index 0000000..1d19420
--- /dev/null
+++ b/docs/essay.md
@@ -0,0 +1,150 @@
+---
+layout: default
+title: "Essay"
+parent: "Example"
+nav_order: 1
+---
+
+# Summarization + QA agent for Paul Graham Essay
+
+```python
+from minillmflow import *
+import openai, os, yaml
+
+# Minimal LLM wrapper
+def call_llm(prompt):
+    openai.api_key = "YOUR_API_KEY_HERE"
+    r = openai.ChatCompletion.create(
+        model="gpt-4o",
+        messages=[{"role": "user", "content": prompt}]
+    )
+    return r.choices[0].message.content
+
+shared = {"data": {}, "summary": {}}
+
+# Load data into shared['data']
+class LoadData(Node):
+    def prep(self, shared):
+        path = "./miniLLMFlow/data/PaulGrahamEssaysLarge"
+        for fn in os.listdir(path):
+            with open(os.path.join(path, fn), 'r', encoding="utf-8") as f:
+                shared['data'][fn] = f.read()
+    def exec(self, res): pass
+    def post(self, s, pr, er): pass
+
+LoadData().run(shared)
+
+# Summarize one file
+class SummarizeFile(Node):
+    def prep(self, s): return s['data'][self.params['filename']]
+    def exec(self, content): return call_llm(f"{content} Summarize in 10 words.")
+    def post(self, s, pr, sr): s["summary"][self.params['filename']] = sr
+
+node_summ = SummarizeFile()
+node_summ.set_params({"filename":"addiction.txt"})
+node_summ.run(shared)
+
+# Map-Reduce summarization
+class MapSummaries(BatchNode):
+    def prep(self, s):
+        text = s['data'][self.params['filename']]
+        return [text[i:i+10000] for i in range(0, len(text), 10000)]
+    def exec(self, chunk):
+        return call_llm(f"{chunk} Summarize in 10 words.")
+    def post(self, s, pr, er):
+        s["summary"][self.params['filename']] = [f"{i}. {r}" for i,r in enumerate(er)]
+
+class ReduceSummaries(Node):
+    def prep(self, s): return s["summary"][self.params['filename']]
+    def exec(self, chunks): return call_llm(f"{chunks} Combine into 10 words summary.")
+    def post(self, s, pr, sr): s["summary"][self.params['filename']] = sr
+
+map_summ = MapSummaries()
+reduce_summ = ReduceSummaries()
+map_summ >> reduce_summ
+
+flow = Flow(start=map_summ)
+flow.set_params({"filename":"before.txt"})
+flow.run(shared)
+
+# Summarize all files
+class SummarizeAllFiles(BatchFlow):
+    def prep(self, s): return [{"filename":fn} for fn in s['data']]
+
+SummarizeAllFiles(start=flow).run(shared)
+
+# QA agent
+class FindRelevantFile(Node):
+    def prep(self, s):
+        q = input("Enter a question: ")
+        filenames = list(s['summary'].keys())
+        file_summaries = [f"- '{fn}': {s['summary'][fn]}" for fn in filenames]
+        return q, filenames, file_summaries
+
+    def exec(self, p):
+        q, filenames, file_summaries = p
+        if not q:
+            return {"think":"no question", "has_relevant":False}
+
+        resp = call_llm(f"""
+Question: {q}
+Find the most relevant file from: {file_summaries}
+If none, explain why
+
+Output in code fence:
+```yaml
+think: >
+    reasoning about relevance
+has_relevant: true/false
+most_relevant: filename if relevant
+```""")
+        yaml_str = resp.split("```yaml")[1].split("```")[0].strip()
+        result = yaml.safe_load(yaml_str)
+
+        # Validate response
+        assert isinstance(result, dict)
+        assert "think" in result
+        assert "has_relevant" in result
+        assert isinstance(result["has_relevant"], bool)
+
+        if result["has_relevant"]:
+            assert "most_relevant" in result
+            assert result["most_relevant"] in filenames
+
+        return result
+
+    def exec_fallback(self, p, exc): return {"think":"error","has_relevant":False}
+    def post(self, s, pr, res):
+        q, *_ = pr  # prep returns (q, filenames, file_summaries); only q is needed here
+        if not q:
+            print("No question asked"); return "end"
+        if res["has_relevant"]:
+            s["question"], s["relevant_file"] = q, res["most_relevant"]
+            print("Relevant file:", res["most_relevant"])
+            return "answer"
+        else:
+            print("No relevant file:", res["think"])
+            return "retry"
+
+class AnswerQuestion(Node):
+    def prep(self, s):
+        return s['question'], s['data'][s['relevant_file']]
+    def exec(self, p):
+        q, txt = p
+        return call_llm(f"Question: {q}\nText: {txt}\nAnswer in 50 words.")
+    def post(self, s, pr, ex):
+        print("Answer:", ex)
+
+class NoOp(Node): pass
+
+frf = FindRelevantFile(max_retries=3)
+aq = AnswerQuestion()
+noop = NoOp()
+
+frf - "answer" >> aq >> frf
+frf - "retry" >> frf
+frf - "end" >> noop
+
+qa = Flow(start=frf)
+qa.run(shared)
+```
\ No newline at end of file
diff --git a/docs/example.md b/docs/example.md
new file mode 100644
index 0000000..934daf3
--- /dev/null
+++ b/docs/example.md
@@ -0,0 +1,6 @@
+---
+layout: default
+title: "Example"
+nav_order: 5
+has_children: true
+---
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
index f1c97f3..1e94552 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -61,4 +61,4 @@ We model the LLM workflow as a **Nested Directed Graph**:
 
 ## Example Projects
 
-- Coming soon ...
+- [Summarization + QA agent for Paul Graham Essay](./essay.md)
diff --git a/docs/llm.md b/docs/llm.md
index 9f906ea..2633d5b 100644
--- a/docs/llm.md
+++ b/docs/llm.md
@@ -1,7 +1,7 @@
 ---
 layout: default
 title: "LLM Wrapper"
-parent: "Preparation"
+parent: "Details"
 nav_order: 1
 ---
 
diff --git a/docs/preparation.md b/docs/preparation.md
index 9e8cbf9..6e1c0fe 100644
--- a/docs/preparation.md
+++ b/docs/preparation.md
@@ -1,6 +1,6 @@
 ---
 layout: default
-title: "Preparation"
+title: "Details"
 nav_order: 3
 has_children: true
 ---
\ No newline at end of file
diff --git a/docs/tool.md b/docs/tool.md
index 2a5eb5a..ecc8e13 100644
--- a/docs/tool.md
+++ b/docs/tool.md
@@ -1,7 +1,7 @@
 ---
 layout: default
 title: "Tool"
-parent: "Preparation"
+parent: "Details"
 nav_order: 2
 ---
 