change the syntax of exec

2024-12-29 02:40:27 +00:00 · 2024-12-29 02:40:27 +00:00 · 2550decdc5
parent 96cecb9086
commit 2550decdc5
11 changed files with 103 additions and 101 deletions
--- a/cookbook/demo.ipynb
+++ b/cookbook/demo.ipynb
--- a/docs/async.md
+++ b/docs/async.md
@ -19,9 +19,11 @@ Below is a minimal **AsyncNode** that calls an LLM in `exec()` to summarize text

 ```python
 class SummarizeThenVerify(AsyncNode):
-    def exec(self, shared, prep_res):
-        doc = shared.get("doc", "")
-        return call_llm(f"Summarize: {doc}")
+    def prep(self, shared):
+        return shared.get("doc", "")
+
+    def exec(self, prep_res):
+        return call_llm(f"Summarize: {prep_res}")

    async def post_async(self, shared, prep_res, exec_res):
        user_decision = await gather_user_feedback(exec_res)
--- a/docs/communication.md
+++ b/docs/communication.md
@ -9,7 +9,7 @@ nav_order: 3

 Nodes and Flows **communicate** in two ways:

-1. **Shared Store** – A global data structure (often an in-mem dict) that all nodes can read from and write to. Every Node’s `prep()`, `exec()`, and `post()` methods receive the **same** `shared` store.  
+1. **Shared Store** – A global data structure (often an in-memory dict) that all nodes can read from and write to. Every Node’s `prep()` and `post()` methods receive the **same** `shared` store.  
 2. **Params** – Each node and Flow has a `params` dict assigned by the **parent Flow**. Params mostly serve as identifiers, letting each node/flow know what task it’s assigned.

 If you know memory management, **Shared Store** is like a **heap** shared across function calls, while **Params** is like a **stack** assigned by parent function calls.
@ -47,7 +47,7 @@ class Summarize(Node):
        content = shared["data"].get("my_file.txt", "")
        return content

-    def exec(self, shared, prep_res):
+    def exec(self, prep_res):
        prompt = f"Summarize: {prep_res}"
        summary = call_llm(prompt)
        return summary
@ -91,7 +91,7 @@ class SummarizeFile(Node):
        filename = self.params["filename"]
        return shared["data"].get(filename, "")

-    def exec(self, shared, prep_res):
+    def exec(self, prep_res):
        prompt = f"Summarize: {prep_res}"
        return call_llm(prompt)

--- a/docs/node.md
+++ b/docs/node.md
@ -14,9 +14,10 @@ A **Node** is the smallest building block of Mini LLM Flow. Each Node has three
   - Often used for tasks like reading files, chunking text, or validation.
   - Returns `prep_res`, which will be passed to both `exec()` and `post()`.

-2. **`exec(shared, prep_res)`**  
+2. **`exec(prep_res)`**  
   - The main execution step where the LLM is called.
-   - Has a built-in retry feature to handle errors and ensure reliable results.
+   - Optionally supports built-in retry and error handling (described below).
+   - ⚠️ If retry is enabled, ensure the implementation is idempotent.
   - Returns `exec_res`, which is passed to `post()`.

 3. **`post(shared, prep_res, exec_res)`**  
@ -55,7 +56,7 @@ class SummarizeFile(Node):
        filename = self.params["filename"]
        return shared["data"][filename]

-    def exec(self, shared, prep_res):
+    def exec(self, prep_res):
        if not prep_res:
            raise ValueError("Empty file content!")
        prompt = f"Summarize this text in 10 words: {prep_res}"
--- a/minillmflow/init.py
+++ b/minillmflow/init.py
@ -7,12 +7,12 @@ class BaseNode:
        if action in self.successors: warnings.warn(f"Overwriting successor for action '{action}'")
        self.successors[action]=node;return node
    def prep(self,shared): return None
-    def exec(self,shared,prep_res): return None
-    def _exec(self,shared,prep_res): return self.exec(shared,prep_res)
+    def exec(self,prep_res): return None
+    def _exec(self,prep_res): return self.exec(prep_res)
    def post(self,shared,prep_res,exec_res): return "default"
    def _run(self,shared):
        prep_res=self.prep(shared)
-        exec_res=self._exec(shared,prep_res)
+        exec_res=self._exec(prep_res)
        return self.post(shared,prep_res,exec_res)
    def run(self,shared):
        if self.successors: warnings.warn("Node won't run successors. Use a parent Flow instead.")
@ -30,16 +30,16 @@ class Node(BaseNode):
    def __init__(self,max_retries=1): 
        super().__init__()
        self.max_retries=max_retries
-    def process_after_fail(self,shared,prep_res,exc): raise exc
-    def _exec(self,shared,prep_res):
+    def process_after_fail(self,prep_res,exc): raise exc
+    def _exec(self,prep_res):
        for i in range(self.max_retries):
-            try:return super()._exec(shared,prep_res)
+            try:return super()._exec(prep_res)
            except Exception as e:
-                if i==self.max_retries-1:return self.process_after_fail(shared,prep_res,e)
+                if i==self.max_retries-1:return self.process_after_fail(prep_res,e)

 class BatchNode(Node):
    def prep(self,shared): return []
-    def _exec(self,shared,items): return [super(Node,self)._exec(shared,i) for i in items]
+    def _exec(self,items): return [super(Node,self)._exec(i) for i in items]

 class Flow(BaseNode):
    def __init__(self,start):
@ -47,23 +47,24 @@ class Flow(BaseNode):
        self.start=start
    def get_next_node(self,curr,action):
        nxt=curr.successors.get(action if action is not None else "default")
-        if not nxt and curr.successors: 
-            warnings.warn(f"Flow ends: action '{action}' not found in {list(curr.successors)}")
+        if not nxt and curr.successors: warnings.warn(f"Flow ends: action '{action}' not found in {list(curr.successors)}")
        return nxt
-    def _exec(self,shared,params=None):
+    def _orchestrate(self,shared,params=None):
        curr,p=self.start,(params if params else {**self.params})
        while curr:
            curr.set_params(p)
-            c=curr._run(shared)
-            curr=self.get_next_node(curr,c)
-    def exec(self,shared,prep_res): 
+            curr=self.get_next_node(curr,curr._run(shared))
+    def _run(self,shared):
+        self._orchestrate(shared)
+        return self.post(shared,self.prep(shared),None)
+    def exec(self,prep_res): 
        raise RuntimeError("Flow should not exec directly. Create a child Node instead.")

 class BatchFlow(Flow):
    def prep(self,shared): return []
    def _run(self,shared):
        prep_res=self.prep(shared)
-        for batch_params in prep_res:self._exec(shared,{**self.params,**batch_params})
+        for batch_params in prep_res:self._orchestrate(shared,{**self.params,**batch_params})
        return self.post(shared,prep_res,None)

 class AsyncNode(Node):
@ -77,24 +78,23 @@ class AsyncNode(Node):
        return await self._run_async(shared)
    async def _run_async(self,shared):
        prep_res=self.prep(shared)
-        exec_res=self._exec(shared,prep_res)
+        exec_res=self._exec(prep_res)
        return await self.post_async(shared,prep_res,exec_res)
    def _run(self,shared): raise RuntimeError("AsyncNode should run using run_async instead.")

 class AsyncFlow(Flow,AsyncNode):
-    async def _exec_async(self,shared,params=None):
+    async def _orchestrate_async(self,shared,params=None):
        curr,p=self.start,(params if params else {**self.params})
        while curr:
            curr.set_params(p)
            c=await curr._run_async(shared) if hasattr(curr,"run_async") else curr._run(shared)
            curr=self.get_next_node(curr,c)
    async def _run_async(self,shared):
-        prep_res=self.prep(shared)
-        await self._exec_async(shared)
-        return await self.post_async(shared,prep_res,None)
+        await self._orchestrate_async(shared)
+        return await self.post_async(shared,self.prep(shared),None)

 class BatchAsyncFlow(BatchFlow,AsyncFlow):
    async def _run_async(self,shared):
        prep_res=self.prep(shared)
-        for batch_params in prep_res:await self._exec_async(shared,{**self.params,**batch_params})
+        for batch_params in prep_res:await self._orchestrate_async(shared,{**self.params,**batch_params})
        return await self.post_async(shared,prep_res,None)
--- a/tests/test_async_batch_flow.py
+++ b/tests/test_async_batch_flow.py
@ -3,11 +3,11 @@ import asyncio
 import sys
 from pathlib import Path

-sys.path.append(str(Path(__file__).parent.parent))
+sys.path.insert(0, str(Path(__file__).parent.parent))
 from minillmflow import AsyncNode, BatchAsyncFlow

 class AsyncDataProcessNode(AsyncNode):
-    def exec(self, shared_storage, prep_result):
+    def prep(self, shared_storage):
        key = self.params.get('key')
        data = shared_storage['input_data'][key]
        if 'results' not in shared_storage:
@ -18,7 +18,7 @@ class AsyncDataProcessNode(AsyncNode):
    async def post_async(self, shared_storage, prep_result, proc_result):
        await asyncio.sleep(0.01)  # Simulate async work
        key = self.params.get('key')
-        shared_storage['results'][key] = proc_result * 2  # Double the value
+        shared_storage['results'][key] = prep_result * 2  # Double the value
        return "processed"

 class AsyncErrorNode(AsyncNode):
--- a/tests/test_async_flow.py
+++ b/tests/test_async_flow.py
@ -3,7 +3,7 @@ import asyncio
 import sys
 from pathlib import Path

-sys.path.append(str(Path(__file__).parent.parent))
+sys.path.insert(0, str(Path(__file__).parent.parent))
 from minillmflow import Node, AsyncNode, AsyncFlow


@ -17,7 +17,7 @@ class AsyncNumberNode(AsyncNode):
        super().__init__()
        self.number = number

-    def exec(self, shared_storage, data):
+    def prep(self, shared_storage):
        # Synchronous work is allowed inside an AsyncNode,
        # but final 'condition' is determined by post_async().
        shared_storage['current'] = self.number
@ -34,7 +34,7 @@ class AsyncIncrementNode(AsyncNode):
    """
    Demonstrates incrementing the 'current' value asynchronously.
    """
-    def exec(self, shared_storage, data):
+    def prep(self, shared_storage):
        shared_storage['current'] = shared_storage.get('current', 0) + 1
        return "incremented"

@ -110,7 +110,7 @@ class TestAsyncFlow(unittest.TestCase):
        """

        class BranchingAsyncNode(AsyncNode):
-            def exec(self, shared_storage, data):
+            def exec(self, data):
                value = shared_storage.get("value", 0)
                shared_storage["value"] = value
                # We'll decide branch based on whether 'value' is positive
@ -124,12 +124,12 @@ class TestAsyncFlow(unittest.TestCase):
                    return "negative_branch"

        class PositiveNode(Node):
-            def exec(self, shared_storage, data):
+            def exec(self, data):
                shared_storage["path"] = "positive"
                return None

        class NegativeNode(Node):
-            def exec(self, shared_storage, data):
+            def exec(self, data):
                shared_storage["path"] = "negative"
                return None

--- a/tests/test_batch_flow.py
+++ b/tests/test_batch_flow.py
@ -2,11 +2,11 @@ import unittest
 import sys
 from pathlib import Path

-sys.path.append(str(Path(__file__).parent.parent))
+sys.path.insert(0, str(Path(__file__).parent.parent))
 from minillmflow import Node, BatchFlow, Flow

 class DataProcessNode(Node):
-    def exec(self, shared_storage, prep_result):
+    def prep(self, shared_storage):
        key = self.params.get('key')
        data = shared_storage['input_data'][key]
        if 'results' not in shared_storage:
@ -14,7 +14,7 @@ class DataProcessNode(Node):
        shared_storage['results'][key] = data * 2

 class ErrorProcessNode(Node):
-    def exec(self, shared_storage, prep_result):
+    def prep(self, shared_storage):
        key = self.params.get('key')
        if key == 'error_key':
            raise ValueError(f"Error processing key: {key}")
@ -107,14 +107,14 @@ class TestBatchFlow(unittest.TestCase):
    def test_nested_flow(self):
        """Test batch processing with nested flows"""
        class InnerNode(Node):
-            def exec(self, shared_storage, prep_result):
+            def exec(self, prep_result):
                key = self.params.get('key')
                if 'intermediate_results' not in shared_storage:
                    shared_storage['intermediate_results'] = {}
                shared_storage['intermediate_results'][key] = shared_storage['input_data'][key] + 1

        class OuterNode(Node):
-            def exec(self, shared_storage, prep_result):
+            def exec(self, prep_result):
                key = self.params.get('key')
                if 'results' not in shared_storage:
                    shared_storage['results'] = {}
@ -148,7 +148,7 @@ class TestBatchFlow(unittest.TestCase):
    def test_custom_parameters(self):
        """Test batch processing with additional custom parameters"""
        class CustomParamNode(Node):
-            def exec(self, shared_storage, prep_result):
+            def exec(self, prep_result):
                key = self.params.get('key')
                multiplier = self.params.get('multiplier', 1)
                if 'results' not in shared_storage:
--- a/tests/test_batch_node.py
+++ b/tests/test_batch_node.py
@ -2,7 +2,7 @@ import unittest
 import sys
 from pathlib import Path

-sys.path.append(str(Path(__file__).parent.parent))
+sys.path.insert(0, str(Path(__file__).parent.parent))
 from minillmflow import Node, BatchNode, Flow

 class ArrayChunkNode(BatchNode):
@ -14,16 +14,14 @@ class ArrayChunkNode(BatchNode):
        # Get array from shared storage and split into chunks
        array = shared_storage.get('input_array', [])
        chunks = []
-        for i in range(0, len(array), self.chunk_size):
-            end = min(i + self.chunk_size, len(array))
-            chunks.append((i, end))
+        for start in range(0, len(array), self.chunk_size):
+            end = min(start + self.chunk_size, len(array))
+            chunks.append(array[start: end])
        return chunks
    
-    def exec(self, shared_storage, chunk_indices):
-        start, end = chunk_indices
-        array = shared_storage['input_array']
+    def exec(self, chunk):
        # Process the chunk and return its sum
-        chunk_sum = sum(array[start:end])
+        chunk_sum = sum(chunk)
        return chunk_sum
        
    def post(self, shared_storage, prep_result, proc_result):
@ -32,7 +30,7 @@ class ArrayChunkNode(BatchNode):
        return "default"

 class SumReduceNode(Node):
-    def exec(self, shared_storage, data):
+    def prep(self, shared_storage):
        # Get chunk results from shared storage and sum them
        chunk_results = shared_storage.get('chunk_results', [])
        total = sum(chunk_results)
@ -48,9 +46,9 @@ class TestBatchNode(unittest.TestCase):
        }
        
        chunk_node = ArrayChunkNode(chunk_size=10)
-        chunks = chunk_node.prep(shared_storage)
-        
-        self.assertEqual(chunks, [(0, 10), (10, 20), (20, 25)])
+        chunk_node.run(shared_storage)
+        results = shared_storage['chunk_results']
+        self.assertEqual(results, [45, 145, 110])
        
    def test_map_reduce_sum(self):
        """
--- a/tests/test_flow_basic.py
+++ b/tests/test_flow_basic.py
@ -2,7 +2,7 @@ import unittest
 import sys
 from pathlib import Path

-sys.path.append(str(Path(__file__).parent.parent))
+sys.path.insert(0, str(Path(__file__).parent.parent))
 from minillmflow import Node, Flow

 class NumberNode(Node):
@ -10,7 +10,7 @@ class NumberNode(Node):
        super().__init__()
        self.number = number

-    def exec(self, shared_storage, data):
+    def prep(self, shared_storage):
        shared_storage['current'] = self.number

 class AddNode(Node):
@ -18,7 +18,7 @@ class AddNode(Node):
        super().__init__()
        self.number = number

-    def exec(self, shared_storage, data):
+    def prep(self, shared_storage):
        shared_storage['current'] += self.number

 class MultiplyNode(Node):
@ -26,7 +26,7 @@ class MultiplyNode(Node):
        super().__init__()
        self.number = number

-    def exec(self, shared_storage, data):
+    def prep(self, shared_storage):
        shared_storage['current'] *= self.number

 class CheckPositiveNode(Node):
@ -37,7 +37,7 @@ class CheckPositiveNode(Node):
            return 'negative'

 class NoOpNode(Node):
-    def exec(self, shared_storage, data):
+    def prep(self, shared_storage):
        # Do nothing, just pass
        pass
    
--- a/tests/test_flow_composition.py
+++ b/tests/test_flow_composition.py
@ -2,8 +2,9 @@ import unittest
 import asyncio
 import sys
 from pathlib import Path
-sys.path.append(str(Path(__file__).parent.parent))

+
+sys.path.insert(0, str(Path(__file__).parent.parent))
 from minillmflow import Node, Flow

 # Simple example Nodes
@ -12,7 +13,7 @@ class NumberNode(Node):
        super().__init__()
        self.number = number

-    def exec(self, shared_storage, prep_result):
+    def prep(self, shared_storage):
        shared_storage['current'] = self.number

 class AddNode(Node):
@ -20,7 +21,7 @@ class AddNode(Node):
        super().__init__()
        self.number = number

-    def exec(self, shared_storage, prep_result):
+    def prep(self, shared_storage):
        shared_storage['current'] += self.number

 class MultiplyNode(Node):
@ -28,7 +29,7 @@ class MultiplyNode(Node):
        super().__init__()
        self.number = number

-    def exec(self, shared_storage, prep_result):
+    def prep(self, shared_storage):
        shared_storage['current'] *= self.number

 class TestFlowComposition(unittest.TestCase):