update structured output demo

2025-04-18 19:14:59 -04:00 · 2025-04-18 19:14:59 -04:00 · e938bf3e42
parent 9e026c7f14
commit e938bf3e42
2 changed files with 170 additions and 81 deletions
--- a/cookbook/pocketflow-structured-output/README.md
+++ b/cookbook/pocketflow-structured-output/README.md
@ -9,17 +9,27 @@ A minimal demo application showing how to use PocketFlow to extract structured d

 ## Run It

-1. Make sure your OpenAI API key is set:
+1. Install the packages you need with this simple command:
+    ```bash
+    pip install -r requirements.txt
+    ```
+
+2. Make sure your OpenAI API key is set:
    ```bash
    export OPENAI_API_KEY="your-api-key-here"
    ```
-    Alternatively, you can edit the `utils.py` file to include your API key directly.
+    Alternatively, you can edit the [`utils.py`](./utils.py) file to include your API key directly.

-2. Edit data.txt with the resume you want to parse (a sample resume is already included)
+    Let's do a quick check to make sure your API key is working properly:

-3. Install requirements and run the application:
    ```bash
-    pip install -r requirements.txt
+    python utils.py
+    ```
+
+3. Edit [data.txt](./data.txt) with the resume you want to parse (a sample resume is already included)
+
+4. Run the application:
+    ```bash
    python main.py
    ```

@ -45,24 +55,29 @@ The Resume Parser application uses a single node that:
 ## Example Output

 ```
-=== STRUCTURED RESUME DATA ===
+=== Resume Parser - Structured Output with Indexes & Comments ===

-name: John Smith
+
+=== STRUCTURED RESUME DATA (Comments & Skill Index List) ===
+
+name: JOHN SMTIH
 email: johnsmtih1983@gnail.com
 experience:
- title: Sales Manager
-  company: ABC Corporation
- title: Assistant Manager
-  company: XYZ Industries
- title: Customer Service Representative
-  company: Fast Solutions Inc
-skills:
- Microsoft Office: Excel, Word, PowerPoint (Advanced)
- Customer relationship management (CRM) software
- Team leadership & management
- Project management
- Public speaking
- Time management
+- {title: SALES MANAGER, company: ABC Corportaion}
+- {title: ASST. MANAGER, company: XYZ Industries}
+- {title: CUSTOMER SERVICE REPRESENTATIVE, company: Fast Solutions Inc}
+skill_indexes: [0, 1, 2, 3, 4]

-============================
+
+============================================================
+
+✅ Extracted resume information.
+
+--- Found Target Skills (from Indexes) ---
+- Team leadership & management (Index: 0)
+- CRM software (Index: 1)
+- Project management (Index: 2)
+- Public speaking (Index: 3)
+- Microsoft Office (Index: 4)
+----------------------------------------
 ```
--- a/cookbook/pocketflow-structured-output/main.py
+++ b/cookbook/pocketflow-structured-output/main.py
@ -1,74 +1,148 @@
-from pocketflow import Node, Flow
-from utils import call_llm
 import yaml
+import os  # NOTE: not referenced anywhere in this file; importing utils does not depend on it
+from pocketflow import Node, Flow
+from utils import call_llm # Assumes utils.py with call_llm exists

 class ResumeParserNode(Node):
    def prep(self, shared):
-        """Return resume text from shared state"""
-        return shared["resume_text"]
-    
-    def exec(self, resume_text):
-        """Extract structured data from resume using prompt engineering"""
+        """Return resume text and target skills from shared state."""
+        return {
+            "resume_text": shared["resume_text"],
+            "target_skills": shared.get("target_skills", [])
+        }
+
+    def exec(self, prep_res):
+        """Extract structured data from resume using prompt engineering.
+        Requests YAML output with comments and skill indexes as a list.
+        """
+        resume_text = prep_res["resume_text"]
+        target_skills = prep_res["target_skills"]
+
+        # Format skills with indexes for the prompt
+        skill_list_for_prompt = "\n".join([f"{i}: {skill}" for i, skill in enumerate(target_skills)])
+
+        # Simplified Prompt focusing on key instructions and format
        prompt = f"""
-Please extract the following information from this resume and format it as YAML:
- name
- email
- experience (list of positions with title and company)
- skills (list of skills)
+Analyze the resume below. Output ONLY the requested information in YAML format.

+**Resume:**
+```
 {resume_text}
+```

-Now, output:
+**Target Skills (use these indexes):**
+```
+{skill_list_for_prompt}
+```
+
+**YAML Output Requirements:**
+- Extract `name` (string).
+- Extract `email` (string).
+- Extract `experience` (list of objects with `title` and `company`).
+- Extract `skill_indexes` (list of integers found from the Target Skills list).
+- **Add a YAML comment (`#`) explaining the source BEFORE `name`, `email`, `experience`, each item in `experience`, and `skill_indexes`.**
+
+**Example Format:**
 ```yaml
-name: John Doe
-email: john@example.com
+# Found name at top
+name: Jane Doe
+# Found email in contact info
+email: jane@example.com
+# Experience section analysis
 experience:
-  - title: Software Engineer
-    company: Tech Company
-  - title: Developer
-    company: Another Company
-skills:
-  - Python
-  - JavaScript
-  - HTML/CSS
-```"""
-        
+  # First job listed
+  - title: Manager
+    company: Corp A
+  # Second job listed
+  - title: Assistant
+    company: Corp B
+# Skills identified from the target list based on resume content
+skill_indexes:
+  # Found 0 at top  
+  - 0
+  # Found 2 in experience
+  - 2
+```
+
+Generate the YAML output now:
+"""
        response = call_llm(prompt)
-        
-        # Extract YAML content from markdown code blocks
+
+        # --- Minimal YAML Extraction ---
+        # Assumes LLM correctly uses ```yaml blocks
        yaml_str = response.split("```yaml")[1].split("```")[0].strip()
        structured_result = yaml.safe_load(yaml_str)
-        
-        # Validate structure
-        assert "name" in structured_result
-        assert "experience" in structured_result
-        assert isinstance(structured_result["experience"], list)
-        assert "skills" in structured_result
-        assert isinstance(structured_result["skills"], list)
-        
-        return structured_result
-    
-    def post(self, shared, prep_res, exec_res):
-        """Store and display structured resume data in YAML"""
-        shared["structured_data"] = exec_res
-        
-        # Print structured data in YAML format
-        print("\n=== STRUCTURED RESUME DATA ===\n")
-        print(yaml.dump(exec_res, sort_keys=False))
-        print("\n============================\n")
-        
-        print("✅ Extracted basic resume information")
-        
-# Create and run the flow
-if __name__ == "__main__":
-    print("=== Simple Resume Parser - YAML Output ===\n")
-    
-    # Read resume text from file
-    shared = {}
-    with open('data.txt', 'r') as file:
-        resume_text = file.read()
-    shared["resume_text"] = resume_text
+        # --- End Minimal Extraction ---

-    
-    flow = Flow(start=ResumeParserNode())
-    flow.run(shared)
+        # --- Basic Validation ---
+        assert structured_result is not None, "Validation Failed: Parsed YAML is None"
+        assert "name" in structured_result, "Validation Failed: Missing 'name'"
+        assert "email" in structured_result, "Validation Failed: Missing 'email'"
+        assert "experience" in structured_result, "Validation Failed: Missing 'experience'"
+        assert isinstance(structured_result.get("experience"), list), "Validation Failed: 'experience' is not a list"
+        assert "skill_indexes" in structured_result, "Validation Failed: Missing 'skill_indexes'"
+        skill_indexes_val = structured_result.get("skill_indexes")
+        assert skill_indexes_val is None or isinstance(skill_indexes_val, list), "Validation Failed: 'skill_indexes' is not a list or None"
+        if isinstance(skill_indexes_val, list):
+             for index in skill_indexes_val:
+                 assert isinstance(index, int), f"Validation Failed: Skill index '{index}' is not an integer"
+        # --- End Basic Validation ---
+
+        return structured_result
+
+    def post(self, shared, prep_res, exec_res):
+        """Store structured data and print it."""
+        shared["structured_data"] = exec_res
+
+        print("\n=== STRUCTURED RESUME DATA (Comments & Skill Index List) ===\n")
+        # Dump YAML; default_flow_style=None keeps nested collections in block style but renders leaf collections inline (flow style)
+        print(yaml.dump(exec_res, sort_keys=False, allow_unicode=True, default_flow_style=None))
+        print("\n============================================================\n")
+        print("✅ Extracted resume information.")
+
+
+# === Main Execution Logic ===
+if __name__ == "__main__":
+    print("=== Resume Parser - Structured Output with Indexes & Comments ===\n")
+
+    # --- Configuration ---
+    target_skills_to_find = [
+        "Team leadership & management", # 0
+        "CRM software",                 # 1
+        "Project management",           # 2
+        "Public speaking",              # 3
+        "Microsoft Office",             # 4
+        "Python",                       # 5
+        "Data Analysis"                 # 6
+    ]
+    resume_file = 'data.txt' # Assumes data.txt contains the resume
+
+    # --- Prepare Shared State ---
+    shared = {}
+    try:
+        with open(resume_file, 'r') as file:
+            shared["resume_text"] = file.read()
+    except FileNotFoundError:
+        print(f"Error: Resume file '{resume_file}' not found.")
+        exit(1) # Exit if resume file is missing
+
+    shared["target_skills"] = target_skills_to_find
+
+    # --- Define and Run Flow ---
+    parser_node = ResumeParserNode()
+    flow = Flow(start=parser_node)
+    flow.run(shared) # Execute the parsing node
+
+    # --- Display Found Skills ---
+    if "structured_data" in shared and "skill_indexes" in shared["structured_data"]:
+         print("\n--- Found Target Skills (from Indexes) ---")
+         found_indexes = shared["structured_data"]["skill_indexes"]
+         if found_indexes: # Check if the list is not empty or None
+             for index in found_indexes:
+                 if 0 <= index < len(target_skills_to_find):
+                     print(f"- {target_skills_to_find[index]} (Index: {index})")
+                 else:
+                     print(f"- Warning: Found invalid skill index {index}")
+         else:
+             print("No target skills identified from the list.")
+         print("----------------------------------------\n")