update structured output demo
This commit is contained in:
parent
9e026c7f14
commit
e938bf3e42
|
|
@ -9,17 +9,27 @@ A minimal demo application showing how to use PocketFlow to extract structured d
|
||||||
|
|
||||||
## Run It
|
## Run It
|
||||||
|
|
||||||
1. Make sure your OpenAI API key is set:
|
1. Install the packages you need with this simple command:
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Make sure your OpenAI API key is set:
|
||||||
```bash
|
```bash
|
||||||
export OPENAI_API_KEY="your-api-key-here"
|
export OPENAI_API_KEY="your-api-key-here"
|
||||||
```
|
```
|
||||||
Alternatively, you can edit the `utils.py` file to include your API key directly.
|
Alternatively, you can edit the [`utils.py`](./utils.py) file to include your API key directly.
|
||||||
|
|
||||||
2. Edit data.txt with the resume you want to parse (a sample resume is already included)
|
Let's do a quick check to make sure your API key is working properly:
|
||||||
|
|
||||||
3. Install requirements and run the application:
|
|
||||||
```bash
|
```bash
|
||||||
pip install -r requirements.txt
|
python utils.py
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Edit [data.txt](./data.txt) with the resume you want to parse (a sample resume is already included)
|
||||||
|
|
||||||
|
4. Run the application:
|
||||||
|
```bash
|
||||||
python main.py
|
python main.py
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -45,24 +55,29 @@ The Resume Parser application uses a single node that:
|
||||||
## Example Output
|
## Example Output
|
||||||
|
|
||||||
```
|
```
|
||||||
=== STRUCTURED RESUME DATA ===
|
=== Resume Parser - Structured Output with Indexes & Comments ===
|
||||||
|
|
||||||
name: John Smith
|
|
||||||
|
=== STRUCTURED RESUME DATA (Comments & Skill Index List) ===
|
||||||
|
|
||||||
|
name: JOHN SMTIH
|
||||||
email: johnsmtih1983@gnail.com
|
email: johnsmtih1983@gnail.com
|
||||||
experience:
|
experience:
|
||||||
- title: Sales Manager
|
- {title: SALES MANAGER, company: ABC Corportaion}
|
||||||
company: ABC Corporation
|
- {title: ASST. MANAGER, company: XYZ Industries}
|
||||||
- title: Assistant Manager
|
- {title: CUSTOMER SERVICE REPRESENTATIVE, company: Fast Solutions Inc}
|
||||||
company: XYZ Industries
|
skill_indexes: [0, 1, 2, 3, 4]
|
||||||
- title: Customer Service Representative
|
|
||||||
company: Fast Solutions Inc
|
|
||||||
skills:
|
|
||||||
- Microsoft Office: Excel, Word, PowerPoint (Advanced)
|
|
||||||
- Customer relationship management (CRM) software
|
|
||||||
- Team leadership & management
|
|
||||||
- Project management
|
|
||||||
- Public speaking
|
|
||||||
- Time management
|
|
||||||
|
|
||||||
============================
|
|
||||||
|
============================================================
|
||||||
|
|
||||||
|
✅ Extracted resume information.
|
||||||
|
|
||||||
|
--- Found Target Skills (from Indexes) ---
|
||||||
|
- Team leadership & management (Index: 0)
|
||||||
|
- CRM software (Index: 1)
|
||||||
|
- Project management (Index: 2)
|
||||||
|
- Public speaking (Index: 3)
|
||||||
|
- Microsoft Office (Index: 4)
|
||||||
|
----------------------------------------
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -1,74 +1,148 @@
|
||||||
from pocketflow import Node, Flow
|
|
||||||
from utils import call_llm
|
|
||||||
import yaml
|
import yaml
|
||||||
|
import os # Needed for the utils import below
|
||||||
|
from pocketflow import Node, Flow
|
||||||
|
from utils import call_llm # Assumes utils.py with call_llm exists
|
||||||
|
|
||||||
class ResumeParserNode(Node):
|
class ResumeParserNode(Node):
|
||||||
def prep(self, shared):
|
def prep(self, shared):
|
||||||
"""Return resume text from shared state"""
|
"""Return resume text and target skills from shared state."""
|
||||||
return shared["resume_text"]
|
return {
|
||||||
|
"resume_text": shared["resume_text"],
|
||||||
def exec(self, resume_text):
|
"target_skills": shared.get("target_skills", [])
|
||||||
"""Extract structured data from resume using prompt engineering"""
|
}
|
||||||
|
|
||||||
|
def exec(self, prep_res):
|
||||||
|
"""Extract structured data from resume using prompt engineering.
|
||||||
|
Requests YAML output with comments and skill indexes as a list.
|
||||||
|
"""
|
||||||
|
resume_text = prep_res["resume_text"]
|
||||||
|
target_skills = prep_res["target_skills"]
|
||||||
|
|
||||||
|
# Format skills with indexes for the prompt
|
||||||
|
skill_list_for_prompt = "\n".join([f"{i}: {skill}" for i, skill in enumerate(target_skills)])
|
||||||
|
|
||||||
|
# Simplified Prompt focusing on key instructions and format
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
Please extract the following information from this resume and format it as YAML:
|
Analyze the resume below. Output ONLY the requested information in YAML format.
|
||||||
- name
|
|
||||||
- email
|
|
||||||
- experience (list of positions with title and company)
|
|
||||||
- skills (list of skills)
|
|
||||||
|
|
||||||
|
**Resume:**
|
||||||
|
```
|
||||||
{resume_text}
|
{resume_text}
|
||||||
|
```
|
||||||
|
|
||||||
Now, output:
|
**Target Skills (use these indexes):**
|
||||||
|
```
|
||||||
|
{skill_list_for_prompt}
|
||||||
|
```
|
||||||
|
|
||||||
|
**YAML Output Requirements:**
|
||||||
|
- Extract `name` (string).
|
||||||
|
- Extract `email` (string).
|
||||||
|
- Extract `experience` (list of objects with `title` and `company`).
|
||||||
|
- Extract `skill_indexes` (list of integers found from the Target Skills list).
|
||||||
|
- **Add a YAML comment (`#`) explaining the source BEFORE `name`, `email`, `experience`, each item in `experience`, and `skill_indexes`.**
|
||||||
|
|
||||||
|
**Example Format:**
|
||||||
```yaml
|
```yaml
|
||||||
name: John Doe
|
# Found name at top
|
||||||
email: john@example.com
|
name: Jane Doe
|
||||||
|
# Found email in contact info
|
||||||
|
email: jane@example.com
|
||||||
|
# Experience section analysis
|
||||||
experience:
|
experience:
|
||||||
- title: Software Engineer
|
# First job listed
|
||||||
company: Tech Company
|
- title: Manager
|
||||||
- title: Developer
|
company: Corp A
|
||||||
company: Another Company
|
# Second job listed
|
||||||
skills:
|
- title: Assistant
|
||||||
- Python
|
company: Corp B
|
||||||
- JavaScript
|
# Skills identified from the target list based on resume content
|
||||||
- HTML/CSS
|
skill_indexes:
|
||||||
```"""
|
# Found 0 at top
|
||||||
|
- 0
|
||||||
|
# Found 2 in experience
|
||||||
|
- 2
|
||||||
|
```
|
||||||
|
|
||||||
|
Generate the YAML output now:
|
||||||
|
"""
|
||||||
response = call_llm(prompt)
|
response = call_llm(prompt)
|
||||||
|
|
||||||
# Extract YAML content from markdown code blocks
|
# --- Minimal YAML Extraction ---
|
||||||
|
# Assumes LLM correctly uses ```yaml blocks
|
||||||
yaml_str = response.split("```yaml")[1].split("```")[0].strip()
|
yaml_str = response.split("```yaml")[1].split("```")[0].strip()
|
||||||
structured_result = yaml.safe_load(yaml_str)
|
structured_result = yaml.safe_load(yaml_str)
|
||||||
|
# --- End Minimal Extraction ---
|
||||||
# Validate structure
|
|
||||||
assert "name" in structured_result
|
|
||||||
assert "experience" in structured_result
|
|
||||||
assert isinstance(structured_result["experience"], list)
|
|
||||||
assert "skills" in structured_result
|
|
||||||
assert isinstance(structured_result["skills"], list)
|
|
||||||
|
|
||||||
return structured_result
|
|
||||||
|
|
||||||
def post(self, shared, prep_res, exec_res):
|
|
||||||
"""Store and display structured resume data in YAML"""
|
|
||||||
shared["structured_data"] = exec_res
|
|
||||||
|
|
||||||
# Print structured data in YAML format
|
|
||||||
print("\n=== STRUCTURED RESUME DATA ===\n")
|
|
||||||
print(yaml.dump(exec_res, sort_keys=False))
|
|
||||||
print("\n============================\n")
|
|
||||||
|
|
||||||
print("✅ Extracted basic resume information")
|
|
||||||
|
|
||||||
# Create and run the flow
|
|
||||||
if __name__ == "__main__":
|
|
||||||
print("=== Simple Resume Parser - YAML Output ===\n")
|
|
||||||
|
|
||||||
# Read resume text from file
|
|
||||||
shared = {}
|
|
||||||
with open('data.txt', 'r') as file:
|
|
||||||
resume_text = file.read()
|
|
||||||
shared["resume_text"] = resume_text
|
|
||||||
|
|
||||||
|
# --- Basic Validation ---
|
||||||
flow = Flow(start=ResumeParserNode())
|
assert structured_result is not None, "Validation Failed: Parsed YAML is None"
|
||||||
flow.run(shared)
|
assert "name" in structured_result, "Validation Failed: Missing 'name'"
|
||||||
|
assert "email" in structured_result, "Validation Failed: Missing 'email'"
|
||||||
|
assert "experience" in structured_result, "Validation Failed: Missing 'experience'"
|
||||||
|
assert isinstance(structured_result.get("experience"), list), "Validation Failed: 'experience' is not a list"
|
||||||
|
assert "skill_indexes" in structured_result, "Validation Failed: Missing 'skill_indexes'"
|
||||||
|
skill_indexes_val = structured_result.get("skill_indexes")
|
||||||
|
assert skill_indexes_val is None or isinstance(skill_indexes_val, list), "Validation Failed: 'skill_indexes' is not a list or None"
|
||||||
|
if isinstance(skill_indexes_val, list):
|
||||||
|
for index in skill_indexes_val:
|
||||||
|
assert isinstance(index, int), f"Validation Failed: Skill index '{index}' is not an integer"
|
||||||
|
# --- End Basic Validation ---
|
||||||
|
|
||||||
|
return structured_result
|
||||||
|
|
||||||
|
def post(self, shared, prep_res, exec_res):
|
||||||
|
"""Store structured data and print it."""
|
||||||
|
shared["structured_data"] = exec_res
|
||||||
|
|
||||||
|
print("\n=== STRUCTURED RESUME DATA (Comments & Skill Index List) ===\n")
|
||||||
|
# Dump YAML; default_flow_style=None keeps nested collections in block style and prints scalar-only ones inline
|
||||||
|
print(yaml.dump(exec_res, sort_keys=False, allow_unicode=True, default_flow_style=None))
|
||||||
|
print("\n============================================================\n")
|
||||||
|
print("✅ Extracted resume information.")
|
||||||
|
|
||||||
|
|
||||||
|
# === Main Execution Logic ===
|
||||||
|
if __name__ == "__main__":
|
||||||
|
print("=== Resume Parser - Structured Output with Indexes & Comments ===\n")
|
||||||
|
|
||||||
|
# --- Configuration ---
|
||||||
|
target_skills_to_find = [
|
||||||
|
"Team leadership & management", # 0
|
||||||
|
"CRM software", # 1
|
||||||
|
"Project management", # 2
|
||||||
|
"Public speaking", # 3
|
||||||
|
"Microsoft Office", # 4
|
||||||
|
"Python", # 5
|
||||||
|
"Data Analysis" # 6
|
||||||
|
]
|
||||||
|
resume_file = 'data.txt' # Assumes data.txt contains the resume
|
||||||
|
|
||||||
|
# --- Prepare Shared State ---
|
||||||
|
shared = {}
|
||||||
|
try:
|
||||||
|
with open(resume_file, 'r') as file:
|
||||||
|
shared["resume_text"] = file.read()
|
||||||
|
except FileNotFoundError:
|
||||||
|
print(f"Error: Resume file '{resume_file}' not found.")
|
||||||
|
exit(1) # Exit if resume file is missing
|
||||||
|
|
||||||
|
shared["target_skills"] = target_skills_to_find
|
||||||
|
|
||||||
|
# --- Define and Run Flow ---
|
||||||
|
parser_node = ResumeParserNode()
|
||||||
|
flow = Flow(start=parser_node)
|
||||||
|
flow.run(shared) # Execute the parsing node
|
||||||
|
|
||||||
|
# --- Display Found Skills ---
|
||||||
|
if "structured_data" in shared and "skill_indexes" in shared["structured_data"]:
|
||||||
|
print("\n--- Found Target Skills (from Indexes) ---")
|
||||||
|
found_indexes = shared["structured_data"]["skill_indexes"]
|
||||||
|
if found_indexes: # Check if the list is not empty or None
|
||||||
|
for index in found_indexes:
|
||||||
|
if 0 <= index < len(target_skills_to_find):
|
||||||
|
print(f"- {target_skills_to_find[index]} (Index: {index})")
|
||||||
|
else:
|
||||||
|
print(f"- Warning: Found invalid skill index {index}")
|
||||||
|
else:
|
||||||
|
print("No target skills identified from the list.")
|
||||||
|
print("----------------------------------------\n")
|
||||||
Loading…
Reference in New Issue