#!/usr/bin/env python3
"""
Script to generate MDC files from the PocketFlow docs folder, creating one MDC file per MD file.

Usage:
    python update_pocketflow_mdc.py [--docs-dir PATH] [--rules-dir PATH]
"""
|
|
|
|
import os
|
|
import re
|
|
import shutil
|
|
from pathlib import Path
|
|
import sys
|
|
import html.parser
|
|
|
|
class HTMLTagStripper(html.parser.HTMLParser):
    """HTML parser that discards markup and keeps only the text between tags.

    Feed markup with ``feed()`` and read the collected text back with
    ``get_text()``. Character references such as ``&amp;`` are decoded
    because the parser is created with ``convert_charrefs=True``.
    """

    def __init__(self):
        # HTMLParser.__init__ calls self.reset(), which creates self.text,
        # so no separate reset()/attribute setup is needed here.
        super().__init__(convert_charrefs=True)

    def reset(self):
        """Reset the parser state and drop any text collected so far."""
        super().reset()
        self.text = []

    def handle_data(self, data):
        """Store one run of text found outside of tags."""
        self.text.append(data)

    def get_text(self):
        """Return everything collected so far as a single string."""
        return ''.join(self.text)
|
|
|
|
def strip_html_tags(html_content):
    """Return *html_content* with HTML tags removed.

    Args:
        html_content: Text that may contain HTML markup.

    Returns:
        The text collected by ``HTMLTagStripper`` after parsing the input.
    """
    stripper = HTMLTagStripper()
    stripper.feed(html_content)
    # feed() may keep trailing text buffered (for example text ending in
    # something that looks like an unfinished character reference, such as
    # "R&D"). close() forces the parser to process that remainder so it is
    # not silently dropped from the result.
    stripper.close()
    return stripper.get_text()
|
|
|
|
def _frontmatter_field(frontmatter_text, key):
    """Return the value of a top-level ``key:`` line, or None if absent/empty."""
    # Anchor at the start of a line so "title" does not match "subtitle",
    # and stay on that line so an empty value never swallows the next field.
    match = re.search(rf'^{re.escape(key)}:[ \t]*(.*)$', frontmatter_text, re.MULTILINE)
    if not match:
        return None
    value = match.group(1).strip()
    # Drop one pair of matching surrounding quotes, if present.
    if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
        value = value[1:-1].strip()
    return value or None


def extract_frontmatter(file_path):
    """Extract title, parent, and nav_order from markdown frontmatter.

    Args:
        file_path: Path of the markdown file to read (UTF-8).

    Returns:
        A dict containing whichever of ``title`` (str), ``parent`` (str) and
        ``nav_order`` (int) are present in the leading ``---`` block. The
        dict is empty when the file has no frontmatter or cannot be read;
        in the latter case an error message is printed.
    """
    frontmatter = {}
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        # Best effort: a missing or unreadable file yields no metadata.
        print(f"Error reading frontmatter from {file_path}: {e}")
        return frontmatter

    # The block must start on the first line and end with "---" on its own
    # line, so a "---" inside a value does not terminate it early.
    fm_match = re.search(r'\A---[ \t]*\n(.*?)\n---[ \t]*(?:\n|\Z)', content, re.DOTALL)
    if not fm_match:
        return frontmatter
    frontmatter_text = fm_match.group(1)

    for key in ('title', 'parent'):
        value = _frontmatter_field(frontmatter_text, key)
        if value is not None:
            frontmatter[key] = value

    nav_order_match = re.search(r'^nav_order:[ \t]*(\d+)', frontmatter_text, re.MULTILINE)
    if nav_order_match:
        frontmatter['nav_order'] = int(nav_order_match.group(1))

    return frontmatter
|
|
|
|
def extract_first_heading(file_path):
    """Extract the first markdown heading from a file.

    Args:
        file_path: Path of the markdown file to read (UTF-8).

    Returns:
        The text of the first ATX heading (``#`` to ``######``) that appears
        after the frontmatter and outside fenced code blocks. If no heading
        is found, or the file cannot be read (an error is printed), the
        filename stem with underscores replaced by spaces, title-cased.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        print(f"Error extracting heading from {file_path}: {e}")
    else:
        # Remove frontmatter
        content = re.sub(r'^---.*?---\s*', '', content, flags=re.DOTALL)

        in_fence = False
        for line in content.splitlines():
            # Lines inside ``` / ~~~ fences are code; a "# comment" there is
            # not a heading.
            if line.lstrip().startswith(('```', '~~~')):
                in_fence = not in_fence
                continue
            if in_fence:
                continue
            # A heading starts the line (up to 3 spaces of indent), so a "#"
            # in the middle of a sentence such as "C# ..." is ignored.
            heading_match = re.match(r' {0,3}#{1,6}[ \t]+(\S.*)', line)
            if heading_match:
                return heading_match.group(1).strip()

    # Fallback to filename if no heading found
    return Path(file_path).stem.replace('_', ' ').title()
|
|
|
|
def get_mdc_description(md_file, frontmatter, heading):
    """Build the ``description`` text for the MDC file generated from *md_file*.

    Args:
        md_file: Path of the source markdown file.
        frontmatter: Dict of frontmatter fields; only ``title`` is consulted.
        heading: Heading text used when the frontmatter has no ``title``.

    Returns:
        A fixed description for ``guide.md`` and for an ``index.md`` that is
        not inside a known section folder; otherwise a description made of
        the section name (when the path contains one) and the title/heading.
    """
    md_path = Path(md_file)

    # The combined guide always carries the same description.
    if md_path.name == "guide.md":
        return "Guidelines for using PocketFlow, Agentic Coding"

    # Section is derived from the folder names in the path; first hit wins.
    known_sections = (
        ('core_abstraction', "Core Abstraction"),
        ('design_pattern', "Design Pattern"),
        ('utility_function', "Utility Function"),
    )
    section = next(
        (label for folder, label in known_sections if folder in md_path.parts),
        "",
    )

    # An index.md outside every known section gets the framework tagline.
    if md_path.name == "index.md" and not section:
        return "Guidelines for using PocketFlow, a minimalist LLM framework"

    # Frontmatter title takes precedence over the first heading.
    subsection = frontmatter['title'] if 'title' in frontmatter else heading

    if not section:
        return f"Guidelines for using PocketFlow, {subsection}"
    return f"Guidelines for using PocketFlow, {section}, {subsection}"
|
|
|
|
def process_markdown_content(content, remove_local_refs=False):
    """Turn raw markdown into the body text of an MDC file.

    Args:
        content: Full markdown text, optionally starting with frontmatter.
        remove_local_refs: When true, links to ``./...`` targets are reduced
            to their bracketed text; when false they are rewritten to
            ``mdc:./...`` links.

    Returns:
        The text with frontmatter and ``<div>`` elements removed, local
        links rewritten, and remaining HTML tags stripped.
    """
    # Drop the leading frontmatter block, then <div> elements with their content.
    text = re.sub(r'^---.*?---\s*', '', content, flags=re.DOTALL)
    text = re.sub(r'<div.*?>.*?</div>', '', text, flags=re.DOTALL)

    if remove_local_refs:
        # Keep only "[text]" so loading this file does not automatically pull
        # in every linked doc; the brackets stay for discoverability.
        link_rewrites = [(r'\[([^\]]+)\]\(\./[^)]+\)', r'[\1]')]
    else:
        # Keep relative links, but point them into the MDC docs structure.
        link_rewrites = [(r'\]\(\./([^)]+)\)', r'](mdc:./\1)')]

    link_rewrites += [
        # NOTE(review): this rule replaces a ".md" link with itself, so it
        # appears to be a no-op; kept to mirror the existing behavior.
        (r'\]\(mdc:\./(.+?)\.md\)', r'](mdc:./\1.md)'),
        # Links to rendered ".html" pages are pointed at the ".md" source.
        (r'\]\(mdc:\./(.+?)\.html\)', r'](mdc:./\1.md)'),
    ]
    for pattern, replacement in link_rewrites:
        text = re.sub(pattern, replacement, text)

    # Whatever HTML is left is reduced to its text.
    return strip_html_tags(text)
|
|
|
|
def get_documentation_first_policy():
    """Return the DOCUMENTATION FIRST POLICY text placed at the top of the guide.

    Returns:
        A markdown string: a level-1 heading, three numbered instructions
        and a verification note, ending with a blank line.
    """
    return """# DOCUMENTATION FIRST POLICY

**CRITICAL INSTRUCTION**: When implementing a Pocket Flow app:

1. **ALWAYS REQUEST MDC FILES FIRST** - Before writing any code, request and review all relevant MDC documentation files. This doc provides an explanation of the documents.
2. **UNDERSTAND THE FRAMEWORK** - Gain comprehensive understanding of the Pocket Flow framework from documentation
3. **AVOID ASSUMPTION-DRIVEN DEVELOPMENT** - Do not base your implementation on assumptions or guesswork. Even if the human didn't explicitly mention pocket flow in their request, if the code you are editing is using pocket flow, you should request relevant docs to help you understand best practice as well before editing.

**VERIFICATION**: Begin each implementation with a brief summary of the documentation you've reviewed to inform your approach.

"""
|
|
|
|
def generate_mdc_header(md_file, description, always_apply=False):
    """Build the frontmatter header that starts an MDC file.

    Args:
        md_file: Source markdown path; not used when building the header.
        description: Text written to the ``description`` field.
        always_apply: When true the rule is marked ``alwaysApply: true`` and
            given the ``**/*.py`` glob so it provides high-level context for
            Python files; otherwise ``globs`` is left empty to be less
            intrusive.

    Returns:
        The header text, delimited by ``---`` lines and ending in a newline.
    """
    if always_apply:
        globs, apply_flag = "**/*.py", "true"
    else:
        globs, apply_flag = "", "false"

    header_lines = (
        "---",
        f"description: {description}",
        f"globs: {globs}",
        f"alwaysApply: {apply_flag}",
        "---",
    )
    return "\n".join(header_lines) + "\n"
|
|
|
|
def has_substantive_content(content):
    """Tell whether *content* holds more than frontmatter and formatting noise.

    Args:
        content: Markdown text, optionally starting with frontmatter.

    Returns:
        True when more than 20 characters remain after removing the
        frontmatter, all whitespace, and ``{: ...}`` attribute markers.
    """
    remaining = re.sub(r'^---.*?---\s*', '', content, flags=re.DOTALL)

    # Order matters: whitespace is removed first, then "{:...}" markers.
    for noise_pattern in (r'\s+', r'{:.*?}'):
        remaining = re.sub(noise_pattern, '', remaining)

    # 20 is an arbitrary cut-off for "almost nothing left".
    return len(remaining) > 20
|
|
|
|
def create_combined_guide(docs_dir, rules_dir):
    """Create ``guide_for_pocketflow.mdc`` from ``guide.md`` and ``index.md``.

    The output consists of the MDC header, the documentation-first policy,
    the processed guide and the processed index, in that order.

    Args:
        docs_dir: Directory expected to contain ``guide.md`` and ``index.md``.
        rules_dir: Directory the combined MDC file is written to; created if
            it does not exist.

    Returns:
        True when the file was written, False when either source file is
        missing (a warning is printed and nothing is written).
    """
    docs_path = Path(docs_dir)
    rules_path = Path(rules_dir)

    guide_file = docs_path / "guide.md"
    index_file = docs_path / "index.md"

    if not guide_file.exists() or not index_file.exists():
        print("Warning: guide.md or index.md not found, skipping combined guide creation")
        return False

    # Local links are reduced to plain bracketed text in both documents.
    processed_guide = process_markdown_content(
        guide_file.read_text(encoding='utf-8'), remove_local_refs=True)
    processed_index = process_markdown_content(
        index_file.read_text(encoding='utf-8'), remove_local_refs=True)

    # The documentation-first policy goes first, ahead of guide and index.
    combined_content = (
        get_documentation_first_policy() + processed_guide + "\n\n" + processed_index
    )

    description = "Guidelines for using PocketFlow, Agentic Coding"
    mdc_header = generate_mdc_header(guide_file, description, always_apply=True)

    # Make sure the target directory exists so the function also works when
    # it is called on its own.
    rules_path.mkdir(parents=True, exist_ok=True)
    output_path = rules_path / "guide_for_pocketflow.mdc"
    output_path.write_text(mdc_header + combined_content, encoding='utf-8')

    print(f"Created combined guide MDC file: {output_path}")
    return True
|
|
|
|
def convert_md_to_mdc(md_file, output_dir, docs_dir, special_treatment=False):
    """Convert a markdown file to MDC format and save it under *output_dir*.

    Args:
        md_file: Path of the markdown file to convert.
        output_dir: Root directory for generated ``.mdc`` files.
        docs_dir: Docs root; the output keeps the file's path relative to it.
        special_treatment: When true, local links are reduced to bracketed
            text and the header is generated with ``always_apply=True``.

    Returns:
        True when the file was written or deliberately skipped, False when
        any error occurred (the error is printed).
    """
    try:
        print(f"Processing: {md_file}")

        # guide.md and index.md are handled by the combined guide. The match
        # is by file name only, so index.md files in subfolders are skipped
        # here as well.
        file_name = Path(md_file).name
        if file_name in ("guide.md", "index.md"):
            print(f"Skipping {file_name} for individual processing - it will be included in the combined guide")
            return True

        # Extract metadata from file
        frontmatter = extract_frontmatter(md_file)
        heading = extract_first_heading(md_file)
        description = get_mdc_description(md_file, frontmatter, heading)

        with open(md_file, 'r', encoding='utf-8') as f:
            content = f.read()

        processed_content = process_markdown_content(content, remove_local_refs=special_treatment)

        # Nothing worth writing once frontmatter/markup is gone.
        if not has_substantive_content(processed_content):
            print(f"Skipping file with no substantive content after processing: {md_file}")
            return True

        mdc_header = generate_mdc_header(md_file, description, always_apply=special_treatment)
        mdc_content = mdc_header + processed_content

        # Mirror the path relative to the docs directory, dropping a leading
        # 'docs' component if one is present.
        rel_path = Path(os.path.relpath(md_file, start=Path(docs_dir)))
        if len(rel_path.parts) > 1 and rel_path.parts[0] == 'docs':
            rel_path = Path(*rel_path.parts[1:])

        # Same relative location under output_dir, with a .mdc extension.
        output_path = (Path(output_dir) / rel_path).with_suffix('.mdc')
        output_path.parent.mkdir(parents=True, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(mdc_content)

        print(f"Created MDC file: {output_path}")
        return True

    except Exception as e:
        # Per-file boundary: report the problem and let the caller count it.
        print(f"Error converting {md_file} to MDC: {e}")
        return False
|
|
|
|
def generate_mdc_files(docs_dir, rules_dir):
    """Generate MDC files from all markdown files in the docs directory.

    The combined guide is created first; every other ``*.md`` file found
    recursively under *docs_dir* (except files named ``index.md`` or
    ``guide.md``) is then converted individually.

    Args:
        docs_dir: Directory containing the markdown docs.
        rules_dir: Output directory for the MDC files; created if missing.

    Returns:
        True when at least one file was produced and no conversion failed.

    Raises:
        ValueError: If *docs_dir* does not exist or is not a directory.
    """
    docs_path = Path(docs_dir)
    rules_path = Path(rules_dir)

    # Make sure the docs directory exists
    if not docs_path.is_dir():
        raise ValueError(f"Directory not found: {docs_dir}")

    print(f"Generating MDC files from docs in: {docs_dir}")
    print(f"Output will be written to: {rules_dir}")

    rules_path.mkdir(parents=True, exist_ok=True)

    # The combined guide covers guide.md and index.md. It is only counted
    # below if it was actually written.
    guide_count = 1 if create_combined_guide(docs_dir, rules_dir) else 0

    # All remaining markdown files, in a stable order.
    md_files = sorted(
        f for f in docs_path.glob("**/*.md")
        if f.name not in ("index.md", "guide.md")
    )

    success_count = 0
    failure_count = 0
    for md_file in md_files:
        if convert_md_to_mdc(md_file, rules_path, docs_dir):
            success_count += 1
        else:
            failure_count += 1

    print(f"\nProcessed {len(md_files) + guide_count} markdown files:")
    print(f" - Successfully converted: {success_count + guide_count}")
    print(f" - Failed conversions: {failure_count}")

    return (success_count + guide_count) > 0 and failure_count == 0
|
|
|
|
if __name__ == "__main__":
    import argparse

    # Defaults are resolved against the parent of the directory that holds
    # this script: <parent>/docs for input, <parent>/.cursor/rules for output.
    project_root = Path(__file__).parent.absolute().parent
    default_docs_dir = (project_root / "docs").as_posix()
    default_rules_dir = (project_root / ".cursor" / "rules").as_posix()

    parser = argparse.ArgumentParser(description="Generate MDC files from PocketFlow docs")
    for flag, default_value, help_text in (
        ("--docs-dir", default_docs_dir, "Path to PocketFlow docs directory"),
        ("--rules-dir", default_rules_dir, "Output directory for MDC files"),
    ):
        parser.add_argument(flag, default=default_value, help=help_text)
    args = parser.parse_args()

    # Exit status: 0 on success, 1 on a reported failure or any exception.
    try:
        success = generate_mdc_files(args.docs_dir, args.rules_dir)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)
    sys.exit(0 if success else 1)