#!/usr/bin/env python3
"""
Script to generate MDC files from the PocketFlow docs folder, creating one MDC file per MD file.
Usage:
python update_pocketflow_mdc.py [--docs-dir PATH] [--rules-dir PATH]
"""
import os
import re
import shutil
from pathlib import Path
import sys
import html.parser
class HTMLTagStripper(html.parser.HTMLParser):
    """HTML parser that discards markup and keeps only the text content.

    Feed markup with ``feed()``, call ``close()`` to flush any text the
    parser is still buffering, then read the result with ``get_text()``.
    Calling ``reset()`` makes the instance reusable for a new document.
    """

    def __init__(self):
        # HTMLParser.__init__ calls self.reset(), which creates self.text.
        # convert_charrefs=True has the parser decode character references
        # (such as "&amp;") before the text reaches handle_data().
        super().__init__(convert_charrefs=True)

    def reset(self):
        """Reset the parser state and drop any text collected so far."""
        super().reset()
        self.text = []

    def handle_data(self, data):
        """Record one run of text found outside of tags."""
        self.text.append(data)

    def get_text(self):
        """Return everything collected so far as a single string."""
        return ''.join(self.text)
def strip_html_tags(html_content):
    """Return ``html_content`` with all HTML tags removed.

    Args:
        html_content: Text that may contain HTML markup.

    Returns:
        The text content as collected by ``HTMLTagStripper``.
    """
    stripper = HTMLTagStripper()
    stripper.feed(html_content)
    # feed() may hold back trailing text (for example when the input ends
    # with an "&" that could still become a character reference); close()
    # forces the parser to process everything that is still buffered.
    stripper.close()
    return stripper.get_text()
def extract_frontmatter(file_path):
    """Extract ``title``, ``parent`` and ``nav_order`` from markdown frontmatter.

    Args:
        file_path: Path of the markdown file to read (decoded as UTF-8).

    Returns:
        A dict holding whichever of ``'title'`` (str), ``'parent'`` (str) and
        ``'nav_order'`` (int) were found. The dict is empty when the file has
        no frontmatter or cannot be read; read errors are printed, not raised.
    """
    frontmatter = {}
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 files.
        print(f"Error reading frontmatter from {file_path}: {e}")
        return frontmatter

    # Extract frontmatter between --- markers
    fm_match = re.search(r'^---\s*(.+?)\s*---', content, re.DOTALL)
    if not fm_match:
        return frontmatter
    frontmatter_text = fm_match.group(1)

    def _string_field(name):
        # Anchor the key at the start of a line so that e.g. "grand_parent:"
        # cannot satisfy a lookup for "parent:", and only skip spaces/tabs
        # after the colon so an empty value does not swallow the next line.
        match = re.search(
            rf'^{name}:[ \t]*"?([^"\n]+)"?', frontmatter_text, re.MULTILINE
        )
        return match.group(1).strip() if match else ""

    for key in ('title', 'parent'):
        value = _string_field(key)
        if value:
            frontmatter[key] = value

    nav_order_match = re.search(
        r'^nav_order:[ \t]*(\d+)', frontmatter_text, re.MULTILINE
    )
    if nav_order_match:
        frontmatter['nav_order'] = int(nav_order_match.group(1))
    return frontmatter
def extract_first_heading(file_path):
    """Return the text of the first markdown heading in a file.

    Frontmatter and fenced code blocks are ignored, so a ``# comment`` inside
    a code sample is not mistaken for a heading.

    Args:
        file_path: Path of the markdown file to read (decoded as UTF-8).

    Returns:
        The heading text without its leading ``#`` markers. When the file has
        no heading or cannot be read, a title derived from the file name
        (underscores become spaces, words are title-cased) is returned.
        Read errors are printed, not raised.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # Remove frontmatter
        content = re.sub(r'^---.*?---\s*', '', content, flags=re.DOTALL)
        # Remove fenced code blocks so "#" comments in samples are skipped
        content = re.sub(
            r'^```.*?^```[ \t]*$', '', content, flags=re.DOTALL | re.MULTILINE
        )
        # A heading is 1-6 "#" at the start of a line followed by spaces/tabs;
        # "." does not match newlines here, so the title stays on one line.
        heading_match = re.search(r'^#{1,6}[ \t]+(.+)$', content, re.MULTILINE)
        if heading_match:
            return heading_match.group(1).strip()
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 files.
        print(f"Error extracting heading from {file_path}: {e}")
    # Fallback to filename if no heading found
    return Path(file_path).stem.replace('_', ' ').title()
def get_mdc_description(md_file, frontmatter, heading):
    """Build the description line used in an MDC header.

    Args:
        md_file: Path of the markdown file; its folder names select the
            section and its file name selects the fixed descriptions.
        frontmatter: Dict of frontmatter values; ``'title'`` is used as the
            subsection when present.
        heading: Subsection text used when the frontmatter has no title.

    Returns:
        The description string for the file.
    """
    doc_path = Path(md_file)

    # Folder name -> section label; the first folder (in this order) that
    # appears anywhere in the path decides the section.
    known_sections = (
        ('core_abstraction', "Core Abstraction"),
        ('design_pattern', "Design Pattern"),
        ('utility_function', "Utility Function"),
    )
    folders = doc_path.parts
    section = next(
        (label for folder, label in known_sections if folder in folders), ""
    )

    # The frontmatter title wins over the first heading.
    subsection = frontmatter['title'] if 'title' in frontmatter else heading

    # guide.md always gets the fixed "Agentic Coding" description.
    if doc_path.name == "guide.md":
        return "Guidelines for using PocketFlow, Agentic Coding"

    # An index.md outside the known section folders is the framework overview.
    if doc_path.name == "index.md" and not section:
        return "Guidelines for using PocketFlow, a minimalist LLM framework"

    if not section:
        return f"Guidelines for using PocketFlow, {subsection}"
    return f"Guidelines for using PocketFlow, {section}, {subsection}"
def process_markdown_content(content, remove_local_refs=False):
    """Process markdown content to make it suitable for an MDC file.

    Args:
        content: Raw markdown text, optionally starting with frontmatter.
        remove_local_refs: When True, links to local documents
            (``[text](./path)``) are reduced to ``[text]``. When False, they
            are rewritten to ``mdc:./path`` links and ``.html`` targets are
            pointed at the corresponding ``.md`` file.

    Returns:
        The processed text with frontmatter, ``<div>`` blocks and remaining
        HTML tags removed.
    """
    # Remove frontmatter
    content = re.sub(r'^---.*?---\s*', '', content, flags=re.DOTALL)
    # Remove <div ...>...</div> blocks together with their content
    content = re.sub(r'<div\b[^>]*>.*?</div>', '', content, flags=re.DOTALL)
    if remove_local_refs:
        # Replace markdown links to local documentation with just the text in brackets
        # This prevents automatically including all docs when the file is loaded
        # Keep the brackets around the text for better discoverability
        content = re.sub(r'\[([^\]]+)\]\(\./[^)]+\)', r'[\1]', content)
    else:
        # Adjust relative links to maintain references within the docs structure
        content = re.sub(r'\]\(\./([^)]+)\)', r'](mdc:./\1)', content)
        # Point links at rendered .html pages to the .md sources instead.
        # [^)] keeps the match inside a single link target, so text after the
        # closing parenthesis is never rewritten.
        content = re.sub(r'\]\(mdc:\./([^)]+?)\.html\)', r'](mdc:./\1.md)', content)
    # Strip remaining HTML tags
    content = strip_html_tags(content)
    return content
def get_documentation_first_policy():
    """Return the DOCUMENTATION FIRST POLICY text to be included in the guide.

    Returns:
        A markdown string that starts with the policy heading and ends with a
        newline, ready to be placed in front of the guide content.
    """
    return """# DOCUMENTATION FIRST POLICY
**CRITICAL INSTRUCTION**: When implementing a Pocket Flow app:
1. **ALWAYS REQUEST MDC FILES FIRST** - Before writing any code, request and review all relevant MDC documentation files. This doc provides an explanation of the documents.
2. **UNDERSTAND THE FRAMEWORK** - Gain comprehensive understanding of the Pocket Flow framework from documentation
3. **AVOID ASSUMPTION-DRIVEN DEVELOPMENT** - Do not base your implementation on assumptions or guesswork. Even if the human didn't explicitly mention pocket flow in their request, if the code you are editing is using pocket flow, you should request relevant docs to help you understand best practice as well before editing.
**VERIFICATION**: Begin each implementation with a brief summary of the documentation you've reviewed to inform your approach.
"""
def generate_mdc_header(md_file, description, always_apply=False):
    """Build the frontmatter header placed at the top of an MDC file.

    Args:
        md_file: Source markdown path; accepted for the caller's convenience
            but not used when building the header.
        description: Text written to the ``description`` field.
        always_apply: When True the header targets all Python files
            (``**/*.py``) and sets ``alwaysApply`` to ``true``; otherwise
            ``globs`` is left empty and ``alwaysApply`` is ``false``.

    Returns:
        The header text, delimited by ``---`` lines.
    """
    # Always-applied rules carry a Python glob to give high-level context for
    # Python files; every other rule leaves it empty to be less intrusive.
    if always_apply:
        globs, apply_flag = "**/*.py", "true"
    else:
        globs, apply_flag = "", "false"
    return f"""---
description: {description}
globs: {globs}
alwaysApply: {apply_flag}
---
"""
def has_substantive_content(content):
    """Tell whether ``content`` holds more than frontmatter and formatting.

    Args:
        content: Markdown text, with or without frontmatter.

    Returns:
        True when more than 20 characters remain after the frontmatter, all
        whitespace and ``{: ...}`` attribute markers have been removed.
    """
    # Applied in order: frontmatter first, then whitespace, then the
    # "{:...}" markers (which contain no whitespace by that point).
    cleanup_steps = (
        (r'^---.*?---\s*', re.DOTALL),
        (r'\s+', 0),
        (r'{:.*?}', 0),
    )
    remainder = content
    for pattern, flags in cleanup_steps:
        remainder = re.sub(pattern, '', remainder, flags=flags)
    # Arbitrary threshold, adjust as needed
    return len(remainder) > 20
def create_combined_guide(docs_dir, rules_dir):
    """Write ``guide_for_pocketflow.mdc`` from ``guide.md`` and ``index.md``.

    The output consists of an always-applied MDC header, the documentation
    first policy, the processed guide and the processed index, in that order.

    Args:
        docs_dir: Directory that contains ``guide.md`` and ``index.md``.
        rules_dir: Directory the combined MDC file is written to.

    Returns:
        True when the file was written, False when either source file is
        missing (a warning is printed and nothing is written).
    """
    docs_root = Path(docs_dir)
    guide_file = docs_root / "guide.md"
    index_file = docs_root / "index.md"
    if not (guide_file.exists() and index_file.exists()):
        print("Warning: guide.md or index.md not found, skipping combined guide creation")
        return False

    guide_text = guide_file.read_text(encoding='utf-8')
    index_text = index_file.read_text(encoding='utf-8')

    # Local links are dropped from both documents; the policy text leads.
    body = "".join([
        get_documentation_first_policy(),
        process_markdown_content(guide_text, remove_local_refs=True),
        "\n\n",
        process_markdown_content(index_text, remove_local_refs=True),
    ])
    header = generate_mdc_header(
        guide_file,
        "Guidelines for using PocketFlow, Agentic Coding",
        always_apply=True,
    )

    # The combined file gets its own name rather than guide.mdc.
    output_path = Path(rules_dir) / "guide_for_pocketflow.mdc"
    output_path.write_text(header + body, encoding='utf-8')
    print(f"Created combined guide MDC file: {output_path}")
    return True
def convert_md_to_mdc(md_file, output_dir, docs_dir, special_treatment=False):
    """Convert a markdown file to MDC format and save it under ``output_dir``.

    Args:
        md_file: Path of the markdown file to convert.
        output_dir: Root directory for generated MDC files.
        docs_dir: Docs root; the output keeps ``md_file``'s path relative to
            this directory, with the extension changed to ``.mdc``.
        special_treatment: When True, local links are removed from the
            content and the header is marked as always applied.

    Returns:
        True when the file was written or deliberately skipped, False when
        an error occurred (the error is printed, not raised).
    """
    try:
        print(f"Processing: {md_file}")
        # guide.md and index.md are never converted individually. Because
        # every index.md returns here, no separate check for empty subfolder
        # index files is needed.
        file_name = Path(md_file).name
        if file_name in ("guide.md", "index.md"):
            print(f"Skipping {file_name} for individual processing - it will be included in the combined guide")
            return True

        # Read and process the body first so that files without real
        # content are skipped before any metadata is extracted.
        with open(md_file, 'r', encoding='utf-8') as f:
            content = f.read()
        processed_content = process_markdown_content(content, remove_local_refs=special_treatment)
        if not has_substantive_content(processed_content):
            print(f"Skipping file with no substantive content after processing: {md_file}")
            return True

        # Extract metadata from file and build the header
        frontmatter = extract_frontmatter(md_file)
        heading = extract_first_heading(md_file)
        description = get_mdc_description(md_file, frontmatter, heading)
        mdc_header = generate_mdc_header(md_file, description, always_apply=special_treatment)
        mdc_content = mdc_header + processed_content

        # Get the path relative to the docs directory
        rel_path = os.path.relpath(md_file, start=Path(docs_dir))
        # Drop a leading 'docs' component if the relative path still has one
        path_parts = Path(rel_path).parts
        if len(path_parts) > 1 and path_parts[0] == 'docs':
            rel_path = os.path.join(*path_parts[1:])

        # Mirror the docs layout under output_dir, changing .md to .mdc
        output_path = (Path(output_dir) / rel_path).with_suffix('.mdc')
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(mdc_content)
        print(f"Created MDC file: {output_path}")
        return True
    except Exception as e:
        # Conversion is best-effort per file: report and let the caller count it.
        print(f"Error converting {md_file} to MDC: {e}")
        return False
def generate_mdc_files(docs_dir, rules_dir):
    """Generate MDC files from all markdown files in the docs directory.

    The combined guide is created first, then every other markdown file
    below ``docs_dir`` (all ``index.md`` and ``guide.md`` files excluded) is
    converted individually. A summary is printed at the end.

    Args:
        docs_dir: Root directory of the markdown docs.
        rules_dir: Output directory; created if it does not exist.

    Returns:
        True when at least one individual file was converted and none failed.

    Raises:
        ValueError: If ``docs_dir`` is not an existing directory.
    """
    docs_path = Path(docs_dir)
    rules_path = Path(rules_dir)
    # Make sure the docs directory exists
    if not docs_path.is_dir():
        raise ValueError(f"Directory not found: {docs_dir}")
    print(f"Generating MDC files from docs in: {docs_dir}")
    print(f"Output will be written to: {rules_dir}")
    # Create the rules directory if it doesn't exist
    rules_path.mkdir(parents=True, exist_ok=True)

    # Create the combined guide file first (includes both guide.md and index.md)
    combined_created = create_combined_guide(docs_dir, rules_dir)

    # index.md and guide.md are handled by the combined guide; sorting keeps
    # the processing order (and the log output) stable between runs.
    md_files = sorted(
        f for f in docs_path.rglob("*.md")
        if f.name not in ("index.md", "guide.md")
    )

    success_count = 0
    failure_count = 0
    for md_file in md_files:
        if convert_md_to_mdc(md_file, rules_path, docs_dir):
            success_count += 1
        else:
            failure_count += 1

    # Only count the combined guide when it was actually written.
    combined_count = 1 if combined_created else 0
    print(f"\nProcessed {len(md_files) + combined_count} markdown files:")
    print(f" - Successfully converted: {success_count + combined_count}")
    print(f" - Failed conversions: {failure_count}")
    return success_count > 0 and failure_count == 0
if __name__ == "__main__":
    import argparse

    # Both defaults live next to the directory that contains this script:
    # <parent>/docs for the input and <parent>/.cursor/rules for the output.
    base_dir = Path(__file__).parent.absolute().parent

    cli = argparse.ArgumentParser(description="Generate MDC files from PocketFlow docs")
    cli.add_argument(
        "--docs-dir",
        default=(base_dir / "docs").as_posix(),
        help="Path to PocketFlow docs directory",
    )
    cli.add_argument(
        "--rules-dir",
        default=(base_dir / ".cursor" / "rules").as_posix(),
        help="Output directory for MDC files",
    )
    options = cli.parse_args()

    # Exit status 0 only when generation reports success; any error is
    # printed and turned into exit status 1.
    try:
        exit_code = 0 if generate_mdc_files(options.docs_dir, options.rules_dir) else 1
    except Exception as e:
        print(f"Error: {e}")
        exit_code = 1
    sys.exit(exit_code)