#!/usr/bin/env python3
"""
Script to generate MDC files from the PocketFlow docs folder, creating one MDC file per MD file.

Usage:
    python update_pocketflow_mdc.py [--docs-dir PATH] [--rules-dir PATH]
"""

import argparse
import html.parser
import os
import re
import shutil
import sys
from pathlib import Path

# Leading "--- ... ---" frontmatter block; "^" (without MULTILINE) only matches
# at the very start of the text, so at most one block is removed.
_FRONTMATTER_RE = re.compile(r'^---.*?---\s*', re.DOTALL)

# An ATX heading ("# Title" .. "###### Title") at the start of a line.
_HEADING_RE = re.compile(r'^ {0,3}#{1,6}[ \t]+(.+)')

# Docs sub-directory name -> human readable section used in descriptions.
# Order matters: the first directory found in the file's path wins.
_SECTIONS = (
    ('core_abstraction', "Core Abstraction"),
    ('design_pattern', "Design Pattern"),
    ('utility_function', "Utility Function"),
)

# Files that are merged into the combined guide instead of converted one by one.
_COMBINED_SOURCES = ("guide.md", "index.md")


class HTMLTagStripper(html.parser.HTMLParser):
    """HTML Parser subclass to strip HTML tags from content"""

    def __init__(self):
        # HTMLParser.__init__ calls self.reset(), which creates self.text.
        super().__init__(convert_charrefs=True)

    def reset(self):
        """Reset parser state and discard any text collected so far."""
        super().reset()
        self.text = []

    def handle_data(self, data):
        """Collect every text node; tags themselves are simply ignored."""
        self.text.append(data)

    def get_text(self):
        """Return all text collected so far as a single string."""
        return ''.join(self.text)


def strip_html_tags(html_content: str) -> str:
    """Remove HTML tags from content

    Args:
        html_content: Text that may contain HTML markup.

    Returns:
        The text with tags removed and character references decoded.
    """
    stripper = HTMLTagStripper()
    stripper.feed(html_content)
    # feed() holds back trailing text that might be an incomplete tag or
    # character reference (e.g. text ending in "R&D"); close() flushes it.
    # Without this call such trailing text was silently dropped.
    stripper.close()
    return stripper.get_text()


def _frontmatter_field(frontmatter_text, key):
    """Return the value of `key:` from frontmatter text, or None if absent/empty."""
    # Anchor the key at the start of a line so that e.g. "parent" does not
    # match inside "grand_parent" and "title" does not match inside "subtitle".
    match = re.search(
        r'^[ \t]*' + re.escape(key) + r':[ \t]*(.*)$',
        frontmatter_text,
        re.MULTILINE,
    )
    if not match:
        return None
    value = match.group(1).strip()
    # Drop one pair of matching surrounding quotes, if present.
    if len(value) >= 2 and value[0] == value[-1] and value[0] in '"\'':
        value = value[1:-1].strip()
    return value or None


def extract_frontmatter(file_path) -> dict:
    """Extract title, parent, and nav_order from markdown frontmatter

    Args:
        file_path: Path of the markdown file to read.

    Returns:
        A dict containing whichever of 'title' (str), 'parent' (str) and
        'nav_order' (int) were found. Empty when the file has no frontmatter
        or cannot be read (the error is printed, not raised).
    """
    frontmatter = {}
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        print(f"Error reading frontmatter from {file_path}: {e}")
        return frontmatter

    # Extract frontmatter between --- markers
    fm_match = re.search(r'^---\s*(.+?)\s*---', content, re.DOTALL)
    if not fm_match:
        return frontmatter
    frontmatter_text = fm_match.group(1)

    title = _frontmatter_field(frontmatter_text, 'title')
    if title is not None:
        frontmatter['title'] = title

    parent = _frontmatter_field(frontmatter_text, 'parent')
    if parent is not None:
        frontmatter['parent'] = parent

    nav_order = _frontmatter_field(frontmatter_text, 'nav_order')
    if nav_order is not None:
        digits = re.match(r'\d+', nav_order)
        if digits:
            frontmatter['nav_order'] = int(digits.group(0))

    return frontmatter


def extract_first_heading(file_path) -> str:
    """Extract the first heading from markdown content

    Only real ATX headings count: the line must start with 1-6 '#' characters
    and must not be inside a fenced code block (where '# ...' is usually a
    code comment).

    Args:
        file_path: Path of the markdown file to read.

    Returns:
        The heading text, or a title derived from the file name when no
        heading is found or the file cannot be read.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Remove frontmatter
        content = _FRONTMATTER_RE.sub('', content)

        in_fence = False
        for line in content.splitlines():
            if line.lstrip().startswith(('```', '~~~')):
                in_fence = not in_fence
                continue
            if in_fence:
                continue
            heading_match = _HEADING_RE.match(line)
            if heading_match:
                return heading_match.group(1).strip()
    except (OSError, UnicodeError) as e:
        print(f"Error extracting heading from {file_path}: {e}")

    # Fallback to filename if no heading found
    return Path(file_path).stem.replace('_', ' ').title()


def get_mdc_description(md_file, frontmatter, heading) -> str:
    """Generate a description for the MDC file based on file metadata

    Args:
        md_file: Path of the markdown file; its directory names determine
            the section and its file name selects the special cases.
        frontmatter: Dict as returned by extract_frontmatter().
        heading: First heading of the file, used when there is no title.

    Returns:
        The one-line description placed in the MDC header.
    """
    md_path = Path(md_file)

    # For the combined guide and index
    if md_path.name == "guide.md":
        return "Guidelines for using PocketFlow, Agentic Coding"

    # Determine section from path
    path_parts = md_path.parts
    section = next(
        (label for folder, label in _SECTIONS if folder in path_parts),
        "",
    )

    # For index.md at root level, use a different format
    if md_path.name == "index.md" and not section:
        return "Guidelines for using PocketFlow, a minimalist LLM framework"

    # Use frontmatter title or heading as subsection
    subsection = frontmatter['title'] if 'title' in frontmatter else heading

    # For other files, create a more specific description
    if section:
        return f"Guidelines for using PocketFlow, {section}, {subsection}"
    return f"Guidelines for using PocketFlow, {subsection}"


def process_markdown_content(content: str, remove_local_refs: bool = False) -> str:
    """Process markdown content to make it suitable for MDC file

    Args:
        content: Raw markdown text, optionally starting with frontmatter.
        remove_local_refs: When True, links to local docs ("./...") are
            reduced to their bracketed text; when False they are rewritten
            to "mdc:./..." links (with ".html" targets mapped to ".md").

    Returns:
        The processed text with frontmatter, <div> blocks and any remaining
        HTML tags removed.
    """
    # Remove frontmatter
    content = _FRONTMATTER_RE.sub('', content)

    # Remove HTML div tags and their content. The previous pattern (r'.*?')
    # only ever matched empty strings, so nothing was removed.
    # NOTE: non-greedy matching stops at the first </div>, so nested divs are
    # only removed up to the innermost closing tag.
    content = re.sub(
        r'<div\b[^>]*>.*?</div>',
        '',
        content,
        flags=re.DOTALL | re.IGNORECASE,
    )

    if remove_local_refs:
        # Replace markdown links to local documentation with just the text in brackets
        # This prevents automatically including all docs when the file is loaded
        # Keep the brackets around the text for better discoverability
        content = re.sub(r'\[([^\]]+)\]\(\./[^)]+\)', r'[\1]', content)
    else:
        # Adjust relative links to maintain references within the docs structure
        content = re.sub(r'\]\(\./([^)]+)\)', r'](mdc:./\1)', content)
        # Links to rendered .html pages must point at the .md sources instead
        content = re.sub(r'\]\(mdc:\./([^)]+?)\.html\)', r'](mdc:./\1.md)', content)

    # Strip remaining HTML tags
    return strip_html_tags(content)


def get_documentation_first_policy() -> str:
    """Return the DOCUMENTATION FIRST POLICY text to be included in the guide"""
    return """# DOCUMENTATION FIRST POLICY

**CRITICAL INSTRUCTION**: When implementing a Pocket Flow app:

1. **ALWAYS REQUEST MDC FILES FIRST** - Before writing any code, request and review all relevant MDC documentation files. This doc provides an explaination of the documents.
2. **UNDERSTAND THE FRAMEWORK** - Gain comprehensive understanding of the Pocket Flow framework from documentation
3. **AVOID ASSUMPTION-DRIVEN DEVELOPMENT** - Do not base your implementation on assumptions or guesswork. Even if the human didn't explicitly mention pocket flow in their request, if the code you are editing is using pocket flow, you should request relevant docs to help you understand best practice as well before editing.

**VERIFICATION**: Begin each implementation with a brief summary of the documentation you've reviewed to inform your approach.

"""


def generate_mdc_header(md_file, description, always_apply=False) -> str:
    """Generate MDC file header with appropriate frontmatter

    Args:
        md_file: Source markdown file (currently unused; kept for the
            existing call signature).
        description: Text for the header's `description` field.
        always_apply: Whether the rule is always applied.

    Returns:
        The header block, delimited by '---' lines.
    """
    # Determine if we should include globs
    # For index.md and guide.md, we include **/*.py to provide high-level context for Python files
    # For other files, leave it empty to be less intrusive
    globs = "**/*.py" if always_apply else ""
    apply_flag = "true" if always_apply else "false"

    return (
        "---\n"
        f"description: {description}\n"
        f"globs: {globs}\n"
        f"alwaysApply: {apply_flag}\n"
        "---\n"
    )


def has_substantive_content(content: str) -> bool:
    """Check if the processed content has substantive content beyond the frontmatter

    Args:
        content: Markdown text, with or without frontmatter.

    Returns:
        True when more than 20 non-whitespace characters remain after the
        frontmatter and "{: ...}" attribute markers are removed.
    """
    # Remove frontmatter
    content_without_frontmatter = _FRONTMATTER_RE.sub('', content)

    # Remove whitespace and "{: ...}" attribute markers
    cleaned_content = re.sub(r'\s+', '', content_without_frontmatter)
    cleaned_content = re.sub(r'{:.*?}', '', cleaned_content)

    # If there's almost nothing left after cleaning, consider it empty
    return len(cleaned_content) > 20  # Arbitrary threshold, adjust as needed


def create_combined_guide(docs_dir, rules_dir) -> bool:
    """Create a combined guide that includes both the guide and index content

    Writes `guide_for_pocketflow.mdc` into `rules_dir`, consisting of the MDC
    header, the documentation-first policy, the processed guide.md and the
    processed index.md (local links reduced to their bracketed text).

    Args:
        docs_dir: Directory containing guide.md and index.md.
        rules_dir: Output directory; created if it does not exist.

    Returns:
        True when the file was written, False when guide.md or index.md is
        missing. I/O errors propagate to the caller.
    """
    docs_path = Path(docs_dir)
    rules_path = Path(rules_dir)

    guide_file = docs_path / "guide.md"
    index_file = docs_path / "index.md"

    if not guide_file.exists() or not index_file.exists():
        print("Warning: guide.md or index.md not found, skipping combined guide creation")
        return False

    # Process guide and index content
    processed_guide = process_markdown_content(
        guide_file.read_text(encoding='utf-8'), remove_local_refs=True
    )
    processed_index = process_markdown_content(
        index_file.read_text(encoding='utf-8'), remove_local_refs=True
    )

    # Combine the content with the documentation first policy at the beginning
    combined_content = (
        get_documentation_first_policy() + processed_guide + "\n\n" + processed_index
    )

    # Generate the MDC header
    description = "Guidelines for using PocketFlow, Agentic Coding"
    mdc_header = generate_mdc_header(guide_file, description, always_apply=True)

    # Make the function usable on its own, not only via generate_mdc_files()
    rules_path.mkdir(parents=True, exist_ok=True)
    output_path = rules_path / "guide_for_pocketflow.mdc"

    # Write the MDC file
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(mdc_header + combined_content)

    print(f"Created combined guide MDC file: {output_path}")
    return True


def convert_md_to_mdc(md_file, output_dir, docs_dir, special_treatment=False) -> bool:
    """Convert a markdown file to MDC format and save to the output directory

    Args:
        md_file: Markdown file to convert.
        output_dir: Root directory for generated .mdc files.
        docs_dir: Docs root; the output keeps md_file's path relative to it.
        special_treatment: When True, local links are removed and the rule is
            marked as always applied.

    Returns:
        True when the file was converted or deliberately skipped, False when
        an error occurred (the error is printed, not raised).
    """
    try:
        print(f"Processing: {md_file}")

        # Skip guide.md and index.md as they'll be handled separately.
        # This applies to every file with these names, including index.md
        # files in sub-folders (a later sub-folder-specific check was
        # unreachable because of this skip and has been removed).
        file_name = Path(md_file).name
        if file_name in _COMBINED_SOURCES:
            print(f"Skipping {file_name} for individual processing - it will be included in the combined guide")
            return True

        # Extract metadata from file
        frontmatter = extract_frontmatter(md_file)
        heading = extract_first_heading(md_file)
        description = get_mdc_description(md_file, frontmatter, heading)

        # Read and process the content
        with open(md_file, 'r', encoding='utf-8') as f:
            content = f.read()
        processed_content = process_markdown_content(content, remove_local_refs=special_treatment)

        # Do not emit rule files that contain nothing but a header
        if not has_substantive_content(processed_content):
            print(f"Skipping file with no substantive content after processing: {md_file}")
            return True

        mdc_header = generate_mdc_header(md_file, description, always_apply=special_treatment)
        mdc_content = mdc_header + processed_content

        # Get the path relative to the docs directory
        rel_path = Path(os.path.relpath(md_file, start=docs_dir))

        # Remove a leading 'docs/' component if one is still present
        if len(rel_path.parts) > 1 and rel_path.parts[0] == 'docs':
            rel_path = Path(*rel_path.parts[1:])

        # Same relative location under output_dir, with .md changed to .mdc
        output_path = (Path(output_dir) / rel_path).with_suffix('.mdc')
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Write the MDC file
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(mdc_content)

        print(f"Created MDC file: {output_path}")
        return True

    except Exception as e:
        # Per-file boundary: report and let the caller continue with other files.
        print(f"Error converting {md_file} to MDC: {e}")
        return False


def generate_mdc_files(docs_dir, rules_dir) -> bool:
    """Generate MDC files from all markdown files in the docs directory

    Args:
        docs_dir: Directory that is searched recursively for *.md files.
        rules_dir: Output directory; created if it does not exist.

    Returns:
        True when at least one individual file was handled successfully and
        none failed.

    Raises:
        ValueError: If docs_dir does not exist or is not a directory.
    """
    docs_path = Path(docs_dir)
    rules_path = Path(rules_dir)

    # Make sure the docs directory exists
    if not docs_path.is_dir():
        raise ValueError(f"Directory not found: {docs_dir}")

    print(f"Generating MDC files from docs in: {docs_dir}")
    print(f"Output will be written to: {rules_dir}")

    # Create the rules directory if it doesn't exist
    rules_path.mkdir(parents=True, exist_ok=True)

    # Create the combined guide file first (includes both guide.md and index.md).
    # Only count it in the summary if it was actually written.
    combined_count = 1 if create_combined_guide(docs_dir, rules_dir) else 0

    # Find all other markdown files; index.md and guide.md are covered by the
    # combined guide. Sorted so that processing order is deterministic.
    md_files = sorted(
        f for f in docs_path.glob("**/*.md") if f.name not in _COMBINED_SOURCES
    )

    # Process each markdown file
    success_count = 0
    failure_count = 0
    for md_file in md_files:
        if convert_md_to_mdc(md_file, rules_path, docs_dir):
            success_count += 1
        else:
            failure_count += 1

    print(f"\nProcessed {len(md_files) + combined_count} markdown files:")
    print(f"  - Successfully converted: {success_count + combined_count}")
    print(f"  - Failed conversions: {failure_count}")

    return success_count > 0 and failure_count == 0


def main(argv=None) -> int:
    """Parse command-line arguments and run the generator.

    Args:
        argv: Argument list to parse; defaults to sys.argv[1:].

    Returns:
        Process exit code: 0 on success, 1 on failure.
    """
    parser = argparse.ArgumentParser(description="Generate MDC files from PocketFlow docs")

    # Get script directory
    script_dir = Path(__file__).parent.absolute()

    # Default to PocketFlow/docs directory relative to script location
    default_docs_dir = (script_dir.parent / "docs").as_posix()

    # Default rules directory - changed to .cursor/rules
    default_rules_dir = (script_dir.parent / ".cursor" / "rules").as_posix()

    parser.add_argument("--docs-dir",
                        default=default_docs_dir,
                        help="Path to PocketFlow docs directory")
    parser.add_argument("--rules-dir",
                        default=default_rules_dir,
                        help="Output directory for MDC files")

    args = parser.parse_args(argv)

    try:
        success = generate_mdc_files(args.docs_dir, args.rules_dir)
    except Exception as e:
        # Top-level boundary: report the problem and signal failure.
        print(f"Error: {e}")
        return 1
    return 0 if success else 1


if __name__ == "__main__":
    sys.exit(main())