update cursor rule files

2025-04-30 11:46:43 -04:00 · 2025-04-30 11:46:43 -04:00 · f6c4b06db8
parent b561a10c76
commit f6c4b06db8
12 changed files with 246 additions and 110 deletions
--- a/.cursor/rules/core_abstraction/batch.mdc
+++ b/.cursor/rules/core_abstraction/batch.mdc
@ -50,30 +50,75 @@ flow.run(shared)
 A **BatchFlow** runs a **Flow** multiple times, each time with different `params`. Think of it as a loop that replays the Flow for each parameter set.
 ### Key Differences from BatchNode
 **Important**: Unlike BatchNode, which processes items and modifies the shared store:
 1. BatchFlow returns **parameters to pass to the child Flow**, not data to process
 2. These parameters are accessed in child nodes via `self.params`, not from the shared store
 3. Each child Flow runs independently with a different set of parameters
 4. Child nodes can be regular Nodes, not BatchNodes (the batching happens at the Flow level)
 ### Example: Summarize Many Files
 ```python
 class SummarizeAllFiles(BatchFlow):
    def prep(self, shared):
-        # Return a list of param dicts (one per file)
+        # IMPORTANT: Return a list of param dictionaries (not data for processing)
        filenames = list(shared["data"].keys())  # e.g., ["file1.txt", "file2.txt", ...]
        return [{"filename": fn} for fn in filenames]
-# Suppose we have a per-file Flow (e.g., load_file >> summarize >> reduce):
+# Child node that accesses filename from params, not shared store
-summarize_file = SummarizeFile(start=load_file)
+class LoadFile(Node):
    def prep(self, shared):
        # Access filename from params (not from shared)
        filename = self.params["filename"]  # Important! Use self.params, not shared
        return filename
    def exec(self, filename):
        with open(filename, 'r') as f:
            return f.read()
    def post(self, shared, prep_res, exec_res):
        # Store file content in shared
        shared["current_file_content"] = exec_res
        return "default"
-# Wrap that flow into a BatchFlow:
+# Summarize node that works on the currently loaded file
 class Summarize(Node):
    """Node that summarizes the file content currently held in the shared store.

    The summary is written to shared["summaries"], keyed by the filename
    taken from this node's params.
    """
    def prep(self, shared):
        # Input is the text an earlier node stored under "current_file_content"
        return shared["current_file_content"]
    def exec(self, content):
        prompt = f"Summarize this file in 50 words: {content}"
        return call_llm(prompt)
    def post(self, shared, prep_res, exec_res):
        # Store summary in shared, indexed by current filename
        filename = self.params["filename"]  # Again, using params
        # Create the result dict lazily so the first processed file initializes it
        if "summaries" not in shared:
            shared["summaries"] = {}
        shared["summaries"][filename] = exec_res
        return "default"
 # Create a per-file flow
 load_file = LoadFile()
 summarize = Summarize()
 load_file >> summarize
 summarize_file = Flow(start=load_file)
 # Wrap in a BatchFlow to process all files
 summarize_all_files = SummarizeAllFiles(start=summarize_file)
 summarize_all_files.run(shared)
 ```
 ### Under the Hood
-1. `prep(shared)` returns a list of param dicts—e.g., `[{filename: "file1.txt"}, {filename: "file2.txt"}, ...]`.
+1. `prep(shared)` in the BatchFlow returns a list of param dicts—e.g., `[{"filename": "file1.txt"}, {"filename": "file2.txt"}, ...]`.
 2. The **BatchFlow** loops through each dict. For each one:
-   - It merges the dict with the BatchFlow’s own `params`.
+   - It merges the dict with the BatchFlow's own `params` (if any): `{**batch_flow.params, **dict_from_prep}`
-   - It calls `flow.run(shared)` using the merged result.
+   - It calls `flow.run(shared)` using the merged parameters
-3. This means the sub-Flow is run **repeatedly**, once for every param dict.
+   - **IMPORTANT**: These parameters are passed to the child Flow's nodes via `self.params`, NOT via the shared store
 3. This means the sub-Flow is run **repeatedly**, once for every param dict, with each node in the flow accessing the parameters via `self.params`.
 ---
@ -89,6 +134,7 @@ At each level, **BatchFlow** merges its own param dict with the parent’s. By t
 class FileBatchFlow(BatchFlow):
    def prep(self, shared):
        # Access directory from params (set by parent)
        directory = self.params["directory"]
        # e.g., files = ["file1.txt", "file2.txt", ...]
        files = [f for f in os.listdir(directory) if f.endswith(".txt")]
@ -99,7 +145,31 @@ class DirectoryBatchFlow(BatchFlow):
        directories = [ "/path/to/dirA", "/path/to/dirB"]
        return [{"directory": d} for d in directories]
-# MapSummaries have params like {"directory": "/path/to/dirA", "filename": "file1.txt"}
+# The actual processing node
-inner_flow = FileBatchFlow(start=MapSummaries())
+class ProcessFile(Node):
    def prep(self, shared):
        # Access both directory and filename from params
        directory = self.params["directory"]  # From outer batch
        filename = self.params["filename"]    # From inner batch
        full_path = os.path.join(directory, filename)
        return full_path
    def exec(self, full_path):
        # Process the file...
        return f"Processed {full_path}"
    def post(self, shared, prep_res, exec_res):
        # Store results, perhaps indexed by path
        if "results" not in shared:
            shared["results"] = {}
        shared["results"][prep_res] = exec_res
        return "default"
 # Set up the nested batch structure
 process_node = ProcessFile()
 inner_flow = FileBatchFlow(start=process_node)
 outer_flow = DirectoryBatchFlow(start=inner_flow)
 # Run it
 outer_flow.run(shared)
 ```
--- a/.cursor/rules/core_abstraction/communication.mdc
+++ b/.cursor/rules/core_abstraction/communication.mdc
@ -13,10 +13,10 @@ Nodes and Flows **communicate** in 2 ways:
   - Great for data results, large content, or anything multiple nodes need.
   - You shall design the data structure and populate it ahead.
-   - > **Separation of Concerns:** Use `Shared Store` for almost all cases to separate *Data Schema* from *Compute Logic*!  This approach is both flexible and easy to manage, resulting in more maintainable code. `Params` is more a syntax sugar for [Batch](mdc:batch.md).
+   - > **Separation of Concerns:** Use `Shared Store` for almost all cases to separate *Data Schema* from *Compute Logic*!  This approach is both flexible and easy to manage, resulting in more maintainable code. `Params` is more a syntax sugar for [Batch](mdc:./batch.md).
     {: .best-practice }
-2. **Params (only for [Batch](mdc:batch.md))** 
+2. **Params (only for [Batch](mdc:./batch.md))** 
   - Each node has a local, ephemeral `params` dict passed in by the **parent Flow**, used as an identifier for tasks. Parameter keys and values shall be **immutable**.
   - Good for identifiers like filenames or numeric IDs, in Batch mode.
@ -84,7 +84,7 @@ Here:
 > Only set the uppermost Flow params because others will be overwritten by the parent Flow. 
 > 
-> If you need to set child node params, see [Batch](mdc:batch.md).
+> If you need to set child node params, see [Batch](mdc:./batch.md).
 {: .warning }
 Typically, **Params** are identifiers (e.g., file name, page number). Use them to fetch the task you assigned or write to a specific part of the shared store.
--- a/.cursor/rules/design_pattern/agent.mdc
+++ b/.cursor/rules/design_pattern/agent.mdc
@ -12,7 +12,7 @@ Agent is a powerful design pattern in which nodes can take dynamic actions based
 ## Implement Agent with Graph
 1. **Context and Action:** Implement nodes that supply context and perform actions.  
-2. **Branching:** Use branching to connect each action node to an agent node. Use action to allow the agent to direct the [flow](mdc:../core_abstraction/flow.md) between nodes—and potentially loop back for multi-step.
+2. **Branching:** Use branching to connect each action node to an agent node. Use action to allow the agent to direct the [flow](../core_abstraction/flow.md) between nodes—and potentially loop back for multi-step.
 3. **Agent Node:** Provide a prompt to decide action—for example:
 ```python
@ -48,7 +48,7 @@ parameters:
 The core of building **high-performance** and **reliable** agents boils down to:
-1. **Context Management:** Provide *relevant, minimal context.* For example, rather than including an entire chat history, retrieve the most relevant via [RAG](mdc:rag.md). Even with larger context windows, LLMs still fall victim to ["lost in the middle"](mdc:https:/arxiv.org/abs/2307.03172), overlooking mid-prompt content.
+1. **Context Management:** Provide *relevant, minimal context.* For example, rather than including an entire chat history, retrieve the most relevant via [RAG](mdc:./rag.md). Even with larger context windows, LLMs still fall victim to ["lost in the middle"](https://arxiv.org/abs/2307.03172), overlooking mid-prompt content.
 2. **Action Space:** Provide *a well-structured and unambiguous* set of actions—avoiding overlap like separate `read_databases` or  `read_csvs`. Instead, import CSVs into the database.
--- a/.cursor/rules/design_pattern/mapreduce.mdc
+++ b/.cursor/rules/design_pattern/mapreduce.mdc
@ -13,7 +13,7 @@ and there is a logical way to break the task into smaller, ideally independent p
-You first break down the task using [BatchNode](mdc:../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase.
+You first break down the task using [BatchNode](../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase.
 ### Example: Document Summarization
@ -65,5 +65,5 @@ print("Individual Summaries:", shared["file_summaries"])
 print("\nFinal Summary:\n", shared["all_files_summary"])
 ```
-> **Performance Tip**: The example above works sequentially. You can speed up the map phase by running it in parallel. See [(Advanced) Parallel](mdc:../core_abstraction/parallel.md) for more details.
+> **Performance Tip**: The example above works sequentially. You can speed up the map phase by running it in parallel. See [(Advanced) Parallel](../core_abstraction/parallel.md) for more details.
 {: .note }
--- a/.cursor/rules/design_pattern/multi_agent.mdc
+++ b/.cursor/rules/design_pattern/multi_agent.mdc
@ -5,7 +5,7 @@ alwaysApply: false
 ---
 # (Advanced) Multi-Agents
-Multiple [Agents](mdc:flow.md) can work together by handling subtasks and communicating the progress. 
+Multiple [Agents](mdc:./flow.md) can work together by handling subtasks and communicating the progress. 
 Communication between agents is typically implemented using message queues in shared storage.
 > Most of the time, you don't need Multi-Agents. Start with a simple solution first.
--- a/.cursor/rules/design_pattern/rag.mdc
+++ b/.cursor/rules/design_pattern/rag.mdc
@ -16,9 +16,9 @@ For certain LLM tasks like answering questions, providing relevant context is es
 ## Stage 1: Offline Indexing
 We create three Nodes:
-1. `ChunkDocs` – [chunks](mdc:../utility_function/chunking.md) raw text.
+1. `ChunkDocs` – [chunks](../utility_function/chunking.md) raw text.
-2. `EmbedDocs` – [embeds](mdc:../utility_function/embedding.md) each chunk.
+2. `EmbedDocs` – [embeds](../utility_function/embedding.md) each chunk.
-3. `StoreIndex` – stores embeddings into a [vector database](mdc:../utility_function/vector.md).
+3. `StoreIndex` – stores embeddings into a [vector database](../utility_function/vector.md).
 ```python
 class ChunkDocs(BatchNode):
--- a/.cursor/rules/design_pattern/structure.mdc
+++ b/.cursor/rules/design_pattern/structure.mdc
@ -81,7 +81,7 @@ summary:
        return structured_result
 ```
-> Besides using `assert` statements, another popular way to validate schemas is [Pydantic](mdc:https:/github.com/pydantic/pydantic)
+> Besides using `assert` statements, another popular way to validate schemas is [Pydantic](https://github.com/pydantic/pydantic)
 {: .note }
 ### Why YAML instead of JSON?
--- a/.cursor/rules/design_pattern/workflow.mdc
+++ b/.cursor/rules/design_pattern/workflow.mdc
@ -5,14 +5,14 @@ alwaysApply: false
 ---
 # Workflow
-Many real-world tasks are too complex for one LLM call. The solution is to **Task Decomposition**: decompose them into a [chain](mdc:../core_abstraction/flow.md) of multiple Nodes.
+Many real-world tasks are too complex for one LLM call. The solution is to **Task Decomposition**: decompose them into a [chain](../core_abstraction/flow.md) of multiple Nodes.
 > - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*.
 > - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*.
 > 
-> You usually need multiple *iterations* to find the *sweet spot*. If the task has too many *edge cases*, consider using [Agents](mdc:agent.md).
+> You usually need multiple *iterations* to find the *sweet spot*. If the task has too many *edge cases*, consider using [Agents](mdc:./agent.md).
 {: .best-practice }
 ### Example: Article Writing
@ -46,4 +46,4 @@ shared = {"topic": "AI Safety"}
 writing_flow.run(shared)
 ```
-For *dynamic cases*, consider using [Agents](mdc:agent.md).
+For *dynamic cases*, consider using [Agents](mdc:./agent.md).
--- a/.cursor/rules/guide_for_pocketflow.mdc
+++ b/.cursor/rules/guide_for_pocketflow.mdc
@ -3,9 +3,19 @@ description: Guidelines for using PocketFlow, Agentic Coding
 globs: **/*.py
 alwaysApply: true
 ---
 # DOCUMENTATION FIRST POLICY
 **CRITICAL INSTRUCTION**: When implementing a Pocket Flow app:
 1. **ALWAYS REQUEST MDC FILES FIRST** - Before writing any code, request and review all relevant MDC documentation files. This doc provides an explanation of the documents.
 2. **UNDERSTAND THE FRAMEWORK** - Gain comprehensive understanding of the Pocket Flow framework from documentation
 3. **AVOID ASSUMPTION-DRIVEN DEVELOPMENT** - Do not base your implementation on assumptions or guesswork. Even if the human didn't explicitly mention pocket flow in their request, if the code you are editing is using pocket flow, you should request relevant docs to help you understand best practice as well before editing.
 **VERIFICATION**: Begin each implementation with a brief summary of the documentation you've reviewed to inform your approach.
 # Agentic Coding: Humans Design, Agents code!
-> If you are an AI agents involved in building LLM Systems, read this guide **VERY, VERY** carefully! This is the most important chapter in the entire document. Throughout development, you should always (1) start with a small and simple solution, (2) design at a high level (`docs/design.md`) before implementation, and (3) frequently ask humans for feedback and clarification.
+> If you are an AI agent involved in building LLM Systems, read this guide **VERY, VERY** carefully! This is the most important chapter in the entire document. Throughout development, you should always (1) start with a small and simple solution, (2) design at a high level (`docs/design.md`) before implementation, and (3) frequently ask humans for feedback and clarification.
 {: .warning }
 ## Agentic Coding Steps
@ -225,3 +235,62 @@ my_project/
  if __name__ == "__main__":
      main()
  ```
 # Pocket Flow
 A [100-line](https://github.com/the-pocket/PocketFlow/blob/main/pocketflow/__init__.py) minimalist LLM framework for *Agents, Task Decomposition, RAG, etc*.
 - **Lightweight**: Just the core graph abstraction in 100 lines. ZERO dependencies and ZERO vendor lock-in.
 - **Expressive**: Everything you love from larger frameworks—([Multi-])[Agents], [Workflow], [RAG], and more.  
 - **Agentic-Coding**: Intuitive enough for AI agents to help humans build complex LLM applications.
 ## Core Abstraction
 We model the LLM workflow as a **Graph + Shared Store**:
 - [Node] handles simple (LLM) tasks.
 - [Flow] connects nodes through **Actions** (labeled edges).
 - [Shared Store] enables communication between nodes within flows.
 - [Batch] nodes/flows allow for data-intensive tasks.
 - [Async] nodes/flows allow waiting for asynchronous tasks.
 - [(Advanced) Parallel] nodes/flows handle I/O-bound tasks.
 ## Design Pattern
 From there, it’s easy to implement popular design patterns:
 - [Agent] autonomously makes decisions.
 - [Workflow] chains multiple tasks into pipelines.
 - [RAG] integrates data retrieval with generation.
 - [Map Reduce] splits data tasks into Map and Reduce steps.
 - [Structured Output] formats outputs consistently.
 - [(Advanced) Multi-Agents] coordinate multiple agents.
 ## Utility Function
 We **do not** provide built-in utilities. Instead, we offer *examples*—please *implement your own*:
 - [LLM Wrapper]
 - [Viz and Debug]
 - [Web Search]
 - [Chunking]
 - [Embedding]
 - [Vector Databases]
 - [Text-to-Speech]
 **Why not built-in?**: I believe it's a *bad practice* for vendor-specific APIs in a general framework:
 - *API Volatility*: Frequent changes lead to heavy maintenance for hardcoded APIs.
 - *Flexibility*: You may want to switch vendors, use fine-tuned models, or run them locally.
 - *Optimizations*: Prompt caching, batching, and streaming are easier without vendor lock-in.
 ## Ready to build your Apps? 
 Check out [Agentic Coding Guidance], the fastest way to develop LLM projects with Pocket Flow!
--- a/.cursor/rules/index.mdc
+++ b/.cursor/rules/index.mdc
@ -1,62 +0,0 @@
 ---
 description: Guidelines for using PocketFlow, a minimalist LLM framework
 globs: **/*.py
 alwaysApply: true
 ---
 # Pocket Flow
 A [100-line](mdc:https:/github.com/the-pocket/PocketFlow/blob/main/pocketflow/__init__.py) minimalist LLM framework for *Agents, Task Decomposition, RAG, etc*.
 - **Lightweight**: Just the core graph abstraction in 100 lines. ZERO dependencies, and vendor lock-in.
 - **Expressive**: Everything you love from larger frameworks—([Multi-])[Agents], [Workflow], [RAG], and more.  
 - **Agentic-Coding**: Intuitive enough for AI agents to help humans build complex LLM applications.
 ## Core Abstraction
 We model the LLM workflow as a **Graph + Shared Store**:
 - [Node] handles simple (LLM) tasks.
 - [Flow] connects nodes through **Actions** (labeled edges).
 - [Shared Store] enables communication between nodes within flows.
 - [Batch] nodes/flows allow for data-intensive tasks.
 - [Async] nodes/flows allow waiting for asynchronous tasks.
 - [(Advanced) Parallel] nodes/flows handle I/O-bound tasks.
 ## Design Pattern
 From there, it’s easy to implement popular design patterns:
 - [Agent] autonomously makes decisions.
 - [Workflow] chains multiple tasks into pipelines.
 - [RAG] integrates data retrieval with generation.
 - [Map Reduce] splits data tasks into Map and Reduce steps.
 - [Structured Output] formats outputs consistently.
 - [(Advanced) Multi-Agents] coordinate multiple agents.
 ## Utility Function
 We **do not** provide built-in utilities. Instead, we offer *examples*—please *implement your own*:
 - [LLM Wrapper]
 - [Viz and Debug]
 - [Web Search]
 - [Chunking]
 - [Embedding]
 - [Vector Databases]
 - [Text-to-Speech]
 **Why not built-in?**: I believe it's a *bad practice* for vendor-specific APIs in a general framework:
 - *API Volatility*: Frequent changes lead to heavy maintenance for hardcoded APIs.
 - *Flexibility*: You may want to switch vendors, use fine-tuned models, or run them locally.
 - *Optimizations*: Prompt caching, batching, and streaming are easier without vendor lock-in.
 ## Ready to build your Apps? 
 Check out [Agentic Coding Guidance], the fastest way to develop LLM projects with Pocket Flow!
--- a/.cursor/rules/utility_function/viz.mdc
+++ b/.cursor/rules/utility_function/viz.mdc
@ -26,11 +26,11 @@ def build_mermaid(start):
            return parent and link(parent, get_id(node))
        visited.add(node)
        if isinstance(node, Flow):
-            node.start and parent and link(parent, get_id(node.start))
+            node.start_node and parent and link(parent, get_id(node.start_node))
            lines.append(f"\n    subgraph sub_flow_{get_id(node)}[{type(node).__name__}]")
-            node.start and walk(node.start)
+            node.start_node and walk(node.start_node)
            for nxt in node.successors.values():
-                node.start and walk(nxt, get_id(node.start)) or (parent and link(parent, get_id(nxt))) or walk(nxt)
+                node.start_node and walk(nxt, get_id(node.start_node)) or (parent and link(parent, get_id(nxt))) or walk(nxt)
            lines.append("    end\n")
        else:
            lines.append(f"    {(nid := get_id(node))}['{type(node).__name__}']")
--- a/utils/update_pocketflow_mdc.py
+++ b/utils/update_pocketflow_mdc.py
@ -101,6 +101,10 @@ def get_mdc_description(md_file, frontmatter, heading):
    else:
        subsection = heading
    # For the combined guide and index
    if Path(md_file).name == "guide.md":
        return "Guidelines for using PocketFlow, Agentic Coding"
    # For index.md at root level, use a different format
    if Path(md_file).name == "index.md" and section == "":
        return "Guidelines for using PocketFlow, a minimalist LLM framework"
@ -137,6 +141,20 @@ def process_markdown_content(content, remove_local_refs=False):
    return content
 def get_documentation_first_policy():
    """Return the DOCUMENTATION FIRST POLICY section for the combined guide.

    The text is a fixed Markdown block; create_combined_guide places it in
    front of the processed guide content.

    Returns:
        str: The policy section, starting with its top-level heading and
        ending with a newline.
    """
    return """# DOCUMENTATION FIRST POLICY
 **CRITICAL INSTRUCTION**: When implementing a Pocket Flow app:
 1. **ALWAYS REQUEST MDC FILES FIRST** - Before writing any code, request and review all relevant MDC documentation files. This doc provides an explanation of the documents.
 2. **UNDERSTAND THE FRAMEWORK** - Gain comprehensive understanding of the Pocket Flow framework from documentation
 3. **AVOID ASSUMPTION-DRIVEN DEVELOPMENT** - Do not base your implementation on assumptions or guesswork. Even if the human didn't explicitly mention pocket flow in their request, if the code you are editing is using pocket flow, you should request relevant docs to help you understand best practice as well before editing.
 **VERIFICATION**: Begin each implementation with a brief summary of the documentation you've reviewed to inform your approach.
 """
 def generate_mdc_header(md_file, description, always_apply=False):
    """Generate MDC file header with appropriate frontmatter"""
    # Determine if we should include globs
@ -163,13 +181,64 @@ def has_substantive_content(content):
    # If there's almost nothing left after cleaning, consider it empty
    return len(cleaned_content) > 20  # Arbitrary threshold, adjust as needed
 def create_combined_guide(docs_dir, rules_dir):
    """Build guide_for_pocketflow.mdc from guide.md and index.md.

    Both source files are read from docs_dir, run through
    process_markdown_content with remove_local_refs=True, and concatenated
    after the DOCUMENTATION FIRST POLICY text. The result is written to
    rules_dir, preceded by a header generated with always_apply=True.

    Args:
        docs_dir: Directory that holds guide.md and index.md.
        rules_dir: Directory that receives guide_for_pocketflow.mdc. It is
            created if it does not exist yet.

    Returns:
        bool: True once the combined file has been written; False when
        guide.md or index.md is missing (a warning is printed and nothing
        is written or created).
    """
    docs_path = Path(docs_dir)
    rules_path = Path(rules_dir)
    guide_file = docs_path / "guide.md"
    index_file = docs_path / "index.md"
    if not (guide_file.exists() and index_file.exists()):
        print("Warning: guide.md or index.md not found, skipping combined guide creation")
        return False
    # Read both sources before processing either one
    guide_content = guide_file.read_text(encoding='utf-8')
    index_content = index_file.read_text(encoding='utf-8')
    # Guide and index receive the same treatment: local references removed
    processed_guide = process_markdown_content(guide_content, remove_local_refs=True)
    processed_index = process_markdown_content(index_content, remove_local_refs=True)
    # Policy text first, then the guide, then the index
    combined_content = (
        get_documentation_first_policy() + processed_guide + "\n\n" + processed_index
    )
    # The header is derived from guide.md and marks the rule as always applied
    description = "Guidelines for using PocketFlow, Agentic Coding"
    mdc_header = generate_mdc_header(guide_file, description, always_apply=True)
    # Do not rely on the caller having created the output directory
    rules_path.mkdir(parents=True, exist_ok=True)
    output_path = rules_path / "guide_for_pocketflow.mdc"
    output_path.write_text(mdc_header + combined_content, encoding='utf-8')
    print(f"Created combined guide MDC file: {output_path}")
    return True
 def convert_md_to_mdc(md_file, output_dir, docs_dir, special_treatment=False):
    """Convert a markdown file to MDC format and save to the output directory"""
    try:
        print(f"Processing: {md_file}")
-        # Skip empty index.md files in subfolders
+        # Skip guide.md and index.md as they'll be handled separately
        file_name = Path(md_file).name
        if file_name in ["guide.md", "index.md"]:
            print(f"Skipping {file_name} for individual processing - it will be included in the combined guide")
            return True
        # Skip empty index.md files in subfolders
        parent_dir = Path(md_file).parent.name
        # Check if this is an index.md in a subfolder (not the main index.md)
@ -194,14 +263,11 @@ def convert_md_to_mdc(md_file, output_dir, docs_dir, special_treatment=False):
        with open(md_file, 'r', encoding='utf-8') as f:
            content = f.read()
        # Check if this file should have special treatment (index.md or guide.md)
        is_special = special_treatment or Path(md_file).name == "guide.md"
        # Process the content
-        processed_content = process_markdown_content(content, remove_local_refs=is_special)
+        processed_content = process_markdown_content(content, remove_local_refs=special_treatment)
        # Generate the MDC header
-        mdc_header = generate_mdc_header(md_file, description, always_apply=is_special)
+        mdc_header = generate_mdc_header(md_file, description, always_apply=special_treatment)
        # Combine header and processed content
        mdc_content = mdc_header + processed_content
@ -255,15 +321,8 @@ def generate_mdc_files(docs_dir, rules_dir):
    # Create the rules directory if it doesn't exist
    rules_path.mkdir(parents=True, exist_ok=True)
-    # Process the main index.md file first
+    # Create the combined guide file first (includes both guide.md and index.md)
-    index_file = docs_path / "index.md"
+    create_combined_guide(docs_dir, rules_dir)
    if index_file.exists():
        convert_md_to_mdc(index_file, rules_path, docs_dir, special_treatment=True)
    # Process guide.md file with special treatment (if it exists)
    guide_file = docs_path / "guide.md"
    if guide_file.exists():
        convert_md_to_mdc(guide_file, rules_path, docs_dir, special_treatment=True)
    # Process all other markdown files
    success_count = 0
@ -272,8 +331,8 @@ def generate_mdc_files(docs_dir, rules_dir):
    # Find all markdown files
    md_files = list(docs_path.glob("**/*.md"))
-    # Skip the main index.md and guide.md files as we've already processed them
+    # Skip the main index.md and guide.md files as we've already processed them in create_combined_guide
-    md_files = [f for f in md_files if f != index_file and f != guide_file]
+    md_files = [f for f in md_files if f.name != "index.md" and f.name != "guide.md"]
    # Process each markdown file
    for md_file in md_files:
@ -282,8 +341,8 @@ def generate_mdc_files(docs_dir, rules_dir):
        else:
            failure_count += 1
-    print(f"\nProcessed {len(md_files) + 2} markdown files:")
+    print(f"\nProcessed {len(md_files) + 1} markdown files:")  # +1 for the combined guide
-    print(f"  - Successfully converted: {success_count + 2}")
+    print(f"  - Successfully converted: {success_count + 1}")  # +1 for the combined guide
    print(f"  - Failed conversions: {failure_count}")
    return success_count > 0 and failure_count == 0