from typing import Dict, List
from utils.call_llm import call_llm

def analyze_content(content: Dict) -> Dict:
    """Analyze webpage content using LLM
    
    Args:
        content (Dict): Webpage content with url, title and text
        
    Returns:
        Dict: Analysis results including summary and topics
    """
    prompt = f"""
Analyze this webpage content:

Title: {content['title']}
URL: {content['url']}
Content: {content['text'][:2000]}  # Limit content length

Please provide:
1. A brief summary (2-3 sentences)
2. Main topics/keywords (up to 5)
3. Content type (article, product page, etc)

Output in YAML format:
```yaml
summary: >
    brief summary here
topics:
    - topic 1
    - topic 2
content_type: type here
```
"""
    
    try:
        response = call_llm(prompt)
        # Extract YAML between code fences
        yaml_str = response.split("```yaml")[1].split("```")[0].strip()
        
        import yaml
        analysis = yaml.safe_load(yaml_str)
        
        # Validate required fields
        assert "summary" in analysis
        assert "topics" in analysis
        assert "content_type" in analysis
        assert isinstance(analysis["topics"], list)
        
        return analysis
        
    except Exception as e:
        print(f"Error analyzing content: {str(e)}")
        return {
            "summary": "Error analyzing content",
            "topics": [],
            "content_type": "unknown"
        }

def analyze_site(crawl_results: List[Dict]) -> List[Dict]:
    """Analyze all crawled pages
    
    Args:
        crawl_results (List[Dict]): List of crawled page contents
        
    Returns:
        List[Dict]: Original content with added analysis
    """
    analyzed_results = []
    
    for content in crawl_results:
        if content and content.get("text"):
            analysis = analyze_content(content)
            content["analysis"] = analysis
            analyzed_results.append(content)
            
    return analyzed_results