claude-code/skills/morning-report/scripts/collectors/news.py

#!/usr/bin/env python3
"""News collector for RSS feeds."""

import json
import re
import subprocess
import urllib.request
import xml.etree.ElementTree as ET
from html import unescape
from typing import Optional


_ATOM_NS = "{http://www.w3.org/2005/Atom}"

# Accepts both "123 points" and "Points: 123" (the form hnrss.org descriptions use).
_POINTS_RE = re.compile(r"(\d+)\s*points?|points?\s*:\s*(\d+)", re.I)


def _entry_link(entry: ET.Element) -> str:
    """Return the URL of an RSS item or Atom entry, or "" when none is present."""
    rss_link = (entry.findtext("link") or "").strip()
    if rss_link:
        return rss_link

    # Atom keeps the URL in the href attribute and may list several links
    # (self, alternate, enclosure, ...); a missing rel means "alternate".
    atom_links = entry.findall(_ATOM_NS + "link")
    for link_elem in atom_links:
        if link_elem.get("rel", "alternate") == "alternate" and link_elem.get("href"):
            return link_elem.get("href")
    return atom_links[0].get("href", "") if atom_links else ""


def _extract_points(description: str) -> str:
    """Return the score found in an item description as a string, or ""."""
    match = _POINTS_RE.search(description)
    if match is None:
        return ""
    return match.group(1) or match.group(2)


def fetch_feed(url: str, limit: int = 5) -> list:
    """Fetch an RSS or Atom feed and return its first entries.

    Args:
        url: Address of the feed; opened with a browser-like User-Agent and a
            10 second timeout.
        limit: Maximum number of entries to return.

    Returns:
        A list of dicts with the keys ``title`` (HTML-unescaped, stripped),
        ``link`` and ``points`` (digits as a string, "" when no score was
        found). If anything goes wrong while fetching or parsing, a
        single-element list ``[{"error": message}]`` is returned instead of
        raising, which is the shape callers check for.
    """
    try:
        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=10) as resp:
            payload = resp.read()

        # Hand the raw bytes to the parser so the encoding declared in the XML
        # prolog is honored instead of assuming every feed is UTF-8.
        root = ET.fromstring(payload)

        # RSS uses <item>; fall back to namespaced Atom <entry> elements.
        entries = root.findall(".//item") or root.findall(".//" + _ATOM_NS + "entry")

        items = []
        for entry in entries[:limit]:
            title = entry.findtext("title") or entry.findtext(_ATOM_NS + "title") or ""
            description = entry.findtext("description") or ""
            items.append({
                "title": unescape(title.strip()),
                "link": _entry_link(entry),
                "points": _extract_points(description),
            })

        return items

    except Exception as e:
        # Deliberate catch-all: a broken feed must not take down the report.
        return [{"error": str(e)}]


def _feeds_with_items(all_items: list, feed_names: list) -> list:
    """Return (name, items) pairs for feeds that produced real entries."""
    return [
        (name, items)
        for items, name in zip(all_items, feed_names)
        if items and "error" not in items[0]
    ]


def _points_suffix(item: dict) -> str:
    """Return " (N pts)" for an item with a score, otherwise ""."""
    return f" ({item['points']} pts)" if item.get("points") else ""


def summarize_with_sonnet(all_items: list, feed_names: list) -> str:
    """Turn fetched headlines into a short news list, using Sonnet if possible.

    Args:
        all_items: One list per feed, each holding item dicts (``title``,
            ``points``) or a single ``{"error": ...}`` dict.
        feed_names: Display names, parallel to ``all_items``.

    Returns:
        The stripped stdout of the ``claude`` CLI when it exits successfully
        with output. Otherwise a locally built fallback: up to two items per
        feed, five bullets in total. Returns a warning string when every feed
        failed, or "No news available" when there is nothing to show.
    """
    if not all_items or all(len(items) == 1 and "error" in items[0] for items in all_items):
        return "⚠️ Could not fetch news feeds"

    usable_feeds = _feeds_with_items(all_items, feed_names)

    # Build context
    news_text = [
        f"[{name}] {item['title']}{_points_suffix(item)}"
        for name, items in usable_feeds
        for item in items
    ]

    if not news_text:
        return "No news available"

    context = "\n".join(news_text)

    prompt = f"""You are creating a tech news section for a morning report.
Given these headlines from various sources, pick the top 5 most interesting/important ones.
Format each as a bullet with source in parentheses.
Keep titles concise - trim if needed.

Headlines:
{context}

Output ONLY the formatted news list, nothing else."""

    try:
        # NOTE(review): --print and -p look like aliases of the same CLI flag;
        # confirm against the CLI reference before simplifying the command.
        result = subprocess.run(
            ["claude", "--print", "--model", "sonnet", "-p", prompt],
            capture_output=True,
            text=True,
            timeout=60
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # Best effort: a missing CLI, a timeout or undecodable output simply
        # means we fall through to the locally formatted list below.
        result = None

    if result is not None and result.returncode == 0 and result.stdout.strip():
        return result.stdout.strip()

    # Fallback - just show first few items
    lines = []
    for name, items in usable_feeds:
        for item in items[:2]:
            title = item["title"][:60] + "..." if len(item["title"]) > 60 else item["title"]
            lines.append(f"  • {title}{_points_suffix(item)} ({name})")

    return "\n".join(lines[:5]) if lines else "No news available"


def collect(config: dict) -> dict:
    """Fetch every configured feed and build the "Tech News" report section.

    Reads the optional ``news`` entry of ``config``: ``feeds`` is a list of
    dicts with ``name``, ``url`` and an optional ``limit`` (default 5), and
    ``summarize`` (default True) selects the Sonnet summary over the plain
    bullet list.

    Returns a dict with ``section``, ``icon``, ``content`` (the rendered text),
    ``raw`` (feed name -> fetched items) and ``error`` (the first feed error
    message, or None when no feed failed).
    """
    settings = config.get("news", {})
    default_feeds = [
        {"name": "Hacker News", "url": "https://hnrss.org/frontpage", "limit": 5},
        {"name": "Lobsters", "url": "https://lobste.rs/rss", "limit": 3},
    ]
    feeds = settings.get("feeds", default_feeds)
    wants_summary = settings.get("summarize", True)

    feed_names = []
    all_items = []
    errors = []

    for feed in feeds:
        fetched = fetch_feed(feed["url"], feed.get("limit", 5))
        all_items.append(fetched)
        feed_names.append(feed["name"])
        # A failed fetch comes back as exactly one {"error": ...} dict.
        if fetched and len(fetched) == 1 and "error" in fetched[0]:
            errors.append(f"{feed['name']}: {fetched[0]['error']}")

    if wants_summary:
        content = summarize_with_sonnet(all_items, feed_names)
    else:
        # Plain listing: at most three truncated headlines per healthy feed.
        bullets = []
        for name, entries in zip(feed_names, all_items):
            if not entries or "error" in entries[0]:
                continue
            for entry in entries[:3]:
                headline = entry["title"][:50]
                score = f" ({entry['points']})" if entry.get("points") else ""
                bullets.append(f"  • {headline}{score} - {name}")
        content = "\n".join(bullets) or "No news available"

    first_error = errors[0] if errors else None
    return {
        "section": "Tech News",
        "icon": "📰",
        "content": content,
        "raw": dict(zip(feed_names, all_items)),
        "error": first_error,
    }


if __name__ == "__main__":
    # Manual smoke run: fetch a small sample from both default sources and
    # print the rendered section as a markdown heading followed by its body.
    sample_feeds = [
        {"name": "Hacker News", "url": "https://hnrss.org/frontpage", "limit": 3},
        {"name": "Lobsters", "url": "https://lobste.rs/rss", "limit": 2},
    ]
    report = collect({"news": {"feeds": sample_feeds, "summarize": True}})
    heading = f"## {report['icon']} {report['section']}"
    print(heading, report["content"], sep="\n")