#!/usr/bin/env python3
"""News collector for RSS feeds."""

import json
import re
import subprocess
import urllib.request
import xml.etree.ElementTree as ET
from html import unescape
from typing import Optional

# Clark-notation prefix for elements in the Atom namespace.
_ATOM_NS = "{http://www.w3.org/2005/Atom}"

# Matches both "Points: 42" (label first) and "42 points" (number first).
_POINTS_RE = re.compile(r"points:\s*(\d+)|(\d+)\s*points?\b", re.I)


def _extract_points(description: str) -> str:
    """Return the score found in *description* as a digit string, or ""."""
    match = _POINTS_RE.search(description)
    if match is None:
        return ""
    return match.group(1) or match.group(2)


def _entry_link(entry: ET.Element) -> str:
    """Return the URL of an RSS item or Atom entry, or "" if none is present."""
    link = entry.findtext("link") or ""
    if link:
        return link
    # Atom stores the URL in the href attribute of <link>. An entry may carry
    # several links (self, alternate, ...); a missing rel means "alternate".
    candidates = entry.findall(f"{_ATOM_NS}link")
    for elem in candidates:
        if elem.get("rel", "alternate") == "alternate":
            return elem.get("href", "")
    return candidates[0].get("href", "") if candidates else ""


def _parse_entry(entry: ET.Element) -> dict:
    """Convert one RSS <item> or Atom <entry> into a title/link/points dict."""
    title = entry.findtext("title") or entry.findtext(f"{_ATOM_NS}title") or ""
    description = entry.findtext("description") or ""
    return {
        "title": unescape(title.strip()),
        "link": _entry_link(entry),
        "points": _extract_points(description),
    }


def _is_error(items: list) -> bool:
    """Return True if *items* is the single-element error result of fetch_feed."""
    return len(items) == 1 and "error" in items[0]


def _has_items(items: list) -> bool:
    """Return True if *items* holds at least one successfully parsed entry."""
    return bool(items) and "error" not in items[0]


def fetch_feed(url: str, limit: int = 5) -> list:
    """Fetch and parse an RSS or Atom feed.

    Args:
        url: Feed URL to download.
        limit: Maximum number of entries to return.

    Returns:
        A list of dicts with the keys "title", "link" and "points" (a digit
        string, or "" when no score was found). On any failure the function
        does not raise; it returns a one-element list ``[{"error": message}]``.
    """
    try:
        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=10) as resp:
            payload = resp.read()
        # Parse the raw bytes so the parser honours the encoding declared in
        # the XML prolog instead of assuming UTF-8.
        root = ET.fromstring(payload)
        # Handle both RSS and Atom formats
        entries = root.findall(".//item") or root.findall(f".//{_ATOM_NS}entry")
        return [_parse_entry(entry) for entry in entries[:limit]]
    except Exception as exc:
        # Callers rely on the error being reported in-band rather than raised.
        return [{"error": str(exc)}]


def summarize_with_sonnet(all_items: list, feed_names: list) -> str:
    """Use Sonnet to summarize news headlines.

    Args:
        all_items: One fetch_feed() result per feed.
        feed_names: Display names, parallel to *all_items*.

    Returns:
        The text printed by the ``claude`` command, or, if that command is
        unavailable, fails or prints nothing, a plain bullet list of up to
        five headlines (at most two per feed). A fixed message is returned
        when no headlines are available.
    """
    if not all_items or all(_is_error(items) for items in all_items):
        return "⚠️ Could not fetch news feeds"

    # Build context
    news_text = []
    for items, name in zip(all_items, feed_names):
        if not _has_items(items):
            continue
        for item in items:
            points_str = f" ({item['points']} pts)" if item.get("points") else ""
            news_text.append(f"[{name}] {item['title']}{points_str}")

    if not news_text:
        return "No news available"

    context = "\n".join(news_text)
    prompt = (
        "You are creating a tech news section for a morning report.\n\n"
        "Given these headlines from various sources, pick the top 5 most "
        "interesting/important ones.\n"
        "Format each as a bullet with source in parentheses. "
        "Keep titles concise - trim if needed.\n\n"
        f"Headlines:\n{context}\n\n"
        "Output ONLY the formatted news list, nothing else."
    )

    try:
        result = subprocess.run(
            ["claude", "--print", "--model", "sonnet", "-p", prompt],
            capture_output=True,
            text=True,
            timeout=60,
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # Missing binary, timeout or undecodable output: summarizing is
        # best-effort, so fall through to the plain listing below.
        result = None

    if result is not None and result.returncode == 0 and result.stdout.strip():
        return result.stdout.strip()

    # Fallback - just show first few items
    lines = []
    for items, name in zip(all_items, feed_names):
        if not _has_items(items):
            continue
        for item in items[:2]:
            points_str = f" ({item['points']} pts)" if item.get("points") else ""
            title = item["title"]
            if len(title) > 60:
                title = title[:60] + "..."
            lines.append(f" • {title}{points_str} ({name})")

    return "\n".join(lines[:5]) if lines else "No news available"


def collect(config: dict) -> dict:
    """Main collector entry point.

    Args:
        config: Settings dict. The optional "news" entry may contain "feeds"
            (a list of dicts with "name", "url" and optional "limit") and
            "summarize" (bool, default True).

    Returns:
        A dict with the keys "section", "icon", "content" (formatted text),
        "raw" (feed name -> fetch_feed() result) and "error" (the first
        feed error as "name: message", or None).
    """
    news_config = config.get("news", {})
    feeds = news_config.get("feeds", [
        {"name": "Hacker News", "url": "https://hnrss.org/frontpage", "limit": 5},
        {"name": "Lobsters", "url": "https://lobste.rs/rss", "limit": 3},
    ])
    use_summarize = news_config.get("summarize", True)

    all_items = []
    feed_names = []
    errors = []

    for feed in feeds:
        items = fetch_feed(feed["url"], feed.get("limit", 5))
        all_items.append(items)
        feed_names.append(feed["name"])
        if _is_error(items):
            errors.append(f"{feed['name']}: {items[0]['error']}")

    if use_summarize:
        formatted = summarize_with_sonnet(all_items, feed_names)
    else:
        # Basic format
        lines = []
        for items, name in zip(all_items, feed_names):
            if not _has_items(items):
                continue
            for item in items[:3]:
                title = item["title"][:50]
                points = f" ({item['points']})" if item.get("points") else ""
                lines.append(f" • {title}{points} - {name}")
        formatted = "\n".join(lines) if lines else "No news available"

    return {
        "section": "Tech News",
        "icon": "📰",
        "content": formatted,
        "raw": dict(zip(feed_names, all_items)),
        "error": errors[0] if errors else None,
    }


if __name__ == "__main__":
    config = {
        "news": {
            "feeds": [
                {"name": "Hacker News", "url": "https://hnrss.org/frontpage", "limit": 3},
                {"name": "Lobsters", "url": "https://lobste.rs/rss", "limit": 2},
            ],
            "summarize": True,
        }
    }
    result = collect(config)
    print(f"## {result['icon']} {result['section']}")
    print(result["content"])