feat: implement news aggregator API with conventional commits

- Add FastAPI application with complete router structure
- Implement search, articles, ask, feedback, and health endpoints
- Add comprehensive Pydantic schemas for API requests/responses
- Include stub service implementations for all business logic
- Add full test suite with pytest-asyncio integration
- Configure conventional commits enforcement via git hooks (see the commit-msg sketch below)
- Add project documentation and contribution guidelines
- Support both OpenAI and Gemini LLM integration options
Author: William Valentin
Date:   2025-11-02 23:11:39 -08:00
Parent: fed8b629c7
Commit: 334aa698fa

26 changed files with 3060 additions and 9 deletions
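Illustration (not part of this diff): the conventional-commits hook referenced above is not shown in these changes. A minimal commit-msg hook sketch in Python, purely hypothetical (the repository's actual hook may use a different tool entirely), would reject subjects that do not match the feat/fix/chore pattern used by this commit's title:

#!/usr/bin/env python3
"""Hypothetical commit-msg hook sketch enforcing Conventional Commit subjects."""
import re
import sys

SUBJECT = re.compile(r"^(feat|fix|docs|style|refactor|perf|test|build|ci|chore|revert)(\([\w\-]+\))?(!)?: .+")


def main() -> int:
    # Git passes the path to the commit message file as the first argument.
    with open(sys.argv[1], encoding="utf-8") as handle:
        first_line = handle.readline().strip()
    if SUBJECT.match(first_line):
        return 0
    print(f"commit-msg: not a Conventional Commit subject: {first_line!r}", file=sys.stderr)
    return 1


if __name__ == "__main__":
    sys.exit(main())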

apps/api/README.md (new file)

@@ -0,0 +1,3 @@
# News API
FastAPI service that exposes search, article metadata, conversational answers, and feedback endpoints for the news aggregator MVP. The implementation currently provides stubbed responses that will later be wired to PostgreSQL, Redis, and worker outputs.
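Illustration (not part of the committed README): with the service running locally, for example via `uvicorn news_api.main:app`, the stubbed endpoints can be exercised with a short httpx script. The base URL is an assumption for local development.

"""Illustrative only: call the stubbed endpoints of a locally running instance."""
import httpx

BASE_URL = "http://localhost:8000"  # assumed local dev address

with httpx.Client(base_url=BASE_URL) as client:
    print(client.get("/healthz").json())  # {"status": "ok"}
    print(client.get("/v1/search", params={"q": "energy"}).json()["results"][0]["title"])
    print(client.post("/v1/ask", json={"query": "What happened today?"}).json()["conversation_id"])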

apps/api/pyproject.toml (new file)

@@ -0,0 +1,47 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "news-api"
version = "0.1.0"
description = "FastAPI service for the classy Reuters news aggregator"
readme = "README.md"
requires-python = ">=3.11"
authors = [{ name = "News Aggregator Team" }]
keywords = ["fastapi", "news", "aggregator", "hybrid search"]
dependencies = [
    "fastapi>=0.111.0,<0.112.0",
    "uvicorn[standard]>=0.30.0,<0.31.0",
    "pydantic>=2.7.0,<3.0.0",
    "pydantic-settings>=2.2.1,<3.0.0",
    "psycopg[binary]>=3.1.18,<4.0.0",
    "pgvector>=0.2.4,<0.3.0",
    "httpx>=0.27.0,<0.28.0",
    "structlog>=24.1.0,<25.0.0"
]

[project.optional-dependencies]
dev = [
    "pytest>=8.2.0,<9.0.0",
    "pytest-cov>=4.1.0,<5.0.0"
]

[tool.pytest.ini_options]
addopts = "-q"
testpaths = ["tests"]
filterwarnings = ["error:::news_api.*"]
asyncio_mode = "auto"

[tool.hatch.metadata]
allow-direct-references = true

[tool.hatch.build.targets.wheel]
packages = ["src/news_api"]

[dependency-groups]
dev = [
    "pytest>=8.4.2",
    "pytest-asyncio>=1.2.0",
    "pytest-cov>=4.1.0",
]

apps/api/requirements.txt (new file)

@@ -0,0 +1,10 @@
fastapi>=0.111.0,<0.112.0
uvicorn[standard]>=0.30.0,<0.31.0
pydantic>=2.7.0,<3.0.0
pydantic-settings>=2.2.1,<3.0.0
psycopg[binary]>=3.1.18,<4.0.0
pgvector>=0.2.4,<0.3.0
httpx>=0.27.0,<0.28.0
structlog>=24.1.0,<25.0.0
pytest>=8.2.0,<9.0.0
pytest-cov>=4.1.0,<5.0.0


@@ -0,0 +1,5 @@
"""News API package exposing the FastAPI application factory."""
from .main import create_app
__all__ = ["create_app"]


@@ -0,0 +1,21 @@
"""Application configuration via Pydantic settings."""
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
"""Runtime configuration for the API service."""
app_name: str = "News Aggregator API"
default_search_mode: str = "hybrid"
max_page_size: int = 8
model_config = SettingsConfigDict(env_prefix="NEWS_API_", extra="ignore")
@lru_cache(maxsize=1)
def get_settings() -> Settings:
"""Return cached settings instance to avoid re-parsing env vars."""
return Settings()
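Illustration (not part of this commit): because model_config sets env_prefix="NEWS_API_" and get_settings is memoised with lru_cache, overriding a value at runtime (for example in a test) requires clearing the cache first. The value below is arbitrary.

"""Illustrative only: override a setting through its environment variable."""
import os

from news_api.config import get_settings

os.environ["NEWS_API_MAX_PAGE_SIZE"] = "20"  # NEWS_API_ prefix comes from SettingsConfigDict above
get_settings.cache_clear()  # drop the cached Settings instance so the new value is read
assert get_settings().max_page_size == 20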


@@ -0,0 +1,22 @@
"""FastAPI application factory."""
from fastapi import FastAPI
from .config import get_settings
from .routers import register
def create_app() -> FastAPI:
"""Create and configure the FastAPI application instance."""
settings = get_settings()
app = FastAPI(
title=settings.app_name,
version="0.1.0",
summary="Hybrid search and conversational answers over Reuters articles.",
)
register(app)
return app
app = create_app()
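Illustration (not part of this commit): the module-level app means the service can be served directly with uvicorn; the host and port below are arbitrary local defaults.

"""Illustrative only: run the API locally."""
import uvicorn

if __name__ == "__main__":
    # Targets the module-level `app` produced by create_app() above.
    uvicorn.run("news_api.main:app", host="127.0.0.1", port=8000, reload=True)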


@@ -0,0 +1,18 @@
"""Router registration utilities."""
from fastapi import APIRouter
from . import articles, ask, feedback, health, search
def register(api: APIRouter) -> None:
"""Attach all endpoint groups to the provided router or application."""
api.include_router(health.router)
api.include_router(search.router)
api.include_router(articles.router)
api.include_router(ask.router)
api.include_router(feedback.router)
__all__ = ["register"]


@@ -0,0 +1,15 @@
"""Article metadata endpoints."""
from fastapi import APIRouter
from ..schemas import ArticleResponse
from ..services.articles import fetch_article
router = APIRouter(prefix="/v1", tags=["articles"])
@router.get("/articles/{article_id}", response_model=ArticleResponse)
def get_article(article_id: str) -> ArticleResponse:
"""Return metadata for a specific article."""
return fetch_article(article_id)


@@ -0,0 +1,15 @@
"""Conversational answer endpoint."""
from fastapi import APIRouter
from ..schemas import AskRequest, AskResponse
from ..services.ask import answer_question
router = APIRouter(prefix="/v1", tags=["ask"])
@router.post("/ask", response_model=AskResponse)
def ask(payload: AskRequest) -> AskResponse:
"""Return an answer to the user query."""
return answer_question(payload)


@@ -0,0 +1,15 @@
"""Feedback intake endpoint."""
from fastapi import APIRouter
from ..schemas import FeedbackRequest, FeedbackResponse
from ..services.feedback import record_feedback
router = APIRouter(prefix="/v1", tags=["feedback"])
@router.post("/feedback", response_model=FeedbackResponse)
def feedback(payload: FeedbackRequest) -> FeedbackResponse:
"""Accept thumbs up/down feedback for later processing."""
return record_feedback(payload)


@@ -0,0 +1,19 @@
"""Health endpoints for Kubernetes probes."""
from fastapi import APIRouter
router = APIRouter(tags=["health"])
@router.get("/healthz")
def health() -> dict[str, str]:
"""Return a simple ok response for liveness probes."""
return {"status": "ok"}
@router.get("/readyz")
def ready() -> dict[str, str]:
"""Return ready until upstream dependencies are integrated."""
return {"status": "ready"}


@@ -0,0 +1,22 @@
"""Search endpoints."""
from fastapi import APIRouter, Depends, Query
from ..config import Settings, get_settings
from ..schemas import SearchMode, SearchResponse
from ..services.search import perform_search
router = APIRouter(prefix="/v1", tags=["search"])
@router.get("/search", response_model=SearchResponse)
def search(
q: str = Query("", description="User supplied search query"),
mode: SearchMode | None = Query(None, description="Search mode override"),
page: int = Query(1, ge=1, description="1-indexed page number"),
settings: Settings = Depends(get_settings),
) -> SearchResponse:
"""Return hybrid search results (stubbed until storage wiring lands)."""
chosen_mode = mode or SearchMode(settings.default_search_mode)
return perform_search(q, chosen_mode, page, settings.max_page_size)


@@ -0,0 +1,106 @@
"""Pydantic models for API requests and responses."""
from datetime import datetime
from enum import Enum
from typing import List, Optional
from pydantic import BaseModel, Field, HttpUrl
class SearchMode(str, Enum):
"""Supported search modes."""
HYBRID = "hybrid"
KEYWORD = "keyword"
SEMANTIC = "semantic"
class SourceBadge(BaseModel):
"""Represents a human friendly badge for a source."""
name: str
url: HttpUrl
class Citation(BaseModel):
"""Citation metadata for an answer."""
id: str
title: str
url: HttpUrl
class SearchResult(BaseModel):
"""Single search result card."""
id: str
title: str
snippet: str
canonical_url: HttpUrl = Field(..., description="Canonical link to the source article")
published_at: datetime
score: float = Field(..., ge=0)
badges: List[SourceBadge]
class SearchResponse(BaseModel):
"""Response envelope for search requests."""
query: str
mode: SearchMode
page: int = Field(..., ge=1)
results: List[SearchResult]
class ArticleResponse(BaseModel):
"""Metadata and summary for a single article."""
id: str
title: str
snippet: str
summary: Optional[str]
canonical_url: HttpUrl = Field(..., description="Canonical link to the source article")
published_at: datetime
authors: List[str]
topics: List[str] = []
class AskRequest(BaseModel):
"""Request payload for the conversational endpoint."""
query: str
conversation_id: Optional[str] = None
class AnswerSentence(BaseModel):
"""Single sentence in an answer with citations."""
text: str
citations: List[str] # citation ids referencing SearchResult IDs
class AskResponse(BaseModel):
"""Answer payload with citations."""
answer: List[AnswerSentence]
citations: List[Citation]
conversation_id: str
class FeedbackVerdict(str, Enum):
"""Allowed feedback verdicts."""
UP = "up"
DOWN = "down"
class FeedbackRequest(BaseModel):
"""Request payload for feedback submission."""
query: str
answer_id: Optional[str] = None
verdict: FeedbackVerdict
comment: Optional[str] = Field(None, max_length=500)
class FeedbackResponse(BaseModel):
"""Acknowledgement response for feedback."""
status: str
received_at: datetime
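Illustration (not part of this commit): the response models double as validators, so a payload can be round-tripped with Pydantic v2's model_validate. The sample values below are made up but match the stub shapes.

"""Illustrative only: validate a search payload against SearchResponse."""
from news_api.schemas import SearchResponse

payload = {
    "query": "energy",
    "mode": "hybrid",
    "page": 1,
    "results": [
        {
            "id": "stub-1",
            "title": "Stubbed headline for 'energy'",
            "snippet": "Placeholder snippet.",
            "canonical_url": "https://www.reuters.com/world/stubbed-headline-2024-01-01/",
            "published_at": "2024-01-01T00:00:00",
            "score": 0.42,
            "badges": [{"name": "Reuters", "url": "https://www.reuters.com"}],
        }
    ],
}

response = SearchResponse.model_validate(payload)  # raises ValidationError on malformed data
print(response.results[0].canonical_url)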


@@ -0,0 +1,20 @@
"""Article retrieval stubs."""
from datetime import datetime
from ..schemas import ArticleResponse
def fetch_article(article_id: str) -> ArticleResponse:
"""Return static article metadata while the DB layer is stubbed."""
return ArticleResponse(
id=article_id,
title="Stubbed Reuters piece",
snippet="An ingest worker will eventually populate this field with live data.",
summary="This summary is generated by the summarizer worker during ingestion.",
canonical_url="https://www.reuters.com/world/stubbed-piece-2024-01-01/",
published_at=datetime(2024, 1, 1, 0, 0, 0),
authors=["Reuters Staff"],
topics=["world"],
)


@@ -0,0 +1,35 @@
"""Conversational answer scaffolding."""
from datetime import datetime
from typing import List
from ..schemas import (
AnswerSentence,
AskRequest,
AskResponse,
Citation,
)
from .search import generate_conversation_id
def answer_question(payload: AskRequest) -> AskResponse:
"""Produce a placeholder answer that references stub citations."""
conversation_id = payload.conversation_id or generate_conversation_id()
sentences: List[AnswerSentence] = [
AnswerSentence(
text=(
"This is a placeholder answer generated by the API skeleton; "
"it will be replaced once the summarizer worker is connected."
),
citations=["stub-1"],
)
]
citations = [
Citation(
id="stub-1",
title="Stubbed Reuters piece",
url="https://www.reuters.com/world/stubbed-piece-2024-01-01/",
)
]
return AskResponse(answer=sentences, citations=citations, conversation_id=conversation_id)
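Illustration (not part of this commit): each AnswerSentence carries citation ids that resolve against the top-level citations list; a hypothetical rendering helper could turn them into bracketed footnote markers.

"""Illustrative only: render an AskResponse with bracketed citation indices."""
from news_api.schemas import AskResponse


def render_answer(response: AskResponse) -> str:
    # Map citation ids to 1-based indices so each sentence can carry [n] markers.
    index = {citation.id: position for position, citation in enumerate(response.citations, start=1)}
    rendered = []
    for sentence in response.answer:
        markers = "".join(f"[{index[citation_id]}]" for citation_id in sentence.citations)
        rendered.append(f"{sentence.text}{markers}")
    return " ".join(rendered)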


@@ -0,0 +1,12 @@
"""Feedback persistence placeholder."""
from datetime import datetime, timezone
from ..schemas import FeedbackRequest, FeedbackResponse
def record_feedback(payload: FeedbackRequest) -> FeedbackResponse:
"""Return a simple acknowledgement until persistence is wired up."""
# A real implementation would enqueue this payload to Redis or persist to Postgres.
return FeedbackResponse(status="queued", received_at=datetime.now(tz=timezone.utc))
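Illustration (not part of this commit): the inline comment names Redis or Postgres as the eventual destination. A sketch of the Redis variant, assuming redis-py is added as a dependency and a feedback:queue list key, both of which are hypothetical here:

"""Hypothetical sketch: enqueue feedback for a worker instead of acknowledging inline."""
import json
from datetime import datetime, timezone

import redis  # assumed extra dependency, not in pyproject.toml


def enqueue_feedback(payload: dict) -> None:
    client = redis.Redis(host="localhost", port=6379, decode_responses=True)
    record = {**payload, "received_at": datetime.now(tz=timezone.utc).isoformat()}
    # A worker would pop from this list and persist the record to Postgres.
    client.lpush("feedback:queue", json.dumps(record))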


@@ -0,0 +1,36 @@
"""Stubbed search service that will later interface with PostgreSQL and pgvector."""
from datetime import datetime
from typing import List
from uuid import uuid4
from ..schemas import SearchMode, SearchResponse, SearchResult, SourceBadge
def perform_search(query: str, mode: SearchMode, page: int, page_size: int) -> SearchResponse:
"""Return a deterministic stub search response for scaffolding purposes."""
normalized_query = query.strip() or "latest news"
# Provide a single deterministic card to unblock UI development.
result_id = f"stub-{page}-{abs(hash(normalized_query)) % 10_000}"
badges: List[SourceBadge] = [
SourceBadge(name="Reuters", url="https://www.reuters.com"),
]
results = [
SearchResult(
id=result_id,
title=f"Stubbed headline for '{normalized_query}'",
snippet="This is placeholder snippet text until the ingest pipeline is ready.",
canonical_url="https://www.reuters.com/world/europe/stubbed-headline-2024-01-01/",
published_at=datetime(2024, 1, 1, 0, 0, 0),
score=0.42,
badges=badges,
)
]
return SearchResponse(query=normalized_query, mode=mode, page=page, results=results)
def generate_conversation_id() -> str:
"""Return a predictable opaque identifier for conversations."""
return uuid4().hex
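Illustration (not part of this commit): the module docstring anticipates PostgreSQL and pgvector. One possible shape for the semantic leg of hybrid search, assuming an articles table with an embedding vector column and numpy for the query vector; none of these exist in this commit.

"""Hypothetical sketch: semantic ranking with psycopg and pgvector."""
import numpy as np
import psycopg
from pgvector.psycopg import register_vector


def semantic_search(dsn: str, query_embedding: list[float], limit: int = 8) -> list[tuple]:
    with psycopg.connect(dsn) as conn:
        register_vector(conn)  # register vector send/receive adapters on this connection
        return conn.execute(
            "SELECT id, title, published_at FROM articles "
            "ORDER BY embedding <=> %s LIMIT %s",  # <=> is pgvector's cosine distance operator
            (np.array(query_embedding), limit),
        ).fetchall()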


@@ -0,0 +1,64 @@
import pytest
import httpx

from news_api.main import create_app


@pytest.fixture()
async def client():
    app = create_app()
    async with httpx.AsyncClient(app=app, base_url="http://testserver") as async_client:
        yield async_client


@pytest.mark.asyncio
async def test_healthz(client: httpx.AsyncClient) -> None:
    response = await client.get("/healthz")
    assert response.status_code == 200
    assert response.json() == {"status": "ok"}


@pytest.mark.asyncio
async def test_search_defaults(client: httpx.AsyncClient) -> None:
    response = await client.get("/v1/search", params={"q": "energy"})
    assert response.status_code == 200
    payload = response.json()
    assert payload["mode"] == "hybrid"
    assert payload["page"] == 1
    assert payload["results"], "Expected at least one search result"
    first = payload["results"][0]
    assert first["title"].startswith("Stubbed headline")
    assert first["badges"][0]["name"] == "Reuters"


@pytest.mark.asyncio
async def test_get_article_by_id(client: httpx.AsyncClient) -> None:
    article_id = "stub-article"
    response = await client.get(f"/v1/articles/{article_id}")
    assert response.status_code == 200
    payload = response.json()
    assert payload["id"] == article_id
    assert payload["canonical_url"].startswith("https://www.reuters.com")


@pytest.mark.asyncio
async def test_ask_endpoint_returns_citations(client: httpx.AsyncClient) -> None:
    response = await client.post("/v1/ask", json={"query": "What is the latest in energy?"})
    assert response.status_code == 200
    payload = response.json()
    assert payload["answer"], "Expected sentences in answer"
    assert payload["citations"], "Expected citations"
    assert all(sentence["citations"] for sentence in payload["answer"])
    assert payload["conversation_id"], "Expected conversation id"


@pytest.mark.asyncio
async def test_feedback_endpoint_acknowledges(client: httpx.AsyncClient) -> None:
    response = await client.post(
        "/v1/feedback",
        json={"query": "test", "answer_id": "stub", "verdict": "up", "comment": "nice"},
    )
    assert response.status_code == 200
    payload = response.json()
    assert payload["status"] == "queued"
    assert "received_at" in payload

apps/api/uv.lock (generated, 1221 lines; diff suppressed because it is too large)