feat: implement news aggregator API with conventional commits

- Add FastAPI application with complete router structure
- Implement search, articles, ask, feedback, and health endpoints
- Add comprehensive Pydantic schemas for API requests/responses
- Include stub service implementations for all business logic
- Add full test suite with pytest-asyncio integration
- Configure conventional commits enforcement via git hooks (see the commit-msg sketch below)
- Add project documentation and contribution guidelines
- Support both OpenAI and Gemini LLM integration options
Author: William Valentin
Date:   2025-11-02 23:11:39 -08:00
Parent: fed8b629c7
Commit: 334aa698fa

26 changed files with 3060 additions and 9 deletions
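Illustration (not part of this diff): the conventional-commits hook referenced above is not shown in these changes. A minimal commit-msg hook sketch in Python, purely hypothetical (the repository's actual hook may use a different tool entirely), would reject subjects that do not match the feat/fix/chore pattern used by this commit's title:

#!/usr/bin/env python3
"""Hypothetical commit-msg hook sketch enforcing Conventional Commit subjects."""
import re
import sys

SUBJECT = re.compile(r"^(feat|fix|docs|style|refactor|perf|test|build|ci|chore|revert)(\([\w\-]+\))?(!)?: .+")


def main() -> int:
    # Git passes the path to the commit message file as the first argument.
    with open(sys.argv[1], encoding="utf-8") as handle:
        first_line = handle.readline().strip()
    if SUBJECT.match(first_line):
        return 0
    print(f"commit-msg: not a Conventional Commit subject: {first_line!r}", file=sys.stderr)
    return 1


if __name__ == "__main__":
    sys.exit(main())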

apps/api/README.md (new file)

@@ -0,0 +1,3 @@
# News API
FastAPI service that exposes search, article metadata, conversational answers, and feedback endpoints for the news aggregator MVP. The implementation currently provides stubbed responses that will later be wired to PostgreSQL, Redis, and worker outputs.
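Illustration (not part of the committed README): with the service running locally, for example via `uvicorn news_api.main:app`, the stubbed endpoints can be exercised with a short httpx script. The base URL is an assumption for local development.

"""Illustrative only: call the stubbed endpoints of a locally running instance."""
import httpx

BASE_URL = "http://localhost:8000"  # assumed local dev address

with httpx.Client(base_url=BASE_URL) as client:
    print(client.get("/healthz").json())  # {"status": "ok"}
    print(client.get("/v1/search", params={"q": "energy"}).json()["results"][0]["title"])
    print(client.post("/v1/ask", json={"query": "What happened today?"}).json()["conversation_id"])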

apps/api/pyproject.toml (new file)

@@ -0,0 +1,47 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "news-api"
version = "0.1.0"
description = "FastAPI service for the classy Reuters news aggregator"
readme = "README.md"
requires-python = ">=3.11"
authors = [{ name = "News Aggregator Team" }]
keywords = ["fastapi", "news", "aggregator", "hybrid search"]
dependencies = [
    "fastapi>=0.111.0,<0.112.0",
    "uvicorn[standard]>=0.30.0,<0.31.0",
    "pydantic>=2.7.0,<3.0.0",
    "pydantic-settings>=2.2.1,<3.0.0",
    "psycopg[binary]>=3.1.18,<4.0.0",
    "pgvector>=0.2.4,<0.3.0",
    "httpx>=0.27.0,<0.28.0",
    "structlog>=24.1.0,<25.0.0"
]

[project.optional-dependencies]
dev = [
    "pytest>=8.2.0,<9.0.0",
    "pytest-cov>=4.1.0,<5.0.0"
]

[tool.pytest.ini_options]
addopts = "-q"
testpaths = ["tests"]
filterwarnings = ["error:::news_api.*"]
asyncio_mode = "auto"

[tool.hatch.metadata]
allow-direct-references = true

[tool.hatch.build.targets.wheel]
packages = ["src/news_api"]

[dependency-groups]
dev = [
    "pytest>=8.4.2",
    "pytest-asyncio>=1.2.0",
    "pytest-cov>=4.1.0",
]

apps/api/requirements.txt (new file)

@@ -0,0 +1,10 @@
fastapi>=0.111.0,<0.112.0
uvicorn[standard]>=0.30.0,<0.31.0
pydantic>=2.7.0,<3.0.0
pydantic-settings>=2.2.1,<3.0.0
psycopg[binary]>=3.1.18,<4.0.0
pgvector>=0.2.4,<0.3.0
httpx>=0.27.0,<0.28.0
structlog>=24.1.0,<25.0.0
pytest>=8.2.0,<9.0.0
pytest-cov>=4.1.0,<5.0.0


@@ -0,0 +1,5 @@
"""News API package exposing the FastAPI application factory."""
from .main import create_app
__all__ = ["create_app"]


@@ -0,0 +1,21 @@
"""Application configuration via Pydantic settings."""
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
"""Runtime configuration for the API service."""
app_name: str = "News Aggregator API"
default_search_mode: str = "hybrid"
max_page_size: int = 8
model_config = SettingsConfigDict(env_prefix="NEWS_API_", extra="ignore")
@lru_cache(maxsize=1)
def get_settings() -> Settings:
"""Return cached settings instance to avoid re-parsing env vars."""
return Settings()
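Illustration (not part of this commit): because model_config sets env_prefix="NEWS_API_" and get_settings is memoised with lru_cache, overriding a value at runtime (for example in a test) requires clearing the cache first. The value below is arbitrary.

"""Illustrative only: override a setting through its environment variable."""
import os

from news_api.config import get_settings

os.environ["NEWS_API_MAX_PAGE_SIZE"] = "20"  # NEWS_API_ prefix comes from SettingsConfigDict above
get_settings.cache_clear()  # drop the cached Settings instance so the new value is read
assert get_settings().max_page_size == 20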


@@ -0,0 +1,22 @@
"""FastAPI application factory."""
from fastapi import FastAPI
from .config import get_settings
from .routers import register
def create_app() -> FastAPI:
"""Create and configure the FastAPI application instance."""
settings = get_settings()
app = FastAPI(
title=settings.app_name,
version="0.1.0",
summary="Hybrid search and conversational answers over Reuters articles.",
)
register(app)
return app
app = create_app()
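Illustration (not part of this commit): the module-level app means the service can be served directly with uvicorn; the host and port below are arbitrary local defaults.

"""Illustrative only: run the API locally."""
import uvicorn

if __name__ == "__main__":
    # Targets the module-level `app` produced by create_app() above.
    uvicorn.run("news_api.main:app", host="127.0.0.1", port=8000, reload=True)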


@@ -0,0 +1,18 @@
"""Router registration utilities."""
from fastapi import APIRouter
from . import articles, ask, feedback, health, search
def register(api: APIRouter) -> None:
"""Attach all endpoint groups to the provided router or application."""
api.include_router(health.router)
api.include_router(search.router)
api.include_router(articles.router)
api.include_router(ask.router)
api.include_router(feedback.router)
__all__ = ["register"]


@@ -0,0 +1,15 @@
"""Article metadata endpoints."""
from fastapi import APIRouter
from ..schemas import ArticleResponse
from ..services.articles import fetch_article
router = APIRouter(prefix="/v1", tags=["articles"])
@router.get("/articles/{article_id}", response_model=ArticleResponse)
def get_article(article_id: str) -> ArticleResponse:
"""Return metadata for a specific article."""
return fetch_article(article_id)


@@ -0,0 +1,15 @@
"""Conversational answer endpoint."""
from fastapi import APIRouter
from ..schemas import AskRequest, AskResponse
from ..services.ask import answer_question
router = APIRouter(prefix="/v1", tags=["ask"])
@router.post("/ask", response_model=AskResponse)
def ask(payload: AskRequest) -> AskResponse:
"""Return an answer to the user query."""
return answer_question(payload)


@@ -0,0 +1,15 @@
"""Feedback intake endpoint."""
from fastapi import APIRouter
from ..schemas import FeedbackRequest, FeedbackResponse
from ..services.feedback import record_feedback
router = APIRouter(prefix="/v1", tags=["feedback"])
@router.post("/feedback", response_model=FeedbackResponse)
def feedback(payload: FeedbackRequest) -> FeedbackResponse:
"""Accept thumbs up/down feedback for later processing."""
return record_feedback(payload)


@@ -0,0 +1,19 @@
"""Health endpoints for Kubernetes probes."""
from fastapi import APIRouter
router = APIRouter(tags=["health"])
@router.get("/healthz")
def health() -> dict[str, str]:
"""Return a simple ok response for liveness probes."""
return {"status": "ok"}
@router.get("/readyz")
def ready() -> dict[str, str]:
"""Return ready until upstream dependencies are integrated."""
return {"status": "ready"}


@@ -0,0 +1,22 @@
"""Search endpoints."""
from fastapi import APIRouter, Depends, Query
from ..config import Settings, get_settings
from ..schemas import SearchMode, SearchResponse
from ..services.search import perform_search
router = APIRouter(prefix="/v1", tags=["search"])
@router.get("/search", response_model=SearchResponse)
def search(
q: str = Query("", description="User supplied search query"),
mode: SearchMode | None = Query(None, description="Search mode override"),
page: int = Query(1, ge=1, description="1-indexed page number"),
settings: Settings = Depends(get_settings),
) -> SearchResponse:
"""Return hybrid search results (stubbed until storage wiring lands)."""
chosen_mode = mode or SearchMode(settings.default_search_mode)
return perform_search(q, chosen_mode, page, settings.max_page_size)


@@ -0,0 +1,106 @@
"""Pydantic models for API requests and responses."""
from datetime import datetime
from enum import Enum
from typing import List, Optional
from pydantic import BaseModel, Field, HttpUrl
class SearchMode(str, Enum):
"""Supported search modes."""
HYBRID = "hybrid"
KEYWORD = "keyword"
SEMANTIC = "semantic"
class SourceBadge(BaseModel):
"""Represents a human friendly badge for a source."""
name: str
url: HttpUrl
class Citation(BaseModel):
"""Citation metadata for an answer."""
id: str
title: str
url: HttpUrl
class SearchResult(BaseModel):
"""Single search result card."""
id: str
title: str
snippet: str
canonical_url: HttpUrl = Field(..., description="Canonical link to the source article")
published_at: datetime
score: float = Field(..., ge=0)
badges: List[SourceBadge]
class SearchResponse(BaseModel):
"""Response envelope for search requests."""
query: str
mode: SearchMode
page: int = Field(..., ge=1)
results: List[SearchResult]
class ArticleResponse(BaseModel):
"""Metadata and summary for a single article."""
id: str
title: str
snippet: str
summary: Optional[str]
canonical_url: HttpUrl = Field(..., description="Canonical link to the source article")
published_at: datetime
authors: List[str]
topics: List[str] = []
class AskRequest(BaseModel):
"""Request payload for the conversational endpoint."""
query: str
conversation_id: Optional[str] = None
class AnswerSentence(BaseModel):
"""Single sentence in an answer with citations."""
text: str
citations: List[str] # citation ids referencing SearchResult IDs
class AskResponse(BaseModel):
"""Answer payload with citations."""
answer: List[AnswerSentence]
citations: List[Citation]
conversation_id: str
class FeedbackVerdict(str, Enum):
"""Allowed feedback verdicts."""
UP = "up"
DOWN = "down"
class FeedbackRequest(BaseModel):
"""Request payload for feedback submission."""
query: str
answer_id: Optional[str] = None
verdict: FeedbackVerdict
comment: Optional[str] = Field(None, max_length=500)
class FeedbackResponse(BaseModel):
"""Acknowledgement response for feedback."""
status: str
received_at: datetime
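Illustration (not part of this commit): the response models double as validators, so a payload can be round-tripped with Pydantic v2's model_validate. The sample values below are made up but match the stub shapes.

"""Illustrative only: validate a search payload against SearchResponse."""
from news_api.schemas import SearchResponse

payload = {
    "query": "energy",
    "mode": "hybrid",
    "page": 1,
    "results": [
        {
            "id": "stub-1",
            "title": "Stubbed headline for 'energy'",
            "snippet": "Placeholder snippet.",
            "canonical_url": "https://www.reuters.com/world/stubbed-headline-2024-01-01/",
            "published_at": "2024-01-01T00:00:00",
            "score": 0.42,
            "badges": [{"name": "Reuters", "url": "https://www.reuters.com"}],
        }
    ],
}

response = SearchResponse.model_validate(payload)  # raises ValidationError on malformed data
print(response.results[0].canonical_url)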


@@ -0,0 +1,20 @@
"""Article retrieval stubs."""
from datetime import datetime
from ..schemas import ArticleResponse
def fetch_article(article_id: str) -> ArticleResponse:
"""Return static article metadata while the DB layer is stubbed."""
return ArticleResponse(
id=article_id,
title="Stubbed Reuters piece",
snippet="An ingest worker will eventually populate this field with live data.",
summary="This summary is generated by the summarizer worker during ingestion.",
canonical_url="https://www.reuters.com/world/stubbed-piece-2024-01-01/",
published_at=datetime(2024, 1, 1, 0, 0, 0),
authors=["Reuters Staff"],
topics=["world"],
)


@@ -0,0 +1,35 @@
"""Conversational answer scaffolding."""
from datetime import datetime
from typing import List
from ..schemas import (
AnswerSentence,
AskRequest,
AskResponse,
Citation,
)
from .search import generate_conversation_id
def answer_question(payload: AskRequest) -> AskResponse:
"""Produce a placeholder answer that references stub citations."""
conversation_id = payload.conversation_id or generate_conversation_id()
sentences: List[AnswerSentence] = [
AnswerSentence(
text=(
"This is a placeholder answer generated by the API skeleton; "
"it will be replaced once the summarizer worker is connected."
),
citations=["stub-1"],
)
]
citations = [
Citation(
id="stub-1",
title="Stubbed Reuters piece",
url="https://www.reuters.com/world/stubbed-piece-2024-01-01/",
)
]
return AskResponse(answer=sentences, citations=citations, conversation_id=conversation_id)
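Illustration (not part of this commit): each AnswerSentence carries citation ids that resolve against the top-level citations list; a hypothetical rendering helper could turn them into bracketed footnote markers.

"""Illustrative only: render an AskResponse with bracketed citation indices."""
from news_api.schemas import AskResponse


def render_answer(response: AskResponse) -> str:
    # Map citation ids to 1-based indices so each sentence can carry [n] markers.
    index = {citation.id: position for position, citation in enumerate(response.citations, start=1)}
    rendered = []
    for sentence in response.answer:
        markers = "".join(f"[{index[citation_id]}]" for citation_id in sentence.citations)
        rendered.append(f"{sentence.text}{markers}")
    return " ".join(rendered)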


@@ -0,0 +1,12 @@
"""Feedback persistence placeholder."""
from datetime import datetime, timezone
from ..schemas import FeedbackRequest, FeedbackResponse
def record_feedback(payload: FeedbackRequest) -> FeedbackResponse:
"""Return a simple acknowledgement until persistence is wired up."""
# A real implementation would enqueue this payload to Redis or persist to Postgres.
return FeedbackResponse(status="queued", received_at=datetime.now(tz=timezone.utc))
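Illustration (not part of this commit): the inline comment names Redis or Postgres as the eventual destination. A sketch of the Redis variant, assuming redis-py is added as a dependency and a feedback:queue list key, both of which are hypothetical here:

"""Hypothetical sketch: enqueue feedback for a worker instead of acknowledging inline."""
import json
from datetime import datetime, timezone

import redis  # assumed extra dependency, not in pyproject.toml


def enqueue_feedback(payload: dict) -> None:
    client = redis.Redis(host="localhost", port=6379, decode_responses=True)
    record = {**payload, "received_at": datetime.now(tz=timezone.utc).isoformat()}
    # A worker would pop from this list and persist the record to Postgres.
    client.lpush("feedback:queue", json.dumps(record))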


@@ -0,0 +1,36 @@
"""Stubbed search service that will later interface with PostgreSQL and pgvector."""
from datetime import datetime
from typing import List
from uuid import uuid4
from ..schemas import SearchMode, SearchResponse, SearchResult, SourceBadge
def perform_search(query: str, mode: SearchMode, page: int, page_size: int) -> SearchResponse:
"""Return a deterministic stub search response for scaffolding purposes."""
normalized_query = query.strip() or "latest news"
# Provide a single deterministic card to unblock UI development.
result_id = f"stub-{page}-{abs(hash(normalized_query)) % 10_000}"
badges: List[SourceBadge] = [
SourceBadge(name="Reuters", url="https://www.reuters.com"),
]
results = [
SearchResult(
id=result_id,
title=f"Stubbed headline for '{normalized_query}'",
snippet="This is placeholder snippet text until the ingest pipeline is ready.",
canonical_url="https://www.reuters.com/world/europe/stubbed-headline-2024-01-01/",
published_at=datetime(2024, 1, 1, 0, 0, 0),
score=0.42,
badges=badges,
)
]
return SearchResponse(query=normalized_query, mode=mode, page=page, results=results)
def generate_conversation_id() -> str:
"""Return a predictable opaque identifier for conversations."""
return uuid4().hex
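Illustration (not part of this commit): the module docstring anticipates PostgreSQL and pgvector. One possible shape for the semantic leg of hybrid search, assuming an articles table with an embedding vector column and numpy for the query vector; none of these exist in this commit.

"""Hypothetical sketch: semantic ranking with psycopg and pgvector."""
import numpy as np
import psycopg
from pgvector.psycopg import register_vector


def semantic_search(dsn: str, query_embedding: list[float], limit: int = 8) -> list[tuple]:
    with psycopg.connect(dsn) as conn:
        register_vector(conn)  # register vector send/receive adapters on this connection
        return conn.execute(
            "SELECT id, title, published_at FROM articles "
            "ORDER BY embedding <=> %s LIMIT %s",  # <=> is pgvector's cosine distance operator
            (np.array(query_embedding), limit),
        ).fetchall()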


@@ -0,0 +1,64 @@
import pytest
import httpx

from news_api.main import create_app


@pytest.fixture()
async def client():
    app = create_app()
    async with httpx.AsyncClient(app=app, base_url="http://testserver") as async_client:
        yield async_client


@pytest.mark.asyncio
async def test_healthz(client: httpx.AsyncClient) -> None:
    response = await client.get("/healthz")
    assert response.status_code == 200
    assert response.json() == {"status": "ok"}


@pytest.mark.asyncio
async def test_search_defaults(client: httpx.AsyncClient) -> None:
    response = await client.get("/v1/search", params={"q": "energy"})
    assert response.status_code == 200
    payload = response.json()
    assert payload["mode"] == "hybrid"
    assert payload["page"] == 1
    assert payload["results"], "Expected at least one search result"
    first = payload["results"][0]
    assert first["title"].startswith("Stubbed headline")
    assert first["badges"][0]["name"] == "Reuters"


@pytest.mark.asyncio
async def test_get_article_by_id(client: httpx.AsyncClient) -> None:
    article_id = "stub-article"
    response = await client.get(f"/v1/articles/{article_id}")
    assert response.status_code == 200
    payload = response.json()
    assert payload["id"] == article_id
    assert payload["canonical_url"].startswith("https://www.reuters.com")


@pytest.mark.asyncio
async def test_ask_endpoint_returns_citations(client: httpx.AsyncClient) -> None:
    response = await client.post("/v1/ask", json={"query": "What is the latest in energy?"})
    assert response.status_code == 200
    payload = response.json()
    assert payload["answer"], "Expected sentences in answer"
    assert payload["citations"], "Expected citations"
    assert all(sentence["citations"] for sentence in payload["answer"])
    assert payload["conversation_id"], "Expected conversation id"


@pytest.mark.asyncio
async def test_feedback_endpoint_acknowledges(client: httpx.AsyncClient) -> None:
    response = await client.post(
        "/v1/feedback",
        json={"query": "test", "answer_id": "stub", "verdict": "up", "comment": "nice"},
    )
    assert response.status_code == 200
    payload = response.json()
    assert payload["status"] == "queued"
    assert "received_at" in payload

apps/api/uv.lock (generated, 1221 lines; diff suppressed because it is too large)