feat(query-api): add richer stats and retention

This commit is contained in:
William Valentin
2026-03-26 11:22:34 -07:00
parent fdfcb50e80
commit 43877a5448
10 changed files with 583 additions and 85 deletions
+97 -18
View File
@@ -1,7 +1,9 @@
package main
import (
"context"
"database/sql"
"encoding/json"
"log"
"net/http"
"os"
@@ -18,11 +20,16 @@ import (
"github.com/nats-io/nats.go"
)
// wsClient is one connected WebSocket peer together with its outbound queue.
type wsClient struct {
// conn is the underlying WebSocket connection.
conn *websocket.Conn
// send carries payloads that are waiting to be written to conn.
send chan []byte
}
var (
// wsUpgrader upgrades incoming HTTP requests to WebSocket connections.
// CheckOrigin always returns true, so no request is rejected because of
// its Origin header.
wsUpgrader = websocket.Upgrader{
CheckOrigin: func(r *http.Request) bool { return true },
}
// wsClients is the registry of currently connected WebSocket peers.
wsClients = make(map[*websocket.Conn]bool)
wsClients = make(map[*wsClient]bool)
// wsMu guards wsClients.
wsMu sync.RWMutex
natsConn *nats.Conn
)
@@ -31,23 +38,15 @@ func subscribeToNATS(nc *nats.Conn) {
topic := envDefault("NATS_TOPIC", "agentmon.events.v1")
sub, err := nc.Subscribe(topic, func(msg *nats.Msg) {
wsMu.RLock()
var stale []*websocket.Conn
for conn := range wsClients {
err := conn.WriteMessage(websocket.TextMessage, msg.Data)
if err != nil {
conn.Close()
stale = append(stale, conn)
for client := range wsClients {
select {
case client.send <- msg.Data:
default:
// Slow client; close and remove in background.
go removeClient(client)
}
}
wsMu.RUnlock()
if len(stale) > 0 {
wsMu.Lock()
for _, conn := range stale {
delete(wsClients, conn)
}
wsMu.Unlock()
}
})
if err != nil {
log.Printf("failed to subscribe to NATS: %v", err)
@@ -57,19 +56,44 @@ func subscribeToNATS(nc *nats.Conn) {
_ = sub
}
// removeClient unregisters c, closes its send channel and closes its
// connection. It is safe to call more than once for the same client: only the
// call that still finds c in wsClients performs the teardown, so the send
// channel is closed exactly once.
func removeClient(c *wsClient) {
	wsMu.Lock()
	defer wsMu.Unlock()

	// Already removed by an earlier call; nothing left to tear down.
	if !wsClients[c] {
		return
	}

	delete(wsClients, c)
	close(c.send)
	c.conn.Close()
}
func wsHandler(w http.ResponseWriter, r *http.Request) {
conn, err := wsUpgrader.Upgrade(w, r, nil)
if err != nil {
return
}
defer conn.Close()
client := &wsClient{
conn: conn,
send: make(chan []byte, 256),
}
wsMu.Lock()
wsClients[conn] = true
wsClients[client] = true
wsMu.Unlock()
log.Printf("WebSocket client connected")
// Writer goroutine: sole owner of conn writes.
go func() {
defer conn.Close()
for msg := range client.send {
if err := conn.WriteMessage(websocket.TextMessage, msg); err != nil {
break
}
}
}()
// Read loop blocks until the client disconnects.
for {
_, _, err := conn.ReadMessage()
if err != nil {
@@ -78,8 +102,12 @@ func wsHandler(w http.ResponseWriter, r *http.Request) {
}
wsMu.Lock()
delete(wsClients, conn)
if wsClients[client] {
delete(wsClients, client)
close(client.send)
}
wsMu.Unlock()
log.Printf("WebSocket client disconnected")
}
@@ -249,6 +277,36 @@ func main() {
httpx.WriteJSON(w, http.StatusOK, map[string]any{"tools": tools})
})
// GET /v1/stats/top-models responds with {"models": [...]}.
r.Get("/v1/stats/top-models", func(w http.ResponseWriter, r *http.Request) {
	// The Atoi error is deliberately discarded: an absent or non-numeric
	// "limit" yields 0, which is handed to the database layer unchanged.
	rawLimit := r.URL.Query().Get("limit")
	limit, _ := strconv.Atoi(rawLimit)

	models, err := db.GetTopModels(r.Context(), limit)
	switch {
	case err != nil:
		httpx.WriteJSON(w, http.StatusInternalServerError, map[string]any{"error": "db_error"})
		return
	case models == nil:
		// Substitute an empty slice so the JSON field is [] rather than null.
		models = []postgres.TopModel{}
	}

	payload := map[string]any{"models": models}
	httpx.WriteJSON(w, http.StatusOK, payload)
})
// POST /v1/admin/retention deletes everything older than the requested number
// of days. The request body is JSON of the form {"days": N}. On success the
// response carries the value returned by db.DeleteOlderThan under "deleted"
// and the applied cutoff (RFC 3339) under "cutoff".
//
// NOTE(review): no authentication is visible on this route from here; confirm
// that middleware protects it before exposing the service.
r.Post("/v1/admin/retention", func(w http.ResponseWriter, r *http.Request) {
	// Upper bound on "days" (roughly 100 years). time.Time.AddDate does not
	// guard against overflow, so an absurdly large value could produce a
	// meaningless cutoff and delete far more than the caller intended.
	const maxRetentionDays = 36500
	// The expected payload is a few bytes; cap the body so a client cannot
	// make the decoder read an unbounded stream.
	const maxBodyBytes = 4 << 10

	var req struct {
		Days int `json:"days"`
	}
	r.Body = http.MaxBytesReader(w, r.Body, maxBodyBytes)
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil || req.Days <= 0 {
		httpx.WriteJSON(w, http.StatusBadRequest, map[string]any{"error": "invalid_request", "message": "days must be a positive integer"})
		return
	}
	if req.Days > maxRetentionDays {
		httpx.WriteJSON(w, http.StatusBadRequest, map[string]any{"error": "invalid_request", "message": "days must not exceed 36500"})
		return
	}

	cutoff := time.Now().AddDate(0, 0, -req.Days)
	deleted, err := db.DeleteOlderThan(r.Context(), cutoff)
	if err != nil {
		httpx.WriteJSON(w, http.StatusInternalServerError, map[string]any{"error": "db_error"})
		return
	}
	httpx.WriteJSON(w, http.StatusOK, map[string]any{"deleted": deleted, "cutoff": cutoff.Format(time.RFC3339)})
})
r.Get("/v1/stats/timeseries", func(w http.ResponseWriter, r *http.Request) {
window := r.URL.Query().Get("window")
switch window {
@@ -267,6 +325,27 @@ func main() {
httpx.WriteJSON(w, http.StatusOK, timeseries)
})
// Background retention cleanup: every 24 hours, delete everything older than
// RETENTION_DAYS days. The default is 30 days; a value that is not a positive
// integer is rejected with a log line instead of being silently ignored, so a
// misconfiguration is visible at start-up.
retentionDays := 30
if v := os.Getenv("RETENTION_DAYS"); v != "" {
	n, err := strconv.Atoi(v)
	switch {
	case err != nil:
		log.Printf("ignoring invalid RETENTION_DAYS=%q (%v); using default of %d days", v, err, retentionDays)
	case n <= 0:
		log.Printf("ignoring non-positive RETENTION_DAYS=%q; using default of %d days", v, retentionDays)
	default:
		retentionDays = n
	}
}
go func() {
	// The first tick arrives one full interval after start-up, so no cleanup
	// runs until the process has been up for 24 hours.
	ticker := time.NewTicker(24 * time.Hour)
	defer ticker.Stop()
	for range ticker.C {
		cutoff := time.Now().AddDate(0, 0, -retentionDays)
		deleted, err := db.DeleteOlderThan(context.Background(), cutoff)
		if err != nil {
			log.Printf("retention cleanup error: %v", err)
			continue
		}
		// Stay quiet when nothing was removed.
		if deleted > 0 {
			log.Printf("retention cleanup: deleted %d events older than %s", deleted, cutoff.Format(time.RFC3339))
		}
	}
}()
log.Printf("query-api listening on %s", addr)
log.Fatal(http.ListenAndServe(addr, r))
}