feat(query-api): add richer stats and retention
This commit is contained in:
+97
-18
@@ -1,7 +1,9 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
@@ -18,11 +20,16 @@ import (
|
||||
"github.com/nats-io/nats.go"
|
||||
)
|
||||
|
||||
type wsClient struct {
|
||||
conn *websocket.Conn
|
||||
send chan []byte
|
||||
}
|
||||
|
||||
var (
|
||||
wsUpgrader = websocket.Upgrader{
|
||||
CheckOrigin: func(r *http.Request) bool { return true },
|
||||
}
|
||||
wsClients = make(map[*websocket.Conn]bool)
|
||||
wsClients = make(map[*wsClient]bool)
|
||||
wsMu sync.RWMutex
|
||||
natsConn *nats.Conn
|
||||
)
|
||||
@@ -31,23 +38,15 @@ func subscribeToNATS(nc *nats.Conn) {
|
||||
topic := envDefault("NATS_TOPIC", "agentmon.events.v1")
|
||||
sub, err := nc.Subscribe(topic, func(msg *nats.Msg) {
|
||||
wsMu.RLock()
|
||||
var stale []*websocket.Conn
|
||||
for conn := range wsClients {
|
||||
err := conn.WriteMessage(websocket.TextMessage, msg.Data)
|
||||
if err != nil {
|
||||
conn.Close()
|
||||
stale = append(stale, conn)
|
||||
for client := range wsClients {
|
||||
select {
|
||||
case client.send <- msg.Data:
|
||||
default:
|
||||
// Slow client; close and remove in background.
|
||||
go removeClient(client)
|
||||
}
|
||||
}
|
||||
wsMu.RUnlock()
|
||||
|
||||
if len(stale) > 0 {
|
||||
wsMu.Lock()
|
||||
for _, conn := range stale {
|
||||
delete(wsClients, conn)
|
||||
}
|
||||
wsMu.Unlock()
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
log.Printf("failed to subscribe to NATS: %v", err)
|
||||
@@ -57,19 +56,44 @@ func subscribeToNATS(nc *nats.Conn) {
|
||||
_ = sub
|
||||
}
|
||||
|
||||
func removeClient(c *wsClient) {
|
||||
wsMu.Lock()
|
||||
if wsClients[c] {
|
||||
delete(wsClients, c)
|
||||
close(c.send)
|
||||
c.conn.Close()
|
||||
}
|
||||
wsMu.Unlock()
|
||||
}
|
||||
|
||||
func wsHandler(w http.ResponseWriter, r *http.Request) {
|
||||
conn, err := wsUpgrader.Upgrade(w, r, nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
client := &wsClient{
|
||||
conn: conn,
|
||||
send: make(chan []byte, 256),
|
||||
}
|
||||
|
||||
wsMu.Lock()
|
||||
wsClients[conn] = true
|
||||
wsClients[client] = true
|
||||
wsMu.Unlock()
|
||||
|
||||
log.Printf("WebSocket client connected")
|
||||
|
||||
// Writer goroutine: sole owner of conn writes.
|
||||
go func() {
|
||||
defer conn.Close()
|
||||
for msg := range client.send {
|
||||
if err := conn.WriteMessage(websocket.TextMessage, msg); err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Read loop blocks until the client disconnects.
|
||||
for {
|
||||
_, _, err := conn.ReadMessage()
|
||||
if err != nil {
|
||||
@@ -78,8 +102,12 @@ func wsHandler(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
wsMu.Lock()
|
||||
delete(wsClients, conn)
|
||||
if wsClients[client] {
|
||||
delete(wsClients, client)
|
||||
close(client.send)
|
||||
}
|
||||
wsMu.Unlock()
|
||||
|
||||
log.Printf("WebSocket client disconnected")
|
||||
}
|
||||
|
||||
@@ -249,6 +277,36 @@ func main() {
|
||||
httpx.WriteJSON(w, http.StatusOK, map[string]any{"tools": tools})
|
||||
})
|
||||
|
||||
r.Get("/v1/stats/top-models", func(w http.ResponseWriter, r *http.Request) {
|
||||
limit, _ := strconv.Atoi(r.URL.Query().Get("limit"))
|
||||
models, err := db.GetTopModels(r.Context(), limit)
|
||||
if err != nil {
|
||||
httpx.WriteJSON(w, http.StatusInternalServerError, map[string]any{"error": "db_error"})
|
||||
return
|
||||
}
|
||||
if models == nil {
|
||||
models = []postgres.TopModel{}
|
||||
}
|
||||
httpx.WriteJSON(w, http.StatusOK, map[string]any{"models": models})
|
||||
})
|
||||
|
||||
r.Post("/v1/admin/retention", func(w http.ResponseWriter, r *http.Request) {
|
||||
var req struct {
|
||||
Days int `json:"days"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil || req.Days <= 0 {
|
||||
httpx.WriteJSON(w, http.StatusBadRequest, map[string]any{"error": "invalid_request", "message": "days must be a positive integer"})
|
||||
return
|
||||
}
|
||||
cutoff := time.Now().AddDate(0, 0, -req.Days)
|
||||
deleted, err := db.DeleteOlderThan(r.Context(), cutoff)
|
||||
if err != nil {
|
||||
httpx.WriteJSON(w, http.StatusInternalServerError, map[string]any{"error": "db_error"})
|
||||
return
|
||||
}
|
||||
httpx.WriteJSON(w, http.StatusOK, map[string]any{"deleted": deleted, "cutoff": cutoff.Format(time.RFC3339)})
|
||||
})
|
||||
|
||||
r.Get("/v1/stats/timeseries", func(w http.ResponseWriter, r *http.Request) {
|
||||
window := r.URL.Query().Get("window")
|
||||
switch window {
|
||||
@@ -267,6 +325,27 @@ func main() {
|
||||
httpx.WriteJSON(w, http.StatusOK, timeseries)
|
||||
})
|
||||
|
||||
// Background retention cleanup
|
||||
retentionDays := 30
|
||||
if v := os.Getenv("RETENTION_DAYS"); v != "" {
|
||||
if n, err := strconv.Atoi(v); err == nil && n > 0 {
|
||||
retentionDays = n
|
||||
}
|
||||
}
|
||||
go func() {
|
||||
ticker := time.NewTicker(24 * time.Hour)
|
||||
defer ticker.Stop()
|
||||
for range ticker.C {
|
||||
cutoff := time.Now().AddDate(0, 0, -retentionDays)
|
||||
deleted, err := db.DeleteOlderThan(context.Background(), cutoff)
|
||||
if err != nil {
|
||||
log.Printf("retention cleanup error: %v", err)
|
||||
} else if deleted > 0 {
|
||||
log.Printf("retention cleanup: deleted %d events older than %s", deleted, cutoff.Format(time.RFC3339))
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
log.Printf("query-api listening on %s", addr)
|
||||
log.Fatal(http.ListenAndServe(addr, r))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user