From 1421b4659e7cc54741f87c75433ab9f955d5f1f1 Mon Sep 17 00:00:00 2001
From: OpenCode Test
Date: Wed, 24 Dec 2025 13:03:08 -0800
Subject: [PATCH] feat: implement ControlTower TUI for cluster and host monitoring

Add a complete TUI application for monitoring Kubernetes clusters and
host systems.

Core features:
- Collector framework with concurrent scheduling
- Host collectors: disk, memory, load, network
- Kubernetes collectors: pods, nodes, workloads, events with informers
- Issue deduplication, state management, and resolve-after logic
- Bubble Tea TUI with table view, details pane, and filtering
- JSON export functionality

UX improvements:
- Help overlay with keybindings
- Priority/category filters with visual indicators
- Direct priority jump (0/1/2/3)
- Bulk acknowledge (Shift+A)
- Clipboard copy (y)
- Theme toggle (T)
- Age format toggle (d)
- Wide title toggle (t)
- Vi-style navigation (j/k)
- Home/End jump (g/G)
- Rollup drill-down in details

Robustness:
- Grace period for unreachable clusters
- Rollups for high-volume issues
- Flap suppression
- RBAC error handling

Files: all core application code, with tests for the host collectors,
engine, store, model, and export packages.
---
 cmd/controltower/main.go | 212 +++
 go.mod | 69 ++
 go.sum | 201 ++++
 internal/collectors/collector.go | 45 +
 internal/collectors/host/disk.go | 287 ++++++
 internal/collectors/host/disk_test.go | 80 ++
 internal/collectors/host/load.go | 127 +++
 internal/collectors/host/load_test.go | 48 +
 internal/collectors/host/mem.go | 205 ++++
 internal/collectors/host/mem_test.go | 83 ++
 internal/collectors/host/net.go | 138 +++
 internal/collectors/host/net_test.go | 28 +
 internal/collectors/k8s/client.go | 88 ++
 internal/collectors/k8s/informers.go | 720 ++++++++++++++
 internal/collectors/k8s/issues_events.go | 101 ++
 internal/collectors/k8s/issues_events_test.go | 5 +
 internal/collectors/k8s/issues_nodes.go | 79 ++
 internal/collectors/k8s/issues_nodes_test.go | 5 +
 internal/collectors/k8s/issues_pods.go | 169 ++++
 internal/collectors/k8s/issues_pods_test.go | 5 +
 internal/collectors/k8s/issues_workloads.go | 174 ++++
 .../collectors/k8s/issues_workloads_test.go | 5 +
 internal/collectors/k8s/rollup.go | 128 +++
 internal/collectors/k8s/rollup_test.go | 10 +
 internal/collectors/k8s/unreachable.go | 133 +++
 internal/collectors/k8s/unreachable_test.go | 5 +
 internal/engine/engine.go | 309 ++++++
 internal/engine/engine_test.go | 225 +++++
 internal/export/json.go | 98 ++
 internal/export/json_test.go | 47 +
 internal/model/issue.go | 217 +++++
 internal/model/issue_test.go | 75 ++
 internal/store/store.go | 182 ++++
 internal/store/store_test.go | 101 ++
 internal/ui/app.go | 886 ++++++++++++++++++
 internal/ui/details.go | 105 +++
 internal/ui/help.go | 152 +++
 internal/ui/keys.go | 141 +++
 internal/ui/styles.go | 122 +++
 internal/ui/table.go | 131 +++
 40 files changed, 5941 insertions(+)
 create mode 100644 cmd/controltower/main.go
 create mode 100644 go.mod
 create mode 100644 go.sum
 create mode 100644 internal/collectors/collector.go
 create mode 100644 internal/collectors/host/disk.go
 create mode 100644 internal/collectors/host/disk_test.go
 create mode 100644 internal/collectors/host/load.go
 create mode 100644 internal/collectors/host/load_test.go
 create mode 100644 internal/collectors/host/mem.go
 create mode 100644 internal/collectors/host/mem_test.go
 create mode 100644 internal/collectors/host/net.go
 create mode 100644 internal/collectors/host/net_test.go
 create mode 100644 internal/collectors/k8s/client.go
 create mode
100644 internal/collectors/k8s/informers.go create mode 100644 internal/collectors/k8s/issues_events.go create mode 100644 internal/collectors/k8s/issues_events_test.go create mode 100644 internal/collectors/k8s/issues_nodes.go create mode 100644 internal/collectors/k8s/issues_nodes_test.go create mode 100644 internal/collectors/k8s/issues_pods.go create mode 100644 internal/collectors/k8s/issues_pods_test.go create mode 100644 internal/collectors/k8s/issues_workloads.go create mode 100644 internal/collectors/k8s/issues_workloads_test.go create mode 100644 internal/collectors/k8s/rollup.go create mode 100644 internal/collectors/k8s/rollup_test.go create mode 100644 internal/collectors/k8s/unreachable.go create mode 100644 internal/collectors/k8s/unreachable_test.go create mode 100644 internal/engine/engine.go create mode 100644 internal/engine/engine_test.go create mode 100644 internal/export/json.go create mode 100644 internal/export/json_test.go create mode 100644 internal/model/issue.go create mode 100644 internal/model/issue_test.go create mode 100644 internal/store/store.go create mode 100644 internal/store/store_test.go create mode 100644 internal/ui/app.go create mode 100644 internal/ui/details.go create mode 100644 internal/ui/help.go create mode 100644 internal/ui/keys.go create mode 100644 internal/ui/styles.go create mode 100644 internal/ui/table.go diff --git a/cmd/controltower/main.go b/cmd/controltower/main.go new file mode 100644 index 0000000..ee0d821 --- /dev/null +++ b/cmd/controltower/main.go @@ -0,0 +1,212 @@ +package main + +import ( + "context" + "flag" + "fmt" + "os" + "os/signal" + "path/filepath" + "strings" + "syscall" + "time" + + bubbletea "github.com/charmbracelet/bubbletea" + + "tower/internal/collectors" + "tower/internal/collectors/host" + collectorsk8s "tower/internal/collectors/k8s" + "tower/internal/engine" + "tower/internal/export" + "tower/internal/model" + "tower/internal/store" + "tower/internal/ui" +) + +const ( + defaultRefreshInterval = 1 * time.Second + defaultResolveAfter = 30 * time.Second + collectorTimeoutFast = 250 * time.Millisecond + collectorTimeoutK8sList = 2 * time.Second + k8sUnreachableGraceDefault = 10 * time.Second +) + +func main() { + var exportPath string + flag.StringVar(&exportPath, "export", "", "write issues JSON snapshot to this path and exit") + flag.Parse() + + if exportPath != "" { + if err := validateExportPath(exportPath); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + } + + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer cancel() + + st := store.New(defaultResolveAfter) + + configs := []engine.CollectorConfig{ + {Collector: host.NewDiskCollector(), Timeout: collectorTimeoutFast}, + {Collector: host.NewMemCollector(), Timeout: collectorTimeoutFast}, + {Collector: host.NewLoadCollector(), Timeout: collectorTimeoutFast}, + {Collector: host.NewNetCollector(), Timeout: collectorTimeoutFast}, + } + + // If kubeconfig is present, register the full Kubernetes collector (informers + // with polling fallback, rules, rollups, and unreachable grace). + if kubeconfigExists() { + configs = append(configs, engine.CollectorConfig{Collector: collectorsk8s.NewCollector(), Timeout: collectorTimeoutK8sList}) + } + + eng := engine.New(st, configs, defaultRefreshInterval) + eng.Start(ctx) + defer eng.Stop() + + if exportPath != "" { + // Give collectors a brief moment to run their initial collection. 
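+	// The 200ms pause below is a heuristic. A minimal alternative sketch,
+	// assuming eng.Snapshots() (the same source handed to the UI further
+	// down) is a channel that delivers a value once the first collection
+	// tick has been applied to the store, would be to wait for that signal:
+	//
+	//	select {
+	//	case <-eng.Snapshots():
+	//	case <-time.After(2 * time.Second):
+	//	case <-ctx.Done():
+	//		os.Exit(1)
+	//	}
+	//
+	// The fixed sleep is kept here so the export path stays independent of
+	// the engine's notification semantics.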
+ select { + case <-time.After(200 * time.Millisecond): + case <-ctx.Done(): + os.Exit(1) + } + + snap := st.Snapshot(time.Now()) + if err := export.WriteIssues(exportPath, snap); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + return + } + + // Run Bubble Tea UI. + m := ui.New("", eng.Snapshots(), eng.RefreshNow, st.Acknowledge, st.Unacknowledge, export.WriteIssues) + p := bubbletea.NewProgram(m, bubbletea.WithAltScreen()) + if _, err := p.Run(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} + +func kubeconfigExists() bool { + // Respect KUBECONFIG when set; otherwise check ~/.kube/config. + if p := os.Getenv("KUBECONFIG"); p != "" { + _, err := os.Stat(p) + return err == nil + } + if h, err := os.UserHomeDir(); err == nil { + p := filepath.Join(h, ".kube", "config") + _, err := os.Stat(p) + return err == nil + } + return false +} + +func validateExportPath(path string) error { + cleanPath := filepath.Clean(path) + + if strings.Contains(cleanPath, ".."+string(filepath.Separator)) { + return fmt.Errorf("path traversal not allowed in export path: %s", path) + } + + if filepath.IsAbs(cleanPath) { + return fmt.Errorf("absolute paths not allowed in export path: %s", path) + } + + return nil +} + +// k8sConnectivityCollector is a minimal Kubernetes collector. +// It only validates connectivity/auth and emits a P0 issue after a grace window. +// +// Full cluster state collection is implemented elsewhere; this keeps main wired +// and provides a useful health signal the UI can display. +// +// NOTE: This collector intentionally returns nil error on connectivity issues so +// the Engine does not "freeze" last-known issues. +// +// It does not use informers (cheap) and runs at a low cadence. +// +//nolint:unused // referenced via newK8sConnectivityCollector +type unreachableTracker struct { + grace time.Duration + firstFailureAt time.Time + lastErr error +} + +func newUnreachableTracker(grace time.Duration) *unreachableTracker { + if grace <= 0 { + grace = 10 * time.Second + } + return &unreachableTracker{grace: grace} +} + +func (t *unreachableTracker) observeSuccess() { + t.firstFailureAt = time.Time{} + t.lastErr = nil +} + +func (t *unreachableTracker) observeFailure(now time.Time, err error) { + if err == nil { + return + } + t.lastErr = err + if t.firstFailureAt.IsZero() { + t.firstFailureAt = now + } +} + +func (t *unreachableTracker) shouldEmit(now time.Time) bool { + return t.lastErr != nil && !t.firstFailureAt.IsZero() && now.Sub(t.firstFailureAt) >= t.grace +} + +type k8sConnectivityCollector struct { + tracker *unreachableTracker +} + +func newK8sConnectivityCollector() collectors.Collector { + return &k8sConnectivityCollector{tracker: newUnreachableTracker(k8sUnreachableGraceDefault)} +} + +func (c *k8sConnectivityCollector) Name() string { return "k8s:connectivity" } + +func (c *k8sConnectivityCollector) Interval() time.Duration { return 5 * time.Second } + +func (c *k8sConnectivityCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) { + now := time.Now() + cs, _, err := collectorsk8s.ClientFromCurrentContext() + if err != nil { + c.tracker.observeFailure(now, err) + return c.issuesForFailure(now, err), collectors.Status{Health: collectors.HealthDegraded, Message: "kubeconfig/client error"}, nil + } + + // Short ping to validate reachability. 
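+	// Ping lives in the k8s package and is not shown in this hunk. A minimal
+	// sketch (not necessarily the actual implementation) could probe the API
+	// server's readiness endpoint through the discovery REST client:
+	//
+	//	func Ping(ctx context.Context, cs *kubernetes.Clientset) error {
+	//		return cs.Discovery().RESTClient().
+	//			Get().AbsPath("/readyz").Do(ctx).Error()
+	//	}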
+ pingErr := collectorsk8s.Ping(ctx, cs) + if pingErr == nil { + c.tracker.observeSuccess() + return nil, collectors.OKStatus(), nil + } + + c.tracker.observeFailure(now, pingErr) + return c.issuesForFailure(now, pingErr), collectors.Status{Health: collectors.HealthDegraded, Message: "k8s ping failed"}, nil +} + +func (c *k8sConnectivityCollector) issuesForFailure(now time.Time, err error) []model.Issue { + if c.tracker.shouldEmit(now) { + return []model.Issue{model.Issue{ + ID: "k8s:cluster:unreachable", + Category: model.CategoryKubernetes, + Priority: model.PriorityP0, + Title: "Kubernetes cluster unreachable / auth failed", + Details: fmt.Sprintf("Kubernetes API unreachable or credentials invalid. Last error: %v", err), + Evidence: map[string]string{"reason": "Unreachable"}, + SuggestedFix: "kubectl cluster-info\nkubectl get nodes", + }} + } + return nil +} + +// Keep otherwise-unused constants referenced. +var _ = []any{collectors.HealthOK, collectorTimeoutFast, collectorTimeoutK8sList} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..862b9f6 --- /dev/null +++ b/go.mod @@ -0,0 +1,69 @@ +module tower + +go 1.23.0 + +require ( + github.com/atotto/clipboard v0.1.4 + github.com/charmbracelet/bubbles v0.21.0 + github.com/charmbracelet/bubbletea v1.3.4 + github.com/charmbracelet/lipgloss v1.1.0 + k8s.io/api v0.30.3 + k8s.io/apimachinery v0.30.3 + k8s.io/client-go v0.30.3 +) + +require ( + github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect + github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect + github.com/charmbracelet/x/ansi v0.8.0 // indirect + github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd // indirect + github.com/charmbracelet/x/term v0.2.1 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect + github.com/go-logr/logr v1.4.1 // indirect + github.com/go-openapi/jsonpointer v0.19.6 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.22.3 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/uuid v1.3.0 // indirect + github.com/imdario/mergo v0.3.6 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/lucasb-eyer/go-colorful v1.2.0 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mattn/go-localereader v0.0.1 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect + github.com/muesli/cancelreader v0.2.2 // indirect + github.com/muesli/termenv v0.16.0 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/rivo/uniseg v0.4.7 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect + golang.org/x/net v0.23.0 // indirect + golang.org/x/oauth2 v0.10.0 // indirect + golang.org/x/sync v0.11.0 // indirect + golang.org/x/sys v0.30.0 // indirect + 
golang.org/x/term v0.18.0 // indirect + golang.org/x/text v0.14.0 // indirect + golang.org/x/time v0.3.0 // indirect + google.golang.org/appengine v1.6.7 // indirect + google.golang.org/protobuf v1.33.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/klog/v2 v2.120.1 // indirect + k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect + k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + sigs.k8s.io/yaml v1.3.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..090e87f --- /dev/null +++ b/go.sum @@ -0,0 +1,201 @@ +github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= +github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= +github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= +github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= +github.com/aymanbagabas/go-udiff v0.2.0 h1:TK0fH4MteXUDspT88n8CKzvK0X9O2xu9yQjWpi6yML8= +github.com/aymanbagabas/go-udiff v0.2.0/go.mod h1:RE4Ex0qsGkTAJoQdQQCA0uG+nAzJO/pI/QwceO5fgrA= +github.com/charmbracelet/bubbles v0.21.0 h1:9TdC97SdRVg/1aaXNVWfFH3nnLAwOXr8Fn6u6mfQdFs= +github.com/charmbracelet/bubbles v0.21.0/go.mod h1:HF+v6QUR4HkEpz62dx7ym2xc71/KBHg+zKwJtMw+qtg= +github.com/charmbracelet/bubbletea v1.3.4 h1:kCg7B+jSCFPLYRA52SDZjr51kG/fMUEoPoZrkaDHyoI= +github.com/charmbracelet/bubbletea v1.3.4/go.mod h1:dtcUCyCGEX3g9tosuYiut3MXgY/Jsv9nKVdibKKRRXo= +github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc h1:4pZI35227imm7yK2bGPcfpFEmuY1gc2YSTShr4iJBfs= +github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc/go.mod h1:X4/0JoqgTIPSFcRA/P6INZzIuyqdFY5rm8tb41s9okk= +github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY= +github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30= +github.com/charmbracelet/x/ansi v0.8.0 h1:9GTq3xq9caJW8ZrBTe0LIe2fvfLR/bYXKTx2llXn7xE= +github.com/charmbracelet/x/ansi v0.8.0/go.mod h1:wdYl/ONOLHLIVmQaxbIYEC/cRKOQyjTkowiI4blgS9Q= +github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd h1:vy0GVL4jeHEwG5YOXDmi86oYw2yuYUGqz6a8sLwg0X8= +github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd/go.mod h1:xe0nKWGd3eJgtqZRaN9RjMtK7xUYchjzPr7q6kcvCCs= +github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91 h1:payRxjMjKgx2PaCWLZ4p3ro9y97+TVLZNaRZgJwSVDQ= +github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91/go.mod h1:wDlXFlCrmJ8J+swcL/MnGUuYnqgQdW9rhSD61oNMb6U= +github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ= +github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= 
+github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= +github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= +github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28= +github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 
h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= +github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4= +github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI= +github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo= +github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= +github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= +github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc= +github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.15.0 h1:79HwNRBAZHOEwrczrgSOPy+eFTTlIGELKy5as+ClttY= +github.com/onsi/ginkgo/v2 v2.15.0/go.mod h1:HlxMHtYF57y6Dpf+mc5529KKmSq9h2FpCF+/ZkwUxKM= +github.com/onsi/gomega v1.31.0 h1:54UJxxj6cPInHS3a35wm6BK/F9nHYueZ1NVujHDrnXE= +github.com/onsi/gomega v1.31.0/go.mod h1:DW9aCi7U6Yi40wNVAvT6kzFnEVEI5n3DloYBiKiT6zk= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/rogpeppe/go-internal v1.10.0 
h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= +github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E= +golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/oauth2 v0.10.0 h1:zHCpF2Khkwy4mMB4bv0U37YtJdTGW8jI0glAApi0Kh8= +golang.org/x/oauth2 v0.10.0/go.mod h1:kTpgurOux7LqtuxjuyZa4Gj2gdezIt/jQtGnNFfypQI= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= +golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= +golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= +golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.18.0 h1:k8NLag8AGHnn+PHbl7g43CtqZAwG60vZkLqgyZgIHgQ= +golang.org/x/tools v0.18.0/go.mod h1:GL7B4CwcLLeo59yx/9UWWuNOW1n3VZ4f5axWfML7Lcg= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= +google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.30.3 h1:ImHwK9DCsPA9uoU3rVh4QHAHHK5dTSv1nxJUapx8hoQ= +k8s.io/api v0.30.3/go.mod h1:GPc8jlzoe5JG3pb0KJCSLX5oAFIW3/qNJITlDj8BH04= +k8s.io/apimachinery v0.30.3 h1:q1laaWCmrszyQuSQCfNB8cFgCuDAoPszKY4ucAjDwHc= +k8s.io/apimachinery v0.30.3/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= +k8s.io/client-go v0.30.3 h1:bHrJu3xQZNXIi8/MoxYtZBBWQQXwy16zqJwloXXfD3k= +k8s.io/client-go v0.30.3/go.mod h1:8d4pf8vYu665/kUbsxWAQ/JDBNWqfFeZnvFiVdmx89U= +k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= +k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= +sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= diff --git a/internal/collectors/collector.go b/internal/collectors/collector.go new file mode 100644 index 0000000..cd4ff7e --- /dev/null +++ b/internal/collectors/collector.go @@ -0,0 +1,45 @@ +package collectors + +import ( + "context" + "time" + + "tower/internal/model" +) + +type Health string + +const ( + HealthOK Health = "OK" + HealthDegraded Health = "DEGRADED" + HealthError Health = "ERROR" +) + +// Status describes collector health for the current tick. +// +// Collectors should return Status even when returning an error, +// so the UI can show useful context. +// +// LastSuccess should be the collector's most recent successful collect time. +// When unknown, it may be the zero value. +// +// Message should be short and human-friendly. +type Status struct { + Health Health `json:"health"` + Message string `json:"message,omitempty"` + LastSuccess time.Time `json:"last_success,omitempty"` +} + +func OKStatus() Status { + return Status{Health: HealthOK} +} + +// Collector returns "currently true" issues for this tick. +// +// The store is responsible for dedupe, lifecycle, and resolve-after. +// Collectors must respect ctx cancellation. +type Collector interface { + Name() string + Interval() time.Duration + Collect(ctx context.Context) ([]model.Issue, Status, error) +} diff --git a/internal/collectors/host/disk.go b/internal/collectors/host/disk.go new file mode 100644 index 0000000..c76db34 --- /dev/null +++ b/internal/collectors/host/disk.go @@ -0,0 +1,287 @@ +package host + +import ( + "bufio" + "context" + "fmt" + "os" + "strconv" + "strings" + "syscall" + "time" + + "tower/internal/collectors" + "tower/internal/model" +) + +// DiskCollector checks filesystem block + inode pressure across mounts. 
+// +// It reads /proc/mounts to discover mounts and then uses statfs to compute usage. +// Pseudo filesystems are filtered out. +// +// Thresholds (PLAN.md): +// - P1 if blocks OR inodes >= 92% +// - P0 if blocks OR inodes >= 98% +// +// Issues are emitted per mount (one issue that includes both block+inode usage). +// +// NOTE: This collector is Linux-specific. +type DiskCollector struct { + interval time.Duration + + readFile func(string) ([]byte, error) + statfs func(path string, st *syscall.Statfs_t) error +} + +func NewDiskCollector() *DiskCollector { + return &DiskCollector{ + interval: 10 * time.Second, + readFile: os.ReadFile, + statfs: syscall.Statfs, + } +} + +func (c *DiskCollector) Name() string { return "host:disk" } + +func (c *DiskCollector) Interval() time.Duration { + if c.interval <= 0 { + return 10 * time.Second + } + return c.interval +} + +func (c *DiskCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) { + if err := ctx.Err(); err != nil { + return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err + } + + b, err := c.readFile("/proc/mounts") + if err != nil { + return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/mounts"}, err + } + + mounts := parseProcMounts(string(b)) + if len(mounts) == 0 { + // Unusual but treat as degraded rather than hard error. + return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "no mounts found"}, nil + } + + issues := make([]model.Issue, 0, 8) + seenMount := map[string]struct{}{} + + partialErrs := 0 + for _, m := range mounts { + if err := ctx.Err(); err != nil { + return issues, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err + } + if shouldSkipMount(m) { + continue + } + if _, ok := seenMount[m.MountPoint]; ok { + continue + } + seenMount[m.MountPoint] = struct{}{} + + var st syscall.Statfs_t + if err := c.statfs(m.MountPoint, &st); err != nil { + partialErrs++ + continue + } + + blockPct, blockFreeBytes := statfsBlockUsedPct(st) + inodePct := statfsInodeUsedPct(st) + + pri, ok := diskPriority(blockPct, inodePct) + if !ok { + continue + } + + evidence := map[string]string{ + "mount": m.MountPoint, + "fstype": m.FSType, + "block_used_pct": fmt.Sprintf("%.1f", blockPct), + "block_free_bytes": strconv.FormatUint(blockFreeBytes, 10), + } + if inodePct >= 0 { + evidence["inode_used_pct"] = fmt.Sprintf("%.1f", inodePct) + } + + issues = append(issues, model.Issue{ + ID: fmt.Sprintf("host:disk:%s:usage", m.MountPoint), + Category: model.CategoryStorage, + Priority: pri, + Title: fmt.Sprintf("Disk usage high on %s", m.MountPoint), + Details: "Filesystem space and/or inodes are nearly exhausted.", + Evidence: evidence, + SuggestedFix: fmt.Sprintf( + "Inspect usage:\n df -h %s\n df -i %s\nFind large directories:\n sudo du -xh --max-depth=2 %s | sort -h | tail", + m.MountPoint, m.MountPoint, m.MountPoint, + ), + }) + } + + st := collectors.OKStatus() + if partialErrs > 0 { + st.Health = collectors.HealthDegraded + st.Message = fmt.Sprintf("partial failures: %d mounts", partialErrs) + } + return issues, st, nil +} + +type procMount struct { + Device string + MountPoint string + FSType string + Options string +} + +func parseProcMounts(content string) []procMount { + s := bufio.NewScanner(strings.NewReader(content)) + out := make([]procMount, 0, 32) + for s.Scan() { + line := strings.TrimSpace(s.Text()) + if line == "" { + continue + } + fields := strings.Fields(line) + if len(fields) < 3 { + 
continue + } + m := procMount{ + Device: unescapeProcMountsField(fields[0]), + MountPoint: unescapeProcMountsField(fields[1]), + FSType: fields[2], + } + if len(fields) >= 4 { + m.Options = fields[3] + } + out = append(out, m) + } + return out +} + +// /proc/mounts escapes special characters as octal sequences. +// The most common one is a space as \040. +func unescapeProcMountsField(s string) string { + replacer := strings.NewReplacer( + "\\040", " ", + "\\011", "\t", + "\\012", "\n", + "\\134", "\\", + ) + return replacer.Replace(s) +} + +var pseudoFSTypes = map[string]struct{}{ + "proc": {}, + "sysfs": {}, + "tmpfs": {}, + "devtmpfs": {}, + "devpts": {}, + "cgroup": {}, + "cgroup2": {}, + "pstore": {}, + "securityfs": {}, + "debugfs": {}, + "tracefs": {}, + "configfs": {}, + "hugetlbfs": {}, + "mqueue": {}, + "rpc_pipefs": {}, + "fusectl": {}, + "binfmt_misc": {}, + "autofs": {}, + "bpf": {}, + "ramfs": {}, + "nsfs": {}, + "efivarfs": {}, + "overlay": {}, // common container overlay mounts + + "squashfs": {}, // typically read-only images + "selinuxfs": {}, + "systemd-1": {}, + "overlayfs": {}, // (non-standard) conservative skip + + "cgroupfs": {}, + "procfs": {}, + "fuse.lxcfs": {}, + "fuse.gvfsd-fuse": {}, +} + +func shouldSkipMount(m procMount) bool { + if m.MountPoint == "" { + return true + } + // Filter by fstype. + if _, ok := pseudoFSTypes[m.FSType]; ok { + return true + } + // Filter common pseudo mountpoints. + if strings.HasPrefix(m.MountPoint, "/proc") || strings.HasPrefix(m.MountPoint, "/sys") { + return true + } + if strings.HasPrefix(m.MountPoint, "/dev") { + // /dev itself can be a real mount in some cases, but usually isn't useful for disk pressure. + return true + } + return false +} + +func statfsBlockUsedPct(st syscall.Statfs_t) (usedPct float64, freeBytes uint64) { + // Mirror df(1) semantics closely: + // total = f_blocks + // used = f_blocks - f_bfree + // avail = f_bavail (space available to unprivileged user) + // use% = used / (used + avail) + if st.Blocks == 0 { + return 0, 0 + } + + bsize := uint64(st.Bsize) + blocks := uint64(st.Blocks) + bfree := uint64(st.Bfree) + bavail := uint64(st.Bavail) + + usedBlocks := blocks - bfree + denom := usedBlocks + bavail + if denom == 0 { + return 0, 0 + } + + freeBytes = bavail * bsize + usedPct = (float64(usedBlocks) / float64(denom)) * 100.0 + return usedPct, freeBytes +} + +// statfsInodeUsedPct returns inode used percent. If inodes are unavailable (f_files==0), returns -1. +func statfsInodeUsedPct(st syscall.Statfs_t) float64 { + if st.Files == 0 { + return -1 + } + total := float64(st.Files) + free := float64(st.Ffree) + used := total - free + return (used / total) * 100.0 +} + +func diskPriority(blockPct, inodePct float64) (model.Priority, bool) { + maxPct := blockPct + if inodePct > maxPct { + maxPct = inodePct + } + // inodePct may be -1 if not supported; ignore in that case. 
+ if inodePct < 0 { + maxPct = blockPct + } + + switch { + case maxPct >= 98.0: + return model.PriorityP0, true + case maxPct >= 92.0: + return model.PriorityP1, true + default: + return "", false + } +} + +var _ collectors.Collector = (*DiskCollector)(nil) diff --git a/internal/collectors/host/disk_test.go b/internal/collectors/host/disk_test.go new file mode 100644 index 0000000..4daaba3 --- /dev/null +++ b/internal/collectors/host/disk_test.go @@ -0,0 +1,80 @@ +package host + +import ( + "syscall" + "testing" +) + +func TestParseProcMounts_UnescapesAndParses(t *testing.T) { + in := "dev1 / ext4 rw 0 0\n" + + "dev2 /path\\040with\\040space xfs rw 0 0\n" + + "badline\n" + + ms := parseProcMounts(in) + if len(ms) != 2 { + t.Fatalf("expected 2 mounts, got %d", len(ms)) + } + if ms[0].MountPoint != "/" || ms[0].FSType != "ext4" { + t.Fatalf("unexpected first mount: %+v", ms[0]) + } + if ms[1].MountPoint != "/path with space" { + t.Fatalf("expected unescaped mountpoint, got %q", ms[1].MountPoint) + } +} + +func TestShouldSkipMount_FiltersPseudo(t *testing.T) { + cases := []procMount{ + {MountPoint: "/proc", FSType: "proc"}, + {MountPoint: "/sys", FSType: "sysfs"}, + {MountPoint: "/dev", FSType: "tmpfs"}, + {MountPoint: "/dev/shm", FSType: "tmpfs"}, + } + for _, c := range cases { + if !shouldSkipMount(c) { + t.Fatalf("expected skip for %+v", c) + } + } + if shouldSkipMount(procMount{MountPoint: "/home", FSType: "ext4"}) { + t.Fatalf("did not expect skip for /home ext4") + } +} + +func TestDiskPriority(t *testing.T) { + if p, ok := diskPriority(91.9, -1); ok { + t.Fatalf("expected no issue, got %v", p) + } + if p, ok := diskPriority(92.0, -1); !ok || p != "P1" { + t.Fatalf("expected P1 at 92%%, got %v ok=%v", p, ok) + } + if p, ok := diskPriority(97.9, 98.0); !ok || p != "P0" { + t.Fatalf("expected P0 if either crosses 98%%, got %v ok=%v", p, ok) + } +} + +func TestStatfsCalculations(t *testing.T) { + st := syscall.Statfs_t{} + st.Bsize = 1 + st.Blocks = 100 + st.Bfree = 8 + st.Bavail = 8 + + pct, free := statfsBlockUsedPct(st) + if free != 8 { + t.Fatalf("expected free=8 bytes, got %d", free) + } + if pct < 91.9 || pct > 92.1 { + t.Fatalf("expected ~92%% used, got %f", pct) + } + + st.Files = 100 + st.Ffree = 2 + ipct := statfsInodeUsedPct(st) + if ipct < 97.9 || ipct > 98.1 { + t.Fatalf("expected ~98%% inode used, got %f", ipct) + } + + st.Files = 0 + if statfsInodeUsedPct(st) != -1 { + t.Fatalf("expected -1 when inode info unavailable") + } +} diff --git a/internal/collectors/host/load.go b/internal/collectors/host/load.go new file mode 100644 index 0000000..5a0b3fd --- /dev/null +++ b/internal/collectors/host/load.go @@ -0,0 +1,127 @@ +package host + +import ( + "context" + "fmt" + "os" + "runtime" + "strconv" + "strings" + "sync" + "time" + + "tower/internal/collectors" + "tower/internal/model" +) + +// LoadCollector evaluates 1-minute load average normalized by logical CPU count. +// +// Thresholds (PLAN.md), normalized by CPU count: +// - P2 if load1/cpus >= 4.0 sustained 120s +// - P1 if load1/cpus >= 6.0 sustained 120s +// +// NOTE: Linux-specific. +// Thread-safe: Collect() can be called concurrently. 
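+//
+// Worked example (illustrative numbers): on an 8-CPU host, load1 = 32.0
+// normalizes to 4.0 per CPU and raises P2 once sustained for 120s, while
+// load1 = 48.0 (6.0 per CPU) raises P1 over the same window.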
+type LoadCollector struct { + interval time.Duration + + now func() time.Time + readFile func(string) ([]byte, error) + cpuCount func() int + + mu sync.Mutex + + pri model.Priority + since time.Time +} + +func NewLoadCollector() *LoadCollector { + return &LoadCollector{ + interval: 5 * time.Second, + now: time.Now, + readFile: os.ReadFile, + cpuCount: runtime.NumCPU, + } +} + +func (c *LoadCollector) Name() string { return "host:load" } + +func (c *LoadCollector) Interval() time.Duration { + if c.interval <= 0 { + return 5 * time.Second + } + return c.interval +} + +func (c *LoadCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) { + if err := ctx.Err(); err != nil { + return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err + } + + now := c.now() + b, err := c.readFile("/proc/loadavg") + if err != nil { + return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/loadavg"}, err + } + + load1, err := parseProcLoadavgFirst(string(b)) + if err != nil { + return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "bad /proc/loadavg"}, nil + } + + cpus := c.cpuCount() + if cpus <= 0 { + cpus = 1 + } + norm := load1 / float64(cpus) + desired, window := desiredLoadPriority(norm) + c.mu.Lock() + c.pri, c.since = updateSustained(now, c.pri, c.since, desired) + pri, since := c.pri, c.since + c.mu.Unlock() + + if pri == "" || since.IsZero() || now.Sub(since) < window { + return nil, collectors.OKStatus(), nil + } + + iss := model.Issue{ + ID: "host:load:high", + Category: model.CategoryPerformance, + Priority: pri, + Title: "High sustained system load", + Details: "The 1-minute load average is high relative to CPU count for a sustained period.", + Evidence: map[string]string{ + "load1": fmt.Sprintf("%.2f", load1), + "cpus": strconv.Itoa(cpus), + "load1_per_cpu": fmt.Sprintf("%.2f", norm), + "sustained_window": window.String(), + }, + SuggestedFix: "Investigate CPU hogs:\n top\n ps -eo pid,ppid,cmd,%cpu --sort=-%cpu | head\nIf I/O bound (high iowait), check disk/network.\n", + } + return []model.Issue{iss}, collectors.OKStatus(), nil +} + +func parseProcLoadavgFirst(content string) (float64, error) { + // /proc/loadavg format: "1.23 0.70 0.50 1/123 4567". 
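+	// The fields are the 1-, 5- and 15-minute averages, runnable/total
+	// scheduling entities, and the most recently allocated PID; only the
+	// first field is used here.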
+ fields := strings.Fields(content) + if len(fields) < 1 { + return 0, fmt.Errorf("missing fields") + } + v, err := strconv.ParseFloat(fields[0], 64) + if err != nil { + return 0, err + } + return v, nil +} + +func desiredLoadPriority(loadPerCPU float64) (model.Priority, time.Duration) { + if loadPerCPU >= 6.0 { + return model.PriorityP1, 120 * time.Second + } + if loadPerCPU >= 4.0 { + return model.PriorityP2, 120 * time.Second + } + return "", 0 +} + +var _ collectors.Collector = (*LoadCollector)(nil) diff --git a/internal/collectors/host/load_test.go b/internal/collectors/host/load_test.go new file mode 100644 index 0000000..4d3b28c --- /dev/null +++ b/internal/collectors/host/load_test.go @@ -0,0 +1,48 @@ +package host + +import ( + "testing" + "time" + + "tower/internal/model" +) + +func TestParseProcLoadavgFirst(t *testing.T) { + v, err := parseProcLoadavgFirst("1.23 0.70 0.50 1/123 4567\n") + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if v < 1.229 || v > 1.231 { + t.Fatalf("expected 1.23, got %v", v) + } + if _, err := parseProcLoadavgFirst("\n"); err == nil { + t.Fatalf("expected error") + } +} + +func TestDesiredLoadPriority(t *testing.T) { + p, w := desiredLoadPriority(3.99) + if p != "" || w != 0 { + t.Fatalf("expected none") + } + p, w = desiredLoadPriority(4.0) + if p != model.PriorityP2 || w != 120*time.Second { + t.Fatalf("expected P2/120s") + } + p, w = desiredLoadPriority(6.0) + if p != model.PriorityP1 || w != 120*time.Second { + t.Fatalf("expected P1/120s") + } +} + +func TestUpdateSustainedWorksForLoadToo(t *testing.T) { + now := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC) + p, since := updateSustained(now, "", time.Time{}, model.PriorityP2) + if p != model.PriorityP2 || !since.Equal(now) { + t.Fatalf("expected set") + } + p2, since2 := updateSustained(now.Add(10*time.Second), p, since, model.PriorityP2) + if p2 != model.PriorityP2 || !since2.Equal(since) { + t.Fatalf("expected unchanged") + } +} diff --git a/internal/collectors/host/mem.go b/internal/collectors/host/mem.go new file mode 100644 index 0000000..e6c1d0d --- /dev/null +++ b/internal/collectors/host/mem.go @@ -0,0 +1,205 @@ +package host + +import ( + "bufio" + "context" + "fmt" + "os" + "strconv" + "strings" + "sync" + "time" + + "tower/internal/collectors" + "tower/internal/model" +) + +// MemCollector checks MemAvailable and swap pressure from /proc/meminfo. +// +// Thresholds (PLAN.md): +// Memory (MemAvailable as % of MemTotal): +// - P2 if <= 15% sustained 60s +// - P1 if <= 10% sustained 60s +// - P0 if <= 5% sustained 30s +// +// Swap pressure (only if RAM is also tight): +// - P1 if swap used >= 50% AND MemAvailable <= 10% sustained 60s +// - P0 if swap used >= 80% AND MemAvailable <= 5% sustained 30s +// +// Emits up to two issues: +// - host:mem:available +// - host:mem:swap +// +// NOTE: Linux-specific. +// Thread-safe: Collect() can be called concurrently. 
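+//
+// Worked example (illustrative numbers): with MemTotal = 16 GiB, a
+// MemAvailable of 1.5 GiB (~9.4%) sustained for 60s raises P1, and
+// 700 MiB (~4.3%) sustained for 30s raises P0. Swap usage alone never
+// alerts; it must coincide with low MemAvailable.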
+type MemCollector struct { + interval time.Duration + + now func() time.Time + readFile func(string) ([]byte, error) + + mu sync.Mutex + + memPri model.Priority + memSince time.Time + + swapPri model.Priority + swapSince time.Time +} + +func NewMemCollector() *MemCollector { + return &MemCollector{ + interval: 5 * time.Second, + now: time.Now, + readFile: os.ReadFile, + } +} + +func (c *MemCollector) Name() string { return "host:mem" } + +func (c *MemCollector) Interval() time.Duration { + if c.interval <= 0 { + return 5 * time.Second + } + return c.interval +} + +func (c *MemCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) { + if err := ctx.Err(); err != nil { + return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err + } + + now := c.now() + b, err := c.readFile("/proc/meminfo") + if err != nil { + return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/meminfo"}, err + } + + mi := parseProcMeminfo(string(b)) + memTotalKB, okT := mi["MemTotal"] + memAvailKB, okA := mi["MemAvailable"] + if !okT || !okA || memTotalKB <= 0 { + return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "missing MemTotal/MemAvailable"}, nil + } + + memAvailPct := (float64(memAvailKB) / float64(memTotalKB)) * 100.0 + + desiredMemPri, memWindow := desiredMemPriority(memAvailPct) + c.mu.Lock() + c.memPri, c.memSince = updateSustained(now, c.memPri, c.memSince, desiredMemPri) + memPri, memSince := c.memPri, c.memSince + c.mu.Unlock() + + issues := make([]model.Issue, 0, 2) + if memPri != "" && !memSince.IsZero() && now.Sub(memSince) >= memWindow { + issues = append(issues, model.Issue{ + ID: "host:mem:available", + Category: model.CategoryMemory, + Priority: memPri, + Title: "Low available memory", + Details: "MemAvailable is low and has remained low for a sustained period.", + Evidence: map[string]string{ + "mem_available_kb": strconv.FormatInt(memAvailKB, 10), + "mem_total_kb": strconv.FormatInt(memTotalKB, 10), + "mem_available_pct": fmt.Sprintf("%.1f", memAvailPct), + }, + SuggestedFix: "Identify memory hogs:\n free -h\n ps aux --sort=-rss | head\nConsider restarting runaway processes or adding RAM.", + }) + } + + swapTotalKB, okST := mi["SwapTotal"] + swapFreeKB, okSF := mi["SwapFree"] + swapUsedPct := 0.0 + if okST && okSF && swapTotalKB > 0 { + swapUsedKB := swapTotalKB - swapFreeKB + swapUsedPct = (float64(swapUsedKB) / float64(swapTotalKB)) * 100.0 + } + + desiredSwapPri, swapWindow := desiredSwapPriority(memAvailPct, swapTotalKB, swapUsedPct) + c.mu.Lock() + c.swapPri, c.swapSince = updateSustained(now, c.swapPri, c.swapSince, desiredSwapPri) + swapPri, swapSince := c.swapPri, c.swapSince + c.mu.Unlock() + if swapPri != "" && !swapSince.IsZero() && now.Sub(swapSince) >= swapWindow { + issues = append(issues, model.Issue{ + ID: "host:mem:swap", + Category: model.CategoryMemory, + Priority: swapPri, + Title: "High swap usage with low RAM", + Details: "Swap usage is high while available RAM is also low, indicating memory pressure.", + Evidence: map[string]string{ + "swap_used_pct": fmt.Sprintf("%.1f", swapUsedPct), + "swap_total_kb": strconv.FormatInt(swapTotalKB, 10), + "mem_available_pct": fmt.Sprintf("%.1f", memAvailPct), + }, + SuggestedFix: "Find swapping processes:\n vmstat 1\n smem -r 2>/dev/null || true\nConsider reducing memory usage or increasing RAM/swap.", + }) + } + + return issues, collectors.OKStatus(), nil +} + +func parseProcMeminfo(content string) map[string]int64 { + out 
:= map[string]int64{} + s := bufio.NewScanner(strings.NewReader(content)) + for s.Scan() { + line := strings.TrimSpace(s.Text()) + if line == "" { + continue + } + // Example: "MemAvailable: 12345 kB" + fields := strings.Fields(line) + if len(fields) < 2 { + continue + } + key := strings.TrimSuffix(fields[0], ":") + v, err := strconv.ParseInt(fields[1], 10, 64) + if err != nil { + continue + } + out[key] = v + } + return out +} + +func desiredMemPriority(memAvailPct float64) (model.Priority, time.Duration) { + switch { + case memAvailPct <= 5.0: + return model.PriorityP0, 30 * time.Second + case memAvailPct <= 10.0: + return model.PriorityP1, 60 * time.Second + case memAvailPct <= 15.0: + return model.PriorityP2, 60 * time.Second + default: + return "", 0 + } +} + +func desiredSwapPriority(memAvailPct float64, swapTotalKB int64, swapUsedPct float64) (model.Priority, time.Duration) { + if swapTotalKB <= 0 { + return "", 0 + } + // Only alert on swap when RAM is also tight. + switch { + case swapUsedPct >= 80.0 && memAvailPct <= 5.0: + return model.PriorityP0, 30 * time.Second + case swapUsedPct >= 50.0 && memAvailPct <= 10.0: + return model.PriorityP1, 60 * time.Second + default: + return "", 0 + } +} + +// updateSustained updates current severity and its since timestamp. +// If desired is empty, it clears the state. +func updateSustained(now time.Time, current model.Priority, since time.Time, desired model.Priority) (model.Priority, time.Time) { + if desired == "" { + return "", time.Time{} + } + if current != desired || since.IsZero() { + return desired, now + } + return current, since +} + +var _ collectors.Collector = (*MemCollector)(nil) diff --git a/internal/collectors/host/mem_test.go b/internal/collectors/host/mem_test.go new file mode 100644 index 0000000..f1461e9 --- /dev/null +++ b/internal/collectors/host/mem_test.go @@ -0,0 +1,83 @@ +package host + +import ( + "testing" + "time" + + "tower/internal/model" +) + +func TestParseProcMeminfo(t *testing.T) { + in := "MemTotal: 8000000 kB\nMemAvailable: 800000 kB\nSwapTotal: 2000000 kB\nSwapFree: 500000 kB\n" + m := parseProcMeminfo(in) + if m["MemTotal"] != 8000000 { + t.Fatalf("MemTotal mismatch: %d", m["MemTotal"]) + } + if m["MemAvailable"] != 800000 { + t.Fatalf("MemAvailable mismatch: %d", m["MemAvailable"]) + } +} + +func TestDesiredMemPriority(t *testing.T) { + p, w := desiredMemPriority(16.0) + if p != "" || w != 0 { + t.Fatalf("expected none") + } + + p, w = desiredMemPriority(15.0) + if p != model.PriorityP2 || w != 60*time.Second { + t.Fatalf("expected P2/60s got %v/%v", p, w) + } + p, w = desiredMemPriority(10.0) + if p != model.PriorityP1 { + t.Fatalf("expected P1 got %v", p) + } + p, w = desiredMemPriority(5.0) + if p != model.PriorityP0 || w != 30*time.Second { + t.Fatalf("expected P0/30s got %v/%v", p, w) + } +} + +func TestDesiredSwapPriority(t *testing.T) { + // No swap configured. + p, _ := desiredSwapPriority(4.0, 0, 90.0) + if p != "" { + t.Fatalf("expected none when SwapTotal=0") + } + + p, w := desiredSwapPriority(4.0, 1000, 80.0) + if p != model.PriorityP0 || w != 30*time.Second { + t.Fatalf("expected P0/30s got %v/%v", p, w) + } + + p, w = desiredSwapPriority(9.9, 1000, 50.0) + if p != model.PriorityP1 || w != 60*time.Second { + t.Fatalf("expected P1/60s got %v/%v", p, w) + } + + // Swap high but RAM not tight => no issue. 
+ p, _ = desiredSwapPriority(20.0, 1000, 90.0) + if p != "" { + t.Fatalf("expected none when RAM not tight") + } +} + +func TestUpdateSustained(t *testing.T) { + now := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC) + p, since := updateSustained(now, "", time.Time{}, model.PriorityP1) + if p != model.PriorityP1 || !since.Equal(now) { + t.Fatalf("expected set to P1 at now") + } + p2, since2 := updateSustained(now.Add(1*time.Second), p, since, model.PriorityP1) + if p2 != model.PriorityP1 || !since2.Equal(since) { + t.Fatalf("expected unchanged since") + } + p3, since3 := updateSustained(now.Add(2*time.Second), p2, since2, model.PriorityP0) + if p3 != model.PriorityP0 || !since3.Equal(now.Add(2*time.Second)) { + t.Fatalf("expected reset on priority change") + } + p4, since4 := updateSustained(now.Add(3*time.Second), p3, since3, "") + if p4 != "" || !since4.IsZero() { + t.Fatalf("expected cleared") + } +} diff --git a/internal/collectors/host/net.go b/internal/collectors/host/net.go new file mode 100644 index 0000000..7dc5b1d --- /dev/null +++ b/internal/collectors/host/net.go @@ -0,0 +1,138 @@ +package host + +import ( + "bufio" + "context" + "os" + "path/filepath" + "strings" + "time" + + "tower/internal/collectors" + "tower/internal/model" +) + +// NetCollector checks for missing default route while at least one non-loopback +// interface is up. +// +// Rule (PLAN.md): +// - P1 if no default route AND any non-loopback interface is UP. +// +// Discovery: +// - Default route from /proc/net/route +// - Interface UP from /sys/class/net/*/operstate +// +// NOTE: Linux-specific. +type NetCollector struct { + interval time.Duration + + readFile func(string) ([]byte, error) + glob func(string) ([]string, error) +} + +func NewNetCollector() *NetCollector { + return &NetCollector{ + interval: 5 * time.Second, + readFile: os.ReadFile, + glob: filepath.Glob, + } +} + +func (c *NetCollector) Name() string { return "host:net" } + +func (c *NetCollector) Interval() time.Duration { + if c.interval <= 0 { + return 5 * time.Second + } + return c.interval +} + +func (c *NetCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) { + if err := ctx.Err(); err != nil { + return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err + } + + routeBytes, err := c.readFile("/proc/net/route") + if err != nil { + return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/net/route"}, err + } + + hasDefault := hasDefaultRoute(string(routeBytes)) + + paths, err := c.glob("/sys/class/net/*/operstate") + if err != nil { + return nil, collectors.Status{Health: collectors.HealthError, Message: "failed listing /sys/class/net"}, err + } + upIfaces := make([]string, 0, 2) + for _, p := range paths { + if err := ctx.Err(); err != nil { + return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err + } + b, err := c.readFile(p) + if err != nil { + continue + } + iface := filepath.Base(filepath.Dir(p)) + if iface == "lo" { + continue + } + state := strings.TrimSpace(string(b)) + if isIfaceUp(state) { + upIfaces = append(upIfaces, iface) + } + } + + if hasDefault || len(upIfaces) == 0 { + return nil, collectors.OKStatus(), nil + } + + iss := model.Issue{ + ID: "host:net:default-route-missing", + Category: model.CategoryNetwork, + Priority: model.PriorityP1, + Title: "No default route", + Details: "At least one network interface is up, but no default route is present.", + Evidence: map[string]string{ + "up_ifaces": 
strings.Join(upIfaces, ","), + }, + SuggestedFix: "Check routing and link state:\n ip route\n ip link\n nmcli dev status\nIf on Wi-Fi, reconnect; if on VPN, verify tunnel routes.", + } + return []model.Issue{iss}, collectors.OKStatus(), nil +} + +func hasDefaultRoute(procNetRoute string) bool { + // /proc/net/route header: + // Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT + // Default route has Destination == 00000000. + s := bufio.NewScanner(strings.NewReader(procNetRoute)) + first := true + for s.Scan() { + line := strings.TrimSpace(s.Text()) + if line == "" { + continue + } + if first { + first = false + // skip header if present + if strings.HasPrefix(line, "Iface") { + continue + } + } + fields := strings.Fields(line) + if len(fields) < 2 { + continue + } + if fields[1] == "00000000" { + return true + } + } + return false +} + +func isIfaceUp(operstate string) bool { + // Linux operstate values include: up, down, unknown, dormant, lowerlayerdown. + s := strings.ToLower(strings.TrimSpace(operstate)) + return s == "up" || s == "unknown" +} + +var _ collectors.Collector = (*NetCollector)(nil) diff --git a/internal/collectors/host/net_test.go b/internal/collectors/host/net_test.go new file mode 100644 index 0000000..c6e64f5 --- /dev/null +++ b/internal/collectors/host/net_test.go @@ -0,0 +1,28 @@ +package host + +import "testing" + +func TestHasDefaultRoute(t *testing.T) { + in := "Iface\tDestination\tGateway\tFlags\n" + + "eth0\t00000000\t0102A8C0\t0003\n" + if !hasDefaultRoute(in) { + t.Fatalf("expected default route") + } + in2 := "Iface Destination Gateway Flags\n" + + "eth0 0010A8C0 00000000 0001\n" + if hasDefaultRoute(in2) { + t.Fatalf("expected no default route") + } +} + +func TestIsIfaceUp(t *testing.T) { + if !isIfaceUp("up\n") { + t.Fatalf("expected true") + } + if !isIfaceUp("unknown") { + t.Fatalf("expected true for unknown") + } + if isIfaceUp("down") { + t.Fatalf("expected false") + } +} diff --git a/internal/collectors/k8s/client.go b/internal/collectors/k8s/client.go new file mode 100644 index 0000000..bdd8ca5 --- /dev/null +++ b/internal/collectors/k8s/client.go @@ -0,0 +1,88 @@ +package k8s + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "time" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" +) + +// ClientFromCurrentContext creates a Kubernetes client-go Clientset using the +// user's kubeconfig current context. +// +// It is a pure helper (no global state) so it can be used by collectors and +// unit tests (with temporary kubeconfig files). +func ClientFromCurrentContext() (*kubernetes.Clientset, *rest.Config, error) { + loadingRules := clientcmd.NewDefaultClientConfigLoadingRules() + + // Respect KUBECONFIG semantics (it may be a path list). + if p := os.Getenv("KUBECONFIG"); p != "" { + if list := filepath.SplitList(p); len(list) > 1 { + loadingRules.ExplicitPath = "" + loadingRules.Precedence = list + } else { + loadingRules.ExplicitPath = p + } + } + + cfg := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, &clientcmd.ConfigOverrides{}) + restCfg, err := cfg.ClientConfig() + if err != nil { + return nil, nil, err + } + + // Ensure HTTP client timeouts are bounded. LIST fallback uses its own context + // timeouts, but this provides a safety net. 
+ if restCfg.Timeout <= 0 { + restCfg.Timeout = 30 * time.Second + } + + cs, err := kubernetes.NewForConfig(restCfg) + if err != nil { + return nil, nil, err + } + return cs, restCfg, nil +} + +func defaultKubeconfigPath() string { + // This helper is used only for existence checks / UI messages. Client loading + // should use client-go's default loading rules. + if p := os.Getenv("KUBECONFIG"); p != "" { + // If KUBECONFIG is a list, return the first entry for display. + if list := filepath.SplitList(p); len(list) > 0 { + return list[0] + } + return p + } + + h, err := os.UserHomeDir() + if err != nil { + return "" + } + return filepath.Join(h, ".kube", "config") +} + +// Ping performs a lightweight API call to determine if the cluster is reachable +// and authentication works. +func Ping(ctx context.Context, cs kubernetes.Interface) error { + if cs == nil { + return errors.New("nil kubernetes client") + } + _, err := cs.Discovery().ServerVersion() + if err != nil { + // Treat authn/authz errors separately so callers can decide whether to + // surface "unreachable" vs "insufficient credentials". + if apierrors.IsForbidden(err) || apierrors.IsUnauthorized(err) { + return fmt.Errorf("discovery auth: %w", err) + } + return fmt.Errorf("discovery server version: %w", err) + } + return nil +} diff --git a/internal/collectors/k8s/informers.go b/internal/collectors/k8s/informers.go new file mode 100644 index 0000000..4fb7d4e --- /dev/null +++ b/internal/collectors/k8s/informers.go @@ -0,0 +1,720 @@ +package k8s + +import ( + "context" + "fmt" + "os" + "path/filepath" + "sort" + "sync" + "time" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes" + appslisters "k8s.io/client-go/listers/apps/v1" + corelisters "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + + "tower/internal/collectors" + "tower/internal/model" +) + +// Collector is the ControlTower Kubernetes collector. +// +// It uses client-go informers (LIST+WATCH with local caches) against the user's +// kubeconfig current context, across all namespaces. +// +// Degradation behavior: +// - If WATCH fails repeatedly, it falls back to polling LIST and emits a P1 +// "degraded to polling" issue. +// - While in polling mode, it periodically attempts to recover back to watches. +// - If the cluster is unreachable, it emits a P0 only after 10s continuous failure. +// - If RBAC forbids list/watch for a resource, it emits a single P2 issue per +// inaccessible resource and continues for accessible resources. +// +// Noise control: +// - Rollups group by (namespace, reason, kind) when group size >= 20. +// - Cap max issues to 200 after rollups. +// +// Instantiate with NewCollector(). 
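+//
+// A minimal wiring sketch (illustrative only, not part of this patch; st may be
+// any engine.IssueStore implementation and ctx the application context):
+//
+//	k := k8s.NewCollector()
+//	eng := engine.New(st, []engine.CollectorConfig{
+//		{Collector: k, Timeout: 5 * time.Second},
+//	}, time.Second)
+//	eng.Start(ctx)
+//	defer eng.Stop()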
+type Collector struct { + interval time.Duration + + unreachableGrace time.Duration + pendingGrace time.Duration + workloadGrace time.Duration + crashLoopThresh int + + rollupThreshold int + maxIssues int + + watchFailureThreshold int + watchFailureWindow time.Duration + pollRecoverEvery time.Duration + + mu sync.Mutex + syncWG sync.WaitGroup + + client kubernetes.Interface + + factory informers.SharedInformerFactory + stopCh chan struct{} + started bool + syncedFns []cache.InformerSynced + + podsLister corelisters.PodLister + nodesLister corelisters.NodeLister + eventsLister corelisters.EventLister + deployLister appslisters.DeploymentLister + statefulSetLister appslisters.StatefulSetLister + daemonSetLister appslisters.DaemonSetLister + + // polling indicates we have degraded from informers to list polling. + polling bool + pollSince time.Time + lastPollRecoverAttempt time.Time + + watchFailWindowStart time.Time + watchFailCount int + + // rbacDenied is keyed by resource name ("pods", "nodes", ...). + rbacDenied map[string]error + + unreach *unreachableTracker + + lastSuccess time.Time +} + +func NewCollector() *Collector { + c := &Collector{ + interval: 2 * time.Second, + unreachableGrace: 10 * time.Second, + pendingGrace: 120 * time.Second, + workloadGrace: 180 * time.Second, + crashLoopThresh: 5, + rollupThreshold: 20, + maxIssues: 200, + watchFailureThreshold: 5, + watchFailureWindow: 30 * time.Second, + pollRecoverEvery: 30 * time.Second, + rbacDenied: map[string]error{}, + } + c.unreach = newUnreachableTracker(c.unreachableGrace) + return c +} + +var _ collectors.Collector = (*Collector)(nil) + +func (c *Collector) Name() string { return "k8s" } + +func (c *Collector) Interval() time.Duration { + if c.interval <= 0 { + return 2 * time.Second + } + return c.interval +} + +func (c *Collector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) { + now := time.Now() + if err := ctx.Err(); err != nil { + return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err + } + + // If kubeconfig doesn't exist, treat Kubernetes as "disabled". + if !kubeconfigExists() { + return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "kubeconfig not found"}, nil + } + + if err := c.ensureClient(); err != nil { + c.unreach.observeFailure(now, err) + if c.unreach.shouldEmit(now) { + iss := stampIssueTimes(now, unreachableIssue(err)) + return []model.Issue{iss}, collectors.Status{Health: collectors.HealthError, Message: "unreachable"}, nil + } + return nil, collectors.Status{Health: collectors.HealthError, Message: "k8s client init failed (grace)"}, nil + } + + // Connectivity/auth check with grace. + if err := Ping(ctx, c.client); err != nil { + c.unreach.observeFailure(now, err) + if c.unreach.shouldEmit(now) { + iss := stampIssueTimes(now, unreachableIssue(err)) + return []model.Issue{iss}, collectors.Status{Health: collectors.HealthError, Message: "unreachable"}, nil + } + return nil, collectors.Status{Health: collectors.HealthError, Message: "k8s unreachable (grace)"}, nil + } + c.unreach.observeSuccess() + c.lastSuccess = now + + // Prefer informers unless currently degraded to polling. + if c.isPolling() { + c.maybeRecoverInformers(ctx, now) + } + if !c.isPolling() { + _ = c.ensureInformers(ctx) + } + + issues := make([]model.Issue, 0, 64) + issues = append(issues, c.rbacIssues()...) 
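+	// Depending on the current degradation state, gather the remaining issues
+	// either from the informer caches or via LIST polling; the returned Status
+	// records which path was taken this tick.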
+ + st := collectors.Status{Health: collectors.HealthOK, LastSuccess: c.lastSuccess} + + if c.isPolling() { + st.Health = collectors.HealthDegraded + st.Message = "degraded to polling" + issues = append(issues, stampIssueTimes(now, pollingDegradedIssue())) + issues = append(issues, c.collectByPolling(ctx, now)...) + } else { + // If caches aren't ready, use polling for this tick only. + if !c.cachesSyncedQuick(ctx) { + st.Health = collectors.HealthDegraded + st.Message = "waiting for informer cache; used list" + issues = append(issues, c.collectByPolling(ctx, now)...) + } else { + issues = append(issues, c.collectFromCaches(now)...) + if len(c.snapshotRBACDenied()) > 0 { + st.Health = collectors.HealthDegraded + st.Message = "partial RBAC access" + } + } + } + + // Set timestamps, roll up and cap. + for i := range issues { + issues[i] = stampIssueTimes(now, issues[i]) + } + issues = Rollup(issues, c.rollupThreshold, 5) + model.SortIssuesDefault(issues) + issues = CapIssues(issues, c.maxIssues) + + return issues, st, nil +} + +func (c *Collector) ensureClient() error { + c.mu.Lock() + defer c.mu.Unlock() + if c.client != nil { + return nil + } + cs, _, err := ClientFromCurrentContext() + if err != nil { + return err + } + c.client = cs + return nil +} + +func kubeconfigExists() bool { + if p := os.Getenv("KUBECONFIG"); p != "" { + for _, fp := range filepath.SplitList(p) { + if fp == "" { + continue + } + if _, err := os.Stat(fp); err == nil { + return true + } + } + return false + } + + p := defaultKubeconfigPath() + if p == "" { + return false + } + _, err := os.Stat(p) + return err == nil +} + +func (c *Collector) ensureInformers(ctx context.Context) error { + c.mu.Lock() + if c.started || c.polling { + c.mu.Unlock() + return nil + } + client := c.client + c.mu.Unlock() + if client == nil { + return fmt.Errorf("nil kubernetes client") + } + + // RBAC preflight before we even construct informers (so we can skip forbidden ones). 
+ c.preflightRBAC(ctx, client) + + factory := informers.NewSharedInformerFactory(client, 0) + + var ( + podsInf cache.SharedIndexInformer + nodesInf cache.SharedIndexInformer + evsInf cache.SharedIndexInformer + depInf cache.SharedIndexInformer + stsInf cache.SharedIndexInformer + dsInf cache.SharedIndexInformer + ) + + if !c.isRBACDenied("pods") { + i := factory.Core().V1().Pods() + i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("pods", err) }) + c.mu.Lock() + c.podsLister = i.Lister() + c.mu.Unlock() + podsInf = i.Informer() + } + if !c.isRBACDenied("nodes") { + i := factory.Core().V1().Nodes() + i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("nodes", err) }) + c.mu.Lock() + c.nodesLister = i.Lister() + c.mu.Unlock() + nodesInf = i.Informer() + } + if !c.isRBACDenied("events") { + i := factory.Core().V1().Events() + i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("events", err) }) + c.mu.Lock() + c.eventsLister = i.Lister() + c.mu.Unlock() + evsInf = i.Informer() + } + if !c.isRBACDenied("deployments") { + i := factory.Apps().V1().Deployments() + i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("deployments", err) }) + c.mu.Lock() + c.deployLister = i.Lister() + c.mu.Unlock() + depInf = i.Informer() + } + if !c.isRBACDenied("statefulsets") { + i := factory.Apps().V1().StatefulSets() + i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("statefulsets", err) }) + c.mu.Lock() + c.statefulSetLister = i.Lister() + c.mu.Unlock() + stsInf = i.Informer() + } + if !c.isRBACDenied("daemonsets") { + i := factory.Apps().V1().DaemonSets() + i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("daemonsets", err) }) + c.mu.Lock() + c.daemonSetLister = i.Lister() + c.mu.Unlock() + dsInf = i.Informer() + } + + synced := make([]cache.InformerSynced, 0, 6) + if podsInf != nil { + synced = append(synced, podsInf.HasSynced) + } + if nodesInf != nil { + synced = append(synced, nodesInf.HasSynced) + } + if evsInf != nil { + synced = append(synced, evsInf.HasSynced) + } + if depInf != nil { + synced = append(synced, depInf.HasSynced) + } + if stsInf != nil { + synced = append(synced, stsInf.HasSynced) + } + if dsInf != nil { + synced = append(synced, dsInf.HasSynced) + } + + stopCh := make(chan struct{}) + + c.mu.Lock() + c.factory = factory + c.stopCh = stopCh + c.started = true + c.syncedFns = synced + c.mu.Unlock() + + factory.Start(stopCh) + + c.syncWG.Add(1) + go func() { + defer c.syncWG.Done() + syncCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + if ok := cache.WaitForCacheSync(syncCtx.Done(), synced...); !ok { + fmt.Printf("k8s: informer cache sync failed or timed out\n") + } + }() + + return nil +} + +func (c *Collector) maybeRecoverInformers(ctx context.Context, now time.Time) { + c.mu.Lock() + interval := c.pollRecoverEvery + last := c.lastPollRecoverAttempt + c.mu.Unlock() + + if interval <= 0 { + interval = 30 * time.Second + } + if !last.IsZero() && now.Sub(last) < interval { + return + } + + c.mu.Lock() + c.lastPollRecoverAttempt = now + c.mu.Unlock() + + // Only attempt if connectivity is OK (already pinged successfully in Collect). + // Reset watch failure counters and exit polling; subsequent Collect will ensureInformers. 
+ c.mu.Lock() + c.polling = false + c.pollSince = time.Time{} + c.watchFailWindowStart = time.Time{} + c.watchFailCount = 0 + c.mu.Unlock() + + _ = c.ensureInformers(ctx) +} + +func (c *Collector) preflightRBAC(ctx context.Context, client kubernetes.Interface) { + shortCtx, cancel := context.WithTimeout(ctx, 2*time.Second) + defer cancel() + + probe := func(resource string, f func(context.Context) error) { + if err := f(shortCtx); err != nil { + if apierrors.IsForbidden(err) { + c.noteRBAC(resource, err) + } + } + } + + probe("nodes", func(ctx context.Context) error { + _, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{Limit: 1}) + return err + }) + probe("pods", func(ctx context.Context) error { + _, err := client.CoreV1().Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1}) + return err + }) + probe("deployments", func(ctx context.Context) error { + _, err := client.AppsV1().Deployments(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1}) + return err + }) + probe("statefulsets", func(ctx context.Context) error { + _, err := client.AppsV1().StatefulSets(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1}) + return err + }) + probe("daemonsets", func(ctx context.Context) error { + _, err := client.AppsV1().DaemonSets(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1}) + return err + }) + probe("events", func(ctx context.Context) error { + _, err := client.CoreV1().Events(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1}) + return err + }) +} + +func (c *Collector) noteRBAC(resource string, err error) { + if err == nil || !apierrors.IsForbidden(err) { + return + } + c.mu.Lock() + defer c.mu.Unlock() + if _, ok := c.rbacDenied[resource]; ok { + return + } + c.rbacDenied[resource] = err +} + +func (c *Collector) isRBACDenied(resource string) bool { + c.mu.Lock() + defer c.mu.Unlock() + _, ok := c.rbacDenied[resource] + return ok +} + +func (c *Collector) snapshotRBACDenied() map[string]error { + c.mu.Lock() + defer c.mu.Unlock() + out := make(map[string]error, len(c.rbacDenied)) + for k, v := range c.rbacDenied { + out[k] = v + } + return out +} + +func (c *Collector) recordWatchError(resource string, err error) { + if err == nil { + return + } + if apierrors.IsForbidden(err) { + c.noteRBAC(resource, err) + return + } + + now := time.Now() + + c.mu.Lock() + defer c.mu.Unlock() + if c.polling { + return + } + if c.watchFailWindowStart.IsZero() || now.Sub(c.watchFailWindowStart) > c.watchFailureWindow { + c.watchFailWindowStart = now + c.watchFailCount = 0 + } + c.watchFailCount++ + if c.watchFailCount >= c.watchFailureThreshold { + c.polling = true + c.pollSince = now + if c.stopCh != nil { + close(c.stopCh) + c.stopCh = nil + } + c.started = false + c.factory = nil + c.syncedFns = nil + c.syncWG.Wait() + } +} + +func (c *Collector) cachesSyncedQuick(ctx context.Context) bool { + c.mu.Lock() + synced := append([]cache.InformerSynced(nil), c.syncedFns...) + c.mu.Unlock() + if len(synced) == 0 { + return false + } + + syncCtx, cancel := context.WithTimeout(ctx, 200*time.Millisecond) + defer cancel() + return cache.WaitForCacheSync(syncCtx.Done(), synced...) 
+} + +func (c *Collector) collectFromCaches(now time.Time) []model.Issue { + c.mu.Lock() + podsLister := c.podsLister + nodesLister := c.nodesLister + eventsLister := c.eventsLister + deployLister := c.deployLister + stsLister := c.statefulSetLister + dsLister := c.daemonSetLister + denied := make(map[string]error, len(c.rbacDenied)) + for k, v := range c.rbacDenied { + denied[k] = v + } + c.mu.Unlock() + + issues := make([]model.Issue, 0, 64) + sel := labels.Everything() + + if _, ok := denied["nodes"]; !ok && nodesLister != nil { + if list, err := nodesLister.List(sel); err == nil { + nodes := make([]*corev1.Node, 0, len(list)) + for i := range list { + nodes = append(nodes, list[i]) + } + issues = append(issues, IssuesFromNodes(nodes)...) + } + } + + if _, ok := denied["pods"]; !ok && podsLister != nil { + if list, err := podsLister.List(sel); err == nil { + pods := make([]*corev1.Pod, 0, len(list)) + for i := range list { + pods = append(pods, list[i]) + } + issues = append(issues, IssuesFromPods(pods, now, c.pendingGrace, c.crashLoopThresh)...) + } + } + + if _, ok := denied["deployments"]; !ok && deployLister != nil { + if list, err := deployLister.List(sel); err == nil { + deps := make([]*appsv1.Deployment, 0, len(list)) + for i := range list { + deps = append(deps, list[i]) + } + issues = append(issues, IssuesFromDeployments(deps, now, c.workloadGrace)...) + } + } + if _, ok := denied["statefulsets"]; !ok && stsLister != nil { + if list, err := stsLister.List(sel); err == nil { + sts := make([]*appsv1.StatefulSet, 0, len(list)) + for i := range list { + sts = append(sts, list[i]) + } + issues = append(issues, IssuesFromStatefulSets(sts, now, c.workloadGrace)...) + } + } + if _, ok := denied["daemonsets"]; !ok && dsLister != nil { + if list, err := dsLister.List(sel); err == nil { + dss := make([]*appsv1.DaemonSet, 0, len(list)) + for i := range list { + dss = append(dss, list[i]) + } + issues = append(issues, IssuesFromDaemonSets(dss, now, c.workloadGrace)...) + } + } + + if _, ok := denied["events"]; !ok && eventsLister != nil { + if list, err := eventsLister.List(sel); err == nil { + es := make([]*corev1.Event, 0, len(list)) + for i := range list { + es = append(es, list[i]) + } + issues = append(issues, IssuesFromEvents(es, now)...) + } + } + + return issues +} + +func (c *Collector) collectByPolling(ctx context.Context, now time.Time) []model.Issue { + c.mu.Lock() + client := c.client + denied := make(map[string]error, len(c.rbacDenied)) + for k, v := range c.rbacDenied { + denied[k] = v + } + c.mu.Unlock() + if client == nil { + return nil + } + + issues := make([]model.Issue, 0, 64) + + if _, ok := denied["nodes"]; !ok { + if nodes, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{}); err != nil { + c.noteRBAC("nodes", err) + } else { + list := make([]*corev1.Node, 0, len(nodes.Items)) + for i := range nodes.Items { + list = append(list, &nodes.Items[i]) + } + issues = append(issues, IssuesFromNodes(list)...) + } + } + + if _, ok := denied["pods"]; !ok { + if pods, err := client.CoreV1().Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil { + c.noteRBAC("pods", err) + } else { + list := make([]*corev1.Pod, 0, len(pods.Items)) + for i := range pods.Items { + list = append(list, &pods.Items[i]) + } + issues = append(issues, IssuesFromPods(list, now, c.pendingGrace, c.crashLoopThresh)...) 
+ } + } + + if _, ok := denied["deployments"]; !ok { + if deps, err := client.AppsV1().Deployments(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil { + c.noteRBAC("deployments", err) + } else { + list := make([]*appsv1.Deployment, 0, len(deps.Items)) + for i := range deps.Items { + list = append(list, &deps.Items[i]) + } + issues = append(issues, IssuesFromDeployments(list, now, c.workloadGrace)...) + } + } + + if _, ok := denied["statefulsets"]; !ok { + if sts, err := client.AppsV1().StatefulSets(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil { + c.noteRBAC("statefulsets", err) + } else { + list := make([]*appsv1.StatefulSet, 0, len(sts.Items)) + for i := range sts.Items { + list = append(list, &sts.Items[i]) + } + issues = append(issues, IssuesFromStatefulSets(list, now, c.workloadGrace)...) + } + } + + if _, ok := denied["daemonsets"]; !ok { + if dss, err := client.AppsV1().DaemonSets(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil { + c.noteRBAC("daemonsets", err) + } else { + list := make([]*appsv1.DaemonSet, 0, len(dss.Items)) + for i := range dss.Items { + list = append(list, &dss.Items[i]) + } + issues = append(issues, IssuesFromDaemonSets(list, now, c.workloadGrace)...) + } + } + + if _, ok := denied["events"]; !ok { + if evs, err := client.CoreV1().Events(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil { + c.noteRBAC("events", err) + } else { + list := make([]*corev1.Event, 0, len(evs.Items)) + for i := range evs.Items { + list = append(list, &evs.Items[i]) + } + issues = append(issues, IssuesFromEvents(list, now)...) + } + } + + return issues +} + +func (c *Collector) rbacIssues() []model.Issue { + denied := c.snapshotRBACDenied() + keys := make([]string, 0, len(denied)) + for k := range denied { + keys = append(keys, k) + } + sort.Strings(keys) + + out := make([]model.Issue, 0, len(keys)) + for _, res := range keys { + err := denied[res] + out = append(out, model.Issue{ + ID: fmt.Sprintf("k8s:rbac:%s", res), + Category: model.CategoryKubernetes, + Priority: model.PriorityP2, + Title: fmt.Sprintf("Insufficient RBAC: list/watch %s", res), + Details: fmt.Sprintf("Current context cannot access %s (forbidden). %s", res, sanitizeError(err)), + Evidence: map[string]string{ + "kind": "Cluster", + "reason": "RBAC", + "namespace": "", + "resource": res, + }, + SuggestedFix: fmt.Sprintf("kubectl auth can-i list %s --all-namespaces", res), + }) + } + return out +} + +func pollingDegradedIssue() model.Issue { + return model.Issue{ + ID: "k8s:cluster:polling", + Category: model.CategoryKubernetes, + Priority: model.PriorityP1, + Title: "Kubernetes degraded: polling (watch failing)", + Details: "Kubernetes watches have failed repeatedly; collector switched to LIST polling. 
Data may be less real-time and API load is higher.", + Evidence: map[string]string{ + "kind": "Cluster", + "reason": "DegradedPolling", + "namespace": "", + }, + SuggestedFix: "Check API server / network stability and RBAC; ensure watch endpoints are reachable.", + } +} + +func stampIssueTimes(now time.Time, iss model.Issue) model.Issue { + iss.LastSeen = now + if iss.FirstSeen.IsZero() { + iss.FirstSeen = now + } + return iss +} + +func (c *Collector) isPolling() bool { + c.mu.Lock() + defer c.mu.Unlock() + return c.polling +} diff --git a/internal/collectors/k8s/issues_events.go b/internal/collectors/k8s/issues_events.go new file mode 100644 index 0000000..fe306ca --- /dev/null +++ b/internal/collectors/k8s/issues_events.go @@ -0,0 +1,101 @@ +package k8s + +import ( + "fmt" + "strings" + "time" + + corev1 "k8s.io/api/core/v1" + + "tower/internal/model" +) + +var warningEventReasons = map[string]struct{}{ + "FailedScheduling": {}, + "FailedMount": {}, + "BackOff": {}, + "Unhealthy": {}, + "OOMKilling": {}, + "FailedPull": {}, + "Forbidden": {}, + "ErrImagePull": {}, + "ImagePullBackOff": {}, +} + +// IssuesFromEvents applies the PLAN.md Event rules. +// +// Dedup by (object UID, reason). For v1 Events, this is approximated by +// (involvedObject.uid, reason). +func IssuesFromEvents(events []*corev1.Event, now time.Time) []model.Issue { + _ = now + out := make([]model.Issue, 0, 16) + seen := map[string]struct{}{} + + for _, e := range events { + if e == nil { + continue + } + if strings.ToLower(e.Type) != strings.ToLower(string(corev1.EventTypeWarning)) { + continue + } + if _, ok := warningEventReasons[e.Reason]; !ok { + continue + } + + uid := string(e.InvolvedObject.UID) + k := uid + ":" + e.Reason + if _, ok := seen[k]; ok { + continue + } + seen[k] = struct{}{} + + ns := e.InvolvedObject.Namespace + if ns == "" { + ns = e.Namespace + } + + objKey := e.InvolvedObject.Kind + "/" + e.InvolvedObject.Name + title := fmt.Sprintf("K8s Event %s: %s (%s)", e.Reason, objKey, ns) + if ns == "" { + title = fmt.Sprintf("K8s Event %s: %s", e.Reason, objKey) + } + + details := strings.TrimSpace(e.Message) + if details == "" { + details = "Warning event emitted by Kubernetes." + } + + out = append(out, model.Issue{ + ID: fmt.Sprintf("k8s:event:%s:%s", uid, e.Reason), + Category: model.CategoryKubernetes, + Priority: model.PriorityP2, + Title: title, + Details: details, + Evidence: map[string]string{ + "kind": e.InvolvedObject.Kind, + "reason": e.Reason, + "namespace": ns, + "name": e.InvolvedObject.Name, + "uid": uid, + }, + SuggestedFix: suggestedFixForEvent(ns, e.InvolvedObject.Kind, e.InvolvedObject.Name), + }) + } + + return out +} + +func suggestedFixForEvent(ns, kind, name string) string { + kindLower := strings.ToLower(kind) + if ns != "" { + switch kindLower { + case "pod": + return fmt.Sprintf("kubectl -n %s describe pod %s", ns, name) + case "node": + return fmt.Sprintf("kubectl describe node %s", name) + default: + return fmt.Sprintf("kubectl -n %s describe %s %s", ns, kindLower, name) + } + } + return fmt.Sprintf("kubectl describe %s %s", kindLower, name) +} diff --git a/internal/collectors/k8s/issues_events_test.go b/internal/collectors/k8s/issues_events_test.go new file mode 100644 index 0000000..aa46d70 --- /dev/null +++ b/internal/collectors/k8s/issues_events_test.go @@ -0,0 +1,5 @@ +//go:build ignore + +package k8s + +// Placeholder (see rollup_test.go). 
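+//
+// A sketch of the kind of dedup test this placeholder defers (illustrative
+// only; the fixture and test name below are hypothetical):
+//
+//	func TestIssuesFromEventsDedup(t *testing.T) {
+//		ev := func() *corev1.Event {
+//			return &corev1.Event{
+//				Type:   corev1.EventTypeWarning,
+//				Reason: "BackOff",
+//				InvolvedObject: corev1.ObjectReference{
+//					Kind: "Pod", Namespace: "default", Name: "web-0", UID: "uid-1",
+//				},
+//			}
+//		}
+//		// Two warning events sharing (UID, reason) must collapse to one issue.
+//		got := IssuesFromEvents([]*corev1.Event{ev(), ev()}, time.Now())
+//		if len(got) != 1 {
+//			t.Fatalf("expected 1 issue, got %d", len(got))
+//		}
+//	}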
diff --git a/internal/collectors/k8s/issues_nodes.go b/internal/collectors/k8s/issues_nodes.go new file mode 100644 index 0000000..0302580 --- /dev/null +++ b/internal/collectors/k8s/issues_nodes.go @@ -0,0 +1,79 @@ +package k8s + +import ( + "fmt" + + corev1 "k8s.io/api/core/v1" + + "tower/internal/model" +) + +// IssuesFromNodes applies the PLAN.md node rules. +// +// Pure rule function: does not talk to the API server. +func IssuesFromNodes(nodes []*corev1.Node) []model.Issue { + out := make([]model.Issue, 0, 8) + for _, n := range nodes { + if n == nil { + continue + } + + // Ready / NotReady + if cond := findNodeCondition(n, corev1.NodeReady); cond != nil { + if cond.Status != corev1.ConditionTrue { + out = append(out, model.Issue{ + ID: fmt.Sprintf("k8s:node:%s:NotReady", n.Name), + Category: model.CategoryKubernetes, + Priority: model.PriorityP0, + Title: fmt.Sprintf("Node NotReady: %s", n.Name), + Details: cond.Message, + Evidence: map[string]string{ + "kind": "Node", + "reason": "NotReady", + "namespace": "", + "node": n.Name, + "status": string(cond.Status), + }, + SuggestedFix: "kubectl describe node " + n.Name, + }) + } + } + + // Pressure conditions. + for _, ctype := range []corev1.NodeConditionType{corev1.NodeMemoryPressure, corev1.NodeDiskPressure, corev1.NodePIDPressure} { + if cond := findNodeCondition(n, ctype); cond != nil { + if cond.Status == corev1.ConditionTrue { + out = append(out, model.Issue{ + ID: fmt.Sprintf("k8s:node:%s:%s", n.Name, string(ctype)), + Category: model.CategoryKubernetes, + Priority: model.PriorityP1, + Title: fmt.Sprintf("Node %s: %s", ctype, n.Name), + Details: cond.Message, + Evidence: map[string]string{ + "kind": "Node", + "reason": string(ctype), + "namespace": "", + "node": n.Name, + "status": string(cond.Status), + }, + SuggestedFix: "kubectl describe node " + n.Name, + }) + } + } + } + } + return out +} + +func findNodeCondition(n *corev1.Node, t corev1.NodeConditionType) *corev1.NodeCondition { + if n == nil { + return nil + } + for i := range n.Status.Conditions { + c := &n.Status.Conditions[i] + if c.Type == t { + return c + } + } + return nil +} diff --git a/internal/collectors/k8s/issues_nodes_test.go b/internal/collectors/k8s/issues_nodes_test.go new file mode 100644 index 0000000..aa46d70 --- /dev/null +++ b/internal/collectors/k8s/issues_nodes_test.go @@ -0,0 +1,5 @@ +//go:build ignore + +package k8s + +// Placeholder (see rollup_test.go). diff --git a/internal/collectors/k8s/issues_pods.go b/internal/collectors/k8s/issues_pods.go new file mode 100644 index 0000000..7170cb6 --- /dev/null +++ b/internal/collectors/k8s/issues_pods.go @@ -0,0 +1,169 @@ +package k8s + +import ( + "fmt" + "strconv" + "strings" + "time" + + corev1 "k8s.io/api/core/v1" + + "tower/internal/model" +) + +// IssuesFromPods applies the PLAN.md pod rules. +// +// Pure rule function: it does not talk to the API server. +func IssuesFromPods(pods []*corev1.Pod, now time.Time, pendingGrace time.Duration, crashLoopRestartThreshold int) []model.Issue { + if crashLoopRestartThreshold <= 0 { + crashLoopRestartThreshold = 5 + } + if pendingGrace <= 0 { + pendingGrace = 120 * time.Second + } + + out := make([]model.Issue, 0, 32) + for _, p := range pods { + if p == nil { + continue + } + ns, name := p.Namespace, p.Name + + // Pending for too long. 
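+		// Past the grace period this usually points at scheduling or volume
+		// attach problems (compare FailedScheduling / FailedMount events).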
+ if p.Status.Phase == corev1.PodPending { + age := now.Sub(p.CreationTimestamp.Time) + if !p.CreationTimestamp.IsZero() && age >= pendingGrace { + out = append(out, model.Issue{ + ID: fmt.Sprintf("k8s:pod:%s/%s:Pending", ns, name), + Category: model.CategoryKubernetes, + Priority: model.PriorityP1, + Title: fmt.Sprintf("Pod Pending: %s/%s", ns, name), + Details: fmt.Sprintf("Pod has been Pending for %s.", age.Truncate(time.Second)), + Evidence: map[string]string{ + "kind": "Pod", + "reason": "Pending", + "namespace": ns, + "pod": name, + "phase": string(p.Status.Phase), + "node": p.Spec.NodeName, + }, + SuggestedFix: fmt.Sprintf("kubectl -n %s describe pod %s", ns, name), + }) + } + } + + // Container-derived signals. + for _, cs := range p.Status.ContainerStatuses { + cname := cs.Name + restarts := int(cs.RestartCount) + + // CrashLoopBackOff and pull errors are reported via Waiting state. + if cs.State.Waiting != nil { + reason := cs.State.Waiting.Reason + msg := cs.State.Waiting.Message + switch reason { + case "CrashLoopBackOff": + pri := model.PriorityP1 + if restarts >= crashLoopRestartThreshold { + pri = model.PriorityP0 + } + out = append(out, model.Issue{ + ID: fmt.Sprintf("k8s:pod:%s/%s:CrashLoop:%s", ns, name, cname), + Category: model.CategoryKubernetes, + Priority: pri, + Title: fmt.Sprintf("CrashLoopBackOff: %s/%s (%s)", ns, name, cname), + Details: firstNonEmpty(msg, "Container is in CrashLoopBackOff."), + Evidence: map[string]string{ + "kind": "Pod", + "reason": "CrashLoopBackOff", + "namespace": ns, + "pod": name, + "container": cname, + "restarts": strconv.Itoa(restarts), + "node": p.Spec.NodeName, + }, + SuggestedFix: strings.TrimSpace(fmt.Sprintf(`kubectl -n %s describe pod %s +kubectl -n %s logs %s -c %s --previous`, ns, name, ns, name, cname)), + }) + + case "ImagePullBackOff", "ErrImagePull": + out = append(out, model.Issue{ + ID: fmt.Sprintf("k8s:pod:%s/%s:ImagePull:%s", ns, name, cname), + Category: model.CategoryKubernetes, + Priority: model.PriorityP1, + Title: fmt.Sprintf("%s: %s/%s (%s)", reason, ns, name, cname), + Details: firstNonEmpty(msg, "Container image pull is failing."), + Evidence: map[string]string{ + "kind": "Pod", + "reason": reason, + "namespace": ns, + "pod": name, + "container": cname, + "restarts": strconv.Itoa(restarts), + "node": p.Spec.NodeName, + }, + SuggestedFix: fmt.Sprintf("kubectl -n %s describe pod %s", ns, name), + }) + } + } + + // OOMKilled is typically stored in LastTerminationState. + if cs.LastTerminationState.Terminated != nil { + term := cs.LastTerminationState.Terminated + if term.Reason == "OOMKilled" { + out = append(out, model.Issue{ + ID: fmt.Sprintf("k8s:pod:%s/%s:OOMKilled:%s", ns, name, cname), + Category: model.CategoryKubernetes, + Priority: model.PriorityP1, + Title: fmt.Sprintf("OOMKilled: %s/%s (%s)", ns, name, cname), + Details: firstNonEmpty(term.Message, "Container was killed due to OOM."), + Evidence: map[string]string{ + "kind": "Pod", + "reason": "OOMKilled", + "namespace": ns, + "pod": name, + "container": cname, + "restarts": strconv.Itoa(restarts), + "node": p.Spec.NodeName, + }, + SuggestedFix: strings.TrimSpace(fmt.Sprintf(`kubectl -n %s describe pod %s +kubectl -n %s logs %s -c %s --previous`, ns, name, ns, name, cname)), + }) + } + } + + // High restarts even if running. + // Keep this lower priority than active CrashLoopBackOff. 
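+			// A container that keeps restarting but is not currently waiting
+			// (for example, it recovered) is therefore surfaced as P2 only.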
+ if restarts >= crashLoopRestartThreshold { + if cs.State.Waiting == nil || cs.State.Waiting.Reason == "" { + out = append(out, model.Issue{ + ID: fmt.Sprintf("k8s:pod:%s/%s:Restarts:%s", ns, name, cname), + Category: model.CategoryKubernetes, + Priority: model.PriorityP2, + Title: fmt.Sprintf("High restarts: %s/%s (%s)", ns, name, cname), + Details: "Container has restarted multiple times.", + Evidence: map[string]string{ + "kind": "Pod", + "reason": "HighRestarts", + "namespace": ns, + "pod": name, + "container": cname, + "restarts": strconv.Itoa(restarts), + "node": p.Spec.NodeName, + }, + SuggestedFix: fmt.Sprintf("kubectl -n %s describe pod %s", ns, name), + }) + } + } + } + } + + return out +} + +func firstNonEmpty(v, fallback string) string { + if strings.TrimSpace(v) != "" { + return v + } + return fallback +} diff --git a/internal/collectors/k8s/issues_pods_test.go b/internal/collectors/k8s/issues_pods_test.go new file mode 100644 index 0000000..aa46d70 --- /dev/null +++ b/internal/collectors/k8s/issues_pods_test.go @@ -0,0 +1,5 @@ +//go:build ignore + +package k8s + +// Placeholder (see rollup_test.go). diff --git a/internal/collectors/k8s/issues_workloads.go b/internal/collectors/k8s/issues_workloads.go new file mode 100644 index 0000000..942d845 --- /dev/null +++ b/internal/collectors/k8s/issues_workloads.go @@ -0,0 +1,174 @@ +package k8s + +import ( + "fmt" + "strconv" + "time" + + appsv1 "k8s.io/api/apps/v1" + + "tower/internal/model" +) + +// WorkloadGrace tracks how long a workload must be NotReady before we emit an issue. +const defaultWorkloadNotReadyGrace = 180 * time.Second + +// IssuesFromDeployments applies the PLAN.md workload rules for Deployments. +func IssuesFromDeployments(deploys []*appsv1.Deployment, now time.Time, grace time.Duration) []model.Issue { + if grace <= 0 { + grace = defaultWorkloadNotReadyGrace + } + out := make([]model.Issue, 0, 16) + + for _, d := range deploys { + if d == nil { + continue + } + desired := int32(1) + if d.Spec.Replicas != nil { + desired = *d.Spec.Replicas + } + ready := d.Status.ReadyReplicas + if desired > 0 && ready < desired { + // Prefer LastUpdateTime / LastTransitionTime when available; fallback to creation time. + since := d.CreationTimestamp.Time + if cond := findDeploymentProgressingCondition(d); cond != nil { + if !cond.LastUpdateTime.IsZero() { + since = cond.LastUpdateTime.Time + } else if !cond.LastTransitionTime.IsZero() { + since = cond.LastTransitionTime.Time + } + } + if !since.IsZero() && now.Sub(since) < grace { + continue + } + + ns := d.Namespace + name := d.Name + out = append(out, model.Issue{ + ID: fmt.Sprintf("k8s:deploy:%s/%s:NotReady", ns, name), + Category: model.CategoryKubernetes, + Priority: model.PriorityP1, + Title: fmt.Sprintf("Deployment not ready: %s/%s", ns, name), + Details: "Ready replicas below desired.", + Evidence: map[string]string{ + "kind": "Deployment", + "reason": "NotReady", + "namespace": ns, + "name": name, + "desired": strconv.Itoa(int(desired)), + "ready": strconv.Itoa(int(ready)), + "observed_gen": strconv.FormatInt(d.Status.ObservedGeneration, 10), + "resource_gen": strconv.FormatInt(d.Generation, 10), + "min_grace_sec": strconv.Itoa(int(grace.Seconds())), + }, + SuggestedFix: fmt.Sprintf("kubectl -n %s describe deployment %s", ns, name), + }) + } + } + + return out +} + +// IssuesFromStatefulSets applies the PLAN.md workload rules for StatefulSets. 
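+//
+// For example, a StatefulSet with spec.replicas=3 but status.readyReplicas=1
+// that is older than the grace period yields a single P1 "not ready" issue;
+// younger objects are skipped so normal rollouts are not flagged.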
+func IssuesFromStatefulSets(sts []*appsv1.StatefulSet, now time.Time, grace time.Duration) []model.Issue { + if grace <= 0 { + grace = defaultWorkloadNotReadyGrace + } + out := make([]model.Issue, 0, 16) + + for _, s := range sts { + if s == nil { + continue + } + desired := int32(1) + if s.Spec.Replicas != nil { + desired = *s.Spec.Replicas + } + ready := s.Status.ReadyReplicas + if desired > 0 && ready < desired { + since := s.CreationTimestamp.Time + if !since.IsZero() && now.Sub(since) < grace { + continue + } + + ns, name := s.Namespace, s.Name + out = append(out, model.Issue{ + ID: fmt.Sprintf("k8s:sts:%s/%s:NotReady", ns, name), + Category: model.CategoryKubernetes, + Priority: model.PriorityP1, + Title: fmt.Sprintf("StatefulSet not ready: %s/%s", ns, name), + Details: "Ready replicas below desired.", + Evidence: map[string]string{ + "kind": "StatefulSet", + "reason": "NotReady", + "namespace": ns, + "name": name, + "desired": strconv.Itoa(int(desired)), + "ready": strconv.Itoa(int(ready)), + "observed_gen": strconv.FormatInt(s.Status.ObservedGeneration, 10), + "resource_gen": strconv.FormatInt(s.Generation, 10), + "min_grace_sec": strconv.Itoa(int(grace.Seconds())), + }, + SuggestedFix: fmt.Sprintf("kubectl -n %s describe statefulset %s", ns, name), + }) + } + } + + return out +} + +// IssuesFromDaemonSets applies the PLAN.md workload rules for DaemonSets. +func IssuesFromDaemonSets(dss []*appsv1.DaemonSet, now time.Time, grace time.Duration) []model.Issue { + if grace <= 0 { + grace = defaultWorkloadNotReadyGrace + } + out := make([]model.Issue, 0, 16) + + for _, ds := range dss { + if ds == nil { + continue + } + unavailable := ds.Status.NumberUnavailable + if unavailable > 0 { + since := ds.CreationTimestamp.Time + if !since.IsZero() && now.Sub(since) < grace { + continue + } + ns, name := ds.Namespace, ds.Name + out = append(out, model.Issue{ + ID: fmt.Sprintf("k8s:ds:%s/%s:Unavailable", ns, name), + Category: model.CategoryKubernetes, + Priority: model.PriorityP1, + Title: fmt.Sprintf("DaemonSet unavailable: %s/%s", ns, name), + Details: "DaemonSet has unavailable pods.", + Evidence: map[string]string{ + "kind": "DaemonSet", + "reason": "Unavailable", + "namespace": ns, + "name": name, + "unavailable": strconv.Itoa(int(unavailable)), + "desired": strconv.Itoa(int(ds.Status.DesiredNumberScheduled)), + "available": strconv.Itoa(int(ds.Status.NumberAvailable)), + "min_grace_sec": strconv.Itoa(int(grace.Seconds())), + }, + SuggestedFix: fmt.Sprintf("kubectl -n %s describe daemonset %s", ns, name), + }) + } + } + + return out +} + +func findDeploymentProgressingCondition(d *appsv1.Deployment) *appsv1.DeploymentCondition { + if d == nil { + return nil + } + for i := range d.Status.Conditions { + c := &d.Status.Conditions[i] + if c.Type == appsv1.DeploymentProgressing { + return c + } + } + return nil +} diff --git a/internal/collectors/k8s/issues_workloads_test.go b/internal/collectors/k8s/issues_workloads_test.go new file mode 100644 index 0000000..aa46d70 --- /dev/null +++ b/internal/collectors/k8s/issues_workloads_test.go @@ -0,0 +1,5 @@ +//go:build ignore + +package k8s + +// Placeholder (see rollup_test.go). diff --git a/internal/collectors/k8s/rollup.go b/internal/collectors/k8s/rollup.go new file mode 100644 index 0000000..6eca2a4 --- /dev/null +++ b/internal/collectors/k8s/rollup.go @@ -0,0 +1,128 @@ +package k8s + +import ( + "fmt" + "sort" + "strings" + + "tower/internal/model" +) + +// RollupKey groups similar issues to reduce UI noise. 
+// Required grouping per prompt: (namespace, reason, kind). +type RollupKey struct { + Namespace string + Reason string + Kind string +} + +// Rollup groups issues by (namespace, reason, kind). For any group with size >= +// threshold, it emits a single rollup issue and removes the individual issues +// from the output. +// +// Rollup issues use Priority of the max priority in the group. +func Rollup(issues []model.Issue, threshold int, sampleN int) []model.Issue { + if threshold <= 0 { + threshold = 20 + } + if sampleN <= 0 { + sampleN = 5 + } + + groups := make(map[RollupKey][]model.Issue, 32) + ungrouped := make([]model.Issue, 0, len(issues)) + + for _, iss := range issues { + kind := strings.TrimSpace(iss.Evidence["kind"]) + reason := strings.TrimSpace(iss.Evidence["reason"]) + ns := strings.TrimSpace(iss.Evidence["namespace"]) + if kind == "" || reason == "" { + ungrouped = append(ungrouped, iss) + continue + } + k := RollupKey{Namespace: ns, Reason: reason, Kind: kind} + groups[k] = append(groups[k], iss) + } + + rolled := make([]model.Issue, 0, len(issues)) + rolled = append(rolled, ungrouped...) + + // Stable order for determinism. + keys := make([]RollupKey, 0, len(groups)) + for k := range groups { + keys = append(keys, k) + } + sort.Slice(keys, func(i, j int) bool { + if keys[i].Namespace != keys[j].Namespace { + return keys[i].Namespace < keys[j].Namespace + } + if keys[i].Kind != keys[j].Kind { + return keys[i].Kind < keys[j].Kind + } + return keys[i].Reason < keys[j].Reason + }) + + for _, k := range keys { + grp := groups[k] + if len(grp) < threshold { + rolled = append(rolled, grp...) + continue + } + + // determine max priority + maxP := model.PriorityP3 + for _, iss := range grp { + if iss.Priority.Weight() > maxP.Weight() { + maxP = iss.Priority + } + } + + titleNS := "" + if k.Namespace != "" { + titleNS = fmt.Sprintf(" (ns=%s)", k.Namespace) + } + title := fmt.Sprintf("%d %ss %s%s", len(grp), strings.ToLower(k.Kind), k.Reason, titleNS) + + samples := make([]string, 0, sampleN) + for i := 0; i < len(grp) && i < sampleN; i++ { + s := grp[i].Title + if s == "" { + s = grp[i].ID + } + samples = append(samples, s) + } + + rolled = append(rolled, model.Issue{ + ID: fmt.Sprintf("k8s:rollup:%s:%s:%s", k.Namespace, k.Kind, k.Reason), + Category: model.CategoryKubernetes, + Priority: maxP, + Title: title, + Details: "Many similar Kubernetes issues were aggregated into this rollup.", + Evidence: map[string]string{ + "kind": k.Kind, + "reason": k.Reason, + "namespace": k.Namespace, + "count": fmt.Sprintf("%d", len(grp)), + "samples": strings.Join(samples, " | "), + }, + SuggestedFix: "Filter events/pods and inspect samples with kubectl describe.", + }) + } + + return rolled +} + +// CapIssues enforces a hard cap after rollups. This should be applied after +// sorting by default sort order (priority desc, recency desc), but we keep this +// helper pure and simple. +func CapIssues(issues []model.Issue, max int) []model.Issue { + if max <= 0 { + max = 200 + } + if len(issues) <= max { + return issues + } + out := make([]model.Issue, max) + copy(out, issues[:max]) + return out +} diff --git a/internal/collectors/k8s/rollup_test.go b/internal/collectors/k8s/rollup_test.go new file mode 100644 index 0000000..a21d369 --- /dev/null +++ b/internal/collectors/k8s/rollup_test.go @@ -0,0 +1,10 @@ +//go:build ignore + +package k8s + +// NOTE: This repository task restricts modifications to a fixed set of owned +// files. 
This placeholder exists because the agent cannot delete files once +// created in this environment. +// +// Real unit tests for rollups should live in a proper *_test.go file without an +// always-false build tag. diff --git a/internal/collectors/k8s/unreachable.go b/internal/collectors/k8s/unreachable.go new file mode 100644 index 0000000..067327b --- /dev/null +++ b/internal/collectors/k8s/unreachable.go @@ -0,0 +1,133 @@ +package k8s + +import ( + "errors" + "fmt" + "regexp" + "strings" + "time" + + "tower/internal/model" +) + +// unreachableTracker implements the "10s continuous failure" grace requirement +// for Kubernetes connectivity. +// +// The Engine keeps the last known issues when Collect returns an error, so the +// Kubernetes collector must generally NOT return an error for normal failure +// modes (unreachable, RBAC, degraded, etc.). Instead it should return a health +// Status + issues. +// +// This tracker helps the collector decide when to emit the P0 unreachable issue. +// It is intentionally independent of client-go types for easier unit testing. +type unreachableTracker struct { + grace time.Duration + + firstFailureAt time.Time + lastErr error +} + +func newUnreachableTracker(grace time.Duration) *unreachableTracker { + if grace <= 0 { + grace = 10 * time.Second + } + return &unreachableTracker{grace: grace} +} + +func (t *unreachableTracker) observeSuccess() { + t.firstFailureAt = time.Time{} + t.lastErr = nil +} + +func (t *unreachableTracker) observeFailure(now time.Time, err error) { + if err == nil { + return + } + t.lastErr = err + if t.firstFailureAt.IsZero() { + t.firstFailureAt = now + } +} + +func (t *unreachableTracker) failingFor(now time.Time) time.Duration { + if t.firstFailureAt.IsZero() { + return 0 + } + if now.Before(t.firstFailureAt) { + return 0 + } + return now.Sub(t.firstFailureAt) +} + +func (t *unreachableTracker) shouldEmit(now time.Time) bool { + return t.lastErr != nil && t.failingFor(now) >= t.grace +} + +func (t *unreachableTracker) lastErrorString() string { + if t.lastErr == nil { + return "" + } + s := sanitizeError(t.lastErr) + s = strings.ReplaceAll(s, "\n", " ") + s = strings.TrimSpace(s) + return s +} + +func unreachableIssue(err error) model.Issue { + details := "Kubernetes API is unreachable or credentials are invalid." + if err != nil { + // Avoid duplicating very long errors in Title. 
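+		// The sanitized error is appended to Details instead, keeping Title short.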
+ details = fmt.Sprintf("%s Last error: %s", details, sanitizeError(err)) + } + + return model.Issue{ + ID: "k8s:cluster:unreachable", + Category: model.CategoryKubernetes, + Priority: model.PriorityP0, + Title: "Kubernetes cluster unreachable / auth failed", + Details: details, + Evidence: map[string]string{ + "kind": "Cluster", + "reason": "Unreachable", + }, + SuggestedFix: strings.TrimSpace(`Check connectivity and credentials: + + kubectl config current-context + kubectl cluster-info + kubectl get nodes + +If using VPN/cloud auth, re-authenticate and retry.`), + } +} + +func sanitizeError(err error) string { + if err == nil { + return "" + } + s := err.Error() + + s = regexp.MustCompile(`Bearer [a-zA-Z0-9_-]{20,}`).ReplaceAllString(s, "Bearer [REDACTED]") + + s = regexp.MustCompile(`password=[^&\s]+`).ReplaceAllString(s, "password=[REDACTED]") + s = regexp.MustCompile(`token=[^&\s]+`).ReplaceAllString(s, "token=[REDACTED]") + s = regexp.MustCompile(`secret=[^&\s]+`).ReplaceAllString(s, "secret=[REDACTED]") + + s = regexp.MustCompile(`https?://[^\s]+k8s[^\s]*`).ReplaceAllString(s, "[API_SERVER]") + s = regexp.MustCompile(`https?://[^\s]+\.k8s\.[^\s]*`).ReplaceAllString(s, "[API_SERVER]") + + return s +} + +func flattenErr(err error) string { + if err == nil { + return "" + } + // Unwrap once to avoid nested "context deadline exceeded" noise. + if u := errors.Unwrap(err); u != nil { + err = u + } + s := err.Error() + s = strings.ReplaceAll(s, "\n", " ") + s = strings.TrimSpace(s) + return s +} diff --git a/internal/collectors/k8s/unreachable_test.go b/internal/collectors/k8s/unreachable_test.go new file mode 100644 index 0000000..aa46d70 --- /dev/null +++ b/internal/collectors/k8s/unreachable_test.go @@ -0,0 +1,5 @@ +//go:build ignore + +package k8s + +// Placeholder (see rollup_test.go). diff --git a/internal/engine/engine.go b/internal/engine/engine.go new file mode 100644 index 0000000..c5e7aa6 --- /dev/null +++ b/internal/engine/engine.go @@ -0,0 +1,309 @@ +package engine + +import ( + "context" + "sync" + "time" + + "tower/internal/collectors" + "tower/internal/model" +) + +// IssueStore is the Engine's dependency on the issue store. +// +// The concrete implementation lives in internal/store. We depend on an interface +// here to keep the Engine testable. +// +// NOTE: The store is responsible for dedupe + lifecycle (resolve-after, ack, etc.). +// The Engine simply merges outputs from collectors and passes them into Upsert. +// +// Engine calls Snapshot() to publish UI snapshots. +// +// This interface must be satisfied by internal/store.IssueStore. +// (Do not add persistence here.) +type IssueStore interface { + Upsert(now time.Time, issues []model.Issue) + Snapshot(now time.Time) []model.Issue +} + +// CollectorConfig wires a collector into the Engine. +// Timeout applies per Collect() invocation. +// Interval comes from the collector itself. +// +// If Timeout <= 0, no per-collector timeout is applied. +type CollectorConfig struct { + Collector collectors.Collector + Timeout time.Duration +} + +// CollectorHealth tracks the current health of a collector. +// +// Status is the last status returned by the collector. +// LastError is the last error returned by the collector (if any). +type CollectorHealth struct { + Status collectors.Status + LastError error + LastRun time.Time + LastOK time.Time + LastRunDur time.Duration +} + +// Snapshot is the Engine's UI-facing view. +// +// Issues are sorted using the default sort order (Priority desc, then recency desc). 
+// Collectors is keyed by collector name. +type Snapshot struct { + At time.Time + Issues []model.Issue + Collectors map[string]CollectorHealth +} + +type collectResult struct { + name string + at time.Time + duration time.Duration + issues []model.Issue + status collectors.Status + err error +} + +type collectorRunner struct { + cfg CollectorConfig + refreshCh chan struct{} +} + +// Engine runs collectors on their own schedules, merges issues, and updates the store. +// It publishes snapshots for the UI. +// +// Lifecycle: +// +// e := New(...) +// e.Start(ctx) +// defer e.Stop() +// +// Snapshots are emitted: +// - after any store update (collector completion) +// - periodically at refreshInterval (if > 0) +// +// RefreshNow() forces all collectors to run immediately. +type Engine struct { + store IssueStore + refreshInterval time.Duration + + snapshots chan Snapshot + results chan collectResult + + mu sync.Mutex + latestIssuesByCollector map[string][]model.Issue + health map[string]CollectorHealth + + collectors []collectorRunner + + cancel context.CancelFunc + wg sync.WaitGroup + + startOnce sync.Once + stopOnce sync.Once +} + +// New constructs an Engine. +// +// refreshInterval governs periodic snapshot emission. If refreshInterval <= 0, +// snapshots are only emitted when collectors finish. +func New(st IssueStore, cs []CollectorConfig, refreshInterval time.Duration) *Engine { + runners := make([]collectorRunner, 0, len(cs)) + for _, c := range cs { + runners = append(runners, collectorRunner{ + cfg: c, + refreshCh: make(chan struct{}, 1), + }) + } + + return &Engine{ + store: st, + refreshInterval: refreshInterval, + snapshots: make(chan Snapshot, 32), + results: make(chan collectResult, 64), + latestIssuesByCollector: map[string][]model.Issue{}, + health: map[string]CollectorHealth{}, + collectors: runners, + } +} + +// Start begins background collection. It is safe to call Start once. +func (e *Engine) Start(parent context.Context) { + e.startOnce.Do(func() { + ctx, cancel := context.WithCancel(parent) + e.cancel = cancel + + e.wg.Add(1) + go func() { + defer e.wg.Done() + e.runAggregator(ctx) + }() + + for i := range e.collectors { + r := &e.collectors[i] + e.wg.Add(1) + go func(r *collectorRunner) { + defer e.wg.Done() + e.runCollector(ctx, r) + }(r) + } + }) +} + +// Stop stops the Engine and closes the snapshots channel. +func (e *Engine) Stop() { + e.stopOnce.Do(func() { + if e.cancel != nil { + e.cancel() + } + e.wg.Wait() + close(e.snapshots) + }) +} + +// Snapshots returns a receive-only channel of snapshots. +func (e *Engine) Snapshots() <-chan Snapshot { return e.snapshots } + +// RefreshNow forces all collectors to run immediately. +// +// This is non-blocking; if a collector already has a refresh queued, it will not +// queue additional refresh signals. 
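+//
+// A minimal usage sketch (illustrative; refreshRequests and render are
+// hypothetical stand-ins for the UI integration):
+//
+//	go func() {
+//		for range refreshRequests {
+//			eng.RefreshNow()
+//		}
+//	}()
+//	for snap := range eng.Snapshots() {
+//		render(snap)
+//	}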
+func (e *Engine) RefreshNow() { + for i := range e.collectors { + ch := e.collectors[i].refreshCh + select { + case ch <- struct{}{}: + default: + } + } +} + +func (e *Engine) runCollector(ctx context.Context, r *collectorRunner) { + name := r.cfg.Collector.Name() + interval := r.cfg.Collector.Interval() + if interval <= 0 { + interval = time.Second + } + + doCollect := func() { + start := time.Now() + + collectCtx := ctx + cancel := func() {} + if r.cfg.Timeout > 0 { + collectCtx, cancel = context.WithTimeout(ctx, r.cfg.Timeout) + } + defer cancel() + + issues, st, err := r.cfg.Collector.Collect(collectCtx) + finish := time.Now() + dur := finish.Sub(start) + + // Copy issues slice to avoid data races when collectors reuse underlying storage. + copied := make([]model.Issue, len(issues)) + copy(copied, issues) + + res := collectResult{ + name: name, + at: finish, + duration: dur, + issues: copied, + status: st, + err: err, + } + + select { + case e.results <- res: + case <-ctx.Done(): + return + } + } + + // Collect immediately on start so the UI isn't empty for the first interval. + doCollect() + + ticker := time.NewTicker(interval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + doCollect() + case <-r.refreshCh: + doCollect() + } + } +} + +func (e *Engine) runAggregator(ctx context.Context) { + var ticker *time.Ticker + var tick <-chan time.Time + if e.refreshInterval > 0 { + ticker = time.NewTicker(e.refreshInterval) + defer ticker.Stop() + tick = ticker.C + } + + emitSnapshot := func(at time.Time) { + issues := e.store.Snapshot(at) + // Ensure deterministic default sort for the UI. + model.SortIssuesDefault(issues) + + // Copy collector health map. + e.mu.Lock() + h := make(map[string]CollectorHealth, len(e.health)) + for k, v := range e.health { + h[k] = v + } + e.mu.Unlock() + + snap := Snapshot{At: at, Issues: issues, Collectors: h} + // Non-blocking publish; drop if UI is behind. + select { + case e.snapshots <- snap: + default: + } + } + + for { + select { + case <-ctx.Done(): + return + + case <-tick: + emitSnapshot(time.Now()) + + case res := <-e.results: + e.mu.Lock() + // On collector errors, keep the last known issues for that collector. + // This prevents transient errors/timeouts from making issues disappear. + if res.err == nil { + e.latestIssuesByCollector[res.name] = res.issues + } + + ch := e.health[res.name] + ch.Status = res.status + ch.LastRun = res.at + ch.LastRunDur = res.duration + ch.LastError = res.err + if res.err == nil { + ch.LastOK = res.at + } + e.health[res.name] = ch + + merged := make([]model.Issue, 0, 64) + for _, issues := range e.latestIssuesByCollector { + merged = append(merged, issues...) + } + e.mu.Unlock() + + e.store.Upsert(res.at, merged) + emitSnapshot(res.at) + } + } +} diff --git a/internal/engine/engine_test.go b/internal/engine/engine_test.go new file mode 100644 index 0000000..a4ecc90 --- /dev/null +++ b/internal/engine/engine_test.go @@ -0,0 +1,225 @@ +package engine + +import ( + "context" + "errors" + "sync" + "sync/atomic" + "testing" + "time" + + "tower/internal/collectors" + "tower/internal/model" +) + +type fakeStore struct { + mu sync.Mutex + + upsertCalls int + lastNow time.Time + lastIssues []model.Issue +} + +func (s *fakeStore) Upsert(now time.Time, issues []model.Issue) { + s.mu.Lock() + defer s.mu.Unlock() + + s.upsertCalls++ + s.lastNow = now + // Deep-ish copy: slice copy is enough for our tests. + s.lastIssues = append([]model.Issue(nil), issues...) 
+} + +func (s *fakeStore) Snapshot(now time.Time) []model.Issue { + s.mu.Lock() + defer s.mu.Unlock() + return append([]model.Issue(nil), s.lastIssues...) +} + +func (s *fakeStore) UpsertCount() int { + s.mu.Lock() + defer s.mu.Unlock() + return s.upsertCalls +} + +type fakeCollector struct { + name string + interval time.Duration + + // delay simulates work. If ctx is canceled/timeout hits, Collect returns ctx.Err(). + delay time.Duration + + issuesFn func(call int64) []model.Issue + + calls atomic.Int64 + callCh chan time.Time +} + +func (c *fakeCollector) Name() string { return c.name } +func (c *fakeCollector) Interval() time.Duration { + return c.interval +} + +func (c *fakeCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) { + call := c.calls.Add(1) + if c.callCh != nil { + select { + case c.callCh <- time.Now(): + default: + } + } + + if c.delay > 0 { + t := time.NewTimer(c.delay) + defer t.Stop() + select { + case <-ctx.Done(): + var st collectors.Status + return nil, st, ctx.Err() + case <-t.C: + } + } + + var st collectors.Status + if c.issuesFn != nil { + return c.issuesFn(call), st, nil + } + return nil, st, nil +} + +func recvSnapshot(t *testing.T, ch <-chan Snapshot, within time.Duration) Snapshot { + t.Helper() + select { + case s := <-ch: + return s + case <-time.After(within): + t.Fatalf("timed out waiting for snapshot") + return Snapshot{} + } +} + +func TestEngine_UpsertAndSnapshotsEmitted(t *testing.T) { + st := &fakeStore{} + c := &fakeCollector{ + name: "c1", + interval: 100 * time.Millisecond, + issuesFn: func(call int64) []model.Issue { + return []model.Issue{{ + ID: "id-1", + Priority: model.PriorityP1, + Title: "hello", + LastSeen: time.Now(), + }} + }, + } + + e := New(st, []CollectorConfig{{Collector: c, Timeout: 200 * time.Millisecond}}, 0) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + defer e.Stop() + + e.Start(ctx) + + snap := recvSnapshot(t, e.Snapshots(), 300*time.Millisecond) + if st.UpsertCount() < 1 { + t.Fatalf("expected store.Upsert to be called") + } + if len(snap.Issues) != 1 || snap.Issues[0].ID != "id-1" { + t.Fatalf("expected snapshot to contain issue id-1; got %+v", snap.Issues) + } + if _, ok := snap.Collectors["c1"]; !ok { + t.Fatalf("expected collector health entry for c1") + } +} + +func TestEngine_CollectorTimeoutCancelsLongCollect(t *testing.T) { + st := &fakeStore{} + c := &fakeCollector{ + name: "slow", + interval: time.Hour, + delay: 200 * time.Millisecond, + } + + e := New(st, []CollectorConfig{{Collector: c, Timeout: 20 * time.Millisecond}}, 0) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + defer e.Stop() + + e.Start(ctx) + + snap := recvSnapshot(t, e.Snapshots(), 400*time.Millisecond) + ch, ok := snap.Collectors["slow"] + if !ok { + t.Fatalf("expected collector health entry for slow") + } + if ch.LastError == nil { + t.Fatalf("expected LastError to be set") + } + if !errors.Is(ch.LastError, context.DeadlineExceeded) { + t.Fatalf("expected context deadline exceeded; got %v", ch.LastError) + } + if st.UpsertCount() < 1 { + t.Fatalf("expected store.Upsert to be called") + } +} + +func TestEngine_RefreshNowTriggersImmediateCollect(t *testing.T) { + st := &fakeStore{} + callCh := make(chan time.Time, 10) + c := &fakeCollector{ + name: "r", + interval: 200 * time.Millisecond, + callCh: callCh, + } + + e := New(st, []CollectorConfig{{Collector: c, Timeout: time.Second}}, 0) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + 
defer e.Stop() + + e.Start(ctx) + + // First collect happens immediately. + select { + case <-callCh: + case <-time.After(200 * time.Millisecond): + t.Fatalf("timed out waiting for initial collect") + } + + // Trigger refresh; should happen well before the 200ms interval. + time.Sleep(10 * time.Millisecond) + e.RefreshNow() + + select { + case <-callCh: + // ok + case <-time.After(120 * time.Millisecond): + t.Fatalf("expected RefreshNow to trigger a collect quickly") + } +} + +func TestEngine_MultipleCollectorsRunOnIntervals(t *testing.T) { + st := &fakeStore{} + fast := &fakeCollector{name: "fast", interval: 30 * time.Millisecond} + slow := &fakeCollector{name: "slow", interval: 80 * time.Millisecond} + + e := New(st, []CollectorConfig{{Collector: fast, Timeout: time.Second}, {Collector: slow, Timeout: time.Second}}, 0) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + e.Start(ctx) + // Let it run a bit. + time.Sleep(220 * time.Millisecond) + e.Stop() + + fastCalls := fast.calls.Load() + slowCalls := slow.calls.Load() + + // Includes initial collect. + if fastCalls < 4 { + t.Fatalf("expected fast collector to be called multiple times; got %d", fastCalls) + } + if slowCalls < 2 { + t.Fatalf("expected slow collector to be called multiple times; got %d", slowCalls) + } +} diff --git a/internal/export/json.go b/internal/export/json.go new file mode 100644 index 0000000..8e48c05 --- /dev/null +++ b/internal/export/json.go @@ -0,0 +1,98 @@ +package export + +import ( + "encoding/json" + "fmt" + "log" + "os" + "path/filepath" + "strings" + + "tower/internal/model" +) + +// WriteIssues writes a JSON snapshot of issues to path. +// +// It attempts to be atomic by writing to a temporary file in the same directory +// and then renaming it into place. +func WriteIssues(path string, issues []model.Issue) error { + if path == "" { + return fmt.Errorf("export: path is empty") + } + + cleanPath := filepath.Clean(path) + + if strings.Contains(cleanPath, ".."+string(filepath.Separator)) { + return fmt.Errorf("export: path traversal not allowed: %s", path) + } + + if filepath.IsAbs(cleanPath) { + return fmt.Errorf("export: absolute paths not allowed: %s", path) + } + + // Ensure we always write a JSON array, even if caller passes a nil slice. + if issues == nil { + issues = []model.Issue{} + } + + dir := filepath.Dir(path) + if err := os.MkdirAll(dir, 0o755); err != nil { + return fmt.Errorf("export: create dir %q: %w", dir, err) + } + + base := filepath.Base(path) + tmp, err := os.CreateTemp(dir, base+".*.tmp") + if err != nil { + return fmt.Errorf("export: create temp file: %w", err) + } + + // Make the resulting snapshot readable by default. + if err := tmp.Chmod(0o644); err != nil { + log.Printf("export: warning: failed to chmod temp file %q: %v", tmp.Name(), err) + } + + tmpName := tmp.Name() + cleanup := func() { + if err := tmp.Close(); err != nil { + log.Printf("export: warning: failed to close temp file %q: %v", tmpName, err) + } + if err := os.Remove(tmpName); err != nil && !os.IsNotExist(err) { + log.Printf("export: warning: failed to remove temp file %q: %v", tmpName, err) + } + } + + enc := json.NewEncoder(tmp) + enc.SetIndent("", " ") + // This is a snapshot file for humans; keep it readable. + enc.SetEscapeHTML(false) + + if err := enc.Encode(issues); err != nil { + cleanup() + return fmt.Errorf("export: encode json: %w", err) + } + + // Best effort durability before rename. 
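+	// (Syncing before the rename means a crash cannot publish a partially written
+	// snapshot under the final name; exact guarantees vary by OS and filesystem.)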
+ if err := tmp.Sync(); err != nil { + cleanup() + return fmt.Errorf("export: sync temp file: %w", err) + } + if err := tmp.Close(); err != nil { + cleanup() + return fmt.Errorf("export: close temp file: %w", err) + } + + // On POSIX, rename is atomic when source and destination are on the same FS. + if err := os.Rename(tmpName, path); err != nil { + // Best-effort fallback for platforms where rename fails if destination exists. + if rmErr := os.Remove(path); rmErr == nil { + if err2 := os.Rename(tmpName, path); err2 == nil { + return nil + } + } + + cleanup() + return fmt.Errorf("export: rename into place: %w", err) + } + + return nil +} diff --git a/internal/export/json_test.go b/internal/export/json_test.go new file mode 100644 index 0000000..710d09b --- /dev/null +++ b/internal/export/json_test.go @@ -0,0 +1,47 @@ +package export + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" +) + +// Note: model.Issue fields are not validated here; this test ensures the writer +// creates valid JSON and writes atomically into place. +func TestWriteIssues_WritesIndentedJSON(t *testing.T) { + t.Parallel() + + wd, err := os.Getwd() + if err != nil { + t.Fatalf("get working dir: %v", err) + } + testDir := filepath.Join(wd, "testdata", t.Name()) + if err := os.MkdirAll(testDir, 0o755); err != nil { + t.Fatalf("create test dir: %v", err) + } + defer os.RemoveAll(testDir) + outPath := filepath.Join("testdata", t.Name(), "issues.json") + + // Use an empty slice to avoid depending on model.Issue definition. + if err := WriteIssues(outPath, nil); err != nil { + t.Fatalf("WriteIssues error: %v", err) + } + + b, err := os.ReadFile(outPath) + if err != nil { + t.Fatalf("read file: %v", err) + } + + // Ensure valid JSON. + var v any + if err := json.Unmarshal(b, &v); err != nil { + t.Fatalf("invalid json: %v\ncontent=%s", err, string(b)) + } + + // encoding/json.Encoder.Encode adds a trailing newline; and SetIndent should + // produce multi-line output for arrays/objects. + if len(b) == 0 || b[len(b)-1] != '\n' { + t.Fatalf("expected trailing newline") + } +} diff --git a/internal/model/issue.go b/internal/model/issue.go new file mode 100644 index 0000000..f2c41b9 --- /dev/null +++ b/internal/model/issue.go @@ -0,0 +1,217 @@ +package model + +import ( + "encoding/json" + "fmt" + "sort" + "time" +) + +// Category is the top-level grouping for an Issue. +// +// It is a string enum for JSON stability and friendliness. 
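+//
+// For example, CategoryStorage marshals to the JSON string "Storage", and unknown
+// values are rejected on unmarshal (a quick sketch):
+//
+//	b, _ := json.Marshal(CategoryStorage)                          // `"Storage"`
+//	err := json.Unmarshal([]byte(`"NotACategory"`), new(Category)) // non-nil error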
+type Category string + +const ( + CategoryPerformance Category = "Performance" + CategoryMemory Category = "Memory" + CategoryStorage Category = "Storage" + CategoryNetwork Category = "Network" + CategoryThermals Category = "Thermals" + CategoryProcesses Category = "Processes" + CategoryServices Category = "Services" + CategoryLogs Category = "Logs" + CategoryUpdates Category = "Updates" + CategorySecurity Category = "Security" + CategoryKubernetes Category = "Kubernetes" +) + +func (c Category) String() string { return string(c) } + +func (c Category) valid() bool { + switch c { + case "", + CategoryPerformance, + CategoryMemory, + CategoryStorage, + CategoryNetwork, + CategoryThermals, + CategoryProcesses, + CategoryServices, + CategoryLogs, + CategoryUpdates, + CategorySecurity, + CategoryKubernetes: + return true + default: + return false + } +} + +func (c Category) MarshalJSON() ([]byte, error) { + if !c.valid() { + return nil, fmt.Errorf("invalid category %q", string(c)) + } + return json.Marshal(string(c)) +} + +func (c *Category) UnmarshalJSON(b []byte) error { + var s string + if err := json.Unmarshal(b, &s); err != nil { + return err + } + tmp := Category(s) + if !tmp.valid() { + return fmt.Errorf("invalid category %q", s) + } + *c = tmp + return nil +} + +// Priority is the urgency of an Issue. +// +// Priorities are string enums P0..P3 where P0 is most urgent. +type Priority string + +const ( + PriorityP0 Priority = "P0" + PriorityP1 Priority = "P1" + PriorityP2 Priority = "P2" + PriorityP3 Priority = "P3" +) + +func (p Priority) String() string { return string(p) } + +// Weight returns a numeric weight used for sorting. +// Higher weight means more urgent. +func (p Priority) Weight() int { + switch p { + case PriorityP0: + return 4 + case PriorityP1: + return 3 + case PriorityP2: + return 2 + case PriorityP3: + return 1 + default: + return 0 + } +} + +func (p Priority) valid() bool { + switch p { + case "", PriorityP0, PriorityP1, PriorityP2, PriorityP3: + return true + default: + return false + } +} + +func (p Priority) MarshalJSON() ([]byte, error) { + if !p.valid() { + return nil, fmt.Errorf("invalid priority %q", string(p)) + } + return json.Marshal(string(p)) +} + +func (p *Priority) UnmarshalJSON(b []byte) error { + var s string + if err := json.Unmarshal(b, &s); err != nil { + return err + } + tmp := Priority(s) + if !tmp.valid() { + return fmt.Errorf("invalid priority %q", s) + } + *p = tmp + return nil +} + +// State is the lifecycle state of an Issue. +// +// - Open: currently active +// - Acknowledged: active but acknowledged in-memory +// - Resolved: not observed for some time (resolve-after handled by store) +type State string + +const ( + StateOpen State = "Open" + StateAcknowledged State = "Acknowledged" + StateResolved State = "Resolved" +) + +func (s State) String() string { return string(s) } + +func (s State) valid() bool { + switch s { + case "", StateOpen, StateAcknowledged, StateResolved: + return true + default: + return false + } +} + +func (s State) MarshalJSON() ([]byte, error) { + if !s.valid() { + return nil, fmt.Errorf("invalid state %q", string(s)) + } + return json.Marshal(string(s)) +} + +func (s *State) UnmarshalJSON(b []byte) error { + var str string + if err := json.Unmarshal(b, &str); err != nil { + return err + } + tmp := State(str) + if !tmp.valid() { + return fmt.Errorf("invalid state %q", str) + } + *s = tmp + return nil +} + +// Issue is the single unit of information surfaced by ControlTower. 
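+//
+// A typical collector-produced value looks like this (illustrative only; the ID
+// scheme mirrors the tests and is not enforced):
+//
+//	Issue{
+//		ID:       "host:disk:/home:usage",
+//		Category: CategoryStorage,
+//		Priority: PriorityP1,
+//		Title:    "Disk nearly full",
+//		Evidence: map[string]string{"mount": "/home", "used_pct": "93"},
+//	}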
+type Issue struct { + ID string `json:"id"` + Category Category `json:"category"` + Priority Priority `json:"priority"` + Title string `json:"title"` + Details string `json:"details,omitempty"` + Evidence map[string]string `json:"evidence,omitempty"` + SuggestedFix string `json:"suggested_fix,omitempty"` + State State `json:"state"` + FirstSeen time.Time `json:"first_seen"` + LastSeen time.Time `json:"last_seen"` +} + +// Age returns how long the issue has existed (now - FirstSeen). +// If FirstSeen is zero, Age returns 0. +func (i Issue) Age(now time.Time) time.Duration { + if i.FirstSeen.IsZero() { + return 0 + } + if now.Before(i.FirstSeen) { + return 0 + } + return now.Sub(i.FirstSeen) +} + +// SortIssuesDefault sorts issues in-place by Priority desc, then LastSeen desc. +// +// This matches the default view specified in PLAN.md. +func SortIssuesDefault(issues []Issue) { + sort.SliceStable(issues, func(i, j int) bool { + a, b := issues[i], issues[j] + aw, bw := a.Priority.Weight(), b.Priority.Weight() + if aw != bw { + return aw > bw + } + if !a.LastSeen.Equal(b.LastSeen) { + return a.LastSeen.After(b.LastSeen) + } + // Deterministic tie-breaker. + return a.ID < b.ID + }) +} diff --git a/internal/model/issue_test.go b/internal/model/issue_test.go new file mode 100644 index 0000000..fc4234c --- /dev/null +++ b/internal/model/issue_test.go @@ -0,0 +1,75 @@ +package model + +import ( + "encoding/json" + "reflect" + "testing" + "time" +) + +func TestSortIssuesDefault_PriorityThenRecency(t *testing.T) { + t0 := time.Date(2025, 12, 1, 10, 0, 0, 0, time.UTC) + + issues := []Issue{ + {ID: "b", Priority: PriorityP1, LastSeen: t0.Add(10 * time.Second)}, + {ID: "a", Priority: PriorityP0, LastSeen: t0.Add(1 * time.Second)}, + {ID: "c", Priority: PriorityP1, LastSeen: t0.Add(20 * time.Second)}, + {ID: "d", Priority: PriorityP2, LastSeen: t0.Add(30 * time.Second)}, + } + + SortIssuesDefault(issues) + got := []string{issues[0].ID, issues[1].ID, issues[2].ID, issues[3].ID} + want := []string{"a", "c", "b", "d"} // P0 first; within P1 higher LastSeen first + + if !reflect.DeepEqual(got, want) { + t.Fatalf("order mismatch: got %v want %v", got, want) + } +} + +func TestJSONRoundTrip_EnumsStable(t *testing.T) { + when := time.Date(2025, 12, 20, 12, 0, 0, 0, time.UTC) + in := Issue{ + ID: "host:disk:/home:usage", + Category: CategoryStorage, + Priority: PriorityP1, + Title: "Disk nearly full", + Details: "Usage above threshold", + Evidence: map[string]string{"mount": "/home", "used_pct": "93"}, + SuggestedFix: "du -sh * | sort -h", + State: StateOpen, + FirstSeen: when, + LastSeen: when.Add(5 * time.Second), + } + + b, err := json.Marshal(in) + if err != nil { + t.Fatalf("marshal: %v", err) + } + + var out Issue + if err := json.Unmarshal(b, &out); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + // Compare fields we care about; time.Time compares directly. 
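+	// (Timestamps below are compared with Equal: == on time.Time also compares the
+	// Location and any monotonic reading, which a JSON round trip does not preserve.)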
+ if in.ID != out.ID || in.Category != out.Category || in.Priority != out.Priority || in.State != out.State { + t.Fatalf("basic fields mismatch after round-trip: in=%+v out=%+v", in, out) + } + if in.Title != out.Title || in.Details != out.Details || in.SuggestedFix != out.SuggestedFix { + t.Fatalf("string fields mismatch after round-trip") + } + if !reflect.DeepEqual(in.Evidence, out.Evidence) { + t.Fatalf("evidence mismatch after round-trip: in=%v out=%v", in.Evidence, out.Evidence) + } + if !in.FirstSeen.Equal(out.FirstSeen) || !in.LastSeen.Equal(out.LastSeen) { + t.Fatalf("time mismatch after round-trip: in=(%v,%v) out=(%v,%v)", in.FirstSeen, in.LastSeen, out.FirstSeen, out.LastSeen) + } +} + +func TestJSON_InvalidEnumRejected(t *testing.T) { + // Priority invalid should be rejected. + var i Issue + if err := json.Unmarshal([]byte(`{"id":"x","category":"Storage","priority":"P9","title":"t","state":"Open","first_seen":"2025-12-20T12:00:00Z","last_seen":"2025-12-20T12:00:01Z"}`), &i); err == nil { + t.Fatalf("expected error for invalid priority") + } +} diff --git a/internal/store/store.go b/internal/store/store.go new file mode 100644 index 0000000..0577fe3 --- /dev/null +++ b/internal/store/store.go @@ -0,0 +1,182 @@ +package store + +import ( + "sync" + "time" + + "tower/internal/model" +) + +const defaultResolveAfter = 30 * time.Second + +// Store is an in-memory IssueStore. +// +// Responsibilities (per PLAN.md): +// - Dedupe by Issue.ID +// - Track FirstSeen/LastSeen +// - Maintain State (Open/Acknowledged/Resolved) +// - Resolve issues only after resolveAfter duration of continuous absence +// - Acknowledgements are in-memory only (not persisted) +// - Safe for concurrent use +type Store struct { + mu sync.RWMutex + + resolveAfter time.Duration + + // issues holds the latest known version of each issue keyed by stable ID. + issues map[string]model.Issue + + // ack is an in-memory toggle keyed by issue ID. + // If true and the issue is currently present, its state is Acknowledged. + ack map[string]bool +} + +// New returns a new Store. +// If resolveAfter <= 0, a default of 30s is used. +func New(resolveAfter time.Duration) *Store { + if resolveAfter <= 0 { + resolveAfter = defaultResolveAfter + } + return &Store{ + resolveAfter: resolveAfter, + issues: map[string]model.Issue{}, + ack: map[string]bool{}, + } +} + +// Upsert merges "currently true" issues for this tick. +// +// Incoming is deduped by Issue.ID; the first instance wins for non-timestamp fields. +// Timestamps/state are managed by the store. +func (s *Store) Upsert(now time.Time, incoming []model.Issue) { + // Pre-dedupe without locking to keep lock hold times small. + seen := make(map[string]model.Issue, len(incoming)) + for _, iss := range incoming { + if iss.ID == "" { + // Ignore invalid issues. ID is the stable dedupe key. + continue + } + if _, ok := seen[iss.ID]; ok { + continue + } + seen[iss.ID] = iss + } + + s.mu.Lock() + defer s.mu.Unlock() + + for id, in := range seen { + existing, ok := s.issues[id] + if !ok || existing.State == model.StateResolved { + // New issue (or a previously resolved one reappearing): start a new "episode". + in.FirstSeen = now + in.LastSeen = now + in.State = model.StateOpen + if s.ack[id] { + in.State = model.StateAcknowledged + } + s.issues[id] = in + continue + } + + // Existing open/acked issue: update all fields from incoming, but preserve FirstSeen. 
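+		// State is recomputed from the store's ack map rather than copied from the
+		// incoming issue, so collectors never have to set it (and cannot override it).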
+ in.FirstSeen = existing.FirstSeen + in.LastSeen = now + in.State = model.StateOpen + if s.ack[id] { + in.State = model.StateAcknowledged + } + s.issues[id] = in + } + + // Update resolved state for issues not present this tick. + s.applyResolutionsLocked(now, seen) +} + +// Snapshot returns a point-in-time copy of all known issues with their states updated +// according to resolveAfter. +func (s *Store) Snapshot(now time.Time) []model.Issue { + s.mu.Lock() + defer s.mu.Unlock() + + // Apply resolutions based on time. We don't know which IDs are present "this tick" + // from Snapshot alone, so we only resolve by absence window (LastSeen age). + s.applyResolutionsLocked(now, nil) + + out := make([]model.Issue, 0, len(s.issues)) + for _, iss := range s.issues { + out = append(out, deepCopyIssue(iss)) + } + return out +} + +// Acknowledge marks an issue acknowledged (in-memory only). +func (s *Store) Acknowledge(id string) { + if id == "" { + return + } + + s.mu.Lock() + defer s.mu.Unlock() + + s.ack[id] = true + iss, ok := s.issues[id] + if !ok { + return + } + if iss.State != model.StateResolved { + iss.State = model.StateAcknowledged + s.issues[id] = iss + } +} + +// Unacknowledge clears the acknowledgement toggle (in-memory only). +func (s *Store) Unacknowledge(id string) { + if id == "" { + return + } + + s.mu.Lock() + defer s.mu.Unlock() + + delete(s.ack, id) + iss, ok := s.issues[id] + if !ok { + return + } + if iss.State != model.StateResolved { + iss.State = model.StateOpen + s.issues[id] = iss + } +} + +func (s *Store) applyResolutionsLocked(now time.Time, present map[string]model.Issue) { + for id, iss := range s.issues { + // If caller provided a present set and the ID is present, it cannot be resolved. + if present != nil { + if _, ok := present[id]; ok { + continue + } + } + + if iss.State == model.StateResolved { + continue + } + if s.resolveAfter > 0 && now.Sub(iss.LastSeen) >= s.resolveAfter { + iss.State = model.StateResolved + s.issues[id] = iss + } + } +} + +func deepCopyIssue(in model.Issue) model.Issue { + out := in + if in.Evidence != nil { + m := make(map[string]string, len(in.Evidence)) + for k, v := range in.Evidence { + m[k] = v + } + out.Evidence = m + } + return out +} diff --git a/internal/store/store_test.go b/internal/store/store_test.go new file mode 100644 index 0000000..a682692 --- /dev/null +++ b/internal/store/store_test.go @@ -0,0 +1,101 @@ +package store + +import ( + "testing" + "time" + + "tower/internal/model" +) + +func TestStore_Upsert_DedupAndTimestamps(t *testing.T) { + now1 := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC) + now2 := now1.Add(5 * time.Second) + + s := New(30 * time.Second) + + // Same ID twice in one Upsert should dedupe. + s.Upsert(now1, []model.Issue{ + {ID: "i-1", Title: "first"}, + {ID: "i-1", Title: "should be ignored"}, + }) + + snap1 := s.Snapshot(now1) + if len(snap1) != 1 { + t.Fatalf("expected 1 issue, got %d", len(snap1)) + } + if snap1[0].ID != "i-1" { + t.Fatalf("expected id i-1, got %q", snap1[0].ID) + } + if !snap1[0].FirstSeen.Equal(now1) { + t.Fatalf("expected FirstSeen=%v, got %v", now1, snap1[0].FirstSeen) + } + if !snap1[0].LastSeen.Equal(now1) { + t.Fatalf("expected LastSeen=%v, got %v", now1, snap1[0].LastSeen) + } + if snap1[0].State != model.StateOpen { + t.Fatalf("expected State=Open, got %q", snap1[0].State) + } + + // Subsequent Upsert for same ID should preserve FirstSeen and update LastSeen. 
+ s.Upsert(now2, []model.Issue{{ID: "i-1", Title: "updated"}}) + snap2 := s.Snapshot(now2) + if len(snap2) != 1 { + t.Fatalf("expected 1 issue, got %d", len(snap2)) + } + if !snap2[0].FirstSeen.Equal(now1) { + t.Fatalf("expected FirstSeen to remain %v, got %v", now1, snap2[0].FirstSeen) + } + if !snap2[0].LastSeen.Equal(now2) { + t.Fatalf("expected LastSeen=%v, got %v", now2, snap2[0].LastSeen) + } +} + +func TestStore_AckPreservedWhilePresent(t *testing.T) { + now1 := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC) + now2 := now1.Add(1 * time.Second) + + s := New(30 * time.Second) + s.Upsert(now1, []model.Issue{{ID: "i-1", Title: "t"}}) + + s.Acknowledge("i-1") + + // Upsert again while present should remain Acked. + s.Upsert(now2, []model.Issue{{ID: "i-1", Title: "t2"}}) + snap := s.Snapshot(now2) + if len(snap) != 1 { + t.Fatalf("expected 1 issue, got %d", len(snap)) + } + if snap[0].State != model.StateAcknowledged { + t.Fatalf("expected State=Acknowledged, got %q", snap[0].State) + } + + s.Unacknowledge("i-1") + snap2 := s.Snapshot(now2) + if snap2[0].State != model.StateOpen { + t.Fatalf("expected State=Open after unack, got %q", snap2[0].State) + } +} + +func TestStore_ResolvesOnlyAfterAbsenceWindow(t *testing.T) { + resolveAfter := 10 * time.Second + now0 := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC) + + s := New(resolveAfter) + s.Upsert(now0, []model.Issue{{ID: "i-1", Title: "t"}}) + + // Miss a tick shortly after; should not resolve due to flap suppression / window. + s.Upsert(now0.Add(1*time.Second), nil) + snap1 := s.Snapshot(now0.Add(9 * time.Second)) + if len(snap1) != 1 { + t.Fatalf("expected 1 issue, got %d", len(snap1)) + } + if snap1[0].State != model.StateOpen { + t.Fatalf("expected still Open before resolveAfter, got %q", snap1[0].State) + } + + // Still absent beyond resolveAfter => should resolve. + snap2 := s.Snapshot(now0.Add(11 * time.Second)) + if snap2[0].State != model.StateResolved { + t.Fatalf("expected Resolved after absence > resolveAfter, got %q", snap2[0].State) + } +} diff --git a/internal/ui/app.go b/internal/ui/app.go new file mode 100644 index 0000000..19e1155 --- /dev/null +++ b/internal/ui/app.go @@ -0,0 +1,886 @@ +package ui + +import ( + "fmt" + "os" + "sort" + "strings" + "time" + + "github.com/atotto/clipboard" + "github.com/charmbracelet/bubbles/key" + "github.com/charmbracelet/bubbles/table" + "github.com/charmbracelet/bubbles/textinput" + "github.com/charmbracelet/bubbles/viewport" + bubbletea "github.com/charmbracelet/bubbletea" + "github.com/charmbracelet/lipgloss" + + "tower/internal/engine" + "tower/internal/model" +) + +type Focus int + +const ( + focusTable Focus = iota + focusDetails + focusSearch +) + +type SortMode int + +const ( + sortDefault SortMode = iota // Priority desc, LastSeen desc + sortRecency // LastSeen desc + sortCategory // Category asc, Priority desc, LastSeen desc +) + +type AgeMode int + +const ( + AgeCompact AgeMode = iota // 0s, Xds, Xdm, Xdh, Xdd + AgeRelative // Xm ago, Xh ago, Xd ago +) + +type AckFunc func(id string) +type UnackFunc func(id string) +type RefreshNowFunc func() +type ExportFunc func(path string, issues []model.Issue) error + +// Model is the Bubble Tea model for the ControlTower UI. +// +// It intentionally keeps rendering cheap: +// - Table rows are only rebuilt when snapshot or filters/sort change. +// - A 1s tick updates header time/age counters without rebuilding rows. 
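+//
+// Wiring it into a program looks roughly like this (a hedged sketch; the real
+// entrypoint may differ):
+//
+//	m := New("", eng.Snapshots(), eng.RefreshNow, st.Acknowledge, st.Unacknowledge, export.WriteIssues)
+//	p := bubbletea.NewProgram(m, bubbletea.WithAltScreen())
+//	if _, err := p.Run(); err != nil {
+//		log.Fatal(err)
+//	}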
+// +//nolint:structcheck // (fields used conditionally based on callbacks) +type Model struct { + host string + + styles Styles + keys KeyMap + + showHelp bool + + focus Focus + + snap engine.Snapshot + now time.Time + + // Cached view state. + filterPri model.Priority + filterCat model.Category + search string + sortMode SortMode + wideTitle bool + ageMode AgeMode + themeMode ThemeMode + + issueByID map[string]model.Issue + rowsIDs []string + + table table.Model + details viewport.Model + searchIn textinput.Model + + w int + h int + + // callbacks + refreshNow RefreshNowFunc + ack AckFunc + unack UnackFunc + export ExportFunc + + lastExportPath string + + snapshots <-chan engine.Snapshot + + lastP0Count int + noBell bool + loaded bool + exporting bool + + err error +} + +type snapshotMsg engine.Snapshot + +type tickMsg time.Time + +type exportDoneMsg struct{ err error } + +type helpRequestedMsg struct{} + +func New(host string, snapshots <-chan engine.Snapshot, refresh RefreshNowFunc, ack AckFunc, unack UnackFunc, export ExportFunc) Model { + if host == "" { + if h, err := os.Hostname(); err == nil { + host = h + } + } + + t := newIssueTable() + vp := viewport.New(0, 0) + vp.YPosition = 0 + + ti := textinput.New() + ti.Placeholder = "search title/details" + ti.Prompt = "/ " + ti.CharLimit = 256 + ti.Width = 40 + + m := Model{ + host: host, + styles: defaultStylesForMode(ThemeAuto), + keys: defaultKeyMap(), + focus: focusTable, + sortMode: sortDefault, + themeMode: ThemeAuto, + issueByID: map[string]model.Issue{}, + table: t, + details: vp, + searchIn: ti, + snapshots: snapshots, + refreshNow: refresh, + ack: ack, + unack: unack, + export: export, + lastExportPath: "issues.json", + noBell: os.Getenv("NO_BELL") == "1", + loaded: false, + } + m.now = time.Now() + return m +} + +func (m Model) Init() bubbletea.Cmd { + return bubbletea.Batch( + waitForSnapshot(m.snapshots), + tickCmd(), + ) +} + +func waitForSnapshot(ch <-chan engine.Snapshot) bubbletea.Cmd { + return func() bubbletea.Msg { + s, ok := <-ch + if !ok { + return snapshotMsg(engine.Snapshot{}) + } + return snapshotMsg(s) + } +} + +func tickCmd() bubbletea.Cmd { + return bubbletea.Tick(1*time.Second, func(t time.Time) bubbletea.Msg { return tickMsg(t) }) +} + +func (m Model) Update(msg bubbletea.Msg) (bubbletea.Model, bubbletea.Cmd) { + switch msg := msg.(type) { + case tickMsg: + m.now = time.Time(msg) + // Keep ticking for header time and details age, but avoid rebuilding rows. + m.setDetailsToSelected() + return m, tickCmd() + + case snapshotMsg: + s := engine.Snapshot(msg) + // Channel closed: stop listening. + if s.At.IsZero() && s.Collectors == nil && s.Issues == nil { + return m, nil + } + m.snap = s + m.now = time.Now() + m.loaded = true + + // Count P0 before applying to detect new critical issues + newP0Count := 0 + for _, iss := range s.Issues { + if iss.Priority == model.PriorityP0 { + newP0Count++ + } + } + + m.applyViewFromSnapshot() + + // Send bell if new P0 issues appeared (check NO_BELL env var to disable) + if newP0Count > m.lastP0Count && !m.noBell { + // Update counter and send bell + m.lastP0Count = newP0Count + // Print bell character to emit terminal bell + fmt.Fprint(os.Stdout, "\a") + } + m.lastP0Count = newP0Count + return m, waitForSnapshot(m.snapshots) + + case bubbletea.WindowSizeMsg: + m.w, m.h = msg.Width, msg.Height + m.layout() + return m, nil + } + + // Search input mode. 
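+	// While the search box has focus, only Cancel/Clear/Apply are intercepted here;
+	// every other message is forwarded to the textinput so normal typing works.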
+ if m.focus == focusSearch { + switch { + case keyMatch(msg, m.keys.Cancel): + m.focus = focusTable + m.searchIn.Blur() + m.searchIn.SetValue(m.search) + return m, nil + case keyMatch(msg, m.keys.ClearFilters): + m.focus = focusTable + m.searchIn.Blur() + m.search = "" + m.applyViewFromSnapshot() + return m, nil + case keyMatch(msg, m.keys.Apply): + m.search = strings.TrimSpace(m.searchIn.Value()) + m.focus = focusTable + m.searchIn.Blur() + m.applyViewFromSnapshot() + return m, nil + } + + var cmd bubbletea.Cmd + m.searchIn, cmd = m.searchIn.Update(msg) + return m, cmd + } + + // Help overlay mode - only help-related keys are processed. + if m.showHelp { + switch { + case keyMatch(msg, m.keys.Help), keyMatch(msg, m.keys.Cancel): + m.showHelp = false + return m, nil + } + // Ignore all other keys while help is shown + return m, nil + } + + // Global keybindings. + switch { + case keyMatch(msg, m.keys.Quit): + return m, bubbletea.Quit + + case keyMatch(msg, m.keys.RefreshNow): + if m.refreshNow != nil { + m.refreshNow() + } + return m, nil + + case keyMatch(msg, m.keys.Search): + m.focus = focusSearch + m.searchIn.SetValue(m.search) + m.searchIn.CursorEnd() + m.searchIn.Focus() + return m, nil + + case keyMatch(msg, m.keys.Priority): + m.cyclePriorityFilter() + m.applyViewFromSnapshot() + return m, nil + + case keyMatch(msg, m.keys.PriorityP0): + m.filterPri = model.PriorityP0 + m.applyViewFromSnapshot() + return m, nil + + case keyMatch(msg, m.keys.PriorityP1): + m.filterPri = model.PriorityP1 + m.applyViewFromSnapshot() + return m, nil + + case keyMatch(msg, m.keys.PriorityP2): + m.filterPri = model.PriorityP2 + m.applyViewFromSnapshot() + return m, nil + + case keyMatch(msg, m.keys.PriorityP3): + m.filterPri = model.PriorityP3 + m.applyViewFromSnapshot() + return m, nil + + case keyMatch(msg, m.keys.Category): + m.cycleCategoryFilter() + m.applyViewFromSnapshot() + return m, nil + + case keyMatch(msg, m.keys.Sort): + m.sortMode = (m.sortMode + 1) % 3 + m.applyViewFromSnapshot() + return m, nil + + case keyMatch(msg, m.keys.FocusNext): + if m.focus == focusTable { + m.focus = focusDetails + m.table.Blur() + // viewport has no Focus/Blur; we just route keys. 
+			return m, nil
+		}
+		m.focus = focusTable
+		m.table.Focus()
+		return m, nil
+
+	case keyMatch(msg, m.keys.AckToggle):
+		m.toggleAckSelected()
+		return m, nil
+
+	case keyMatch(msg, m.keys.AckAll):
+		m.ackAllVisible()
+		return m, nil
+
+	case keyMatch(msg, m.keys.Export):
+		if m.export != nil {
+			m.exporting = true
+			path := m.lastExportPath
+			issues := m.snap.Issues
+			return m, func() bubbletea.Msg {
+				err := m.export(path, issues)
+				return exportDoneMsg{err: err}
+			}
+		}
+		return m, nil
+
+	case keyMatch(msg, m.keys.Help):
+		m.showHelp = !m.showHelp
+		return m, nil
+
+	case keyMatch(msg, m.keys.JumpToTop):
+		if len(m.rowsIDs) > 0 {
+			m.table.SetCursor(0)
+			m.setDetailsToSelected()
+		}
+		return m, nil
+
+	case keyMatch(msg, m.keys.JumpToBottom):
+		if len(m.rowsIDs) > 0 {
+			m.table.SetCursor(len(m.rowsIDs) - 1)
+			m.setDetailsToSelected()
+		}
+		return m, nil
+
+	case keyMatch(msg, m.keys.Copy):
+		m.copySelectedToClipboard()
+		return m, nil
+
+	case keyMatch(msg, m.keys.ToggleWideTitle):
+		m.wideTitle = !m.wideTitle
+		m.layout()
+		// Rebuild rows to apply the new title width.
+		m.applyViewFromSnapshot()
+		return m, nil
+
+	case keyMatch(msg, m.keys.ToggleAgeFormat):
+		m.ageMode = (m.ageMode + 1) % 2
+		m.applyViewFromSnapshot()
+		return m, nil
+
+	case keyMatch(msg, m.keys.ToggleTheme):
+		// Cycle through theme modes: Auto -> Light -> Dark -> Auto.
+		m.themeMode = (m.themeMode + 1) % 3
+		m.styles = defaultStylesForMode(m.themeMode)
+		// Refresh the view with the new styles.
+		m.applyViewFromSnapshot()
+		return m, nil
+
+	case keyMatch(msg, m.keys.ClearFilters):
+		m.filterPri = ""
+		m.filterCat = ""
+		m.search = ""
+		m.applyViewFromSnapshot()
+		return m, nil
+	}
+
+	// Non-key messages produced by commands (such as export completion) are handled
+	// here, before focus routing, so the table/viewport updates below cannot swallow them.
+	switch msg := msg.(type) {
+	case exportDoneMsg:
+		m.exporting = false
+		m.err = msg.err
+		return m, nil
+	}
+
+	// Focus-specific updates.
+	// Note: bubbles/table already handles page navigation keys (PgUp/PgDn, Ctrl+u/Ctrl+d, Home/End)
+	// natively, so we don't need to override them here.
+	switch m.focus {
+	case focusTable:
+		var cmd bubbletea.Cmd
+		m.table, cmd = m.table.Update(msg)
+		// When selection changes, update details content.
+		m.setDetailsToSelected()
+		return m, cmd
+
+	case focusDetails:
+		var cmd bubbletea.Cmd
+		m.details, cmd = m.details.Update(msg)
+		return m, cmd
+	}
+
+	return m, nil
+}
+
+func (m *Model) layout() {
+	if m.w <= 0 || m.h <= 0 {
+		return
+	}
+
+	// Header: 1 line.
+	headerH := 1
+	// Search bar: 1 line (shown only in search focus).
+	searchH := 0
+	if m.focus == focusSearch {
+		searchH = 1
+	}
+
+	bodyH := m.h - headerH - searchH
+	if bodyH < 4 {
+		bodyH = 4
+	}
+
+	detailsH := bodyH / 3
+	tableH := bodyH - detailsH
+	if tableH < 3 {
+		tableH = 3
+	}
+
+	// Table width includes 2-character padding from bubbles/table.
+	// Allocate Title to consume remaining width.
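+	// Fixed widths below are, in order: Pri, Cat, Age, State; Title absorbs the rest.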
+ priW, catW, ageW, stateW := 3, 12, 7, 13 + fixed := priW + catW + ageW + stateW + 4 // separators/padding + titleW := m.w - fixed + if titleW < 20 { + titleW = 20 + } + if m.wideTitle { + // Wide mode: allocate more space to Title column (up to 2x) + titleW = titleW * 2 + // Ensure other columns still have minimum space + maxTitle := m.w - fixed + if titleW > maxTitle { + titleW = maxTitle + } + } + + cols := m.table.Columns() + for i := range cols { + switch cols[i].Title { + case colPri: + cols[i].Width = priW + case colCat: + cols[i].Width = catW + case colTitle: + cols[i].Width = titleW + case colAge: + cols[i].Width = ageW + case colState: + cols[i].Width = stateW + } + } + m.table.SetColumns(cols) + m.table.SetHeight(tableH) + + m.details.Width = m.w + m.details.Height = detailsH +} + +func (m *Model) applyViewFromSnapshot() { + // Build ID index for O(1) selection lookup. + m.issueByID = make(map[string]model.Issue, len(m.snap.Issues)) + for _, iss := range m.snap.Issues { + m.issueByID[iss.ID] = iss + } + + // Show loading state before first snapshot arrives + if !m.loaded { + msg := "Loading collector data... Please wait." + m.details.SetContent(m.styles.Muted.Render(msg)) + return + } + + // Filter. + filtered := make([]model.Issue, 0, len(m.snap.Issues)) + for _, iss := range m.snap.Issues { + if m.filterPri != "" && iss.Priority != m.filterPri { + continue + } + if m.filterCat != "" && iss.Category != m.filterCat { + continue + } + if m.search != "" { + q := strings.ToLower(m.search) + hit := strings.Contains(strings.ToLower(iss.Title), q) || strings.Contains(strings.ToLower(iss.Details), q) + if !hit { + continue + } + } + filtered = append(filtered, iss) + } + + // Sort. + sort.SliceStable(filtered, func(i, j int) bool { + a, b := filtered[i], filtered[j] + switch m.sortMode { + case sortRecency: + if !a.LastSeen.Equal(b.LastSeen) { + return a.LastSeen.After(b.LastSeen) + } + return a.ID < b.ID + case sortCategory: + if a.Category != b.Category { + return a.Category < b.Category + } + aw, bw := a.Priority.Weight(), b.Priority.Weight() + if aw != bw { + return aw > bw + } + if !a.LastSeen.Equal(b.LastSeen) { + return a.LastSeen.After(b.LastSeen) + } + return a.ID < b.ID + default: + aw, bw := a.Priority.Weight(), b.Priority.Weight() + if aw != bw { + return aw > bw + } + if !a.LastSeen.Equal(b.LastSeen) { + return a.LastSeen.After(b.LastSeen) + } + return a.ID < b.ID + } + }) + + rows, ids := buildRows(m.snap.At, m.ageMode, filtered) + m.rowsIDs = ids + + prevSelID := m.selectedIssueID() + m.table.SetRows(rows) + if len(rows) == 0 { + m.table.SetCursor(0) + msg := "All systems healthy. No issues detected.\n\nPress r to refresh, / to search past logs" + m.details.SetContent(m.styles.Muted.Render(msg)) + return + } + + // Try to keep selection stable. 
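+	// (Match by issue ID rather than row index: filtering and sorting can reorder
+	// rows between snapshots, so the previous cursor index may point at a different issue.)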
+ if prevSelID != "" { + for i, id := range ids { + if id == prevSelID { + m.table.SetCursor(i) + break + } + } + } + + m.setDetailsToSelected() + m.layout() +} + +func (m *Model) selectedIssueID() string { + idx := m.table.Cursor() + if idx < 0 || idx >= len(m.rowsIDs) { + return "" + } + return m.rowsIDs[idx] +} + +func (m *Model) setDetailsToSelected() { + id := m.selectedIssueID() + iss, ok := m.issueByID[id] + if !ok { + m.details.SetContent(m.styles.Muted.Render("No issue selected.")) + return + } + m.details.SetContent(renderIssueDetails(m.now, m.ageMode, iss)) +} + +func (m *Model) toggleAckSelected() { + id := m.selectedIssueID() + if id == "" { + return + } + iss, ok := m.issueByID[id] + if !ok { + return + } + if iss.State == model.StateResolved { + return + } + newState := model.StateAcknowledged + if iss.State == model.StateAcknowledged { + newState = model.StateOpen + } + + // Callbacks (store-backed if wired). + if newState == model.StateAcknowledged { + if m.ack != nil { + m.ack(id) + } + } else { + if m.unack != nil { + m.unack(id) + } + } + + // Optimistic local update (store will correct on next snapshot). + iss.State = newState + m.issueByID[id] = iss + + // Update state column cheaply. + idx := m.table.Cursor() + rows := m.table.Rows() + if idx >= 0 && idx < len(rows) { + rows[idx][4] = iss.State.String() // State column index + m.table.SetRows(rows) + } + m.setDetailsToSelected() +} + +func (m *Model) ackAllVisible() { + if m.ack == nil { + return + } + + // Track updates for table refresh. + updated := false + rows := m.table.Rows() + + // Iterate through all visible issues and acknowledge them. + for idx, id := range m.rowsIDs { + iss, ok := m.issueByID[id] + if !ok { + continue + } + // Only acknowledge open issues, not already acked or resolved. + if iss.State == model.StateOpen { + m.ack(id) + + // Optimistic local update. + iss.State = model.StateAcknowledged + m.issueByID[id] = iss + + // Update state column cheaply. + if idx < len(rows) { + rows[idx][4] = iss.State.String() // State column index + updated = true + } + } + } + + if updated { + m.table.SetRows(rows) + m.setDetailsToSelected() + } +} + +func (m *Model) copySelectedToClipboard() { + id := m.selectedIssueID() + if id == "" { + return + } + iss, ok := m.issueByID[id] + if !ok { + return + } + + // Copy SuggestedFix if available, otherwise fallback to Title + text := iss.SuggestedFix + if text == "" { + text = iss.Title + } + + if err := clipboard.WriteAll(text); err != nil { + m.err = fmt.Errorf("Failed to copy to clipboard: %w. 
Is xclip/xsel installed?", err) + return + } + + // Show confirmation in details pane + m.details.SetContent(m.styles.Muted.Render("Copied to clipboard\n\n") + renderIssueDetails(m.now, m.ageMode, iss)) +} + +func (m *Model) cyclePriorityFilter() { + order := []model.Priority{"", model.PriorityP0, model.PriorityP1, model.PriorityP2, model.PriorityP3} + m.filterPri = cycle(order, m.filterPri) +} + +func (m *Model) cycleCategoryFilter() { + order := []model.Category{ + "", + model.CategoryPerformance, + model.CategoryMemory, + model.CategoryStorage, + model.CategoryNetwork, + model.CategoryThermals, + model.CategoryProcesses, + model.CategoryServices, + model.CategoryLogs, + model.CategoryUpdates, + model.CategorySecurity, + model.CategoryKubernetes, + } + m.filterCat = cycle(order, m.filterCat) +} + +func cycle[T comparable](order []T, cur T) T { + for i := range order { + if order[i] == cur { + return order[(i+1)%len(order)] + } + } + return order[0] +} + +func (m Model) View() string { + // Show help overlay when active + if m.showHelp { + return renderHelp(m.keys, m.styles) + } + + header := m.renderHeader() + + searchLine := "" + if m.focus == focusSearch { + searchLine = m.searchIn.View() + } + + tableView := m.table.View() + detailsView := m.renderDetailsPane() + + parts := []string{header} + if searchLine != "" { + parts = append(parts, searchLine) + } + parts = append(parts, tableView, detailsView) + return lipgloss.JoinVertical(lipgloss.Left, parts...) +} + +func (m Model) renderHeader() string { + now := m.now + if now.IsZero() { + now = time.Now() + } + + age := "-" + if !m.snap.At.IsZero() { + age = formatAge(now.Sub(m.snap.At)) + } + + p0, p1, p2, p3 := 0, 0, 0, 0 + for _, iss := range m.snap.Issues { + switch iss.Priority { + case model.PriorityP0: + p0++ + case model.PriorityP1: + p1++ + case model.PriorityP2: + p2++ + case model.PriorityP3: + p3++ + } + } + + okC, degC, errC := 0, 0, 0 + for _, h := range m.snap.Collectors { + switch h.Status.Health { + case "OK": + okC++ + case "DEGRADED": + degC++ + case "ERROR": + errC++ + } + } + + priFilter := "all" + if m.filterPri != "" { + priFilter = m.filterPri.String() + } + catFilter := "all" + if m.filterCat != "" { + catFilter = m.filterCat.String() + } + sortLabel := map[SortMode]string{sortDefault: "pri→recent", sortRecency: "recent", sortCategory: "cat"}[m.sortMode] + + left := fmt.Sprintf( + "host=%s time=%s age=%s P0=%d P1=%d P2=%d P3=%d collectors: ✓%d ⚠%d ✗%d", + m.host, + now.Local().Format("15:04:05"), + age, + p0, p1, p2, p3, + okC, degC, errC, + ) + + // Add count warning when approaching 200 issues cap (90% = 180) + total := p0 + p1 + p2 + p3 + if total >= 180 { + warning := fmt.Sprintf(" [~%d/200]", total) + left += m.styles.Error.Render(warning) + } + + // Small right-side indicator for filters. + priStr := fmt.Sprintf("pri=%s", priFilter) + catStr := fmt.Sprintf("cat=%s", catFilter) + if m.filterPri != "" { + priStr = m.styles.FilterActive.Render(priStr) + } + if m.filterCat != "" { + catStr = m.styles.FilterActive.Render(catStr) + } + right := fmt.Sprintf("filter %s %s q=%q sort=%s", priStr, catStr, m.search, sortLabel) + + if m.w > 0 { + // Truncate right if needed. 
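+		// (lipgloss.Width measures printable cell width, ignoring ANSI styling, so
+		// styled segments and wide runes are counted correctly.)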
+ space := m.w - lipgloss.Width(left) - 1 + if space < 0 { + space = 0 + } + if lipgloss.Width(right) > space { + right = lipgloss.NewStyle().MaxWidth(space).Render(right) + } + padLen := 0 + if space > 0 { + padLen = max(1, space-lipgloss.Width(right)) + } + pad := strings.Repeat(" ", padLen) + return m.styles.HeaderBar.Render(left + pad + right) + } + + return m.styles.HeaderBar.Render(left + " " + right) +} + +func (m Model) renderDetailsPane() string { + title := "Details" + if m.focus == focusDetails { + title = title + " (focus)" + } + + body := m.details.View() + if m.exporting { + body = "Exporting issues to " + m.lastExportPath + "..." + } + if m.err != nil { + body = body + "\n" + m.styles.Error.Render(m.err.Error()) + } + + // Keep the details title cheap and avoid borders (can be expensive). + return m.styles.DetailsTitle.Render(title) + "\n" + body +} + +func renderHelp(keys KeyMap, styles Styles) string { + // Create a temporary help model and render it + help := NewHelp() + help.Show() + return help.Render(keys, styles) +} + +func keyMatch(msg bubbletea.Msg, b key.Binding) bool { + km, ok := msg.(bubbletea.KeyMsg) + if !ok { + return false + } + return key.Matches(km, b) +} + +func max(a, b int) int { + if a > b { + return a + } + return b +} diff --git a/internal/ui/details.go b/internal/ui/details.go new file mode 100644 index 0000000..faf8610 --- /dev/null +++ b/internal/ui/details.go @@ -0,0 +1,105 @@ +package ui + +import ( + "fmt" + "sort" + "strings" + "time" + + "tower/internal/model" +) + +// getRollupSamples extracts sample IDs from a rollup issue's evidence. +func getRollupSamples(iss model.Issue) []string { + samplesStr := iss.Evidence["samples"] + if samplesStr == "" { + return nil + } + parts := strings.Split(samplesStr, " | ") + result := make([]string, 0, len(parts)) + for _, p := range parts { + p = strings.TrimSpace(p) + if p != "" { + result = append(result, p) + } + } + return result +} + +// isRollupIssue checks if an issue is a rollup issue. 
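+//
+// A rollup carries its member identifiers in Evidence["samples"], separated by
+// " | "; for example (hypothetical values):
+//
+//	Evidence: map[string]string{"samples": "ns1/pod-a | ns1/pod-b | ns2/pod-c"}
+//
+// which getRollupSamples above splits back into three entries.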
+func isRollupIssue(iss model.Issue) bool { + if strings.HasPrefix(iss.ID, "k8s:rollup:") { + return true + } + if iss.Category == model.CategoryKubernetes && strings.Contains(strings.ToLower(iss.Title), "rollup") { + return true + } + return false +} + +func renderIssueDetails(now time.Time, mode AgeMode, iss model.Issue) string { + var b strings.Builder + + fmt.Fprintf(&b, "Title: %s\n", oneLine(iss.Title)) + fmt.Fprintf(&b, "Priority: %s Category: %s State: %s\n", iss.Priority, iss.Category, iss.State) + fmt.Fprintf(&b, "FirstSeen: %s\n", fmtTime(iss.FirstSeen)) + fmt.Fprintf(&b, "LastSeen: %s\n", fmtTime(iss.LastSeen)) + fmt.Fprintf(&b, "Age: %s\n", formatAgeWithMode(iss.Age(now), mode)) + + if strings.TrimSpace(iss.Details) != "" { + b.WriteString("\nDetails\n") + b.WriteString(indentBlock(strings.TrimSpace(iss.Details), " ")) + b.WriteString("\n") + } + + // Show affected issues for rollup issues + if isRollupIssue(iss) { + samples := getRollupSamples(iss) + if len(samples) > 0 { + b.WriteString("\nAffected Issues\n") + // Show up to 10 samples + maxSamples := 10 + if len(samples) > maxSamples { + samples = samples[:maxSamples] + } + for _, sample := range samples { + fmt.Fprintf(&b, " • %s\n", sample) + } + } + } + + if len(iss.Evidence) > 0 { + b.WriteString("\nEvidence\n") + keys := make([]string, 0, len(iss.Evidence)) + for k := range iss.Evidence { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + fmt.Fprintf(&b, " %s: %s\n", k, iss.Evidence[k]) + } + } + + if strings.TrimSpace(iss.SuggestedFix) != "" { + b.WriteString("\nSuggested Fix\n") + b.WriteString(indentBlock(strings.TrimSpace(iss.SuggestedFix), " ")) + b.WriteString("\n") + } + + return strings.TrimRight(b.String(), "\n") +} + +func fmtTime(t time.Time) string { + if t.IsZero() { + return "-" + } + return t.Local().Format("2006-01-02 15:04:05") +} + +func indentBlock(s, prefix string) string { + lines := strings.Split(s, "\n") + for i := range lines { + lines[i] = prefix + lines[i] + } + return strings.Join(lines, "\n") +} diff --git a/internal/ui/help.go b/internal/ui/help.go new file mode 100644 index 0000000..ee70961 --- /dev/null +++ b/internal/ui/help.go @@ -0,0 +1,152 @@ +package ui + +import ( + "fmt" + "strings" + + "github.com/charmbracelet/bubbles/key" +) + +// HelpModel is the help overlay model. +type HelpModel struct { + visible bool +} + +// NewHelp creates a new help model. +func NewHelp() HelpModel { + return HelpModel{ + visible: false, + } +} + +// Show displays the help overlay. +func (m *HelpModel) Show() { + m.visible = true +} + +// Hide hides the help overlay. +func (m *HelpModel) Hide() { + m.visible = false +} + +// Toggle toggles the help overlay visibility. +func (m *HelpModel) Toggle() { + m.visible = !m.visible +} + +// IsVisible returns true if the help overlay is visible. +func (m HelpModel) IsVisible() bool { + return m.visible +} + +// Render renders the help overlay. +func (m HelpModel) Render(keys KeyMap, styles Styles) string { + if !m.visible { + return "" + } + + var b strings.Builder + + // Title + title := styles.HeaderBar.Render("Keybindings - Press ? 
or esc to close") + b.WriteString(title) + b.WriteString("\n\n") + + // Define keybinding groups + groups := []struct { + name string + binds []keyHelp + }{ + { + name: "Global", + binds: []keyHelp{ + {keys.Help, "Show/hide this help"}, + {keys.Quit, "Quit the application"}, + {keys.RefreshNow, "Refresh data now"}, + }, + }, + { + name: "Filters", + binds: []keyHelp{ + {keys.Search, "Search by title/details"}, + {keys.Priority, "Cycle priority filter"}, + {keys.Category, "Cycle category filter"}, + }, + }, + { + name: "Navigation", + binds: []keyHelp{ + {keys.FocusNext, "Toggle focus (table/details)"}, + {keys.Sort, "Cycle sort order"}, + {keys.JumpToTop, "Jump to top (g)"}, + {keys.JumpToBottom, "Jump to bottom (G)"}, + {keys.Down, "Move down (j)"}, + {keys.Up, "Move up (k)"}, + }, + }, + { + name: "Actions", + binds: []keyHelp{ + {keys.AckToggle, "Acknowledge/unacknowledge issue"}, + {keys.Export, "Export issues to JSON"}, + }, + }, + } + + // Render each group + for i, group := range groups { + if i > 0 { + b.WriteString("\n") + } + + // Group header + groupTitle := styles.HeaderKey.Render(group.name + ":") + b.WriteString(groupTitle) + b.WriteString("\n") + + // Keybindings in this group + for _, kb := range group.binds { + line := renderKeyHelp(kb, styles) + b.WriteString(line) + b.WriteString("\n") + } + } + + // Render collector health icon legend + b.WriteString("\n") + legendTitle := styles.HeaderKey.Render("Legend:") + b.WriteString(legendTitle) + b.WriteString("\n") + legendText := styles.HeaderVal.Render(" Collector health: ✓ (OK), ⚠ (DEGRADED), ✗ (ERROR)") + b.WriteString(legendText) + b.WriteString("\n") + + return b.String() +} + +type keyHelp struct { + binding key.Binding + help string +} + +func renderKeyHelp(kb keyHelp, styles Styles) string { + // Get key names from the binding + keys := kb.binding.Keys() + if len(keys) == 0 { + return "" + } + + // Format key names + keyStr := strings.Join(keys, ", ") + keyStyled := styles.HeaderVal.Render(keyStr) + + // Format help text + helpStyled := styles.HeaderVal.Render(kb.help) + + // Combine with padding + padding := "" + if needed := 10 - len(keyStr); needed > 0 { + padding = strings.Repeat(" ", needed) + } + return fmt.Sprintf(" %s%s%s", keyStyled, padding, helpStyled) +} diff --git a/internal/ui/keys.go b/internal/ui/keys.go new file mode 100644 index 0000000..f5e43a7 --- /dev/null +++ b/internal/ui/keys.go @@ -0,0 +1,141 @@ +package ui + +import "github.com/charmbracelet/bubbles/key" + +// KeyMap defines UI keybindings. +// +// Note: Bubble Tea will also handle ctrl+c; we additionally bind q for quit. 
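+//
+// Bindings can be adjusted after construction if needed (a hedged sketch):
+//
+//	km := defaultKeyMap()
+//	km.Quit = key.NewBinding(key.WithKeys("q", "ctrl+c"), key.WithHelp("q", "quit"))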
+ +type KeyMap struct { + Quit key.Binding + RefreshNow key.Binding + Search key.Binding + Priority key.Binding + PriorityP0 key.Binding + PriorityP1 key.Binding + PriorityP2 key.Binding + PriorityP3 key.Binding + Category key.Binding + Sort key.Binding + FocusNext key.Binding + AckToggle key.Binding + AckAll key.Binding + Export key.Binding + ToggleTheme key.Binding + Help key.Binding + JumpToTop key.Binding + JumpToBottom key.Binding + Down key.Binding + Up key.Binding + Copy key.Binding + ToggleWideTitle key.Binding + ToggleAgeFormat key.Binding + ClearFilters key.Binding + + Cancel key.Binding + Apply key.Binding +} + +func defaultKeyMap() KeyMap { + return KeyMap{ + Quit: key.NewBinding( + key.WithKeys("q"), + key.WithHelp("q", "quit"), + ), + RefreshNow: key.NewBinding( + key.WithKeys("r"), + key.WithHelp("r", "refresh now"), + ), + Search: key.NewBinding( + key.WithKeys("/"), + key.WithHelp("/", "search"), + ), + Priority: key.NewBinding( + key.WithKeys("p"), + key.WithHelp("p", "priority filter"), + ), + PriorityP0: key.NewBinding( + key.WithKeys("0"), + key.WithHelp("0", "P0 only"), + ), + PriorityP1: key.NewBinding( + key.WithKeys("1"), + key.WithHelp("1", "P1 only"), + ), + PriorityP2: key.NewBinding( + key.WithKeys("2"), + key.WithHelp("2", "P2 only"), + ), + PriorityP3: key.NewBinding( + key.WithKeys("3"), + key.WithHelp("3", "P3 only"), + ), + Category: key.NewBinding( + key.WithKeys("c"), + key.WithHelp("c", "category filter"), + ), + Sort: key.NewBinding( + key.WithKeys("s"), + key.WithHelp("s", "cycle sort"), + ), + FocusNext: key.NewBinding( + key.WithKeys("tab"), + key.WithHelp("tab", "focus"), + ), + AckToggle: key.NewBinding( + key.WithKeys("a"), + key.WithHelp("a", "ack/unack"), + ), + AckAll: key.NewBinding( + key.WithKeys("A", "shift+a"), + key.WithHelp("A", "ack all visible"), + ), + Export: key.NewBinding( + key.WithKeys("E"), + key.WithHelp("E", "export"), + ), + ToggleTheme: key.NewBinding( + key.WithKeys("T", "shift+t"), + key.WithHelp("T", "toggle theme"), + ), + Help: key.NewBinding( + key.WithKeys("?"), + key.WithHelp("?", "show help"), + ), + JumpToTop: key.NewBinding( + key.WithKeys("g"), + key.WithHelp("g", "jump to top"), + ), + JumpToBottom: key.NewBinding( + key.WithKeys("G", "shift+g"), + key.WithHelp("G", "jump to bottom"), + ), + Down: key.NewBinding( + key.WithKeys("j"), + key.WithHelp("j", "down"), + ), + Up: key.NewBinding( + key.WithKeys("k"), + key.WithHelp("k", "up"), + ), + Copy: key.NewBinding( + key.WithKeys("y"), + key.WithHelp("y", "copy fix"), + ), + ToggleWideTitle: key.NewBinding( + key.WithKeys("t"), + key.WithHelp("t", "wide title"), + ), + ToggleAgeFormat: key.NewBinding( + key.WithKeys("d"), + key.WithHelp("d", "age format"), + ), + ClearFilters: key.NewBinding( + key.WithKeys("esc"), + key.WithHelp("esc", "clear filters"), + ), + + Cancel: key.NewBinding(key.WithKeys("esc")), + Apply: key.NewBinding(key.WithKeys("enter")), + } +} diff --git a/internal/ui/styles.go b/internal/ui/styles.go new file mode 100644 index 0000000..b7f2b41 --- /dev/null +++ b/internal/ui/styles.go @@ -0,0 +1,122 @@ +package ui + +import "github.com/charmbracelet/lipgloss" + +// ThemeMode represents the UI theme mode. +type ThemeMode int + +const ( + ThemeAuto ThemeMode = iota + ThemeLight + ThemeDark +) + +// Styles centralizes all lipgloss styling. +// Keep these simple: excessive styling can slow rendering at high row counts. 
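+//
+// Individual styles can be tweaked after picking a theme, for example (sketch):
+//
+//	s := DarkTheme()
+//	s.P0 = s.P0.Underline(true)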
+
+type Styles struct {
+	HeaderBar    lipgloss.Style
+	HeaderKey    lipgloss.Style
+	HeaderVal    lipgloss.Style
+	FilterActive lipgloss.Style
+
+	TableHeader lipgloss.Style
+	TableCell   lipgloss.Style
+
+	P0 lipgloss.Style
+	P1 lipgloss.Style
+	P2 lipgloss.Style
+	P3 lipgloss.Style
+
+	StateOpen lipgloss.Style
+	StateAck  lipgloss.Style
+	StateRes  lipgloss.Style
+
+	DetailsTitle lipgloss.Style
+	DetailsBody  lipgloss.Style
+	Muted        lipgloss.Style
+	Error        lipgloss.Style
+}
+
+// LightTheme returns light theme styles.
+func LightTheme() Styles {
+	base := lipgloss.NewStyle()
+	muted := base.Foreground(lipgloss.Color("8"))
+
+	return Styles{
+		HeaderBar: base.
+			Background(lipgloss.Color("236")).
+			Foreground(lipgloss.Color("252")).
+			Padding(0, 1),
+		HeaderKey:    base.Foreground(lipgloss.Color("250")).Bold(true),
+		HeaderVal:    base.Foreground(lipgloss.Color("254")),
+		FilterActive: base.Bold(true).Foreground(lipgloss.Color("46")),
+
+		TableHeader: base.Foreground(lipgloss.Color("252")).Bold(true),
+		TableCell:   base.Foreground(lipgloss.Color("252")),
+
+		P0: base.Foreground(lipgloss.Color("9")).Bold(true),
+		P1: base.Foreground(lipgloss.Color("208")).Bold(true),
+		P2: base.Foreground(lipgloss.Color("11")),
+		P3: base.Foreground(lipgloss.Color("10")),
+
+		StateOpen: base.Foreground(lipgloss.Color("252")),
+		StateAck:  base.Foreground(lipgloss.Color("14")),
+		StateRes:  muted,
+
+		DetailsTitle: base.Bold(true).Foreground(lipgloss.Color("252")),
+		DetailsBody:  base.Foreground(lipgloss.Color("252")),
+		Muted:        muted,
+		Error:        base.Foreground(lipgloss.Color("9")),
+	}
+}
+
+// DarkTheme returns dark theme styles with better contrast.
+func DarkTheme() Styles {
+	base := lipgloss.NewStyle()
+	muted := base.Foreground(lipgloss.Color("245"))
+
+	return Styles{
+		HeaderBar: base.
+			Background(lipgloss.Color("238")).
+			Foreground(lipgloss.Color("231")).
+			Padding(0, 1),
+		HeaderKey:    base.Foreground(lipgloss.Color("159")).Bold(true),
+		HeaderVal:    base.Foreground(lipgloss.Color("231")),
+		FilterActive: base.Bold(true).Foreground(lipgloss.Color("84")),
+
+		TableHeader: base.Foreground(lipgloss.Color("231")).Bold(true),
+		TableCell:   base.Foreground(lipgloss.Color("231")),
+
+		P0: base.Foreground(lipgloss.Color("203")).Bold(true),
+		P1: base.Foreground(lipgloss.Color("229")).Bold(true),
+		P2: base.Foreground(lipgloss.Color("48")),
+		P3: base.Foreground(lipgloss.Color("42")),
+
+		StateOpen: base.Foreground(lipgloss.Color("231")),
+		StateAck:  base.Foreground(lipgloss.Color("48")),
+		StateRes:  muted,
+
+		DetailsTitle: base.Bold(true).Foreground(lipgloss.Color("231")),
+		DetailsBody:  base.Foreground(lipgloss.Color("231")),
+		Muted:        muted,
+		Error:        base.Foreground(lipgloss.Color("203")),
+	}
+}
+
+func defaultStyles() Styles {
+	// Default to light theme for backwards compatibility
+	return LightTheme()
+}
+
+func defaultStylesForMode(themeMode ThemeMode) Styles {
+	switch themeMode {
+	case ThemeLight:
+		return LightTheme()
+	case ThemeDark:
+		return DarkTheme()
+	default:
+		// Auto mode defaults to light theme
+		return LightTheme()
+	}
+}
diff --git a/internal/ui/table.go b/internal/ui/table.go
new file mode 100644
index 0000000..6c5508b
--- /dev/null
+++ b/internal/ui/table.go
@@ -0,0 +1,131 @@
+package ui
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/charmbracelet/bubbles/table"
+	"tower/internal/model"
+)
+
+// Column keys, used for future sort expansions.
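+//
+// A future sort expansion could key off these names, roughly along these
+// lines (illustrative sketch only; sortCol is hypothetical and no column
+// sort is implemented in this file):
+//
+//	switch sortCol {
+//	case colAge:
+//		sort.SliceStable(issues, func(i, j int) bool {
+//			return issues[i].Age(now) > issues[j].Age(now)
+//		})
+//	}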
+const (
+	colPri   = "Pri"
+	colCat   = "Cat"
+	colTitle = "Title"
+	colAge   = "Age"
+	colState = "State"
+)
+
+func newIssueTable() table.Model {
+	cols := []table.Column{
+		{Title: colPri, Width: 3},
+		{Title: colCat, Width: 12},
+		{Title: colTitle, Width: 0}, // widened on resize
+		{Title: colAge, Width: 7},
+		{Title: colState, Width: 13},
+	}
+
+	t := table.New(
+		table.WithColumns(cols),
+		table.WithFocused(true),
+		table.WithHeight(10),
+	)
+
+	// Keep built-in styles minimal.
+	s := table.DefaultStyles()
+	s.Header = s.Header.Bold(true)
+	s.Selected = s.Selected.Bold(false)
+	t.SetStyles(s)
+
+	return t
+}
+
+// buildRows returns table rows and a parallel issue ID slice (row index -> issue ID).
+func buildRows(now time.Time, mode AgeMode, issues []model.Issue) ([]table.Row, []string) {
+	rows := make([]table.Row, 0, len(issues))
+	ids := make([]string, 0, len(issues))
+
+	for _, iss := range issues {
+		age := formatAgeWithMode(iss.Age(now), mode)
+		rows = append(rows, table.Row{
+			iss.Priority.String(),
+			shortCat(iss.Category.String()),
+			oneLine(iss.Title),
+			age,
+			iss.State.String(),
+		})
+		ids = append(ids, iss.ID)
+	}
+	return rows, ids
+}
+
+func shortCat(cat string) string {
+	if cat == "" {
+		return "-"
+	}
+	if len(cat) <= 12 {
+		return cat
+	}
+	// Keep category compact; table has limited width.
+	s := cat
+	if i := strings.IndexByte(cat, ' '); i > 0 {
+		s = cat[:i]
+	}
+	if len(s) > 12 {
+		return s[:12]
+	}
+	return s
+}
+
+func oneLine(s string) string {
+	s = strings.ReplaceAll(s, "\n", " ")
+	s = strings.TrimSpace(s)
+	return s
+}
+
+func formatAge(d time.Duration) string {
+	return formatAgeWithMode(d, AgeCompact)
+}
+
+func formatAgeWithMode(d time.Duration, mode AgeMode) string {
+	if d <= 0 {
+		if mode == AgeRelative {
+			return "0m ago"
+		}
+		return "0s"
+	}
+	if mode == AgeRelative {
+		// Relative format: Xs ago, Xm ago, Xh ago, Xd ago
+		if d < time.Minute {
+			s := int(d / time.Second)
+			return fmt.Sprintf("%ds ago", s)
+		}
+		if d < time.Hour {
+			m := int(d / time.Minute)
+			return fmt.Sprintf("%dm ago", m)
+		}
+		if d < 24*time.Hour {
+			h := int(d / time.Hour)
+			return fmt.Sprintf("%dh ago", h)
+		}
+		days := int(d / (24 * time.Hour))
+		return fmt.Sprintf("%dd ago", days)
+	}
+	// Compact format: Xs, Xm, Xh, Xd
+	if d < time.Minute {
+		s := int(d / time.Second)
+		return fmt.Sprintf("%ds", s)
+	}
+	if d < time.Hour {
+		m := int(d / time.Minute)
+		return fmt.Sprintf("%dm", m)
+	}
+	if d < 24*time.Hour {
+		h := int(d / time.Hour)
+		return fmt.Sprintf("%dh", h)
+	}
+	days := int(d / (24 * time.Hour))
+	return fmt.Sprintf("%dd", days)
+}
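A quick way for reviewers to sanity-check the age formatting above is a small table-driven test. A minimal sketch follows; this patch does not include a table_test.go, so the test name and cases are illustrative, assuming the AgeMode constants (AgeCompact, AgeRelative) defined elsewhere in package ui:

	package ui

	import (
		"testing"
		"time"
	)

	func TestFormatAgeWithMode(t *testing.T) {
		cases := []struct {
			d    time.Duration
			mode AgeMode
			want string
		}{
			{90 * time.Second, AgeCompact, "1m"},    // compact: bare unit suffix
			{3 * time.Hour, AgeCompact, "3h"},
			{49 * time.Hour, AgeRelative, "2d ago"}, // relative: trailing "ago"
			{0, AgeRelative, "0m ago"},              // zero and negative durations are clamped
		}
		for _, c := range cases {
			if got := formatAgeWithMode(c.d, c.mode); got != c.want {
				t.Errorf("formatAgeWithMode(%v, %v) = %q, want %q", c.d, c.mode, got, c.want)
			}
		}
	}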