feat: implement ControlTower TUI for cluster and host monitoring
Add complete TUI application for monitoring Kubernetes clusters and host systems. Features include: Core features: - Collector framework with concurrent scheduling - Host collectors: disk, memory, load, network - Kubernetes collectors: pods, nodes, workloads, events with informers - Issue deduplication, state management, and resolve-after logic - Bubble Tea TUI with table view, details pane, and filtering - JSON export functionality UX improvements: - Help overlay with keybindings - Priority/category filters with visual indicators - Direct priority jump (0/1/2/3) - Bulk acknowledge (Shift+A) - Clipboard copy (y) - Theme toggle (T) - Age format toggle (d) - Wide title toggle (t) - Vi-style navigation (j/k) - Home/End jump (g/G) - Rollup drill-down in details Robustness: - Grace period for unreachable clusters - Rollups for high-volume issues - Flap suppression - RBAC error handling Files: All core application code with tests for host collectors, engine, store, model, and export packages.
This commit is contained in:
@@ -0,0 +1,212 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
bubbletea "github.com/charmbracelet/bubbletea"
|
||||
|
||||
"tower/internal/collectors"
|
||||
"tower/internal/collectors/host"
|
||||
collectorsk8s "tower/internal/collectors/k8s"
|
||||
"tower/internal/engine"
|
||||
"tower/internal/export"
|
||||
"tower/internal/model"
|
||||
"tower/internal/store"
|
||||
"tower/internal/ui"
|
||||
)
|
||||
|
||||
// Default timings used when wiring the store, engine and collectors in main.
const (
	// defaultRefreshInterval is handed to engine.New as the engine's refresh interval.
	defaultRefreshInterval = time.Second

	// defaultResolveAfter is handed to store.New as the resolve-after duration.
	defaultResolveAfter = 30 * time.Second

	// collectorTimeoutFast is the per-collector timeout configured for the host collectors.
	collectorTimeoutFast = 250 * time.Millisecond

	// collectorTimeoutK8sList is the timeout configured for the Kubernetes collector.
	collectorTimeoutK8sList = 2 * time.Second

	// k8sUnreachableGraceDefault is the grace window given to the connectivity
	// collector's unreachableTracker.
	k8sUnreachableGraceDefault = 10 * time.Second
)
|
||||
|
||||
func main() {
|
||||
var exportPath string
|
||||
flag.StringVar(&exportPath, "export", "", "write issues JSON snapshot to this path and exit")
|
||||
flag.Parse()
|
||||
|
||||
if exportPath != "" {
|
||||
if err := validateExportPath(exportPath); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
|
||||
defer cancel()
|
||||
|
||||
st := store.New(defaultResolveAfter)
|
||||
|
||||
configs := []engine.CollectorConfig{
|
||||
{Collector: host.NewDiskCollector(), Timeout: collectorTimeoutFast},
|
||||
{Collector: host.NewMemCollector(), Timeout: collectorTimeoutFast},
|
||||
{Collector: host.NewLoadCollector(), Timeout: collectorTimeoutFast},
|
||||
{Collector: host.NewNetCollector(), Timeout: collectorTimeoutFast},
|
||||
}
|
||||
|
||||
// If kubeconfig is present, register the full Kubernetes collector (informers
|
||||
// with polling fallback, rules, rollups, and unreachable grace).
|
||||
if kubeconfigExists() {
|
||||
configs = append(configs, engine.CollectorConfig{Collector: collectorsk8s.NewCollector(), Timeout: collectorTimeoutK8sList})
|
||||
}
|
||||
|
||||
eng := engine.New(st, configs, defaultRefreshInterval)
|
||||
eng.Start(ctx)
|
||||
defer eng.Stop()
|
||||
|
||||
if exportPath != "" {
|
||||
// Give collectors a brief moment to run their initial collection.
|
||||
select {
|
||||
case <-time.After(200 * time.Millisecond):
|
||||
case <-ctx.Done():
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
snap := st.Snapshot(time.Now())
|
||||
if err := export.WriteIssues(exportPath, snap); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Run Bubble Tea UI.
|
||||
m := ui.New("", eng.Snapshots(), eng.RefreshNow, st.Acknowledge, st.Unacknowledge, export.WriteIssues)
|
||||
p := bubbletea.NewProgram(m, bubbletea.WithAltScreen())
|
||||
if _, err := p.Run(); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// kubeconfigExists reports whether a kubeconfig file appears to be available.
//
// When KUBECONFIG is set (non-empty) it is treated as an OS path list
// (colon-separated on Unix, semicolon-separated on Windows), which is how the
// variable is defined for kubectl; the function returns true as soon as any
// non-empty entry can be stat'ed and false if none can. A single path is the
// one-element case of that list.
//
// When KUBECONFIG is unset or empty, the function checks ~/.kube/config under
// the current user's home directory and returns false if the home directory
// cannot be determined.
func kubeconfigExists() bool {
	if list := os.Getenv("KUBECONFIG"); list != "" {
		for _, candidate := range filepath.SplitList(list) {
			if candidate == "" {
				// Empty list elements (e.g. "a::b") name no file.
				continue
			}
			if _, err := os.Stat(candidate); err == nil {
				return true
			}
		}
		return false
	}

	home, err := os.UserHomeDir()
	if err != nil {
		return false
	}
	_, err = os.Stat(filepath.Join(home, ".kube", "config"))
	return err == nil
}
|
||||
|
||||
// validateExportPath checks that path is safe to use as an export target.
//
// The path is cleaned with filepath.Clean and then rejected when:
//   - any of its components (after the volume name, if any) is exactly "..",
//     i.e. it escapes the current directory; or
//   - it is absolute.
//
// Components are compared as whole path elements, so file or directory names
// that merely contain dots (for example "logs../issues.json") are accepted,
// while a bare ".." is rejected.
//
// It returns nil when the path is acceptable and a descriptive error otherwise.
func validateExportPath(path string) error {
	cleanPath := filepath.Clean(path)

	// Drop the volume name (e.g. "C:" on Windows; always empty on Unix) so a
	// drive-relative path such as "C:..\x" is still split into real components.
	rel := cleanPath[len(filepath.VolumeName(cleanPath)):]
	for _, part := range strings.Split(rel, string(filepath.Separator)) {
		if part == ".." {
			return fmt.Errorf("path traversal not allowed in export path: %s", path)
		}
	}

	if filepath.IsAbs(cleanPath) {
		return fmt.Errorf("absolute paths not allowed in export path: %s", path)
	}

	return nil
}
|
||||
|
||||
// k8sConnectivityCollector (declared further below) is a minimal Kubernetes collector.
|
||||
// It only validates connectivity/auth and emits a P0 issue after a grace window.
|
||||
//
|
||||
// Full cluster state collection is implemented elsewhere; this keeps main wired
|
||||
// and provides a useful health signal the UI can display.
|
||||
//
|
||||
// NOTE: This collector intentionally returns nil error on connectivity issues so
|
||||
// the Engine does not "freeze" last-known issues.
|
||||
//
|
||||
// It does not use informers (cheap) and runs at a low cadence.
|
||||
//
|
||||
//nolint:unused // referenced via newK8sConnectivityCollector
|
||||
// unreachableTracker records the start of the current run of failures and the
// most recent error, and decides whether the failures have lasted at least a
// configured grace window.
type unreachableTracker struct {
	grace          time.Duration // minimum failure duration before shouldEmit reports true
	firstFailureAt time.Time     // time of the first failure since the last success; zero when none
	lastErr        error         // most recent non-nil failure; nil after a success
}

// newUnreachableTracker returns a tracker using the given grace window.
// A zero or negative grace is replaced by 10 seconds.
func newUnreachableTracker(grace time.Duration) *unreachableTracker {
	tracker := &unreachableTracker{grace: 10 * time.Second}
	if grace > 0 {
		tracker.grace = grace
	}
	return tracker
}

// observeSuccess clears the recorded failure state.
func (t *unreachableTracker) observeSuccess() {
	t.firstFailureAt, t.lastErr = time.Time{}, nil
}

// observeFailure records err as the latest failure. The first failure after a
// success (or after construction) also stamps firstFailureAt with now; later
// failures leave that timestamp untouched. A nil err is ignored.
func (t *unreachableTracker) observeFailure(now time.Time, err error) {
	switch {
	case err == nil:
		return
	case t.firstFailureAt.IsZero():
		t.firstFailureAt = now
	}
	t.lastErr = err
}

// shouldEmit reports whether a failure is currently recorded and at least the
// grace window has elapsed between the first recorded failure and now.
func (t *unreachableTracker) shouldEmit(now time.Time) bool {
	if t.lastErr == nil || t.firstFailureAt.IsZero() {
		return false
	}
	return now.Sub(t.firstFailureAt) >= t.grace
}
|
||||
|
||||
type k8sConnectivityCollector struct {
|
||||
tracker *unreachableTracker
|
||||
}
|
||||
|
||||
func newK8sConnectivityCollector() collectors.Collector {
|
||||
return &k8sConnectivityCollector{tracker: newUnreachableTracker(k8sUnreachableGraceDefault)}
|
||||
}
|
||||
|
||||
func (c *k8sConnectivityCollector) Name() string { return "k8s:connectivity" }
|
||||
|
||||
func (c *k8sConnectivityCollector) Interval() time.Duration { return 5 * time.Second }
|
||||
|
||||
func (c *k8sConnectivityCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
|
||||
now := time.Now()
|
||||
cs, _, err := collectorsk8s.ClientFromCurrentContext()
|
||||
if err != nil {
|
||||
c.tracker.observeFailure(now, err)
|
||||
return c.issuesForFailure(now, err), collectors.Status{Health: collectors.HealthDegraded, Message: "kubeconfig/client error"}, nil
|
||||
}
|
||||
|
||||
// Short ping to validate reachability.
|
||||
pingErr := collectorsk8s.Ping(ctx, cs)
|
||||
if pingErr == nil {
|
||||
c.tracker.observeSuccess()
|
||||
return nil, collectors.OKStatus(), nil
|
||||
}
|
||||
|
||||
c.tracker.observeFailure(now, pingErr)
|
||||
return c.issuesForFailure(now, pingErr), collectors.Status{Health: collectors.HealthDegraded, Message: "k8s ping failed"}, nil
|
||||
}
|
||||
|
||||
func (c *k8sConnectivityCollector) issuesForFailure(now time.Time, err error) []model.Issue {
|
||||
if c.tracker.shouldEmit(now) {
|
||||
return []model.Issue{model.Issue{
|
||||
ID: "k8s:cluster:unreachable",
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: model.PriorityP0,
|
||||
Title: "Kubernetes cluster unreachable / auth failed",
|
||||
Details: fmt.Sprintf("Kubernetes API unreachable or credentials invalid. Last error: %v", err),
|
||||
Evidence: map[string]string{"reason": "Unreachable"},
|
||||
SuggestedFix: "kubectl cluster-info\nkubectl get nodes",
|
||||
}}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Keep otherwise-unused constants referenced.
|
||||
var _ = []any{collectors.HealthOK, collectorTimeoutFast, collectorTimeoutK8sList}
|
||||
@@ -0,0 +1,69 @@
|
||||
module tower
|
||||
|
||||
go 1.23.0
|
||||
|
||||
require (
|
||||
github.com/atotto/clipboard v0.1.4
|
||||
github.com/charmbracelet/bubbles v0.21.0
|
||||
github.com/charmbracelet/bubbletea v1.3.4
|
||||
github.com/charmbracelet/lipgloss v1.1.0
|
||||
k8s.io/api v0.30.3
|
||||
k8s.io/apimachinery v0.30.3
|
||||
k8s.io/client-go v0.30.3
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
|
||||
github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect
|
||||
github.com/charmbracelet/x/ansi v0.8.0 // indirect
|
||||
github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd // indirect
|
||||
github.com/charmbracelet/x/term v0.2.1 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
|
||||
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
|
||||
github.com/go-logr/logr v1.4.1 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.19.6 // indirect
|
||||
github.com/go-openapi/jsonreference v0.20.2 // indirect
|
||||
github.com/go-openapi/swag v0.22.3 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/golang/protobuf v1.5.4 // indirect
|
||||
github.com/google/gnostic-models v0.6.8 // indirect
|
||||
github.com/google/go-cmp v0.6.0 // indirect
|
||||
github.com/google/gofuzz v1.2.0 // indirect
|
||||
github.com/google/uuid v1.3.0 // indirect
|
||||
github.com/imdario/mergo v0.3.6 // indirect
|
||||
github.com/josharian/intern v1.0.0 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
|
||||
github.com/mailru/easyjson v0.7.7 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/mattn/go-localereader v0.0.1 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.16 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
|
||||
github.com/muesli/cancelreader v0.2.2 // indirect
|
||||
github.com/muesli/termenv v0.16.0 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/rivo/uniseg v0.4.7 // indirect
|
||||
github.com/spf13/pflag v1.0.5 // indirect
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
|
||||
golang.org/x/net v0.23.0 // indirect
|
||||
golang.org/x/oauth2 v0.10.0 // indirect
|
||||
golang.org/x/sync v0.11.0 // indirect
|
||||
golang.org/x/sys v0.30.0 // indirect
|
||||
golang.org/x/term v0.18.0 // indirect
|
||||
golang.org/x/text v0.14.0 // indirect
|
||||
golang.org/x/time v0.3.0 // indirect
|
||||
google.golang.org/appengine v1.6.7 // indirect
|
||||
google.golang.org/protobuf v1.33.0 // indirect
|
||||
gopkg.in/inf.v0 v0.9.1 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
k8s.io/klog/v2 v2.120.1 // indirect
|
||||
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
|
||||
k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect
|
||||
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
|
||||
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
|
||||
sigs.k8s.io/yaml v1.3.0 // indirect
|
||||
)
|
||||
@@ -0,0 +1,201 @@
|
||||
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
|
||||
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
|
||||
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
|
||||
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
|
||||
github.com/aymanbagabas/go-udiff v0.2.0 h1:TK0fH4MteXUDspT88n8CKzvK0X9O2xu9yQjWpi6yML8=
|
||||
github.com/aymanbagabas/go-udiff v0.2.0/go.mod h1:RE4Ex0qsGkTAJoQdQQCA0uG+nAzJO/pI/QwceO5fgrA=
|
||||
github.com/charmbracelet/bubbles v0.21.0 h1:9TdC97SdRVg/1aaXNVWfFH3nnLAwOXr8Fn6u6mfQdFs=
|
||||
github.com/charmbracelet/bubbles v0.21.0/go.mod h1:HF+v6QUR4HkEpz62dx7ym2xc71/KBHg+zKwJtMw+qtg=
|
||||
github.com/charmbracelet/bubbletea v1.3.4 h1:kCg7B+jSCFPLYRA52SDZjr51kG/fMUEoPoZrkaDHyoI=
|
||||
github.com/charmbracelet/bubbletea v1.3.4/go.mod h1:dtcUCyCGEX3g9tosuYiut3MXgY/Jsv9nKVdibKKRRXo=
|
||||
github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc h1:4pZI35227imm7yK2bGPcfpFEmuY1gc2YSTShr4iJBfs=
|
||||
github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc/go.mod h1:X4/0JoqgTIPSFcRA/P6INZzIuyqdFY5rm8tb41s9okk=
|
||||
github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY=
|
||||
github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30=
|
||||
github.com/charmbracelet/x/ansi v0.8.0 h1:9GTq3xq9caJW8ZrBTe0LIe2fvfLR/bYXKTx2llXn7xE=
|
||||
github.com/charmbracelet/x/ansi v0.8.0/go.mod h1:wdYl/ONOLHLIVmQaxbIYEC/cRKOQyjTkowiI4blgS9Q=
|
||||
github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd h1:vy0GVL4jeHEwG5YOXDmi86oYw2yuYUGqz6a8sLwg0X8=
|
||||
github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd/go.mod h1:xe0nKWGd3eJgtqZRaN9RjMtK7xUYchjzPr7q6kcvCCs=
|
||||
github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91 h1:payRxjMjKgx2PaCWLZ4p3ro9y97+TVLZNaRZgJwSVDQ=
|
||||
github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91/go.mod h1:wDlXFlCrmJ8J+swcL/MnGUuYnqgQdW9rhSD61oNMb6U=
|
||||
github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ=
|
||||
github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
|
||||
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
|
||||
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
|
||||
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
|
||||
github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ=
|
||||
github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE=
|
||||
github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
|
||||
github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
|
||||
github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
|
||||
github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g=
|
||||
github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
|
||||
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
|
||||
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
|
||||
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I=
|
||||
github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
|
||||
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec=
|
||||
github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
|
||||
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
|
||||
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28=
|
||||
github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
|
||||
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
|
||||
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
|
||||
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
|
||||
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
|
||||
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
|
||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
|
||||
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
|
||||
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
|
||||
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
|
||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
|
||||
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
|
||||
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
|
||||
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
|
||||
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/onsi/ginkgo/v2 v2.15.0 h1:79HwNRBAZHOEwrczrgSOPy+eFTTlIGELKy5as+ClttY=
|
||||
github.com/onsi/ginkgo/v2 v2.15.0/go.mod h1:HlxMHtYF57y6Dpf+mc5529KKmSq9h2FpCF+/ZkwUxKM=
|
||||
github.com/onsi/gomega v1.31.0 h1:54UJxxj6cPInHS3a35wm6BK/F9nHYueZ1NVujHDrnXE=
|
||||
github.com/onsi/gomega v1.31.0/go.mod h1:DW9aCi7U6Yi40wNVAvT6kzFnEVEI5n3DloYBiKiT6zk=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
|
||||
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
||||
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
|
||||
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
|
||||
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
|
||||
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
|
||||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E=
|
||||
golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
|
||||
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||
golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
|
||||
golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
|
||||
golang.org/x/oauth2 v0.10.0 h1:zHCpF2Khkwy4mMB4bv0U37YtJdTGW8jI0glAApi0Kh8=
|
||||
golang.org/x/oauth2 v0.10.0/go.mod h1:kTpgurOux7LqtuxjuyZa4Gj2gdezIt/jQtGnNFfypQI=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
|
||||
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
|
||||
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8=
|
||||
golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
|
||||
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4=
|
||||
golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
golang.org/x/tools v0.18.0 h1:k8NLag8AGHnn+PHbl7g43CtqZAwG60vZkLqgyZgIHgQ=
|
||||
golang.org/x/tools v0.18.0/go.mod h1:GL7B4CwcLLeo59yx/9UWWuNOW1n3VZ4f5axWfML7Lcg=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c=
|
||||
google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
|
||||
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
|
||||
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
|
||||
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
|
||||
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
k8s.io/api v0.30.3 h1:ImHwK9DCsPA9uoU3rVh4QHAHHK5dTSv1nxJUapx8hoQ=
|
||||
k8s.io/api v0.30.3/go.mod h1:GPc8jlzoe5JG3pb0KJCSLX5oAFIW3/qNJITlDj8BH04=
|
||||
k8s.io/apimachinery v0.30.3 h1:q1laaWCmrszyQuSQCfNB8cFgCuDAoPszKY4ucAjDwHc=
|
||||
k8s.io/apimachinery v0.30.3/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc=
|
||||
k8s.io/client-go v0.30.3 h1:bHrJu3xQZNXIi8/MoxYtZBBWQQXwy16zqJwloXXfD3k=
|
||||
k8s.io/client-go v0.30.3/go.mod h1:8d4pf8vYu665/kUbsxWAQ/JDBNWqfFeZnvFiVdmx89U=
|
||||
k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw=
|
||||
k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
|
||||
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag=
|
||||
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98=
|
||||
k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI=
|
||||
k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
|
||||
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
|
||||
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
|
||||
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
|
||||
sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
|
||||
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
|
||||
sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=
|
||||
@@ -0,0 +1,45 @@
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// Health is the coarse health level a collector reports for a tick.
type Health string

// Health levels carried in Status.Health.
const (
	HealthOK       Health = "OK"
	HealthDegraded Health = "DEGRADED"
	HealthError    Health = "ERROR"
)

// Status is the per-tick health report of a collector.
//
// A collector is expected to fill in Status on every call, including calls
// that also return an error, so the UI always has some context to display.
//
//   - Message is a short, human-friendly summary.
//   - LastSuccess is the time of the most recent successful collect; the zero
//     value means it is not known.
//
// NOTE(review): encoding/json's omitempty never considers a struct empty, so a
// zero LastSuccess is still written out as "0001-01-01T00:00:00Z".
type Status struct {
	Health      Health    `json:"health"`
	Message     string    `json:"message,omitempty"`
	LastSuccess time.Time `json:"last_success,omitempty"`
}

// OKStatus returns a Status whose Health is HealthOK; Message and LastSuccess
// are left at their zero values.
func OKStatus() Status {
	var st Status
	st.Health = HealthOK
	return st
}
|
||||
|
||||
// Collector returns "currently true" issues for this tick.
|
||||
//
|
||||
// The store is responsible for dedupe, lifecycle, and resolve-after.
|
||||
// Collectors must respect ctx cancellation.
|
||||
type Collector interface {
|
||||
Name() string
|
||||
Interval() time.Duration
|
||||
Collect(ctx context.Context) ([]model.Issue, Status, error)
|
||||
}
|
||||
@@ -0,0 +1,287 @@
|
||||
package host
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"tower/internal/collectors"
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// DiskCollector reports block and inode pressure for each mounted filesystem.
//
// Mounts are discovered from /proc/mounts and measured with statfs; pseudo
// filesystems are filtered out.
//
// Thresholds (PLAN.md):
//   - P1 if blocks OR inodes >= 92%
//   - P0 if blocks OR inodes >= 98%
//
// One issue is emitted per mount and carries both block and inode usage.
//
// NOTE: This collector is Linux-specific.
type DiskCollector struct {
	interval time.Duration

	// Injection points; NewDiskCollector wires them to os.ReadFile and
	// syscall.Statfs.
	readFile func(string) ([]byte, error)
	statfs   func(path string, st *syscall.Statfs_t) error
}

// NewDiskCollector returns a DiskCollector with a 10s interval that reads the
// real filesystem.
func NewDiskCollector() *DiskCollector {
	c := new(DiskCollector)
	c.interval = 10 * time.Second
	c.readFile = os.ReadFile
	c.statfs = syscall.Statfs
	return c
}

// Name returns the collector identifier "host:disk".
func (c *DiskCollector) Name() string {
	return "host:disk"
}

// Interval returns the configured interval, falling back to 10s when the
// configured value is zero or negative.
func (c *DiskCollector) Interval() time.Duration {
	if d := c.interval; d > 0 {
		return d
	}
	return 10 * time.Second
}
|
||||
|
||||
func (c *DiskCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err
|
||||
}
|
||||
|
||||
b, err := c.readFile("/proc/mounts")
|
||||
if err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/mounts"}, err
|
||||
}
|
||||
|
||||
mounts := parseProcMounts(string(b))
|
||||
if len(mounts) == 0 {
|
||||
// Unusual but treat as degraded rather than hard error.
|
||||
return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "no mounts found"}, nil
|
||||
}
|
||||
|
||||
issues := make([]model.Issue, 0, 8)
|
||||
seenMount := map[string]struct{}{}
|
||||
|
||||
partialErrs := 0
|
||||
for _, m := range mounts {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return issues, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err
|
||||
}
|
||||
if shouldSkipMount(m) {
|
||||
continue
|
||||
}
|
||||
if _, ok := seenMount[m.MountPoint]; ok {
|
||||
continue
|
||||
}
|
||||
seenMount[m.MountPoint] = struct{}{}
|
||||
|
||||
var st syscall.Statfs_t
|
||||
if err := c.statfs(m.MountPoint, &st); err != nil {
|
||||
partialErrs++
|
||||
continue
|
||||
}
|
||||
|
||||
blockPct, blockFreeBytes := statfsBlockUsedPct(st)
|
||||
inodePct := statfsInodeUsedPct(st)
|
||||
|
||||
pri, ok := diskPriority(blockPct, inodePct)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
evidence := map[string]string{
|
||||
"mount": m.MountPoint,
|
||||
"fstype": m.FSType,
|
||||
"block_used_pct": fmt.Sprintf("%.1f", blockPct),
|
||||
"block_free_bytes": strconv.FormatUint(blockFreeBytes, 10),
|
||||
}
|
||||
if inodePct >= 0 {
|
||||
evidence["inode_used_pct"] = fmt.Sprintf("%.1f", inodePct)
|
||||
}
|
||||
|
||||
issues = append(issues, model.Issue{
|
||||
ID: fmt.Sprintf("host:disk:%s:usage", m.MountPoint),
|
||||
Category: model.CategoryStorage,
|
||||
Priority: pri,
|
||||
Title: fmt.Sprintf("Disk usage high on %s", m.MountPoint),
|
||||
Details: "Filesystem space and/or inodes are nearly exhausted.",
|
||||
Evidence: evidence,
|
||||
SuggestedFix: fmt.Sprintf(
|
||||
"Inspect usage:\n df -h %s\n df -i %s\nFind large directories:\n sudo du -xh --max-depth=2 %s | sort -h | tail",
|
||||
m.MountPoint, m.MountPoint, m.MountPoint,
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
st := collectors.OKStatus()
|
||||
if partialErrs > 0 {
|
||||
st.Health = collectors.HealthDegraded
|
||||
st.Message = fmt.Sprintf("partial failures: %d mounts", partialErrs)
|
||||
}
|
||||
return issues, st, nil
|
||||
}
|
||||
|
||||
// procMount is one parsed entry of /proc/mounts.
type procMount struct {
	Device     string // first field, octal escapes decoded
	MountPoint string // second field, octal escapes decoded
	FSType     string // third field, as written
	Options    string // fourth field, as written; empty when the line has none
}
|
||||
|
||||
func parseProcMounts(content string) []procMount {
|
||||
s := bufio.NewScanner(strings.NewReader(content))
|
||||
out := make([]procMount, 0, 32)
|
||||
for s.Scan() {
|
||||
line := strings.TrimSpace(s.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) < 3 {
|
||||
continue
|
||||
}
|
||||
m := procMount{
|
||||
Device: unescapeProcMountsField(fields[0]),
|
||||
MountPoint: unescapeProcMountsField(fields[1]),
|
||||
FSType: fields[2],
|
||||
}
|
||||
if len(fields) >= 4 {
|
||||
m.Options = fields[3]
|
||||
}
|
||||
out = append(out, m)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// unescapeProcMountsField decodes the octal escapes used in /proc/mounts
// fields.
//
// Characters that would break the whitespace-separated format are written as
// a backslash followed by three octal digits, most commonly a space as \040;
// tab (\011), newline (\012), backslash (\134) and, on newer kernels, '#'
// (\043) are encoded the same way. Every well-formed \NNN sequence in the
// range 000-377 is decoded in a single left-to-right pass, so the output of
// one escape is never re-read as the start of another. Anything else,
// including a lone or truncated backslash sequence, is copied unchanged.
func unescapeProcMountsField(s string) string {
	i := strings.IndexByte(s, '\\')
	if i < 0 {
		return s // fast path: nothing to decode, no allocation
	}

	var b strings.Builder
	b.Grow(len(s))
	b.WriteString(s[:i])
	for ; i < len(s); i++ {
		if s[i] == '\\' && i+3 < len(s) {
			// Byte subtraction wraps for characters below '0', which makes
			// them fail the range checks just like characters above '7'.
			d1, d2, d3 := s[i+1]-'0', s[i+2]-'0', s[i+3]-'0'
			if d1 <= 3 && d2 <= 7 && d3 <= 7 {
				b.WriteByte(d1<<6 | d2<<3 | d3)
				i += 3
				continue
			}
		}
		b.WriteByte(s[i])
	}
	return b.String()
}
|
||||
|
||||
// pseudoFSTypes is the set of filesystem types that shouldSkipMount excludes
// from disk checks. Entries are kept in alphabetical order.
var pseudoFSTypes = map[string]struct{}{
	"autofs":          {},
	"binfmt_misc":     {},
	"bpf":             {},
	"cgroup":          {},
	"cgroup2":         {},
	"cgroupfs":        {},
	"configfs":        {},
	"debugfs":         {},
	"devpts":          {},
	"devtmpfs":        {},
	"efivarfs":        {},
	"fuse.gvfsd-fuse": {},
	"fuse.lxcfs":      {},
	"fusectl":         {},
	"hugetlbfs":       {},
	"mqueue":          {},
	"nsfs":            {},
	"overlay":         {}, // common container overlay mounts
	"overlayfs":       {}, // (non-standard) conservative skip
	"proc":            {},
	"procfs":          {},
	"pstore":          {},
	"ramfs":           {},
	"rpc_pipefs":      {},
	"securityfs":      {},
	"selinuxfs":       {},
	"squashfs":        {}, // typically read-only images
	"sysfs":           {},
	"systemd-1":       {},
	"tmpfs":           {},
	"tracefs":         {},
}
|
||||
|
||||
func shouldSkipMount(m procMount) bool {
|
||||
if m.MountPoint == "" {
|
||||
return true
|
||||
}
|
||||
// Filter by fstype.
|
||||
if _, ok := pseudoFSTypes[m.FSType]; ok {
|
||||
return true
|
||||
}
|
||||
// Filter common pseudo mountpoints.
|
||||
if strings.HasPrefix(m.MountPoint, "/proc") || strings.HasPrefix(m.MountPoint, "/sys") {
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(m.MountPoint, "/dev") {
|
||||
// /dev itself can be a real mount in some cases, but usually isn't useful for disk pressure.
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// statfsBlockUsedPct returns the used-space percentage of a filesystem and the
// number of bytes still available to an unprivileged user.
//
// It mirrors df(1) semantics closely:
//
//	total = f_blocks
//	used  = f_blocks - f_bfree
//	avail = f_bavail (space available to unprivileged user)
//	use%  = used / (used + avail)
//
// Both results are 0 when the filesystem reports no blocks or when
// used + avail is 0.
func statfsBlockUsedPct(st syscall.Statfs_t) (usedPct float64, freeBytes uint64) {
	if st.Blocks == 0 {
		return 0, 0
	}

	bsize := uint64(st.Bsize)
	blocks := uint64(st.Blocks)
	bfree := uint64(st.Bfree)
	bavail := uint64(st.Bavail)

	// A filesystem reporting more free blocks than total blocks would make
	// the unsigned subtraction below wrap around and show up as ~100% used.
	// Treat that as "nothing used" instead.
	if bfree > blocks {
		bfree = blocks
	}

	usedBlocks := blocks - bfree
	denom := usedBlocks + bavail
	if denom == 0 {
		return 0, 0
	}

	freeBytes = bavail * bsize
	usedPct = (float64(usedBlocks) / float64(denom)) * 100.0
	return usedPct, freeBytes
}
|
||||
|
||||
// statfsInodeUsedPct returns the percentage of inodes in use.
//
// When the filesystem does not report inode counts (f_files == 0) the result
// is -1.
func statfsInodeUsedPct(st syscall.Statfs_t) float64 {
	if st.Files != 0 {
		total := float64(st.Files)
		return ((total - float64(st.Ffree)) / total) * 100.0
	}
	return -1
}
|
||||
|
||||
func diskPriority(blockPct, inodePct float64) (model.Priority, bool) {
|
||||
maxPct := blockPct
|
||||
if inodePct > maxPct {
|
||||
maxPct = inodePct
|
||||
}
|
||||
// inodePct may be -1 if not supported; ignore in that case.
|
||||
if inodePct < 0 {
|
||||
maxPct = blockPct
|
||||
}
|
||||
|
||||
switch {
|
||||
case maxPct >= 98.0:
|
||||
return model.PriorityP0, true
|
||||
case maxPct >= 92.0:
|
||||
return model.PriorityP1, true
|
||||
default:
|
||||
return "", false
|
||||
}
|
||||
}
|
||||
|
||||
// Compile-time check that *DiskCollector implements collectors.Collector.
var _ collectors.Collector = (*DiskCollector)(nil)
|
||||
@@ -0,0 +1,80 @@
|
||||
package host
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseProcMounts_UnescapesAndParses(t *testing.T) {
|
||||
in := "dev1 / ext4 rw 0 0\n" +
|
||||
"dev2 /path\\040with\\040space xfs rw 0 0\n" +
|
||||
"badline\n"
|
||||
|
||||
ms := parseProcMounts(in)
|
||||
if len(ms) != 2 {
|
||||
t.Fatalf("expected 2 mounts, got %d", len(ms))
|
||||
}
|
||||
if ms[0].MountPoint != "/" || ms[0].FSType != "ext4" {
|
||||
t.Fatalf("unexpected first mount: %+v", ms[0])
|
||||
}
|
||||
if ms[1].MountPoint != "/path with space" {
|
||||
t.Fatalf("expected unescaped mountpoint, got %q", ms[1].MountPoint)
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldSkipMount_FiltersPseudo(t *testing.T) {
|
||||
cases := []procMount{
|
||||
{MountPoint: "/proc", FSType: "proc"},
|
||||
{MountPoint: "/sys", FSType: "sysfs"},
|
||||
{MountPoint: "/dev", FSType: "tmpfs"},
|
||||
{MountPoint: "/dev/shm", FSType: "tmpfs"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if !shouldSkipMount(c) {
|
||||
t.Fatalf("expected skip for %+v", c)
|
||||
}
|
||||
}
|
||||
if shouldSkipMount(procMount{MountPoint: "/home", FSType: "ext4"}) {
|
||||
t.Fatalf("did not expect skip for /home ext4")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDiskPriority(t *testing.T) {
|
||||
if p, ok := diskPriority(91.9, -1); ok {
|
||||
t.Fatalf("expected no issue, got %v", p)
|
||||
}
|
||||
if p, ok := diskPriority(92.0, -1); !ok || p != "P1" {
|
||||
t.Fatalf("expected P1 at 92%%, got %v ok=%v", p, ok)
|
||||
}
|
||||
if p, ok := diskPriority(97.9, 98.0); !ok || p != "P0" {
|
||||
t.Fatalf("expected P0 if either crosses 98%%, got %v ok=%v", p, ok)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatfsCalculations(t *testing.T) {
|
||||
st := syscall.Statfs_t{}
|
||||
st.Bsize = 1
|
||||
st.Blocks = 100
|
||||
st.Bfree = 8
|
||||
st.Bavail = 8
|
||||
|
||||
pct, free := statfsBlockUsedPct(st)
|
||||
if free != 8 {
|
||||
t.Fatalf("expected free=8 bytes, got %d", free)
|
||||
}
|
||||
if pct < 91.9 || pct > 92.1 {
|
||||
t.Fatalf("expected ~92%% used, got %f", pct)
|
||||
}
|
||||
|
||||
st.Files = 100
|
||||
st.Ffree = 2
|
||||
ipct := statfsInodeUsedPct(st)
|
||||
if ipct < 97.9 || ipct > 98.1 {
|
||||
t.Fatalf("expected ~98%% inode used, got %f", ipct)
|
||||
}
|
||||
|
||||
st.Files = 0
|
||||
if statfsInodeUsedPct(st) != -1 {
|
||||
t.Fatalf("expected -1 when inode info unavailable")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,127 @@
|
||||
package host
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"tower/internal/collectors"
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// LoadCollector evaluates 1-minute load average normalized by logical CPU count.
|
||||
//
|
||||
// Thresholds (PLAN.md), normalized by CPU count:
|
||||
// - P2 if load1/cpus >= 4.0 sustained 120s
|
||||
// - P1 if load1/cpus >= 6.0 sustained 120s
|
||||
//
|
||||
// NOTE: Linux-specific.
|
||||
// Thread-safe: Collect() can be called concurrently.
|
||||
type LoadCollector struct {
|
||||
interval time.Duration
|
||||
|
||||
now func() time.Time
|
||||
readFile func(string) ([]byte, error)
|
||||
cpuCount func() int
|
||||
|
||||
mu sync.Mutex
|
||||
|
||||
pri model.Priority
|
||||
since time.Time
|
||||
}
|
||||
|
||||
func NewLoadCollector() *LoadCollector {
|
||||
return &LoadCollector{
|
||||
interval: 5 * time.Second,
|
||||
now: time.Now,
|
||||
readFile: os.ReadFile,
|
||||
cpuCount: runtime.NumCPU,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *LoadCollector) Name() string { return "host:load" }
|
||||
|
||||
func (c *LoadCollector) Interval() time.Duration {
|
||||
if c.interval <= 0 {
|
||||
return 5 * time.Second
|
||||
}
|
||||
return c.interval
|
||||
}
|
||||
|
||||
func (c *LoadCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err
|
||||
}
|
||||
|
||||
now := c.now()
|
||||
b, err := c.readFile("/proc/loadavg")
|
||||
if err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/loadavg"}, err
|
||||
}
|
||||
|
||||
load1, err := parseProcLoadavgFirst(string(b))
|
||||
if err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "bad /proc/loadavg"}, nil
|
||||
}
|
||||
|
||||
cpus := c.cpuCount()
|
||||
if cpus <= 0 {
|
||||
cpus = 1
|
||||
}
|
||||
norm := load1 / float64(cpus)
|
||||
desired, window := desiredLoadPriority(norm)
|
||||
c.mu.Lock()
|
||||
c.pri, c.since = updateSustained(now, c.pri, c.since, desired)
|
||||
pri, since := c.pri, c.since
|
||||
c.mu.Unlock()
|
||||
|
||||
if pri == "" || since.IsZero() || now.Sub(since) < window {
|
||||
return nil, collectors.OKStatus(), nil
|
||||
}
|
||||
|
||||
iss := model.Issue{
|
||||
ID: "host:load:high",
|
||||
Category: model.CategoryPerformance,
|
||||
Priority: pri,
|
||||
Title: "High sustained system load",
|
||||
Details: "The 1-minute load average is high relative to CPU count for a sustained period.",
|
||||
Evidence: map[string]string{
|
||||
"load1": fmt.Sprintf("%.2f", load1),
|
||||
"cpus": strconv.Itoa(cpus),
|
||||
"load1_per_cpu": fmt.Sprintf("%.2f", norm),
|
||||
"sustained_window": window.String(),
|
||||
},
|
||||
SuggestedFix: "Investigate CPU hogs:\n top\n ps -eo pid,ppid,cmd,%cpu --sort=-%cpu | head\nIf I/O bound (high iowait), check disk/network.\n",
|
||||
}
|
||||
return []model.Issue{iss}, collectors.OKStatus(), nil
|
||||
}
|
||||
|
||||
// parseProcLoadavgFirst returns the first field of /proc/loadavg, i.e. the
// 1-minute load average.
//
// The expected format is "1.23 0.70 0.50 1/123 4567". An error is returned
// when the content has no fields or the first field is not a float; the
// returned value is 0 in both cases.
func parseProcLoadavgFirst(content string) (float64, error) {
	parts := strings.Fields(content)
	if len(parts) == 0 {
		return 0, fmt.Errorf("missing fields")
	}

	load1, err := strconv.ParseFloat(parts[0], 64)
	if err == nil {
		return load1, nil
	}
	return 0, err
}
|
||||
|
||||
func desiredLoadPriority(loadPerCPU float64) (model.Priority, time.Duration) {
|
||||
if loadPerCPU >= 6.0 {
|
||||
return model.PriorityP1, 120 * time.Second
|
||||
}
|
||||
if loadPerCPU >= 4.0 {
|
||||
return model.PriorityP2, 120 * time.Second
|
||||
}
|
||||
return "", 0
|
||||
}
|
||||
|
||||
// Compile-time check that *LoadCollector implements collectors.Collector.
var _ collectors.Collector = (*LoadCollector)(nil)
|
||||
@@ -0,0 +1,48 @@
|
||||
package host
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
func TestParseProcLoadavgFirst(t *testing.T) {
|
||||
v, err := parseProcLoadavgFirst("1.23 0.70 0.50 1/123 4567\n")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if v < 1.229 || v > 1.231 {
|
||||
t.Fatalf("expected 1.23, got %v", v)
|
||||
}
|
||||
if _, err := parseProcLoadavgFirst("\n"); err == nil {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDesiredLoadPriority(t *testing.T) {
|
||||
p, w := desiredLoadPriority(3.99)
|
||||
if p != "" || w != 0 {
|
||||
t.Fatalf("expected none")
|
||||
}
|
||||
p, w = desiredLoadPriority(4.0)
|
||||
if p != model.PriorityP2 || w != 120*time.Second {
|
||||
t.Fatalf("expected P2/120s")
|
||||
}
|
||||
p, w = desiredLoadPriority(6.0)
|
||||
if p != model.PriorityP1 || w != 120*time.Second {
|
||||
t.Fatalf("expected P1/120s")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateSustainedWorksForLoadToo(t *testing.T) {
|
||||
now := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
p, since := updateSustained(now, "", time.Time{}, model.PriorityP2)
|
||||
if p != model.PriorityP2 || !since.Equal(now) {
|
||||
t.Fatalf("expected set")
|
||||
}
|
||||
p2, since2 := updateSustained(now.Add(10*time.Second), p, since, model.PriorityP2)
|
||||
if p2 != model.PriorityP2 || !since2.Equal(since) {
|
||||
t.Fatalf("expected unchanged")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,205 @@
|
||||
package host
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"tower/internal/collectors"
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// MemCollector checks MemAvailable and swap pressure from /proc/meminfo.
|
||||
//
|
||||
// Thresholds (PLAN.md):
|
||||
// Memory (MemAvailable as % of MemTotal):
|
||||
// - P2 if <= 15% sustained 60s
|
||||
// - P1 if <= 10% sustained 60s
|
||||
// - P0 if <= 5% sustained 30s
|
||||
//
|
||||
// Swap pressure (only if RAM is also tight):
|
||||
// - P1 if swap used >= 50% AND MemAvailable <= 10% sustained 60s
|
||||
// - P0 if swap used >= 80% AND MemAvailable <= 5% sustained 30s
|
||||
//
|
||||
// Emits up to two issues:
|
||||
// - host:mem:available
|
||||
// - host:mem:swap
|
||||
//
|
||||
// NOTE: Linux-specific.
|
||||
// Thread-safe: Collect() can be called concurrently.
|
||||
type MemCollector struct {
|
||||
interval time.Duration
|
||||
|
||||
now func() time.Time
|
||||
readFile func(string) ([]byte, error)
|
||||
|
||||
mu sync.Mutex
|
||||
|
||||
memPri model.Priority
|
||||
memSince time.Time
|
||||
|
||||
swapPri model.Priority
|
||||
swapSince time.Time
|
||||
}
|
||||
|
||||
func NewMemCollector() *MemCollector {
|
||||
return &MemCollector{
|
||||
interval: 5 * time.Second,
|
||||
now: time.Now,
|
||||
readFile: os.ReadFile,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *MemCollector) Name() string { return "host:mem" }
|
||||
|
||||
func (c *MemCollector) Interval() time.Duration {
|
||||
if c.interval <= 0 {
|
||||
return 5 * time.Second
|
||||
}
|
||||
return c.interval
|
||||
}
|
||||
|
||||
func (c *MemCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err
|
||||
}
|
||||
|
||||
now := c.now()
|
||||
b, err := c.readFile("/proc/meminfo")
|
||||
if err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/meminfo"}, err
|
||||
}
|
||||
|
||||
mi := parseProcMeminfo(string(b))
|
||||
memTotalKB, okT := mi["MemTotal"]
|
||||
memAvailKB, okA := mi["MemAvailable"]
|
||||
if !okT || !okA || memTotalKB <= 0 {
|
||||
return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "missing MemTotal/MemAvailable"}, nil
|
||||
}
|
||||
|
||||
memAvailPct := (float64(memAvailKB) / float64(memTotalKB)) * 100.0
|
||||
|
||||
desiredMemPri, memWindow := desiredMemPriority(memAvailPct)
|
||||
c.mu.Lock()
|
||||
c.memPri, c.memSince = updateSustained(now, c.memPri, c.memSince, desiredMemPri)
|
||||
memPri, memSince := c.memPri, c.memSince
|
||||
c.mu.Unlock()
|
||||
|
||||
issues := make([]model.Issue, 0, 2)
|
||||
if memPri != "" && !memSince.IsZero() && now.Sub(memSince) >= memWindow {
|
||||
issues = append(issues, model.Issue{
|
||||
ID: "host:mem:available",
|
||||
Category: model.CategoryMemory,
|
||||
Priority: memPri,
|
||||
Title: "Low available memory",
|
||||
Details: "MemAvailable is low and has remained low for a sustained period.",
|
||||
Evidence: map[string]string{
|
||||
"mem_available_kb": strconv.FormatInt(memAvailKB, 10),
|
||||
"mem_total_kb": strconv.FormatInt(memTotalKB, 10),
|
||||
"mem_available_pct": fmt.Sprintf("%.1f", memAvailPct),
|
||||
},
|
||||
SuggestedFix: "Identify memory hogs:\n free -h\n ps aux --sort=-rss | head\nConsider restarting runaway processes or adding RAM.",
|
||||
})
|
||||
}
|
||||
|
||||
swapTotalKB, okST := mi["SwapTotal"]
|
||||
swapFreeKB, okSF := mi["SwapFree"]
|
||||
swapUsedPct := 0.0
|
||||
if okST && okSF && swapTotalKB > 0 {
|
||||
swapUsedKB := swapTotalKB - swapFreeKB
|
||||
swapUsedPct = (float64(swapUsedKB) / float64(swapTotalKB)) * 100.0
|
||||
}
|
||||
|
||||
desiredSwapPri, swapWindow := desiredSwapPriority(memAvailPct, swapTotalKB, swapUsedPct)
|
||||
c.mu.Lock()
|
||||
c.swapPri, c.swapSince = updateSustained(now, c.swapPri, c.swapSince, desiredSwapPri)
|
||||
swapPri, swapSince := c.swapPri, c.swapSince
|
||||
c.mu.Unlock()
|
||||
if swapPri != "" && !swapSince.IsZero() && now.Sub(swapSince) >= swapWindow {
|
||||
issues = append(issues, model.Issue{
|
||||
ID: "host:mem:swap",
|
||||
Category: model.CategoryMemory,
|
||||
Priority: swapPri,
|
||||
Title: "High swap usage with low RAM",
|
||||
Details: "Swap usage is high while available RAM is also low, indicating memory pressure.",
|
||||
Evidence: map[string]string{
|
||||
"swap_used_pct": fmt.Sprintf("%.1f", swapUsedPct),
|
||||
"swap_total_kb": strconv.FormatInt(swapTotalKB, 10),
|
||||
"mem_available_pct": fmt.Sprintf("%.1f", memAvailPct),
|
||||
},
|
||||
SuggestedFix: "Find swapping processes:\n vmstat 1\n smem -r 2>/dev/null || true\nConsider reducing memory usage or increasing RAM/swap.",
|
||||
})
|
||||
}
|
||||
|
||||
return issues, collectors.OKStatus(), nil
|
||||
}
|
||||
|
||||
// parseProcMeminfo parses /proc/meminfo text into a map from key (without the
// trailing colon) to the integer that follows it.
//
// Example line: "MemAvailable: 12345 kB". The unit column is ignored. Lines
// with fewer than two fields, or whose second field is not a base-10 integer,
// are skipped.
func parseProcMeminfo(content string) map[string]int64 {
	values := make(map[string]int64)
	sc := bufio.NewScanner(strings.NewReader(content))
	for sc.Scan() {
		cols := strings.Fields(strings.TrimSpace(sc.Text()))
		if len(cols) < 2 {
			// Blank line or a key without a value.
			continue
		}
		if n, err := strconv.ParseInt(cols[1], 10, 64); err == nil {
			values[strings.TrimSuffix(cols[0], ":")] = n
		}
	}
	return values
}
|
||||
|
||||
func desiredMemPriority(memAvailPct float64) (model.Priority, time.Duration) {
|
||||
switch {
|
||||
case memAvailPct <= 5.0:
|
||||
return model.PriorityP0, 30 * time.Second
|
||||
case memAvailPct <= 10.0:
|
||||
return model.PriorityP1, 60 * time.Second
|
||||
case memAvailPct <= 15.0:
|
||||
return model.PriorityP2, 60 * time.Second
|
||||
default:
|
||||
return "", 0
|
||||
}
|
||||
}
|
||||
|
||||
func desiredSwapPriority(memAvailPct float64, swapTotalKB int64, swapUsedPct float64) (model.Priority, time.Duration) {
|
||||
if swapTotalKB <= 0 {
|
||||
return "", 0
|
||||
}
|
||||
// Only alert on swap when RAM is also tight.
|
||||
switch {
|
||||
case swapUsedPct >= 80.0 && memAvailPct <= 5.0:
|
||||
return model.PriorityP0, 30 * time.Second
|
||||
case swapUsedPct >= 50.0 && memAvailPct <= 10.0:
|
||||
return model.PriorityP1, 60 * time.Second
|
||||
default:
|
||||
return "", 0
|
||||
}
|
||||
}
|
||||
|
||||
// updateSustained updates current severity and its since timestamp.
|
||||
// If desired is empty, it clears the state.
|
||||
func updateSustained(now time.Time, current model.Priority, since time.Time, desired model.Priority) (model.Priority, time.Time) {
|
||||
if desired == "" {
|
||||
return "", time.Time{}
|
||||
}
|
||||
if current != desired || since.IsZero() {
|
||||
return desired, now
|
||||
}
|
||||
return current, since
|
||||
}
|
||||
|
||||
// Compile-time check that *MemCollector implements collectors.Collector.
var _ collectors.Collector = (*MemCollector)(nil)
|
||||
@@ -0,0 +1,83 @@
|
||||
package host
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
func TestParseProcMeminfo(t *testing.T) {
|
||||
in := "MemTotal: 8000000 kB\nMemAvailable: 800000 kB\nSwapTotal: 2000000 kB\nSwapFree: 500000 kB\n"
|
||||
m := parseProcMeminfo(in)
|
||||
if m["MemTotal"] != 8000000 {
|
||||
t.Fatalf("MemTotal mismatch: %d", m["MemTotal"])
|
||||
}
|
||||
if m["MemAvailable"] != 800000 {
|
||||
t.Fatalf("MemAvailable mismatch: %d", m["MemAvailable"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestDesiredMemPriority(t *testing.T) {
|
||||
p, w := desiredMemPriority(16.0)
|
||||
if p != "" || w != 0 {
|
||||
t.Fatalf("expected none")
|
||||
}
|
||||
|
||||
p, w = desiredMemPriority(15.0)
|
||||
if p != model.PriorityP2 || w != 60*time.Second {
|
||||
t.Fatalf("expected P2/60s got %v/%v", p, w)
|
||||
}
|
||||
p, w = desiredMemPriority(10.0)
|
||||
if p != model.PriorityP1 {
|
||||
t.Fatalf("expected P1 got %v", p)
|
||||
}
|
||||
p, w = desiredMemPriority(5.0)
|
||||
if p != model.PriorityP0 || w != 30*time.Second {
|
||||
t.Fatalf("expected P0/30s got %v/%v", p, w)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDesiredSwapPriority(t *testing.T) {
|
||||
// No swap configured.
|
||||
p, _ := desiredSwapPriority(4.0, 0, 90.0)
|
||||
if p != "" {
|
||||
t.Fatalf("expected none when SwapTotal=0")
|
||||
}
|
||||
|
||||
p, w := desiredSwapPriority(4.0, 1000, 80.0)
|
||||
if p != model.PriorityP0 || w != 30*time.Second {
|
||||
t.Fatalf("expected P0/30s got %v/%v", p, w)
|
||||
}
|
||||
|
||||
p, w = desiredSwapPriority(9.9, 1000, 50.0)
|
||||
if p != model.PriorityP1 || w != 60*time.Second {
|
||||
t.Fatalf("expected P1/60s got %v/%v", p, w)
|
||||
}
|
||||
|
||||
// Swap high but RAM not tight => no issue.
|
||||
p, _ = desiredSwapPriority(20.0, 1000, 90.0)
|
||||
if p != "" {
|
||||
t.Fatalf("expected none when RAM not tight")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateSustained(t *testing.T) {
|
||||
now := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
p, since := updateSustained(now, "", time.Time{}, model.PriorityP1)
|
||||
if p != model.PriorityP1 || !since.Equal(now) {
|
||||
t.Fatalf("expected set to P1 at now")
|
||||
}
|
||||
p2, since2 := updateSustained(now.Add(1*time.Second), p, since, model.PriorityP1)
|
||||
if p2 != model.PriorityP1 || !since2.Equal(since) {
|
||||
t.Fatalf("expected unchanged since")
|
||||
}
|
||||
p3, since3 := updateSustained(now.Add(2*time.Second), p2, since2, model.PriorityP0)
|
||||
if p3 != model.PriorityP0 || !since3.Equal(now.Add(2*time.Second)) {
|
||||
t.Fatalf("expected reset on priority change")
|
||||
}
|
||||
p4, since4 := updateSustained(now.Add(3*time.Second), p3, since3, "")
|
||||
if p4 != "" || !since4.IsZero() {
|
||||
t.Fatalf("expected cleared")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,138 @@
|
||||
package host
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"tower/internal/collectors"
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// NetCollector reports a missing default route while at least one
// non-loopback interface is up.
//
// Rule (PLAN.md):
//   - P1 if no default route AND any non-loopback interface is UP.
//
// Discovery:
//   - Default route from /proc/net/route
//   - Interface UP from /sys/class/net/*/operstate
//
// NOTE: Linux-specific.
type NetCollector struct {
	interval time.Duration

	// Injection points; NewNetCollector wires them to os.ReadFile and
	// filepath.Glob.
	readFile func(string) ([]byte, error)
	glob     func(string) ([]string, error)
}

// NewNetCollector returns a NetCollector with a 5s interval that reads the
// real system.
func NewNetCollector() *NetCollector {
	c := new(NetCollector)
	c.interval = 5 * time.Second
	c.readFile = os.ReadFile
	c.glob = filepath.Glob
	return c
}

// Name returns the collector identifier "host:net".
func (c *NetCollector) Name() string {
	return "host:net"
}

// Interval returns the configured interval, falling back to 5s when the
// configured value is zero or negative.
func (c *NetCollector) Interval() time.Duration {
	if d := c.interval; d > 0 {
		return d
	}
	return 5 * time.Second
}
|
||||
|
||||
func (c *NetCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err
|
||||
}
|
||||
|
||||
routeBytes, err := c.readFile("/proc/net/route")
|
||||
if err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/net/route"}, err
|
||||
}
|
||||
|
||||
hasDefault := hasDefaultRoute(string(routeBytes))
|
||||
|
||||
paths, err := c.glob("/sys/class/net/*/operstate")
|
||||
if err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "failed listing /sys/class/net"}, err
|
||||
}
|
||||
upIfaces := make([]string, 0, 2)
|
||||
for _, p := range paths {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err
|
||||
}
|
||||
b, err := c.readFile(p)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
iface := filepath.Base(filepath.Dir(p))
|
||||
if iface == "lo" {
|
||||
continue
|
||||
}
|
||||
state := strings.TrimSpace(string(b))
|
||||
if isIfaceUp(state) {
|
||||
upIfaces = append(upIfaces, iface)
|
||||
}
|
||||
}
|
||||
|
||||
if hasDefault || len(upIfaces) == 0 {
|
||||
return nil, collectors.OKStatus(), nil
|
||||
}
|
||||
|
||||
iss := model.Issue{
|
||||
ID: "host:net:default-route-missing",
|
||||
Category: model.CategoryNetwork,
|
||||
Priority: model.PriorityP1,
|
||||
Title: "No default route",
|
||||
Details: "At least one network interface is up, but no default route is present.",
|
||||
Evidence: map[string]string{
|
||||
"up_ifaces": strings.Join(upIfaces, ","),
|
||||
},
|
||||
SuggestedFix: "Check routing and link state:\n ip route\n ip link\n nmcli dev status\nIf on Wi-Fi, reconnect; if on VPN, verify tunnel routes.",
|
||||
}
|
||||
return []model.Issue{iss}, collectors.OKStatus(), nil
|
||||
}
|
||||
|
||||
// hasDefaultRoute reports whether the given /proc/net/route content contains
// a default route, i.e. a row whose Destination column is "00000000".
//
// /proc/net/route header:
//
//	Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT
//
// The first non-blank line is treated as the header, and skipped, only when
// it starts with "Iface". Rows with fewer than two columns are ignored.
func hasDefaultRoute(procNetRoute string) bool {
	sc := bufio.NewScanner(strings.NewReader(procNetRoute))
	headerChecked := false
	for sc.Scan() {
		row := strings.TrimSpace(sc.Text())
		if row == "" {
			continue
		}

		isFirstRow := !headerChecked
		headerChecked = true
		if isFirstRow && strings.HasPrefix(row, "Iface") {
			continue
		}

		if cols := strings.Fields(row); len(cols) >= 2 && cols[1] == "00000000" {
			return true
		}
	}
	return false
}
|
||||
|
||||
// isIfaceUp reports whether an operstate value counts as "up".
//
// Linux operstate values include: up, down, unknown, dormant, lowerlayerdown.
// Both "up" and "unknown" are accepted; the comparison ignores case and
// surrounding whitespace.
func isIfaceUp(operstate string) bool {
	switch strings.ToLower(strings.TrimSpace(operstate)) {
	case "up", "unknown":
		return true
	default:
		return false
	}
}
|
||||
|
||||
// Compile-time check that *NetCollector implements collectors.Collector.
var _ collectors.Collector = (*NetCollector)(nil)
|
||||
@@ -0,0 +1,28 @@
|
||||
package host
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestHasDefaultRoute(t *testing.T) {
|
||||
in := "Iface\tDestination\tGateway\tFlags\n" +
|
||||
"eth0\t00000000\t0102A8C0\t0003\n"
|
||||
if !hasDefaultRoute(in) {
|
||||
t.Fatalf("expected default route")
|
||||
}
|
||||
in2 := "Iface Destination Gateway Flags\n" +
|
||||
"eth0 0010A8C0 00000000 0001\n"
|
||||
if hasDefaultRoute(in2) {
|
||||
t.Fatalf("expected no default route")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsIfaceUp(t *testing.T) {
|
||||
if !isIfaceUp("up\n") {
|
||||
t.Fatalf("expected true")
|
||||
}
|
||||
if !isIfaceUp("unknown") {
|
||||
t.Fatalf("expected true for unknown")
|
||||
}
|
||||
if isIfaceUp("down") {
|
||||
t.Fatalf("expected false")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
package k8s
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/rest"
|
||||
"k8s.io/client-go/tools/clientcmd"
|
||||
)
|
||||
|
||||
// ClientFromCurrentContext creates a Kubernetes client-go Clientset using the
|
||||
// user's kubeconfig current context.
|
||||
//
|
||||
// It is a pure helper (no global state) so it can be used by collectors and
|
||||
// unit tests (with temporary kubeconfig files).
|
||||
func ClientFromCurrentContext() (*kubernetes.Clientset, *rest.Config, error) {
|
||||
loadingRules := clientcmd.NewDefaultClientConfigLoadingRules()
|
||||
|
||||
// Respect KUBECONFIG semantics (it may be a path list).
|
||||
if p := os.Getenv("KUBECONFIG"); p != "" {
|
||||
if list := filepath.SplitList(p); len(list) > 1 {
|
||||
loadingRules.ExplicitPath = ""
|
||||
loadingRules.Precedence = list
|
||||
} else {
|
||||
loadingRules.ExplicitPath = p
|
||||
}
|
||||
}
|
||||
|
||||
cfg := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, &clientcmd.ConfigOverrides{})
|
||||
restCfg, err := cfg.ClientConfig()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Ensure HTTP client timeouts are bounded. LIST fallback uses its own context
|
||||
// timeouts, but this provides a safety net.
|
||||
if restCfg.Timeout <= 0 {
|
||||
restCfg.Timeout = 30 * time.Second
|
||||
}
|
||||
|
||||
cs, err := kubernetes.NewForConfig(restCfg)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return cs, restCfg, nil
|
||||
}
|
||||
|
||||
// defaultKubeconfigPath returns a single kubeconfig path for existence checks
// and UI messages. Client loading should go through client-go's default
// loading rules rather than this helper.
//
// When KUBECONFIG is set it may be a path list: the first non-empty entry is
// returned, so a leading separator (":/path") does not yield an empty result.
// If every entry is empty the result is "". When KUBECONFIG is unset or
// empty, the result is <home>/.kube/config, or "" if the home directory
// cannot be determined.
func defaultKubeconfigPath() string {
	if env := os.Getenv("KUBECONFIG"); env != "" {
		for _, entry := range filepath.SplitList(env) {
			if entry != "" {
				return entry
			}
		}
		return ""
	}

	home, err := os.UserHomeDir()
	if err != nil {
		return ""
	}
	return filepath.Join(home, ".kube", "config")
}
|
||||
|
||||
// Ping performs a lightweight API call to determine if the cluster is reachable
|
||||
// and authentication works.
|
||||
func Ping(ctx context.Context, cs kubernetes.Interface) error {
|
||||
if cs == nil {
|
||||
return errors.New("nil kubernetes client")
|
||||
}
|
||||
_, err := cs.Discovery().ServerVersion()
|
||||
if err != nil {
|
||||
// Treat authn/authz errors separately so callers can decide whether to
|
||||
// surface "unreachable" vs "insufficient credentials".
|
||||
if apierrors.IsForbidden(err) || apierrors.IsUnauthorized(err) {
|
||||
return fmt.Errorf("discovery auth: %w", err)
|
||||
}
|
||||
return fmt.Errorf("discovery server version: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,720 @@
|
||||
package k8s
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/client-go/informers"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
appslisters "k8s.io/client-go/listers/apps/v1"
|
||||
corelisters "k8s.io/client-go/listers/core/v1"
|
||||
"k8s.io/client-go/tools/cache"
|
||||
|
||||
"tower/internal/collectors"
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// Collector is the ControlTower Kubernetes collector.
|
||||
//
|
||||
// It uses client-go informers (LIST+WATCH with local caches) against the user's
|
||||
// kubeconfig current context, across all namespaces.
|
||||
//
|
||||
// Degradation behavior:
|
||||
// - If WATCH fails repeatedly, it falls back to polling LIST and emits a P1
|
||||
// "degraded to polling" issue.
|
||||
// - While in polling mode, it periodically attempts to recover back to watches.
|
||||
// - If the cluster is unreachable, it emits a P0 only after 10s continuous failure.
|
||||
// - If RBAC forbids list/watch for a resource, it emits a single P2 issue per
|
||||
// inaccessible resource and continues for accessible resources.
|
||||
//
|
||||
// Noise control:
|
||||
// - Rollups group by (namespace, reason, kind) when group size >= 20.
|
||||
// - Cap max issues to 200 after rollups.
|
||||
//
|
||||
// Instantiate with NewCollector().
|
||||
type Collector struct {
|
||||
interval time.Duration
|
||||
|
||||
unreachableGrace time.Duration
|
||||
pendingGrace time.Duration
|
||||
workloadGrace time.Duration
|
||||
crashLoopThresh int
|
||||
|
||||
rollupThreshold int
|
||||
maxIssues int
|
||||
|
||||
watchFailureThreshold int
|
||||
watchFailureWindow time.Duration
|
||||
pollRecoverEvery time.Duration
|
||||
|
||||
mu sync.Mutex
|
||||
syncWG sync.WaitGroup
|
||||
|
||||
client kubernetes.Interface
|
||||
|
||||
factory informers.SharedInformerFactory
|
||||
stopCh chan struct{}
|
||||
started bool
|
||||
syncedFns []cache.InformerSynced
|
||||
|
||||
podsLister corelisters.PodLister
|
||||
nodesLister corelisters.NodeLister
|
||||
eventsLister corelisters.EventLister
|
||||
deployLister appslisters.DeploymentLister
|
||||
statefulSetLister appslisters.StatefulSetLister
|
||||
daemonSetLister appslisters.DaemonSetLister
|
||||
|
||||
// polling indicates we have degraded from informers to list polling.
|
||||
polling bool
|
||||
pollSince time.Time
|
||||
lastPollRecoverAttempt time.Time
|
||||
|
||||
watchFailWindowStart time.Time
|
||||
watchFailCount int
|
||||
|
||||
// rbacDenied is keyed by resource name ("pods", "nodes", ...).
|
||||
rbacDenied map[string]error
|
||||
|
||||
unreach *unreachableTracker
|
||||
|
||||
lastSuccess time.Time
|
||||
}
|
||||
|
||||
func NewCollector() *Collector {
|
||||
c := &Collector{
|
||||
interval: 2 * time.Second,
|
||||
unreachableGrace: 10 * time.Second,
|
||||
pendingGrace: 120 * time.Second,
|
||||
workloadGrace: 180 * time.Second,
|
||||
crashLoopThresh: 5,
|
||||
rollupThreshold: 20,
|
||||
maxIssues: 200,
|
||||
watchFailureThreshold: 5,
|
||||
watchFailureWindow: 30 * time.Second,
|
||||
pollRecoverEvery: 30 * time.Second,
|
||||
rbacDenied: map[string]error{},
|
||||
}
|
||||
c.unreach = newUnreachableTracker(c.unreachableGrace)
|
||||
return c
|
||||
}
|
||||
|
||||
// Compile-time assertion that *Collector satisfies collectors.Collector.
var _ collectors.Collector = (*Collector)(nil)
|
||||
|
||||
// Name returns the collector's identifier, "k8s".
func (c *Collector) Name() string {
	return "k8s"
}
|
||||
|
||||
func (c *Collector) Interval() time.Duration {
|
||||
if c.interval <= 0 {
|
||||
return 2 * time.Second
|
||||
}
|
||||
return c.interval
|
||||
}
|
||||
|
||||
func (c *Collector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
|
||||
now := time.Now()
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err
|
||||
}
|
||||
|
||||
// If kubeconfig doesn't exist, treat Kubernetes as "disabled".
|
||||
if !kubeconfigExists() {
|
||||
return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "kubeconfig not found"}, nil
|
||||
}
|
||||
|
||||
if err := c.ensureClient(); err != nil {
|
||||
c.unreach.observeFailure(now, err)
|
||||
if c.unreach.shouldEmit(now) {
|
||||
iss := stampIssueTimes(now, unreachableIssue(err))
|
||||
return []model.Issue{iss}, collectors.Status{Health: collectors.HealthError, Message: "unreachable"}, nil
|
||||
}
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "k8s client init failed (grace)"}, nil
|
||||
}
|
||||
|
||||
// Connectivity/auth check with grace.
|
||||
if err := Ping(ctx, c.client); err != nil {
|
||||
c.unreach.observeFailure(now, err)
|
||||
if c.unreach.shouldEmit(now) {
|
||||
iss := stampIssueTimes(now, unreachableIssue(err))
|
||||
return []model.Issue{iss}, collectors.Status{Health: collectors.HealthError, Message: "unreachable"}, nil
|
||||
}
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "k8s unreachable (grace)"}, nil
|
||||
}
|
||||
c.unreach.observeSuccess()
|
||||
c.lastSuccess = now
|
||||
|
||||
// Prefer informers unless currently degraded to polling.
|
||||
if c.isPolling() {
|
||||
c.maybeRecoverInformers(ctx, now)
|
||||
}
|
||||
if !c.isPolling() {
|
||||
_ = c.ensureInformers(ctx)
|
||||
}
|
||||
|
||||
issues := make([]model.Issue, 0, 64)
|
||||
issues = append(issues, c.rbacIssues()...)
|
||||
|
||||
st := collectors.Status{Health: collectors.HealthOK, LastSuccess: c.lastSuccess}
|
||||
|
||||
if c.isPolling() {
|
||||
st.Health = collectors.HealthDegraded
|
||||
st.Message = "degraded to polling"
|
||||
issues = append(issues, stampIssueTimes(now, pollingDegradedIssue()))
|
||||
issues = append(issues, c.collectByPolling(ctx, now)...)
|
||||
} else {
|
||||
// If caches aren't ready, use polling for this tick only.
|
||||
if !c.cachesSyncedQuick(ctx) {
|
||||
st.Health = collectors.HealthDegraded
|
||||
st.Message = "waiting for informer cache; used list"
|
||||
issues = append(issues, c.collectByPolling(ctx, now)...)
|
||||
} else {
|
||||
issues = append(issues, c.collectFromCaches(now)...)
|
||||
if len(c.snapshotRBACDenied()) > 0 {
|
||||
st.Health = collectors.HealthDegraded
|
||||
st.Message = "partial RBAC access"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set timestamps, roll up and cap.
|
||||
for i := range issues {
|
||||
issues[i] = stampIssueTimes(now, issues[i])
|
||||
}
|
||||
issues = Rollup(issues, c.rollupThreshold, 5)
|
||||
model.SortIssuesDefault(issues)
|
||||
issues = CapIssues(issues, c.maxIssues)
|
||||
|
||||
return issues, st, nil
|
||||
}
|
||||
|
||||
func (c *Collector) ensureClient() error {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
if c.client != nil {
|
||||
return nil
|
||||
}
|
||||
cs, _, err := ClientFromCurrentContext()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.client = cs
|
||||
return nil
|
||||
}
|
||||
|
||||
func kubeconfigExists() bool {
|
||||
if p := os.Getenv("KUBECONFIG"); p != "" {
|
||||
for _, fp := range filepath.SplitList(p) {
|
||||
if fp == "" {
|
||||
continue
|
||||
}
|
||||
if _, err := os.Stat(fp); err == nil {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
p := defaultKubeconfigPath()
|
||||
if p == "" {
|
||||
return false
|
||||
}
|
||||
_, err := os.Stat(p)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
func (c *Collector) ensureInformers(ctx context.Context) error {
|
||||
c.mu.Lock()
|
||||
if c.started || c.polling {
|
||||
c.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
client := c.client
|
||||
c.mu.Unlock()
|
||||
if client == nil {
|
||||
return fmt.Errorf("nil kubernetes client")
|
||||
}
|
||||
|
||||
// RBAC preflight before we even construct informers (so we can skip forbidden ones).
|
||||
c.preflightRBAC(ctx, client)
|
||||
|
||||
factory := informers.NewSharedInformerFactory(client, 0)
|
||||
|
||||
var (
|
||||
podsInf cache.SharedIndexInformer
|
||||
nodesInf cache.SharedIndexInformer
|
||||
evsInf cache.SharedIndexInformer
|
||||
depInf cache.SharedIndexInformer
|
||||
stsInf cache.SharedIndexInformer
|
||||
dsInf cache.SharedIndexInformer
|
||||
)
|
||||
|
||||
if !c.isRBACDenied("pods") {
|
||||
i := factory.Core().V1().Pods()
|
||||
i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("pods", err) })
|
||||
c.mu.Lock()
|
||||
c.podsLister = i.Lister()
|
||||
c.mu.Unlock()
|
||||
podsInf = i.Informer()
|
||||
}
|
||||
if !c.isRBACDenied("nodes") {
|
||||
i := factory.Core().V1().Nodes()
|
||||
i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("nodes", err) })
|
||||
c.mu.Lock()
|
||||
c.nodesLister = i.Lister()
|
||||
c.mu.Unlock()
|
||||
nodesInf = i.Informer()
|
||||
}
|
||||
if !c.isRBACDenied("events") {
|
||||
i := factory.Core().V1().Events()
|
||||
i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("events", err) })
|
||||
c.mu.Lock()
|
||||
c.eventsLister = i.Lister()
|
||||
c.mu.Unlock()
|
||||
evsInf = i.Informer()
|
||||
}
|
||||
if !c.isRBACDenied("deployments") {
|
||||
i := factory.Apps().V1().Deployments()
|
||||
i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("deployments", err) })
|
||||
c.mu.Lock()
|
||||
c.deployLister = i.Lister()
|
||||
c.mu.Unlock()
|
||||
depInf = i.Informer()
|
||||
}
|
||||
if !c.isRBACDenied("statefulsets") {
|
||||
i := factory.Apps().V1().StatefulSets()
|
||||
i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("statefulsets", err) })
|
||||
c.mu.Lock()
|
||||
c.statefulSetLister = i.Lister()
|
||||
c.mu.Unlock()
|
||||
stsInf = i.Informer()
|
||||
}
|
||||
if !c.isRBACDenied("daemonsets") {
|
||||
i := factory.Apps().V1().DaemonSets()
|
||||
i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("daemonsets", err) })
|
||||
c.mu.Lock()
|
||||
c.daemonSetLister = i.Lister()
|
||||
c.mu.Unlock()
|
||||
dsInf = i.Informer()
|
||||
}
|
||||
|
||||
synced := make([]cache.InformerSynced, 0, 6)
|
||||
if podsInf != nil {
|
||||
synced = append(synced, podsInf.HasSynced)
|
||||
}
|
||||
if nodesInf != nil {
|
||||
synced = append(synced, nodesInf.HasSynced)
|
||||
}
|
||||
if evsInf != nil {
|
||||
synced = append(synced, evsInf.HasSynced)
|
||||
}
|
||||
if depInf != nil {
|
||||
synced = append(synced, depInf.HasSynced)
|
||||
}
|
||||
if stsInf != nil {
|
||||
synced = append(synced, stsInf.HasSynced)
|
||||
}
|
||||
if dsInf != nil {
|
||||
synced = append(synced, dsInf.HasSynced)
|
||||
}
|
||||
|
||||
stopCh := make(chan struct{})
|
||||
|
||||
c.mu.Lock()
|
||||
c.factory = factory
|
||||
c.stopCh = stopCh
|
||||
c.started = true
|
||||
c.syncedFns = synced
|
||||
c.mu.Unlock()
|
||||
|
||||
factory.Start(stopCh)
|
||||
|
||||
c.syncWG.Add(1)
|
||||
go func() {
|
||||
defer c.syncWG.Done()
|
||||
syncCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
if ok := cache.WaitForCacheSync(syncCtx.Done(), synced...); !ok {
|
||||
fmt.Printf("k8s: informer cache sync failed or timed out\n")
|
||||
}
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Collector) maybeRecoverInformers(ctx context.Context, now time.Time) {
|
||||
c.mu.Lock()
|
||||
interval := c.pollRecoverEvery
|
||||
last := c.lastPollRecoverAttempt
|
||||
c.mu.Unlock()
|
||||
|
||||
if interval <= 0 {
|
||||
interval = 30 * time.Second
|
||||
}
|
||||
if !last.IsZero() && now.Sub(last) < interval {
|
||||
return
|
||||
}
|
||||
|
||||
c.mu.Lock()
|
||||
c.lastPollRecoverAttempt = now
|
||||
c.mu.Unlock()
|
||||
|
||||
// Only attempt if connectivity is OK (already pinged successfully in Collect).
|
||||
// Reset watch failure counters and exit polling; subsequent Collect will ensureInformers.
|
||||
c.mu.Lock()
|
||||
c.polling = false
|
||||
c.pollSince = time.Time{}
|
||||
c.watchFailWindowStart = time.Time{}
|
||||
c.watchFailCount = 0
|
||||
c.mu.Unlock()
|
||||
|
||||
_ = c.ensureInformers(ctx)
|
||||
}
|
||||
|
||||
func (c *Collector) preflightRBAC(ctx context.Context, client kubernetes.Interface) {
|
||||
shortCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
|
||||
defer cancel()
|
||||
|
||||
probe := func(resource string, f func(context.Context) error) {
|
||||
if err := f(shortCtx); err != nil {
|
||||
if apierrors.IsForbidden(err) {
|
||||
c.noteRBAC(resource, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
probe("nodes", func(ctx context.Context) error {
|
||||
_, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{Limit: 1})
|
||||
return err
|
||||
})
|
||||
probe("pods", func(ctx context.Context) error {
|
||||
_, err := client.CoreV1().Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1})
|
||||
return err
|
||||
})
|
||||
probe("deployments", func(ctx context.Context) error {
|
||||
_, err := client.AppsV1().Deployments(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1})
|
||||
return err
|
||||
})
|
||||
probe("statefulsets", func(ctx context.Context) error {
|
||||
_, err := client.AppsV1().StatefulSets(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1})
|
||||
return err
|
||||
})
|
||||
probe("daemonsets", func(ctx context.Context) error {
|
||||
_, err := client.AppsV1().DaemonSets(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1})
|
||||
return err
|
||||
})
|
||||
probe("events", func(ctx context.Context) error {
|
||||
_, err := client.CoreV1().Events(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1})
|
||||
return err
|
||||
})
|
||||
}
|
||||
|
||||
func (c *Collector) noteRBAC(resource string, err error) {
|
||||
if err == nil || !apierrors.IsForbidden(err) {
|
||||
return
|
||||
}
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
if _, ok := c.rbacDenied[resource]; ok {
|
||||
return
|
||||
}
|
||||
c.rbacDenied[resource] = err
|
||||
}
|
||||
|
||||
func (c *Collector) isRBACDenied(resource string) bool {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
_, ok := c.rbacDenied[resource]
|
||||
return ok
|
||||
}
|
||||
|
||||
func (c *Collector) snapshotRBACDenied() map[string]error {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
out := make(map[string]error, len(c.rbacDenied))
|
||||
for k, v := range c.rbacDenied {
|
||||
out[k] = v
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (c *Collector) recordWatchError(resource string, err error) {
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
if apierrors.IsForbidden(err) {
|
||||
c.noteRBAC(resource, err)
|
||||
return
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
if c.polling {
|
||||
return
|
||||
}
|
||||
if c.watchFailWindowStart.IsZero() || now.Sub(c.watchFailWindowStart) > c.watchFailureWindow {
|
||||
c.watchFailWindowStart = now
|
||||
c.watchFailCount = 0
|
||||
}
|
||||
c.watchFailCount++
|
||||
if c.watchFailCount >= c.watchFailureThreshold {
|
||||
c.polling = true
|
||||
c.pollSince = now
|
||||
if c.stopCh != nil {
|
||||
close(c.stopCh)
|
||||
c.stopCh = nil
|
||||
}
|
||||
c.started = false
|
||||
c.factory = nil
|
||||
c.syncedFns = nil
|
||||
c.syncWG.Wait()
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Collector) cachesSyncedQuick(ctx context.Context) bool {
|
||||
c.mu.Lock()
|
||||
synced := append([]cache.InformerSynced(nil), c.syncedFns...)
|
||||
c.mu.Unlock()
|
||||
if len(synced) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
syncCtx, cancel := context.WithTimeout(ctx, 200*time.Millisecond)
|
||||
defer cancel()
|
||||
return cache.WaitForCacheSync(syncCtx.Done(), synced...)
|
||||
}
|
||||
|
||||
func (c *Collector) collectFromCaches(now time.Time) []model.Issue {
|
||||
c.mu.Lock()
|
||||
podsLister := c.podsLister
|
||||
nodesLister := c.nodesLister
|
||||
eventsLister := c.eventsLister
|
||||
deployLister := c.deployLister
|
||||
stsLister := c.statefulSetLister
|
||||
dsLister := c.daemonSetLister
|
||||
denied := make(map[string]error, len(c.rbacDenied))
|
||||
for k, v := range c.rbacDenied {
|
||||
denied[k] = v
|
||||
}
|
||||
c.mu.Unlock()
|
||||
|
||||
issues := make([]model.Issue, 0, 64)
|
||||
sel := labels.Everything()
|
||||
|
||||
if _, ok := denied["nodes"]; !ok && nodesLister != nil {
|
||||
if list, err := nodesLister.List(sel); err == nil {
|
||||
nodes := make([]*corev1.Node, 0, len(list))
|
||||
for i := range list {
|
||||
nodes = append(nodes, list[i])
|
||||
}
|
||||
issues = append(issues, IssuesFromNodes(nodes)...)
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := denied["pods"]; !ok && podsLister != nil {
|
||||
if list, err := podsLister.List(sel); err == nil {
|
||||
pods := make([]*corev1.Pod, 0, len(list))
|
||||
for i := range list {
|
||||
pods = append(pods, list[i])
|
||||
}
|
||||
issues = append(issues, IssuesFromPods(pods, now, c.pendingGrace, c.crashLoopThresh)...)
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := denied["deployments"]; !ok && deployLister != nil {
|
||||
if list, err := deployLister.List(sel); err == nil {
|
||||
deps := make([]*appsv1.Deployment, 0, len(list))
|
||||
for i := range list {
|
||||
deps = append(deps, list[i])
|
||||
}
|
||||
issues = append(issues, IssuesFromDeployments(deps, now, c.workloadGrace)...)
|
||||
}
|
||||
}
|
||||
if _, ok := denied["statefulsets"]; !ok && stsLister != nil {
|
||||
if list, err := stsLister.List(sel); err == nil {
|
||||
sts := make([]*appsv1.StatefulSet, 0, len(list))
|
||||
for i := range list {
|
||||
sts = append(sts, list[i])
|
||||
}
|
||||
issues = append(issues, IssuesFromStatefulSets(sts, now, c.workloadGrace)...)
|
||||
}
|
||||
}
|
||||
if _, ok := denied["daemonsets"]; !ok && dsLister != nil {
|
||||
if list, err := dsLister.List(sel); err == nil {
|
||||
dss := make([]*appsv1.DaemonSet, 0, len(list))
|
||||
for i := range list {
|
||||
dss = append(dss, list[i])
|
||||
}
|
||||
issues = append(issues, IssuesFromDaemonSets(dss, now, c.workloadGrace)...)
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := denied["events"]; !ok && eventsLister != nil {
|
||||
if list, err := eventsLister.List(sel); err == nil {
|
||||
es := make([]*corev1.Event, 0, len(list))
|
||||
for i := range list {
|
||||
es = append(es, list[i])
|
||||
}
|
||||
issues = append(issues, IssuesFromEvents(es, now)...)
|
||||
}
|
||||
}
|
||||
|
||||
return issues
|
||||
}
|
||||
|
||||
func (c *Collector) collectByPolling(ctx context.Context, now time.Time) []model.Issue {
|
||||
c.mu.Lock()
|
||||
client := c.client
|
||||
denied := make(map[string]error, len(c.rbacDenied))
|
||||
for k, v := range c.rbacDenied {
|
||||
denied[k] = v
|
||||
}
|
||||
c.mu.Unlock()
|
||||
if client == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
issues := make([]model.Issue, 0, 64)
|
||||
|
||||
if _, ok := denied["nodes"]; !ok {
|
||||
if nodes, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{}); err != nil {
|
||||
c.noteRBAC("nodes", err)
|
||||
} else {
|
||||
list := make([]*corev1.Node, 0, len(nodes.Items))
|
||||
for i := range nodes.Items {
|
||||
list = append(list, &nodes.Items[i])
|
||||
}
|
||||
issues = append(issues, IssuesFromNodes(list)...)
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := denied["pods"]; !ok {
|
||||
if pods, err := client.CoreV1().Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil {
|
||||
c.noteRBAC("pods", err)
|
||||
} else {
|
||||
list := make([]*corev1.Pod, 0, len(pods.Items))
|
||||
for i := range pods.Items {
|
||||
list = append(list, &pods.Items[i])
|
||||
}
|
||||
issues = append(issues, IssuesFromPods(list, now, c.pendingGrace, c.crashLoopThresh)...)
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := denied["deployments"]; !ok {
|
||||
if deps, err := client.AppsV1().Deployments(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil {
|
||||
c.noteRBAC("deployments", err)
|
||||
} else {
|
||||
list := make([]*appsv1.Deployment, 0, len(deps.Items))
|
||||
for i := range deps.Items {
|
||||
list = append(list, &deps.Items[i])
|
||||
}
|
||||
issues = append(issues, IssuesFromDeployments(list, now, c.workloadGrace)...)
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := denied["statefulsets"]; !ok {
|
||||
if sts, err := client.AppsV1().StatefulSets(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil {
|
||||
c.noteRBAC("statefulsets", err)
|
||||
} else {
|
||||
list := make([]*appsv1.StatefulSet, 0, len(sts.Items))
|
||||
for i := range sts.Items {
|
||||
list = append(list, &sts.Items[i])
|
||||
}
|
||||
issues = append(issues, IssuesFromStatefulSets(list, now, c.workloadGrace)...)
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := denied["daemonsets"]; !ok {
|
||||
if dss, err := client.AppsV1().DaemonSets(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil {
|
||||
c.noteRBAC("daemonsets", err)
|
||||
} else {
|
||||
list := make([]*appsv1.DaemonSet, 0, len(dss.Items))
|
||||
for i := range dss.Items {
|
||||
list = append(list, &dss.Items[i])
|
||||
}
|
||||
issues = append(issues, IssuesFromDaemonSets(list, now, c.workloadGrace)...)
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := denied["events"]; !ok {
|
||||
if evs, err := client.CoreV1().Events(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil {
|
||||
c.noteRBAC("events", err)
|
||||
} else {
|
||||
list := make([]*corev1.Event, 0, len(evs.Items))
|
||||
for i := range evs.Items {
|
||||
list = append(list, &evs.Items[i])
|
||||
}
|
||||
issues = append(issues, IssuesFromEvents(list, now)...)
|
||||
}
|
||||
}
|
||||
|
||||
return issues
|
||||
}
|
||||
|
||||
func (c *Collector) rbacIssues() []model.Issue {
|
||||
denied := c.snapshotRBACDenied()
|
||||
keys := make([]string, 0, len(denied))
|
||||
for k := range denied {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
|
||||
out := make([]model.Issue, 0, len(keys))
|
||||
for _, res := range keys {
|
||||
err := denied[res]
|
||||
out = append(out, model.Issue{
|
||||
ID: fmt.Sprintf("k8s:rbac:%s", res),
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: model.PriorityP2,
|
||||
Title: fmt.Sprintf("Insufficient RBAC: list/watch %s", res),
|
||||
Details: fmt.Sprintf("Current context cannot access %s (forbidden). %s", res, sanitizeError(err)),
|
||||
Evidence: map[string]string{
|
||||
"kind": "Cluster",
|
||||
"reason": "RBAC",
|
||||
"namespace": "",
|
||||
"resource": res,
|
||||
},
|
||||
SuggestedFix: fmt.Sprintf("kubectl auth can-i list %s --all-namespaces", res),
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func pollingDegradedIssue() model.Issue {
|
||||
return model.Issue{
|
||||
ID: "k8s:cluster:polling",
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: model.PriorityP1,
|
||||
Title: "Kubernetes degraded: polling (watch failing)",
|
||||
Details: "Kubernetes watches have failed repeatedly; collector switched to LIST polling. Data may be less real-time and API load is higher.",
|
||||
Evidence: map[string]string{
|
||||
"kind": "Cluster",
|
||||
"reason": "DegradedPolling",
|
||||
"namespace": "",
|
||||
},
|
||||
SuggestedFix: "Check API server / network stability and RBAC; ensure watch endpoints are reachable.",
|
||||
}
|
||||
}
|
||||
|
||||
func stampIssueTimes(now time.Time, iss model.Issue) model.Issue {
|
||||
iss.LastSeen = now
|
||||
if iss.FirstSeen.IsZero() {
|
||||
iss.FirstSeen = now
|
||||
}
|
||||
return iss
|
||||
}
|
||||
|
||||
func (c *Collector) isPolling() bool {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
return c.polling
|
||||
}
|
||||
@@ -0,0 +1,101 @@
|
||||
package k8s
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// warningEventReasons is the set of Warning event reasons that are turned
// into issues; events with any other reason are ignored. Lookups are exact
// and case-sensitive.
var warningEventReasons = map[string]struct{}{
	"BackOff":          {},
	"ErrImagePull":     {},
	"FailedMount":      {},
	"FailedPull":       {},
	"FailedScheduling": {},
	"Forbidden":        {},
	"ImagePullBackOff": {},
	"OOMKilling":       {},
	"Unhealthy":        {},
}
|
||||
|
||||
// IssuesFromEvents applies the PLAN.md Event rules.
|
||||
//
|
||||
// Dedup by (object UID, reason). For v1 Events, this is approximated by
|
||||
// (involvedObject.uid, reason).
|
||||
func IssuesFromEvents(events []*corev1.Event, now time.Time) []model.Issue {
|
||||
_ = now
|
||||
out := make([]model.Issue, 0, 16)
|
||||
seen := map[string]struct{}{}
|
||||
|
||||
for _, e := range events {
|
||||
if e == nil {
|
||||
continue
|
||||
}
|
||||
if strings.ToLower(e.Type) != strings.ToLower(string(corev1.EventTypeWarning)) {
|
||||
continue
|
||||
}
|
||||
if _, ok := warningEventReasons[e.Reason]; !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
uid := string(e.InvolvedObject.UID)
|
||||
k := uid + ":" + e.Reason
|
||||
if _, ok := seen[k]; ok {
|
||||
continue
|
||||
}
|
||||
seen[k] = struct{}{}
|
||||
|
||||
ns := e.InvolvedObject.Namespace
|
||||
if ns == "" {
|
||||
ns = e.Namespace
|
||||
}
|
||||
|
||||
objKey := e.InvolvedObject.Kind + "/" + e.InvolvedObject.Name
|
||||
title := fmt.Sprintf("K8s Event %s: %s (%s)", e.Reason, objKey, ns)
|
||||
if ns == "" {
|
||||
title = fmt.Sprintf("K8s Event %s: %s", e.Reason, objKey)
|
||||
}
|
||||
|
||||
details := strings.TrimSpace(e.Message)
|
||||
if details == "" {
|
||||
details = "Warning event emitted by Kubernetes."
|
||||
}
|
||||
|
||||
out = append(out, model.Issue{
|
||||
ID: fmt.Sprintf("k8s:event:%s:%s", uid, e.Reason),
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: model.PriorityP2,
|
||||
Title: title,
|
||||
Details: details,
|
||||
Evidence: map[string]string{
|
||||
"kind": e.InvolvedObject.Kind,
|
||||
"reason": e.Reason,
|
||||
"namespace": ns,
|
||||
"name": e.InvolvedObject.Name,
|
||||
"uid": uid,
|
||||
},
|
||||
SuggestedFix: suggestedFixForEvent(ns, e.InvolvedObject.Kind, e.InvolvedObject.Name),
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// suggestedFixForEvent returns the kubectl describe command for the object an
// event refers to. The kind is lower-cased to form the resource argument. The
// -n flag is added only when a namespace is known and the object is not a
// node.
func suggestedFixForEvent(ns, kind, name string) string {
	resource := strings.ToLower(kind)
	if ns == "" || resource == "node" {
		return fmt.Sprintf("kubectl describe %s %s", resource, name)
	}
	return fmt.Sprintf("kubectl -n %s describe %s %s", ns, resource, name)
}
|
||||
@@ -0,0 +1,5 @@
|
||||
//go:build ignore
|
||||
|
||||
package k8s
|
||||
|
||||
// Placeholder (see rollup_test.go).
|
||||
@@ -0,0 +1,79 @@
|
||||
package k8s
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// IssuesFromNodes applies the PLAN.md node rules.
|
||||
//
|
||||
// Pure rule function: does not talk to the API server.
|
||||
func IssuesFromNodes(nodes []*corev1.Node) []model.Issue {
|
||||
out := make([]model.Issue, 0, 8)
|
||||
for _, n := range nodes {
|
||||
if n == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Ready / NotReady
|
||||
if cond := findNodeCondition(n, corev1.NodeReady); cond != nil {
|
||||
if cond.Status != corev1.ConditionTrue {
|
||||
out = append(out, model.Issue{
|
||||
ID: fmt.Sprintf("k8s:node:%s:NotReady", n.Name),
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: model.PriorityP0,
|
||||
Title: fmt.Sprintf("Node NotReady: %s", n.Name),
|
||||
Details: cond.Message,
|
||||
Evidence: map[string]string{
|
||||
"kind": "Node",
|
||||
"reason": "NotReady",
|
||||
"namespace": "",
|
||||
"node": n.Name,
|
||||
"status": string(cond.Status),
|
||||
},
|
||||
SuggestedFix: "kubectl describe node " + n.Name,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Pressure conditions.
|
||||
for _, ctype := range []corev1.NodeConditionType{corev1.NodeMemoryPressure, corev1.NodeDiskPressure, corev1.NodePIDPressure} {
|
||||
if cond := findNodeCondition(n, ctype); cond != nil {
|
||||
if cond.Status == corev1.ConditionTrue {
|
||||
out = append(out, model.Issue{
|
||||
ID: fmt.Sprintf("k8s:node:%s:%s", n.Name, string(ctype)),
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: model.PriorityP1,
|
||||
Title: fmt.Sprintf("Node %s: %s", ctype, n.Name),
|
||||
Details: cond.Message,
|
||||
Evidence: map[string]string{
|
||||
"kind": "Node",
|
||||
"reason": string(ctype),
|
||||
"namespace": "",
|
||||
"node": n.Name,
|
||||
"status": string(cond.Status),
|
||||
},
|
||||
SuggestedFix: "kubectl describe node " + n.Name,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func findNodeCondition(n *corev1.Node, t corev1.NodeConditionType) *corev1.NodeCondition {
|
||||
if n == nil {
|
||||
return nil
|
||||
}
|
||||
for i := range n.Status.Conditions {
|
||||
c := &n.Status.Conditions[i]
|
||||
if c.Type == t {
|
||||
return c
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
//go:build ignore
|
||||
|
||||
package k8s
|
||||
|
||||
// Placeholder (see rollup_test.go).
|
||||
@@ -0,0 +1,169 @@
|
||||
package k8s
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// IssuesFromPods applies the PLAN.md pod rules.
|
||||
//
|
||||
// Pure rule function: it does not talk to the API server.
|
||||
func IssuesFromPods(pods []*corev1.Pod, now time.Time, pendingGrace time.Duration, crashLoopRestartThreshold int) []model.Issue {
|
||||
if crashLoopRestartThreshold <= 0 {
|
||||
crashLoopRestartThreshold = 5
|
||||
}
|
||||
if pendingGrace <= 0 {
|
||||
pendingGrace = 120 * time.Second
|
||||
}
|
||||
|
||||
out := make([]model.Issue, 0, 32)
|
||||
for _, p := range pods {
|
||||
if p == nil {
|
||||
continue
|
||||
}
|
||||
ns, name := p.Namespace, p.Name
|
||||
|
||||
// Pending for too long.
|
||||
if p.Status.Phase == corev1.PodPending {
|
||||
age := now.Sub(p.CreationTimestamp.Time)
|
||||
if !p.CreationTimestamp.IsZero() && age >= pendingGrace {
|
||||
out = append(out, model.Issue{
|
||||
ID: fmt.Sprintf("k8s:pod:%s/%s:Pending", ns, name),
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: model.PriorityP1,
|
||||
Title: fmt.Sprintf("Pod Pending: %s/%s", ns, name),
|
||||
Details: fmt.Sprintf("Pod has been Pending for %s.", age.Truncate(time.Second)),
|
||||
Evidence: map[string]string{
|
||||
"kind": "Pod",
|
||||
"reason": "Pending",
|
||||
"namespace": ns,
|
||||
"pod": name,
|
||||
"phase": string(p.Status.Phase),
|
||||
"node": p.Spec.NodeName,
|
||||
},
|
||||
SuggestedFix: fmt.Sprintf("kubectl -n %s describe pod %s", ns, name),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Container-derived signals.
|
||||
for _, cs := range p.Status.ContainerStatuses {
|
||||
cname := cs.Name
|
||||
restarts := int(cs.RestartCount)
|
||||
|
||||
// CrashLoopBackOff and pull errors are reported via Waiting state.
|
||||
if cs.State.Waiting != nil {
|
||||
reason := cs.State.Waiting.Reason
|
||||
msg := cs.State.Waiting.Message
|
||||
switch reason {
|
||||
case "CrashLoopBackOff":
|
||||
pri := model.PriorityP1
|
||||
if restarts >= crashLoopRestartThreshold {
|
||||
pri = model.PriorityP0
|
||||
}
|
||||
out = append(out, model.Issue{
|
||||
ID: fmt.Sprintf("k8s:pod:%s/%s:CrashLoop:%s", ns, name, cname),
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: pri,
|
||||
Title: fmt.Sprintf("CrashLoopBackOff: %s/%s (%s)", ns, name, cname),
|
||||
Details: firstNonEmpty(msg, "Container is in CrashLoopBackOff."),
|
||||
Evidence: map[string]string{
|
||||
"kind": "Pod",
|
||||
"reason": "CrashLoopBackOff",
|
||||
"namespace": ns,
|
||||
"pod": name,
|
||||
"container": cname,
|
||||
"restarts": strconv.Itoa(restarts),
|
||||
"node": p.Spec.NodeName,
|
||||
},
|
||||
SuggestedFix: strings.TrimSpace(fmt.Sprintf(`kubectl -n %s describe pod %s
|
||||
kubectl -n %s logs %s -c %s --previous`, ns, name, ns, name, cname)),
|
||||
})
|
||||
|
||||
case "ImagePullBackOff", "ErrImagePull":
|
||||
out = append(out, model.Issue{
|
||||
ID: fmt.Sprintf("k8s:pod:%s/%s:ImagePull:%s", ns, name, cname),
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: model.PriorityP1,
|
||||
Title: fmt.Sprintf("%s: %s/%s (%s)", reason, ns, name, cname),
|
||||
Details: firstNonEmpty(msg, "Container image pull is failing."),
|
||||
Evidence: map[string]string{
|
||||
"kind": "Pod",
|
||||
"reason": reason,
|
||||
"namespace": ns,
|
||||
"pod": name,
|
||||
"container": cname,
|
||||
"restarts": strconv.Itoa(restarts),
|
||||
"node": p.Spec.NodeName,
|
||||
},
|
||||
SuggestedFix: fmt.Sprintf("kubectl -n %s describe pod %s", ns, name),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// OOMKilled is typically stored in LastTerminationState.
|
||||
if cs.LastTerminationState.Terminated != nil {
|
||||
term := cs.LastTerminationState.Terminated
|
||||
if term.Reason == "OOMKilled" {
|
||||
out = append(out, model.Issue{
|
||||
ID: fmt.Sprintf("k8s:pod:%s/%s:OOMKilled:%s", ns, name, cname),
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: model.PriorityP1,
|
||||
Title: fmt.Sprintf("OOMKilled: %s/%s (%s)", ns, name, cname),
|
||||
Details: firstNonEmpty(term.Message, "Container was killed due to OOM."),
|
||||
Evidence: map[string]string{
|
||||
"kind": "Pod",
|
||||
"reason": "OOMKilled",
|
||||
"namespace": ns,
|
||||
"pod": name,
|
||||
"container": cname,
|
||||
"restarts": strconv.Itoa(restarts),
|
||||
"node": p.Spec.NodeName,
|
||||
},
|
||||
SuggestedFix: strings.TrimSpace(fmt.Sprintf(`kubectl -n %s describe pod %s
|
||||
kubectl -n %s logs %s -c %s --previous`, ns, name, ns, name, cname)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// High restarts even if running.
|
||||
// Keep this lower priority than active CrashLoopBackOff.
|
||||
if restarts >= crashLoopRestartThreshold {
|
||||
if cs.State.Waiting == nil || cs.State.Waiting.Reason == "" {
|
||||
out = append(out, model.Issue{
|
||||
ID: fmt.Sprintf("k8s:pod:%s/%s:Restarts:%s", ns, name, cname),
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: model.PriorityP2,
|
||||
Title: fmt.Sprintf("High restarts: %s/%s (%s)", ns, name, cname),
|
||||
Details: "Container has restarted multiple times.",
|
||||
Evidence: map[string]string{
|
||||
"kind": "Pod",
|
||||
"reason": "HighRestarts",
|
||||
"namespace": ns,
|
||||
"pod": name,
|
||||
"container": cname,
|
||||
"restarts": strconv.Itoa(restarts),
|
||||
"node": p.Spec.NodeName,
|
||||
},
|
||||
SuggestedFix: fmt.Sprintf("kubectl -n %s describe pod %s", ns, name),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// firstNonEmpty returns v unchanged unless it is empty or whitespace-only, in
// which case it returns fallback. v is not trimmed when it is returned.
func firstNonEmpty(v, fallback string) string {
	if strings.TrimSpace(v) == "" {
		return fallback
	}
	return v
}
|
||||
@@ -0,0 +1,5 @@
|
||||
//go:build ignore
|
||||
|
||||
package k8s
|
||||
|
||||
// Placeholder (see rollup_test.go).
|
||||
@@ -0,0 +1,174 @@
|
||||
package k8s
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// defaultWorkloadNotReadyGrace is how long a workload must be NotReady before we emit an issue when the caller does not supply a positive grace.
|
||||
const defaultWorkloadNotReadyGrace = 180 * time.Second
|
||||
|
||||
// IssuesFromDeployments applies the PLAN.md workload rules for Deployments.
|
||||
func IssuesFromDeployments(deploys []*appsv1.Deployment, now time.Time, grace time.Duration) []model.Issue {
|
||||
if grace <= 0 {
|
||||
grace = defaultWorkloadNotReadyGrace
|
||||
}
|
||||
out := make([]model.Issue, 0, 16)
|
||||
|
||||
for _, d := range deploys {
|
||||
if d == nil {
|
||||
continue
|
||||
}
|
||||
desired := int32(1)
|
||||
if d.Spec.Replicas != nil {
|
||||
desired = *d.Spec.Replicas
|
||||
}
|
||||
ready := d.Status.ReadyReplicas
|
||||
if desired > 0 && ready < desired {
|
||||
// Prefer LastUpdateTime / LastTransitionTime when available; fallback to creation time.
|
||||
since := d.CreationTimestamp.Time
|
||||
if cond := findDeploymentProgressingCondition(d); cond != nil {
|
||||
if !cond.LastUpdateTime.IsZero() {
|
||||
since = cond.LastUpdateTime.Time
|
||||
} else if !cond.LastTransitionTime.IsZero() {
|
||||
since = cond.LastTransitionTime.Time
|
||||
}
|
||||
}
|
||||
if !since.IsZero() && now.Sub(since) < grace {
|
||||
continue
|
||||
}
|
||||
|
||||
ns := d.Namespace
|
||||
name := d.Name
|
||||
out = append(out, model.Issue{
|
||||
ID: fmt.Sprintf("k8s:deploy:%s/%s:NotReady", ns, name),
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: model.PriorityP1,
|
||||
Title: fmt.Sprintf("Deployment not ready: %s/%s", ns, name),
|
||||
Details: "Ready replicas below desired.",
|
||||
Evidence: map[string]string{
|
||||
"kind": "Deployment",
|
||||
"reason": "NotReady",
|
||||
"namespace": ns,
|
||||
"name": name,
|
||||
"desired": strconv.Itoa(int(desired)),
|
||||
"ready": strconv.Itoa(int(ready)),
|
||||
"observed_gen": strconv.FormatInt(d.Status.ObservedGeneration, 10),
|
||||
"resource_gen": strconv.FormatInt(d.Generation, 10),
|
||||
"min_grace_sec": strconv.Itoa(int(grace.Seconds())),
|
||||
},
|
||||
SuggestedFix: fmt.Sprintf("kubectl -n %s describe deployment %s", ns, name),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// IssuesFromStatefulSets applies the PLAN.md workload rules for StatefulSets.
|
||||
func IssuesFromStatefulSets(sts []*appsv1.StatefulSet, now time.Time, grace time.Duration) []model.Issue {
|
||||
if grace <= 0 {
|
||||
grace = defaultWorkloadNotReadyGrace
|
||||
}
|
||||
out := make([]model.Issue, 0, 16)
|
||||
|
||||
for _, s := range sts {
|
||||
if s == nil {
|
||||
continue
|
||||
}
|
||||
desired := int32(1)
|
||||
if s.Spec.Replicas != nil {
|
||||
desired = *s.Spec.Replicas
|
||||
}
|
||||
ready := s.Status.ReadyReplicas
|
||||
if desired > 0 && ready < desired {
|
||||
since := s.CreationTimestamp.Time
|
||||
if !since.IsZero() && now.Sub(since) < grace {
|
||||
continue
|
||||
}
|
||||
|
||||
ns, name := s.Namespace, s.Name
|
||||
out = append(out, model.Issue{
|
||||
ID: fmt.Sprintf("k8s:sts:%s/%s:NotReady", ns, name),
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: model.PriorityP1,
|
||||
Title: fmt.Sprintf("StatefulSet not ready: %s/%s", ns, name),
|
||||
Details: "Ready replicas below desired.",
|
||||
Evidence: map[string]string{
|
||||
"kind": "StatefulSet",
|
||||
"reason": "NotReady",
|
||||
"namespace": ns,
|
||||
"name": name,
|
||||
"desired": strconv.Itoa(int(desired)),
|
||||
"ready": strconv.Itoa(int(ready)),
|
||||
"observed_gen": strconv.FormatInt(s.Status.ObservedGeneration, 10),
|
||||
"resource_gen": strconv.FormatInt(s.Generation, 10),
|
||||
"min_grace_sec": strconv.Itoa(int(grace.Seconds())),
|
||||
},
|
||||
SuggestedFix: fmt.Sprintf("kubectl -n %s describe statefulset %s", ns, name),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// IssuesFromDaemonSets applies the PLAN.md workload rules for DaemonSets.
|
||||
func IssuesFromDaemonSets(dss []*appsv1.DaemonSet, now time.Time, grace time.Duration) []model.Issue {
|
||||
if grace <= 0 {
|
||||
grace = defaultWorkloadNotReadyGrace
|
||||
}
|
||||
out := make([]model.Issue, 0, 16)
|
||||
|
||||
for _, ds := range dss {
|
||||
if ds == nil {
|
||||
continue
|
||||
}
|
||||
unavailable := ds.Status.NumberUnavailable
|
||||
if unavailable > 0 {
|
||||
since := ds.CreationTimestamp.Time
|
||||
if !since.IsZero() && now.Sub(since) < grace {
|
||||
continue
|
||||
}
|
||||
ns, name := ds.Namespace, ds.Name
|
||||
out = append(out, model.Issue{
|
||||
ID: fmt.Sprintf("k8s:ds:%s/%s:Unavailable", ns, name),
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: model.PriorityP1,
|
||||
Title: fmt.Sprintf("DaemonSet unavailable: %s/%s", ns, name),
|
||||
Details: "DaemonSet has unavailable pods.",
|
||||
Evidence: map[string]string{
|
||||
"kind": "DaemonSet",
|
||||
"reason": "Unavailable",
|
||||
"namespace": ns,
|
||||
"name": name,
|
||||
"unavailable": strconv.Itoa(int(unavailable)),
|
||||
"desired": strconv.Itoa(int(ds.Status.DesiredNumberScheduled)),
|
||||
"available": strconv.Itoa(int(ds.Status.NumberAvailable)),
|
||||
"min_grace_sec": strconv.Itoa(int(grace.Seconds())),
|
||||
},
|
||||
SuggestedFix: fmt.Sprintf("kubectl -n %s describe daemonset %s", ns, name),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func findDeploymentProgressingCondition(d *appsv1.Deployment) *appsv1.DeploymentCondition {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
for i := range d.Status.Conditions {
|
||||
c := &d.Status.Conditions[i]
|
||||
if c.Type == appsv1.DeploymentProgressing {
|
||||
return c
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
//go:build ignore
|
||||
|
||||
package k8s
|
||||
|
||||
// Placeholder (see rollup_test.go).
|
||||
@@ -0,0 +1,128 @@
|
||||
package k8s
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// RollupKey identifies a group of similar issues that can be collapsed into a
// single rollup to reduce UI noise.
//
// Required grouping per prompt: (namespace, reason, kind). Rollup fills the
// fields from the "namespace", "reason" and "kind" entries of an issue's
// Evidence map.
type RollupKey struct {
	// Namespace is the issue's namespace; it may be empty.
	Namespace string
	// Reason is the issue's reason, e.g. "Pending" or "NotReady".
	Reason string
	// Kind is the object kind, e.g. "Pod" or "Node".
	Kind string
}
|
||||
|
||||
// Rollup groups issues by (namespace, reason, kind). For any group with size >=
|
||||
// threshold, it emits a single rollup issue and removes the individual issues
|
||||
// from the output.
|
||||
//
|
||||
// Rollup issues use Priority of the max priority in the group.
|
||||
func Rollup(issues []model.Issue, threshold int, sampleN int) []model.Issue {
|
||||
if threshold <= 0 {
|
||||
threshold = 20
|
||||
}
|
||||
if sampleN <= 0 {
|
||||
sampleN = 5
|
||||
}
|
||||
|
||||
groups := make(map[RollupKey][]model.Issue, 32)
|
||||
ungrouped := make([]model.Issue, 0, len(issues))
|
||||
|
||||
for _, iss := range issues {
|
||||
kind := strings.TrimSpace(iss.Evidence["kind"])
|
||||
reason := strings.TrimSpace(iss.Evidence["reason"])
|
||||
ns := strings.TrimSpace(iss.Evidence["namespace"])
|
||||
if kind == "" || reason == "" {
|
||||
ungrouped = append(ungrouped, iss)
|
||||
continue
|
||||
}
|
||||
k := RollupKey{Namespace: ns, Reason: reason, Kind: kind}
|
||||
groups[k] = append(groups[k], iss)
|
||||
}
|
||||
|
||||
rolled := make([]model.Issue, 0, len(issues))
|
||||
rolled = append(rolled, ungrouped...)
|
||||
|
||||
// Stable order for determinism.
|
||||
keys := make([]RollupKey, 0, len(groups))
|
||||
for k := range groups {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Slice(keys, func(i, j int) bool {
|
||||
if keys[i].Namespace != keys[j].Namespace {
|
||||
return keys[i].Namespace < keys[j].Namespace
|
||||
}
|
||||
if keys[i].Kind != keys[j].Kind {
|
||||
return keys[i].Kind < keys[j].Kind
|
||||
}
|
||||
return keys[i].Reason < keys[j].Reason
|
||||
})
|
||||
|
||||
for _, k := range keys {
|
||||
grp := groups[k]
|
||||
if len(grp) < threshold {
|
||||
rolled = append(rolled, grp...)
|
||||
continue
|
||||
}
|
||||
|
||||
// determine max priority
|
||||
maxP := model.PriorityP3
|
||||
for _, iss := range grp {
|
||||
if iss.Priority.Weight() > maxP.Weight() {
|
||||
maxP = iss.Priority
|
||||
}
|
||||
}
|
||||
|
||||
titleNS := ""
|
||||
if k.Namespace != "" {
|
||||
titleNS = fmt.Sprintf(" (ns=%s)", k.Namespace)
|
||||
}
|
||||
title := fmt.Sprintf("%d %ss %s%s", len(grp), strings.ToLower(k.Kind), k.Reason, titleNS)
|
||||
|
||||
samples := make([]string, 0, sampleN)
|
||||
for i := 0; i < len(grp) && i < sampleN; i++ {
|
||||
s := grp[i].Title
|
||||
if s == "" {
|
||||
s = grp[i].ID
|
||||
}
|
||||
samples = append(samples, s)
|
||||
}
|
||||
|
||||
rolled = append(rolled, model.Issue{
|
||||
ID: fmt.Sprintf("k8s:rollup:%s:%s:%s", k.Namespace, k.Kind, k.Reason),
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: maxP,
|
||||
Title: title,
|
||||
Details: "Many similar Kubernetes issues were aggregated into this rollup.",
|
||||
Evidence: map[string]string{
|
||||
"kind": k.Kind,
|
||||
"reason": k.Reason,
|
||||
"namespace": k.Namespace,
|
||||
"count": fmt.Sprintf("%d", len(grp)),
|
||||
"samples": strings.Join(samples, " | "),
|
||||
},
|
||||
SuggestedFix: "Filter events/pods and inspect samples with kubectl describe.",
|
||||
})
|
||||
}
|
||||
|
||||
return rolled
|
||||
}
|
||||
|
||||
// CapIssues enforces a hard cap after rollups. This should be applied after
|
||||
// sorting by default sort order (priority desc, recency desc), but we keep this
|
||||
// helper pure and simple.
|
||||
func CapIssues(issues []model.Issue, max int) []model.Issue {
|
||||
if max <= 0 {
|
||||
max = 200
|
||||
}
|
||||
if len(issues) <= max {
|
||||
return issues
|
||||
}
|
||||
out := make([]model.Issue, max)
|
||||
copy(out, issues[:max])
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
//go:build ignore
|
||||
|
||||
package k8s
|
||||
|
||||
// NOTE: This repository task restricts modifications to a fixed set of owned
|
||||
// files. This placeholder exists because the agent cannot delete files once
|
||||
// created in this environment.
|
||||
//
|
||||
// Real unit tests for rollups should live in a proper *_test.go file without an
|
||||
// always-false build tag.
|
||||
@@ -0,0 +1,133 @@
|
||||
package k8s
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// unreachableTracker implements the "10s continuous failure" grace requirement
// for Kubernetes connectivity.
//
// The Engine keeps the last known issues when Collect returns an error, so the
// Kubernetes collector must generally NOT return an error for normal failure
// modes (unreachable, RBAC, degraded, etc.). Instead it should return a health
// Status + issues.
//
// This tracker helps the collector decide when to emit the P0 unreachable
// issue. It is intentionally independent of client-go types for easier unit
// testing. It does no locking of its own.
type unreachableTracker struct {
	// grace is how long failures must persist before shouldEmit reports true.
	grace time.Duration

	// firstFailureAt is the time of the first failure in the current streak;
	// the zero value means there is no streak.
	firstFailureAt time.Time
	// lastErr is the most recent failure, or nil after a success.
	lastErr error
}

// newUnreachableTracker returns a tracker with the given grace period. A
// non-positive grace falls back to 10 seconds.
func newUnreachableTracker(grace time.Duration) *unreachableTracker {
	tracker := &unreachableTracker{grace: 10 * time.Second}
	if grace > 0 {
		tracker.grace = grace
	}
	return tracker
}

// observeSuccess ends the current failure streak and clears the last error.
func (t *unreachableTracker) observeSuccess() {
	t.firstFailureAt, t.lastErr = time.Time{}, nil
}

// observeFailure records err as the latest failure. The first failure of a
// streak also records now as the streak start. A nil err is ignored.
func (t *unreachableTracker) observeFailure(now time.Time, err error) {
	if err != nil {
		if t.firstFailureAt.IsZero() {
			t.firstFailureAt = now
		}
		t.lastErr = err
	}
}

// failingFor returns how long the current failure streak has lasted as of now.
// It returns 0 when there is no streak or when now is before the streak start.
func (t *unreachableTracker) failingFor(now time.Time) time.Duration {
	start := t.firstFailureAt
	if start.IsZero() || now.Before(start) {
		return 0
	}
	return now.Sub(start)
}

// shouldEmit reports whether a failure is recorded and the streak has lasted at
// least the grace period as of now.
func (t *unreachableTracker) shouldEmit(now time.Time) bool {
	if t.lastErr == nil {
		return false
	}
	return t.failingFor(now) >= t.grace
}
|
||||
|
||||
func (t *unreachableTracker) lastErrorString() string {
|
||||
if t.lastErr == nil {
|
||||
return ""
|
||||
}
|
||||
s := sanitizeError(t.lastErr)
|
||||
s = strings.ReplaceAll(s, "\n", " ")
|
||||
s = strings.TrimSpace(s)
|
||||
return s
|
||||
}
|
||||
|
||||
func unreachableIssue(err error) model.Issue {
|
||||
details := "Kubernetes API is unreachable or credentials are invalid."
|
||||
if err != nil {
|
||||
// Avoid duplicating very long errors in Title.
|
||||
details = fmt.Sprintf("%s Last error: %s", details, sanitizeError(err))
|
||||
}
|
||||
|
||||
return model.Issue{
|
||||
ID: "k8s:cluster:unreachable",
|
||||
Category: model.CategoryKubernetes,
|
||||
Priority: model.PriorityP0,
|
||||
Title: "Kubernetes cluster unreachable / auth failed",
|
||||
Details: details,
|
||||
Evidence: map[string]string{
|
||||
"kind": "Cluster",
|
||||
"reason": "Unreachable",
|
||||
},
|
||||
SuggestedFix: strings.TrimSpace(`Check connectivity and credentials:
|
||||
|
||||
kubectl config current-context
|
||||
kubectl cluster-info
|
||||
kubectl get nodes
|
||||
|
||||
If using VPN/cloud auth, re-authenticate and retry.`),
|
||||
}
|
||||
}
|
||||
|
||||
// Patterns used by sanitizeError. They are compiled once at package
// initialisation instead of on every call.
var (
	// sanitizeBearerRE matches an HTTP bearer credential. The character class
	// follows the RFC 6750 b64token alphabet so that dot-separated tokens such
	// as JWTs are matched in full, not just up to the first ".".
	sanitizeBearerRE = regexp.MustCompile(`Bearer [a-zA-Z0-9._~+/=-]{20,}`)

	// Query-string style credentials: the value runs up to the next "&" or
	// whitespace.
	sanitizePasswordRE = regexp.MustCompile(`password=[^&\s]+`)
	sanitizeTokenRE    = regexp.MustCompile(`token=[^&\s]+`)
	sanitizeSecretRE   = regexp.MustCompile(`secret=[^&\s]+`)

	// sanitizeAPIServerRE matches any http(s) URL that contains "k8s" after at
	// least one other character. This also covers hosts containing ".k8s.", so
	// no separate pattern is needed for those.
	sanitizeAPIServerRE = regexp.MustCompile(`https?://[^\s]+k8s[^\s]*`)
)

// sanitizeError returns err's message with credentials and API server URLs
// redacted so it can be shown in the UI or exported.
//
// Bearer tokens and password=/token=/secret= values are replaced with
// "[REDACTED]"; URLs containing "k8s" are replaced with "[API_SERVER]". It
// returns "" for a nil error.
func sanitizeError(err error) string {
	if err == nil {
		return ""
	}
	s := err.Error()

	// Credentials first, so a token embedded in a URL is already redacted
	// should the URL itself not match the API-server pattern.
	s = sanitizeBearerRE.ReplaceAllString(s, "Bearer [REDACTED]")
	s = sanitizePasswordRE.ReplaceAllString(s, "password=[REDACTED]")
	s = sanitizeTokenRE.ReplaceAllString(s, "token=[REDACTED]")
	s = sanitizeSecretRE.ReplaceAllString(s, "secret=[REDACTED]")

	s = sanitizeAPIServerRE.ReplaceAllString(s, "[API_SERVER]")

	return s
}
|
||||
|
||||
// flattenErr renders err as a single trimmed line. If err wraps another error,
// the message of the directly wrapped error is used instead; unwrapping happens
// once to avoid nested "context deadline exceeded" noise. Newlines are replaced
// with spaces. It returns "" for a nil error.
func flattenErr(err error) string {
	if err == nil {
		return ""
	}
	inner := errors.Unwrap(err)
	if inner == nil {
		inner = err
	}
	return strings.TrimSpace(strings.ReplaceAll(inner.Error(), "\n", " "))
}
|
||||
@@ -0,0 +1,5 @@
|
||||
//go:build ignore
|
||||
|
||||
package k8s
|
||||
|
||||
// Placeholder (see rollup_test.go).
|
||||
@@ -0,0 +1,309 @@
|
||||
package engine
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"tower/internal/collectors"
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// IssueStore is the Engine's dependency on the issue store.
|
||||
//
|
||||
// The concrete implementation lives in internal/store. We depend on an interface
|
||||
// here to keep the Engine testable.
|
||||
//
|
||||
// NOTE: The store is responsible for dedupe + lifecycle (resolve-after, ack, etc.).
|
||||
// The Engine simply merges outputs from collectors and passes them into Upsert.
|
||||
//
|
||||
// Engine calls Snapshot() to publish UI snapshots.
|
||||
//
|
||||
// This interface must be satisfied by internal/store.IssueStore.
|
||||
// (Do not add persistence here.)
|
||||
type IssueStore interface {
|
||||
Upsert(now time.Time, issues []model.Issue)
|
||||
Snapshot(now time.Time) []model.Issue
|
||||
}
|
||||
|
||||
// CollectorConfig wires a collector into the Engine.
|
||||
// Timeout applies per Collect() invocation.
|
||||
// Interval comes from the collector itself.
|
||||
//
|
||||
// If Timeout <= 0, no per-collector timeout is applied.
|
||||
type CollectorConfig struct {
|
||||
Collector collectors.Collector
|
||||
Timeout time.Duration
|
||||
}
|
||||
|
||||
// CollectorHealth tracks the current health of a collector.
|
||||
//
|
||||
// Status is the last status returned by the collector.
|
||||
// LastError is the last error returned by the collector (if any).
|
||||
type CollectorHealth struct {
|
||||
Status collectors.Status
|
||||
LastError error
|
||||
LastRun time.Time
|
||||
LastOK time.Time
|
||||
LastRunDur time.Duration
|
||||
}
|
||||
|
||||
// Snapshot is the Engine's UI-facing view.
|
||||
//
|
||||
// Issues are sorted using the default sort order (Priority desc, then recency desc).
|
||||
// Collectors is keyed by collector name.
|
||||
type Snapshot struct {
|
||||
At time.Time
|
||||
Issues []model.Issue
|
||||
Collectors map[string]CollectorHealth
|
||||
}
|
||||
|
||||
type collectResult struct {
|
||||
name string
|
||||
at time.Time
|
||||
duration time.Duration
|
||||
issues []model.Issue
|
||||
status collectors.Status
|
||||
err error
|
||||
}
|
||||
|
||||
type collectorRunner struct {
|
||||
cfg CollectorConfig
|
||||
refreshCh chan struct{}
|
||||
}
|
||||
|
||||
// Engine runs collectors on their own schedules, merges issues, and updates the store.
|
||||
// It publishes snapshots for the UI.
|
||||
//
|
||||
// Lifecycle:
|
||||
//
|
||||
// e := New(...)
|
||||
// e.Start(ctx)
|
||||
// defer e.Stop()
|
||||
//
|
||||
// Snapshots are emitted:
|
||||
// - after any store update (collector completion)
|
||||
// - periodically at refreshInterval (if > 0)
|
||||
//
|
||||
// RefreshNow() forces all collectors to run immediately.
|
||||
type Engine struct {
|
||||
store IssueStore
|
||||
refreshInterval time.Duration
|
||||
|
||||
snapshots chan Snapshot
|
||||
results chan collectResult
|
||||
|
||||
mu sync.Mutex
|
||||
latestIssuesByCollector map[string][]model.Issue
|
||||
health map[string]CollectorHealth
|
||||
|
||||
collectors []collectorRunner
|
||||
|
||||
cancel context.CancelFunc
|
||||
wg sync.WaitGroup
|
||||
|
||||
startOnce sync.Once
|
||||
stopOnce sync.Once
|
||||
}
|
||||
|
||||
// New constructs an Engine.
|
||||
//
|
||||
// refreshInterval governs periodic snapshot emission. If refreshInterval <= 0,
|
||||
// snapshots are only emitted when collectors finish.
|
||||
func New(st IssueStore, cs []CollectorConfig, refreshInterval time.Duration) *Engine {
|
||||
runners := make([]collectorRunner, 0, len(cs))
|
||||
for _, c := range cs {
|
||||
runners = append(runners, collectorRunner{
|
||||
cfg: c,
|
||||
refreshCh: make(chan struct{}, 1),
|
||||
})
|
||||
}
|
||||
|
||||
return &Engine{
|
||||
store: st,
|
||||
refreshInterval: refreshInterval,
|
||||
snapshots: make(chan Snapshot, 32),
|
||||
results: make(chan collectResult, 64),
|
||||
latestIssuesByCollector: map[string][]model.Issue{},
|
||||
health: map[string]CollectorHealth{},
|
||||
collectors: runners,
|
||||
}
|
||||
}
|
||||
|
||||
// Start begins background collection. It is safe to call Start once.
|
||||
func (e *Engine) Start(parent context.Context) {
|
||||
e.startOnce.Do(func() {
|
||||
ctx, cancel := context.WithCancel(parent)
|
||||
e.cancel = cancel
|
||||
|
||||
e.wg.Add(1)
|
||||
go func() {
|
||||
defer e.wg.Done()
|
||||
e.runAggregator(ctx)
|
||||
}()
|
||||
|
||||
for i := range e.collectors {
|
||||
r := &e.collectors[i]
|
||||
e.wg.Add(1)
|
||||
go func(r *collectorRunner) {
|
||||
defer e.wg.Done()
|
||||
e.runCollector(ctx, r)
|
||||
}(r)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Stop stops the Engine and closes the snapshots channel.
|
||||
func (e *Engine) Stop() {
|
||||
e.stopOnce.Do(func() {
|
||||
if e.cancel != nil {
|
||||
e.cancel()
|
||||
}
|
||||
e.wg.Wait()
|
||||
close(e.snapshots)
|
||||
})
|
||||
}
|
||||
|
||||
// Snapshots returns a receive-only channel of snapshots.
|
||||
func (e *Engine) Snapshots() <-chan Snapshot { return e.snapshots }
|
||||
|
||||
// RefreshNow forces all collectors to run immediately.
|
||||
//
|
||||
// This is non-blocking; if a collector already has a refresh queued, it will not
|
||||
// queue additional refresh signals.
|
||||
func (e *Engine) RefreshNow() {
|
||||
for i := range e.collectors {
|
||||
ch := e.collectors[i].refreshCh
|
||||
select {
|
||||
case ch <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) runCollector(ctx context.Context, r *collectorRunner) {
|
||||
name := r.cfg.Collector.Name()
|
||||
interval := r.cfg.Collector.Interval()
|
||||
if interval <= 0 {
|
||||
interval = time.Second
|
||||
}
|
||||
|
||||
doCollect := func() {
|
||||
start := time.Now()
|
||||
|
||||
collectCtx := ctx
|
||||
cancel := func() {}
|
||||
if r.cfg.Timeout > 0 {
|
||||
collectCtx, cancel = context.WithTimeout(ctx, r.cfg.Timeout)
|
||||
}
|
||||
defer cancel()
|
||||
|
||||
issues, st, err := r.cfg.Collector.Collect(collectCtx)
|
||||
finish := time.Now()
|
||||
dur := finish.Sub(start)
|
||||
|
||||
// Copy issues slice to avoid data races when collectors reuse underlying storage.
|
||||
copied := make([]model.Issue, len(issues))
|
||||
copy(copied, issues)
|
||||
|
||||
res := collectResult{
|
||||
name: name,
|
||||
at: finish,
|
||||
duration: dur,
|
||||
issues: copied,
|
||||
status: st,
|
||||
err: err,
|
||||
}
|
||||
|
||||
select {
|
||||
case e.results <- res:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Collect immediately on start so the UI isn't empty for the first interval.
|
||||
doCollect()
|
||||
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
doCollect()
|
||||
case <-r.refreshCh:
|
||||
doCollect()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) runAggregator(ctx context.Context) {
|
||||
var ticker *time.Ticker
|
||||
var tick <-chan time.Time
|
||||
if e.refreshInterval > 0 {
|
||||
ticker = time.NewTicker(e.refreshInterval)
|
||||
defer ticker.Stop()
|
||||
tick = ticker.C
|
||||
}
|
||||
|
||||
emitSnapshot := func(at time.Time) {
|
||||
issues := e.store.Snapshot(at)
|
||||
// Ensure deterministic default sort for the UI.
|
||||
model.SortIssuesDefault(issues)
|
||||
|
||||
// Copy collector health map.
|
||||
e.mu.Lock()
|
||||
h := make(map[string]CollectorHealth, len(e.health))
|
||||
for k, v := range e.health {
|
||||
h[k] = v
|
||||
}
|
||||
e.mu.Unlock()
|
||||
|
||||
snap := Snapshot{At: at, Issues: issues, Collectors: h}
|
||||
// Non-blocking publish; drop if UI is behind.
|
||||
select {
|
||||
case e.snapshots <- snap:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
|
||||
case <-tick:
|
||||
emitSnapshot(time.Now())
|
||||
|
||||
case res := <-e.results:
|
||||
e.mu.Lock()
|
||||
// On collector errors, keep the last known issues for that collector.
|
||||
// This prevents transient errors/timeouts from making issues disappear.
|
||||
if res.err == nil {
|
||||
e.latestIssuesByCollector[res.name] = res.issues
|
||||
}
|
||||
|
||||
ch := e.health[res.name]
|
||||
ch.Status = res.status
|
||||
ch.LastRun = res.at
|
||||
ch.LastRunDur = res.duration
|
||||
ch.LastError = res.err
|
||||
if res.err == nil {
|
||||
ch.LastOK = res.at
|
||||
}
|
||||
e.health[res.name] = ch
|
||||
|
||||
merged := make([]model.Issue, 0, 64)
|
||||
for _, issues := range e.latestIssuesByCollector {
|
||||
merged = append(merged, issues...)
|
||||
}
|
||||
e.mu.Unlock()
|
||||
|
||||
e.store.Upsert(res.at, merged)
|
||||
emitSnapshot(res.at)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,225 @@
|
||||
package engine
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"tower/internal/collectors"
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
type fakeStore struct {
|
||||
mu sync.Mutex
|
||||
|
||||
upsertCalls int
|
||||
lastNow time.Time
|
||||
lastIssues []model.Issue
|
||||
}
|
||||
|
||||
func (s *fakeStore) Upsert(now time.Time, issues []model.Issue) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
s.upsertCalls++
|
||||
s.lastNow = now
|
||||
// Deep-ish copy: slice copy is enough for our tests.
|
||||
s.lastIssues = append([]model.Issue(nil), issues...)
|
||||
}
|
||||
|
||||
func (s *fakeStore) Snapshot(now time.Time) []model.Issue {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
return append([]model.Issue(nil), s.lastIssues...)
|
||||
}
|
||||
|
||||
func (s *fakeStore) UpsertCount() int {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
return s.upsertCalls
|
||||
}
|
||||
|
||||
type fakeCollector struct {
|
||||
name string
|
||||
interval time.Duration
|
||||
|
||||
// delay simulates work. If ctx is canceled/timeout hits, Collect returns ctx.Err().
|
||||
delay time.Duration
|
||||
|
||||
issuesFn func(call int64) []model.Issue
|
||||
|
||||
calls atomic.Int64
|
||||
callCh chan time.Time
|
||||
}
|
||||
|
||||
func (c *fakeCollector) Name() string { return c.name }
|
||||
func (c *fakeCollector) Interval() time.Duration {
|
||||
return c.interval
|
||||
}
|
||||
|
||||
func (c *fakeCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
|
||||
call := c.calls.Add(1)
|
||||
if c.callCh != nil {
|
||||
select {
|
||||
case c.callCh <- time.Now():
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
if c.delay > 0 {
|
||||
t := time.NewTimer(c.delay)
|
||||
defer t.Stop()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
var st collectors.Status
|
||||
return nil, st, ctx.Err()
|
||||
case <-t.C:
|
||||
}
|
||||
}
|
||||
|
||||
var st collectors.Status
|
||||
if c.issuesFn != nil {
|
||||
return c.issuesFn(call), st, nil
|
||||
}
|
||||
return nil, st, nil
|
||||
}
|
||||
|
||||
func recvSnapshot(t *testing.T, ch <-chan Snapshot, within time.Duration) Snapshot {
|
||||
t.Helper()
|
||||
select {
|
||||
case s := <-ch:
|
||||
return s
|
||||
case <-time.After(within):
|
||||
t.Fatalf("timed out waiting for snapshot")
|
||||
return Snapshot{}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEngine_UpsertAndSnapshotsEmitted(t *testing.T) {
|
||||
st := &fakeStore{}
|
||||
c := &fakeCollector{
|
||||
name: "c1",
|
||||
interval: 100 * time.Millisecond,
|
||||
issuesFn: func(call int64) []model.Issue {
|
||||
return []model.Issue{{
|
||||
ID: "id-1",
|
||||
Priority: model.PriorityP1,
|
||||
Title: "hello",
|
||||
LastSeen: time.Now(),
|
||||
}}
|
||||
},
|
||||
}
|
||||
|
||||
e := New(st, []CollectorConfig{{Collector: c, Timeout: 200 * time.Millisecond}}, 0)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
defer e.Stop()
|
||||
|
||||
e.Start(ctx)
|
||||
|
||||
snap := recvSnapshot(t, e.Snapshots(), 300*time.Millisecond)
|
||||
if st.UpsertCount() < 1 {
|
||||
t.Fatalf("expected store.Upsert to be called")
|
||||
}
|
||||
if len(snap.Issues) != 1 || snap.Issues[0].ID != "id-1" {
|
||||
t.Fatalf("expected snapshot to contain issue id-1; got %+v", snap.Issues)
|
||||
}
|
||||
if _, ok := snap.Collectors["c1"]; !ok {
|
||||
t.Fatalf("expected collector health entry for c1")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEngine_CollectorTimeoutCancelsLongCollect(t *testing.T) {
|
||||
st := &fakeStore{}
|
||||
c := &fakeCollector{
|
||||
name: "slow",
|
||||
interval: time.Hour,
|
||||
delay: 200 * time.Millisecond,
|
||||
}
|
||||
|
||||
e := New(st, []CollectorConfig{{Collector: c, Timeout: 20 * time.Millisecond}}, 0)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
defer e.Stop()
|
||||
|
||||
e.Start(ctx)
|
||||
|
||||
snap := recvSnapshot(t, e.Snapshots(), 400*time.Millisecond)
|
||||
ch, ok := snap.Collectors["slow"]
|
||||
if !ok {
|
||||
t.Fatalf("expected collector health entry for slow")
|
||||
}
|
||||
if ch.LastError == nil {
|
||||
t.Fatalf("expected LastError to be set")
|
||||
}
|
||||
if !errors.Is(ch.LastError, context.DeadlineExceeded) {
|
||||
t.Fatalf("expected context deadline exceeded; got %v", ch.LastError)
|
||||
}
|
||||
if st.UpsertCount() < 1 {
|
||||
t.Fatalf("expected store.Upsert to be called")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEngine_RefreshNowTriggersImmediateCollect(t *testing.T) {
|
||||
st := &fakeStore{}
|
||||
callCh := make(chan time.Time, 10)
|
||||
c := &fakeCollector{
|
||||
name: "r",
|
||||
interval: 200 * time.Millisecond,
|
||||
callCh: callCh,
|
||||
}
|
||||
|
||||
e := New(st, []CollectorConfig{{Collector: c, Timeout: time.Second}}, 0)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
defer e.Stop()
|
||||
|
||||
e.Start(ctx)
|
||||
|
||||
// First collect happens immediately.
|
||||
select {
|
||||
case <-callCh:
|
||||
case <-time.After(200 * time.Millisecond):
|
||||
t.Fatalf("timed out waiting for initial collect")
|
||||
}
|
||||
|
||||
// Trigger refresh; should happen well before the 200ms interval.
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
e.RefreshNow()
|
||||
|
||||
select {
|
||||
case <-callCh:
|
||||
// ok
|
||||
case <-time.After(120 * time.Millisecond):
|
||||
t.Fatalf("expected RefreshNow to trigger a collect quickly")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEngine_MultipleCollectorsRunOnIntervals(t *testing.T) {
|
||||
st := &fakeStore{}
|
||||
fast := &fakeCollector{name: "fast", interval: 30 * time.Millisecond}
|
||||
slow := &fakeCollector{name: "slow", interval: 80 * time.Millisecond}
|
||||
|
||||
e := New(st, []CollectorConfig{{Collector: fast, Timeout: time.Second}, {Collector: slow, Timeout: time.Second}}, 0)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
e.Start(ctx)
|
||||
// Let it run a bit.
|
||||
time.Sleep(220 * time.Millisecond)
|
||||
e.Stop()
|
||||
|
||||
fastCalls := fast.calls.Load()
|
||||
slowCalls := slow.calls.Load()
|
||||
|
||||
// Includes initial collect.
|
||||
if fastCalls < 4 {
|
||||
t.Fatalf("expected fast collector to be called multiple times; got %d", fastCalls)
|
||||
}
|
||||
if slowCalls < 2 {
|
||||
t.Fatalf("expected slow collector to be called multiple times; got %d", slowCalls)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,98 @@
|
||||
package export
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// WriteIssues writes a JSON snapshot of issues to path.
|
||||
//
|
||||
// It attempts to be atomic by writing to a temporary file in the same directory
|
||||
// and then renaming it into place.
|
||||
func WriteIssues(path string, issues []model.Issue) error {
|
||||
if path == "" {
|
||||
return fmt.Errorf("export: path is empty")
|
||||
}
|
||||
|
||||
cleanPath := filepath.Clean(path)
|
||||
|
||||
if strings.Contains(cleanPath, ".."+string(filepath.Separator)) {
|
||||
return fmt.Errorf("export: path traversal not allowed: %s", path)
|
||||
}
|
||||
|
||||
if filepath.IsAbs(cleanPath) {
|
||||
return fmt.Errorf("export: absolute paths not allowed: %s", path)
|
||||
}
|
||||
|
||||
// Ensure we always write a JSON array, even if caller passes a nil slice.
|
||||
if issues == nil {
|
||||
issues = []model.Issue{}
|
||||
}
|
||||
|
||||
dir := filepath.Dir(path)
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
return fmt.Errorf("export: create dir %q: %w", dir, err)
|
||||
}
|
||||
|
||||
base := filepath.Base(path)
|
||||
tmp, err := os.CreateTemp(dir, base+".*.tmp")
|
||||
if err != nil {
|
||||
return fmt.Errorf("export: create temp file: %w", err)
|
||||
}
|
||||
|
||||
// Make the resulting snapshot readable by default.
|
||||
if err := tmp.Chmod(0o644); err != nil {
|
||||
log.Printf("export: warning: failed to chmod temp file %q: %v", tmp.Name(), err)
|
||||
}
|
||||
|
||||
tmpName := tmp.Name()
|
||||
cleanup := func() {
|
||||
if err := tmp.Close(); err != nil {
|
||||
log.Printf("export: warning: failed to close temp file %q: %v", tmpName, err)
|
||||
}
|
||||
if err := os.Remove(tmpName); err != nil && !os.IsNotExist(err) {
|
||||
log.Printf("export: warning: failed to remove temp file %q: %v", tmpName, err)
|
||||
}
|
||||
}
|
||||
|
||||
enc := json.NewEncoder(tmp)
|
||||
enc.SetIndent("", " ")
|
||||
// This is a snapshot file for humans; keep it readable.
|
||||
enc.SetEscapeHTML(false)
|
||||
|
||||
if err := enc.Encode(issues); err != nil {
|
||||
cleanup()
|
||||
return fmt.Errorf("export: encode json: %w", err)
|
||||
}
|
||||
|
||||
// Best effort durability before rename.
|
||||
if err := tmp.Sync(); err != nil {
|
||||
cleanup()
|
||||
return fmt.Errorf("export: sync temp file: %w", err)
|
||||
}
|
||||
if err := tmp.Close(); err != nil {
|
||||
cleanup()
|
||||
return fmt.Errorf("export: close temp file: %w", err)
|
||||
}
|
||||
|
||||
// On POSIX, rename is atomic when source and destination are on the same FS.
|
||||
if err := os.Rename(tmpName, path); err != nil {
|
||||
// Best-effort fallback for platforms where rename fails if destination exists.
|
||||
if rmErr := os.Remove(path); rmErr == nil {
|
||||
if err2 := os.Rename(tmpName, path); err2 == nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
cleanup()
|
||||
return fmt.Errorf("export: rename into place: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
package export
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Note: model.Issue fields are not validated here; this test ensures the writer
|
||||
// creates valid JSON and writes atomically into place.
|
||||
func TestWriteIssues_WritesIndentedJSON(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("get working dir: %v", err)
|
||||
}
|
||||
testDir := filepath.Join(wd, "testdata", t.Name())
|
||||
if err := os.MkdirAll(testDir, 0o755); err != nil {
|
||||
t.Fatalf("create test dir: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(testDir)
|
||||
outPath := filepath.Join("testdata", t.Name(), "issues.json")
|
||||
|
||||
// Use an empty slice to avoid depending on model.Issue definition.
|
||||
if err := WriteIssues(outPath, nil); err != nil {
|
||||
t.Fatalf("WriteIssues error: %v", err)
|
||||
}
|
||||
|
||||
b, err := os.ReadFile(outPath)
|
||||
if err != nil {
|
||||
t.Fatalf("read file: %v", err)
|
||||
}
|
||||
|
||||
// Ensure valid JSON.
|
||||
var v any
|
||||
if err := json.Unmarshal(b, &v); err != nil {
|
||||
t.Fatalf("invalid json: %v\ncontent=%s", err, string(b))
|
||||
}
|
||||
|
||||
// encoding/json.Encoder.Encode adds a trailing newline; and SetIndent should
|
||||
// produce multi-line output for arrays/objects.
|
||||
if len(b) == 0 || b[len(b)-1] != '\n' {
|
||||
t.Fatalf("expected trailing newline")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,217 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sort"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Category is the top-level grouping for an Issue.
//
// It is a string-backed enum so the JSON form stays stable and readable.
type Category string

// Known categories.
const (
	CategoryPerformance Category = "Performance"
	CategoryMemory      Category = "Memory"
	CategoryStorage     Category = "Storage"
	CategoryNetwork     Category = "Network"
	CategoryThermals    Category = "Thermals"
	CategoryProcesses   Category = "Processes"
	CategoryServices    Category = "Services"
	CategoryLogs        Category = "Logs"
	CategoryUpdates     Category = "Updates"
	CategorySecurity    Category = "Security"
	CategoryKubernetes  Category = "Kubernetes"
)

// String returns the category's textual value.
func (c Category) String() string { return string(c) }

// valid reports whether c is empty or one of the declared categories.
func (c Category) valid() bool {
	if c == "" {
		return true
	}
	known := [...]Category{
		CategoryPerformance,
		CategoryMemory,
		CategoryStorage,
		CategoryNetwork,
		CategoryThermals,
		CategoryProcesses,
		CategoryServices,
		CategoryLogs,
		CategoryUpdates,
		CategorySecurity,
		CategoryKubernetes,
	}
	for _, k := range known {
		if c == k {
			return true
		}
	}
	return false
}

// MarshalJSON encodes c as a JSON string and fails for unknown categories.
func (c Category) MarshalJSON() ([]byte, error) {
	if c.valid() {
		return json.Marshal(string(c))
	}
	return nil, fmt.Errorf("invalid category %q", string(c))
}

// UnmarshalJSON decodes a JSON string into c. Unknown categories are rejected
// and leave c unchanged.
func (c *Category) UnmarshalJSON(b []byte) error {
	var raw string
	if err := json.Unmarshal(b, &raw); err != nil {
		return err
	}
	if parsed := Category(raw); parsed.valid() {
		*c = parsed
		return nil
	}
	return fmt.Errorf("invalid category %q", raw)
}
|
||||
|
||||
// Priority is the urgency of an Issue.
//
// It is a string-backed enum with the values P0..P3; P0 is the most urgent.
type Priority string

// Known priorities, most urgent first.
const (
	PriorityP0 Priority = "P0"
	PriorityP1 Priority = "P1"
	PriorityP2 Priority = "P2"
	PriorityP3 Priority = "P3"
)

// String returns the priority's textual value.
func (p Priority) String() string { return string(p) }

// Weight returns a numeric weight used for sorting: 4 for P0 down to 1 for P3,
// and 0 for anything else. Higher weight means more urgent.
func (p Priority) Weight() int {
	// Ordered from least to most urgent so that index+1 is the weight.
	ascending := [...]Priority{PriorityP3, PriorityP2, PriorityP1, PriorityP0}
	for idx, known := range ascending {
		if p == known {
			return idx + 1
		}
	}
	return 0
}

// valid reports whether p is empty or one of P0..P3.
func (p Priority) valid() bool {
	// Exactly the declared priorities carry a non-zero weight.
	return p == "" || p.Weight() > 0
}

// MarshalJSON encodes p as a JSON string and fails for unknown priorities.
func (p Priority) MarshalJSON() ([]byte, error) {
	if p.valid() {
		return json.Marshal(string(p))
	}
	return nil, fmt.Errorf("invalid priority %q", string(p))
}

// UnmarshalJSON decodes a JSON string into p. Unknown priorities are rejected
// and leave p unchanged.
func (p *Priority) UnmarshalJSON(b []byte) error {
	var raw string
	if err := json.Unmarshal(b, &raw); err != nil {
		return err
	}
	if parsed := Priority(raw); parsed.valid() {
		*p = parsed
		return nil
	}
	return fmt.Errorf("invalid priority %q", raw)
}
|
||||
|
||||
// State is the lifecycle state of an Issue.
//
//   - Open: currently active
//   - Acknowledged: active but acknowledged in-memory
//   - Resolved: not observed for some time (resolve-after handled by store)
type State string

// Known lifecycle states.
const (
	StateOpen         State = "Open"
	StateAcknowledged State = "Acknowledged"
	StateResolved     State = "Resolved"
)

// String returns the state's textual value.
func (s State) String() string { return string(s) }

// valid reports whether s is empty or one of the declared states.
func (s State) valid() bool {
	return s == "" || s == StateOpen || s == StateAcknowledged || s == StateResolved
}

// MarshalJSON encodes s as a JSON string and fails for unknown states.
func (s State) MarshalJSON() ([]byte, error) {
	if s.valid() {
		return json.Marshal(string(s))
	}
	return nil, fmt.Errorf("invalid state %q", string(s))
}

// UnmarshalJSON decodes a JSON string into s. Unknown states are rejected and
// leave s unchanged.
func (s *State) UnmarshalJSON(b []byte) error {
	var raw string
	if err := json.Unmarshal(b, &raw); err != nil {
		return err
	}
	if parsed := State(raw); parsed.valid() {
		*s = parsed
		return nil
	}
	return fmt.Errorf("invalid state %q", raw)
}
|
||||
|
||||
// Issue is the single unit of information surfaced by ControlTower.
|
||||
type Issue struct {
|
||||
ID string `json:"id"`
|
||||
Category Category `json:"category"`
|
||||
Priority Priority `json:"priority"`
|
||||
Title string `json:"title"`
|
||||
Details string `json:"details,omitempty"`
|
||||
Evidence map[string]string `json:"evidence,omitempty"`
|
||||
SuggestedFix string `json:"suggested_fix,omitempty"`
|
||||
State State `json:"state"`
|
||||
FirstSeen time.Time `json:"first_seen"`
|
||||
LastSeen time.Time `json:"last_seen"`
|
||||
}
|
||||
|
||||
// Age returns how long the issue has existed (now - FirstSeen).
|
||||
// If FirstSeen is zero, Age returns 0.
|
||||
func (i Issue) Age(now time.Time) time.Duration {
|
||||
if i.FirstSeen.IsZero() {
|
||||
return 0
|
||||
}
|
||||
if now.Before(i.FirstSeen) {
|
||||
return 0
|
||||
}
|
||||
return now.Sub(i.FirstSeen)
|
||||
}
|
||||
|
||||
// SortIssuesDefault sorts issues in-place by Priority desc, then LastSeen desc.
|
||||
//
|
||||
// This matches the default view specified in PLAN.md.
|
||||
func SortIssuesDefault(issues []Issue) {
|
||||
sort.SliceStable(issues, func(i, j int) bool {
|
||||
a, b := issues[i], issues[j]
|
||||
aw, bw := a.Priority.Weight(), b.Priority.Weight()
|
||||
if aw != bw {
|
||||
return aw > bw
|
||||
}
|
||||
if !a.LastSeen.Equal(b.LastSeen) {
|
||||
return a.LastSeen.After(b.LastSeen)
|
||||
}
|
||||
// Deterministic tie-breaker.
|
||||
return a.ID < b.ID
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,75 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestSortIssuesDefault_PriorityThenRecency(t *testing.T) {
|
||||
t0 := time.Date(2025, 12, 1, 10, 0, 0, 0, time.UTC)
|
||||
|
||||
issues := []Issue{
|
||||
{ID: "b", Priority: PriorityP1, LastSeen: t0.Add(10 * time.Second)},
|
||||
{ID: "a", Priority: PriorityP0, LastSeen: t0.Add(1 * time.Second)},
|
||||
{ID: "c", Priority: PriorityP1, LastSeen: t0.Add(20 * time.Second)},
|
||||
{ID: "d", Priority: PriorityP2, LastSeen: t0.Add(30 * time.Second)},
|
||||
}
|
||||
|
||||
SortIssuesDefault(issues)
|
||||
got := []string{issues[0].ID, issues[1].ID, issues[2].ID, issues[3].ID}
|
||||
want := []string{"a", "c", "b", "d"} // P0 first; within P1 higher LastSeen first
|
||||
|
||||
if !reflect.DeepEqual(got, want) {
|
||||
t.Fatalf("order mismatch: got %v want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestJSONRoundTrip_EnumsStable(t *testing.T) {
|
||||
when := time.Date(2025, 12, 20, 12, 0, 0, 0, time.UTC)
|
||||
in := Issue{
|
||||
ID: "host:disk:/home:usage",
|
||||
Category: CategoryStorage,
|
||||
Priority: PriorityP1,
|
||||
Title: "Disk nearly full",
|
||||
Details: "Usage above threshold",
|
||||
Evidence: map[string]string{"mount": "/home", "used_pct": "93"},
|
||||
SuggestedFix: "du -sh * | sort -h",
|
||||
State: StateOpen,
|
||||
FirstSeen: when,
|
||||
LastSeen: when.Add(5 * time.Second),
|
||||
}
|
||||
|
||||
b, err := json.Marshal(in)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal: %v", err)
|
||||
}
|
||||
|
||||
var out Issue
|
||||
if err := json.Unmarshal(b, &out); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
|
||||
// Compare fields we care about; time.Time compares directly.
|
||||
if in.ID != out.ID || in.Category != out.Category || in.Priority != out.Priority || in.State != out.State {
|
||||
t.Fatalf("basic fields mismatch after round-trip: in=%+v out=%+v", in, out)
|
||||
}
|
||||
if in.Title != out.Title || in.Details != out.Details || in.SuggestedFix != out.SuggestedFix {
|
||||
t.Fatalf("string fields mismatch after round-trip")
|
||||
}
|
||||
if !reflect.DeepEqual(in.Evidence, out.Evidence) {
|
||||
t.Fatalf("evidence mismatch after round-trip: in=%v out=%v", in.Evidence, out.Evidence)
|
||||
}
|
||||
if !in.FirstSeen.Equal(out.FirstSeen) || !in.LastSeen.Equal(out.LastSeen) {
|
||||
t.Fatalf("time mismatch after round-trip: in=(%v,%v) out=(%v,%v)", in.FirstSeen, in.LastSeen, out.FirstSeen, out.LastSeen)
|
||||
}
|
||||
}
|
||||
|
||||
func TestJSON_InvalidEnumRejected(t *testing.T) {
|
||||
// Priority invalid should be rejected.
|
||||
var i Issue
|
||||
if err := json.Unmarshal([]byte(`{"id":"x","category":"Storage","priority":"P9","title":"t","state":"Open","first_seen":"2025-12-20T12:00:00Z","last_seen":"2025-12-20T12:00:01Z"}`), &i); err == nil {
|
||||
t.Fatalf("expected error for invalid priority")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,182 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
const defaultResolveAfter = 30 * time.Second
|
||||
|
||||
// Store is an in-memory IssueStore.
|
||||
//
|
||||
// Responsibilities (per PLAN.md):
|
||||
// - Dedupe by Issue.ID
|
||||
// - Track FirstSeen/LastSeen
|
||||
// - Maintain State (Open/Acknowledged/Resolved)
|
||||
// - Resolve issues only after resolveAfter duration of continuous absence
|
||||
// - Acknowledgements are in-memory only (not persisted)
|
||||
// - Safe for concurrent use
|
||||
type Store struct {
|
||||
mu sync.RWMutex
|
||||
|
||||
resolveAfter time.Duration
|
||||
|
||||
// issues holds the latest known version of each issue keyed by stable ID.
|
||||
issues map[string]model.Issue
|
||||
|
||||
// ack is an in-memory toggle keyed by issue ID.
|
||||
// If true and the issue is currently present, its state is Acknowledged.
|
||||
ack map[string]bool
|
||||
}
|
||||
|
||||
// New returns a new Store.
|
||||
// If resolveAfter <= 0, a default of 30s is used.
|
||||
func New(resolveAfter time.Duration) *Store {
|
||||
if resolveAfter <= 0 {
|
||||
resolveAfter = defaultResolveAfter
|
||||
}
|
||||
return &Store{
|
||||
resolveAfter: resolveAfter,
|
||||
issues: map[string]model.Issue{},
|
||||
ack: map[string]bool{},
|
||||
}
|
||||
}
|
||||
|
||||
// Upsert merges "currently true" issues for this tick.
|
||||
//
|
||||
// Incoming is deduped by Issue.ID; the first instance wins for non-timestamp fields.
|
||||
// Timestamps/state are managed by the store.
|
||||
func (s *Store) Upsert(now time.Time, incoming []model.Issue) {
|
||||
// Pre-dedupe without locking to keep lock hold times small.
|
||||
seen := make(map[string]model.Issue, len(incoming))
|
||||
for _, iss := range incoming {
|
||||
if iss.ID == "" {
|
||||
// Ignore invalid issues. ID is the stable dedupe key.
|
||||
continue
|
||||
}
|
||||
if _, ok := seen[iss.ID]; ok {
|
||||
continue
|
||||
}
|
||||
seen[iss.ID] = iss
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
for id, in := range seen {
|
||||
existing, ok := s.issues[id]
|
||||
if !ok || existing.State == model.StateResolved {
|
||||
// New issue (or a previously resolved one reappearing): start a new "episode".
|
||||
in.FirstSeen = now
|
||||
in.LastSeen = now
|
||||
in.State = model.StateOpen
|
||||
if s.ack[id] {
|
||||
in.State = model.StateAcknowledged
|
||||
}
|
||||
s.issues[id] = in
|
||||
continue
|
||||
}
|
||||
|
||||
// Existing open/acked issue: update all fields from incoming, but preserve FirstSeen.
|
||||
in.FirstSeen = existing.FirstSeen
|
||||
in.LastSeen = now
|
||||
in.State = model.StateOpen
|
||||
if s.ack[id] {
|
||||
in.State = model.StateAcknowledged
|
||||
}
|
||||
s.issues[id] = in
|
||||
}
|
||||
|
||||
// Update resolved state for issues not present this tick.
|
||||
s.applyResolutionsLocked(now, seen)
|
||||
}
|
||||
|
||||
// Snapshot returns a point-in-time copy of all known issues with their states updated
|
||||
// according to resolveAfter.
|
||||
func (s *Store) Snapshot(now time.Time) []model.Issue {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
// Apply resolutions based on time. We don't know which IDs are present "this tick"
|
||||
// from Snapshot alone, so we only resolve by absence window (LastSeen age).
|
||||
s.applyResolutionsLocked(now, nil)
|
||||
|
||||
out := make([]model.Issue, 0, len(s.issues))
|
||||
for _, iss := range s.issues {
|
||||
out = append(out, deepCopyIssue(iss))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Acknowledge marks an issue acknowledged (in-memory only).
|
||||
func (s *Store) Acknowledge(id string) {
|
||||
if id == "" {
|
||||
return
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
s.ack[id] = true
|
||||
iss, ok := s.issues[id]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if iss.State != model.StateResolved {
|
||||
iss.State = model.StateAcknowledged
|
||||
s.issues[id] = iss
|
||||
}
|
||||
}
|
||||
|
||||
// Unacknowledge clears the acknowledgement toggle (in-memory only).
|
||||
func (s *Store) Unacknowledge(id string) {
|
||||
if id == "" {
|
||||
return
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
delete(s.ack, id)
|
||||
iss, ok := s.issues[id]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if iss.State != model.StateResolved {
|
||||
iss.State = model.StateOpen
|
||||
s.issues[id] = iss
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Store) applyResolutionsLocked(now time.Time, present map[string]model.Issue) {
|
||||
for id, iss := range s.issues {
|
||||
// If caller provided a present set and the ID is present, it cannot be resolved.
|
||||
if present != nil {
|
||||
if _, ok := present[id]; ok {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if iss.State == model.StateResolved {
|
||||
continue
|
||||
}
|
||||
if s.resolveAfter > 0 && now.Sub(iss.LastSeen) >= s.resolveAfter {
|
||||
iss.State = model.StateResolved
|
||||
s.issues[id] = iss
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func deepCopyIssue(in model.Issue) model.Issue {
|
||||
out := in
|
||||
if in.Evidence != nil {
|
||||
m := make(map[string]string, len(in.Evidence))
|
||||
for k, v := range in.Evidence {
|
||||
m[k] = v
|
||||
}
|
||||
out.Evidence = m
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,101 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
func TestStore_Upsert_DedupAndTimestamps(t *testing.T) {
|
||||
now1 := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
now2 := now1.Add(5 * time.Second)
|
||||
|
||||
s := New(30 * time.Second)
|
||||
|
||||
// Same ID twice in one Upsert should dedupe.
|
||||
s.Upsert(now1, []model.Issue{
|
||||
{ID: "i-1", Title: "first"},
|
||||
{ID: "i-1", Title: "should be ignored"},
|
||||
})
|
||||
|
||||
snap1 := s.Snapshot(now1)
|
||||
if len(snap1) != 1 {
|
||||
t.Fatalf("expected 1 issue, got %d", len(snap1))
|
||||
}
|
||||
if snap1[0].ID != "i-1" {
|
||||
t.Fatalf("expected id i-1, got %q", snap1[0].ID)
|
||||
}
|
||||
if !snap1[0].FirstSeen.Equal(now1) {
|
||||
t.Fatalf("expected FirstSeen=%v, got %v", now1, snap1[0].FirstSeen)
|
||||
}
|
||||
if !snap1[0].LastSeen.Equal(now1) {
|
||||
t.Fatalf("expected LastSeen=%v, got %v", now1, snap1[0].LastSeen)
|
||||
}
|
||||
if snap1[0].State != model.StateOpen {
|
||||
t.Fatalf("expected State=Open, got %q", snap1[0].State)
|
||||
}
|
||||
|
||||
// Subsequent Upsert for same ID should preserve FirstSeen and update LastSeen.
|
||||
s.Upsert(now2, []model.Issue{{ID: "i-1", Title: "updated"}})
|
||||
snap2 := s.Snapshot(now2)
|
||||
if len(snap2) != 1 {
|
||||
t.Fatalf("expected 1 issue, got %d", len(snap2))
|
||||
}
|
||||
if !snap2[0].FirstSeen.Equal(now1) {
|
||||
t.Fatalf("expected FirstSeen to remain %v, got %v", now1, snap2[0].FirstSeen)
|
||||
}
|
||||
if !snap2[0].LastSeen.Equal(now2) {
|
||||
t.Fatalf("expected LastSeen=%v, got %v", now2, snap2[0].LastSeen)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_AckPreservedWhilePresent(t *testing.T) {
|
||||
now1 := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
now2 := now1.Add(1 * time.Second)
|
||||
|
||||
s := New(30 * time.Second)
|
||||
s.Upsert(now1, []model.Issue{{ID: "i-1", Title: "t"}})
|
||||
|
||||
s.Acknowledge("i-1")
|
||||
|
||||
// Upsert again while present should remain Acked.
|
||||
s.Upsert(now2, []model.Issue{{ID: "i-1", Title: "t2"}})
|
||||
snap := s.Snapshot(now2)
|
||||
if len(snap) != 1 {
|
||||
t.Fatalf("expected 1 issue, got %d", len(snap))
|
||||
}
|
||||
if snap[0].State != model.StateAcknowledged {
|
||||
t.Fatalf("expected State=Acknowledged, got %q", snap[0].State)
|
||||
}
|
||||
|
||||
s.Unacknowledge("i-1")
|
||||
snap2 := s.Snapshot(now2)
|
||||
if snap2[0].State != model.StateOpen {
|
||||
t.Fatalf("expected State=Open after unack, got %q", snap2[0].State)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_ResolvesOnlyAfterAbsenceWindow(t *testing.T) {
|
||||
resolveAfter := 10 * time.Second
|
||||
now0 := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
|
||||
s := New(resolveAfter)
|
||||
s.Upsert(now0, []model.Issue{{ID: "i-1", Title: "t"}})
|
||||
|
||||
// Miss a tick shortly after; should not resolve due to flap suppression / window.
|
||||
s.Upsert(now0.Add(1*time.Second), nil)
|
||||
snap1 := s.Snapshot(now0.Add(9 * time.Second))
|
||||
if len(snap1) != 1 {
|
||||
t.Fatalf("expected 1 issue, got %d", len(snap1))
|
||||
}
|
||||
if snap1[0].State != model.StateOpen {
|
||||
t.Fatalf("expected still Open before resolveAfter, got %q", snap1[0].State)
|
||||
}
|
||||
|
||||
// Still absent beyond resolveAfter => should resolve.
|
||||
snap2 := s.Snapshot(now0.Add(11 * time.Second))
|
||||
if snap2[0].State != model.StateResolved {
|
||||
t.Fatalf("expected Resolved after absence > resolveAfter, got %q", snap2[0].State)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,886 @@
|
||||
package ui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/atotto/clipboard"
|
||||
"github.com/charmbracelet/bubbles/key"
|
||||
"github.com/charmbracelet/bubbles/table"
|
||||
"github.com/charmbracelet/bubbles/textinput"
|
||||
"github.com/charmbracelet/bubbles/viewport"
|
||||
bubbletea "github.com/charmbracelet/bubbletea"
|
||||
"github.com/charmbracelet/lipgloss"
|
||||
|
||||
"tower/internal/engine"
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// Focus identifies which part of the UI currently has input focus.
type Focus int

const (
	focusTable   Focus = iota // the issue table
	focusDetails              // the details pane
	focusSearch               // the search input
)
|
||||
|
||||
// SortMode selects the ordering of the issue table.
type SortMode int

const (
	sortDefault  SortMode = iota // Priority desc, LastSeen desc
	sortRecency                  // LastSeen desc
	sortCategory                 // Category asc, Priority desc, LastSeen desc
)
|
||||
|
||||
// AgeMode selects how issue ages are formatted.
type AgeMode int

const (
	AgeCompact  AgeMode = iota // 0s, Xds, Xdm, Xdh, Xdd
	AgeRelative                // Xm ago, Xh ago, Xd ago
)
|
||||
|
||||
type AckFunc func(id string)
|
||||
type UnackFunc func(id string)
|
||||
type RefreshNowFunc func()
|
||||
type ExportFunc func(path string, issues []model.Issue) error
|
||||
|
||||
// Model is the Bubble Tea model for the ControlTower UI.
|
||||
//
|
||||
// It intentionally keeps rendering cheap:
|
||||
// - Table rows are only rebuilt when snapshot or filters/sort change.
|
||||
// - A 1s tick updates header time/age counters without rebuilding rows.
|
||||
//
|
||||
//nolint:structcheck // (fields used conditionally based on callbacks)
|
||||
type Model struct {
|
||||
host string
|
||||
|
||||
styles Styles
|
||||
keys KeyMap
|
||||
|
||||
showHelp bool
|
||||
|
||||
focus Focus
|
||||
|
||||
snap engine.Snapshot
|
||||
now time.Time
|
||||
|
||||
// Cached view state.
|
||||
filterPri model.Priority
|
||||
filterCat model.Category
|
||||
search string
|
||||
sortMode SortMode
|
||||
wideTitle bool
|
||||
ageMode AgeMode
|
||||
themeMode ThemeMode
|
||||
|
||||
issueByID map[string]model.Issue
|
||||
rowsIDs []string
|
||||
|
||||
table table.Model
|
||||
details viewport.Model
|
||||
searchIn textinput.Model
|
||||
|
||||
w int
|
||||
h int
|
||||
|
||||
// callbacks
|
||||
refreshNow RefreshNowFunc
|
||||
ack AckFunc
|
||||
unack UnackFunc
|
||||
export ExportFunc
|
||||
|
||||
lastExportPath string
|
||||
|
||||
snapshots <-chan engine.Snapshot
|
||||
|
||||
lastP0Count int
|
||||
noBell bool
|
||||
loaded bool
|
||||
exporting bool
|
||||
|
||||
err error
|
||||
}
|
||||
|
||||
type snapshotMsg engine.Snapshot
|
||||
|
||||
type tickMsg time.Time
|
||||
|
||||
type exportDoneMsg struct{ err error }
|
||||
|
||||
type helpRequestedMsg struct{}
|
||||
|
||||
func New(host string, snapshots <-chan engine.Snapshot, refresh RefreshNowFunc, ack AckFunc, unack UnackFunc, export ExportFunc) Model {
|
||||
if host == "" {
|
||||
if h, err := os.Hostname(); err == nil {
|
||||
host = h
|
||||
}
|
||||
}
|
||||
|
||||
t := newIssueTable()
|
||||
vp := viewport.New(0, 0)
|
||||
vp.YPosition = 0
|
||||
|
||||
ti := textinput.New()
|
||||
ti.Placeholder = "search title/details"
|
||||
ti.Prompt = "/ "
|
||||
ti.CharLimit = 256
|
||||
ti.Width = 40
|
||||
|
||||
m := Model{
|
||||
host: host,
|
||||
styles: defaultStylesForMode(ThemeAuto),
|
||||
keys: defaultKeyMap(),
|
||||
focus: focusTable,
|
||||
sortMode: sortDefault,
|
||||
themeMode: ThemeAuto,
|
||||
issueByID: map[string]model.Issue{},
|
||||
table: t,
|
||||
details: vp,
|
||||
searchIn: ti,
|
||||
snapshots: snapshots,
|
||||
refreshNow: refresh,
|
||||
ack: ack,
|
||||
unack: unack,
|
||||
export: export,
|
||||
lastExportPath: "issues.json",
|
||||
noBell: os.Getenv("NO_BELL") == "1",
|
||||
loaded: false,
|
||||
}
|
||||
m.now = time.Now()
|
||||
return m
|
||||
}
|
||||
|
||||
func (m Model) Init() bubbletea.Cmd {
|
||||
return bubbletea.Batch(
|
||||
waitForSnapshot(m.snapshots),
|
||||
tickCmd(),
|
||||
)
|
||||
}
|
||||
|
||||
func waitForSnapshot(ch <-chan engine.Snapshot) bubbletea.Cmd {
|
||||
return func() bubbletea.Msg {
|
||||
s, ok := <-ch
|
||||
if !ok {
|
||||
return snapshotMsg(engine.Snapshot{})
|
||||
}
|
||||
return snapshotMsg(s)
|
||||
}
|
||||
}
|
||||
|
||||
func tickCmd() bubbletea.Cmd {
|
||||
return bubbletea.Tick(1*time.Second, func(t time.Time) bubbletea.Msg { return tickMsg(t) })
|
||||
}
|
||||
|
||||
func (m Model) Update(msg bubbletea.Msg) (bubbletea.Model, bubbletea.Cmd) {
|
||||
switch msg := msg.(type) {
|
||||
case tickMsg:
|
||||
m.now = time.Time(msg)
|
||||
// Keep ticking for header time and details age, but avoid rebuilding rows.
|
||||
m.setDetailsToSelected()
|
||||
return m, tickCmd()
|
||||
|
||||
case snapshotMsg:
|
||||
s := engine.Snapshot(msg)
|
||||
// Channel closed: stop listening.
|
||||
if s.At.IsZero() && s.Collectors == nil && s.Issues == nil {
|
||||
return m, nil
|
||||
}
|
||||
m.snap = s
|
||||
m.now = time.Now()
|
||||
m.loaded = true
|
||||
|
||||
// Count P0 before applying to detect new critical issues
|
||||
newP0Count := 0
|
||||
for _, iss := range s.Issues {
|
||||
if iss.Priority == model.PriorityP0 {
|
||||
newP0Count++
|
||||
}
|
||||
}
|
||||
|
||||
m.applyViewFromSnapshot()
|
||||
|
||||
// Send bell if new P0 issues appeared (check NO_BELL env var to disable)
|
||||
if newP0Count > m.lastP0Count && !m.noBell {
|
||||
// Update counter and send bell
|
||||
m.lastP0Count = newP0Count
|
||||
// Print bell character to emit terminal bell
|
||||
fmt.Fprint(os.Stdout, "\a")
|
||||
}
|
||||
m.lastP0Count = newP0Count
|
||||
return m, waitForSnapshot(m.snapshots)
|
||||
|
||||
case bubbletea.WindowSizeMsg:
|
||||
m.w, m.h = msg.Width, msg.Height
|
||||
m.layout()
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Search input mode.
|
||||
if m.focus == focusSearch {
|
||||
switch {
|
||||
case keyMatch(msg, m.keys.Cancel):
|
||||
m.focus = focusTable
|
||||
m.searchIn.Blur()
|
||||
m.searchIn.SetValue(m.search)
|
||||
return m, nil
|
||||
case keyMatch(msg, m.keys.ClearFilters):
|
||||
m.focus = focusTable
|
||||
m.searchIn.Blur()
|
||||
m.search = ""
|
||||
m.applyViewFromSnapshot()
|
||||
return m, nil
|
||||
case keyMatch(msg, m.keys.Apply):
|
||||
m.search = strings.TrimSpace(m.searchIn.Value())
|
||||
m.focus = focusTable
|
||||
m.searchIn.Blur()
|
||||
m.applyViewFromSnapshot()
|
||||
return m, nil
|
||||
}
|
||||
|
||||
var cmd bubbletea.Cmd
|
||||
m.searchIn, cmd = m.searchIn.Update(msg)
|
||||
return m, cmd
|
||||
}
|
||||
|
||||
// Help overlay mode - only help-related keys are processed.
|
||||
if m.showHelp {
|
||||
switch {
|
||||
case keyMatch(msg, m.keys.Help), keyMatch(msg, m.keys.Cancel):
|
||||
m.showHelp = false
|
||||
return m, nil
|
||||
}
|
||||
// Ignore all other keys while help is shown
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Global keybindings.
|
||||
switch {
|
||||
case keyMatch(msg, m.keys.Quit):
|
||||
return m, bubbletea.Quit
|
||||
|
||||
case keyMatch(msg, m.keys.RefreshNow):
|
||||
if m.refreshNow != nil {
|
||||
m.refreshNow()
|
||||
}
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.Search):
|
||||
m.focus = focusSearch
|
||||
m.searchIn.SetValue(m.search)
|
||||
m.searchIn.CursorEnd()
|
||||
m.searchIn.Focus()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.Priority):
|
||||
m.cyclePriorityFilter()
|
||||
m.applyViewFromSnapshot()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.PriorityP0):
|
||||
m.filterPri = model.PriorityP0
|
||||
m.applyViewFromSnapshot()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.PriorityP1):
|
||||
m.filterPri = model.PriorityP1
|
||||
m.applyViewFromSnapshot()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.PriorityP2):
|
||||
m.filterPri = model.PriorityP2
|
||||
m.applyViewFromSnapshot()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.PriorityP3):
|
||||
m.filterPri = model.PriorityP3
|
||||
m.applyViewFromSnapshot()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.Category):
|
||||
m.cycleCategoryFilter()
|
||||
m.applyViewFromSnapshot()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.Sort):
|
||||
m.sortMode = (m.sortMode + 1) % 3
|
||||
m.applyViewFromSnapshot()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.FocusNext):
|
||||
if m.focus == focusTable {
|
||||
m.focus = focusDetails
|
||||
m.table.Blur()
|
||||
// viewport has no Focus/Blur; we just route keys.
|
||||
return m, nil
|
||||
}
|
||||
m.focus = focusTable
|
||||
m.table.Focus()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.AckToggle):
|
||||
m.toggleAckSelected()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.AckAll):
|
||||
m.ackAllVisible()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.Export):
|
||||
if m.export != nil {
|
||||
m.exporting = true
|
||||
path := m.lastExportPath
|
||||
issues := m.snap.Issues
|
||||
return m, func() bubbletea.Msg {
|
||||
err := m.export(path, issues)
|
||||
return exportDoneMsg{err: err}
|
||||
}
|
||||
}
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.Help):
|
||||
m.showHelp = !m.showHelp
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.JumpToTop):
|
||||
if len(m.rowsIDs) > 0 {
|
||||
m.table.SetCursor(0)
|
||||
m.setDetailsToSelected()
|
||||
}
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.JumpToBottom):
|
||||
if len(m.rowsIDs) > 0 {
|
||||
m.table.SetCursor(len(m.rowsIDs) - 1)
|
||||
m.setDetailsToSelected()
|
||||
}
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.Copy):
|
||||
m.copySelectedToClipboard()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.ToggleWideTitle):
|
||||
m.wideTitle = !m.wideTitle
|
||||
m.layout()
|
||||
// Rebuild rows to apply new title width
|
||||
m.applyViewFromSnapshot()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.ToggleAgeFormat):
|
||||
m.ageMode = (m.ageMode + 1) % 2
|
||||
m.applyViewFromSnapshot()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.ToggleTheme):
|
||||
// Cycle through theme modes: Auto -> Light -> Dark -> Auto
|
||||
m.themeMode = (m.themeMode + 1) % 3
|
||||
m.styles = defaultStylesForMode(m.themeMode)
|
||||
// Refresh the view with new styles
|
||||
m.applyViewFromSnapshot()
|
||||
return m, nil
|
||||
|
||||
case keyMatch(msg, m.keys.ClearFilters):
|
||||
m.filterPri = ""
|
||||
m.filterCat = ""
|
||||
m.search = ""
|
||||
m.applyViewFromSnapshot()
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Focus-specific updates.
|
||||
// Note: bubbles/table already handles page navigation keys (PgUp/PgDn, Ctrl+u/Ctrl+d, Home/End)
|
||||
// natively, so we don't need to override them here.
|
||||
switch m.focus {
|
||||
case focusTable:
|
||||
var cmd bubbletea.Cmd
|
||||
m.table, cmd = m.table.Update(msg)
|
||||
// When selection changes, update details content.
|
||||
m.setDetailsToSelected()
|
||||
return m, cmd
|
||||
|
||||
case focusDetails:
|
||||
var cmd bubbletea.Cmd
|
||||
m.details, cmd = m.details.Update(msg)
|
||||
return m, cmd
|
||||
}
|
||||
|
||||
switch msg := msg.(type) {
|
||||
case exportDoneMsg:
|
||||
m.exporting = false
|
||||
m.err = msg.err
|
||||
return m, nil
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (m *Model) layout() {
|
||||
if m.w <= 0 || m.h <= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Header: 1 line.
|
||||
headerH := 1
|
||||
// Search bar: 1 line (shown only in search focus).
|
||||
searchH := 0
|
||||
if m.focus == focusSearch {
|
||||
searchH = 1
|
||||
}
|
||||
|
||||
bodyH := m.h - headerH - searchH
|
||||
if bodyH < 4 {
|
||||
bodyH = 4
|
||||
}
|
||||
|
||||
detailsH := bodyH / 3
|
||||
tableH := bodyH - detailsH
|
||||
if tableH < 3 {
|
||||
tableH = 3
|
||||
}
|
||||
|
||||
// Table width includes 2-character padding from bubbles/table.
|
||||
// Allocate Title to consume remaining width.
|
||||
priW, catW, ageW, stateW := 3, 12, 7, 13
|
||||
fixed := priW + catW + ageW + stateW + 4 // separators/padding
|
||||
titleW := m.w - fixed
|
||||
if titleW < 20 {
|
||||
titleW = 20
|
||||
}
|
||||
if m.wideTitle {
|
||||
// Wide mode: allocate more space to Title column (up to 2x)
|
||||
titleW = titleW * 2
|
||||
// Ensure other columns still have minimum space
|
||||
maxTitle := m.w - fixed
|
||||
if titleW > maxTitle {
|
||||
titleW = maxTitle
|
||||
}
|
||||
}
|
||||
|
||||
cols := m.table.Columns()
|
||||
for i := range cols {
|
||||
switch cols[i].Title {
|
||||
case colPri:
|
||||
cols[i].Width = priW
|
||||
case colCat:
|
||||
cols[i].Width = catW
|
||||
case colTitle:
|
||||
cols[i].Width = titleW
|
||||
case colAge:
|
||||
cols[i].Width = ageW
|
||||
case colState:
|
||||
cols[i].Width = stateW
|
||||
}
|
||||
}
|
||||
m.table.SetColumns(cols)
|
||||
m.table.SetHeight(tableH)
|
||||
|
||||
m.details.Width = m.w
|
||||
m.details.Height = detailsH
|
||||
}
|
||||
|
||||
func (m *Model) applyViewFromSnapshot() {
|
||||
// Build ID index for O(1) selection lookup.
|
||||
m.issueByID = make(map[string]model.Issue, len(m.snap.Issues))
|
||||
for _, iss := range m.snap.Issues {
|
||||
m.issueByID[iss.ID] = iss
|
||||
}
|
||||
|
||||
// Show loading state before first snapshot arrives
|
||||
if !m.loaded {
|
||||
msg := "Loading collector data... Please wait."
|
||||
m.details.SetContent(m.styles.Muted.Render(msg))
|
||||
return
|
||||
}
|
||||
|
||||
// Filter.
|
||||
filtered := make([]model.Issue, 0, len(m.snap.Issues))
|
||||
for _, iss := range m.snap.Issues {
|
||||
if m.filterPri != "" && iss.Priority != m.filterPri {
|
||||
continue
|
||||
}
|
||||
if m.filterCat != "" && iss.Category != m.filterCat {
|
||||
continue
|
||||
}
|
||||
if m.search != "" {
|
||||
q := strings.ToLower(m.search)
|
||||
hit := strings.Contains(strings.ToLower(iss.Title), q) || strings.Contains(strings.ToLower(iss.Details), q)
|
||||
if !hit {
|
||||
continue
|
||||
}
|
||||
}
|
||||
filtered = append(filtered, iss)
|
||||
}
|
||||
|
||||
// Sort.
|
||||
sort.SliceStable(filtered, func(i, j int) bool {
|
||||
a, b := filtered[i], filtered[j]
|
||||
switch m.sortMode {
|
||||
case sortRecency:
|
||||
if !a.LastSeen.Equal(b.LastSeen) {
|
||||
return a.LastSeen.After(b.LastSeen)
|
||||
}
|
||||
return a.ID < b.ID
|
||||
case sortCategory:
|
||||
if a.Category != b.Category {
|
||||
return a.Category < b.Category
|
||||
}
|
||||
aw, bw := a.Priority.Weight(), b.Priority.Weight()
|
||||
if aw != bw {
|
||||
return aw > bw
|
||||
}
|
||||
if !a.LastSeen.Equal(b.LastSeen) {
|
||||
return a.LastSeen.After(b.LastSeen)
|
||||
}
|
||||
return a.ID < b.ID
|
||||
default:
|
||||
aw, bw := a.Priority.Weight(), b.Priority.Weight()
|
||||
if aw != bw {
|
||||
return aw > bw
|
||||
}
|
||||
if !a.LastSeen.Equal(b.LastSeen) {
|
||||
return a.LastSeen.After(b.LastSeen)
|
||||
}
|
||||
return a.ID < b.ID
|
||||
}
|
||||
})
|
||||
|
||||
rows, ids := buildRows(m.snap.At, m.ageMode, filtered)
|
||||
m.rowsIDs = ids
|
||||
|
||||
prevSelID := m.selectedIssueID()
|
||||
m.table.SetRows(rows)
|
||||
if len(rows) == 0 {
|
||||
m.table.SetCursor(0)
|
||||
msg := "All systems healthy. No issues detected.\n\nPress r to refresh, / to search past logs"
|
||||
m.details.SetContent(m.styles.Muted.Render(msg))
|
||||
return
|
||||
}
|
||||
|
||||
// Try to keep selection stable.
|
||||
if prevSelID != "" {
|
||||
for i, id := range ids {
|
||||
if id == prevSelID {
|
||||
m.table.SetCursor(i)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m.setDetailsToSelected()
|
||||
m.layout()
|
||||
}
|
||||
|
||||
func (m *Model) selectedIssueID() string {
|
||||
idx := m.table.Cursor()
|
||||
if idx < 0 || idx >= len(m.rowsIDs) {
|
||||
return ""
|
||||
}
|
||||
return m.rowsIDs[idx]
|
||||
}
|
||||
|
||||
func (m *Model) setDetailsToSelected() {
|
||||
id := m.selectedIssueID()
|
||||
iss, ok := m.issueByID[id]
|
||||
if !ok {
|
||||
m.details.SetContent(m.styles.Muted.Render("No issue selected."))
|
||||
return
|
||||
}
|
||||
m.details.SetContent(renderIssueDetails(m.now, m.ageMode, iss))
|
||||
}
|
||||
|
||||
func (m *Model) toggleAckSelected() {
|
||||
id := m.selectedIssueID()
|
||||
if id == "" {
|
||||
return
|
||||
}
|
||||
iss, ok := m.issueByID[id]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if iss.State == model.StateResolved {
|
||||
return
|
||||
}
|
||||
newState := model.StateAcknowledged
|
||||
if iss.State == model.StateAcknowledged {
|
||||
newState = model.StateOpen
|
||||
}
|
||||
|
||||
// Callbacks (store-backed if wired).
|
||||
if newState == model.StateAcknowledged {
|
||||
if m.ack != nil {
|
||||
m.ack(id)
|
||||
}
|
||||
} else {
|
||||
if m.unack != nil {
|
||||
m.unack(id)
|
||||
}
|
||||
}
|
||||
|
||||
// Optimistic local update (store will correct on next snapshot).
|
||||
iss.State = newState
|
||||
m.issueByID[id] = iss
|
||||
|
||||
// Update state column cheaply.
|
||||
idx := m.table.Cursor()
|
||||
rows := m.table.Rows()
|
||||
if idx >= 0 && idx < len(rows) {
|
||||
rows[idx][4] = iss.State.String() // State column index
|
||||
m.table.SetRows(rows)
|
||||
}
|
||||
m.setDetailsToSelected()
|
||||
}
|
||||
|
||||
func (m *Model) ackAllVisible() {
|
||||
if m.ack == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Track updates for table refresh.
|
||||
updated := false
|
||||
rows := m.table.Rows()
|
||||
|
||||
// Iterate through all visible issues and acknowledge them.
|
||||
for idx, id := range m.rowsIDs {
|
||||
iss, ok := m.issueByID[id]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
// Only acknowledge open issues, not already acked or resolved.
|
||||
if iss.State == model.StateOpen {
|
||||
m.ack(id)
|
||||
|
||||
// Optimistic local update.
|
||||
iss.State = model.StateAcknowledged
|
||||
m.issueByID[id] = iss
|
||||
|
||||
// Update state column cheaply.
|
||||
if idx < len(rows) {
|
||||
rows[idx][4] = iss.State.String() // State column index
|
||||
updated = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if updated {
|
||||
m.table.SetRows(rows)
|
||||
m.setDetailsToSelected()
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Model) copySelectedToClipboard() {
|
||||
id := m.selectedIssueID()
|
||||
if id == "" {
|
||||
return
|
||||
}
|
||||
iss, ok := m.issueByID[id]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
// Copy SuggestedFix if available, otherwise fallback to Title
|
||||
text := iss.SuggestedFix
|
||||
if text == "" {
|
||||
text = iss.Title
|
||||
}
|
||||
|
||||
if err := clipboard.WriteAll(text); err != nil {
|
||||
m.err = fmt.Errorf("Failed to copy to clipboard: %w. Is xclip/xsel installed?", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Show confirmation in details pane
|
||||
m.details.SetContent(m.styles.Muted.Render("Copied to clipboard\n\n") + renderIssueDetails(m.now, m.ageMode, iss))
|
||||
}
|
||||
|
||||
func (m *Model) cyclePriorityFilter() {
|
||||
order := []model.Priority{"", model.PriorityP0, model.PriorityP1, model.PriorityP2, model.PriorityP3}
|
||||
m.filterPri = cycle(order, m.filterPri)
|
||||
}
|
||||
|
||||
func (m *Model) cycleCategoryFilter() {
|
||||
order := []model.Category{
|
||||
"",
|
||||
model.CategoryPerformance,
|
||||
model.CategoryMemory,
|
||||
model.CategoryStorage,
|
||||
model.CategoryNetwork,
|
||||
model.CategoryThermals,
|
||||
model.CategoryProcesses,
|
||||
model.CategoryServices,
|
||||
model.CategoryLogs,
|
||||
model.CategoryUpdates,
|
||||
model.CategorySecurity,
|
||||
model.CategoryKubernetes,
|
||||
}
|
||||
m.filterCat = cycle(order, m.filterCat)
|
||||
}
|
||||
|
||||
// cycle returns the element that follows cur in order, wrapping around to
// the first element after the last one.
//
// If cur does not occur in order, the first element is returned. If order is
// empty there is nothing to cycle through and cur is returned unchanged
// (previously this case panicked with an index out of range).
func cycle[T comparable](order []T, cur T) T {
	if len(order) == 0 {
		return cur
	}
	for i, v := range order {
		if v == cur {
			return order[(i+1)%len(order)]
		}
	}
	return order[0]
}
|
||||
|
||||
func (m Model) View() string {
|
||||
// Show help overlay when active
|
||||
if m.showHelp {
|
||||
return renderHelp(m.keys, m.styles)
|
||||
}
|
||||
|
||||
header := m.renderHeader()
|
||||
|
||||
searchLine := ""
|
||||
if m.focus == focusSearch {
|
||||
searchLine = m.searchIn.View()
|
||||
}
|
||||
|
||||
tableView := m.table.View()
|
||||
detailsView := m.renderDetailsPane()
|
||||
|
||||
parts := []string{header}
|
||||
if searchLine != "" {
|
||||
parts = append(parts, searchLine)
|
||||
}
|
||||
parts = append(parts, tableView, detailsView)
|
||||
return lipgloss.JoinVertical(lipgloss.Left, parts...)
|
||||
}
|
||||
|
||||
func (m Model) renderHeader() string {
|
||||
now := m.now
|
||||
if now.IsZero() {
|
||||
now = time.Now()
|
||||
}
|
||||
|
||||
age := "-"
|
||||
if !m.snap.At.IsZero() {
|
||||
age = formatAge(now.Sub(m.snap.At))
|
||||
}
|
||||
|
||||
p0, p1, p2, p3 := 0, 0, 0, 0
|
||||
for _, iss := range m.snap.Issues {
|
||||
switch iss.Priority {
|
||||
case model.PriorityP0:
|
||||
p0++
|
||||
case model.PriorityP1:
|
||||
p1++
|
||||
case model.PriorityP2:
|
||||
p2++
|
||||
case model.PriorityP3:
|
||||
p3++
|
||||
}
|
||||
}
|
||||
|
||||
okC, degC, errC := 0, 0, 0
|
||||
for _, h := range m.snap.Collectors {
|
||||
switch h.Status.Health {
|
||||
case "OK":
|
||||
okC++
|
||||
case "DEGRADED":
|
||||
degC++
|
||||
case "ERROR":
|
||||
errC++
|
||||
}
|
||||
}
|
||||
|
||||
priFilter := "all"
|
||||
if m.filterPri != "" {
|
||||
priFilter = m.filterPri.String()
|
||||
}
|
||||
catFilter := "all"
|
||||
if m.filterCat != "" {
|
||||
catFilter = m.filterCat.String()
|
||||
}
|
||||
sortLabel := map[SortMode]string{sortDefault: "pri→recent", sortRecency: "recent", sortCategory: "cat"}[m.sortMode]
|
||||
|
||||
left := fmt.Sprintf(
|
||||
"host=%s time=%s age=%s P0=%d P1=%d P2=%d P3=%d collectors: ✓%d ⚠%d ✗%d",
|
||||
m.host,
|
||||
now.Local().Format("15:04:05"),
|
||||
age,
|
||||
p0, p1, p2, p3,
|
||||
okC, degC, errC,
|
||||
)
|
||||
|
||||
// Add count warning when approaching 200 issues cap (90% = 180)
|
||||
total := p0 + p1 + p2 + p3
|
||||
if total >= 180 {
|
||||
warning := fmt.Sprintf(" [~%d/200]", total)
|
||||
left += m.styles.Error.Render(warning)
|
||||
}
|
||||
|
||||
// Small right-side indicator for filters.
|
||||
priStr := fmt.Sprintf("pri=%s", priFilter)
|
||||
catStr := fmt.Sprintf("cat=%s", catFilter)
|
||||
if m.filterPri != "" {
|
||||
priStr = m.styles.FilterActive.Render(priStr)
|
||||
}
|
||||
if m.filterCat != "" {
|
||||
catStr = m.styles.FilterActive.Render(catStr)
|
||||
}
|
||||
right := fmt.Sprintf("filter %s %s q=%q sort=%s", priStr, catStr, m.search, sortLabel)
|
||||
|
||||
if m.w > 0 {
|
||||
// Truncate right if needed.
|
||||
space := m.w - lipgloss.Width(left) - 1
|
||||
if space < 0 {
|
||||
space = 0
|
||||
}
|
||||
if lipgloss.Width(right) > space {
|
||||
right = lipgloss.NewStyle().MaxWidth(space).Render(right)
|
||||
}
|
||||
padLen := 0
|
||||
if space > 0 {
|
||||
padLen = max(1, space-lipgloss.Width(right))
|
||||
}
|
||||
pad := strings.Repeat(" ", padLen)
|
||||
return m.styles.HeaderBar.Render(left + pad + right)
|
||||
}
|
||||
|
||||
return m.styles.HeaderBar.Render(left + " " + right)
|
||||
}
|
||||
|
||||
func (m Model) renderDetailsPane() string {
|
||||
title := "Details"
|
||||
if m.focus == focusDetails {
|
||||
title = title + " (focus)"
|
||||
}
|
||||
|
||||
body := m.details.View()
|
||||
if m.exporting {
|
||||
body = "Exporting issues to " + m.lastExportPath + "..."
|
||||
}
|
||||
if m.err != nil {
|
||||
body = body + "\n" + m.styles.Error.Render(m.err.Error())
|
||||
}
|
||||
|
||||
// Keep the details title cheap and avoid borders (can be expensive).
|
||||
return m.styles.DetailsTitle.Render(title) + "\n" + body
|
||||
}
|
||||
|
||||
func renderHelp(keys KeyMap, styles Styles) string {
|
||||
// Create a temporary help model and render it
|
||||
help := NewHelp()
|
||||
help.Show()
|
||||
return help.Render(keys, styles)
|
||||
}
|
||||
|
||||
func keyMatch(msg bubbletea.Msg, b key.Binding) bool {
|
||||
km, ok := msg.(bubbletea.KeyMsg)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
return key.Matches(km, b)
|
||||
}
|
||||
|
||||
// max returns the larger of two ints.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
|
||||
@@ -0,0 +1,105 @@
|
||||
package ui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// getRollupSamples extracts sample IDs from a rollup issue's evidence.
|
||||
func getRollupSamples(iss model.Issue) []string {
|
||||
samplesStr := iss.Evidence["samples"]
|
||||
if samplesStr == "" {
|
||||
return nil
|
||||
}
|
||||
parts := strings.Split(samplesStr, " | ")
|
||||
result := make([]string, 0, len(parts))
|
||||
for _, p := range parts {
|
||||
p = strings.TrimSpace(p)
|
||||
if p != "" {
|
||||
result = append(result, p)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// isRollupIssue checks if an issue is a rollup issue.
|
||||
func isRollupIssue(iss model.Issue) bool {
|
||||
if strings.HasPrefix(iss.ID, "k8s:rollup:") {
|
||||
return true
|
||||
}
|
||||
if iss.Category == model.CategoryKubernetes && strings.Contains(strings.ToLower(iss.Title), "rollup") {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func renderIssueDetails(now time.Time, mode AgeMode, iss model.Issue) string {
|
||||
var b strings.Builder
|
||||
|
||||
fmt.Fprintf(&b, "Title: %s\n", oneLine(iss.Title))
|
||||
fmt.Fprintf(&b, "Priority: %s Category: %s State: %s\n", iss.Priority, iss.Category, iss.State)
|
||||
fmt.Fprintf(&b, "FirstSeen: %s\n", fmtTime(iss.FirstSeen))
|
||||
fmt.Fprintf(&b, "LastSeen: %s\n", fmtTime(iss.LastSeen))
|
||||
fmt.Fprintf(&b, "Age: %s\n", formatAgeWithMode(iss.Age(now), mode))
|
||||
|
||||
if strings.TrimSpace(iss.Details) != "" {
|
||||
b.WriteString("\nDetails\n")
|
||||
b.WriteString(indentBlock(strings.TrimSpace(iss.Details), " "))
|
||||
b.WriteString("\n")
|
||||
}
|
||||
|
||||
// Show affected issues for rollup issues
|
||||
if isRollupIssue(iss) {
|
||||
samples := getRollupSamples(iss)
|
||||
if len(samples) > 0 {
|
||||
b.WriteString("\nAffected Issues\n")
|
||||
// Show up to 10 samples
|
||||
maxSamples := 10
|
||||
if len(samples) > maxSamples {
|
||||
samples = samples[:maxSamples]
|
||||
}
|
||||
for _, sample := range samples {
|
||||
fmt.Fprintf(&b, " • %s\n", sample)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(iss.Evidence) > 0 {
|
||||
b.WriteString("\nEvidence\n")
|
||||
keys := make([]string, 0, len(iss.Evidence))
|
||||
for k := range iss.Evidence {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
for _, k := range keys {
|
||||
fmt.Fprintf(&b, " %s: %s\n", k, iss.Evidence[k])
|
||||
}
|
||||
}
|
||||
|
||||
if strings.TrimSpace(iss.SuggestedFix) != "" {
|
||||
b.WriteString("\nSuggested Fix\n")
|
||||
b.WriteString(indentBlock(strings.TrimSpace(iss.SuggestedFix), " "))
|
||||
b.WriteString("\n")
|
||||
}
|
||||
|
||||
return strings.TrimRight(b.String(), "\n")
|
||||
}
|
||||
|
||||
// fmtTime formats t in local time as "YYYY-MM-DD HH:MM:SS"; the zero time
// is shown as "-".
func fmtTime(t time.Time) string {
	const layout = "2006-01-02 15:04:05"
	if !t.IsZero() {
		return t.Local().Format(layout)
	}
	return "-"
}
|
||||
|
||||
// indentBlock prepends prefix to every line of s. Lines are separated by
// "\n"; an empty s counts as one empty line and yields just prefix.
func indentBlock(s, prefix string) string {
	// Prefix the first line, then every line that follows a newline.
	return prefix + strings.ReplaceAll(s, "\n", "\n"+prefix)
}
|
||||
@@ -0,0 +1,152 @@
|
||||
package ui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/charmbracelet/bubbles/key"
|
||||
)
|
||||
|
||||
// HelpModel holds the visibility state of the keybinding help overlay.
type HelpModel struct {
	visible bool
}

// NewHelp returns a help overlay that starts hidden.
func NewHelp() HelpModel {
	return HelpModel{}
}

// Show makes the overlay visible.
func (h *HelpModel) Show() { h.visible = true }

// Hide makes the overlay invisible.
func (h *HelpModel) Hide() { h.visible = false }

// Toggle flips the overlay between visible and hidden.
func (h *HelpModel) Toggle() { h.visible = !h.visible }

// IsVisible reports whether the overlay is currently visible.
func (h HelpModel) IsVisible() bool { return h.visible }
|
||||
|
||||
// Render renders the help overlay.
|
||||
func (m HelpModel) Render(keys KeyMap, styles Styles) string {
|
||||
if !m.visible {
|
||||
return ""
|
||||
}
|
||||
|
||||
var b strings.Builder
|
||||
|
||||
// Title
|
||||
title := styles.HeaderBar.Render("Keybindings - Press ? or esc to close")
|
||||
b.WriteString(title)
|
||||
b.WriteString("\n\n")
|
||||
|
||||
// Define keybinding groups
|
||||
groups := []struct {
|
||||
name string
|
||||
binds []keyHelp
|
||||
}{
|
||||
{
|
||||
name: "Global",
|
||||
binds: []keyHelp{
|
||||
{keys.Help, "Show/hide this help"},
|
||||
{keys.Quit, "Quit the application"},
|
||||
{keys.RefreshNow, "Refresh data now"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Filters",
|
||||
binds: []keyHelp{
|
||||
{keys.Search, "Search by title/details"},
|
||||
{keys.Priority, "Cycle priority filter"},
|
||||
{keys.Category, "Cycle category filter"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Navigation",
|
||||
binds: []keyHelp{
|
||||
{keys.FocusNext, "Toggle focus (table/details)"},
|
||||
{keys.Sort, "Cycle sort order"},
|
||||
{keys.JumpToTop, "Jump to top (g)"},
|
||||
{keys.JumpToBottom, "Jump to bottom (G)"},
|
||||
{keys.Down, "Move down (j)"},
|
||||
{keys.Up, "Move up (k)"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Actions",
|
||||
binds: []keyHelp{
|
||||
{keys.AckToggle, "Acknowledge/unacknowledge issue"},
|
||||
{keys.Export, "Export issues to JSON"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Render each group
|
||||
for i, group := range groups {
|
||||
if i > 0 {
|
||||
b.WriteString("\n")
|
||||
}
|
||||
|
||||
// Group header
|
||||
groupTitle := styles.HeaderKey.Render(group.name + ":")
|
||||
b.WriteString(groupTitle)
|
||||
b.WriteString("\n")
|
||||
|
||||
// Keybindings in this group
|
||||
for _, kb := range group.binds {
|
||||
line := renderKeyHelp(kb, styles)
|
||||
b.WriteString(line)
|
||||
b.WriteString("\n")
|
||||
}
|
||||
}
|
||||
|
||||
// Render collector health icon legend
|
||||
b.WriteString("\n")
|
||||
legendTitle := styles.HeaderKey.Render("Legend:")
|
||||
b.WriteString(legendTitle)
|
||||
b.WriteString("\n")
|
||||
legendText := styles.HeaderVal.Render(" Collector health: ✓ (OK), ⚠ (DEGRADED), ✗ (ERROR)")
|
||||
b.WriteString(legendText)
|
||||
b.WriteString("\n")
|
||||
|
||||
return b.String()
|
||||
}
|
||||
|
||||
type keyHelp struct {
|
||||
binding key.Binding
|
||||
help string
|
||||
}
|
||||
|
||||
func renderKeyHelp(kb keyHelp, styles Styles) string {
|
||||
// Get key names from the binding
|
||||
keys := kb.binding.Keys()
|
||||
if len(keys) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Format key names
|
||||
keyStr := strings.Join(keys, ", ")
|
||||
keyStyled := styles.HeaderVal.Render(keyStr)
|
||||
|
||||
// Format help text
|
||||
helpStyled := styles.HeaderVal.Render(kb.help)
|
||||
|
||||
// Combine with padding
|
||||
padding := ""
|
||||
if needed := 10 - len(keyStr); needed > 0 {
|
||||
padding = strings.Repeat(" ", needed)
|
||||
}
|
||||
return fmt.Sprintf(" %s%s%s", keyStyled, padding, helpStyled)
|
||||
}
|
||||
@@ -0,0 +1,141 @@
|
||||
package ui
|
||||
|
||||
import "github.com/charmbracelet/bubbles/key"
|
||||
|
||||
// KeyMap defines UI keybindings.
|
||||
//
|
||||
// Note: Bubble Tea will also handle ctrl+c; we additionally bind q for quit.
|
||||
|
||||
type KeyMap struct {
|
||||
Quit key.Binding
|
||||
RefreshNow key.Binding
|
||||
Search key.Binding
|
||||
Priority key.Binding
|
||||
PriorityP0 key.Binding
|
||||
PriorityP1 key.Binding
|
||||
PriorityP2 key.Binding
|
||||
PriorityP3 key.Binding
|
||||
Category key.Binding
|
||||
Sort key.Binding
|
||||
FocusNext key.Binding
|
||||
AckToggle key.Binding
|
||||
AckAll key.Binding
|
||||
Export key.Binding
|
||||
ToggleTheme key.Binding
|
||||
Help key.Binding
|
||||
JumpToTop key.Binding
|
||||
JumpToBottom key.Binding
|
||||
Down key.Binding
|
||||
Up key.Binding
|
||||
Copy key.Binding
|
||||
ToggleWideTitle key.Binding
|
||||
ToggleAgeFormat key.Binding
|
||||
ClearFilters key.Binding
|
||||
|
||||
Cancel key.Binding
|
||||
Apply key.Binding
|
||||
}
|
||||
|
||||
func defaultKeyMap() KeyMap {
|
||||
return KeyMap{
|
||||
Quit: key.NewBinding(
|
||||
key.WithKeys("q"),
|
||||
key.WithHelp("q", "quit"),
|
||||
),
|
||||
RefreshNow: key.NewBinding(
|
||||
key.WithKeys("r"),
|
||||
key.WithHelp("r", "refresh now"),
|
||||
),
|
||||
Search: key.NewBinding(
|
||||
key.WithKeys("/"),
|
||||
key.WithHelp("/", "search"),
|
||||
),
|
||||
Priority: key.NewBinding(
|
||||
key.WithKeys("p"),
|
||||
key.WithHelp("p", "priority filter"),
|
||||
),
|
||||
PriorityP0: key.NewBinding(
|
||||
key.WithKeys("0"),
|
||||
key.WithHelp("0", "P0 only"),
|
||||
),
|
||||
PriorityP1: key.NewBinding(
|
||||
key.WithKeys("1"),
|
||||
key.WithHelp("1", "P1 only"),
|
||||
),
|
||||
PriorityP2: key.NewBinding(
|
||||
key.WithKeys("2"),
|
||||
key.WithHelp("2", "P2 only"),
|
||||
),
|
||||
PriorityP3: key.NewBinding(
|
||||
key.WithKeys("3"),
|
||||
key.WithHelp("3", "P3 only"),
|
||||
),
|
||||
Category: key.NewBinding(
|
||||
key.WithKeys("c"),
|
||||
key.WithHelp("c", "category filter"),
|
||||
),
|
||||
Sort: key.NewBinding(
|
||||
key.WithKeys("s"),
|
||||
key.WithHelp("s", "cycle sort"),
|
||||
),
|
||||
FocusNext: key.NewBinding(
|
||||
key.WithKeys("tab"),
|
||||
key.WithHelp("tab", "focus"),
|
||||
),
|
||||
AckToggle: key.NewBinding(
|
||||
key.WithKeys("a"),
|
||||
key.WithHelp("a", "ack/unack"),
|
||||
),
|
||||
AckAll: key.NewBinding(
|
||||
key.WithKeys("A", "shift+a"),
|
||||
key.WithHelp("A", "ack all visible"),
|
||||
),
|
||||
Export: key.NewBinding(
|
||||
key.WithKeys("E"),
|
||||
key.WithHelp("E", "export"),
|
||||
),
|
||||
ToggleTheme: key.NewBinding(
|
||||
key.WithKeys("T", "shift+t"),
|
||||
key.WithHelp("T", "toggle theme"),
|
||||
),
|
||||
Help: key.NewBinding(
|
||||
key.WithKeys("?"),
|
||||
key.WithHelp("?", "show help"),
|
||||
),
|
||||
JumpToTop: key.NewBinding(
|
||||
key.WithKeys("g"),
|
||||
key.WithHelp("g", "jump to top"),
|
||||
),
|
||||
JumpToBottom: key.NewBinding(
|
||||
key.WithKeys("G", "shift+g"),
|
||||
key.WithHelp("G", "jump to bottom"),
|
||||
),
|
||||
Down: key.NewBinding(
|
||||
key.WithKeys("j"),
|
||||
key.WithHelp("j", "down"),
|
||||
),
|
||||
Up: key.NewBinding(
|
||||
key.WithKeys("k"),
|
||||
key.WithHelp("k", "up"),
|
||||
),
|
||||
Copy: key.NewBinding(
|
||||
key.WithKeys("y"),
|
||||
key.WithHelp("y", "copy fix"),
|
||||
),
|
||||
ToggleWideTitle: key.NewBinding(
|
||||
key.WithKeys("t"),
|
||||
key.WithHelp("t", "wide title"),
|
||||
),
|
||||
ToggleAgeFormat: key.NewBinding(
|
||||
key.WithKeys("d"),
|
||||
key.WithHelp("d", "age format"),
|
||||
),
|
||||
ClearFilters: key.NewBinding(
|
||||
key.WithKeys("esc"),
|
||||
key.WithHelp("esc", "clear filters"),
|
||||
),
|
||||
|
||||
Cancel: key.NewBinding(key.WithKeys("esc")),
|
||||
Apply: key.NewBinding(key.WithKeys("enter")),
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,122 @@
|
||||
package ui
|
||||
|
||||
import "github.com/charmbracelet/lipgloss"
|
||||
|
||||
// ThemeMode selects which colour theme the UI uses.
type ThemeMode int

// Theme modes, in the order the theme toggle steps through them.
const (
	// ThemeAuto is the zero value and the mode a new model starts in.
	ThemeAuto ThemeMode = iota
	// ThemeLight selects the light theme.
	ThemeLight
	// ThemeDark selects the dark theme.
	ThemeDark
)
|
||||
|
||||
// Styles centralizes all lipgloss styling.
|
||||
// Keep these simple: excessive styling can slow rendering at high row counts.
|
||||
|
||||
type Styles struct {
|
||||
HeaderBar lipgloss.Style
|
||||
HeaderKey lipgloss.Style
|
||||
HeaderVal lipgloss.Style
|
||||
FilterActive lipgloss.Style
|
||||
|
||||
TableHeader lipgloss.Style
|
||||
TableCell lipgloss.Style
|
||||
|
||||
P0 lipgloss.Style
|
||||
P1 lipgloss.Style
|
||||
P2 lipgloss.Style
|
||||
P3 lipgloss.Style
|
||||
|
||||
StateOpen lipgloss.Style
|
||||
StateAck lipgloss.Style
|
||||
StateRes lipgloss.Style
|
||||
|
||||
DetailsTitle lipgloss.Style
|
||||
DetailsBody lipgloss.Style
|
||||
Muted lipgloss.Style
|
||||
Error lipgloss.Style
|
||||
}
|
||||
|
||||
// LightTheme returns light theme styles.
|
||||
func LightTheme() Styles {
|
||||
base := lipgloss.NewStyle()
|
||||
muted := base.Foreground(lipgloss.Color("8"))
|
||||
|
||||
return Styles{
|
||||
HeaderBar: base.
|
||||
Background(lipgloss.Color("236")).
|
||||
Foreground(lipgloss.Color("252")).
|
||||
Padding(0, 1),
|
||||
HeaderKey: base.Foreground(lipgloss.Color("250")).Bold(true),
|
||||
HeaderVal: base.Foreground(lipgloss.Color("254")),
|
||||
FilterActive: base.Bold(true).Foreground(lipgloss.Color("46")),
|
||||
|
||||
TableHeader: base.Foreground(lipgloss.Color("252")).Bold(true),
|
||||
TableCell: base.Foreground(lipgloss.Color("252")),
|
||||
|
||||
P0: base.Foreground(lipgloss.Color("9")).Bold(true),
|
||||
P1: base.Foreground(lipgloss.Color("208")).Bold(true),
|
||||
P2: base.Foreground(lipgloss.Color("11")),
|
||||
P3: base.Foreground(lipgloss.Color("10")),
|
||||
|
||||
StateOpen: base.Foreground(lipgloss.Color("252")),
|
||||
StateAck: base.Foreground(lipgloss.Color("14")),
|
||||
StateRes: muted,
|
||||
|
||||
DetailsTitle: base.Bold(true).Foreground(lipgloss.Color("252")),
|
||||
DetailsBody: base.Foreground(lipgloss.Color("252")),
|
||||
Muted: muted,
|
||||
Error: base.Foreground(lipgloss.Color("9")),
|
||||
}
|
||||
}
|
||||
|
||||
// DarkTheme returns dark theme styles with better contrast.
|
||||
func DarkTheme() Styles {
|
||||
base := lipgloss.NewStyle()
|
||||
muted := base.Foreground(lipgloss.Color("245"))
|
||||
|
||||
return Styles{
|
||||
HeaderBar: base.
|
||||
Background(lipgloss.Color("238")).
|
||||
Foreground(lipgloss.Color("231")).
|
||||
Padding(0, 1),
|
||||
HeaderKey: base.Foreground(lipgloss.Color("159")).Bold(true),
|
||||
HeaderVal: base.Foreground(lipgloss.Color("231")),
|
||||
FilterActive: base.Bold(true).Foreground(lipgloss.Color("84")),
|
||||
|
||||
TableHeader: base.Foreground(lipgloss.Color("231")).Bold(true),
|
||||
TableCell: base.Foreground(lipgloss.Color("231")),
|
||||
|
||||
P0: base.Foreground(lipgloss.Color("203")).Bold(true),
|
||||
P1: base.Foreground(lipgloss.Color("229")).Bold(true),
|
||||
P2: base.Foreground(lipgloss.Color("48")),
|
||||
P3: base.Foreground(lipgloss.Color("42")),
|
||||
|
||||
StateOpen: base.Foreground(lipgloss.Color("231")),
|
||||
StateAck: base.Foreground(lipgloss.Color("48")),
|
||||
StateRes: muted,
|
||||
|
||||
DetailsTitle: base.Bold(true).Foreground(lipgloss.Color("231")),
|
||||
DetailsBody: base.Foreground(lipgloss.Color("231")),
|
||||
Muted: muted,
|
||||
Error: base.Foreground(lipgloss.Color("203")),
|
||||
}
|
||||
}
|
||||
|
||||
func defaultStyles() Styles {
|
||||
// Default to light theme for backwards compatibility
|
||||
return LightTheme()
|
||||
}
|
||||
|
||||
func defaultStylesForMode(themeMode ThemeMode) Styles {
|
||||
switch themeMode {
|
||||
case ThemeLight:
|
||||
return LightTheme()
|
||||
case ThemeDark:
|
||||
return DarkTheme()
|
||||
default:
|
||||
// Auto mode defaults to light theme
|
||||
return LightTheme()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,131 @@
|
||||
package ui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/charmbracelet/bubbles/table"
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// Column titles of the issue table. They double as column keys,
// reserved for future sort expansions.
const (
	colPri   = "Pri"   // issue priority
	colCat   = "Cat"   // issue category
	colTitle = "Title" // issue title
	colAge   = "Age"   // formatted issue age
	colState = "State" // issue state
)
|
||||
|
||||
func newIssueTable() table.Model {
|
||||
cols := []table.Column{
|
||||
{Title: colPri, Width: 3},
|
||||
{Title: colCat, Width: 12},
|
||||
{Title: colTitle, Width: 0}, // widened on resize
|
||||
{Title: colAge, Width: 7},
|
||||
{Title: colState, Width: 13},
|
||||
}
|
||||
|
||||
t := table.New(
|
||||
table.WithColumns(cols),
|
||||
table.WithFocused(true),
|
||||
table.WithHeight(10),
|
||||
)
|
||||
|
||||
// Keep built-in styles minimal.
|
||||
s := table.DefaultStyles()
|
||||
s.Header = s.Header.Bold(true)
|
||||
s.Selected = s.Selected.Bold(false)
|
||||
t.SetStyles(s)
|
||||
|
||||
return t
|
||||
}
|
||||
|
||||
// BuildRows returns table rows and a parallel issue ID slice (row index -> issue ID).
|
||||
func buildRows(now time.Time, mode AgeMode, issues []model.Issue) ([]table.Row, []string) {
|
||||
rows := make([]table.Row, 0, len(issues))
|
||||
ids := make([]string, 0, len(issues))
|
||||
|
||||
for _, iss := range issues {
|
||||
age := formatAgeWithMode(iss.Age(now), mode)
|
||||
rows = append(rows, table.Row{
|
||||
iss.Priority.String(),
|
||||
shortCat(iss.Category.String()),
|
||||
oneLine(iss.Title),
|
||||
age,
|
||||
iss.State.String(),
|
||||
})
|
||||
ids = append(ids, iss.ID)
|
||||
}
|
||||
return rows, ids
|
||||
}
|
||||
|
||||
// shortCat compacts a category name for the narrow category column.
//
// An empty category is rendered as "-". A category of at most 12 characters
// is returned unchanged. A longer one is reduced to its first space-separated
// word, which is cut to 12 characters if it is still too long.
//
// Lengths are counted in runes rather than bytes, so multi-byte UTF-8 text is
// never cut in the middle of a character.
func shortCat(cat string) string {
	// maxCatLen matches the width given to the category column.
	const maxCatLen = 12

	if cat == "" {
		return "-"
	}
	if len([]rune(cat)) <= maxCatLen {
		return cat
	}

	// Keep category compact; table has limited width.
	short := cat
	if i := strings.IndexByte(cat, ' '); i > 0 {
		// A space is a single byte, so slicing at i is rune-safe.
		short = cat[:i]
	}
	if runes := []rune(short); len(runes) > maxCatLen {
		return string(runes[:maxCatLen])
	}
	return short
}
|
||||
|
||||
// oneLineReplacer maps every line break (CRLF, LF, CR) and tab to a single
// space. "\r\n" is listed first so a CRLF pair collapses to one space.
var oneLineReplacer = strings.NewReplacer(
	"\r\n", " ",
	"\n", " ",
	"\r", " ",
	"\t", " ",
)

// oneLine flattens s so it can be shown in a single table cell: line breaks
// and tabs become spaces, then leading and trailing whitespace is trimmed.
// Carriage returns are replaced too, because a raw "\r" would move the
// terminal cursor and corrupt the row.
func oneLine(s string) string {
	return strings.TrimSpace(oneLineReplacer.Replace(s))
}
|
||||
|
||||
func formatAge(d time.Duration) string {
|
||||
return formatAgeWithMode(d, AgeCompact)
|
||||
}
|
||||
|
||||
func formatAgeWithMode(d time.Duration, mode AgeMode) string {
|
||||
if d <= 0 {
|
||||
if mode == AgeRelative {
|
||||
return "0m ago"
|
||||
}
|
||||
return "0s"
|
||||
}
|
||||
if mode == AgeRelative {
|
||||
// Relative format: Xm ago, Xh ago, Xd ago
|
||||
if d < time.Minute {
|
||||
s := int(d / time.Second)
|
||||
return fmt.Sprintf("%ds ago", s)
|
||||
}
|
||||
if d < time.Hour {
|
||||
m := int(d / time.Minute)
|
||||
return fmt.Sprintf("%dm ago", m)
|
||||
}
|
||||
if d < 24*time.Hour {
|
||||
h := int(d / time.Hour)
|
||||
return fmt.Sprintf("%dh ago", h)
|
||||
}
|
||||
days := int(d / (24 * time.Hour))
|
||||
return fmt.Sprintf("%dd ago", days)
|
||||
}
|
||||
// Compact format: 0s, Xds, Xdm, Xdh, Xdd
|
||||
if d < time.Minute {
|
||||
s := int(d / time.Second)
|
||||
return fmt.Sprintf("%ds", s)
|
||||
}
|
||||
if d < time.Hour {
|
||||
m := int(d / time.Minute)
|
||||
return fmt.Sprintf("%dm", m)
|
||||
}
|
||||
if d < 24*time.Hour {
|
||||
h := int(d / time.Hour)
|
||||
return fmt.Sprintf("%dh", h)
|
||||
}
|
||||
days := int(d / (24 * time.Hour))
|
||||
return fmt.Sprintf("%dd", days)
|
||||
}
|
||||
Reference in New Issue
Block a user