feat: implement ControlTower TUI for cluster and host monitoring
Add a complete TUI application for monitoring Kubernetes clusters and host systems. Features include:

Core features:
- Collector framework with concurrent scheduling
- Host collectors: disk, memory, load, network
- Kubernetes collectors: pods, nodes, workloads, events with informers
- Issue deduplication, state management, and resolve-after logic
- Bubble Tea TUI with table view, details pane, and filtering
- JSON export functionality

UX improvements:
- Help overlay with keybindings
- Priority/category filters with visual indicators
- Direct priority jump (0/1/2/3)
- Bulk acknowledge (Shift+A)
- Clipboard copy (y)
- Theme toggle (T)
- Age format toggle (d)
- Wide title toggle (t)
- Vi-style navigation (j/k)
- Home/End jump (g/G)
- Rollup drill-down in details

Robustness:
- Grace period for unreachable clusters
- Rollups for high-volume issues
- Flap suppression
- RBAC error handling

Files: All core application code, with tests for the host collectors, engine, store, model, and export packages.
This commit is contained in:
182
internal/store/store.go
Normal file
182
internal/store/store.go
Normal file
@@ -0,0 +1,182 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// defaultResolveAfter is the absence window New falls back to when the caller
// passes a non-positive duration.
const defaultResolveAfter = 30 * time.Second
|
||||
|
||||
// Store is an in-memory IssueStore.
|
||||
//
|
||||
// Responsibilities (per PLAN.md):
|
||||
// - Dedupe by Issue.ID
|
||||
// - Track FirstSeen/LastSeen
|
||||
// - Maintain State (Open/Acknowledged/Resolved)
|
||||
// - Resolve issues only after resolveAfter duration of continuous absence
|
||||
// - Acknowledgements are in-memory only (not persisted)
|
||||
// - Safe for concurrent use
|
||||
type Store struct {
|
||||
mu sync.RWMutex
|
||||
|
||||
resolveAfter time.Duration
|
||||
|
||||
// issues holds the latest known version of each issue keyed by stable ID.
|
||||
issues map[string]model.Issue
|
||||
|
||||
// ack is an in-memory toggle keyed by issue ID.
|
||||
// If true and the issue is currently present, its state is Acknowledged.
|
||||
ack map[string]bool
|
||||
}
|
||||
|
||||
// New returns a new Store.
|
||||
// If resolveAfter <= 0, a default of 30s is used.
|
||||
func New(resolveAfter time.Duration) *Store {
|
||||
if resolveAfter <= 0 {
|
||||
resolveAfter = defaultResolveAfter
|
||||
}
|
||||
return &Store{
|
||||
resolveAfter: resolveAfter,
|
||||
issues: map[string]model.Issue{},
|
||||
ack: map[string]bool{},
|
||||
}
|
||||
}
|
||||
|
||||
// Upsert merges "currently true" issues for this tick.
|
||||
//
|
||||
// Incoming is deduped by Issue.ID; the first instance wins for non-timestamp fields.
|
||||
// Timestamps/state are managed by the store.
|
||||
func (s *Store) Upsert(now time.Time, incoming []model.Issue) {
|
||||
// Pre-dedupe without locking to keep lock hold times small.
|
||||
seen := make(map[string]model.Issue, len(incoming))
|
||||
for _, iss := range incoming {
|
||||
if iss.ID == "" {
|
||||
// Ignore invalid issues. ID is the stable dedupe key.
|
||||
continue
|
||||
}
|
||||
if _, ok := seen[iss.ID]; ok {
|
||||
continue
|
||||
}
|
||||
seen[iss.ID] = iss
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
for id, in := range seen {
|
||||
existing, ok := s.issues[id]
|
||||
if !ok || existing.State == model.StateResolved {
|
||||
// New issue (or a previously resolved one reappearing): start a new "episode".
|
||||
in.FirstSeen = now
|
||||
in.LastSeen = now
|
||||
in.State = model.StateOpen
|
||||
if s.ack[id] {
|
||||
in.State = model.StateAcknowledged
|
||||
}
|
||||
s.issues[id] = in
|
||||
continue
|
||||
}
|
||||
|
||||
// Existing open/acked issue: update all fields from incoming, but preserve FirstSeen.
|
||||
in.FirstSeen = existing.FirstSeen
|
||||
in.LastSeen = now
|
||||
in.State = model.StateOpen
|
||||
if s.ack[id] {
|
||||
in.State = model.StateAcknowledged
|
||||
}
|
||||
s.issues[id] = in
|
||||
}
|
||||
|
||||
// Update resolved state for issues not present this tick.
|
||||
s.applyResolutionsLocked(now, seen)
|
||||
}
|
||||
|
||||
// Snapshot returns a point-in-time copy of all known issues with their states updated
|
||||
// according to resolveAfter.
|
||||
func (s *Store) Snapshot(now time.Time) []model.Issue {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
// Apply resolutions based on time. We don't know which IDs are present "this tick"
|
||||
// from Snapshot alone, so we only resolve by absence window (LastSeen age).
|
||||
s.applyResolutionsLocked(now, nil)
|
||||
|
||||
out := make([]model.Issue, 0, len(s.issues))
|
||||
for _, iss := range s.issues {
|
||||
out = append(out, deepCopyIssue(iss))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Acknowledge marks an issue acknowledged (in-memory only).
|
||||
func (s *Store) Acknowledge(id string) {
|
||||
if id == "" {
|
||||
return
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
s.ack[id] = true
|
||||
iss, ok := s.issues[id]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if iss.State != model.StateResolved {
|
||||
iss.State = model.StateAcknowledged
|
||||
s.issues[id] = iss
|
||||
}
|
||||
}
|
||||
|
||||
// Unacknowledge clears the acknowledgement toggle (in-memory only).
|
||||
func (s *Store) Unacknowledge(id string) {
|
||||
if id == "" {
|
||||
return
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
delete(s.ack, id)
|
||||
iss, ok := s.issues[id]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if iss.State != model.StateResolved {
|
||||
iss.State = model.StateOpen
|
||||
s.issues[id] = iss
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Store) applyResolutionsLocked(now time.Time, present map[string]model.Issue) {
|
||||
for id, iss := range s.issues {
|
||||
// If caller provided a present set and the ID is present, it cannot be resolved.
|
||||
if present != nil {
|
||||
if _, ok := present[id]; ok {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if iss.State == model.StateResolved {
|
||||
continue
|
||||
}
|
||||
if s.resolveAfter > 0 && now.Sub(iss.LastSeen) >= s.resolveAfter {
|
||||
iss.State = model.StateResolved
|
||||
s.issues[id] = iss
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func deepCopyIssue(in model.Issue) model.Issue {
|
||||
out := in
|
||||
if in.Evidence != nil {
|
||||
m := make(map[string]string, len(in.Evidence))
|
||||
for k, v := range in.Evidence {
|
||||
m[k] = v
|
||||
}
|
||||
out.Evidence = m
|
||||
}
|
||||
return out
|
||||
}
|
||||
101
internal/store/store_test.go
Normal file
101
internal/store/store_test.go
Normal file
@@ -0,0 +1,101 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
func TestStore_Upsert_DedupAndTimestamps(t *testing.T) {
|
||||
now1 := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
now2 := now1.Add(5 * time.Second)
|
||||
|
||||
s := New(30 * time.Second)
|
||||
|
||||
// Same ID twice in one Upsert should dedupe.
|
||||
s.Upsert(now1, []model.Issue{
|
||||
{ID: "i-1", Title: "first"},
|
||||
{ID: "i-1", Title: "should be ignored"},
|
||||
})
|
||||
|
||||
snap1 := s.Snapshot(now1)
|
||||
if len(snap1) != 1 {
|
||||
t.Fatalf("expected 1 issue, got %d", len(snap1))
|
||||
}
|
||||
if snap1[0].ID != "i-1" {
|
||||
t.Fatalf("expected id i-1, got %q", snap1[0].ID)
|
||||
}
|
||||
if !snap1[0].FirstSeen.Equal(now1) {
|
||||
t.Fatalf("expected FirstSeen=%v, got %v", now1, snap1[0].FirstSeen)
|
||||
}
|
||||
if !snap1[0].LastSeen.Equal(now1) {
|
||||
t.Fatalf("expected LastSeen=%v, got %v", now1, snap1[0].LastSeen)
|
||||
}
|
||||
if snap1[0].State != model.StateOpen {
|
||||
t.Fatalf("expected State=Open, got %q", snap1[0].State)
|
||||
}
|
||||
|
||||
// Subsequent Upsert for same ID should preserve FirstSeen and update LastSeen.
|
||||
s.Upsert(now2, []model.Issue{{ID: "i-1", Title: "updated"}})
|
||||
snap2 := s.Snapshot(now2)
|
||||
if len(snap2) != 1 {
|
||||
t.Fatalf("expected 1 issue, got %d", len(snap2))
|
||||
}
|
||||
if !snap2[0].FirstSeen.Equal(now1) {
|
||||
t.Fatalf("expected FirstSeen to remain %v, got %v", now1, snap2[0].FirstSeen)
|
||||
}
|
||||
if !snap2[0].LastSeen.Equal(now2) {
|
||||
t.Fatalf("expected LastSeen=%v, got %v", now2, snap2[0].LastSeen)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_AckPreservedWhilePresent(t *testing.T) {
|
||||
now1 := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
now2 := now1.Add(1 * time.Second)
|
||||
|
||||
s := New(30 * time.Second)
|
||||
s.Upsert(now1, []model.Issue{{ID: "i-1", Title: "t"}})
|
||||
|
||||
s.Acknowledge("i-1")
|
||||
|
||||
// Upsert again while present should remain Acked.
|
||||
s.Upsert(now2, []model.Issue{{ID: "i-1", Title: "t2"}})
|
||||
snap := s.Snapshot(now2)
|
||||
if len(snap) != 1 {
|
||||
t.Fatalf("expected 1 issue, got %d", len(snap))
|
||||
}
|
||||
if snap[0].State != model.StateAcknowledged {
|
||||
t.Fatalf("expected State=Acknowledged, got %q", snap[0].State)
|
||||
}
|
||||
|
||||
s.Unacknowledge("i-1")
|
||||
snap2 := s.Snapshot(now2)
|
||||
if snap2[0].State != model.StateOpen {
|
||||
t.Fatalf("expected State=Open after unack, got %q", snap2[0].State)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_ResolvesOnlyAfterAbsenceWindow(t *testing.T) {
|
||||
resolveAfter := 10 * time.Second
|
||||
now0 := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
|
||||
s := New(resolveAfter)
|
||||
s.Upsert(now0, []model.Issue{{ID: "i-1", Title: "t"}})
|
||||
|
||||
// Miss a tick shortly after; should not resolve due to flap suppression / window.
|
||||
s.Upsert(now0.Add(1*time.Second), nil)
|
||||
snap1 := s.Snapshot(now0.Add(9 * time.Second))
|
||||
if len(snap1) != 1 {
|
||||
t.Fatalf("expected 1 issue, got %d", len(snap1))
|
||||
}
|
||||
if snap1[0].State != model.StateOpen {
|
||||
t.Fatalf("expected still Open before resolveAfter, got %q", snap1[0].State)
|
||||
}
|
||||
|
||||
// Still absent beyond resolveAfter => should resolve.
|
||||
snap2 := s.Snapshot(now0.Add(11 * time.Second))
|
||||
if snap2[0].State != model.StateResolved {
|
||||
t.Fatalf("expected Resolved after absence > resolveAfter, got %q", snap2[0].State)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user