feat: implement ControlTower TUI for cluster and host monitoring

Add complete TUI application for monitoring Kubernetes clusters and host
systems. Features include:

Core features:
- Collector framework with concurrent scheduling
- Host collectors: disk, memory, load, network
- Kubernetes collectors: pods, nodes, workloads, events with informers
- Issue deduplication, state management, and resolve-after logic
- Bubble Tea TUI with table view, details pane, and filtering
- JSON export functionality

UX improvements:
- Help overlay with keybindings
- Priority/category filters with visual indicators
- Direct priority jump (0/1/2/3)
- Bulk acknowledge (Shift+A)
- Clipboard copy (y)
- Theme toggle (T)
- Age format toggle (d)
- Wide title toggle (t)
- Vi-style navigation (j/k)
- Home/End jump (g/G)
- Rollup drill-down in details

Robustness:
- Grace period for unreachable clusters
- Rollups for high-volume issues
- Flap suppression
- RBAC error handling

Files: All core application code with tests for host collectors,
engine, store, model, and export packages.
This commit is contained in:
OpenCode Test
2025-12-24 13:03:08 -08:00
parent c2c03fd664
commit 1421b4659e
40 changed files with 5941 additions and 0 deletions
+45
View File
@@ -0,0 +1,45 @@
package collectors
import (
"context"
"time"
"tower/internal/model"
)
// Health is the coarse health level a collector reports in its Status.
type Health string
// Health levels that a collector can report.
const (
// HealthOK is the value OKStatus uses for a normal tick.
HealthOK Health = "OK"
// HealthDegraded marks a tick that produced a result but with reduced fidelity.
HealthDegraded Health = "DEGRADED"
// HealthError marks a tick that failed.
HealthError Health = "ERROR"
)
// Status describes collector health for the current tick.
//
// Collectors should return Status even when returning an error,
// so the UI can show useful context.
//
// LastSuccess should be the collector's most recent successful collect time.
// When unknown, it may be the zero value.
//
// Message should be short and human-friendly.
//
// NOTE(review): encoding/json's "omitempty" never treats a struct value as
// empty, so a zero LastSuccess is still serialized (as the zero timestamp)
// despite the tag. Confirm whether consumers of the JSON rely on that.
type Status struct {
// Health is the coarse health level for this tick.
Health Health `json:"health"`
// Message is an optional short explanation; omitted from JSON when empty.
Message string `json:"message,omitempty"`
// LastSuccess is the time of the most recent successful collect, or zero.
LastSuccess time.Time `json:"last_success,omitempty"`
}
// OKStatus returns a Status whose Health is HealthOK. Message and LastSuccess
// are left at their zero values.
func OKStatus() Status {
	var st Status
	st.Health = HealthOK
	return st
}
// Collector returns "currently true" issues for this tick.
//
// The store is responsible for dedupe, lifecycle, and resolve-after.
// Collectors must respect ctx cancellation.
type Collector interface {
// Name returns the collector's identifier (for example "host:disk").
Name() string
// Interval returns the collector's period; presumably the scheduler uses it
// to decide how often to call Collect (the scheduler is not shown here).
Interval() time.Duration
// Collect gathers the issues that hold right now, together with a Status
// describing the collector's own health and an error when the tick failed.
Collect(ctx context.Context) ([]model.Issue, Status, error)
}
+287
View File
@@ -0,0 +1,287 @@
package host
import (
"bufio"
"context"
"fmt"
"os"
"strconv"
"strings"
"syscall"
"time"
"tower/internal/collectors"
"tower/internal/model"
)
// DiskCollector checks filesystem block + inode pressure across mounts.
//
// It reads /proc/mounts to discover mounts and then uses statfs to compute usage.
// Pseudo filesystems are filtered out.
//
// Thresholds (PLAN.md):
// - P1 if blocks OR inodes >= 92%
// - P0 if blocks OR inodes >= 98%
//
// Issues are emitted per mount (one issue that includes both block+inode usage).
//
// NOTE: This collector is Linux-specific.
type DiskCollector struct {
// interval is the collection period; Interval falls back to 10s when it is <= 0.
interval time.Duration
// readFile reads a whole file; NewDiskCollector sets it to os.ReadFile.
readFile func(string) ([]byte, error)
// statfs fills st for path; NewDiskCollector sets it to syscall.Statfs.
statfs func(path string, st *syscall.Statfs_t) error
}
// NewDiskCollector returns a DiskCollector that runs every 10 seconds and is
// wired to the real os.ReadFile and syscall.Statfs.
func NewDiskCollector() *DiskCollector {
	dc := new(DiskCollector)
	dc.interval = 10 * time.Second
	dc.readFile = os.ReadFile
	dc.statfs = syscall.Statfs
	return dc
}
// Name returns the collector identifier "host:disk".
func (c *DiskCollector) Name() string {
	const name = "host:disk"
	return name
}
// Interval returns the configured collection period, or 10 seconds when the
// configured value is zero or negative.
func (c *DiskCollector) Interval() time.Duration {
	if d := c.interval; d > 0 {
		return d
	}
	return 10 * time.Second
}
// Collect reads /proc/mounts, stats every distinct non-pseudo mount point and
// emits one issue per mount whose block or inode usage crosses a threshold.
//
// It returns a HealthError status together with the error when ctx is done or
// /proc/mounts cannot be read, HealthDegraded when no mounts were parsed or
// when statfs failed for some mounts, and HealthOK otherwise. When ctx is
// canceled part-way through, the issues gathered so far are returned alongside
// the context error.
func (c *DiskCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
	canceled := collectors.Status{Health: collectors.HealthError, Message: "canceled"}
	if ctxErr := ctx.Err(); ctxErr != nil {
		return nil, canceled, ctxErr
	}

	raw, readErr := c.readFile("/proc/mounts")
	if readErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/mounts"}, readErr
	}

	entries := parseProcMounts(string(raw))
	if len(entries) == 0 {
		// Unusual, but reported as degraded rather than as a hard error.
		return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "no mounts found"}, nil
	}

	var (
		found        = make([]model.Issue, 0, 8)
		visited      = make(map[string]struct{})
		statFailures int
	)
	for i := range entries {
		mnt := entries[i]
		if ctxErr := ctx.Err(); ctxErr != nil {
			return found, canceled, ctxErr
		}
		if shouldSkipMount(mnt) {
			continue
		}
		// The same mount point can be listed more than once; stat it only once.
		if _, dup := visited[mnt.MountPoint]; dup {
			continue
		}
		visited[mnt.MountPoint] = struct{}{}

		var fs syscall.Statfs_t
		if c.statfs(mnt.MountPoint, &fs) != nil {
			statFailures++
			continue
		}

		usedPct, freeBytes := statfsBlockUsedPct(fs)
		inodePct := statfsInodeUsedPct(fs)
		severity, alert := diskPriority(usedPct, inodePct)
		if !alert {
			continue
		}

		details := map[string]string{
			"mount":            mnt.MountPoint,
			"fstype":           mnt.FSType,
			"block_used_pct":   fmt.Sprintf("%.1f", usedPct),
			"block_free_bytes": strconv.FormatUint(freeBytes, 10),
		}
		// A negative inode percentage means the filesystem reports no inode data.
		if inodePct >= 0 {
			details["inode_used_pct"] = fmt.Sprintf("%.1f", inodePct)
		}

		fix := fmt.Sprintf(
			"Inspect usage:\n df -h %s\n df -i %s\nFind large directories:\n sudo du -xh --max-depth=2 %s | sort -h | tail",
			mnt.MountPoint, mnt.MountPoint, mnt.MountPoint,
		)
		found = append(found, model.Issue{
			ID:           fmt.Sprintf("host:disk:%s:usage", mnt.MountPoint),
			Category:     model.CategoryStorage,
			Priority:     severity,
			Title:        fmt.Sprintf("Disk usage high on %s", mnt.MountPoint),
			Details:      "Filesystem space and/or inodes are nearly exhausted.",
			Evidence:     details,
			SuggestedFix: fix,
		})
	}

	status := collectors.OKStatus()
	if statFailures > 0 {
		status.Health = collectors.HealthDegraded
		status.Message = fmt.Sprintf("partial failures: %d mounts", statFailures)
	}
	return found, status, nil
}
// procMount is one parsed line of /proc/mounts.
type procMount struct {
// Device is the first field of the line, with octal escapes decoded.
Device string
// MountPoint is the second field of the line, with octal escapes decoded.
MountPoint string
// FSType is the third field of the line, taken verbatim.
FSType string
// Options is the fourth field of the line when present, taken verbatim.
Options string
}
// parseProcMounts parses the text of /proc/mounts into one procMount per line.
// Lines with fewer than three whitespace-separated fields (including blank
// lines) are ignored. The device and mount point are unescaped; the filesystem
// type and options are kept verbatim.
func parseProcMounts(content string) []procMount {
	mounts := make([]procMount, 0, 32)
	for sc := bufio.NewScanner(strings.NewReader(content)); sc.Scan(); {
		// Fields already ignores leading and trailing whitespace.
		cols := strings.Fields(sc.Text())
		if len(cols) < 3 {
			continue
		}
		var opts string
		if len(cols) >= 4 {
			opts = cols[3]
		}
		mounts = append(mounts, procMount{
			Device:     unescapeProcMountsField(cols[0]),
			MountPoint: unescapeProcMountsField(cols[1]),
			FSType:     cols[2],
			Options:    opts,
		})
	}
	return mounts
}
// unescapeProcMountsField decodes the octal escapes used in /proc/mounts
// fields. Special characters are written there as a backslash followed by
// exactly three octal digits; the most common one is a space as \040.
//
// Every \NNN sequence whose value fits in a byte is decoded, so less common
// escapes such as \043 ('#') are handled as well as space, tab, newline and
// backslash. A backslash that is not followed by three octal digits, or whose
// value exceeds 0377, is copied through unchanged. Decoding is a single
// left-to-right pass, so the output of one escape is never re-interpreted as
// the start of another.
func unescapeProcMountsField(s string) string {
	// Fast path: most fields contain no escapes at all.
	if strings.IndexByte(s, '\\') < 0 {
		return s
	}

	isOctal := func(c byte) bool { return c >= '0' && c <= '7' }

	var b strings.Builder
	b.Grow(len(s))
	for i := 0; i < len(s); i++ {
		if s[i] == '\\' && i+3 < len(s) && isOctal(s[i+1]) && isOctal(s[i+2]) && isOctal(s[i+3]) {
			v := int(s[i+1]-'0')<<6 | int(s[i+2]-'0')<<3 | int(s[i+3]-'0')
			if v <= 0xFF {
				b.WriteByte(byte(v))
				i += 3 // skip the three digits just consumed
				continue
			}
		}
		b.WriteByte(s[i])
	}
	return b.String()
}
// pseudoFSTypes is the set of filesystem types that shouldSkipMount excludes
// from disk-pressure checks. It is matched against the fstype column of
// /proc/mounts.
var pseudoFSTypes = map[string]struct{}{
"proc": {},
"sysfs": {},
"tmpfs": {},
"devtmpfs": {},
"devpts": {},
"cgroup": {},
"cgroup2": {},
"pstore": {},
"securityfs": {},
"debugfs": {},
"tracefs": {},
"configfs": {},
"hugetlbfs": {},
"mqueue": {},
"rpc_pipefs": {},
"fusectl": {},
"binfmt_misc": {},
"autofs": {},
"bpf": {},
"ramfs": {},
"nsfs": {},
"efivarfs": {},
"overlay": {}, // common container overlay mounts
"squashfs": {}, // typically read-only images
"selinuxfs": {},
// NOTE(review): "systemd-1" looks like an autofs device name rather than a
// filesystem type; confirm this entry can ever match the fstype column.
"systemd-1": {},
"overlayfs": {}, // (non-standard) conservative skip
"cgroupfs": {},
"procfs": {},
"fuse.lxcfs": {},
"fuse.gvfsd-fuse": {},
}
// shouldSkipMount reports whether m should be excluded from disk-pressure
// checks. A mount is skipped when its mount point is empty, when its
// filesystem type is listed in pseudoFSTypes, or when it is /proc, /sys or
// /dev or lives underneath one of them.
//
// The directory match is done on path boundaries, so real filesystems whose
// mount point merely starts with the same characters (for example /sysroot,
// /system, /devel or /processing) are still monitored.
func shouldSkipMount(m procMount) bool {
	if m.MountPoint == "" {
		return true
	}
	// Filter by fstype.
	if _, pseudo := pseudoFSTypes[m.FSType]; pseudo {
		return true
	}
	// Filter common pseudo mount trees. /dev itself can be a real mount in
	// some cases, but usually isn't useful for disk pressure.
	for _, root := range [...]string{"/proc", "/sys", "/dev"} {
		if m.MountPoint == root || strings.HasPrefix(m.MountPoint, root+"/") {
			return true
		}
	}
	return false
}
// statfsBlockUsedPct returns the percentage of blocks in use and the number of
// bytes available to unprivileged users, mirroring df(1) semantics closely:
//
//	total = f_blocks
//	used  = f_blocks - f_bfree
//	avail = f_bavail (space available to unprivileged user)
//	use%  = used / (used + avail)
//
// It returns (0, 0) when the filesystem reports no blocks or when used+avail
// is zero. If a filesystem reports more free blocks than total blocks, the
// used count is clamped to zero instead of wrapping around the unsigned
// subtraction, which would otherwise look like a completely full disk.
func statfsBlockUsedPct(st syscall.Statfs_t) (usedPct float64, freeBytes uint64) {
	if st.Blocks == 0 {
		return 0, 0
	}
	bsize := uint64(st.Bsize)
	blocks := uint64(st.Blocks)
	bfree := uint64(st.Bfree)
	bavail := uint64(st.Bavail)

	var usedBlocks uint64
	if bfree < blocks {
		usedBlocks = blocks - bfree
	}
	denom := usedBlocks + bavail
	if denom == 0 {
		return 0, 0
	}
	freeBytes = bavail * bsize
	usedPct = (float64(usedBlocks) / float64(denom)) * 100.0
	return usedPct, freeBytes
}
// statfsInodeUsedPct returns the percentage of inodes in use. When the
// filesystem reports no inode information (f_files == 0) it returns -1.
func statfsInodeUsedPct(st syscall.Statfs_t) float64 {
	if st.Files == 0 {
		return -1
	}
	all, unused := float64(st.Files), float64(st.Ffree)
	inUse := all - unused
	return (inUse / all) * 100.0
}
// diskPriority maps block and inode usage percentages to an issue priority.
// The higher of the two percentages decides; a negative inodePct means inode
// data is unavailable and is ignored. It returns P0 at >= 98%, P1 at >= 92%,
// and ("", false) below that.
func diskPriority(blockPct, inodePct float64) (model.Priority, bool) {
	worst := blockPct
	if inodePct >= 0 && inodePct > blockPct {
		worst = inodePct
	}

	if worst >= 98.0 {
		return model.PriorityP0, true
	}
	if worst >= 92.0 {
		return model.PriorityP1, true
	}
	return "", false
}
// Compile-time check that *DiskCollector satisfies collectors.Collector.
var _ collectors.Collector = (*DiskCollector)(nil)
+80
View File
@@ -0,0 +1,80 @@
package host
import (
"syscall"
"testing"
)
// TestParseProcMounts_UnescapesAndParses checks that well-formed lines are
// parsed, escaped spaces in the mount point are decoded, and a line with too
// few fields is dropped.
func TestParseProcMounts_UnescapesAndParses(t *testing.T) {
	const input = "dev1 / ext4 rw 0 0\n" +
		"dev2 /path\\040with\\040space xfs rw 0 0\n" +
		"badline\n"

	got := parseProcMounts(input)
	if n := len(got); n != 2 {
		t.Fatalf("expected 2 mounts, got %d", n)
	}

	root, spaced := got[0], got[1]
	if !(root.MountPoint == "/" && root.FSType == "ext4") {
		t.Fatalf("unexpected first mount: %+v", root)
	}
	if spaced.MountPoint != "/path with space" {
		t.Fatalf("expected unescaped mountpoint, got %q", spaced.MountPoint)
	}
}
// TestShouldSkipMount_FiltersPseudo checks that pseudo mounts are skipped and
// that an ordinary ext4 mount is kept.
func TestShouldSkipMount_FiltersPseudo(t *testing.T) {
	pseudo := [...]procMount{
		{MountPoint: "/proc", FSType: "proc"},
		{MountPoint: "/sys", FSType: "sysfs"},
		{MountPoint: "/dev", FSType: "tmpfs"},
		{MountPoint: "/dev/shm", FSType: "tmpfs"},
	}
	for i := range pseudo {
		if mnt := pseudo[i]; !shouldSkipMount(mnt) {
			t.Fatalf("expected skip for %+v", mnt)
		}
	}

	home := procMount{MountPoint: "/home", FSType: "ext4"}
	if shouldSkipMount(home) {
		t.Fatalf("did not expect skip for /home ext4")
	}
}
// TestDiskPriority checks the 92% and 98% thresholds and that either the block
// or the inode percentage can trigger the higher priority.
func TestDiskPriority(t *testing.T) {
	pri, hit := diskPriority(91.9, -1)
	if hit {
		t.Fatalf("expected no issue, got %v", pri)
	}

	pri, hit = diskPriority(92.0, -1)
	if !(hit && pri == "P1") {
		t.Fatalf("expected P1 at 92%%, got %v ok=%v", pri, hit)
	}

	pri, hit = diskPriority(97.9, 98.0)
	if !(hit && pri == "P0") {
		t.Fatalf("expected P0 if either crosses 98%%, got %v ok=%v", pri, hit)
	}
}
// TestStatfsCalculations checks the block and inode percentage helpers against
// a hand-built statfs result.
func TestStatfsCalculations(t *testing.T) {
	fs := syscall.Statfs_t{Bsize: 1, Blocks: 100, Bfree: 8, Bavail: 8}

	usedPct, freeBytes := statfsBlockUsedPct(fs)
	if freeBytes != 8 {
		t.Fatalf("expected free=8 bytes, got %d", freeBytes)
	}
	if !(usedPct >= 91.9 && usedPct <= 92.1) {
		t.Fatalf("expected ~92%% used, got %f", usedPct)
	}

	fs.Files, fs.Ffree = 100, 2
	inodePct := statfsInodeUsedPct(fs)
	if !(inodePct >= 97.9 && inodePct <= 98.1) {
		t.Fatalf("expected ~98%% inode used, got %f", inodePct)
	}

	fs.Files = 0
	if got := statfsInodeUsedPct(fs); got != -1 {
		t.Fatalf("expected -1 when inode info unavailable")
	}
}
+127
View File
@@ -0,0 +1,127 @@
package host
import (
"context"
"fmt"
"os"
"runtime"
"strconv"
"strings"
"sync"
"time"
"tower/internal/collectors"
"tower/internal/model"
)
// LoadCollector evaluates 1-minute load average normalized by logical CPU count.
//
// Thresholds (PLAN.md), normalized by CPU count:
// - P2 if load1/cpus >= 4.0 sustained 120s
// - P1 if load1/cpus >= 6.0 sustained 120s
//
// NOTE: Linux-specific.
// Thread-safe: Collect() can be called concurrently.
type LoadCollector struct {
// interval is the collection period; Interval falls back to 5s when it is <= 0.
interval time.Duration
// now supplies the current time; NewLoadCollector sets it to time.Now.
now func() time.Time
// readFile reads a whole file; NewLoadCollector sets it to os.ReadFile.
readFile func(string) ([]byte, error)
// cpuCount returns the CPU count used for normalization; NewLoadCollector
// sets it to runtime.NumCPU.
cpuCount func() int
// mu guards pri and since.
mu sync.Mutex
// pri is the priority currently being tracked ("" when load is below thresholds).
pri model.Priority
// since is when pri was first observed; zero when nothing is tracked.
since time.Time
}
// NewLoadCollector returns a LoadCollector that runs every 5 seconds and is
// wired to the real clock, os.ReadFile and runtime.NumCPU.
func NewLoadCollector() *LoadCollector {
	lc := new(LoadCollector)
	lc.interval = 5 * time.Second
	lc.now = time.Now
	lc.readFile = os.ReadFile
	lc.cpuCount = runtime.NumCPU
	return lc
}
// Name returns the collector identifier "host:load".
func (c *LoadCollector) Name() string {
	const name = "host:load"
	return name
}
// Interval returns the configured collection period, or 5 seconds when the
// configured value is zero or negative.
func (c *LoadCollector) Interval() time.Duration {
	if d := c.interval; d > 0 {
		return d
	}
	return 5 * time.Second
}
// Collect reads /proc/loadavg, normalizes the 1-minute load by the CPU count
// and emits a single "host:load:high" issue once the resulting priority has
// been held for its sustain window.
//
// It returns HealthError with the error when ctx is done or the file cannot
// be read, HealthDegraded (with a nil error) when the file cannot be parsed,
// and HealthOK otherwise.
func (c *LoadCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, ctxErr
	}

	ts := c.now()
	raw, readErr := c.readFile("/proc/loadavg")
	if readErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/loadavg"}, readErr
	}

	oneMin, parseErr := parseProcLoadavgFirst(string(raw))
	if parseErr != nil {
		return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "bad /proc/loadavg"}, nil
	}

	// Guard against a non-positive CPU count so the division stays meaningful.
	ncpu := c.cpuCount()
	if ncpu < 1 {
		ncpu = 1
	}
	perCPU := oneMin / float64(ncpu)
	want, hold := desiredLoadPriority(perCPU)

	c.mu.Lock()
	c.pri, c.since = updateSustained(ts, c.pri, c.since, want)
	level, start := c.pri, c.since
	c.mu.Unlock()

	// Only report once the current level has been held for the full window.
	sustained := level != "" && !start.IsZero() && ts.Sub(start) >= hold
	if !sustained {
		return nil, collectors.OKStatus(), nil
	}

	issue := model.Issue{
		ID:       "host:load:high",
		Category: model.CategoryPerformance,
		Priority: level,
		Title:    "High sustained system load",
		Details:  "The 1-minute load average is high relative to CPU count for a sustained period.",
		Evidence: map[string]string{
			"load1":            fmt.Sprintf("%.2f", oneMin),
			"cpus":             strconv.Itoa(ncpu),
			"load1_per_cpu":    fmt.Sprintf("%.2f", perCPU),
			"sustained_window": hold.String(),
		},
		SuggestedFix: "Investigate CPU hogs:\n top\n ps -eo pid,ppid,cmd,%cpu --sort=-%cpu | head\nIf I/O bound (high iowait), check disk/network.\n",
	}
	return []model.Issue{issue}, collectors.OKStatus(), nil
}
// parseProcLoadavgFirst extracts the 1-minute load average, i.e. the first
// whitespace-separated field of /proc/loadavg content such as
// "1.23 0.70 0.50 1/123 4567". It returns an error when the content has no
// fields or when the first field is not a valid float.
func parseProcLoadavgFirst(content string) (float64, error) {
	cols := strings.Fields(content)
	if len(cols) == 0 {
		return 0, fmt.Errorf("missing fields")
	}
	load1, err := strconv.ParseFloat(cols[0], 64)
	if err == nil {
		return load1, nil
	}
	return 0, err
}
// desiredLoadPriority maps a per-CPU load value to the priority it warrants
// and the time that priority must be sustained before an issue is raised:
// P1 at >= 6.0 and P2 at >= 4.0, both with a 120s window. Below 4.0 it returns
// ("", 0).
func desiredLoadPriority(loadPerCPU float64) (model.Priority, time.Duration) {
	const sustain = 120 * time.Second
	switch {
	case loadPerCPU >= 6.0:
		return model.PriorityP1, sustain
	case loadPerCPU >= 4.0:
		return model.PriorityP2, sustain
	default:
		return "", 0
	}
}
// Compile-time check that *LoadCollector satisfies collectors.Collector.
var _ collectors.Collector = (*LoadCollector)(nil)
+48
View File
@@ -0,0 +1,48 @@
package host
import (
"testing"
"time"
"tower/internal/model"
)
// TestParseProcLoadavgFirst checks that the first field is parsed and that
// empty content is rejected.
func TestParseProcLoadavgFirst(t *testing.T) {
	load1, parseErr := parseProcLoadavgFirst("1.23 0.70 0.50 1/123 4567\n")
	if parseErr != nil {
		t.Fatalf("unexpected err: %v", parseErr)
	}
	if !(load1 >= 1.229 && load1 <= 1.231) {
		t.Fatalf("expected 1.23, got %v", load1)
	}

	_, parseErr = parseProcLoadavgFirst("\n")
	if parseErr == nil {
		t.Fatalf("expected error")
	}
}
// TestDesiredLoadPriority checks the per-CPU load thresholds at their exact
// boundaries.
func TestDesiredLoadPriority(t *testing.T) {
	const sustain = 120 * time.Second

	pri, window := desiredLoadPriority(3.99)
	if !(pri == "" && window == 0) {
		t.Fatalf("expected none")
	}

	pri, window = desiredLoadPriority(4.0)
	if !(pri == model.PriorityP2 && window == sustain) {
		t.Fatalf("expected P2/120s")
	}

	pri, window = desiredLoadPriority(6.0)
	if !(pri == model.PriorityP1 && window == sustain) {
		t.Fatalf("expected P1/120s")
	}
}
// TestUpdateSustainedWorksForLoadToo checks that updateSustained starts
// tracking a new priority and keeps the original timestamp while that priority
// persists.
func TestUpdateSustainedWorksForLoadToo(t *testing.T) {
	start := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)

	pri, began := updateSustained(start, "", time.Time{}, model.PriorityP2)
	if !(pri == model.PriorityP2 && began.Equal(start)) {
		t.Fatalf("expected set")
	}

	later := start.Add(10 * time.Second)
	nextPri, nextBegan := updateSustained(later, pri, began, model.PriorityP2)
	if !(nextPri == model.PriorityP2 && nextBegan.Equal(began)) {
		t.Fatalf("expected unchanged")
	}
}
+205
View File
@@ -0,0 +1,205 @@
package host
import (
"bufio"
"context"
"fmt"
"os"
"strconv"
"strings"
"sync"
"time"
"tower/internal/collectors"
"tower/internal/model"
)
// MemCollector checks MemAvailable and swap pressure from /proc/meminfo.
//
// Thresholds (PLAN.md):
// Memory (MemAvailable as % of MemTotal):
// - P2 if <= 15% sustained 60s
// - P1 if <= 10% sustained 60s
// - P0 if <= 5% sustained 30s
//
// Swap pressure (only if RAM is also tight):
// - P1 if swap used >= 50% AND MemAvailable <= 10% sustained 60s
// - P0 if swap used >= 80% AND MemAvailable <= 5% sustained 30s
//
// Emits up to two issues:
// - host:mem:available
// - host:mem:swap
//
// NOTE: Linux-specific.
// Thread-safe: Collect() can be called concurrently.
type MemCollector struct {
// interval is the collection period; Interval falls back to 5s when it is <= 0.
interval time.Duration
// now supplies the current time; NewMemCollector sets it to time.Now.
now func() time.Time
// readFile reads a whole file; NewMemCollector sets it to os.ReadFile.
readFile func(string) ([]byte, error)
// mu guards the four sustained-state fields below.
mu sync.Mutex
// memPri and memSince track the current low-memory priority and when it began.
memPri model.Priority
memSince time.Time
// swapPri and swapSince track the current swap-pressure priority and when it began.
swapPri model.Priority
swapSince time.Time
}
// NewMemCollector returns a MemCollector that runs every 5 seconds and is
// wired to the real clock and os.ReadFile.
func NewMemCollector() *MemCollector {
	mc := new(MemCollector)
	mc.interval = 5 * time.Second
	mc.now = time.Now
	mc.readFile = os.ReadFile
	return mc
}
// Name returns the collector identifier "host:mem".
func (c *MemCollector) Name() string {
	const name = "host:mem"
	return name
}
// Interval returns the configured collection period, or 5 seconds when the
// configured value is zero or negative.
func (c *MemCollector) Interval() time.Duration {
	if d := c.interval; d > 0 {
		return d
	}
	return 5 * time.Second
}
// Collect reads /proc/meminfo and emits up to two issues: "host:mem:available"
// when MemAvailable has stayed low for its sustain window, and "host:mem:swap"
// when swap usage is high while RAM is also tight for its sustain window.
//
// It returns HealthError with the error when ctx is done or the file cannot
// be read, HealthDegraded (with a nil error) when MemTotal or MemAvailable is
// missing or MemTotal is not positive, and HealthOK otherwise.
func (c *MemCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, ctxErr
	}

	ts := c.now()
	raw, readErr := c.readFile("/proc/meminfo")
	if readErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/meminfo"}, readErr
	}

	info := parseProcMeminfo(string(raw))
	totalKB, haveTotal := info["MemTotal"]
	availKB, haveAvail := info["MemAvailable"]
	if !(haveTotal && haveAvail && totalKB > 0) {
		return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "missing MemTotal/MemAvailable"}, nil
	}
	availPct := (float64(availKB) / float64(totalKB)) * 100.0

	// Low-memory tracking.
	wantMem, memHold := desiredMemPriority(availPct)
	c.mu.Lock()
	c.memPri, c.memSince = updateSustained(ts, c.memPri, c.memSince, wantMem)
	memLevel, memStart := c.memPri, c.memSince
	c.mu.Unlock()

	found := make([]model.Issue, 0, 2)
	memSustained := memLevel != "" && !memStart.IsZero() && ts.Sub(memStart) >= memHold
	if memSustained {
		found = append(found, model.Issue{
			ID:       "host:mem:available",
			Category: model.CategoryMemory,
			Priority: memLevel,
			Title:    "Low available memory",
			Details:  "MemAvailable is low and has remained low for a sustained period.",
			Evidence: map[string]string{
				"mem_available_kb":  strconv.FormatInt(availKB, 10),
				"mem_total_kb":      strconv.FormatInt(totalKB, 10),
				"mem_available_pct": fmt.Sprintf("%.1f", availPct),
			},
			SuggestedFix: "Identify memory hogs:\n free -h\n ps aux --sort=-rss | head\nConsider restarting runaway processes or adding RAM.",
		})
	}

	// Swap-pressure tracking. The used percentage stays 0 unless both swap
	// counters are present and swap is configured.
	swapTotal, haveSwapTotal := info["SwapTotal"]
	swapFree, haveSwapFree := info["SwapFree"]
	var swapPct float64
	if haveSwapTotal && haveSwapFree && swapTotal > 0 {
		swapUsed := swapTotal - swapFree
		swapPct = (float64(swapUsed) / float64(swapTotal)) * 100.0
	}

	wantSwap, swapHold := desiredSwapPriority(availPct, swapTotal, swapPct)
	c.mu.Lock()
	c.swapPri, c.swapSince = updateSustained(ts, c.swapPri, c.swapSince, wantSwap)
	swapLevel, swapStart := c.swapPri, c.swapSince
	c.mu.Unlock()

	swapSustained := swapLevel != "" && !swapStart.IsZero() && ts.Sub(swapStart) >= swapHold
	if swapSustained {
		found = append(found, model.Issue{
			ID:       "host:mem:swap",
			Category: model.CategoryMemory,
			Priority: swapLevel,
			Title:    "High swap usage with low RAM",
			Details:  "Swap usage is high while available RAM is also low, indicating memory pressure.",
			Evidence: map[string]string{
				"swap_used_pct":     fmt.Sprintf("%.1f", swapPct),
				"swap_total_kb":     strconv.FormatInt(swapTotal, 10),
				"mem_available_pct": fmt.Sprintf("%.1f", availPct),
			},
			SuggestedFix: "Find swapping processes:\n vmstat 1\n smem -r 2>/dev/null || true\nConsider reducing memory usage or increasing RAM/swap.",
		})
	}

	return found, collectors.OKStatus(), nil
}
// parseProcMeminfo parses /proc/meminfo content into a map from key (without
// the trailing colon) to its integer value, e.g. "MemAvailable: 12345 kB"
// becomes {"MemAvailable": 12345}. Lines with fewer than two fields or a
// non-integer second field are ignored.
func parseProcMeminfo(content string) map[string]int64 {
	values := make(map[string]int64)
	sc := bufio.NewScanner(strings.NewReader(content))
	for sc.Scan() {
		// Fields already ignores leading and trailing whitespace.
		cols := strings.Fields(sc.Text())
		if len(cols) < 2 {
			continue
		}
		if n, err := strconv.ParseInt(cols[1], 10, 64); err == nil {
			values[strings.TrimSuffix(cols[0], ":")] = n
		}
	}
	return values
}
// desiredMemPriority maps MemAvailable (as a percentage of MemTotal) to the
// priority it warrants and the time that priority must be sustained:
// P0 at <= 5% (30s), P1 at <= 10% (60s), P2 at <= 15% (60s). Above 15% it
// returns ("", 0).
func desiredMemPriority(memAvailPct float64) (model.Priority, time.Duration) {
	if memAvailPct <= 5.0 {
		return model.PriorityP0, 30 * time.Second
	}
	if memAvailPct <= 10.0 {
		return model.PriorityP1, 60 * time.Second
	}
	if memAvailPct <= 15.0 {
		return model.PriorityP2, 60 * time.Second
	}
	return "", 0
}
// desiredSwapPriority maps swap usage to a priority and sustain window, but
// only when RAM is also tight: P0 at >= 80% swap used with <= 5% memory
// available (30s), P1 at >= 50% swap used with <= 10% memory available (60s).
// It returns ("", 0) otherwise, including when no swap is configured.
func desiredSwapPriority(memAvailPct float64, swapTotalKB int64, swapUsedPct float64) (model.Priority, time.Duration) {
	if swapTotalKB <= 0 {
		return "", 0
	}
	if swapUsedPct >= 80.0 && memAvailPct <= 5.0 {
		return model.PriorityP0, 30 * time.Second
	}
	if swapUsedPct >= 50.0 && memAvailPct <= 10.0 {
		return model.PriorityP1, 60 * time.Second
	}
	return "", 0
}
// updateSustained advances the tracked (priority, since) pair for one tick.
// An empty desired priority clears the state. If the desired priority matches
// the current one and a start time is recorded, the state is kept as is.
// Otherwise tracking restarts at now with the desired priority.
func updateSustained(now time.Time, current model.Priority, since time.Time, desired model.Priority) (model.Priority, time.Time) {
	switch {
	case desired == "":
		return "", time.Time{}
	case current == desired && !since.IsZero():
		return current, since
	default:
		return desired, now
	}
}
// Compile-time check that *MemCollector satisfies collectors.Collector.
var _ collectors.Collector = (*MemCollector)(nil)
+83
View File
@@ -0,0 +1,83 @@
package host
import (
"testing"
"time"
"tower/internal/model"
)
// TestParseProcMeminfo checks that MemTotal and MemAvailable are extracted
// from representative /proc/meminfo content.
func TestParseProcMeminfo(t *testing.T) {
	const input = "MemTotal: 8000000 kB\nMemAvailable: 800000 kB\nSwapTotal: 2000000 kB\nSwapFree: 500000 kB\n"
	parsed := parseProcMeminfo(input)

	if got := parsed["MemTotal"]; got != 8000000 {
		t.Fatalf("MemTotal mismatch: %d", got)
	}
	if got := parsed["MemAvailable"]; got != 800000 {
		t.Fatalf("MemAvailable mismatch: %d", got)
	}
}
// TestDesiredMemPriority checks the memory thresholds at their exact
// boundaries, including the sustain window returned for each priority.
func TestDesiredMemPriority(t *testing.T) {
	p, w := desiredMemPriority(16.0)
	if p != "" || w != 0 {
		t.Fatalf("expected none")
	}
	p, w = desiredMemPriority(15.0)
	if p != model.PriorityP2 || w != 60*time.Second {
		t.Fatalf("expected P2/60s got %v/%v", p, w)
	}
	// The window for P1 was previously assigned but never asserted.
	p, w = desiredMemPriority(10.0)
	if p != model.PriorityP1 || w != 60*time.Second {
		t.Fatalf("expected P1/60s got %v/%v", p, w)
	}
	p, w = desiredMemPriority(5.0)
	if p != model.PriorityP0 || w != 30*time.Second {
		t.Fatalf("expected P0/30s got %v/%v", p, w)
	}
}
// TestDesiredSwapPriority checks that swap alerts require configured swap and
// tight RAM, and that the P0 and P1 combinations yield the expected windows.
func TestDesiredSwapPriority(t *testing.T) {
	// No swap configured.
	pri, _ := desiredSwapPriority(4.0, 0, 90.0)
	if pri != "" {
		t.Fatalf("expected none when SwapTotal=0")
	}

	pri, window := desiredSwapPriority(4.0, 1000, 80.0)
	if !(pri == model.PriorityP0 && window == 30*time.Second) {
		t.Fatalf("expected P0/30s got %v/%v", pri, window)
	}

	pri, window = desiredSwapPriority(9.9, 1000, 50.0)
	if !(pri == model.PriorityP1 && window == 60*time.Second) {
		t.Fatalf("expected P1/60s got %v/%v", pri, window)
	}

	// Swap high but RAM not tight => no issue.
	pri, _ = desiredSwapPriority(20.0, 1000, 90.0)
	if pri != "" {
		t.Fatalf("expected none when RAM not tight")
	}
}
// TestUpdateSustained walks updateSustained through its four transitions:
// start tracking, keep tracking, restart on a priority change, and clear.
func TestUpdateSustained(t *testing.T) {
	base := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)

	pri, began := updateSustained(base, "", time.Time{}, model.PriorityP1)
	if !(pri == model.PriorityP1 && began.Equal(base)) {
		t.Fatalf("expected set to P1 at now")
	}

	keptPri, keptBegan := updateSustained(base.Add(1*time.Second), pri, began, model.PriorityP1)
	if !(keptPri == model.PriorityP1 && keptBegan.Equal(began)) {
		t.Fatalf("expected unchanged since")
	}

	changeAt := base.Add(2 * time.Second)
	newPri, newBegan := updateSustained(changeAt, keptPri, keptBegan, model.PriorityP0)
	if !(newPri == model.PriorityP0 && newBegan.Equal(changeAt)) {
		t.Fatalf("expected reset on priority change")
	}

	clearedPri, clearedBegan := updateSustained(base.Add(3*time.Second), newPri, newBegan, "")
	if !(clearedPri == "" && clearedBegan.IsZero()) {
		t.Fatalf("expected cleared")
	}
}
+138
View File
@@ -0,0 +1,138 @@
package host
import (
"bufio"
"context"
"os"
"path/filepath"
"strings"
"time"
"tower/internal/collectors"
"tower/internal/model"
)
// NetCollector checks for missing default route while at least one non-loopback
// interface is up.
//
// Rule (PLAN.md):
// - P1 if no default route AND any non-loopback interface is UP.
//
// Discovery:
// - Default route from /proc/net/route
// - Interface UP from /sys/class/net/*/operstate
//
// NOTE: Linux-specific.
type NetCollector struct {
// interval is the collection period; Interval falls back to 5s when it is <= 0.
interval time.Duration
// readFile reads a whole file; NewNetCollector sets it to os.ReadFile.
readFile func(string) ([]byte, error)
// glob expands a path pattern; NewNetCollector sets it to filepath.Glob.
glob func(string) ([]string, error)
}
// NewNetCollector returns a NetCollector that runs every 5 seconds and is
// wired to the real os.ReadFile and filepath.Glob.
func NewNetCollector() *NetCollector {
	nc := new(NetCollector)
	nc.interval = 5 * time.Second
	nc.readFile = os.ReadFile
	nc.glob = filepath.Glob
	return nc
}
// Name returns the collector identifier "host:net".
func (c *NetCollector) Name() string {
	const name = "host:net"
	return name
}
// Interval returns the configured collection period, or 5 seconds when the
// configured value is zero or negative.
func (c *NetCollector) Interval() time.Duration {
	if d := c.interval; d > 0 {
		return d
	}
	return 5 * time.Second
}
// Collect emits a single P1 "host:net:default-route-missing" issue when
// /proc/net/route has no default route while at least one non-loopback
// interface under /sys/class/net reports an up state.
//
// It returns HealthError with the error when ctx is done, the route table
// cannot be read, or the interface glob fails. Interfaces whose operstate file
// cannot be read are silently ignored.
func (c *NetCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, ctxErr
	}

	routes, readErr := c.readFile("/proc/net/route")
	if readErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/net/route"}, readErr
	}
	defaultPresent := hasDefaultRoute(string(routes))

	statePaths, globErr := c.glob("/sys/class/net/*/operstate")
	if globErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "failed listing /sys/class/net"}, globErr
	}

	active := make([]string, 0, 2)
	for _, statePath := range statePaths {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, ctxErr
		}
		raw, err := c.readFile(statePath)
		if err != nil {
			continue
		}
		// The interface name is the directory that contains the operstate file.
		name := filepath.Base(filepath.Dir(statePath))
		if name == "lo" || !isIfaceUp(strings.TrimSpace(string(raw))) {
			continue
		}
		active = append(active, name)
	}

	if defaultPresent || len(active) == 0 {
		return nil, collectors.OKStatus(), nil
	}

	issue := model.Issue{
		ID:       "host:net:default-route-missing",
		Category: model.CategoryNetwork,
		Priority: model.PriorityP1,
		Title:    "No default route",
		Details:  "At least one network interface is up, but no default route is present.",
		Evidence: map[string]string{
			"up_ifaces": strings.Join(active, ","),
		},
		SuggestedFix: "Check routing and link state:\n ip route\n ip link\n nmcli dev status\nIf on Wi-Fi, reconnect; if on VPN, verify tunnel routes.",
	}
	return []model.Issue{issue}, collectors.OKStatus(), nil
}
// hasDefaultRoute reports whether /proc/net/route content contains a row
// whose Destination column (the second field) is 00000000.
//
// The table header is
//
//	Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT
//
// and is skipped only when it is the first non-empty line.
func hasDefaultRoute(procNetRoute string) bool {
	const defaultDest = "00000000"

	sc := bufio.NewScanner(strings.NewReader(procNetRoute))
	seenFirst := false
	for sc.Scan() {
		row := strings.TrimSpace(sc.Text())
		if row == "" {
			continue
		}
		isFirst := !seenFirst
		seenFirst = true
		// Skip the header if present.
		if isFirst && strings.HasPrefix(row, "Iface") {
			continue
		}
		if cols := strings.Fields(row); len(cols) >= 2 && cols[1] == defaultDest {
			return true
		}
	}
	return false
}
// isIfaceUp reports whether an operstate value counts as "up". Linux operstate
// values include up, down, unknown, dormant and lowerlayerdown; "up" and
// "unknown" are accepted, compared case-insensitively after trimming
// surrounding whitespace.
func isIfaceUp(operstate string) bool {
	switch strings.ToLower(strings.TrimSpace(operstate)) {
	case "up", "unknown":
		return true
	default:
		return false
	}
}
// Compile-time check that *NetCollector satisfies collectors.Collector.
var _ collectors.Collector = (*NetCollector)(nil)
+28
View File
@@ -0,0 +1,28 @@
package host
import "testing"
// TestHasDefaultRoute checks that a row with an all-zero destination is
// detected and that an all-zero gateway alone is not mistaken for one.
func TestHasDefaultRoute(t *testing.T) {
	const withDefault = "Iface\tDestination\tGateway\tFlags\n" +
		"eth0\t00000000\t0102A8C0\t0003\n"
	if found := hasDefaultRoute(withDefault); !found {
		t.Fatalf("expected default route")
	}

	const withoutDefault = "Iface Destination Gateway Flags\n" +
		"eth0 0010A8C0 00000000 0001\n"
	if found := hasDefaultRoute(withoutDefault); found {
		t.Fatalf("expected no default route")
	}
}
// TestIsIfaceUp checks that "up" and "unknown" count as up and "down" does not.
func TestIsIfaceUp(t *testing.T) {
	if up := isIfaceUp("up\n"); !up {
		t.Fatalf("expected true")
	}
	if up := isIfaceUp("unknown"); !up {
		t.Fatalf("expected true for unknown")
	}
	if up := isIfaceUp("down"); up {
		t.Fatalf("expected false")
	}
}
+88
View File
@@ -0,0 +1,88 @@
package k8s
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"time"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
)
// ClientFromCurrentContext builds a client-go Clientset, and the rest.Config
// it was created from, using the current context of the user's kubeconfig.
//
// It holds no global state, so collectors and unit tests (with temporary
// kubeconfig files) can both call it. When KUBECONFIG names several files they
// are used as the loading precedence; when it names a single file that file is
// used as the explicit path. A rest.Config without a positive timeout gets a
// 30-second one.
func ClientFromCurrentContext() (*kubernetes.Clientset, *rest.Config, error) {
	rules := clientcmd.NewDefaultClientConfigLoadingRules()

	// Respect KUBECONFIG semantics (it may be a path list).
	if env := os.Getenv("KUBECONFIG"); env != "" {
		paths := filepath.SplitList(env)
		switch {
		case len(paths) > 1:
			rules.ExplicitPath = ""
			rules.Precedence = paths
		default:
			rules.ExplicitPath = env
		}
	}

	overrides := &clientcmd.ConfigOverrides{}
	restCfg, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(rules, overrides).ClientConfig()
	if err != nil {
		return nil, nil, err
	}

	// Ensure HTTP client timeouts are bounded. LIST fallback uses its own
	// context timeouts, but this provides a safety net.
	if restCfg.Timeout <= 0 {
		restCfg.Timeout = 30 * time.Second
	}

	clientset, err := kubernetes.NewForConfig(restCfg)
	if err != nil {
		return nil, nil, err
	}
	return clientset, restCfg, nil
}
// defaultKubeconfigPath returns the kubeconfig path to use for existence
// checks and UI messages. Client loading should use client-go's default
// loading rules instead.
//
// When KUBECONFIG is set, it is treated as a path list and the first non-empty
// entry is returned, so a leading separator (for example ":/path/config") does
// not produce an empty path. If every entry is empty the result is "".
// Without KUBECONFIG it returns $HOME/.kube/config, or "" when the home
// directory cannot be determined.
func defaultKubeconfigPath() string {
	if env := os.Getenv("KUBECONFIG"); env != "" {
		for _, entry := range filepath.SplitList(env) {
			if entry != "" {
				return entry
			}
		}
		return ""
	}

	home, err := os.UserHomeDir()
	if err != nil {
		return ""
	}
	return filepath.Join(home, ".kube", "config")
}
// Ping performs a lightweight API call (a discovery server-version request) to
// determine if the cluster is reachable and authentication works.
//
// It returns an error for a nil client. Forbidden and unauthorized responses
// are wrapped with a "discovery auth" prefix so callers can tell "insufficient
// credentials" apart from "unreachable"; every other failure is wrapped with a
// "discovery server version" prefix.
//
// NOTE(review): ctx is accepted but not used by this body; the discovery call
// made here takes no context.
func Ping(ctx context.Context, cs kubernetes.Interface) error {
	if cs == nil {
		return errors.New("nil kubernetes client")
	}

	_, err := cs.Discovery().ServerVersion()
	switch {
	case err == nil:
		return nil
	case apierrors.IsForbidden(err), apierrors.IsUnauthorized(err):
		return fmt.Errorf("discovery auth: %w", err)
	default:
		return fmt.Errorf("discovery server version: %w", err)
	}
}
+720
View File
@@ -0,0 +1,720 @@
package k8s
import (
"context"
"fmt"
"os"
"path/filepath"
"sort"
"sync"
"time"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
appslisters "k8s.io/client-go/listers/apps/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"tower/internal/collectors"
"tower/internal/model"
)
// Collector is the ControlTower Kubernetes collector.
//
// It uses client-go informers (LIST+WATCH with local caches) against the user's
// kubeconfig current context, across all namespaces.
//
// Degradation behavior:
// - If WATCH fails repeatedly, it falls back to polling LIST and emits a P1
// "degraded to polling" issue.
// - While in polling mode, it periodically attempts to recover back to watches.
// - If the cluster is unreachable, it emits a P0 only after 10s continuous failure.
// - If RBAC forbids list/watch for a resource, it emits a single P2 issue per
// inaccessible resource and continues for accessible resources.
//
// Noise control:
// - Rollups group by (namespace, reason, kind) when group size >= 20.
// - Cap max issues to 200 after rollups.
//
// Instantiate with NewCollector().
type Collector struct {
// interval is the collection period; Interval falls back to 2s when it is <= 0.
interval time.Duration
// unreachableGrace is passed to the unreachable tracker (10s by default).
unreachableGrace time.Duration
// pendingGrace, workloadGrace and crashLoopThresh default to 120s, 180s and 5.
// NOTE(review): presumably thresholds for pod/workload checks; their use is
// outside this view.
pendingGrace time.Duration
workloadGrace time.Duration
crashLoopThresh int
// rollupThreshold is the group size at which issues are rolled up (20 by default).
rollupThreshold int
// maxIssues caps the number of issues after rollups (200 by default).
maxIssues int
// watchFailureThreshold and watchFailureWindow default to 5 and 30s.
// NOTE(review): presumably the failure count/window that triggers polling.
watchFailureThreshold int
watchFailureWindow time.Duration
// pollRecoverEvery defaults to 30s.
// NOTE(review): presumably the spacing of informer recovery attempts.
pollRecoverEvery time.Duration
// NOTE(review): which fields mu guards is not visible in this view.
mu sync.Mutex
syncWG sync.WaitGroup
// client is the Kubernetes client used for Ping and collection.
client kubernetes.Interface
// Informer plumbing and the listers backed by it.
factory informers.SharedInformerFactory
stopCh chan struct{}
started bool
syncedFns []cache.InformerSynced
podsLister corelisters.PodLister
nodesLister corelisters.NodeLister
eventsLister corelisters.EventLister
deployLister appslisters.DeploymentLister
statefulSetLister appslisters.StatefulSetLister
daemonSetLister appslisters.DaemonSetLister
// polling indicates we have degraded from informers to list polling.
polling bool
pollSince time.Time
lastPollRecoverAttempt time.Time
watchFailWindowStart time.Time
watchFailCount int
// rbacDenied is keyed by resource name ("pods", "nodes", ...).
rbacDenied map[string]error
// unreach tracks consecutive connectivity failures and the grace period.
unreach *unreachableTracker
// lastSuccess is the time of the most recent successful connectivity check.
lastSuccess time.Time
}
// NewCollector returns a Kubernetes Collector populated with its default
// intervals, grace periods and thresholds, an empty RBAC-denied map, and an
// unreachable tracker configured with the unreachable grace period.
func NewCollector() *Collector {
	kc := new(Collector)

	kc.interval = 2 * time.Second
	kc.unreachableGrace = 10 * time.Second
	kc.pendingGrace = 120 * time.Second
	kc.workloadGrace = 180 * time.Second
	kc.crashLoopThresh = 5
	kc.rollupThreshold = 20
	kc.maxIssues = 200
	kc.watchFailureThreshold = 5
	kc.watchFailureWindow = 30 * time.Second
	kc.pollRecoverEvery = 30 * time.Second
	kc.rbacDenied = make(map[string]error)

	kc.unreach = newUnreachableTracker(kc.unreachableGrace)
	return kc
}
// Compile-time check that *Collector satisfies collectors.Collector.
var _ collectors.Collector = (*Collector)(nil)
// Name returns the collector identifier, "k8s".
func (c *Collector) Name() string {
	return "k8s"
}
// Interval returns the configured tick interval, or 2s when the configured
// value is zero or negative.
func (c *Collector) Interval() time.Duration {
	if d := c.interval; d > 0 {
		return d
	}
	return 2 * time.Second
}
// Collect runs one collection tick and returns the Kubernetes issues that are
// currently true, together with the collector's health for this tick.
//
// Apart from a canceled context, failures are reported through the returned
// Status and issues rather than through the error value:
//   - no kubeconfig on disk: no issues, HealthDegraded;
//   - client construction or ping failure: HealthError, plus the unreachable
//     issue once the unreachable tracker says the grace period has elapsed;
//   - otherwise issues come from informer caches, or from LIST calls while the
//     collector is in polling mode or the caches have not synced yet.
//
// Every returned Status carries the last successful collect time so callers
// can show how stale the data is even on failing ticks.
func (c *Collector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
	now := time.Now()

	if err := ctx.Err(); err != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled", LastSuccess: c.lastSuccess}, err
	}

	// If kubeconfig doesn't exist, treat Kubernetes as "disabled".
	if !kubeconfigExists() {
		return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "kubeconfig not found", LastSuccess: c.lastSuccess}, nil
	}

	// unreachable records a connectivity failure and builds the result for it:
	// only a status while still inside the grace period, status plus the
	// unreachable issue afterwards.
	unreachable := func(err error, graceMsg string) ([]model.Issue, collectors.Status, error) {
		c.unreach.observeFailure(now, err)
		st := collectors.Status{Health: collectors.HealthError, Message: graceMsg, LastSuccess: c.lastSuccess}
		if !c.unreach.shouldEmit(now) {
			return nil, st, nil
		}
		st.Message = "unreachable"
		return []model.Issue{stampIssueTimes(now, unreachableIssue(err))}, st, nil
	}

	if err := c.ensureClient(); err != nil {
		return unreachable(err, "k8s client init failed (grace)")
	}

	// Connectivity/auth check with grace.
	if err := Ping(ctx, c.client); err != nil {
		return unreachable(err, "k8s unreachable (grace)")
	}
	c.unreach.observeSuccess()
	c.lastSuccess = now

	// Prefer informers unless currently degraded to polling.
	if c.isPolling() {
		c.maybeRecoverInformers(ctx, now)
	}
	if !c.isPolling() {
		// ensureInformers only fails on a nil client, which ensureClient has
		// already ruled out above.
		_ = c.ensureInformers(ctx)
	}

	issues := make([]model.Issue, 0, 64)
	issues = append(issues, c.rbacIssues()...)

	st := collectors.Status{Health: collectors.HealthOK, LastSuccess: c.lastSuccess}
	switch {
	case c.isPolling():
		st.Health = collectors.HealthDegraded
		st.Message = "degraded to polling"
		issues = append(issues, pollingDegradedIssue())
		issues = append(issues, c.collectByPolling(ctx, now)...)
	case !c.cachesSyncedQuick(ctx):
		// If caches aren't ready, use polling for this tick only.
		st.Health = collectors.HealthDegraded
		st.Message = "waiting for informer cache; used list"
		issues = append(issues, c.collectByPolling(ctx, now)...)
	default:
		issues = append(issues, c.collectFromCaches(now)...)
		if len(c.snapshotRBACDenied()) > 0 {
			st.Health = collectors.HealthDegraded
			st.Message = "partial RBAC access"
		}
	}

	// Roll up first, then stamp: Rollup synthesizes new issues, and stamping
	// afterwards guarantees those carry timestamps too before sorting/capping.
	issues = Rollup(issues, c.rollupThreshold, 5)
	for i := range issues {
		issues[i] = stampIssueTimes(now, issues[i])
	}
	model.SortIssuesDefault(issues)
	issues = CapIssues(issues, c.maxIssues)
	return issues, st, nil
}
// ensureClient lazily builds the Kubernetes client from the current kubeconfig
// context. Once a client has been stored it is reused; a construction error is
// returned to the caller and nothing is stored.
func (c *Collector) ensureClient() error {
	c.mu.Lock()
	defer c.mu.Unlock()

	if c.client == nil {
		clientset, _, err := ClientFromCurrentContext()
		if err != nil {
			return err
		}
		c.client = clientset
	}
	return nil
}
// kubeconfigExists reports whether a kubeconfig can be stat'ed on disk.
//
// When KUBECONFIG is set, only the paths it lists are considered and the
// default location is not consulted; otherwise the default kubeconfig path is
// checked.
func kubeconfigExists() bool {
	statOK := func(path string) bool {
		_, err := os.Stat(path)
		return err == nil
	}

	env := os.Getenv("KUBECONFIG")
	if env == "" {
		def := defaultKubeconfigPath()
		return def != "" && statOK(def)
	}

	for _, candidate := range filepath.SplitList(env) {
		if candidate != "" && statOK(candidate) {
			return true
		}
	}
	return false
}
// ensureInformers builds and starts the shared informers if they are not
// already running. It is a no-op while informers are started or while the
// collector is in polling mode, and it fails only when no client is available.
//
// An RBAC preflight runs first so that forbidden resources get no informer
// (and no lister). Cache sync is awaited in a background goroutine tracked by
// syncWG; that goroutine ends when the caches sync, after 10s, or as soon as
// the informers' stop channel is closed, whichever comes first.
func (c *Collector) ensureInformers(ctx context.Context) error {
	c.mu.Lock()
	if c.started || c.polling {
		c.mu.Unlock()
		return nil
	}
	client := c.client
	c.mu.Unlock()
	if client == nil {
		return fmt.Errorf("nil kubernetes client")
	}

	// RBAC preflight before we even construct informers (so we can skip forbidden ones).
	c.preflightRBAC(ctx, client)

	factory := informers.NewSharedInformerFactory(client, 0)
	synced := make([]cache.InformerSynced, 0, 6)

	// register wires up one resource unless it is RBAC-denied. build must
	// create the typed informer lazily (touching it registers it with the
	// factory), store its lister on c, and return the underlying informer.
	register := func(resource string, build func() cache.SharedIndexInformer) {
		if c.isRBACDenied(resource) {
			return
		}
		inf := build()
		// The informer comes from a factory that has not been started yet;
		// presumably the handler can only be rejected after start, so the
		// returned error is not actionable here.
		_ = inf.SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError(resource, err) })
		synced = append(synced, inf.HasSynced)
	}

	register("pods", func() cache.SharedIndexInformer {
		i := factory.Core().V1().Pods()
		c.mu.Lock()
		c.podsLister = i.Lister()
		c.mu.Unlock()
		return i.Informer()
	})
	register("nodes", func() cache.SharedIndexInformer {
		i := factory.Core().V1().Nodes()
		c.mu.Lock()
		c.nodesLister = i.Lister()
		c.mu.Unlock()
		return i.Informer()
	})
	register("events", func() cache.SharedIndexInformer {
		i := factory.Core().V1().Events()
		c.mu.Lock()
		c.eventsLister = i.Lister()
		c.mu.Unlock()
		return i.Informer()
	})
	register("deployments", func() cache.SharedIndexInformer {
		i := factory.Apps().V1().Deployments()
		c.mu.Lock()
		c.deployLister = i.Lister()
		c.mu.Unlock()
		return i.Informer()
	})
	register("statefulsets", func() cache.SharedIndexInformer {
		i := factory.Apps().V1().StatefulSets()
		c.mu.Lock()
		c.statefulSetLister = i.Lister()
		c.mu.Unlock()
		return i.Informer()
	})
	register("daemonsets", func() cache.SharedIndexInformer {
		i := factory.Apps().V1().DaemonSets()
		c.mu.Lock()
		c.daemonSetLister = i.Lister()
		c.mu.Unlock()
		return i.Informer()
	})

	stopCh := make(chan struct{})
	c.mu.Lock()
	c.factory = factory
	c.stopCh = stopCh
	c.started = true
	c.syncedFns = synced
	c.mu.Unlock()

	factory.Start(stopCh)

	c.syncWG.Add(1)
	go func() {
		defer c.syncWG.Done()

		syncCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancel()

		// Give up as soon as the informers are stopped: their caches can no
		// longer sync, and recordWatchError waits on syncWG right after
		// closing stopCh. This watcher exits once syncCtx is done.
		go func() {
			select {
			case <-stopCh:
				cancel()
			case <-syncCtx.Done():
			}
		}()

		ok := cache.WaitForCacheSync(syncCtx.Done(), synced...)
		// Only a genuine timeout is worth reporting; a deliberate stop is not.
		// NOTE(review): this writes to stdout, which may interfere with the
		// TUI renderer; consider routing it through the collector Status.
		if !ok && syncCtx.Err() == context.DeadlineExceeded {
			fmt.Printf("k8s: informer cache sync failed or timed out\n")
		}
	}()
	return nil
}
// maybeRecoverInformers attempts to leave polling mode and go back to
// informers, at most once per pollRecoverEvery (30s when unset).
//
// The interval is measured from whichever is later: the previous recovery
// attempt or the moment polling mode was entered. Without the latter, the
// first degradation would be undone on the very next tick because no attempt
// has been recorded yet.
//
// Callers are expected to have verified connectivity already (Collect pings
// before calling this). On an attempt the watch-failure counters are reset,
// polling is cleared and ensureInformers is invoked.
func (c *Collector) maybeRecoverInformers(ctx context.Context, now time.Time) {
	c.mu.Lock()
	interval := c.pollRecoverEvery
	if interval <= 0 {
		interval = 30 * time.Second
	}
	baseline := c.lastPollRecoverAttempt
	if c.pollSince.After(baseline) {
		baseline = c.pollSince
	}
	if !baseline.IsZero() && now.Sub(baseline) < interval {
		c.mu.Unlock()
		return
	}

	// Decide and flip state in one critical section so a concurrent
	// recordWatchError cannot interleave between the check and the reset.
	c.lastPollRecoverAttempt = now
	c.polling = false
	c.pollSince = time.Time{}
	c.watchFailWindowStart = time.Time{}
	c.watchFailCount = 0
	c.mu.Unlock()

	// ensureInformers only fails on a nil client; Collect has already
	// established one before calling this.
	_ = c.ensureInformers(ctx)
}
// preflightRBAC issues a one-item LIST against each watched resource and
// records the resources for which the API server answers Forbidden. Any other
// error is ignored here. All probes share a single 2s deadline derived from
// ctx.
func (c *Collector) preflightRBAC(ctx context.Context, client kubernetes.Interface) {
	probeCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
	defer cancel()

	one := metav1.ListOptions{Limit: 1}
	probes := []struct {
		resource string
		list     func() error
	}{
		{"nodes", func() error {
			_, err := client.CoreV1().Nodes().List(probeCtx, one)
			return err
		}},
		{"pods", func() error {
			_, err := client.CoreV1().Pods(metav1.NamespaceAll).List(probeCtx, one)
			return err
		}},
		{"deployments", func() error {
			_, err := client.AppsV1().Deployments(metav1.NamespaceAll).List(probeCtx, one)
			return err
		}},
		{"statefulsets", func() error {
			_, err := client.AppsV1().StatefulSets(metav1.NamespaceAll).List(probeCtx, one)
			return err
		}},
		{"daemonsets", func() error {
			_, err := client.AppsV1().DaemonSets(metav1.NamespaceAll).List(probeCtx, one)
			return err
		}},
		{"events", func() error {
			_, err := client.CoreV1().Events(metav1.NamespaceAll).List(probeCtx, one)
			return err
		}},
	}

	for _, p := range probes {
		if err := p.list(); err != nil && apierrors.IsForbidden(err) {
			c.noteRBAC(p.resource, err)
		}
	}
}
// noteRBAC remembers that resource is forbidden for the current credentials.
// Only Forbidden errors are recorded, and the first error seen for a resource
// is kept.
func (c *Collector) noteRBAC(resource string, err error) {
	if err != nil && apierrors.IsForbidden(err) {
		c.mu.Lock()
		if _, seen := c.rbacDenied[resource]; !seen {
			c.rbacDenied[resource] = err
		}
		c.mu.Unlock()
	}
}
// isRBACDenied reports whether resource has been recorded as forbidden.
func (c *Collector) isRBACDenied(resource string) bool {
	c.mu.Lock()
	_, denied := c.rbacDenied[resource]
	c.mu.Unlock()
	return denied
}
// snapshotRBACDenied returns a copy of the denied-resource map that the caller
// may read without holding the collector's lock.
func (c *Collector) snapshotRBACDenied() map[string]error {
	c.mu.Lock()
	snapshot := make(map[string]error, len(c.rbacDenied))
	for resource, cause := range c.rbacDenied {
		snapshot[resource] = cause
	}
	c.mu.Unlock()
	return snapshot
}
// recordWatchError is the informers' watch error handler.
//
// Forbidden errors are recorded as RBAC denials and do not count as watch
// failures. Other errors are counted inside a sliding window; once the count
// reaches watchFailureThreshold the collector switches to polling: the stop
// channel is closed, informer state is cleared, and the cache-sync goroutine
// tracked by syncWG is awaited while c.mu is still held.
func (c *Collector) recordWatchError(resource string, err error) {
	switch {
	case err == nil:
		return
	case apierrors.IsForbidden(err):
		c.noteRBAC(resource, err)
		return
	}

	observed := time.Now()

	c.mu.Lock()
	defer c.mu.Unlock()

	// Already degraded: nothing more to count.
	if c.polling {
		return
	}

	windowExpired := c.watchFailWindowStart.IsZero() ||
		observed.Sub(c.watchFailWindowStart) > c.watchFailureWindow
	if windowExpired {
		c.watchFailWindowStart = observed
		c.watchFailCount = 0
	}

	c.watchFailCount++
	if c.watchFailCount < c.watchFailureThreshold {
		return
	}

	// Threshold reached: degrade to polling and tear the informers down.
	c.polling = true
	c.pollSince = observed
	if ch := c.stopCh; ch != nil {
		close(ch)
		c.stopCh = nil
	}
	c.started = false
	c.factory = nil
	c.syncedFns = nil
	c.syncWG.Wait()
}
// cachesSyncedQuick reports whether every registered informer cache has
// synced, waiting at most 200ms (or until ctx is done). It returns false when
// no informers are registered.
func (c *Collector) cachesSyncedQuick(ctx context.Context) bool {
	c.mu.Lock()
	fns := make([]cache.InformerSynced, len(c.syncedFns))
	copy(fns, c.syncedFns)
	c.mu.Unlock()

	if len(fns) == 0 {
		return false
	}

	waitCtx, cancel := context.WithTimeout(ctx, 200*time.Millisecond)
	defer cancel()
	return cache.WaitForCacheSync(waitCtx.Done(), fns...)
}
// collectFromCaches evaluates the rule functions against the informer caches.
//
// Resources that are RBAC-denied or have no lister are skipped. Each lister
// result is handed to its rule function as-is; it is not copied first. A
// lister error is treated as "no data for this resource on this tick".
func (c *Collector) collectFromCaches(now time.Time) []model.Issue {
	// Snapshot everything we need under one lock, then work lock-free.
	c.mu.Lock()
	podsLister := c.podsLister
	nodesLister := c.nodesLister
	eventsLister := c.eventsLister
	deployLister := c.deployLister
	stsLister := c.statefulSetLister
	dsLister := c.daemonSetLister
	denied := make(map[string]error, len(c.rbacDenied))
	for k, v := range c.rbacDenied {
		denied[k] = v
	}
	c.mu.Unlock()

	allowed := func(resource string) bool {
		_, blocked := denied[resource]
		return !blocked
	}

	issues := make([]model.Issue, 0, 64)
	sel := labels.Everything()

	if allowed("nodes") && nodesLister != nil {
		if nodes, err := nodesLister.List(sel); err == nil {
			issues = append(issues, IssuesFromNodes(nodes)...)
		}
	}
	if allowed("pods") && podsLister != nil {
		if pods, err := podsLister.List(sel); err == nil {
			issues = append(issues, IssuesFromPods(pods, now, c.pendingGrace, c.crashLoopThresh)...)
		}
	}
	if allowed("deployments") && deployLister != nil {
		if deps, err := deployLister.List(sel); err == nil {
			issues = append(issues, IssuesFromDeployments(deps, now, c.workloadGrace)...)
		}
	}
	if allowed("statefulsets") && stsLister != nil {
		if sts, err := stsLister.List(sel); err == nil {
			issues = append(issues, IssuesFromStatefulSets(sts, now, c.workloadGrace)...)
		}
	}
	if allowed("daemonsets") && dsLister != nil {
		if dss, err := dsLister.List(sel); err == nil {
			issues = append(issues, IssuesFromDaemonSets(dss, now, c.workloadGrace)...)
		}
	}
	if allowed("events") && eventsLister != nil {
		if evs, err := eventsLister.List(sel); err == nil {
			issues = append(issues, IssuesFromEvents(evs, now)...)
		}
	}
	return issues
}
// collectByPolling evaluates the rule functions against fresh LIST results
// from the API server instead of informer caches.
//
// RBAC-denied resources are skipped. A LIST error is passed to noteRBAC (which
// records it only when it is Forbidden) and the resource contributes no issues
// on this tick. Returns nil when no client has been created yet.
func (c *Collector) collectByPolling(ctx context.Context, now time.Time) []model.Issue {
	c.mu.Lock()
	client := c.client
	denied := make(map[string]error, len(c.rbacDenied))
	for resource, cause := range c.rbacDenied {
		denied[resource] = cause
	}
	c.mu.Unlock()

	if client == nil {
		return nil
	}

	allowed := func(resource string) bool {
		_, blocked := denied[resource]
		return !blocked
	}
	everything := metav1.NamespaceAll

	issues := make([]model.Issue, 0, 64)

	if allowed("nodes") {
		resp, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
		if err == nil {
			ptrs := make([]*corev1.Node, len(resp.Items))
			for i := range resp.Items {
				ptrs[i] = &resp.Items[i]
			}
			issues = append(issues, IssuesFromNodes(ptrs)...)
		} else {
			c.noteRBAC("nodes", err)
		}
	}

	if allowed("pods") {
		resp, err := client.CoreV1().Pods(everything).List(ctx, metav1.ListOptions{})
		if err == nil {
			ptrs := make([]*corev1.Pod, len(resp.Items))
			for i := range resp.Items {
				ptrs[i] = &resp.Items[i]
			}
			issues = append(issues, IssuesFromPods(ptrs, now, c.pendingGrace, c.crashLoopThresh)...)
		} else {
			c.noteRBAC("pods", err)
		}
	}

	if allowed("deployments") {
		resp, err := client.AppsV1().Deployments(everything).List(ctx, metav1.ListOptions{})
		if err == nil {
			ptrs := make([]*appsv1.Deployment, len(resp.Items))
			for i := range resp.Items {
				ptrs[i] = &resp.Items[i]
			}
			issues = append(issues, IssuesFromDeployments(ptrs, now, c.workloadGrace)...)
		} else {
			c.noteRBAC("deployments", err)
		}
	}

	if allowed("statefulsets") {
		resp, err := client.AppsV1().StatefulSets(everything).List(ctx, metav1.ListOptions{})
		if err == nil {
			ptrs := make([]*appsv1.StatefulSet, len(resp.Items))
			for i := range resp.Items {
				ptrs[i] = &resp.Items[i]
			}
			issues = append(issues, IssuesFromStatefulSets(ptrs, now, c.workloadGrace)...)
		} else {
			c.noteRBAC("statefulsets", err)
		}
	}

	if allowed("daemonsets") {
		resp, err := client.AppsV1().DaemonSets(everything).List(ctx, metav1.ListOptions{})
		if err == nil {
			ptrs := make([]*appsv1.DaemonSet, len(resp.Items))
			for i := range resp.Items {
				ptrs[i] = &resp.Items[i]
			}
			issues = append(issues, IssuesFromDaemonSets(ptrs, now, c.workloadGrace)...)
		} else {
			c.noteRBAC("daemonsets", err)
		}
	}

	if allowed("events") {
		resp, err := client.CoreV1().Events(everything).List(ctx, metav1.ListOptions{})
		if err == nil {
			ptrs := make([]*corev1.Event, len(resp.Items))
			for i := range resp.Items {
				ptrs[i] = &resp.Items[i]
			}
			issues = append(issues, IssuesFromEvents(ptrs, now)...)
		} else {
			c.noteRBAC("events", err)
		}
	}

	return issues
}
// rbacIssues returns one P2 issue per RBAC-denied resource, ordered by
// resource name so the output is deterministic.
func (c *Collector) rbacIssues() []model.Issue {
	denied := c.snapshotRBACDenied()

	resources := make([]string, 0, len(denied))
	for resource := range denied {
		resources = append(resources, resource)
	}
	sort.Strings(resources)

	issues := make([]model.Issue, 0, len(resources))
	for _, resource := range resources {
		evidence := map[string]string{
			"kind":      "Cluster",
			"reason":    "RBAC",
			"namespace": "",
			"resource":  resource,
		}
		issue := model.Issue{
			ID:           fmt.Sprintf("k8s:rbac:%s", resource),
			Category:     model.CategoryKubernetes,
			Priority:     model.PriorityP2,
			Title:        fmt.Sprintf("Insufficient RBAC: list/watch %s", resource),
			Details:      fmt.Sprintf("Current context cannot access %s (forbidden). %s", resource, sanitizeError(denied[resource])),
			Evidence:     evidence,
			SuggestedFix: fmt.Sprintf("kubectl auth can-i list %s --all-namespaces", resource),
		}
		issues = append(issues, issue)
	}
	return issues
}
// pollingDegradedIssue builds the P1 cluster-level issue that is reported
// while the collector has fallen back from watches to LIST polling.
func pollingDegradedIssue() model.Issue {
	evidence := map[string]string{
		"kind":      "Cluster",
		"reason":    "DegradedPolling",
		"namespace": "",
	}

	issue := model.Issue{
		ID:       "k8s:cluster:polling",
		Category: model.CategoryKubernetes,
		Priority: model.PriorityP1,
		Title:    "Kubernetes degraded: polling (watch failing)",
		Evidence: evidence,
	}
	issue.Details = "Kubernetes watches have failed repeatedly; collector switched to LIST polling. Data may be less real-time and API load is higher."
	issue.SuggestedFix = "Check API server / network stability and RBAC; ensure watch endpoints are reachable."
	return issue
}
// stampIssueTimes returns iss with LastSeen set to now, and FirstSeen set to
// now when it was not set before.
func stampIssueTimes(now time.Time, iss model.Issue) model.Issue {
	if iss.FirstSeen.IsZero() {
		iss.FirstSeen = now
	}
	iss.LastSeen = now
	return iss
}
// isPolling reports whether the collector is currently in polling mode.
func (c *Collector) isPolling() bool {
	c.mu.Lock()
	polling := c.polling
	c.mu.Unlock()
	return polling
}
+101
View File
@@ -0,0 +1,101 @@
package k8s
import (
"fmt"
"strings"
"time"
corev1 "k8s.io/api/core/v1"
"tower/internal/model"
)
// warningEventReasons is the allow-list of Event reasons that IssuesFromEvents
// turns into issues; Warning events with any other reason are ignored.
// The lookup is an exact, case-sensitive match on Event.Reason.
var warningEventReasons = map[string]struct{}{
	"FailedScheduling": {},
	"FailedMount": {},
	"BackOff": {},
	"Unhealthy": {},
	"OOMKilling": {},
	"FailedPull": {},
	"Forbidden": {},
	"ErrImagePull": {},
	"ImagePullBackOff": {},
}
// IssuesFromEvents applies the PLAN.md Event rules.
//
// Only Warning events whose reason is listed in warningEventReasons produce an
// issue (P2). Nil entries are skipped.
//
// Dedup by (object UID, reason). For v1 Events, this is approximated by
// (involvedObject.uid, reason). When the involved object carries no UID, the
// object is identified by namespace/kind/name instead, so that unrelated
// objects do not collapse into a single issue with a shared ID. The first
// event seen for a key wins.
//
// now is accepted for signature parity with the other rule functions and is
// currently unused.
func IssuesFromEvents(events []*corev1.Event, now time.Time) []model.Issue {
	_ = now

	out := make([]model.Issue, 0, 16)
	seen := make(map[string]struct{}, len(events))
	for _, e := range events {
		if e == nil {
			continue
		}
		if !strings.EqualFold(e.Type, string(corev1.EventTypeWarning)) {
			continue
		}
		if _, ok := warningEventReasons[e.Reason]; !ok {
			continue
		}

		obj := &e.InvolvedObject
		ns := obj.Namespace
		if ns == "" {
			ns = e.Namespace
		}

		// identity is what makes the issue unique per object; prefer the UID
		// and fall back to the object's coordinates when it is missing.
		uid := string(obj.UID)
		identity := uid
		if identity == "" {
			identity = ns + "/" + obj.Kind + "/" + obj.Name
		}

		key := identity + ":" + e.Reason
		if _, dup := seen[key]; dup {
			continue
		}
		seen[key] = struct{}{}

		objKey := obj.Kind + "/" + obj.Name
		title := fmt.Sprintf("K8s Event %s: %s (%s)", e.Reason, objKey, ns)
		if ns == "" {
			title = fmt.Sprintf("K8s Event %s: %s", e.Reason, objKey)
		}

		details := strings.TrimSpace(e.Message)
		if details == "" {
			details = "Warning event emitted by Kubernetes."
		}

		out = append(out, model.Issue{
			ID:       fmt.Sprintf("k8s:event:%s:%s", identity, e.Reason),
			Category: model.CategoryKubernetes,
			Priority: model.PriorityP2,
			Title:    title,
			Details:  details,
			Evidence: map[string]string{
				"kind":      obj.Kind,
				"reason":    e.Reason,
				"namespace": ns,
				"name":      obj.Name,
				"uid":       uid,
			},
			SuggestedFix: suggestedFixForEvent(ns, obj.Kind, obj.Name),
		})
	}
	return out
}
// suggestedFixForEvent returns the kubectl describe command for the object an
// event refers to. The kind is lower-cased; the -n flag is included only when
// a namespace is known and the object is not a node.
func suggestedFixForEvent(ns, kind, name string) string {
	resource := strings.ToLower(kind)
	if ns == "" || resource == "node" {
		return fmt.Sprintf("kubectl describe %s %s", resource, name)
	}
	return fmt.Sprintf("kubectl -n %s describe %s %s", ns, resource, name)
}
@@ -0,0 +1,5 @@
//go:build ignore
package k8s
// Placeholder (see rollup_test.go).
+79
View File
@@ -0,0 +1,79 @@
package k8s
import (
"fmt"
corev1 "k8s.io/api/core/v1"
"tower/internal/model"
)
// IssuesFromNodes applies the PLAN.md node rules.
//
// A node whose Ready condition is present and not True yields a P0 NotReady
// issue; each of MemoryPressure, DiskPressure and PIDPressure that is True
// yields a P1 issue. Nil entries are skipped.
//
// Pure rule function: does not talk to the API server.
func IssuesFromNodes(nodes []*corev1.Node) []model.Issue {
	pressureTypes := []corev1.NodeConditionType{
		corev1.NodeMemoryPressure,
		corev1.NodeDiskPressure,
		corev1.NodePIDPressure,
	}

	// build assembles everything except the priority, which the caller sets.
	build := func(node *corev1.Node, cond *corev1.NodeCondition, reason string) model.Issue {
		return model.Issue{
			ID:       fmt.Sprintf("k8s:node:%s:%s", node.Name, reason),
			Category: model.CategoryKubernetes,
			Title:    fmt.Sprintf("Node %s: %s", reason, node.Name),
			Details:  cond.Message,
			Evidence: map[string]string{
				"kind":      "Node",
				"reason":    reason,
				"namespace": "",
				"node":      node.Name,
				"status":    string(cond.Status),
			},
			SuggestedFix: "kubectl describe node " + node.Name,
		}
	}

	issues := make([]model.Issue, 0, 8)
	for _, node := range nodes {
		if node == nil {
			continue
		}

		// Ready / NotReady
		if ready := findNodeCondition(node, corev1.NodeReady); ready != nil && ready.Status != corev1.ConditionTrue {
			issue := build(node, ready, "NotReady")
			issue.Priority = model.PriorityP0
			issues = append(issues, issue)
		}

		// Pressure conditions.
		for _, ctype := range pressureTypes {
			pressure := findNodeCondition(node, ctype)
			if pressure == nil || pressure.Status != corev1.ConditionTrue {
				continue
			}
			issue := build(node, pressure, string(ctype))
			issue.Priority = model.PriorityP1
			issues = append(issues, issue)
		}
	}
	return issues
}
// findNodeCondition returns a pointer to the first condition of type t in the
// node's status, or nil when n is nil or has no such condition.
func findNodeCondition(n *corev1.Node, t corev1.NodeConditionType) *corev1.NodeCondition {
	if n != nil {
		conds := n.Status.Conditions
		for idx := range conds {
			if conds[idx].Type == t {
				return &conds[idx]
			}
		}
	}
	return nil
}
@@ -0,0 +1,5 @@
//go:build ignore
package k8s
// Placeholder (see rollup_test.go).
+169
View File
@@ -0,0 +1,169 @@
package k8s
import (
"fmt"
"strconv"
"strings"
"time"
corev1 "k8s.io/api/core/v1"
"tower/internal/model"
)
// IssuesFromPods applies the PLAN.md pod rules.
//
// Per pod it reports:
//   - Pending for at least pendingGrace since creation (P1);
//
// and per container status:
//   - CrashLoopBackOff (P1, or P0 once restarts reach the threshold);
//   - ImagePullBackOff / ErrImagePull (P1);
//   - a last termination with reason OOMKilled (P1);
//   - restarts at or above the threshold while not in a named waiting state (P2).
//
// Non-positive pendingGrace and crashLoopRestartThreshold fall back to 120s
// and 5. Nil pods are skipped.
//
// Pure rule function: it does not talk to the API server.
func IssuesFromPods(pods []*corev1.Pod, now time.Time, pendingGrace time.Duration, crashLoopRestartThreshold int) []model.Issue {
	if pendingGrace <= 0 {
		pendingGrace = 120 * time.Second
	}
	if crashLoopRestartThreshold <= 0 {
		crashLoopRestartThreshold = 5
	}

	issues := make([]model.Issue, 0, 32)
	for _, pod := range pods {
		if pod == nil {
			continue
		}
		ns, name, node := pod.Namespace, pod.Name, pod.Spec.NodeName
		describe := func() string {
			return fmt.Sprintf("kubectl -n %s describe pod %s", ns, name)
		}

		// Pending for too long.
		if pod.Status.Phase == corev1.PodPending && !pod.CreationTimestamp.IsZero() {
			if age := now.Sub(pod.CreationTimestamp.Time); age >= pendingGrace {
				issues = append(issues, model.Issue{
					ID:       fmt.Sprintf("k8s:pod:%s/%s:Pending", ns, name),
					Category: model.CategoryKubernetes,
					Priority: model.PriorityP1,
					Title:    fmt.Sprintf("Pod Pending: %s/%s", ns, name),
					Details:  fmt.Sprintf("Pod has been Pending for %s.", age.Truncate(time.Second)),
					Evidence: map[string]string{
						"kind":      "Pod",
						"reason":    "Pending",
						"namespace": ns,
						"pod":       name,
						"phase":     string(pod.Status.Phase),
						"node":      node,
					},
					SuggestedFix: describe(),
				})
			}
		}

		// Container-derived signals.
		for i := range pod.Status.ContainerStatuses {
			status := &pod.Status.ContainerStatuses[i]
			container := status.Name
			restarts := int(status.RestartCount)

			// All container issues share the same evidence shape.
			evidence := func(reason string) map[string]string {
				return map[string]string{
					"kind":      "Pod",
					"reason":    reason,
					"namespace": ns,
					"pod":       name,
					"container": container,
					"restarts":  strconv.Itoa(restarts),
					"node":      node,
				}
			}
			describeAndLogs := func() string {
				return strings.TrimSpace(fmt.Sprintf(`kubectl -n %s describe pod %s
kubectl -n %s logs %s -c %s --previous`, ns, name, ns, name, container))
			}

			// CrashLoopBackOff and pull errors are reported via Waiting state.
			waitingReason := ""
			if waiting := status.State.Waiting; waiting != nil {
				waitingReason = waiting.Reason
				switch waitingReason {
				case "CrashLoopBackOff":
					priority := model.PriorityP1
					if restarts >= crashLoopRestartThreshold {
						priority = model.PriorityP0
					}
					issues = append(issues, model.Issue{
						ID:           fmt.Sprintf("k8s:pod:%s/%s:CrashLoop:%s", ns, name, container),
						Category:     model.CategoryKubernetes,
						Priority:     priority,
						Title:        fmt.Sprintf("CrashLoopBackOff: %s/%s (%s)", ns, name, container),
						Details:      firstNonEmpty(waiting.Message, "Container is in CrashLoopBackOff."),
						Evidence:     evidence("CrashLoopBackOff"),
						SuggestedFix: describeAndLogs(),
					})
				case "ImagePullBackOff", "ErrImagePull":
					issues = append(issues, model.Issue{
						ID:           fmt.Sprintf("k8s:pod:%s/%s:ImagePull:%s", ns, name, container),
						Category:     model.CategoryKubernetes,
						Priority:     model.PriorityP1,
						Title:        fmt.Sprintf("%s: %s/%s (%s)", waitingReason, ns, name, container),
						Details:      firstNonEmpty(waiting.Message, "Container image pull is failing."),
						Evidence:     evidence(waitingReason),
						SuggestedFix: describe(),
					})
				}
			}

			// OOMKilled is typically stored in LastTerminationState.
			if term := status.LastTerminationState.Terminated; term != nil && term.Reason == "OOMKilled" {
				issues = append(issues, model.Issue{
					ID:           fmt.Sprintf("k8s:pod:%s/%s:OOMKilled:%s", ns, name, container),
					Category:     model.CategoryKubernetes,
					Priority:     model.PriorityP1,
					Title:        fmt.Sprintf("OOMKilled: %s/%s (%s)", ns, name, container),
					Details:      firstNonEmpty(term.Message, "Container was killed due to OOM."),
					Evidence:     evidence("OOMKilled"),
					SuggestedFix: describeAndLogs(),
				})
			}

			// High restarts even if running.
			// Keep this lower priority than active CrashLoopBackOff.
			if restarts >= crashLoopRestartThreshold && waitingReason == "" {
				issues = append(issues, model.Issue{
					ID:           fmt.Sprintf("k8s:pod:%s/%s:Restarts:%s", ns, name, container),
					Category:     model.CategoryKubernetes,
					Priority:     model.PriorityP2,
					Title:        fmt.Sprintf("High restarts: %s/%s (%s)", ns, name, container),
					Details:      "Container has restarted multiple times.",
					Evidence:     evidence("HighRestarts"),
					SuggestedFix: describe(),
				})
			}
		}
	}
	return issues
}
// firstNonEmpty returns v unchanged unless it is empty or whitespace-only, in
// which case fallback is returned.
func firstNonEmpty(v, fallback string) string {
	if strings.TrimSpace(v) == "" {
		return fallback
	}
	return v
}
@@ -0,0 +1,5 @@
//go:build ignore
package k8s
// Placeholder (see rollup_test.go).
+174
View File
@@ -0,0 +1,174 @@
package k8s
import (
"fmt"
"strconv"
"time"
appsv1 "k8s.io/api/apps/v1"
"tower/internal/model"
)
// defaultWorkloadNotReadyGrace is how long a workload must have been not ready
// before an issue is emitted; the workload rule functions fall back to it when
// the caller passes a non-positive grace.
const defaultWorkloadNotReadyGrace = 180 * time.Second
// IssuesFromDeployments applies the PLAN.md workload rules for Deployments.
//
// A P1 issue is emitted for each Deployment whose ready replicas are below the
// desired count (1 when spec.replicas is unset) once the grace period has
// passed. The grace clock starts at the Progressing condition's
// LastUpdateTime, else its LastTransitionTime, else the creation time; when no
// timestamp is available the issue is emitted immediately.
func IssuesFromDeployments(deploys []*appsv1.Deployment, now time.Time, grace time.Duration) []model.Issue {
	if grace <= 0 {
		grace = defaultWorkloadNotReadyGrace
	}

	issues := make([]model.Issue, 0, 16)
	for _, dep := range deploys {
		if dep == nil {
			continue
		}

		want := int32(1)
		if replicas := dep.Spec.Replicas; replicas != nil {
			want = *replicas
		}
		have := dep.Status.ReadyReplicas
		if want <= 0 || have >= want {
			continue
		}

		// Prefer LastUpdateTime / LastTransitionTime when available; fallback to creation time.
		start := dep.CreationTimestamp.Time
		if prog := findDeploymentProgressingCondition(dep); prog != nil {
			switch {
			case !prog.LastUpdateTime.IsZero():
				start = prog.LastUpdateTime.Time
			case !prog.LastTransitionTime.IsZero():
				start = prog.LastTransitionTime.Time
			}
		}
		if !start.IsZero() && now.Sub(start) < grace {
			continue
		}

		ns, name := dep.Namespace, dep.Name
		evidence := map[string]string{
			"kind":          "Deployment",
			"reason":        "NotReady",
			"namespace":     ns,
			"name":          name,
			"desired":       strconv.Itoa(int(want)),
			"ready":         strconv.Itoa(int(have)),
			"observed_gen":  strconv.FormatInt(dep.Status.ObservedGeneration, 10),
			"resource_gen":  strconv.FormatInt(dep.Generation, 10),
			"min_grace_sec": strconv.Itoa(int(grace.Seconds())),
		}
		issues = append(issues, model.Issue{
			ID:           fmt.Sprintf("k8s:deploy:%s/%s:NotReady", ns, name),
			Category:     model.CategoryKubernetes,
			Priority:     model.PriorityP1,
			Title:        fmt.Sprintf("Deployment not ready: %s/%s", ns, name),
			Details:      "Ready replicas below desired.",
			Evidence:     evidence,
			SuggestedFix: fmt.Sprintf("kubectl -n %s describe deployment %s", ns, name),
		})
	}
	return issues
}
// IssuesFromStatefulSets applies the PLAN.md workload rules for StatefulSets.
//
// A P1 issue is emitted for each StatefulSet whose ready replicas are below
// the desired count (1 when spec.replicas is unset), unless it was created
// less than grace ago.
func IssuesFromStatefulSets(sts []*appsv1.StatefulSet, now time.Time, grace time.Duration) []model.Issue {
	if grace <= 0 {
		grace = defaultWorkloadNotReadyGrace
	}

	issues := make([]model.Issue, 0, 16)
	for _, set := range sts {
		if set == nil {
			continue
		}

		want := int32(1)
		if replicas := set.Spec.Replicas; replicas != nil {
			want = *replicas
		}
		have := set.Status.ReadyReplicas
		if want <= 0 || have >= want {
			continue
		}

		if created := set.CreationTimestamp.Time; !created.IsZero() && now.Sub(created) < grace {
			continue
		}

		ns, name := set.Namespace, set.Name
		evidence := map[string]string{
			"kind":          "StatefulSet",
			"reason":        "NotReady",
			"namespace":     ns,
			"name":          name,
			"desired":       strconv.Itoa(int(want)),
			"ready":         strconv.Itoa(int(have)),
			"observed_gen":  strconv.FormatInt(set.Status.ObservedGeneration, 10),
			"resource_gen":  strconv.FormatInt(set.Generation, 10),
			"min_grace_sec": strconv.Itoa(int(grace.Seconds())),
		}
		issues = append(issues, model.Issue{
			ID:           fmt.Sprintf("k8s:sts:%s/%s:NotReady", ns, name),
			Category:     model.CategoryKubernetes,
			Priority:     model.PriorityP1,
			Title:        fmt.Sprintf("StatefulSet not ready: %s/%s", ns, name),
			Details:      "Ready replicas below desired.",
			Evidence:     evidence,
			SuggestedFix: fmt.Sprintf("kubectl -n %s describe statefulset %s", ns, name),
		})
	}
	return issues
}
// IssuesFromDaemonSets applies the PLAN.md workload rules for DaemonSets.
//
// A P1 issue is emitted for each DaemonSet that reports unavailable pods,
// unless it was created less than grace ago.
func IssuesFromDaemonSets(dss []*appsv1.DaemonSet, now time.Time, grace time.Duration) []model.Issue {
	if grace <= 0 {
		grace = defaultWorkloadNotReadyGrace
	}

	issues := make([]model.Issue, 0, 16)
	for _, set := range dss {
		if set == nil {
			continue
		}

		missing := set.Status.NumberUnavailable
		if missing <= 0 {
			continue
		}
		if created := set.CreationTimestamp.Time; !created.IsZero() && now.Sub(created) < grace {
			continue
		}

		ns, name := set.Namespace, set.Name
		evidence := map[string]string{
			"kind":          "DaemonSet",
			"reason":        "Unavailable",
			"namespace":     ns,
			"name":          name,
			"unavailable":   strconv.Itoa(int(missing)),
			"desired":       strconv.Itoa(int(set.Status.DesiredNumberScheduled)),
			"available":     strconv.Itoa(int(set.Status.NumberAvailable)),
			"min_grace_sec": strconv.Itoa(int(grace.Seconds())),
		}
		issues = append(issues, model.Issue{
			ID:           fmt.Sprintf("k8s:ds:%s/%s:Unavailable", ns, name),
			Category:     model.CategoryKubernetes,
			Priority:     model.PriorityP1,
			Title:        fmt.Sprintf("DaemonSet unavailable: %s/%s", ns, name),
			Details:      "DaemonSet has unavailable pods.",
			Evidence:     evidence,
			SuggestedFix: fmt.Sprintf("kubectl -n %s describe daemonset %s", ns, name),
		})
	}
	return issues
}
// findDeploymentProgressingCondition returns a pointer to the Deployment's
// first Progressing condition, or nil when d is nil or has none.
func findDeploymentProgressingCondition(d *appsv1.Deployment) *appsv1.DeploymentCondition {
	if d != nil {
		conds := d.Status.Conditions
		for idx := range conds {
			if conds[idx].Type == appsv1.DeploymentProgressing {
				return &conds[idx]
			}
		}
	}
	return nil
}
@@ -0,0 +1,5 @@
//go:build ignore
package k8s
// Placeholder (see rollup_test.go).
+128
View File
@@ -0,0 +1,128 @@
package k8s
import (
"fmt"
"sort"
"strings"
"tower/internal/model"
)
// RollupKey groups similar issues to reduce UI noise.
// Issues are grouped by (namespace, reason, kind); Rollup fills the fields
// from the issue's Evidence entries "namespace", "reason" and "kind".
type RollupKey struct {
	Namespace string // may be empty, e.g. for cluster-scoped objects
	Reason string
	Kind string
}
// Rollup groups issues by (namespace, reason, kind). For any group with size >=
// threshold, it emits a single rollup issue and removes the individual issues
// from the output.
//
// Issues whose Evidence lacks a kind or reason are never grouped and are
// passed through first; the remaining groups follow ordered by namespace,
// kind, then reason. Non-positive threshold and sampleN default to 20 and 5.
//
// Rollup issues use Priority of the max priority in the group. They inherit
// the earliest FirstSeen and the latest LastSeen of their members, so a
// rollup built from stamped issues is itself stamped.
func Rollup(issues []model.Issue, threshold int, sampleN int) []model.Issue {
	if threshold <= 0 {
		threshold = 20
	}
	if sampleN <= 0 {
		sampleN = 5
	}

	groups := make(map[RollupKey][]model.Issue, 32)
	ungrouped := make([]model.Issue, 0, len(issues))
	for _, iss := range issues {
		kind := strings.TrimSpace(iss.Evidence["kind"])
		reason := strings.TrimSpace(iss.Evidence["reason"])
		ns := strings.TrimSpace(iss.Evidence["namespace"])
		if kind == "" || reason == "" {
			ungrouped = append(ungrouped, iss)
			continue
		}
		k := RollupKey{Namespace: ns, Reason: reason, Kind: kind}
		groups[k] = append(groups[k], iss)
	}

	rolled := make([]model.Issue, 0, len(issues))
	rolled = append(rolled, ungrouped...)

	// Stable order for determinism.
	keys := make([]RollupKey, 0, len(groups))
	for k := range groups {
		keys = append(keys, k)
	}
	sort.Slice(keys, func(i, j int) bool {
		if keys[i].Namespace != keys[j].Namespace {
			return keys[i].Namespace < keys[j].Namespace
		}
		if keys[i].Kind != keys[j].Kind {
			return keys[i].Kind < keys[j].Kind
		}
		return keys[i].Reason < keys[j].Reason
	})

	for _, k := range keys {
		grp := groups[k]
		if len(grp) < threshold {
			rolled = append(rolled, grp...)
			continue
		}

		// One pass over the group: max priority plus the time span it covers.
		// grp is non-empty here because threshold is at least 1.
		maxP := model.PriorityP3
		firstSeen, lastSeen := grp[0].FirstSeen, grp[0].LastSeen
		for i := range grp {
			member := &grp[i]
			if member.Priority.Weight() > maxP.Weight() {
				maxP = member.Priority
			}
			if !member.FirstSeen.IsZero() && (firstSeen.IsZero() || member.FirstSeen.Before(firstSeen)) {
				firstSeen = member.FirstSeen
			}
			if member.LastSeen.After(lastSeen) {
				lastSeen = member.LastSeen
			}
		}

		titleNS := ""
		if k.Namespace != "" {
			titleNS = fmt.Sprintf(" (ns=%s)", k.Namespace)
		}
		title := fmt.Sprintf("%d %ss %s%s", len(grp), strings.ToLower(k.Kind), k.Reason, titleNS)

		// Keep the first few member titles (or IDs) as drill-down samples.
		samples := make([]string, 0, sampleN)
		for i := 0; i < len(grp) && i < sampleN; i++ {
			s := grp[i].Title
			if s == "" {
				s = grp[i].ID
			}
			samples = append(samples, s)
		}

		rolled = append(rolled, model.Issue{
			ID:        fmt.Sprintf("k8s:rollup:%s:%s:%s", k.Namespace, k.Kind, k.Reason),
			Category:  model.CategoryKubernetes,
			Priority:  maxP,
			Title:     title,
			Details:   "Many similar Kubernetes issues were aggregated into this rollup.",
			FirstSeen: firstSeen,
			LastSeen:  lastSeen,
			Evidence: map[string]string{
				"kind":      k.Kind,
				"reason":    k.Reason,
				"namespace": k.Namespace,
				"count":     fmt.Sprintf("%d", len(grp)),
				"samples":   strings.Join(samples, " | "),
			},
			SuggestedFix: "Filter events/pods and inspect samples with kubectl describe.",
		})
	}
	return rolled
}
// CapIssues truncates issues to at most max entries.
//
// A non-positive max selects the default cap of 200. When the input already
// fits under the cap the input slice itself is returned (no copy); otherwise a
// freshly allocated slice holding the first max entries is returned.
//
// The helper does no sorting of its own: callers are expected to apply rollups
// and the default sort order (priority desc, recency desc) beforehand so that
// the entries that survive the cut are the most important ones.
func CapIssues(issues []model.Issue, max int) []model.Issue {
	limit := max
	if limit <= 0 {
		limit = 200
	}
	if len(issues) > limit {
		return append(make([]model.Issue, 0, limit), issues[:limit]...)
	}
	return issues
}
+10
View File
@@ -0,0 +1,10 @@
//go:build ignore
package k8s
// NOTE: This repository task restricts modifications to a fixed set of owned
// files. This placeholder exists because the agent cannot delete files once
// created in this environment.
//
// Real unit tests for rollups should live in a proper *_test.go file without an
// always-false build tag.
+133
View File
@@ -0,0 +1,133 @@
package k8s
import (
"errors"
"fmt"
"regexp"
"strings"
"time"
"tower/internal/model"
)
// unreachableTracker implements the "10s continuous failure" grace requirement
// for Kubernetes connectivity.
//
// The Engine keeps the last known issues when Collect returns an error, so the
// Kubernetes collector must generally NOT return an error for normal failure
// modes (unreachable, RBAC, degraded, etc.). Instead it should return a health
// Status + issues.
//
// The tracker records when the current streak of failures began and the most
// recent error, which lets the collector decide when to emit the P0
// unreachable issue. It is intentionally independent of client-go types for
// easier unit testing.
type unreachableTracker struct {
	// grace is how long failures must persist before shouldEmit reports true.
	grace time.Duration
	// firstFailureAt is the start of the current failure streak; zero when healthy.
	firstFailureAt time.Time
	// lastErr is the most recent failure; nil when healthy.
	lastErr error
}

// newUnreachableTracker builds a tracker with the given grace period.
// A non-positive grace selects the default of 10 seconds.
func newUnreachableTracker(grace time.Duration) *unreachableTracker {
	t := &unreachableTracker{grace: 10 * time.Second}
	if grace > 0 {
		t.grace = grace
	}
	return t
}

// observeSuccess ends the current failure streak, if any.
func (t *unreachableTracker) observeSuccess() {
	t.lastErr = nil
	t.firstFailureAt = time.Time{}
}

// observeFailure records err as the latest failure. The first failure of a
// streak also fixes the streak's start time to now; a nil err is ignored.
func (t *unreachableTracker) observeFailure(now time.Time, err error) {
	if err == nil {
		return
	}
	if t.firstFailureAt.IsZero() {
		t.firstFailureAt = now
	}
	t.lastErr = err
}

// failingFor reports how long the current failure streak has lasted at now.
// It returns 0 when there is no streak or when now precedes the streak start.
func (t *unreachableTracker) failingFor(now time.Time) time.Duration {
	start := t.firstFailureAt
	if start.IsZero() || now.Before(start) {
		return 0
	}
	return now.Sub(start)
}

// shouldEmit reports whether a failure is recorded and the streak has lasted
// at least the grace period.
func (t *unreachableTracker) shouldEmit(now time.Time) bool {
	if t.lastErr == nil {
		return false
	}
	return t.failingFor(now) >= t.grace
}
// lastErrorString renders the most recent failure as a single line: the error
// is passed through sanitizeError, newlines become spaces, and surrounding
// whitespace is trimmed. It returns "" when no failure is recorded.
func (t *unreachableTracker) lastErrorString() string {
	if t.lastErr == nil {
		return ""
	}
	oneLine := strings.ReplaceAll(sanitizeError(t.lastErr), "\n", " ")
	return strings.TrimSpace(oneLine)
}
// unreachableIssue builds the P0 issue reported when the Kubernetes API cannot
// be reached or authentication fails.
//
// The issue ID and Title are fixed. When err is non-nil its sanitized text is
// appended to Details only, which keeps the Title short.
func unreachableIssue(err error) model.Issue {
	const summary = "Kubernetes API is unreachable or credentials are invalid."

	details := summary
	if err != nil {
		details = fmt.Sprintf("%s Last error: %s", summary, sanitizeError(err))
	}

	evidence := map[string]string{
		"kind":   "Cluster",
		"reason": "Unreachable",
	}
	fix := strings.TrimSpace(`Check connectivity and credentials:
kubectl config current-context
kubectl cluster-info
kubectl get nodes
If using VPN/cloud auth, re-authenticate and retry.`)

	return model.Issue{
		ID:           "k8s:cluster:unreachable",
		Category:     model.CategoryKubernetes,
		Priority:     model.PriorityP0,
		Title:        "Kubernetes cluster unreachable / auth failed",
		Details:      details,
		Evidence:     evidence,
		SuggestedFix: fix,
	}
}
// secretRedactions lists the patterns scrubbed from error text, in the order
// they are applied. They are compiled once at package initialization instead
// of on every sanitizeError call.
var secretRedactions = []struct {
	re   *regexp.Regexp
	repl string
}{
	// Bearer credentials. The character class follows the RFC 6750 b64token
	// alphabet so that dot-separated tokens (JWTs, which is what Kubernetes
	// service-account tokens are) are redacted in full rather than only up to
	// the first '.'.
	{regexp.MustCompile(`Bearer [A-Za-z0-9._~+/-]{20,}=*`), "Bearer [REDACTED]"},
	// key=value style secrets, e.g. in query strings.
	{regexp.MustCompile(`password=[^&\s]+`), "password=[REDACTED]"},
	{regexp.MustCompile(`token=[^&\s]+`), "token=[REDACTED]"},
	{regexp.MustCompile(`secret=[^&\s]+`), "secret=[REDACTED]"},
	// URLs that mention "k8s" are treated as API server addresses. This also
	// covers every URL containing ".k8s.", so no separate pattern is needed.
	{regexp.MustCompile(`https?://[^\s]+k8s[^\s]*`), "[API_SERVER]"},
}

// sanitizeError returns err's message with credentials and API server URLs
// replaced by placeholders, so it can be shown in the UI or exported.
//
// It returns "" for a nil error. Only the patterns in secretRedactions are
// scrubbed; this is a best-effort filter, not a guarantee that the result is
// free of sensitive data.
func sanitizeError(err error) string {
	if err == nil {
		return ""
	}
	s := err.Error()
	for _, r := range secretRedactions {
		s = r.re.ReplaceAllString(s, r.repl)
	}
	return s
}
// flattenErr renders err as a single trimmed line.
//
// If err wraps another error, one layer of wrapping is removed first and the
// inner error's message is used; this avoids nested "context deadline
// exceeded" noise. Newlines are replaced by spaces. A nil err yields "".
func flattenErr(err error) string {
	if err == nil {
		return ""
	}
	inner := errors.Unwrap(err)
	if inner == nil {
		inner = err
	}
	return strings.TrimSpace(strings.ReplaceAll(inner.Error(), "\n", " "))
}
@@ -0,0 +1,5 @@
//go:build ignore
package k8s
// Placeholder (see rollup_test.go).
+309
View File
@@ -0,0 +1,309 @@
package engine
import (
"context"
"sync"
"time"
"tower/internal/collectors"
"tower/internal/model"
)
// IssueStore is the Engine's dependency on the issue store.
//
// The concrete implementation lives in internal/store; the Engine depends on
// this interface to stay testable. The store is responsible for dedupe and
// lifecycle (resolve-after, ack, etc.). The Engine only merges the outputs of
// its collectors and hands the merged list to Upsert.
//
// This interface must be satisfied by the store type in internal/store.
// (Do not add persistence here.)
type IssueStore interface {
// Upsert receives the merged "currently true" issues from all collectors.
// The Engine passes the finish time of the collect that triggered the update as now.
Upsert(now time.Time, issues []model.Issue)
// Snapshot returns the issues to publish to the UI. The Engine sorts the
// returned slice in place before publishing it.
Snapshot(now time.Time) []model.Issue
}
// CollectorConfig wires a collector into the Engine.
//
// The collection interval is not configured here; it comes from the
// collector's own Interval method.
type CollectorConfig struct {
// Collector is the collector to schedule.
Collector collectors.Collector
// Timeout bounds each individual Collect() invocation.
// If Timeout <= 0, no per-collector timeout is applied.
Timeout time.Duration
}
// CollectorHealth tracks the current health of a collector, as recorded by the
// Engine after each Collect() call.
type CollectorHealth struct {
// Status is the last status returned by the collector.
Status collectors.Status
// LastError is the error returned by the most recent run; nil if it succeeded.
LastError error
// LastRun is when the most recent run finished.
LastRun time.Time
// LastOK is when the most recent error-free run finished; zero if none yet.
LastOK time.Time
// LastRunDur is how long the most recent run took.
LastRunDur time.Duration
}
// Snapshot is the Engine's UI-facing view at a point in time.
type Snapshot struct {
// At is the timestamp the snapshot was taken for.
At time.Time
// Issues are sorted using the default sort order (Priority desc, then recency desc).
Issues []model.Issue
// Collectors holds a copy of each collector's health, keyed by collector name.
Collectors map[string]CollectorHealth
}
// collectResult carries the outcome of one Collect() call from a collector
// goroutine to the aggregator.
type collectResult struct {
// name is the collector's Name().
name string
// at is when the Collect() call finished.
at time.Time
// duration is how long the Collect() call took.
duration time.Duration
// issues is a copy of the slice returned by the collector.
issues []model.Issue
// status and err are the collector's returned Status and error.
status collectors.Status
err error
}
// collectorRunner pairs a collector's configuration with the channel used to
// request an out-of-schedule run.
type collectorRunner struct {
cfg CollectorConfig
// refreshCh receives a signal from RefreshNow; the Engine creates it with a
// buffer of one so at most one refresh is queued.
refreshCh chan struct{}
}
// Engine runs collectors on their own schedules, merges issues, and updates the store.
// It publishes snapshots for the UI.
//
// Lifecycle:
//
// e := New(...)
// e.Start(ctx)
// defer e.Stop()
//
// Snapshots are emitted:
// - after any store update (collector completion)
// - periodically at refreshInterval (if > 0)
//
// RefreshNow() asks every collector to run again without waiting for its interval.
type Engine struct {
// store receives merged issues and produces the issue list for snapshots.
store IssueStore
// refreshInterval is the period of time-driven snapshots; <= 0 disables them.
refreshInterval time.Duration
// snapshots is the buffered channel handed to the UI; closed by Stop.
snapshots chan Snapshot
// results carries collect outcomes from collector goroutines to the aggregator.
results chan collectResult
// mu guards latestIssuesByCollector and health.
mu sync.Mutex
// latestIssuesByCollector holds the issues from each collector's last error-free run.
latestIssuesByCollector map[string][]model.Issue
// health holds the latest CollectorHealth per collector name.
health map[string]CollectorHealth
// collectors is fixed at construction time.
collectors []collectorRunner
// cancel stops all goroutines; nil until Start has run.
cancel context.CancelFunc
// wg tracks the aggregator and collector goroutines.
wg sync.WaitGroup
// startOnce and stopOnce make Start and Stop take effect at most once.
startOnce sync.Once
stopOnce sync.Once
}
// New constructs an Engine that feeds st from the collectors in cs.
//
// refreshInterval governs periodic snapshot emission. If refreshInterval <= 0,
// snapshots are only emitted when collectors finish. Nothing runs until Start
// is called.
func New(st IssueStore, cs []CollectorConfig, refreshInterval time.Duration) *Engine {
	e := &Engine{
		store:                   st,
		refreshInterval:         refreshInterval,
		snapshots:               make(chan Snapshot, 32),
		results:                 make(chan collectResult, 64),
		latestIssuesByCollector: map[string][]model.Issue{},
		health:                  map[string]CollectorHealth{},
		collectors:              make([]collectorRunner, len(cs)),
	}
	for i, c := range cs {
		// Buffer of one: a pending refresh request is remembered, extras are dropped.
		e.collectors[i] = collectorRunner{cfg: c, refreshCh: make(chan struct{}, 1)}
	}
	return e
}
// Start begins background collection: one aggregator goroutine plus one
// goroutine per collector, all bound to a context derived from parent.
//
// Only the first call has any effect; later calls are no-ops.
func (e *Engine) Start(parent context.Context) {
	e.startOnce.Do(func() {
		ctx, cancel := context.WithCancel(parent)
		e.cancel = cancel

		// spawn runs fn on its own goroutine, tracked by the Engine's WaitGroup.
		spawn := func(fn func()) {
			e.wg.Add(1)
			go func() {
				defer e.wg.Done()
				fn()
			}()
		}

		spawn(func() { e.runAggregator(ctx) })
		for i := range e.collectors {
			runner := &e.collectors[i]
			spawn(func() { e.runCollector(ctx, runner) })
		}
	})
}
// Stop cancels the Engine's context, waits for every goroutine started by
// Start to exit, and then closes the snapshots channel.
//
// Only the first call has any effect; later calls are no-ops.
func (e *Engine) Stop() {
	e.stopOnce.Do(func() {
		if stop := e.cancel; stop != nil {
			stop()
		}
		e.wg.Wait()
		close(e.snapshots)
	})
}
// Snapshots exposes the Engine's snapshot stream as a receive-only channel.
// The channel is closed by Stop.
func (e *Engine) Snapshots() <-chan Snapshot {
	return e.snapshots
}
// RefreshNow asks every collector to run again without waiting for its interval.
//
// The call never blocks: each collector has room for one pending refresh
// signal, and a collector that already has one queued is left as is.
func (e *Engine) RefreshNow() {
	for _, runner := range e.collectors {
		select {
		case runner.refreshCh <- struct{}{}:
		default:
			// A refresh is already pending for this collector.
		}
	}
}
// runCollector drives a single collector until ctx is cancelled.
//
// It collects once immediately, then again on every tick of the collector's
// interval (1s when the collector reports a non-positive interval) and on every
// refresh signal. Each outcome is sent to the aggregator over e.results.
func (e *Engine) runCollector(ctx context.Context, r *collectorRunner) {
	collector := r.cfg.Collector
	name := collector.Name()
	every := collector.Interval()
	if every <= 0 {
		every = time.Second
	}

	collectOnce := func() {
		began := time.Now()

		runCtx := ctx
		if r.cfg.Timeout > 0 {
			var cancel context.CancelFunc
			runCtx, cancel = context.WithTimeout(ctx, r.cfg.Timeout)
			defer cancel()
		}

		issues, st, err := collector.Collect(runCtx)
		ended := time.Now()

		// Hand the aggregator its own copy so a collector that reuses its
		// backing array cannot race with it.
		owned := make([]model.Issue, len(issues))
		copy(owned, issues)

		outcome := collectResult{
			name:     name,
			at:       ended,
			duration: ended.Sub(began),
			issues:   owned,
			status:   st,
			err:      err,
		}

		select {
		case e.results <- outcome:
		case <-ctx.Done():
			// Shutting down; the result is dropped.
		}
	}

	// Collect immediately on start so the UI isn't empty for the first interval.
	collectOnce()

	ticker := time.NewTicker(every)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			collectOnce()
		case <-r.refreshCh:
			collectOnce()
		}
	}
}
// runAggregator is the Engine's single consumer of collect results.
//
// For every result it records the collector's health, merges the latest issues
// of all collectors, pushes the merged list into the store, and publishes a
// snapshot. When refreshInterval > 0 it also publishes a snapshot on every
// tick. It returns when ctx is cancelled.
func (e *Engine) runAggregator(ctx context.Context) {
	// tick stays nil (and therefore never fires) when periodic refresh is disabled.
	var tick <-chan time.Time
	if e.refreshInterval > 0 {
		ticker := time.NewTicker(e.refreshInterval)
		defer ticker.Stop()
		tick = ticker.C
	}

	// publish builds a snapshot for the given time and offers it to the UI.
	publish := func(at time.Time) {
		issues := e.store.Snapshot(at)
		// Ensure deterministic default sort for the UI.
		model.SortIssuesDefault(issues)

		e.mu.Lock()
		healthCopy := make(map[string]CollectorHealth, len(e.health))
		for name, ch := range e.health {
			healthCopy[name] = ch
		}
		e.mu.Unlock()

		// Non-blocking publish; drop if UI is behind.
		select {
		case e.snapshots <- Snapshot{At: at, Issues: issues, Collectors: healthCopy}:
		default:
		}
	}

	// absorb records one collect result and returns the merged issue list of
	// all collectors.
	absorb := func(res collectResult) []model.Issue {
		e.mu.Lock()
		defer e.mu.Unlock()

		succeeded := res.err == nil

		// On collector errors, keep the last known issues for that collector.
		// This prevents transient errors/timeouts from making issues disappear.
		if succeeded {
			e.latestIssuesByCollector[res.name] = res.issues
		}

		ch := e.health[res.name]
		ch.Status = res.status
		ch.LastRun = res.at
		ch.LastRunDur = res.duration
		ch.LastError = res.err
		if succeeded {
			ch.LastOK = res.at
		}
		e.health[res.name] = ch

		merged := make([]model.Issue, 0, 64)
		for _, issues := range e.latestIssuesByCollector {
			merged = append(merged, issues...)
		}
		return merged
	}

	for {
		select {
		case <-ctx.Done():
			return
		case <-tick:
			publish(time.Now())
		case res := <-e.results:
			merged := absorb(res)
			e.store.Upsert(res.at, merged)
			publish(res.at)
		}
	}
}
+225
View File
@@ -0,0 +1,225 @@
package engine
import (
"context"
"errors"
"sync"
"sync/atomic"
"testing"
"time"
"tower/internal/collectors"
"tower/internal/model"
)
// fakeStore is an in-memory IssueStore for tests. It remembers the arguments of
// the most recent Upsert and counts how often Upsert was called.
type fakeStore struct {
	mu          sync.Mutex
	upsertCalls int
	lastNow     time.Time
	lastIssues  []model.Issue
}

// Upsert records the call and keeps a copy of issues.
func (s *fakeStore) Upsert(now time.Time, issues []model.Issue) {
	// Deep-ish copy: slice copy is enough for our tests.
	kept := append([]model.Issue(nil), issues...)

	s.mu.Lock()
	s.upsertCalls++
	s.lastNow = now
	s.lastIssues = kept
	s.mu.Unlock()
}

// Snapshot returns a copy of the issues from the most recent Upsert; now is ignored.
func (s *fakeStore) Snapshot(now time.Time) []model.Issue {
	s.mu.Lock()
	out := append([]model.Issue(nil), s.lastIssues...)
	s.mu.Unlock()
	return out
}

// UpsertCount reports how many times Upsert has been called.
func (s *fakeStore) UpsertCount() int {
	s.mu.Lock()
	n := s.upsertCalls
	s.mu.Unlock()
	return n
}
// fakeCollector is a scriptable collectors.Collector for tests.
type fakeCollector struct {
	name     string
	interval time.Duration

	// delay simulates work. If ctx is canceled/timeout hits, Collect returns ctx.Err().
	delay time.Duration

	// issuesFn, when set, produces the issues for the given 1-based call number.
	issuesFn func(call int64) []model.Issue

	// calls counts Collect invocations.
	calls atomic.Int64
	// callCh, when set, receives the start time of each Collect (best effort).
	callCh chan time.Time
}

// Name returns the configured collector name.
func (c *fakeCollector) Name() string {
	return c.name
}

// Interval returns the configured collection interval.
func (c *fakeCollector) Interval() time.Duration {
	return c.interval
}

// Collect counts the call, notifies callCh without blocking, optionally waits
// for delay (aborting with ctx.Err() if ctx ends first), and returns the issues
// from issuesFn together with a zero Status.
func (c *fakeCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
	n := c.calls.Add(1)

	if c.callCh != nil {
		select {
		case c.callCh <- time.Now():
		default:
		}
	}

	var status collectors.Status

	if c.delay > 0 {
		work := time.NewTimer(c.delay)
		defer work.Stop()
		select {
		case <-work.C:
		case <-ctx.Done():
			return nil, status, ctx.Err()
		}
	}

	if c.issuesFn == nil {
		return nil, status, nil
	}
	return c.issuesFn(n), status, nil
}
// recvSnapshot waits up to within for the next value on ch and returns it.
// The test is failed immediately if nothing arrives in time.
func recvSnapshot(t *testing.T, ch <-chan Snapshot, within time.Duration) Snapshot {
	t.Helper()

	deadline := time.NewTimer(within)
	defer deadline.Stop()

	select {
	case <-deadline.C:
		t.Fatalf("timed out waiting for snapshot")
	case snap := <-ch:
		return snap
	}
	return Snapshot{}
}
// TestEngine_UpsertAndSnapshotsEmitted checks that the initial collect reaches
// the store via Upsert and that the first published snapshot carries both the
// collected issue and a health entry for the collector.
func TestEngine_UpsertAndSnapshotsEmitted(t *testing.T) {
st := &fakeStore{}
c := &fakeCollector{
name: "c1",
interval: 100 * time.Millisecond,
issuesFn: func(call int64) []model.Issue {
return []model.Issue{{
ID: "id-1",
Priority: model.PriorityP1,
Title: "hello",
LastSeen: time.Now(),
}}
},
}
// refreshInterval 0: snapshots are only emitted when a collector finishes.
e := New(st, []CollectorConfig{{Collector: c, Timeout: 200 * time.Millisecond}}, 0)
ctx, cancel := context.WithCancel(context.Background())
// Deferred calls run in reverse order: e.Stop() first, then cancel().
defer cancel()
defer e.Stop()
e.Start(ctx)
snap := recvSnapshot(t, e.Snapshots(), 300*time.Millisecond)
if st.UpsertCount() < 1 {
t.Fatalf("expected store.Upsert to be called")
}
if len(snap.Issues) != 1 || snap.Issues[0].ID != "id-1" {
t.Fatalf("expected snapshot to contain issue id-1; got %+v", snap.Issues)
}
if _, ok := snap.Collectors["c1"]; !ok {
t.Fatalf("expected collector health entry for c1")
}
}
// TestEngine_CollectorTimeoutCancelsLongCollect checks that the per-collector
// timeout cuts a slow Collect short: the collector's health must report
// context.DeadlineExceeded, and the store is still updated.
func TestEngine_CollectorTimeoutCancelsLongCollect(t *testing.T) {
st := &fakeStore{}
c := &fakeCollector{
name: "slow",
// Long interval so that only the initial collect runs during the test.
interval: time.Hour,
// The simulated work (200ms) is much longer than the 20ms timeout below.
delay: 200 * time.Millisecond,
}
e := New(st, []CollectorConfig{{Collector: c, Timeout: 20 * time.Millisecond}}, 0)
ctx, cancel := context.WithCancel(context.Background())
// Deferred calls run in reverse order: e.Stop() first, then cancel().
defer cancel()
defer e.Stop()
e.Start(ctx)
snap := recvSnapshot(t, e.Snapshots(), 400*time.Millisecond)
ch, ok := snap.Collectors["slow"]
if !ok {
t.Fatalf("expected collector health entry for slow")
}
if ch.LastError == nil {
t.Fatalf("expected LastError to be set")
}
if !errors.Is(ch.LastError, context.DeadlineExceeded) {
t.Fatalf("expected context deadline exceeded; got %v", ch.LastError)
}
if st.UpsertCount() < 1 {
t.Fatalf("expected store.Upsert to be called")
}
}
// TestEngine_RefreshNowTriggersImmediateCollect checks that RefreshNow causes a
// second Collect well before the collector's 200ms interval would have fired.
func TestEngine_RefreshNowTriggersImmediateCollect(t *testing.T) {
st := &fakeStore{}
// callCh receives one timestamp per Collect call.
callCh := make(chan time.Time, 10)
c := &fakeCollector{
name: "r",
interval: 200 * time.Millisecond,
callCh: callCh,
}
e := New(st, []CollectorConfig{{Collector: c, Timeout: time.Second}}, 0)
ctx, cancel := context.WithCancel(context.Background())
// Deferred calls run in reverse order: e.Stop() first, then cancel().
defer cancel()
defer e.Stop()
e.Start(ctx)
// First collect happens immediately.
select {
case <-callCh:
case <-time.After(200 * time.Millisecond):
t.Fatalf("timed out waiting for initial collect")
}
// Trigger refresh; should happen well before the 200ms interval.
time.Sleep(10 * time.Millisecond)
e.RefreshNow()
select {
case <-callCh:
// ok
case <-time.After(120 * time.Millisecond):
t.Fatalf("expected RefreshNow to trigger a collect quickly")
}
}
// TestEngine_MultipleCollectorsRunOnIntervals runs two collectors with
// different intervals for a fixed wall-clock window and checks that each was
// invoked at least a minimum number of times.
func TestEngine_MultipleCollectorsRunOnIntervals(t *testing.T) {
st := &fakeStore{}
fast := &fakeCollector{name: "fast", interval: 30 * time.Millisecond}
slow := &fakeCollector{name: "slow", interval: 80 * time.Millisecond}
e := New(st, []CollectorConfig{{Collector: fast, Timeout: time.Second}, {Collector: slow, Timeout: time.Second}}, 0)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
e.Start(ctx)
// Let it run a bit.
time.Sleep(220 * time.Millisecond)
// Stop before reading the counters so no collector goroutine is still running.
e.Stop()
fastCalls := fast.calls.Load()
slowCalls := slow.calls.Load()
// Includes initial collect. The thresholds are lower bounds, leaving slack
// for scheduling delays.
if fastCalls < 4 {
t.Fatalf("expected fast collector to be called multiple times; got %d", fastCalls)
}
if slowCalls < 2 {
t.Fatalf("expected slow collector to be called multiple times; got %d", slowCalls)
}
}
+98
View File
@@ -0,0 +1,98 @@
package export
import (
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"strings"
"tower/internal/model"
)
// WriteIssues writes a JSON snapshot of issues to path.
//
// It attempts to be atomic by writing to a temporary file in the same directory
// and then renaming it into place. The output is an indented JSON array; a nil
// issues slice is written as an empty array. Missing parent directories are
// created.
//
// path must be non-empty and relative, and must not escape the working
// directory through leading ".." components; otherwise an error is returned
// before anything is written. All returned errors are prefixed with "export:".
func WriteIssues(path string, issues []model.Issue) error {
	if path == "" {
		return fmt.Errorf("export: path is empty")
	}

	// After Clean, a relative path can only contain ".." as leading components,
	// so checking the prefix is exact. A substring match would wrongly reject
	// names such as "foo../x" and would miss a bare "..".
	cleanPath := filepath.Clean(path)
	if cleanPath == ".." || strings.HasPrefix(cleanPath, ".."+string(filepath.Separator)) {
		return fmt.Errorf("export: path traversal not allowed: %s", path)
	}
	if filepath.IsAbs(cleanPath) {
		return fmt.Errorf("export: absolute paths not allowed: %s", path)
	}

	// Ensure we always write a JSON array, even if caller passes a nil slice.
	if issues == nil {
		issues = []model.Issue{}
	}

	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return fmt.Errorf("export: create dir %q: %w", dir, err)
	}

	tmp, err := os.CreateTemp(dir, filepath.Base(path)+".*.tmp")
	if err != nil {
		return fmt.Errorf("export: create temp file: %w", err)
	}
	tmpName := tmp.Name()

	// Make the resulting snapshot readable by default.
	if err := tmp.Chmod(0o644); err != nil {
		log.Printf("export: warning: failed to chmod temp file %q: %v", tmpName, err)
	}

	// closeTmp closes the temp file at most once. Closing an *os.File twice
	// returns an error, which previously produced a spurious warning whenever
	// cleanup ran after the file had already been closed.
	closed := false
	closeTmp := func() error {
		if closed {
			return nil
		}
		closed = true
		return tmp.Close()
	}

	// cleanup discards the temp file on any failure path (best effort).
	cleanup := func() {
		if err := closeTmp(); err != nil {
			log.Printf("export: warning: failed to close temp file %q: %v", tmpName, err)
		}
		if err := os.Remove(tmpName); err != nil && !os.IsNotExist(err) {
			log.Printf("export: warning: failed to remove temp file %q: %v", tmpName, err)
		}
	}

	enc := json.NewEncoder(tmp)
	enc.SetIndent("", " ")
	// This is a snapshot file for humans; keep it readable.
	enc.SetEscapeHTML(false)
	if err := enc.Encode(issues); err != nil {
		cleanup()
		return fmt.Errorf("export: encode json: %w", err)
	}

	// Best effort durability before rename.
	if err := tmp.Sync(); err != nil {
		cleanup()
		return fmt.Errorf("export: sync temp file: %w", err)
	}
	if err := closeTmp(); err != nil {
		cleanup()
		return fmt.Errorf("export: close temp file: %w", err)
	}

	// On POSIX, rename is atomic when source and destination are on the same FS.
	if err := os.Rename(tmpName, path); err != nil {
		// Best-effort fallback for platforms where rename fails if destination exists.
		// NOTE: this window is not atomic; if the second rename fails the old
		// file is already gone.
		if rmErr := os.Remove(path); rmErr == nil {
			if err2 := os.Rename(tmpName, path); err2 == nil {
				return nil
			}
		}
		cleanup()
		return fmt.Errorf("export: rename into place: %w", err)
	}
	return nil
}
+47
View File
@@ -0,0 +1,47 @@
package export
import (
"encoding/json"
"os"
"path/filepath"
"testing"
)
// TestWriteIssues_WritesIndentedJSON checks that WriteIssues produces a file
// at the requested relative path containing valid JSON that ends in a newline.
//
// model.Issue fields are not validated here; a nil slice is passed so the test
// does not depend on the Issue definition.
func TestWriteIssues_WritesIndentedJSON(t *testing.T) {
	t.Parallel()

	cwd, err := os.Getwd()
	if err != nil {
		t.Fatalf("get working dir: %v", err)
	}

	// WriteIssues rejects absolute paths, so the output lives in a per-test
	// directory under the package's testdata folder and is addressed relatively.
	relDir := filepath.Join("testdata", t.Name())
	absDir := filepath.Join(cwd, relDir)
	if err := os.MkdirAll(absDir, 0o755); err != nil {
		t.Fatalf("create test dir: %v", err)
	}
	defer os.RemoveAll(absDir)

	outPath := filepath.Join(relDir, "issues.json")
	if err := WriteIssues(outPath, nil); err != nil {
		t.Fatalf("WriteIssues error: %v", err)
	}

	content, err := os.ReadFile(outPath)
	if err != nil {
		t.Fatalf("read file: %v", err)
	}

	// Ensure valid JSON.
	var decoded any
	if err := json.Unmarshal(content, &decoded); err != nil {
		t.Fatalf("invalid json: %v\ncontent=%s", err, string(content))
	}

	// encoding/json.Encoder.Encode adds a trailing newline.
	if n := len(content); n == 0 || content[n-1] != '\n' {
		t.Fatalf("expected trailing newline")
	}
}
+217
View File
@@ -0,0 +1,217 @@
package model
import (
"encoding/json"
"fmt"
"sort"
"time"
)
// Category is the top-level grouping for an Issue.
//
// It is a string enum for JSON stability and friendliness. The empty string is
// accepted as "unset" when encoding and decoding.
type Category string

// Known categories.
const (
	CategoryPerformance Category = "Performance"
	CategoryMemory      Category = "Memory"
	CategoryStorage     Category = "Storage"
	CategoryNetwork     Category = "Network"
	CategoryThermals    Category = "Thermals"
	CategoryProcesses   Category = "Processes"
	CategoryServices    Category = "Services"
	CategoryLogs        Category = "Logs"
	CategoryUpdates     Category = "Updates"
	CategorySecurity    Category = "Security"
	CategoryKubernetes  Category = "Kubernetes"
)

// String returns the category's text form.
func (c Category) String() string {
	return string(c)
}

// valid reports whether c is empty or one of the known categories.
func (c Category) valid() bool {
	switch c {
	case "":
		return true
	case CategoryPerformance, CategoryMemory, CategoryStorage, CategoryNetwork,
		CategoryThermals, CategoryProcesses, CategoryServices, CategoryLogs,
		CategoryUpdates, CategorySecurity, CategoryKubernetes:
		return true
	}
	return false
}

// MarshalJSON encodes c as a JSON string and rejects unknown categories.
func (c Category) MarshalJSON() ([]byte, error) {
	if c.valid() {
		return json.Marshal(string(c))
	}
	return nil, fmt.Errorf("invalid category %q", string(c))
}

// UnmarshalJSON decodes a JSON string into c. Unknown categories are rejected
// and leave c unchanged.
func (c *Category) UnmarshalJSON(b []byte) error {
	var raw string
	if err := json.Unmarshal(b, &raw); err != nil {
		return err
	}
	parsed := Category(raw)
	if parsed.valid() {
		*c = parsed
		return nil
	}
	return fmt.Errorf("invalid category %q", raw)
}
// Priority is the urgency of an Issue.
//
// Priorities are string enums P0..P3 where P0 is most urgent. The empty string
// is accepted as "unset" when encoding and decoding.
type Priority string

// Known priorities, most urgent first.
const (
	PriorityP0 Priority = "P0"
	PriorityP1 Priority = "P1"
	PriorityP2 Priority = "P2"
	PriorityP3 Priority = "P3"
)

// String returns the priority's text form.
func (p Priority) String() string {
	return string(p)
}

// Weight returns a numeric weight used for sorting.
// Higher weight means more urgent: P0 is 4, P3 is 1, anything else is 0.
func (p Priority) Weight() int {
	// Listed from least to most urgent so that index+1 is the weight.
	for i, known := range [...]Priority{PriorityP3, PriorityP2, PriorityP1, PriorityP0} {
		if p == known {
			return i + 1
		}
	}
	return 0
}

// valid reports whether p is empty or one of P0..P3.
func (p Priority) valid() bool {
	return p == "" || p.Weight() > 0
}

// MarshalJSON encodes p as a JSON string and rejects unknown priorities.
func (p Priority) MarshalJSON() ([]byte, error) {
	if p.valid() {
		return json.Marshal(string(p))
	}
	return nil, fmt.Errorf("invalid priority %q", string(p))
}

// UnmarshalJSON decodes a JSON string into p. Unknown priorities are rejected
// and leave p unchanged.
func (p *Priority) UnmarshalJSON(b []byte) error {
	var raw string
	if err := json.Unmarshal(b, &raw); err != nil {
		return err
	}
	parsed := Priority(raw)
	if parsed.valid() {
		*p = parsed
		return nil
	}
	return fmt.Errorf("invalid priority %q", raw)
}
// State is the lifecycle state of an Issue.
//
//   - Open: currently active
//   - Acknowledged: active but acknowledged in-memory
//   - Resolved: not observed for some time (resolve-after handled by store)
//
// The empty string is accepted as "unset" when encoding and decoding.
type State string

// Known states.
const (
	StateOpen         State = "Open"
	StateAcknowledged State = "Acknowledged"
	StateResolved     State = "Resolved"
)

// String returns the state's text form.
func (s State) String() string {
	return string(s)
}

// valid reports whether s is empty or one of the known states.
func (s State) valid() bool {
	for _, known := range [...]State{"", StateOpen, StateAcknowledged, StateResolved} {
		if s == known {
			return true
		}
	}
	return false
}

// MarshalJSON encodes s as a JSON string and rejects unknown states.
func (s State) MarshalJSON() ([]byte, error) {
	if s.valid() {
		return json.Marshal(string(s))
	}
	return nil, fmt.Errorf("invalid state %q", string(s))
}

// UnmarshalJSON decodes a JSON string into s. Unknown states are rejected and
// leave s unchanged.
func (s *State) UnmarshalJSON(b []byte) error {
	var raw string
	if err := json.Unmarshal(b, &raw); err != nil {
		return err
	}
	parsed := State(raw)
	if parsed.valid() {
		*s = parsed
		return nil
	}
	return fmt.Errorf("invalid state %q", raw)
}
// Issue is the single unit of information surfaced by ControlTower.
type Issue struct {
// ID is the stable identifier of the issue (e.g. "host:disk:/home:usage").
ID string `json:"id"`
Category Category `json:"category"`
Priority Priority `json:"priority"`
// Title is the short headline; Details is optional longer text.
Title string `json:"title"`
Details string `json:"details,omitempty"`
// Evidence holds optional key/value facts supporting the issue.
Evidence map[string]string `json:"evidence,omitempty"`
// SuggestedFix is optional remediation text.
SuggestedFix string `json:"suggested_fix,omitempty"`
State State `json:"state"`
// FirstSeen is the basis for Age; LastSeen is used as the recency key by SortIssuesDefault.
FirstSeen time.Time `json:"first_seen"`
LastSeen time.Time `json:"last_seen"`
}
// Age reports how long the issue has existed at now, i.e. now - FirstSeen.
// It returns 0 when FirstSeen is unset or lies after now.
func (i Issue) Age(now time.Time) time.Duration {
	if i.FirstSeen.IsZero() || now.Before(i.FirstSeen) {
		return 0
	}
	return now.Sub(i.FirstSeen)
}
// SortIssuesDefault sorts issues in-place by Priority desc, then LastSeen desc.
// Issues that tie on both are ordered by ascending ID so the result is
// deterministic.
//
// This matches the default view specified in PLAN.md.
func SortIssuesDefault(issues []Issue) {
	sort.SliceStable(issues, func(i, j int) bool {
		left, right := &issues[i], &issues[j]

		if lw, rw := left.Priority.Weight(), right.Priority.Weight(); lw != rw {
			return lw > rw
		}
		if left.LastSeen.Equal(right.LastSeen) {
			// Deterministic tie-breaker.
			return left.ID < right.ID
		}
		return left.LastSeen.After(right.LastSeen)
	})
}
+75
View File
@@ -0,0 +1,75 @@
package model
import (
"encoding/json"
"reflect"
"testing"
"time"
)
// TestSortIssuesDefault_PriorityThenRecency checks that the default sort puts
// the most urgent priority first and, within one priority, the most recently
// seen issue first.
func TestSortIssuesDefault_PriorityThenRecency(t *testing.T) {
	base := time.Date(2025, 12, 1, 10, 0, 0, 0, time.UTC)
	at := func(sec int) time.Time { return base.Add(time.Duration(sec) * time.Second) }

	issues := []Issue{
		{ID: "b", Priority: PriorityP1, LastSeen: at(10)},
		{ID: "a", Priority: PriorityP0, LastSeen: at(1)},
		{ID: "c", Priority: PriorityP1, LastSeen: at(20)},
		{ID: "d", Priority: PriorityP2, LastSeen: at(30)},
	}

	SortIssuesDefault(issues)

	got := make([]string, 0, len(issues))
	for _, iss := range issues {
		got = append(got, iss.ID)
	}
	// P0 first; within P1 higher LastSeen first.
	want := []string{"a", "c", "b", "d"}
	if !reflect.DeepEqual(got, want) {
		t.Fatalf("order mismatch: got %v want %v", got, want)
	}
}
// TestJSONRoundTrip_EnumsStable marshals a fully populated Issue and checks
// that unmarshalling the result restores every field.
func TestJSONRoundTrip_EnumsStable(t *testing.T) {
	seen := time.Date(2025, 12, 20, 12, 0, 0, 0, time.UTC)
	original := Issue{
		ID:           "host:disk:/home:usage",
		Category:     CategoryStorage,
		Priority:     PriorityP1,
		Title:        "Disk nearly full",
		Details:      "Usage above threshold",
		Evidence:     map[string]string{"mount": "/home", "used_pct": "93"},
		SuggestedFix: "du -sh * | sort -h",
		State:        StateOpen,
		FirstSeen:    seen,
		LastSeen:     seen.Add(5 * time.Second),
	}

	encoded, err := json.Marshal(original)
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}

	var decoded Issue
	if err := json.Unmarshal(encoded, &decoded); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}

	in, out := original, decoded

	// Identity and enum fields.
	enumsMatch := in.ID == out.ID && in.Category == out.Category && in.Priority == out.Priority && in.State == out.State
	if !enumsMatch {
		t.Fatalf("basic fields mismatch after round-trip: in=%+v out=%+v", in, out)
	}
	// Free-text fields.
	textMatch := in.Title == out.Title && in.Details == out.Details && in.SuggestedFix == out.SuggestedFix
	if !textMatch {
		t.Fatalf("string fields mismatch after round-trip")
	}
	if !reflect.DeepEqual(in.Evidence, out.Evidence) {
		t.Fatalf("evidence mismatch after round-trip: in=%v out=%v", in.Evidence, out.Evidence)
	}
	// Timestamps are compared with Equal rather than ==.
	timesMatch := in.FirstSeen.Equal(out.FirstSeen) && in.LastSeen.Equal(out.LastSeen)
	if !timesMatch {
		t.Fatalf("time mismatch after round-trip: in=(%v,%v) out=(%v,%v)", in.FirstSeen, in.LastSeen, out.FirstSeen, out.LastSeen)
	}
}
// TestJSON_InvalidEnumRejected checks that decoding an Issue whose priority is
// not one of the known values fails.
func TestJSON_InvalidEnumRejected(t *testing.T) {
	// Everything is well-formed except the priority "P9".
	const payload = `{"id":"x","category":"Storage","priority":"P9","title":"t","state":"Open","first_seen":"2025-12-20T12:00:00Z","last_seen":"2025-12-20T12:00:01Z"}`

	var decoded Issue
	err := json.Unmarshal([]byte(payload), &decoded)
	if err == nil {
		t.Fatalf("expected error for invalid priority")
	}
}
+182
View File
@@ -0,0 +1,182 @@
package store
import (
"sync"
"time"
"tower/internal/model"
)
const defaultResolveAfter = 30 * time.Second
// Store is an in-memory IssueStore.
//
// Responsibilities (per PLAN.md):
// - Dedupe by Issue.ID
// - Track FirstSeen/LastSeen
// - Maintain State (Open/Acknowledged/Resolved)
// - Resolve issues only after resolveAfter duration of continuous absence
// - Acknowledgements are in-memory only (not persisted)
// - Safe for concurrent use
type Store struct {
// mu guards issues and ack. Every method in this file takes the write lock,
// including Snapshot, because Snapshot may update issue states.
mu sync.RWMutex
// resolveAfter is how long an issue must go unseen before it is marked Resolved.
resolveAfter time.Duration
// issues holds the latest known version of each issue keyed by stable ID.
issues map[string]model.Issue
// ack is an in-memory toggle keyed by issue ID.
// If true and the issue is currently present, its state is Acknowledged.
ack map[string]bool
}
// New returns an empty Store that resolves issues after resolveAfter of
// absence. If resolveAfter <= 0, a default of 30s is used.
func New(resolveAfter time.Duration) *Store {
	s := &Store{
		resolveAfter: defaultResolveAfter,
		issues:       map[string]model.Issue{},
		ack:          map[string]bool{},
	}
	if resolveAfter > 0 {
		s.resolveAfter = resolveAfter
	}
	return s
}
// Upsert merges "currently true" issues for this tick.
//
// Incoming is deduped by Issue.ID; the first instance wins for non-timestamp
// fields, and issues with an empty ID are dropped. Timestamps and state are
// managed by the store: every present issue gets LastSeen = now and state Open
// (or Acknowledged when its ack toggle is set). FirstSeen is kept for issues
// that are already tracked and unresolved, and reset to now for new issues and
// for resolved issues that reappear. Tracked issues missing from incoming are
// resolved once they have been unseen for resolveAfter.
func (s *Store) Upsert(now time.Time, incoming []model.Issue) {
	// Pre-dedupe without locking to keep lock hold times small.
	present := make(map[string]model.Issue, len(incoming))
	for _, iss := range incoming {
		if iss.ID == "" {
			// Ignore invalid issues. ID is the stable dedupe key.
			continue
		}
		if _, dup := present[iss.ID]; !dup {
			present[iss.ID] = iss
		}
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	for id, iss := range present {
		// A new issue, or a resolved one reappearing, starts a new "episode";
		// an open/acked issue keeps its original FirstSeen.
		firstSeen := now
		if prev, tracked := s.issues[id]; tracked && prev.State != model.StateResolved {
			firstSeen = prev.FirstSeen
		}

		iss.FirstSeen = firstSeen
		iss.LastSeen = now
		iss.State = model.StateOpen
		if s.ack[id] {
			iss.State = model.StateAcknowledged
		}
		s.issues[id] = iss
	}

	// Update resolved state for issues not present this tick.
	s.applyResolutionsLocked(now, present)
}
// Snapshot returns a point-in-time copy of all known issues, in no particular
// order, with their states updated according to resolveAfter.
//
// Snapshot cannot know which IDs are present "this tick", so resolution here is
// decided purely by the absence window (age of LastSeen). Each returned issue
// has its own Evidence map.
func (s *Store) Snapshot(now time.Time) []model.Issue {
	s.mu.Lock()
	defer s.mu.Unlock()

	s.applyResolutionsLocked(now, nil)

	out := make([]model.Issue, len(s.issues))
	next := 0
	for _, iss := range s.issues {
		out[next] = deepCopyIssue(iss)
		next++
	}
	return out
}
// Acknowledge sets the in-memory ack toggle for id and, if the issue is
// currently tracked and not Resolved, switches it to Acknowledged.
// An empty id is ignored.
func (s *Store) Acknowledge(id string) {
	if id == "" {
		return
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	s.ack[id] = true
	if iss, tracked := s.issues[id]; tracked && iss.State != model.StateResolved {
		iss.State = model.StateAcknowledged
		s.issues[id] = iss
	}
}
// Unacknowledge clears the in-memory ack toggle for id and, if the issue is
// currently tracked and not Resolved, switches it back to Open.
// An empty id is ignored.
func (s *Store) Unacknowledge(id string) {
	if id == "" {
		return
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	delete(s.ack, id)
	if iss, tracked := s.issues[id]; tracked && iss.State != model.StateResolved {
		iss.State = model.StateOpen
		s.issues[id] = iss
	}
}
// applyResolutionsLocked marks as Resolved every tracked issue that is not in
// present and whose LastSeen is at least resolveAfter before now.
//
// present may be nil, in which case only the absence window decides. The
// caller must hold s.mu.
func (s *Store) applyResolutionsLocked(now time.Time, present map[string]model.Issue) {
	for id, iss := range s.issues {
		// An issue seen this tick cannot be resolved. Looking up a key in a
		// nil map is safe and simply reports "not present".
		if _, seenNow := present[id]; seenNow {
			continue
		}
		if iss.State == model.StateResolved {
			continue
		}
		if s.resolveAfter <= 0 || now.Sub(iss.LastSeen) < s.resolveAfter {
			continue
		}
		iss.State = model.StateResolved
		s.issues[id] = iss
	}
}
// deepCopyIssue returns a copy of in whose Evidence map is independent of the
// original's. A nil Evidence map stays nil.
func deepCopyIssue(in model.Issue) model.Issue {
	if in.Evidence == nil {
		return in
	}
	evidence := make(map[string]string, len(in.Evidence))
	for key, value := range in.Evidence {
		evidence[key] = value
	}
	// in is a by-value parameter, so this does not touch the caller's issue.
	in.Evidence = evidence
	return in
}
+101
View File
@@ -0,0 +1,101 @@
package store
import (
"testing"
"time"
"tower/internal/model"
)
// TestStore_Upsert_DedupAndTimestamps checks that duplicate IDs within one
// Upsert collapse into a single Open issue stamped with the upsert time, and
// that a later Upsert keeps FirstSeen while advancing LastSeen.
func TestStore_Upsert_DedupAndTimestamps(t *testing.T) {
	firstTick := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
	secondTick := firstTick.Add(5 * time.Second)
	st := New(30 * time.Second)

	// Same ID twice in one Upsert should dedupe.
	st.Upsert(firstTick, []model.Issue{
		{ID: "i-1", Title: "first"},
		{ID: "i-1", Title: "should be ignored"},
	})

	initial := st.Snapshot(firstTick)
	if len(initial) != 1 {
		t.Fatalf("expected 1 issue, got %d", len(initial))
	}
	created := initial[0]
	if created.ID != "i-1" {
		t.Fatalf("expected id i-1, got %q", created.ID)
	}
	if !created.FirstSeen.Equal(firstTick) {
		t.Fatalf("expected FirstSeen=%v, got %v", firstTick, created.FirstSeen)
	}
	if !created.LastSeen.Equal(firstTick) {
		t.Fatalf("expected LastSeen=%v, got %v", firstTick, created.LastSeen)
	}
	if created.State != model.StateOpen {
		t.Fatalf("expected State=Open, got %q", created.State)
	}

	// Subsequent Upsert for same ID should preserve FirstSeen and update LastSeen.
	st.Upsert(secondTick, []model.Issue{{ID: "i-1", Title: "updated"}})

	later := st.Snapshot(secondTick)
	if len(later) != 1 {
		t.Fatalf("expected 1 issue, got %d", len(later))
	}
	updated := later[0]
	if !updated.FirstSeen.Equal(firstTick) {
		t.Fatalf("expected FirstSeen to remain %v, got %v", firstTick, updated.FirstSeen)
	}
	if !updated.LastSeen.Equal(secondTick) {
		t.Fatalf("expected LastSeen=%v, got %v", secondTick, updated.LastSeen)
	}
}
// TestStore_AckPreservedWhilePresent checks that an acknowledged issue stays
// acknowledged while collectors keep reporting it, and that Unacknowledge
// returns it to the Open state.
func TestStore_AckPreservedWhilePresent(t *testing.T) {
	now1 := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
	now2 := now1.Add(1 * time.Second)
	s := New(30 * time.Second)

	s.Upsert(now1, []model.Issue{{ID: "i-1", Title: "t"}})
	s.Acknowledge("i-1")

	// Upsert again while present should remain Acked.
	s.Upsert(now2, []model.Issue{{ID: "i-1", Title: "t2"}})
	snap := s.Snapshot(now2)
	if len(snap) != 1 {
		t.Fatalf("expected 1 issue, got %d", len(snap))
	}
	if snap[0].State != model.StateAcknowledged {
		t.Fatalf("expected State=Acknowledged, got %q", snap[0].State)
	}

	s.Unacknowledge("i-1")
	snap2 := s.Snapshot(now2)
	// Check the length before indexing so a regression fails with a message
	// instead of an index-out-of-range panic.
	if len(snap2) != 1 {
		t.Fatalf("expected 1 issue after unack, got %d", len(snap2))
	}
	if snap2[0].State != model.StateOpen {
		t.Fatalf("expected State=Open after unack, got %q", snap2[0].State)
	}
}
// TestStore_ResolvesOnlyAfterAbsenceWindow checks that an issue that stops
// being reported stays Open until it has been absent for longer than the
// store's resolveAfter window, and is Resolved afterwards.
func TestStore_ResolvesOnlyAfterAbsenceWindow(t *testing.T) {
	resolveAfter := 10 * time.Second
	now0 := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
	s := New(resolveAfter)
	s.Upsert(now0, []model.Issue{{ID: "i-1", Title: "t"}})

	// Miss a tick shortly after; should not resolve due to flap suppression / window.
	s.Upsert(now0.Add(1*time.Second), nil)
	snap1 := s.Snapshot(now0.Add(9 * time.Second))
	if len(snap1) != 1 {
		t.Fatalf("expected 1 issue, got %d", len(snap1))
	}
	if snap1[0].State != model.StateOpen {
		t.Fatalf("expected still Open before resolveAfter, got %q", snap1[0].State)
	}

	// Still absent beyond resolveAfter => should resolve.
	snap2 := s.Snapshot(now0.Add(11 * time.Second))
	// Check the length before indexing so a regression fails with a message
	// instead of an index-out-of-range panic.
	if len(snap2) != 1 {
		t.Fatalf("expected 1 issue after resolveAfter, got %d", len(snap2))
	}
	if snap2[0].State != model.StateResolved {
		t.Fatalf("expected Resolved after absence > resolveAfter, got %q", snap2[0].State)
	}
}
+886
View File
@@ -0,0 +1,886 @@
package ui
import (
"fmt"
"os"
"sort"
"strings"
"time"
"github.com/atotto/clipboard"
"github.com/charmbracelet/bubbles/key"
"github.com/charmbracelet/bubbles/table"
"github.com/charmbracelet/bubbles/textinput"
"github.com/charmbracelet/bubbles/viewport"
bubbletea "github.com/charmbracelet/bubbletea"
"github.com/charmbracelet/lipgloss"
"tower/internal/engine"
"tower/internal/model"
)
// Focus identifies which part of the UI currently receives key input.
type Focus int
// Focus values. Update routes otherwise-unhandled messages according to the
// active focus.
const (
focusTable Focus = iota // issue table; the initial focus set by New
focusDetails // details viewport
focusSearch // search text input
)
// SortMode selects the ordering applied to the issue table rows.
// In every mode, remaining ties are broken by ascending issue ID.
type SortMode int
// Sort modes, cycled in declaration order by the Sort key.
const (
sortDefault SortMode = iota // Priority desc, LastSeen desc
sortRecency // LastSeen desc
sortCategory // Category asc, Priority desc, LastSeen desc
)
// AgeMode selects how issue ages are formatted (see formatAgeWithMode).
type AgeMode int
// Age formats, toggled by the ToggleAgeFormat key.
const (
AgeCompact AgeMode = iota // 0s, Xs, Xm, Xh, Xd
AgeRelative // Xs ago, Xm ago, Xh ago, Xd ago ("0m ago" for non-positive ages)
)
// AckFunc is called with an issue ID when the user acknowledges that issue.
type AckFunc func(id string)
// UnackFunc is called with an issue ID when the user removes an acknowledgement.
type UnackFunc func(id string)
// RefreshNowFunc is called when the user presses the refresh key.
type RefreshNowFunc func()
// ExportFunc is called with the destination path and the current snapshot's
// issues when the user presses the export key; its error is shown in the UI.
type ExportFunc func(path string, issues []model.Issue) error
// Model is the Bubble Tea model for the ControlTower UI.
//
// It intentionally keeps rendering cheap:
// - Table rows are only rebuilt when snapshot or filters/sort change.
// - A 1s tick updates header time/age counters without rebuilding rows.
//
// Update and View use value receivers; the helpers that mutate view state
// (layout, applyViewFromSnapshot, ...) use pointer receivers.
//
//nolint:structcheck // (fields used conditionally based on callbacks)
type Model struct {
host string // host name shown in the header; New falls back to os.Hostname when empty
styles Styles // active theme styles
keys KeyMap // key bindings consulted by Update
showHelp bool // when true, View renders only the help overlay
focus Focus // which pane receives key input
snap engine.Snapshot // most recent snapshot received from the engine
now time.Time // wall-clock time refreshed on every tick and snapshot
// Cached view state.
filterPri model.Priority // "" means no priority filter
filterCat model.Category // "" means no category filter
search string // applied search query; matched case-insensitively against Title and Details
sortMode SortMode
wideTitle bool
ageMode AgeMode
themeMode ThemeMode
issueByID map[string]model.Issue // snapshot issues indexed by ID
rowsIDs []string // table row index -> issue ID
table table.Model
details viewport.Model
searchIn textinput.Model
w int // last reported window width
h int // last reported window height
// callbacks
refreshNow RefreshNowFunc // may be nil
ack AckFunc // may be nil
unack UnackFunc // may be nil
export ExportFunc // may be nil
lastExportPath string // export destination; New sets "issues.json"
snapshots <-chan engine.Snapshot // source of snapshots consumed by waitForSnapshot
lastP0Count int // P0 count of the previous snapshot, used to decide whether to ring the bell
noBell bool // true when the NO_BELL environment variable is "1"
loaded bool // true once the first snapshot has arrived
exporting bool // true while an export command is in flight
err error // last error, rendered at the bottom of the details pane
}
// snapshotMsg carries a snapshot read from the engine channel; a zero value
// signals that the channel was closed (see waitForSnapshot).
type snapshotMsg engine.Snapshot
// tickMsg carries the time of the periodic one-second tick.
type tickMsg time.Time
// exportDoneMsg reports the outcome of an export command.
type exportDoneMsg struct{ err error }
// helpRequestedMsg is not referenced by the code visible in this file section.
type helpRequestedMsg struct{}
// New builds the initial UI model.
//
// host is the name shown in the header; when empty, the OS hostname is used if
// it can be determined. snapshots is the channel the model listens on for
// engine snapshots. refresh, ack, unack and export are optional callbacks (nil
// disables the corresponding action). The terminal bell is disabled when the
// NO_BELL environment variable is "1".
func New(host string, snapshots <-chan engine.Snapshot, refresh RefreshNowFunc, ack AckFunc, unack UnackFunc, export ExportFunc) Model {
	if host == "" {
		if name, err := os.Hostname(); err == nil {
			host = name
		}
	}

	issues := newIssueTable()

	pane := viewport.New(0, 0)
	pane.YPosition = 0

	query := textinput.New()
	query.Placeholder = "search title/details"
	query.Prompt = "/ "
	query.CharLimit = 256
	query.Width = 40

	// loaded is left at its zero value (false) until the first snapshot arrives.
	return Model{
		host:           host,
		styles:         defaultStylesForMode(ThemeAuto),
		keys:           defaultKeyMap(),
		focus:          focusTable,
		sortMode:       sortDefault,
		themeMode:      ThemeAuto,
		issueByID:      map[string]model.Issue{},
		table:          issues,
		details:        pane,
		searchIn:       query,
		snapshots:      snapshots,
		refreshNow:     refresh,
		ack:            ack,
		unack:          unack,
		export:         export,
		lastExportPath: "issues.json",
		noBell:         os.Getenv("NO_BELL") == "1",
		now:            time.Now(),
	}
}
// Init starts the two background commands the model depends on: waiting for
// the first snapshot and the one-second clock tick.
func (m Model) Init() bubbletea.Cmd {
	listen := waitForSnapshot(m.snapshots)
	clock := tickCmd()
	return bubbletea.Batch(listen, clock)
}
// waitForSnapshot returns a command that blocks until the next snapshot
// arrives on ch and delivers it as a snapshotMsg. If ch is closed, a zero
// snapshot is delivered instead, which Update treats as "stop listening".
func waitForSnapshot(ch <-chan engine.Snapshot) bubbletea.Cmd {
	return func() bubbletea.Msg {
		if snap, open := <-ch; open {
			return snapshotMsg(snap)
		}
		return snapshotMsg(engine.Snapshot{})
	}
}
// tickCmd schedules a single tickMsg one second from now; Update re-arms it
// on every tick.
func tickCmd() bubbletea.Cmd {
	toMsg := func(at time.Time) bubbletea.Msg {
		return tickMsg(at)
	}
	return bubbletea.Tick(1*time.Second, toMsg)
}
// Update is the Bubble Tea update function.
//
// Messages are handled in this order:
//  1. Non-key messages (tick, snapshot, export result, window size) are always
//     processed, regardless of focus or overlays.
//  2. While the search input has focus, keys go to the search input.
//  3. While the help overlay is shown, only the keys that close it are honoured.
//  4. Global key bindings.
//  5. Anything left is forwarded to the focused component (table or details).
//
// It returns the updated model and the next command to run, if any.
func (m Model) Update(msg bubbletea.Msg) (bubbletea.Model, bubbletea.Cmd) {
	switch msg := msg.(type) {
	case tickMsg:
		m.now = time.Time(msg)
		// Keep ticking for header time and details age, but avoid rebuilding rows.
		m.setDetailsToSelected()
		return m, tickCmd()

	case snapshotMsg:
		s := engine.Snapshot(msg)
		// waitForSnapshot reports a closed channel as a zero snapshot: stop listening.
		if s.At.IsZero() && s.Collectors == nil && s.Issues == nil {
			return m, nil
		}
		m.snap = s
		m.now = time.Now()
		m.loaded = true

		newP0Count := 0
		for _, iss := range s.Issues {
			if iss.Priority == model.PriorityP0 {
				newP0Count++
			}
		}
		m.applyViewFromSnapshot()

		// Ring the terminal bell when the number of P0 issues grew since the
		// previous snapshot (disabled with NO_BELL=1).
		if newP0Count > m.lastP0Count && !m.noBell {
			fmt.Fprint(os.Stdout, "\a")
		}
		m.lastP0Count = newP0Count
		return m, waitForSnapshot(m.snapshots)

	case exportDoneMsg:
		// Handled here, before any focus-based routing, so the result is never
		// swallowed by the search input, the help overlay or the focused pane;
		// otherwise the "exporting" indicator would never clear.
		m.exporting = false
		m.err = msg.err
		return m, nil

	case bubbletea.WindowSizeMsg:
		m.w, m.h = msg.Width, msg.Height
		m.layout()
		return m, nil
	}

	// Search input mode.
	if m.focus == focusSearch {
		switch {
		case keyMatch(msg, m.keys.Cancel):
			// Abandon the edit and restore the previously applied query.
			m.focus = focusTable
			m.searchIn.Blur()
			m.searchIn.SetValue(m.search)
			return m, nil
		case keyMatch(msg, m.keys.ClearFilters):
			// With the default key map Cancel and ClearFilters are both "esc",
			// so this case only triggers when the bindings are customised.
			m.focus = focusTable
			m.searchIn.Blur()
			m.search = ""
			m.applyViewFromSnapshot()
			return m, nil
		case keyMatch(msg, m.keys.Apply):
			m.search = strings.TrimSpace(m.searchIn.Value())
			m.focus = focusTable
			m.searchIn.Blur()
			m.applyViewFromSnapshot()
			return m, nil
		}
		var cmd bubbletea.Cmd
		m.searchIn, cmd = m.searchIn.Update(msg)
		return m, cmd
	}

	// Help overlay mode - only help-related keys are processed.
	if m.showHelp {
		if keyMatch(msg, m.keys.Help) || keyMatch(msg, m.keys.Cancel) {
			m.showHelp = false
		}
		// Ignore all other keys while help is shown.
		return m, nil
	}

	// Global keybindings.
	switch {
	case keyMatch(msg, m.keys.Quit):
		return m, bubbletea.Quit

	case keyMatch(msg, m.keys.RefreshNow):
		if m.refreshNow != nil {
			m.refreshNow()
		}
		return m, nil

	case keyMatch(msg, m.keys.Search):
		m.focus = focusSearch
		m.searchIn.SetValue(m.search)
		m.searchIn.CursorEnd()
		m.searchIn.Focus()
		return m, nil

	case keyMatch(msg, m.keys.Priority):
		m.cyclePriorityFilter()
		m.applyViewFromSnapshot()
		return m, nil

	case keyMatch(msg, m.keys.PriorityP0):
		m.filterPri = model.PriorityP0
		m.applyViewFromSnapshot()
		return m, nil

	case keyMatch(msg, m.keys.PriorityP1):
		m.filterPri = model.PriorityP1
		m.applyViewFromSnapshot()
		return m, nil

	case keyMatch(msg, m.keys.PriorityP2):
		m.filterPri = model.PriorityP2
		m.applyViewFromSnapshot()
		return m, nil

	case keyMatch(msg, m.keys.PriorityP3):
		m.filterPri = model.PriorityP3
		m.applyViewFromSnapshot()
		return m, nil

	case keyMatch(msg, m.keys.Category):
		m.cycleCategoryFilter()
		m.applyViewFromSnapshot()
		return m, nil

	case keyMatch(msg, m.keys.Sort):
		m.sortMode = (m.sortMode + 1) % 3
		m.applyViewFromSnapshot()
		return m, nil

	case keyMatch(msg, m.keys.FocusNext):
		if m.focus == focusTable {
			m.focus = focusDetails
			m.table.Blur()
			// viewport has no Focus/Blur; we just route keys.
			return m, nil
		}
		m.focus = focusTable
		m.table.Focus()
		return m, nil

	case keyMatch(msg, m.keys.AckToggle):
		m.toggleAckSelected()
		return m, nil

	case keyMatch(msg, m.keys.AckAll):
		m.ackAllVisible()
		return m, nil

	case keyMatch(msg, m.keys.Export):
		if m.export != nil {
			m.exporting = true
			// Capture what the command needs so it does not depend on later
			// changes to the model.
			export := m.export
			path := m.lastExportPath
			issues := m.snap.Issues
			return m, func() bubbletea.Msg {
				return exportDoneMsg{err: export(path, issues)}
			}
		}
		return m, nil

	case keyMatch(msg, m.keys.Help):
		m.showHelp = !m.showHelp
		return m, nil

	case keyMatch(msg, m.keys.JumpToTop):
		if len(m.rowsIDs) > 0 {
			m.table.SetCursor(0)
			m.setDetailsToSelected()
		}
		return m, nil

	case keyMatch(msg, m.keys.JumpToBottom):
		if len(m.rowsIDs) > 0 {
			m.table.SetCursor(len(m.rowsIDs) - 1)
			m.setDetailsToSelected()
		}
		return m, nil

	case keyMatch(msg, m.keys.Copy):
		m.copySelectedToClipboard()
		return m, nil

	case keyMatch(msg, m.keys.ToggleWideTitle):
		m.wideTitle = !m.wideTitle
		m.layout()
		// Rebuild rows to apply new title width.
		m.applyViewFromSnapshot()
		return m, nil

	case keyMatch(msg, m.keys.ToggleAgeFormat):
		m.ageMode = (m.ageMode + 1) % 2
		m.applyViewFromSnapshot()
		return m, nil

	case keyMatch(msg, m.keys.ToggleTheme):
		// Cycle through theme modes: Auto -> Light -> Dark -> Auto.
		m.themeMode = (m.themeMode + 1) % 3
		m.styles = defaultStylesForMode(m.themeMode)
		// Refresh the view with new styles.
		m.applyViewFromSnapshot()
		return m, nil

	case keyMatch(msg, m.keys.ClearFilters):
		m.filterPri = ""
		m.filterCat = ""
		m.search = ""
		m.applyViewFromSnapshot()
		return m, nil
	}

	// Focus-specific updates.
	// Note: bubbles/table already handles page navigation keys (PgUp/PgDn, Ctrl+u/Ctrl+d, Home/End)
	// natively, so we don't need to override them here.
	switch m.focus {
	case focusTable:
		var cmd bubbletea.Cmd
		m.table, cmd = m.table.Update(msg)
		// When selection changes, update details content.
		m.setDetailsToSelected()
		return m, cmd
	case focusDetails:
		var cmd bubbletea.Cmd
		m.details, cmd = m.details.Update(msg)
		return m, cmd
	}
	return m, nil
}
// layout recomputes the table and details-pane dimensions from the last known
// window size. It is a no-op until a window size has been received.
//
// Vertically, one line goes to the header, one more to the search bar while
// the search input has focus, and the rest is split between the table (about
// two thirds) and the details pane (about one third).
//
// Horizontally, the Title column takes whatever the fixed-width columns leave
// over, with a floor of 20 cells. In wide-title mode the Cat and State columns
// are compacted and the reclaimed width goes to Title, capped at twice the
// normal title width and never below it. The compact widths are a chosen
// trade-off; cells that no longer fit are shortened by the table.
func (m *Model) layout() {
	if m.w <= 0 || m.h <= 0 {
		return
	}

	const (
		headerH = 1 // header bar

		priW         = 3
		ageW         = 7
		normalCatW   = 12
		normalStateW = 13
		wideCatW     = 6 // compact Cat width used in wide-title mode
		wideStateW   = 6 // compact State width used in wide-title mode
		minTitleW    = 20

		// Allowance for separators/padding between columns.
		// NOTE(review): bubbles/table's default cell style may add padding per
		// column; confirm this allowance against the rendered table width.
		overhead = 4
	)

	// Search bar: 1 line (shown only in search focus).
	searchH := 0
	if m.focus == focusSearch {
		searchH = 1
	}
	bodyH := m.h - headerH - searchH
	if bodyH < 4 {
		bodyH = 4
	}
	detailsH := bodyH / 3
	tableH := bodyH - detailsH
	if tableH < 3 {
		tableH = 3
	}

	catW, stateW := normalCatW, normalStateW
	titleW := m.w - (priW + catW + ageW + stateW + overhead)
	if titleW < minTitleW {
		titleW = minTitleW
	}
	if m.wideTitle {
		// The normal title width already consumes all remaining space, so the
		// only way to widen it is to take width from other columns.
		normalTitleW := titleW
		catW, stateW = wideCatW, wideStateW
		titleW = m.w - (priW + catW + ageW + stateW + overhead)
		if titleW > 2*normalTitleW {
			titleW = 2 * normalTitleW
		}
		if titleW < normalTitleW {
			titleW = normalTitleW
		}
	}

	cols := m.table.Columns()
	for i := range cols {
		switch cols[i].Title {
		case colPri:
			cols[i].Width = priW
		case colCat:
			cols[i].Width = catW
		case colTitle:
			cols[i].Width = titleW
		case colAge:
			cols[i].Width = ageW
		case colState:
			cols[i].Width = stateW
		}
	}
	m.table.SetColumns(cols)
	m.table.SetHeight(tableH)
	m.details.Width = m.w
	m.details.Height = detailsH
}
// applyViewFromSnapshot rebuilds everything derived from the current snapshot
// and view settings: the ID index, the filtered and sorted table rows, the
// cursor position and the details pane.
//
// The issue selected before the rebuild stays selected if it is still visible;
// otherwise the cursor is kept within the new row range. Before the first
// snapshot arrives only a loading message is shown, and when no issue matches
// the filters an "all healthy" message is shown instead of details.
func (m *Model) applyViewFromSnapshot() {
	// Remember the selection before m.rowsIDs is replaced below; looking it up
	// afterwards would resolve the cursor against the new rows and make the
	// restore step a no-op.
	prevSelID := m.selectedIssueID()

	// Build ID index for O(1) selection lookup.
	m.issueByID = make(map[string]model.Issue, len(m.snap.Issues))
	for _, iss := range m.snap.Issues {
		m.issueByID[iss.ID] = iss
	}

	// Show loading state before first snapshot arrives.
	if !m.loaded {
		msg := "Loading collector data... Please wait."
		m.details.SetContent(m.styles.Muted.Render(msg))
		return
	}

	// Filter. The query is lower-cased once rather than per issue.
	query := strings.ToLower(m.search)
	filtered := make([]model.Issue, 0, len(m.snap.Issues))
	for _, iss := range m.snap.Issues {
		if m.filterPri != "" && iss.Priority != m.filterPri {
			continue
		}
		if m.filterCat != "" && iss.Category != m.filterCat {
			continue
		}
		if query != "" &&
			!strings.Contains(strings.ToLower(iss.Title), query) &&
			!strings.Contains(strings.ToLower(iss.Details), query) {
			continue
		}
		filtered = append(filtered, iss)
	}

	// Sort. Category (category mode only), then priority weight (all modes but
	// recency), then most recently seen, then ID as the final tie-breaker.
	sort.SliceStable(filtered, func(i, j int) bool {
		a, b := filtered[i], filtered[j]
		if m.sortMode == sortCategory && a.Category != b.Category {
			return a.Category < b.Category
		}
		if m.sortMode != sortRecency {
			if aw, bw := a.Priority.Weight(), b.Priority.Weight(); aw != bw {
				return aw > bw
			}
		}
		if !a.LastSeen.Equal(b.LastSeen) {
			return a.LastSeen.After(b.LastSeen)
		}
		return a.ID < b.ID
	})

	rows, ids := buildRows(m.snap.At, m.ageMode, filtered)
	m.rowsIDs = ids
	m.table.SetRows(rows)
	if len(rows) == 0 {
		m.table.SetCursor(0)
		msg := "All systems healthy. No issues detected.\n\nPress r to refresh, / to search past logs"
		m.details.SetContent(m.styles.Muted.Render(msg))
		return
	}

	// Try to keep selection stable.
	restored := false
	if prevSelID != "" {
		for i, id := range ids {
			if id == prevSelID {
				m.table.SetCursor(i)
				restored = true
				break
			}
		}
	}
	if !restored {
		// The previous selection is gone (or there was none): make sure the
		// cursor points at an existing row.
		switch cur := m.table.Cursor(); {
		case cur < 0:
			m.table.SetCursor(0)
		case cur >= len(rows):
			m.table.SetCursor(len(rows) - 1)
		}
	}
	m.setDetailsToSelected()
	m.layout()
}
// selectedIssueID returns the ID of the issue under the table cursor, or ""
// when the cursor does not point at a known row.
func (m *Model) selectedIssueID() string {
	if cur := m.table.Cursor(); cur >= 0 && cur < len(m.rowsIDs) {
		return m.rowsIDs[cur]
	}
	return ""
}
// setDetailsToSelected fills the details pane with the currently selected
// issue, or with a placeholder when nothing is selected.
func (m *Model) setDetailsToSelected() {
	selected, found := m.issueByID[m.selectedIssueID()]
	if found {
		m.details.SetContent(renderIssueDetails(m.now, m.ageMode, selected))
		return
	}
	m.details.SetContent(m.styles.Muted.Render("No issue selected."))
}
// toggleAckSelected flips the selected issue between Open and Acknowledged.
//
// Resolved issues are left alone. The matching callback (ack or unack) is
// invoked when it is set, and the local copy, table row and details pane are
// updated immediately rather than waiting for the next snapshot.
func (m *Model) toggleAckSelected() {
	id := m.selectedIssueID()
	selected, known := m.issueByID[id]
	if id == "" || !known || selected.State == model.StateResolved {
		return
	}

	// Callbacks (store-backed if wired), followed by the optimistic local
	// update; the store will correct it on the next snapshot.
	if selected.State == model.StateAcknowledged {
		if m.unack != nil {
			m.unack(id)
		}
		selected.State = model.StateOpen
	} else {
		if m.ack != nil {
			m.ack(id)
		}
		selected.State = model.StateAcknowledged
	}
	m.issueByID[id] = selected

	// Patch just the State cell (column index 4) instead of rebuilding all rows.
	cur := m.table.Cursor()
	if rows := m.table.Rows(); cur >= 0 && cur < len(rows) {
		rows[cur][4] = selected.State.String()
		m.table.SetRows(rows)
	}
	m.setDetailsToSelected()
}
// ackAllVisible acknowledges every Open issue currently listed in the table.
//
// It does nothing when no ack callback is configured. Issues that are already
// acknowledged or resolved are skipped. Local state and the State column are
// updated immediately for each acknowledged issue.
func (m *Model) ackAllVisible() {
	if m.ack == nil {
		return
	}

	rows := m.table.Rows()
	changed := false
	for i, id := range m.rowsIDs {
		iss, known := m.issueByID[id]
		if !known || iss.State != model.StateOpen {
			continue
		}
		m.ack(id)

		// Optimistic local update.
		iss.State = model.StateAcknowledged
		m.issueByID[id] = iss

		if i >= len(rows) {
			continue
		}
		rows[i][4] = iss.State.String() // State column index
		changed = true
	}

	if !changed {
		return
	}
	m.table.SetRows(rows)
	m.setDetailsToSelected()
}
// copySelectedToClipboard copies the selected issue's suggested fix to the
// system clipboard, falling back to its title when no fix is available.
//
// On failure the error is stored in m.err; on success a short confirmation is
// placed above the issue details.
func (m *Model) copySelectedToClipboard() {
	id := m.selectedIssueID()
	if id == "" {
		return
	}
	selected, found := m.issueByID[id]
	if !found {
		return
	}

	payload := selected.Title
	if selected.SuggestedFix != "" {
		payload = selected.SuggestedFix
	}

	err := clipboard.WriteAll(payload)
	if err != nil {
		m.err = fmt.Errorf("Failed to copy to clipboard: %w. Is xclip/xsel installed?", err)
		return
	}

	notice := m.styles.Muted.Render("Copied to clipboard\n\n")
	m.details.SetContent(notice + renderIssueDetails(m.now, m.ageMode, selected))
}
// cyclePriorityFilter advances the priority filter through
// none -> P0 -> P1 -> P2 -> P3 -> none.
func (m *Model) cyclePriorityFilter() {
	m.filterPri = cycle([]model.Priority{
		"",
		model.PriorityP0,
		model.PriorityP1,
		model.PriorityP2,
		model.PriorityP3,
	}, m.filterPri)
}
// cycleCategoryFilter advances the category filter to the next category in a
// fixed sequence that starts with "no filter" and wraps around at the end.
func (m *Model) cycleCategoryFilter() {
	sequence := []model.Category{
		"", // no filter
		model.CategoryPerformance,
		model.CategoryMemory,
		model.CategoryStorage,
		model.CategoryNetwork,
		model.CategoryThermals,
		model.CategoryProcesses,
		model.CategoryServices,
		model.CategoryLogs,
		model.CategoryUpdates,
		model.CategorySecurity,
		model.CategoryKubernetes,
	}
	next := cycle(sequence, m.filterCat)
	m.filterCat = next
}
// cycle returns the element that follows cur in order, wrapping around to the
// first element after the last one.
//
// If cur does not occur in order, the first element is returned. If order is
// empty there is nothing to cycle through, so cur is returned unchanged
// instead of indexing into the empty slice.
func cycle[T comparable](order []T, cur T) T {
	if len(order) == 0 {
		return cur
	}
	for i, v := range order {
		if v == cur {
			return order[(i+1)%len(order)]
		}
	}
	return order[0]
}
// View renders the whole screen: the help overlay when it is active, otherwise
// the header, the search bar (only while searching), the issue table and the
// details pane stacked vertically.
func (m Model) View() string {
	if m.showHelp {
		return renderHelp(m.keys, m.styles)
	}

	sections := make([]string, 0, 4)
	sections = append(sections, m.renderHeader())
	if m.focus == focusSearch {
		if bar := m.searchIn.View(); bar != "" {
			sections = append(sections, bar)
		}
	}
	sections = append(sections, m.table.View(), m.renderDetailsPane())
	return lipgloss.JoinVertical(lipgloss.Left, sections...)
}
// renderHeader builds the one-line status bar.
//
// The left part shows host, local time, snapshot age, per-priority issue
// counts and collector health counts, plus a warning once 180 or more issues
// are present. The right part shows the active filters, search query and sort
// mode; when the window width is known it is truncated to fit and padded so
// it sits at the right edge.
func (m Model) renderHeader() string {
	clock := m.now
	if clock.IsZero() {
		clock = time.Now()
	}

	snapAge := "-"
	if at := m.snap.At; !at.IsZero() {
		snapAge = formatAge(clock.Sub(at))
	}

	var p0, p1, p2, p3 int
	for _, iss := range m.snap.Issues {
		switch iss.Priority {
		case model.PriorityP0:
			p0++
		case model.PriorityP1:
			p1++
		case model.PriorityP2:
			p2++
		case model.PriorityP3:
			p3++
		}
	}

	var healthy, degraded, failing int
	for _, c := range m.snap.Collectors {
		switch c.Status.Health {
		case "OK":
			healthy++
		case "DEGRADED":
			degraded++
		case "ERROR":
			failing++
		}
	}

	priLabel, catLabel := "all", "all"
	if m.filterPri != "" {
		priLabel = m.filterPri.String()
	}
	if m.filterCat != "" {
		catLabel = m.filterCat.String()
	}

	// An unknown sort mode renders as an empty label.
	sortLabel := ""
	switch m.sortMode {
	case sortDefault:
		sortLabel = "pri→recent"
	case sortRecency:
		sortLabel = "recent"
	case sortCategory:
		sortLabel = "cat"
	}

	left := fmt.Sprintf(
		"host=%s time=%s age=%s P0=%d P1=%d P2=%d P3=%d collectors: ✓%d ⚠%d ✗%d",
		m.host,
		clock.Local().Format("15:04:05"),
		snapAge,
		p0, p1, p2, p3,
		healthy, degraded, failing,
	)

	// Add count warning when approaching 200 issues cap (90% = 180).
	if total := p0 + p1 + p2 + p3; total >= 180 {
		left += m.styles.Error.Render(fmt.Sprintf(" [~%d/200]", total))
	}

	// Highlight filters that are actually active.
	priStr := fmt.Sprintf("pri=%s", priLabel)
	if m.filterPri != "" {
		priStr = m.styles.FilterActive.Render(priStr)
	}
	catStr := fmt.Sprintf("cat=%s", catLabel)
	if m.filterCat != "" {
		catStr = m.styles.FilterActive.Render(catStr)
	}
	right := fmt.Sprintf("filter %s %s q=%q sort=%s", priStr, catStr, m.search, sortLabel)

	if m.w <= 0 {
		// Width unknown: just separate the two halves with a space.
		return m.styles.HeaderBar.Render(left + " " + right)
	}

	// Truncate right if needed.
	room := m.w - lipgloss.Width(left) - 1
	if room < 0 {
		room = 0
	}
	if lipgloss.Width(right) > room {
		right = lipgloss.NewStyle().MaxWidth(room).Render(right)
	}
	gap := 0
	if room > 0 {
		gap = max(1, room-lipgloss.Width(right))
	}
	return m.styles.HeaderBar.Render(left + strings.Repeat(" ", gap) + right)
}
// renderDetailsPane renders the details section: a title line (marked when the
// pane has focus) followed by the viewport content. While an export is running
// the content is replaced by a progress message, and the last error, if any,
// is appended below.
func (m Model) renderDetailsPane() string {
	heading := "Details"
	if m.focus == focusDetails {
		heading += " (focus)"
	}

	content := m.details.View()
	if m.exporting {
		content = "Exporting issues to " + m.lastExportPath + "..."
	}
	if m.err != nil {
		content += "\n" + m.styles.Error.Render(m.err.Error())
	}

	// Keep the details title cheap and avoid borders (can be expensive).
	return m.styles.DetailsTitle.Render(heading) + "\n" + content
}
// renderHelp renders the keybinding overlay using a throwaway help model that
// is already marked visible.
func renderHelp(keys KeyMap, styles Styles) string {
	overlay := HelpModel{visible: true}
	return overlay.Render(keys, styles)
}
// keyMatch reports whether msg is a key press that matches binding b.
// Non-key messages never match.
func keyMatch(msg bubbletea.Msg, b key.Binding) bool {
	if pressed, isKey := msg.(bubbletea.KeyMsg); isKey {
		return key.Matches(pressed, b)
	}
	return false
}
// max returns the larger of a and b.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
+105
View File
@@ -0,0 +1,105 @@
package ui
import (
"fmt"
"sort"
"strings"
"time"
"tower/internal/model"
)
// getRollupSamples returns the sample entries stored under the "samples"
// evidence key of a rollup issue. The value is split on " | ", each entry is
// trimmed, and blank entries are dropped. It returns nil when the key is
// missing or empty.
func getRollupSamples(iss model.Issue) []string {
	raw := iss.Evidence["samples"]
	if raw == "" {
		return nil
	}
	pieces := strings.Split(raw, " | ")
	samples := make([]string, 0, len(pieces))
	for _, piece := range pieces {
		if trimmed := strings.TrimSpace(piece); trimmed != "" {
			samples = append(samples, trimmed)
		}
	}
	return samples
}
// isRollupIssue reports whether iss is a rollup: either its ID starts with
// "k8s:rollup:", or it is a Kubernetes issue whose title mentions "rollup"
// (case-insensitive).
func isRollupIssue(iss model.Issue) bool {
	if strings.HasPrefix(iss.ID, "k8s:rollup:") {
		return true
	}
	return iss.Category == model.CategoryKubernetes &&
		strings.Contains(strings.ToLower(iss.Title), "rollup")
}
// renderIssueDetails formats one issue as plain text for the details pane.
//
// The output starts with the summary fields (title, priority, category, state,
// first/last seen, age formatted per mode relative to now) and then adds, each
// only when non-empty: the details text, up to 10 rollup samples for rollup
// issues, the evidence map sorted by key, and the suggested fix. Trailing
// newlines are removed.
func renderIssueDetails(now time.Time, mode AgeMode, iss model.Issue) string {
	var out strings.Builder

	fmt.Fprintf(&out, "Title: %s\n", oneLine(iss.Title))
	fmt.Fprintf(&out, "Priority: %s Category: %s State: %s\n", iss.Priority, iss.Category, iss.State)
	fmt.Fprintf(&out, "FirstSeen: %s\n", fmtTime(iss.FirstSeen))
	fmt.Fprintf(&out, "LastSeen: %s\n", fmtTime(iss.LastSeen))
	fmt.Fprintf(&out, "Age: %s\n", formatAgeWithMode(iss.Age(now), mode))

	if details := strings.TrimSpace(iss.Details); details != "" {
		out.WriteString("\nDetails\n")
		out.WriteString(indentBlock(details, " "))
		out.WriteString("\n")
	}

	// Rollup issues additionally list the individual issues they summarise.
	if isRollupIssue(iss) {
		if samples := getRollupSamples(iss); len(samples) > 0 {
			const maxSamples = 10
			if len(samples) > maxSamples {
				samples = samples[:maxSamples]
			}
			out.WriteString("\nAffected Issues\n")
			for _, sample := range samples {
				fmt.Fprintf(&out, " • %s\n", sample)
			}
		}
	}

	if len(iss.Evidence) > 0 {
		// Sort the keys so the output is stable between renders.
		names := make([]string, 0, len(iss.Evidence))
		for name := range iss.Evidence {
			names = append(names, name)
		}
		sort.Strings(names)

		out.WriteString("\nEvidence\n")
		for _, name := range names {
			fmt.Fprintf(&out, " %s: %s\n", name, iss.Evidence[name])
		}
	}

	if fix := strings.TrimSpace(iss.SuggestedFix); fix != "" {
		out.WriteString("\nSuggested Fix\n")
		out.WriteString(indentBlock(fix, " "))
		out.WriteString("\n")
	}

	return strings.TrimRight(out.String(), "\n")
}
// fmtTime formats t in the local time zone as "YYYY-MM-DD HH:MM:SS", or
// returns "-" for the zero time.
func fmtTime(t time.Time) string {
	const layout = "2006-01-02 15:04:05"
	if !t.IsZero() {
		return t.Local().Format(layout)
	}
	return "-"
}
// indentBlock prepends prefix to every line of s. Lines are separated by "\n";
// an empty s yields just the prefix.
func indentBlock(s, prefix string) string {
	// Prefixing the first line and every position after a newline is the same
	// as prefixing each line individually.
	return prefix + strings.ReplaceAll(s, "\n", "\n"+prefix)
}
+152
View File
@@ -0,0 +1,152 @@
package ui
import (
"fmt"
"strings"
"github.com/charmbracelet/bubbles/key"
)
// HelpModel holds the state of the keybinding help overlay, which is simply
// whether it is currently shown.
type HelpModel struct {
	visible bool
}

// NewHelp returns a help model that starts hidden.
func NewHelp() HelpModel {
	var hidden HelpModel // the zero value is "not visible"
	return hidden
}

// Show makes the help overlay visible.
func (m *HelpModel) Show() {
	m.visible = true
}

// Hide makes the help overlay invisible.
func (m *HelpModel) Hide() {
	m.visible = false
}

// Toggle flips the help overlay between visible and hidden.
func (m *HelpModel) Toggle() {
	if m.visible {
		m.Hide()
		return
	}
	m.Show()
}

// IsVisible reports whether the help overlay is currently shown.
func (m HelpModel) IsVisible() bool {
	return m.visible
}
// Render returns the help overlay text, or "" when the overlay is hidden.
//
// The overlay consists of a title bar, the keybindings grouped by purpose
// (one line per binding, taken from keys) and a legend for the collector
// health icons used in the header. Every action bound in the UI's key map is
// listed, including the direct priority jumps, bulk acknowledge, clipboard
// copy and the display toggles.
func (m HelpModel) Render(keys KeyMap, styles Styles) string {
	if !m.visible {
		return ""
	}

	var b strings.Builder

	// Title
	b.WriteString(styles.HeaderBar.Render("Keybindings - Press ? or esc to close"))
	b.WriteString("\n\n")

	// Keybinding groups, in display order.
	groups := []struct {
		name  string
		binds []keyHelp
	}{
		{
			name: "Global",
			binds: []keyHelp{
				{keys.Help, "Show/hide this help"},
				{keys.Quit, "Quit the application"},
				{keys.RefreshNow, "Refresh data now"},
			},
		},
		{
			name: "Filters",
			binds: []keyHelp{
				{keys.Search, "Search by title/details"},
				{keys.Priority, "Cycle priority filter"},
				{keys.PriorityP0, "Show only P0 issues"},
				{keys.PriorityP1, "Show only P1 issues"},
				{keys.PriorityP2, "Show only P2 issues"},
				{keys.PriorityP3, "Show only P3 issues"},
				{keys.Category, "Cycle category filter"},
				{keys.ClearFilters, "Clear all filters and search"},
			},
		},
		{
			name: "Navigation",
			binds: []keyHelp{
				{keys.FocusNext, "Toggle focus (table/details)"},
				{keys.Sort, "Cycle sort order"},
				{keys.JumpToTop, "Jump to top (g)"},
				{keys.JumpToBottom, "Jump to bottom (G)"},
				{keys.Down, "Move down (j)"},
				{keys.Up, "Move up (k)"},
			},
		},
		{
			name: "Actions",
			binds: []keyHelp{
				{keys.AckToggle, "Acknowledge/unacknowledge issue"},
				{keys.AckAll, "Acknowledge all visible open issues"},
				{keys.Copy, "Copy suggested fix (or title) to clipboard"},
				{keys.Export, "Export issues to JSON"},
			},
		},
		{
			name: "Display",
			binds: []keyHelp{
				{keys.ToggleWideTitle, "Toggle wide title column"},
				{keys.ToggleAgeFormat, "Toggle age format"},
				{keys.ToggleTheme, "Cycle theme"},
			},
		},
	}

	// Render each group, separated by a blank line.
	for i, group := range groups {
		if i > 0 {
			b.WriteString("\n")
		}
		b.WriteString(styles.HeaderKey.Render(group.name + ":"))
		b.WriteString("\n")
		for _, kb := range group.binds {
			b.WriteString(renderKeyHelp(kb, styles))
			b.WriteString("\n")
		}
	}

	// Collector health icon legend.
	b.WriteString("\n")
	b.WriteString(styles.HeaderKey.Render("Legend:"))
	b.WriteString("\n")
	b.WriteString(styles.HeaderVal.Render(" Collector health: ✓ (OK), ⚠ (DEGRADED), ✗ (ERROR)"))
	b.WriteString("\n")

	return b.String()
}
// keyHelp pairs a key binding with the description shown for it in the help
// overlay.
type keyHelp struct {
binding key.Binding // binding whose key names are listed
help string // human-readable description of the action
}
// renderKeyHelp formats one help line: the binding's key names joined by ", ",
// padded to at least 10 characters, followed by the description. It returns ""
// for a binding without keys.
func renderKeyHelp(kb keyHelp, styles Styles) string {
	names := kb.binding.Keys()
	if len(names) == 0 {
		return ""
	}

	label := strings.Join(names, ", ")

	// Pad short key labels so the descriptions line up.
	gap := ""
	if missing := 10 - len(label); missing > 0 {
		gap = strings.Repeat(" ", missing)
	}

	return fmt.Sprintf(" %s%s%s",
		styles.HeaderVal.Render(label),
		gap,
		styles.HeaderVal.Render(kb.help),
	)
}
+141
View File
@@ -0,0 +1,141 @@
package ui
import "github.com/charmbracelet/bubbles/key"
// KeyMap defines UI keybindings.
//
// Note: Bubble Tea will also handle ctrl+c; we additionally bind q for quit.
//
// The key noted next to each field is the default assigned by defaultKeyMap.
type KeyMap struct {
Quit key.Binding // "q"
RefreshNow key.Binding // "r"
Search key.Binding // "/"
Priority key.Binding // "p": cycle the priority filter
PriorityP0 key.Binding // "0": show only P0
PriorityP1 key.Binding // "1": show only P1
PriorityP2 key.Binding // "2": show only P2
PriorityP3 key.Binding // "3": show only P3
Category key.Binding // "c": cycle the category filter
Sort key.Binding // "s": cycle the sort mode
FocusNext key.Binding // "tab": switch focus between table and details
AckToggle key.Binding // "a": acknowledge/unacknowledge the selected issue
AckAll key.Binding // "A": acknowledge all visible issues
Export key.Binding // "E": export issues
ToggleTheme key.Binding // "T"
Help key.Binding // "?"
JumpToTop key.Binding // "g"
JumpToBottom key.Binding // "G"
Down key.Binding // "j"
Up key.Binding // "k"
Copy key.Binding // "y": copy the suggested fix
ToggleWideTitle key.Binding // "t"
ToggleAgeFormat key.Binding // "d"
ClearFilters key.Binding // "esc"
Cancel key.Binding // "esc": leave search input or close help
Apply key.Binding // "enter": apply the search query
}
// defaultKeyMap returns the built-in key bindings.
//
// Every binding with help text uses its first key as the help label. Cancel
// and Apply carry no help text.
func defaultKeyMap() KeyMap {
	// bind creates a binding for keys whose help label is the first key.
	bind := func(desc string, keys ...string) key.Binding {
		return key.NewBinding(
			key.WithKeys(keys...),
			key.WithHelp(keys[0], desc),
		)
	}
	// plain creates a binding without help text.
	plain := func(keys ...string) key.Binding {
		return key.NewBinding(key.WithKeys(keys...))
	}

	return KeyMap{
		Quit:            bind("quit", "q"),
		RefreshNow:      bind("refresh now", "r"),
		Search:          bind("search", "/"),
		Priority:        bind("priority filter", "p"),
		PriorityP0:      bind("P0 only", "0"),
		PriorityP1:      bind("P1 only", "1"),
		PriorityP2:      bind("P2 only", "2"),
		PriorityP3:      bind("P3 only", "3"),
		Category:        bind("category filter", "c"),
		Sort:            bind("cycle sort", "s"),
		FocusNext:       bind("focus", "tab"),
		AckToggle:       bind("ack/unack", "a"),
		AckAll:          bind("ack all visible", "A", "shift+a"),
		Export:          bind("export", "E"),
		ToggleTheme:     bind("toggle theme", "T", "shift+t"),
		Help:            bind("show help", "?"),
		JumpToTop:       bind("jump to top", "g"),
		JumpToBottom:    bind("jump to bottom", "G", "shift+g"),
		Down:            bind("down", "j"),
		Up:              bind("up", "k"),
		Copy:            bind("copy fix", "y"),
		ToggleWideTitle: bind("wide title", "t"),
		ToggleAgeFormat: bind("age format", "d"),
		ClearFilters:    bind("clear filters", "esc"),
		Cancel:          plain("esc"),
		Apply:           plain("enter"),
	}
}
+122
View File
@@ -0,0 +1,122 @@
package ui
import "github.com/charmbracelet/lipgloss"
// ThemeMode represents the UI theme mode.
type ThemeMode int
// Theme modes, cycled in declaration order by the theme toggle key.
const (
ThemeAuto ThemeMode = iota // currently resolves to the light theme (see defaultStylesForMode)
ThemeLight
ThemeDark
)
// Styles centralizes all lipgloss styling.
// Keep these simple: excessive styling can slow rendering at high row counts.
//
// NOTE(review): the table, priority, state and DetailsBody styles are not
// referenced by the code visible in this section; confirm where they are used.
type Styles struct {
HeaderBar lipgloss.Style // header bar and help overlay title
HeaderKey lipgloss.Style // group headings in the help overlay
HeaderVal lipgloss.Style // key names and descriptions in the help overlay
FilterActive lipgloss.Style // filters that are currently active in the header
TableHeader lipgloss.Style
TableCell lipgloss.Style
P0 lipgloss.Style
P1 lipgloss.Style
P2 lipgloss.Style
P3 lipgloss.Style
StateOpen lipgloss.Style
StateAck lipgloss.Style
StateRes lipgloss.Style
DetailsTitle lipgloss.Style // title line of the details pane
DetailsBody lipgloss.Style
Muted lipgloss.Style // placeholder and informational messages
Error lipgloss.Style // error text and the issue-count warning
}
// LightTheme returns the light theme's style set.
func LightTheme() Styles {
	// Each helper starts from a fresh style so no two entries share state.
	fg := func(code string) lipgloss.Style {
		return lipgloss.NewStyle().Foreground(lipgloss.Color(code))
	}
	boldThenFg := func(code string) lipgloss.Style {
		return lipgloss.NewStyle().Bold(true).Foreground(lipgloss.Color(code))
	}
	dim := fg("8")

	return Styles{
		HeaderBar: lipgloss.NewStyle().
			Background(lipgloss.Color("236")).
			Foreground(lipgloss.Color("252")).
			Padding(0, 1),
		HeaderKey:    fg("250").Bold(true),
		HeaderVal:    fg("254"),
		FilterActive: boldThenFg("46"),
		TableHeader:  fg("252").Bold(true),
		TableCell:    fg("252"),
		P0:           fg("9").Bold(true),
		P1:           fg("208").Bold(true),
		P2:           fg("11"),
		P3:           fg("10"),
		StateOpen:    fg("252"),
		StateAck:     fg("14"),
		StateRes:     dim,
		DetailsTitle: boldThenFg("252"),
		DetailsBody:  fg("252"),
		Muted:        dim,
		Error:        fg("9"),
	}
}
// DarkTheme returns the dark theme's style set, which uses higher-contrast
// colors than the light theme.
func DarkTheme() Styles {
	// Each helper starts from a fresh style so no two entries share state.
	fg := func(code string) lipgloss.Style {
		return lipgloss.NewStyle().Foreground(lipgloss.Color(code))
	}
	boldThenFg := func(code string) lipgloss.Style {
		return lipgloss.NewStyle().Bold(true).Foreground(lipgloss.Color(code))
	}
	dim := fg("245")

	return Styles{
		HeaderBar: lipgloss.NewStyle().
			Background(lipgloss.Color("238")).
			Foreground(lipgloss.Color("231")).
			Padding(0, 1),
		HeaderKey:    fg("159").Bold(true),
		HeaderVal:    fg("231"),
		FilterActive: boldThenFg("84"),
		TableHeader:  fg("231").Bold(true),
		TableCell:    fg("231"),
		P0:           fg("203").Bold(true),
		P1:           fg("229").Bold(true),
		P2:           fg("48"),
		P3:           fg("42"),
		StateOpen:    fg("231"),
		StateAck:     fg("48"),
		StateRes:     dim,
		DetailsTitle: boldThenFg("231"),
		DetailsBody:  fg("231"),
		Muted:        dim,
		Error:        fg("203"),
	}
}
// defaultStyles returns the style set used when no theme mode is given, which
// is the light theme (kept for backwards compatibility).
func defaultStyles() Styles {
	styles := LightTheme()
	return styles
}
// defaultStylesForMode returns the style set for themeMode. Only ThemeDark
// selects the dark theme; ThemeLight, ThemeAuto and any other value use the
// light theme.
func defaultStylesForMode(themeMode ThemeMode) Styles {
	if themeMode == ThemeDark {
		return DarkTheme()
	}
	// Auto mode defaults to the light theme.
	return LightTheme()
}
+131
View File
@@ -0,0 +1,131 @@
package ui
import (
"fmt"
"strings"
"time"
"github.com/charmbracelet/bubbles/table"
"tower/internal/model"
)
// Column keys, used for future sort expansions.
// They double as the column titles, and layout matches on them to assign
// column widths.
const (
colPri = "Pri"
colCat = "Cat"
colTitle = "Title"
colAge = "Age"
colState = "State"
)
// newIssueTable creates the focused issue table with its five columns and
// minimal styling. The Title column starts at width 0 and is sized by layout
// once the window size is known.
func newIssueTable() table.Model {
	tbl := table.New(
		table.WithColumns([]table.Column{
			{Title: colPri, Width: 3},
			{Title: colCat, Width: 12},
			{Title: colTitle, Width: 0}, // widened on resize
			{Title: colAge, Width: 7},
			{Title: colState, Width: 13},
		}),
		table.WithFocused(true),
		table.WithHeight(10),
	)

	// Keep built-in styles minimal: bold header, non-bold selection.
	look := table.DefaultStyles()
	look.Header = look.Header.Bold(true)
	look.Selected = look.Selected.Bold(false)
	tbl.SetStyles(look)

	return tbl
}
// buildRows converts issues into table rows (priority, category, title, age,
// state) and returns them together with a parallel slice that maps each row
// index to its issue ID. Ages are computed relative to now and formatted
// according to mode.
func buildRows(now time.Time, mode AgeMode, issues []model.Issue) ([]table.Row, []string) {
	count := len(issues)
	rows := make([]table.Row, count)
	ids := make([]string, count)
	for i, iss := range issues {
		rows[i] = table.Row{
			iss.Priority.String(),
			shortCat(iss.Category.String()),
			oneLine(iss.Title),
			formatAgeWithMode(iss.Age(now), mode),
			iss.State.String(),
		}
		ids[i] = iss.ID
	}
	return rows, ids
}
// shortCat shortens a category name for the 12-character category column.
//
// An empty name becomes "-". Names of at most 12 bytes are returned unchanged.
// Longer names are reduced to their first word (text before the first space,
// if that space is not the first character) and then cut to 12 bytes.
func shortCat(cat string) string {
	const limit = 12

	switch {
	case cat == "":
		return "-"
	case len(cat) <= limit:
		return cat
	}

	// Keep category compact; table has limited width.
	head := cat
	if sp := strings.IndexByte(cat, ' '); sp > 0 {
		head = cat[:sp]
	}
	if len(head) > limit {
		head = head[:limit]
	}
	return head
}
// lineBreakReplacer turns every line break (CRLF, LF or a lone CR) into a
// single space. CRLF is listed first so it is replaced as one unit.
var lineBreakReplacer = strings.NewReplacer("\r\n", " ", "\n", " ", "\r", " ")

// oneLine flattens s to a single line: every line break becomes a space and
// leading/trailing whitespace is removed.
//
// Carriage returns are replaced as well as newlines, so text with Windows-style
// line endings cannot leave a "\r" behind in a single-line table cell.
func oneLine(s string) string {
	return strings.TrimSpace(lineBreakReplacer.Replace(s))
}
func formatAge(d time.Duration) string {
return formatAgeWithMode(d, AgeCompact)
}
// formatAgeWithMode formats d using its largest whole unit among seconds,
// minutes, hours and days.
//
// In AgeRelative mode the result has an " ago" suffix ("3h ago"); in any other
// mode it is compact ("3h"). Non-positive durations yield "0m ago" in relative
// mode and "0s" otherwise.
func formatAgeWithMode(d time.Duration, mode AgeMode) string {
	relative := mode == AgeRelative

	if d <= 0 {
		if relative {
			return "0m ago"
		}
		return "0s"
	}

	// Pick the largest unit that fits and the whole number of those units.
	var (
		amount int
		unit   string
	)
	switch {
	case d < time.Minute:
		amount, unit = int(d/time.Second), "s"
	case d < time.Hour:
		amount, unit = int(d/time.Minute), "m"
	case d < 24*time.Hour:
		amount, unit = int(d/time.Hour), "h"
	default:
		amount, unit = int(d/(24*time.Hour)), "d"
	}

	suffix := ""
	if relative {
		suffix = " ago"
	}
	return fmt.Sprintf("%d%s%s", amount, unit, suffix)
}