feat: implement ControlTower TUI for cluster and host monitoring

Add complete TUI application for monitoring Kubernetes clusters and host
systems. Features include:

Core features:
- Collector framework with concurrent scheduling
- Host collectors: disk, memory, load, network
- Kubernetes collectors: pods, nodes, workloads, events with informers
- Issue deduplication, state management, and resolve-after logic
- Bubble Tea TUI with table view, details pane, and filtering
- JSON export functionality

UX improvements:
- Help overlay with keybindings
- Priority/category filters with visual indicators
- Direct priority jump (0/1/2/3)
- Bulk acknowledge (Shift+A)
- Clipboard copy (y)
- Theme toggle (T)
- Age format toggle (d)
- Wide title toggle (t)
- Vi-style navigation (j/k)
- Home/End jump (g/G)
- Rollup drill-down in details

Robustness:
- Grace period for unreachable clusters
- Rollups for high-volume issues
- Flap suppression
- RBAC error handling

Files: All core application code with tests for host collectors,
engine, store, model, and export packages.
This commit is contained in:
OpenCode Test
2025-12-24 13:03:08 -08:00
parent c2c03fd664
commit 1421b4659e
40 changed files with 5941 additions and 0 deletions

View File

@@ -0,0 +1,287 @@
package host
import (
"bufio"
"context"
"fmt"
"os"
"strconv"
"strings"
"syscall"
"time"
"tower/internal/collectors"
"tower/internal/model"
)
// DiskCollector checks filesystem block + inode pressure across mounts.
//
// It reads /proc/mounts to discover mounts and then uses statfs to compute usage.
// Pseudo filesystems are filtered out.
//
// Thresholds (PLAN.md):
// - P1 if blocks OR inodes >= 92%
// - P0 if blocks OR inodes >= 98%
//
// Issues are emitted per mount (one issue that includes both block+inode usage).
//
// NOTE: This collector is Linux-specific.
type DiskCollector struct {
// interval is the configured collection period; Interval falls back to 10s
// when it is not positive.
interval time.Duration
// readFile loads a file's contents. NewDiskCollector sets it to os.ReadFile;
// Collect uses it to read /proc/mounts.
readFile func(string) ([]byte, error)
// statfs fills st with the filesystem statistics for path. NewDiskCollector
// sets it to syscall.Statfs.
statfs func(path string, st *syscall.Statfs_t) error
}
// NewDiskCollector returns a DiskCollector with a 10-second interval that
// reads files through os.ReadFile and queries filesystems through
// syscall.Statfs.
func NewDiskCollector() *DiskCollector {
	c := new(DiskCollector)
	c.interval = 10 * time.Second
	c.readFile = os.ReadFile
	c.statfs = syscall.Statfs
	return c
}
// Name returns the collector identifier "host:disk".
func (c *DiskCollector) Name() string {
	return "host:disk"
}
// Interval returns the configured collection period, or 10 seconds when the
// configured value is zero or negative.
func (c *DiskCollector) Interval() time.Duration {
	if c.interval > 0 {
		return c.interval
	}
	return 10 * time.Second
}
// Collect scans the mounts listed in /proc/mounts and emits one issue for
// every mount whose block or inode usage is flagged by diskPriority.
//
// Mounts rejected by shouldSkipMount and repeated mount points are ignored.
// A mount whose statfs call fails is skipped and counted; any such failure
// turns the returned status into HealthDegraded without failing the run.
// An unreadable /proc/mounts or a canceled context yields a HealthError
// status together with the error. When cancellation is noticed mid-scan, the
// issues gathered so far are returned alongside the error.
func (c *DiskCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
	canceled := collectors.Status{Health: collectors.HealthError, Message: "canceled"}
	if ctxErr := ctx.Err(); ctxErr != nil {
		return nil, canceled, ctxErr
	}

	raw, readErr := c.readFile("/proc/mounts")
	if readErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/mounts"}, readErr
	}

	mounts := parseProcMounts(string(raw))
	if len(mounts) == 0 {
		// Unusual, but reported as degraded rather than as a hard error.
		return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "no mounts found"}, nil
	}

	var (
		issues       = make([]model.Issue, 0, 8)
		visited      = make(map[string]struct{})
		statFailures int
	)
	for _, mnt := range mounts {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return issues, canceled, ctxErr
		}
		if shouldSkipMount(mnt) {
			continue
		}

		// The same mount point can appear more than once; check it only once.
		mountPoint := mnt.MountPoint
		if _, dup := visited[mountPoint]; dup {
			continue
		}
		visited[mountPoint] = struct{}{}

		var fsStat syscall.Statfs_t
		if statErr := c.statfs(mountPoint, &fsStat); statErr != nil {
			statFailures++
			continue
		}

		blockPct, blockFreeBytes := statfsBlockUsedPct(fsStat)
		inodePct := statfsInodeUsedPct(fsStat)
		priority, alert := diskPriority(blockPct, inodePct)
		if !alert {
			continue
		}

		evidence := map[string]string{
			"mount":            mountPoint,
			"fstype":           mnt.FSType,
			"block_used_pct":   fmt.Sprintf("%.1f", blockPct),
			"block_free_bytes": strconv.FormatUint(blockFreeBytes, 10),
		}
		// A negative inode percentage means inode data is unavailable.
		if inodePct >= 0 {
			evidence["inode_used_pct"] = fmt.Sprintf("%.1f", inodePct)
		}

		fix := fmt.Sprintf(
			"Inspect usage:\n df -h %s\n df -i %s\nFind large directories:\n sudo du -xh --max-depth=2 %s | sort -h | tail",
			mountPoint, mountPoint, mountPoint,
		)
		issues = append(issues, model.Issue{
			ID:           fmt.Sprintf("host:disk:%s:usage", mountPoint),
			Category:     model.CategoryStorage,
			Priority:     priority,
			Title:        fmt.Sprintf("Disk usage high on %s", mountPoint),
			Details:      "Filesystem space and/or inodes are nearly exhausted.",
			Evidence:     evidence,
			SuggestedFix: fix,
		})
	}

	status := collectors.OKStatus()
	if statFailures > 0 {
		status.Health = collectors.HealthDegraded
		status.Message = fmt.Sprintf("partial failures: %d mounts", statFailures)
	}
	return issues, status, nil
}
// procMount is one parsed line of /proc/mounts, as produced by
// parseProcMounts.
type procMount struct {
// Device is the first field of the line, with octal escapes decoded.
Device string
// MountPoint is the second field of the line, with octal escapes decoded.
MountPoint string
// FSType is the third field of the line, taken verbatim.
FSType string
// Options is the fourth field of the line when present, taken verbatim.
Options string
}
// parseProcMounts parses the text of /proc/mounts into one procMount per
// usable line. Lines with fewer than three whitespace-separated fields
// (including blank lines) are dropped. The device and mount point are
// unescaped; the filesystem type and options are kept verbatim.
func parseProcMounts(content string) []procMount {
	mounts := make([]procMount, 0, 32)
	scanner := bufio.NewScanner(strings.NewReader(content))
	for scanner.Scan() {
		cols := strings.Fields(scanner.Text())
		if len(cols) < 3 {
			continue
		}
		var opts string
		if len(cols) > 3 {
			opts = cols[3]
		}
		mounts = append(mounts, procMount{
			Device:     unescapeProcMountsField(cols[0]),
			MountPoint: unescapeProcMountsField(cols[1]),
			FSType:     cols[2],
			Options:    opts,
		})
	}
	return mounts
}
// unescapeProcMountsField decodes the octal escapes used in /proc/mounts
// fields.
//
// /proc/mounts writes characters that would break its whitespace-separated
// format as a backslash followed by exactly three octal digits, for example
// space as \040, tab as \011, newline as \012, backslash as \134 and '#' as
// \043. Every such sequence whose value fits in one byte is decoded, in a
// single left-to-right pass so that decoded output is never re-interpreted
// ("\134040" becomes the four characters `\040`). A backslash that is not
// followed by three octal digits is copied through unchanged.
func unescapeProcMountsField(s string) string {
	// Fast path: most fields contain no escapes at all.
	if strings.IndexByte(s, '\\') < 0 {
		return s
	}

	var b strings.Builder
	b.Grow(len(s))
	for i := 0; i < len(s); i++ {
		ch := s[i]
		if ch == '\\' && i+3 < len(s) {
			d1, d2, d3 := s[i+1], s[i+2], s[i+3]
			// The first digit is capped at 3 so the value stays within a byte.
			if d1 >= '0' && d1 <= '3' && d2 >= '0' && d2 <= '7' && d3 >= '0' && d3 <= '7' {
				b.WriteByte((d1-'0')<<6 | (d2-'0')<<3 | (d3 - '0'))
				i += 3
				continue
			}
		}
		b.WriteByte(ch)
	}
	return b.String()
}
// pseudoFSTypes is the set of filesystem type names that shouldSkipMount
// treats as not worth checking for disk pressure.
var pseudoFSTypes = map[string]struct{}{
	// Kernel and process information interfaces.
	"proc":        {},
	"procfs":      {},
	"sysfs":       {},
	"securityfs":  {},
	"selinuxfs":   {},
	"debugfs":     {},
	"tracefs":     {},
	"configfs":    {},
	"pstore":      {},
	"efivarfs":    {},
	"bpf":         {},
	"nsfs":        {},
	"binfmt_misc": {},

	// Device nodes and memory-backed filesystems.
	"devtmpfs":  {},
	"devpts":    {},
	"tmpfs":     {},
	"ramfs":     {},
	"hugetlbfs": {},
	"mqueue":    {},

	// Control groups.
	"cgroup":   {},
	"cgroup2":  {},
	"cgroupfs": {},

	// Automount, RPC and FUSE helpers.
	"autofs":          {},
	"systemd-1":       {},
	"rpc_pipefs":      {},
	"fusectl":         {},
	"fuse.lxcfs":      {},
	"fuse.gvfsd-fuse": {},

	// Layered and image filesystems.
	"overlay":   {}, // common container overlay mounts
	"overlayfs": {}, // (non-standard) conservative skip
	"squashfs":  {}, // typically read-only images
}
// shouldSkipMount reports whether a mount should be excluded from disk
// pressure checks.
//
// A mount is skipped when its mount point is empty, when its filesystem type
// is listed in pseudoFSTypes, or when it is /proc, /sys or /dev or lives
// underneath one of them. The location test works on whole path components,
// so real filesystems whose mount point merely begins with the same letters
// (for example /sysroot, /system or /devel) are still checked.
func shouldSkipMount(m procMount) bool {
	if m.MountPoint == "" {
		return true
	}
	// Filter by fstype.
	if _, pseudo := pseudoFSTypes[m.FSType]; pseudo {
		return true
	}
	// Filter common pseudo mount trees. /dev itself can be a real mount in
	// some cases, but usually isn't useful for disk pressure.
	for _, root := range [...]string{"/proc", "/sys", "/dev"} {
		if m.MountPoint == root || strings.HasPrefix(m.MountPoint, root+"/") {
			return true
		}
	}
	return false
}
// statfsBlockUsedPct returns the percentage of blocks in use and the number
// of bytes still available, computed from a statfs result.
//
// It mirrors df(1) semantics closely:
//
//	total = f_blocks
//	used  = f_blocks - f_bfree
//	avail = f_bavail (space available to unprivileged users)
//	use%  = used / (used + avail)
//
// freeBytes is f_bavail * f_bsize. Both results are 0 when the filesystem
// reports no blocks or when used+avail is 0. If a filesystem reports more
// free blocks than total blocks, used is treated as 0 instead of letting the
// unsigned subtraction wrap around to a near-100% reading.
func statfsBlockUsedPct(st syscall.Statfs_t) (usedPct float64, freeBytes uint64) {
	if st.Blocks == 0 {
		return 0, 0
	}
	bsize := uint64(st.Bsize)
	blocks := uint64(st.Blocks)
	bfree := uint64(st.Bfree)
	bavail := uint64(st.Bavail)

	// Guard the unsigned subtraction against inconsistent statfs data.
	var usedBlocks uint64
	if bfree < blocks {
		usedBlocks = blocks - bfree
	}

	denom := usedBlocks + bavail
	if denom == 0 {
		return 0, 0
	}
	freeBytes = bavail * bsize
	usedPct = (float64(usedBlocks) / float64(denom)) * 100.0
	return usedPct, freeBytes
}
// statfsInodeUsedPct reports the share of inodes in use as a percentage,
// computed as (f_files - f_ffree) / f_files. It returns -1 when the
// filesystem reports no inode total (f_files == 0).
func statfsInodeUsedPct(st syscall.Statfs_t) float64 {
	if st.Files == 0 {
		return -1
	}
	total, free := float64(st.Files), float64(st.Ffree)
	return ((total - free) / total) * 100.0
}
// diskPriority maps block and inode usage percentages to an issue priority.
//
// The higher of the two percentages decides; a negative inodePct (inode data
// unavailable) is ignored. It returns P0 at >= 98%, P1 at >= 92%, and
// ("", false) below that.
func diskPriority(blockPct, inodePct float64) (model.Priority, bool) {
	worst := blockPct
	if inodePct >= 0 && inodePct > worst {
		worst = inodePct
	}

	if worst >= 98.0 {
		return model.PriorityP0, true
	}
	if worst >= 92.0 {
		return model.PriorityP1, true
	}
	return "", false
}
// Compile-time check that *DiskCollector satisfies collectors.Collector.
var _ collectors.Collector = (*DiskCollector)(nil)

View File

@@ -0,0 +1,80 @@
package host
import (
"syscall"
"testing"
)
// TestParseProcMounts_UnescapesAndParses checks that well-formed lines are
// parsed, that \040 in a mount point becomes a space, and that a line with
// too few fields is dropped.
func TestParseProcMounts_UnescapesAndParses(t *testing.T) {
	const input = `dev1 / ext4 rw 0 0
dev2 /path\040with\040space xfs rw 0 0
badline
`
	got := parseProcMounts(input)
	if n := len(got); n != 2 {
		t.Fatalf("expected 2 mounts, got %d", n)
	}

	root, spaced := got[0], got[1]
	if !(root.MountPoint == "/" && root.FSType == "ext4") {
		t.Fatalf("unexpected first mount: %+v", root)
	}
	if spaced.MountPoint != "/path with space" {
		t.Fatalf("expected unescaped mountpoint, got %q", spaced.MountPoint)
	}
}
// TestShouldSkipMount_FiltersPseudo checks that pseudo mounts under /proc,
// /sys and /dev are skipped while an ext4 mount at /home is kept.
func TestShouldSkipMount_FiltersPseudo(t *testing.T) {
	pseudo := []procMount{
		{MountPoint: "/proc", FSType: "proc"},
		{MountPoint: "/sys", FSType: "sysfs"},
		{MountPoint: "/dev", FSType: "tmpfs"},
		{MountPoint: "/dev/shm", FSType: "tmpfs"},
	}
	for i := range pseudo {
		if shouldSkipMount(pseudo[i]) {
			continue
		}
		t.Fatalf("expected skip for %+v", pseudo[i])
	}

	home := procMount{MountPoint: "/home", FSType: "ext4"}
	if shouldSkipMount(home) {
		t.Fatalf("did not expect skip for /home ext4")
	}
}
// TestDiskPriority checks the 92% (P1) and 98% (P0) thresholds, including the
// case where only the inode percentage crosses the P0 line.
func TestDiskPriority(t *testing.T) {
	below, raised := diskPriority(91.9, -1)
	if raised {
		t.Fatalf("expected no issue, got %v", below)
	}

	atP1, raised := diskPriority(92.0, -1)
	if !raised || atP1 != "P1" {
		t.Fatalf("expected P1 at 92%%, got %v ok=%v", atP1, raised)
	}

	atP0, raised := diskPriority(97.9, 98.0)
	if !raised || atP0 != "P0" {
		t.Fatalf("expected P0 if either crosses 98%%, got %v ok=%v", atP0, raised)
	}
}
// TestStatfsCalculations checks the block and inode percentage helpers on a
// hand-built statfs result, including the "no inode data" sentinel.
func TestStatfsCalculations(t *testing.T) {
	var fs syscall.Statfs_t
	fs.Bsize, fs.Blocks = 1, 100
	fs.Bfree, fs.Bavail = 8, 8

	usedPct, freeBytes := statfsBlockUsedPct(fs)
	if freeBytes != 8 {
		t.Fatalf("expected free=8 bytes, got %d", freeBytes)
	}
	if usedPct < 91.9 || usedPct > 92.1 {
		t.Fatalf("expected ~92%% used, got %f", usedPct)
	}

	fs.Files, fs.Ffree = 100, 2
	if inodePct := statfsInodeUsedPct(fs); inodePct < 97.9 || inodePct > 98.1 {
		t.Fatalf("expected ~98%% inode used, got %f", inodePct)
	}

	fs.Files = 0
	if got := statfsInodeUsedPct(fs); got != -1 {
		t.Fatalf("expected -1 when inode info unavailable")
	}
}

View File

@@ -0,0 +1,127 @@
package host
import (
"context"
"fmt"
"os"
"runtime"
"strconv"
"strings"
"sync"
"time"
"tower/internal/collectors"
"tower/internal/model"
)
// LoadCollector evaluates 1-minute load average normalized by logical CPU count.
//
// Thresholds (PLAN.md), normalized by CPU count:
// - P2 if load1/cpus >= 4.0 sustained 120s
// - P1 if load1/cpus >= 6.0 sustained 120s
//
// NOTE: Linux-specific.
// Thread-safe: Collect() can be called concurrently.
type LoadCollector struct {
// interval is the configured collection period; Interval falls back to 5s
// when it is not positive.
interval time.Duration
// now supplies the current time; NewLoadCollector sets it to time.Now.
now func() time.Time
// readFile loads a file's contents; NewLoadCollector sets it to os.ReadFile.
// Collect uses it to read /proc/loadavg.
readFile func(string) ([]byte, error)
// cpuCount supplies the CPU count used to normalize the load;
// NewLoadCollector sets it to runtime.NumCPU.
cpuCount func() int
// mu guards pri and since, which Collect updates on every run.
mu sync.Mutex
// pri is the priority currently being tracked ("" when none).
pri model.Priority
// since is when pri was last (re)started by updateSustained.
since time.Time
}
// NewLoadCollector returns a LoadCollector with a 5-second interval that
// uses time.Now, os.ReadFile and runtime.NumCPU.
func NewLoadCollector() *LoadCollector {
	c := new(LoadCollector)
	c.interval = 5 * time.Second
	c.now = time.Now
	c.readFile = os.ReadFile
	c.cpuCount = runtime.NumCPU
	return c
}
// Name returns the collector identifier "host:load".
func (c *LoadCollector) Name() string {
	return "host:load"
}
// Interval returns the configured collection period, or 5 seconds when the
// configured value is zero or negative.
func (c *LoadCollector) Interval() time.Duration {
	if c.interval > 0 {
		return c.interval
	}
	return 5 * time.Second
}
// Collect reads the 1-minute load average from /proc/loadavg, divides it by
// the CPU count (treated as 1 when not positive), and emits a single
// "host:load:high" issue once the resulting priority has been held for its
// full window.
//
// A canceled context or an unreadable /proc/loadavg yields a HealthError
// status with the error. Unparseable content yields HealthDegraded with a nil
// error and leaves the tracked state untouched.
//
// NOTE(review): updateSustained restarts the timer whenever the desired
// priority changes, so a move between P2 and P1 appears to suppress the
// issue until the new level has itself been held for the window - confirm
// this is intended.
func (c *LoadCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, ctxErr
	}

	ts := c.now()
	raw, readErr := c.readFile("/proc/loadavg")
	if readErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/loadavg"}, readErr
	}
	load1, parseErr := parseProcLoadavgFirst(string(raw))
	if parseErr != nil {
		return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "bad /proc/loadavg"}, nil
	}

	cpus := c.cpuCount()
	if cpus < 1 {
		cpus = 1
	}
	perCPU := load1 / float64(cpus)
	wanted, window := desiredLoadPriority(perCPU)

	// Update the tracked state and take a snapshot under the lock.
	c.mu.Lock()
	c.pri, c.since = updateSustained(ts, c.pri, c.since, wanted)
	pri, since := c.pri, c.since
	c.mu.Unlock()

	sustained := pri != "" && !since.IsZero() && ts.Sub(since) >= window
	if !sustained {
		return nil, collectors.OKStatus(), nil
	}

	evidence := map[string]string{
		"load1":            fmt.Sprintf("%.2f", load1),
		"cpus":             strconv.Itoa(cpus),
		"load1_per_cpu":    fmt.Sprintf("%.2f", perCPU),
		"sustained_window": window.String(),
	}
	issue := model.Issue{
		ID:           "host:load:high",
		Category:     model.CategoryPerformance,
		Priority:     pri,
		Title:        "High sustained system load",
		Details:      "The 1-minute load average is high relative to CPU count for a sustained period.",
		Evidence:     evidence,
		SuggestedFix: "Investigate CPU hogs:\n top\n ps -eo pid,ppid,cmd,%cpu --sort=-%cpu | head\nIf I/O bound (high iowait), check disk/network.\n",
	}
	return []model.Issue{issue}, collectors.OKStatus(), nil
}
// parseProcLoadavgFirst returns the first field of /proc/loadavg content
// (format "1.23 0.70 0.50 1/123 4567"), which is the 1-minute load average.
// It returns an error when the content has no fields or the first field is
// not a valid float.
func parseProcLoadavgFirst(content string) (float64, error) {
	tokens := strings.Fields(content)
	if len(tokens) == 0 {
		return 0, fmt.Errorf("missing fields")
	}
	load1, parseErr := strconv.ParseFloat(tokens[0], 64)
	if parseErr != nil {
		return 0, parseErr
	}
	return load1, nil
}
// desiredLoadPriority maps a per-CPU load value to the priority it warrants
// and the time it must be sustained: P1 at >= 6.0 and P2 at >= 4.0, both
// with a 120-second window. Below 4.0 it returns ("", 0).
func desiredLoadPriority(loadPerCPU float64) (model.Priority, time.Duration) {
	switch {
	case loadPerCPU >= 6.0:
		return model.PriorityP1, 120 * time.Second
	case loadPerCPU >= 4.0:
		return model.PriorityP2, 120 * time.Second
	default:
		return "", 0
	}
}
// Compile-time check that *LoadCollector satisfies collectors.Collector.
var _ collectors.Collector = (*LoadCollector)(nil)

View File

@@ -0,0 +1,48 @@
package host
import (
"testing"
"time"
"tower/internal/model"
)
// TestParseProcLoadavgFirst checks that the first field is parsed and that
// empty content is rejected.
func TestParseProcLoadavgFirst(t *testing.T) {
	load1, err := parseProcLoadavgFirst("1.23 0.70 0.50 1/123 4567\n")
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if inRange := load1 >= 1.229 && load1 <= 1.231; !inRange {
		t.Fatalf("expected 1.23, got %v", load1)
	}

	_, emptyErr := parseProcLoadavgFirst("\n")
	if emptyErr == nil {
		t.Fatalf("expected error")
	}
}
// TestDesiredLoadPriority checks the boundaries just below 4.0, at 4.0 (P2)
// and at 6.0 (P1).
func TestDesiredLoadPriority(t *testing.T) {
	const window = 120 * time.Second

	if pri, win := desiredLoadPriority(3.99); pri != "" || win != 0 {
		t.Fatalf("expected none")
	}
	if pri, win := desiredLoadPriority(4.0); pri != model.PriorityP2 || win != window {
		t.Fatalf("expected P2/120s")
	}
	if pri, win := desiredLoadPriority(6.0); pri != model.PriorityP1 || win != window {
		t.Fatalf("expected P1/120s")
	}
}
// TestUpdateSustainedWorksForLoadToo checks that a first P2 observation
// starts the timer and that a repeated P2 ten seconds later keeps it.
func TestUpdateSustainedWorksForLoadToo(t *testing.T) {
	start := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)

	pri, since := updateSustained(start, "", time.Time{}, model.PriorityP2)
	if pri != model.PriorityP2 || !since.Equal(start) {
		t.Fatalf("expected set")
	}

	later := start.Add(10 * time.Second)
	nextPri, nextSince := updateSustained(later, pri, since, model.PriorityP2)
	if nextPri != model.PriorityP2 || !nextSince.Equal(since) {
		t.Fatalf("expected unchanged")
	}
}

View File

@@ -0,0 +1,205 @@
package host
import (
"bufio"
"context"
"fmt"
"os"
"strconv"
"strings"
"sync"
"time"
"tower/internal/collectors"
"tower/internal/model"
)
// MemCollector checks MemAvailable and swap pressure from /proc/meminfo.
//
// Thresholds (PLAN.md):
// Memory (MemAvailable as % of MemTotal):
// - P2 if <= 15% sustained 60s
// - P1 if <= 10% sustained 60s
// - P0 if <= 5% sustained 30s
//
// Swap pressure (only if RAM is also tight):
// - P1 if swap used >= 50% AND MemAvailable <= 10% sustained 60s
// - P0 if swap used >= 80% AND MemAvailable <= 5% sustained 30s
//
// Emits up to two issues:
// - host:mem:available
// - host:mem:swap
//
// NOTE: Linux-specific.
// Thread-safe: Collect() can be called concurrently.
type MemCollector struct {
// interval is the configured collection period; Interval falls back to 5s
// when it is not positive.
interval time.Duration
// now supplies the current time; NewMemCollector sets it to time.Now.
now func() time.Time
// readFile loads a file's contents; NewMemCollector sets it to os.ReadFile.
// Collect uses it to read /proc/meminfo.
readFile func(string) ([]byte, error)
// mu guards the four tracking fields below, which Collect updates on every run.
mu sync.Mutex
// memPri and memSince track the current low-memory priority and when it
// was last (re)started by updateSustained.
memPri model.Priority
memSince time.Time
// swapPri and swapSince track the same for the swap-pressure rule.
swapPri model.Priority
swapSince time.Time
}
// NewMemCollector returns a MemCollector with a 5-second interval that uses
// time.Now and os.ReadFile.
func NewMemCollector() *MemCollector {
	c := new(MemCollector)
	c.interval = 5 * time.Second
	c.now = time.Now
	c.readFile = os.ReadFile
	return c
}
// Name returns the collector identifier "host:mem".
func (c *MemCollector) Name() string {
	return "host:mem"
}
// Interval returns the configured collection period, or 5 seconds when the
// configured value is zero or negative.
func (c *MemCollector) Interval() time.Duration {
	if c.interval > 0 {
		return c.interval
	}
	return 5 * time.Second
}
// Collect reads /proc/meminfo and emits up to two issues:
// "host:mem:available" when MemAvailable (as a percentage of MemTotal) has
// stayed at an alerting level for its window, and "host:mem:swap" when swap
// usage is high while RAM is also tight for its window.
//
// A canceled context or an unreadable /proc/meminfo yields a HealthError
// status with the error. Missing MemTotal/MemAvailable, or a non-positive
// MemTotal, yields HealthDegraded with a nil error. Swap usage is treated as
// 0% unless both SwapTotal and SwapFree are present and SwapTotal is positive.
func (c *MemCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, ctxErr
	}

	ts := c.now()
	raw, readErr := c.readFile("/proc/meminfo")
	if readErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/meminfo"}, readErr
	}

	info := parseProcMeminfo(string(raw))
	totalKB, haveTotal := info["MemTotal"]
	availKB, haveAvail := info["MemAvailable"]
	if !(haveTotal && haveAvail) || totalKB <= 0 {
		return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "missing MemTotal/MemAvailable"}, nil
	}
	availPct := (float64(availKB) / float64(totalKB)) * 100.0

	// --- Available-memory rule ---
	wantedMem, memWindow := desiredMemPriority(availPct)
	c.mu.Lock()
	c.memPri, c.memSince = updateSustained(ts, c.memPri, c.memSince, wantedMem)
	memPri, memSince := c.memPri, c.memSince
	c.mu.Unlock()

	found := make([]model.Issue, 0, 2)
	if memPri != "" && !memSince.IsZero() && ts.Sub(memSince) >= memWindow {
		found = append(found, model.Issue{
			ID:       "host:mem:available",
			Category: model.CategoryMemory,
			Priority: memPri,
			Title:    "Low available memory",
			Details:  "MemAvailable is low and has remained low for a sustained period.",
			Evidence: map[string]string{
				"mem_available_kb":  strconv.FormatInt(availKB, 10),
				"mem_total_kb":      strconv.FormatInt(totalKB, 10),
				"mem_available_pct": fmt.Sprintf("%.1f", availPct),
			},
			SuggestedFix: "Identify memory hogs:\n free -h\n ps aux --sort=-rss | head\nConsider restarting runaway processes or adding RAM.",
		})
	}

	// --- Swap-pressure rule ---
	swapTotalKB, haveSwapTotal := info["SwapTotal"]
	swapFreeKB, haveSwapFree := info["SwapFree"]
	var swapUsedPct float64
	if haveSwapTotal && haveSwapFree && swapTotalKB > 0 {
		usedKB := swapTotalKB - swapFreeKB
		swapUsedPct = (float64(usedKB) / float64(swapTotalKB)) * 100.0
	}

	wantedSwap, swapWindow := desiredSwapPriority(availPct, swapTotalKB, swapUsedPct)
	c.mu.Lock()
	c.swapPri, c.swapSince = updateSustained(ts, c.swapPri, c.swapSince, wantedSwap)
	swapPri, swapSince := c.swapPri, c.swapSince
	c.mu.Unlock()

	if swapPri != "" && !swapSince.IsZero() && ts.Sub(swapSince) >= swapWindow {
		found = append(found, model.Issue{
			ID:       "host:mem:swap",
			Category: model.CategoryMemory,
			Priority: swapPri,
			Title:    "High swap usage with low RAM",
			Details:  "Swap usage is high while available RAM is also low, indicating memory pressure.",
			Evidence: map[string]string{
				"swap_used_pct":     fmt.Sprintf("%.1f", swapUsedPct),
				"swap_total_kb":     strconv.FormatInt(swapTotalKB, 10),
				"mem_available_pct": fmt.Sprintf("%.1f", availPct),
			},
			SuggestedFix: "Find swapping processes:\n vmstat 1\n smem -r 2>/dev/null || true\nConsider reducing memory usage or increasing RAM/swap.",
		})
	}

	return found, collectors.OKStatus(), nil
}
// parseProcMeminfo parses /proc/meminfo-style content (lines such as
// "MemAvailable: 12345 kB") into a map from key to integer value. The key is
// the first field with a trailing ":" removed; the value is the second
// field. Lines with fewer than two fields or a non-integer value are
// skipped.
func parseProcMeminfo(content string) map[string]int64 {
	values := make(map[string]int64)
	scanner := bufio.NewScanner(strings.NewReader(content))
	for scanner.Scan() {
		parts := strings.Fields(scanner.Text())
		if len(parts) < 2 {
			continue
		}
		n, convErr := strconv.ParseInt(parts[1], 10, 64)
		if convErr != nil {
			continue
		}
		values[strings.TrimSuffix(parts[0], ":")] = n
	}
	return values
}
// desiredMemPriority maps the MemAvailable percentage to the priority it
// warrants and the time it must be sustained: P0/30s at <= 5%, P1/60s at
// <= 10%, P2/60s at <= 15%, and ("", 0) above that.
func desiredMemPriority(memAvailPct float64) (model.Priority, time.Duration) {
	if memAvailPct <= 5.0 {
		return model.PriorityP0, 30 * time.Second
	}
	if memAvailPct <= 10.0 {
		return model.PriorityP1, 60 * time.Second
	}
	if memAvailPct <= 15.0 {
		return model.PriorityP2, 60 * time.Second
	}
	return "", 0
}
// desiredSwapPriority maps swap usage and the MemAvailable percentage to a
// priority and the time it must be sustained. Swap only alerts when RAM is
// tight as well: P0/30s at swap >= 80% with MemAvailable <= 5%, P1/60s at
// swap >= 50% with MemAvailable <= 10%. It returns ("", 0) otherwise,
// including when no swap is configured (swapTotalKB <= 0).
func desiredSwapPriority(memAvailPct float64, swapTotalKB int64, swapUsedPct float64) (model.Priority, time.Duration) {
	if swapTotalKB <= 0 {
		return "", 0
	}
	if swapUsedPct >= 80.0 && memAvailPct <= 5.0 {
		return model.PriorityP0, 30 * time.Second
	}
	if swapUsedPct >= 50.0 && memAvailPct <= 10.0 {
		return model.PriorityP1, 60 * time.Second
	}
	return "", 0
}
// updateSustained advances the (priority, since) tracking state for one
// observation and returns the new state.
//
//   - desired == "": the state is cleared.
//   - desired equals current and since is set: the state is kept as is.
//   - otherwise (priority changed, or no start time yet): the state becomes
//     (desired, now), restarting the timer.
func updateSustained(now time.Time, current model.Priority, since time.Time, desired model.Priority) (model.Priority, time.Time) {
	switch {
	case desired == "":
		return "", time.Time{}
	case current == desired && !since.IsZero():
		return current, since
	default:
		return desired, now
	}
}
// Compile-time check that *MemCollector satisfies collectors.Collector.
var _ collectors.Collector = (*MemCollector)(nil)

View File

@@ -0,0 +1,83 @@
package host
import (
"testing"
"time"
"tower/internal/model"
)
// TestParseProcMeminfo checks that MemTotal and MemAvailable are parsed from
// typical /proc/meminfo lines.
func TestParseProcMeminfo(t *testing.T) {
	const input = "MemTotal: 8000000 kB\nMemAvailable: 800000 kB\nSwapTotal: 2000000 kB\nSwapFree: 500000 kB\n"
	parsed := parseProcMeminfo(input)

	if got := parsed["MemTotal"]; got != 8000000 {
		t.Fatalf("MemTotal mismatch: %d", got)
	}
	if got := parsed["MemAvailable"]; got != 800000 {
		t.Fatalf("MemAvailable mismatch: %d", got)
	}
}
// TestDesiredMemPriority checks every threshold boundary of
// desiredMemPriority, verifying both the priority and the sustain window at
// each level (including the 60s window for P1).
func TestDesiredMemPriority(t *testing.T) {
	p, w := desiredMemPriority(16.0)
	if p != "" || w != 0 {
		t.Fatalf("expected none")
	}
	p, w = desiredMemPriority(15.0)
	if p != model.PriorityP2 || w != 60*time.Second {
		t.Fatalf("expected P2/60s got %v/%v", p, w)
	}
	p, w = desiredMemPriority(10.0)
	if p != model.PriorityP1 || w != 60*time.Second {
		t.Fatalf("expected P1/60s got %v/%v", p, w)
	}
	p, w = desiredMemPriority(5.0)
	if p != model.PriorityP0 || w != 30*time.Second {
		t.Fatalf("expected P0/30s got %v/%v", p, w)
	}
}
// TestDesiredSwapPriority checks the swap rule: no alert without swap, P0
// and P1 at their combined thresholds, and no alert when RAM is not tight.
func TestDesiredSwapPriority(t *testing.T) {
	// No swap configured.
	if pri, _ := desiredSwapPriority(4.0, 0, 90.0); pri != "" {
		t.Fatalf("expected none when SwapTotal=0")
	}

	if pri, win := desiredSwapPriority(4.0, 1000, 80.0); pri != model.PriorityP0 || win != 30*time.Second {
		t.Fatalf("expected P0/30s got %v/%v", pri, win)
	}

	if pri, win := desiredSwapPriority(9.9, 1000, 50.0); pri != model.PriorityP1 || win != 60*time.Second {
		t.Fatalf("expected P1/60s got %v/%v", pri, win)
	}

	// Swap high but RAM not tight => no issue.
	if pri, _ := desiredSwapPriority(20.0, 1000, 90.0); pri != "" {
		t.Fatalf("expected none when RAM not tight")
	}
}
// TestUpdateSustained walks updateSustained through its transitions: start,
// hold, restart on priority change, and clear.
func TestUpdateSustained(t *testing.T) {
	t0 := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
	t1, t2, t3 := t0.Add(1*time.Second), t0.Add(2*time.Second), t0.Add(3*time.Second)

	// Start: first P1 observation sets the timer.
	pri, since := updateSustained(t0, "", time.Time{}, model.PriorityP1)
	if pri != model.PriorityP1 || !since.Equal(t0) {
		t.Fatalf("expected set to P1 at now")
	}

	// Hold: same priority keeps the original start time.
	heldPri, heldSince := updateSustained(t1, pri, since, model.PriorityP1)
	if heldPri != model.PriorityP1 || !heldSince.Equal(since) {
		t.Fatalf("expected unchanged since")
	}

	// Change: a different priority restarts the timer.
	newPri, newSince := updateSustained(t2, heldPri, heldSince, model.PriorityP0)
	if newPri != model.PriorityP0 || !newSince.Equal(t2) {
		t.Fatalf("expected reset on priority change")
	}

	// Clear: an empty desired priority wipes the state.
	clearedPri, clearedSince := updateSustained(t3, newPri, newSince, "")
	if clearedPri != "" || !clearedSince.IsZero() {
		t.Fatalf("expected cleared")
	}
}

View File

@@ -0,0 +1,138 @@
package host
import (
"bufio"
"context"
"os"
"path/filepath"
"strings"
"time"
"tower/internal/collectors"
"tower/internal/model"
)
// NetCollector checks for missing default route while at least one non-loopback
// interface is up.
//
// Rule (PLAN.md):
// - P1 if no default route AND any non-loopback interface is UP.
//
// Discovery:
// - Default route from /proc/net/route
// - Interface UP from /sys/class/net/*/operstate
//
// NOTE: Linux-specific.
type NetCollector struct {
// interval is the configured collection period; Interval falls back to 5s
// when it is not positive.
interval time.Duration
// readFile loads a file's contents; NewNetCollector sets it to os.ReadFile.
// Collect uses it for /proc/net/route and each operstate file.
readFile func(string) ([]byte, error)
// glob expands a path pattern; NewNetCollector sets it to filepath.Glob.
// Collect uses it to list /sys/class/net/*/operstate.
glob func(string) ([]string, error)
}
// NewNetCollector returns a NetCollector with a 5-second interval that uses
// os.ReadFile and filepath.Glob.
func NewNetCollector() *NetCollector {
	c := new(NetCollector)
	c.interval = 5 * time.Second
	c.readFile = os.ReadFile
	c.glob = filepath.Glob
	return c
}
// Name returns the collector identifier "host:net".
func (c *NetCollector) Name() string {
	return "host:net"
}
// Interval returns the configured collection period, or 5 seconds when the
// configured value is zero or negative.
func (c *NetCollector) Interval() time.Duration {
	if c.interval > 0 {
		return c.interval
	}
	return 5 * time.Second
}
// Collect emits a P1 "host:net:default-route-missing" issue when
// /proc/net/route contains no default route while at least one interface
// other than "lo" is considered up by isIfaceUp.
//
// A canceled context, an unreadable /proc/net/route or a failing glob yields
// a HealthError status with the error. Individual operstate files that
// cannot be read are silently skipped.
func (c *NetCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, ctxErr
	}

	routeTable, readErr := c.readFile("/proc/net/route")
	if readErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/net/route"}, readErr
	}
	defaultRoutePresent := hasDefaultRoute(string(routeTable))

	statePaths, globErr := c.glob("/sys/class/net/*/operstate")
	if globErr != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "failed listing /sys/class/net"}, globErr
	}

	up := make([]string, 0, 2)
	for _, statePath := range statePaths {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, ctxErr
		}
		raw, stateErr := c.readFile(statePath)
		if stateErr != nil {
			continue
		}
		// The interface name is the directory holding the operstate file.
		name := filepath.Base(filepath.Dir(statePath))
		if name != "lo" && isIfaceUp(strings.TrimSpace(string(raw))) {
			up = append(up, name)
		}
	}

	if defaultRoutePresent || len(up) == 0 {
		return nil, collectors.OKStatus(), nil
	}

	issue := model.Issue{
		ID:       "host:net:default-route-missing",
		Category: model.CategoryNetwork,
		Priority: model.PriorityP1,
		Title:    "No default route",
		Details:  "At least one network interface is up, but no default route is present.",
		Evidence: map[string]string{
			"up_ifaces": strings.Join(up, ","),
		},
		SuggestedFix: "Check routing and link state:\n ip route\n ip link\n nmcli dev status\nIf on Wi-Fi, reconnect; if on VPN, verify tunnel routes.",
	}
	return []model.Issue{issue}, collectors.OKStatus(), nil
}
// hasDefaultRoute reports whether /proc/net/route content contains a row
// whose Destination column (the second field) is 00000000.
//
// The table header is
//
//	Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT
//
// and is skipped when the first non-empty line starts with "Iface". Blank
// lines and rows with fewer than two fields are ignored.
func hasDefaultRoute(procNetRoute string) bool {
	scanner := bufio.NewScanner(strings.NewReader(procNetRoute))
	headerChecked := false
	for scanner.Scan() {
		row := strings.TrimSpace(scanner.Text())
		if row == "" {
			continue
		}
		if !headerChecked {
			headerChecked = true
			if strings.HasPrefix(row, "Iface") {
				continue
			}
		}
		if cols := strings.Fields(row); len(cols) >= 2 && cols[1] == "00000000" {
			return true
		}
	}
	return false
}
// isIfaceUp reports whether an operstate value counts as "up". The value is
// trimmed and compared case-insensitively; both "up" and "unknown" count.
// Linux operstate values include: up, down, unknown, dormant, lowerlayerdown.
func isIfaceUp(operstate string) bool {
	switch strings.ToLower(strings.TrimSpace(operstate)) {
	case "up", "unknown":
		return true
	default:
		return false
	}
}
// Compile-time check that *NetCollector satisfies collectors.Collector.
var _ collectors.Collector = (*NetCollector)(nil)

View File

@@ -0,0 +1,28 @@
package host
import "testing"
// TestHasDefaultRoute checks that a row with destination 00000000 is
// detected and that a table with only a non-default destination is not.
func TestHasDefaultRoute(t *testing.T) {
	const withDefault = "Iface\tDestination\tGateway\tFlags\n" +
		"eth0\t00000000\t0102A8C0\t0003\n"
	if found := hasDefaultRoute(withDefault); !found {
		t.Fatalf("expected default route")
	}

	const withoutDefault = "Iface Destination Gateway Flags\n" +
		"eth0 0010A8C0 00000000 0001\n"
	if found := hasDefaultRoute(withoutDefault); found {
		t.Fatalf("expected no default route")
	}
}
// TestIsIfaceUp checks that "up" (with trailing newline) and "unknown" count
// as up, and that "down" does not.
func TestIsIfaceUp(t *testing.T) {
	if up := isIfaceUp("up\n"); !up {
		t.Fatalf("expected true")
	}
	if up := isIfaceUp("unknown"); !up {
		t.Fatalf("expected true for unknown")
	}
	if up := isIfaceUp("down"); up {
		t.Fatalf("expected false")
	}
}