feat: implement ControlTower TUI for cluster and host monitoring
Add a complete TUI application for monitoring Kubernetes clusters and host systems. Features include:

Core features:
- Collector framework with concurrent scheduling
- Host collectors: disk, memory, load, network
- Kubernetes collectors: pods, nodes, workloads, events with informers
- Issue deduplication, state management, and resolve-after logic
- Bubble Tea TUI with table view, details pane, and filtering
- JSON export functionality

UX improvements:
- Help overlay with keybindings
- Priority/category filters with visual indicators
- Direct priority jump (0/1/2/3)
- Bulk acknowledge (Shift+A)
- Clipboard copy (y)
- Theme toggle (T)
- Age format toggle (d)
- Wide title toggle (t)
- Vi-style navigation (j/k)
- Home/End jump (g/G)
- Rollup drill-down in details

Robustness:
- Grace period for unreachable clusters
- Rollups for high-volume issues
- Flap suppression
- RBAC error handling

Files: All core application code with tests for host collectors, engine, store, model, and export packages.
This commit is contained in:
138
internal/collectors/host/net.go
Normal file
138
internal/collectors/host/net.go
Normal file
@@ -0,0 +1,138 @@
|
||||
package host
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"tower/internal/collectors"
|
||||
"tower/internal/model"
|
||||
)
|
||||
|
||||
// NetCollector is a host collector that detects a machine which has a live
// network link but no default route.
//
// Rule (PLAN.md):
//   - P1 if no default route AND any non-loopback interface is UP.
//
// Inputs:
//   - /proc/net/route for the routing table
//   - /sys/class/net/*/operstate for per-interface link state
//
// NOTE: Linux-specific.
type NetCollector struct {
	// interval is the polling period reported by Interval; non-positive
	// values make Interval fall back to its built-in default.
	interval time.Duration

	// readFile loads a file's contents. NewNetCollector sets it to
	// os.ReadFile; Collect calls it unconditionally, so it must be non-nil.
	readFile func(string) ([]byte, error)

	// glob expands a filesystem pattern. NewNetCollector sets it to
	// filepath.Glob; Collect calls it unconditionally, so it must be non-nil.
	glob func(string) ([]string, error)
}
|
||||
|
||||
func NewNetCollector() *NetCollector {
|
||||
return &NetCollector{
|
||||
interval: 5 * time.Second,
|
||||
readFile: os.ReadFile,
|
||||
glob: filepath.Glob,
|
||||
}
|
||||
}
|
||||
|
||||
// Name returns the fixed identifier of this collector, "host:net".
func (c *NetCollector) Name() string {
	return "host:net"
}
|
||||
|
||||
func (c *NetCollector) Interval() time.Duration {
|
||||
if c.interval <= 0 {
|
||||
return 5 * time.Second
|
||||
}
|
||||
return c.interval
|
||||
}
|
||||
|
||||
func (c *NetCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err
|
||||
}
|
||||
|
||||
routeBytes, err := c.readFile("/proc/net/route")
|
||||
if err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/net/route"}, err
|
||||
}
|
||||
|
||||
hasDefault := hasDefaultRoute(string(routeBytes))
|
||||
|
||||
paths, err := c.glob("/sys/class/net/*/operstate")
|
||||
if err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "failed listing /sys/class/net"}, err
|
||||
}
|
||||
upIfaces := make([]string, 0, 2)
|
||||
for _, p := range paths {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err
|
||||
}
|
||||
b, err := c.readFile(p)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
iface := filepath.Base(filepath.Dir(p))
|
||||
if iface == "lo" {
|
||||
continue
|
||||
}
|
||||
state := strings.TrimSpace(string(b))
|
||||
if isIfaceUp(state) {
|
||||
upIfaces = append(upIfaces, iface)
|
||||
}
|
||||
}
|
||||
|
||||
if hasDefault || len(upIfaces) == 0 {
|
||||
return nil, collectors.OKStatus(), nil
|
||||
}
|
||||
|
||||
iss := model.Issue{
|
||||
ID: "host:net:default-route-missing",
|
||||
Category: model.CategoryNetwork,
|
||||
Priority: model.PriorityP1,
|
||||
Title: "No default route",
|
||||
Details: "At least one network interface is up, but no default route is present.",
|
||||
Evidence: map[string]string{
|
||||
"up_ifaces": strings.Join(upIfaces, ","),
|
||||
},
|
||||
SuggestedFix: "Check routing and link state:\n ip route\n ip link\n nmcli dev status\nIf on Wi-Fi, reconnect; if on VPN, verify tunnel routes.",
|
||||
}
|
||||
return []model.Issue{iss}, collectors.OKStatus(), nil
|
||||
}
|
||||
|
||||
// hasDefaultRoute reports whether procNetRoute, the text of /proc/net/route,
// contains an IPv4 default route (0.0.0.0/0).
//
// Each data row is whitespace-separated with the columns
//
//	Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT
//
// A row counts as a default route when Destination is 00000000 and, if the
// Mask column is present, Mask is 00000000 as well. Checking the mask keeps
// narrower routes that merely start at 0.0.0.0 (for example 0.0.0.0/8) from
// being mistaken for a default route. Rows too short to carry a Mask column
// are matched on Destination alone. Blank lines, rows with fewer than two
// columns, and the header row are ignored.
func hasDefaultRoute(procNetRoute string) bool {
	const (
		destCol  = 1
		maskCol  = 7
		zeroAddr = "00000000"
	)

	sc := bufio.NewScanner(strings.NewReader(procNetRoute))
	for sc.Scan() {
		cols := strings.Fields(sc.Text())
		if len(cols) <= destCol || cols[destCol] != zeroAddr {
			// Blank or short line, the header (whose second column is the
			// literal "Destination"), or a route to a non-zero destination.
			continue
		}
		if len(cols) <= maskCol || cols[maskCol] == zeroAddr {
			return true
		}
	}
	return false
}
|
||||
|
||||
func isIfaceUp(operstate string) bool {
|
||||
// Linux operstate values include: up, down, unknown, dormant, lowerlayerdown.
|
||||
s := strings.ToLower(strings.TrimSpace(operstate))
|
||||
return s == "up" || s == "unknown"
|
||||
}
|
||||
|
||||
// Compile-time check that *NetCollector implements collectors.Collector.
var _ collectors.Collector = (*NetCollector)(nil)
|
||||
Reference in New Issue
Block a user