porthole/internal/collectors/host/disk.go
Commit 1421b4659e (OpenCode Test, 2025-12-24)
feat: implement ControlTower TUI for cluster and host monitoring
Add a complete TUI application for monitoring Kubernetes clusters and host
systems.

Core features:
- Collector framework with concurrent scheduling
- Host collectors: disk, memory, load, network
- Kubernetes collectors: pods, nodes, workloads, events with informers
- Issue deduplication, state management, and resolve-after logic
- Bubble Tea TUI with table view, details pane, and filtering
- JSON export functionality

UX improvements:
- Help overlay with keybindings
- Priority/category filters with visual indicators
- Direct priority jump (0/1/2/3)
- Bulk acknowledge (Shift+A)
- Clipboard copy (y)
- Theme toggle (T)
- Age format toggle (d)
- Wide title toggle (t)
- Vi-style navigation (j/k)
- Home/End jump (g/G)
- Rollup drill-down in details

Robustness:
- Grace period for unreachable clusters
- Rollups for high-volume issues
- Flap suppression
- RBAC error handling

Files: All core application code with tests for host collectors,
engine, store, model, and export packages.

package host

import (
    "bufio"
    "context"
    "fmt"
    "os"
    "strconv"
    "strings"
    "syscall"
    "time"

    "tower/internal/collectors"
    "tower/internal/model"
)

// DiskCollector checks filesystem block + inode pressure across mounts.
//
// It reads /proc/mounts to discover mounts and then uses statfs to compute usage.
// Pseudo filesystems are filtered out.
//
// Thresholds (PLAN.md):
// - P1 if blocks OR inodes >= 92%
// - P0 if blocks OR inodes >= 98%
//
// Issues are emitted per mount (one issue that includes both block+inode usage).
//
// NOTE: This collector is Linux-specific.
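//
// Example (illustrative): a mount at 93% block usage and 10% inode usage is
// reported as P1; once either figure reaches 98%, the issue escalates to P0.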
type DiskCollector struct {
    interval time.Duration
    readFile func(string) ([]byte, error)
    statfs   func(path string, st *syscall.Statfs_t) error
}

func NewDiskCollector() *DiskCollector {
    return &DiskCollector{
        interval: 10 * time.Second,
        readFile: os.ReadFile,
        statfs:   syscall.Statfs,
    }
}

func (c *DiskCollector) Name() string { return "host:disk" }

func (c *DiskCollector) Interval() time.Duration {
    if c.interval <= 0 {
        return 10 * time.Second
    }
    return c.interval
}

func (c *DiskCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
    if err := ctx.Err(); err != nil {
        return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err
    }

    b, err := c.readFile("/proc/mounts")
    if err != nil {
        return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/mounts"}, err
    }

    mounts := parseProcMounts(string(b))
    if len(mounts) == 0 {
        // Unusual but treat as degraded rather than hard error.
        return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "no mounts found"}, nil
    }

    issues := make([]model.Issue, 0, 8)
    seenMount := map[string]struct{}{}
    partialErrs := 0
    for _, m := range mounts {
        if err := ctx.Err(); err != nil {
            return issues, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err
        }
        if shouldSkipMount(m) {
            continue
        }
        if _, ok := seenMount[m.MountPoint]; ok {
            continue
        }
        seenMount[m.MountPoint] = struct{}{}

        var st syscall.Statfs_t
        if err := c.statfs(m.MountPoint, &st); err != nil {
            partialErrs++
            continue
        }

        blockPct, blockFreeBytes := statfsBlockUsedPct(st)
        inodePct := statfsInodeUsedPct(st)
        pri, ok := diskPriority(blockPct, inodePct)
        if !ok {
            continue
        }

        evidence := map[string]string{
            "mount":            m.MountPoint,
            "fstype":           m.FSType,
            "block_used_pct":   fmt.Sprintf("%.1f", blockPct),
            "block_free_bytes": strconv.FormatUint(blockFreeBytes, 10),
        }
        if inodePct >= 0 {
            evidence["inode_used_pct"] = fmt.Sprintf("%.1f", inodePct)
        }

        issues = append(issues, model.Issue{
            ID:       fmt.Sprintf("host:disk:%s:usage", m.MountPoint),
            Category: model.CategoryStorage,
            Priority: pri,
            Title:    fmt.Sprintf("Disk usage high on %s", m.MountPoint),
            Details:  "Filesystem space and/or inodes are nearly exhausted.",
            Evidence: evidence,
            SuggestedFix: fmt.Sprintf(
                "Inspect usage:\n df -h %s\n df -i %s\nFind large directories:\n sudo du -xh --max-depth=2 %s | sort -h | tail",
                m.MountPoint, m.MountPoint, m.MountPoint,
            ),
        })
    }

    st := collectors.OKStatus()
    if partialErrs > 0 {
        st.Health = collectors.HealthDegraded
        st.Message = fmt.Sprintf("partial failures: %d mounts", partialErrs)
    }
    return issues, st, nil
}

type procMount struct {
    Device     string
    MountPoint string
    FSType     string
    Options    string
}

func parseProcMounts(content string) []procMount {
    s := bufio.NewScanner(strings.NewReader(content))
    out := make([]procMount, 0, 32)
    for s.Scan() {
        line := strings.TrimSpace(s.Text())
        if line == "" {
            continue
        }
        fields := strings.Fields(line)
        if len(fields) < 3 {
            continue
        }
        m := procMount{
            Device:     unescapeProcMountsField(fields[0]),
            MountPoint: unescapeProcMountsField(fields[1]),
            FSType:     fields[2],
        }
        if len(fields) >= 4 {
            m.Options = fields[3]
        }
        out = append(out, m)
    }
    return out
}

// /proc/mounts escapes special characters as octal sequences.
// The most common one is a space as \040.
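// For example, a mount point recorded as "/mnt/data\040disk" in /proc/mounts
// unescapes to "/mnt/data disk".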
func unescapeProcMountsField(s string) string {
    replacer := strings.NewReplacer(
        "\\040", " ",
        "\\011", "\t",
        "\\012", "\n",
        "\\134", "\\",
    )
    return replacer.Replace(s)
}

var pseudoFSTypes = map[string]struct{}{
    "proc":            {},
    "sysfs":           {},
    "tmpfs":           {},
    "devtmpfs":        {},
    "devpts":          {},
    "cgroup":          {},
    "cgroup2":         {},
    "pstore":          {},
    "securityfs":      {},
    "debugfs":         {},
    "tracefs":         {},
    "configfs":        {},
    "hugetlbfs":       {},
    "mqueue":          {},
    "rpc_pipefs":      {},
    "fusectl":         {},
    "binfmt_misc":     {},
    "autofs":          {},
    "bpf":             {},
    "ramfs":           {},
    "nsfs":            {},
    "efivarfs":        {},
    "overlay":         {}, // common container overlay mounts
    "squashfs":        {}, // typically read-only images
    "selinuxfs":       {},
    "systemd-1":       {},
    "overlayfs":       {}, // (non-standard) conservative skip
    "cgroupfs":        {},
    "procfs":          {},
    "fuse.lxcfs":      {},
    "fuse.gvfsd-fuse": {},
}

func shouldSkipMount(m procMount) bool {
    if m.MountPoint == "" {
        return true
    }
    // Filter by fstype.
    if _, ok := pseudoFSTypes[m.FSType]; ok {
        return true
    }
    // Filter common pseudo mountpoints.
    if strings.HasPrefix(m.MountPoint, "/proc") || strings.HasPrefix(m.MountPoint, "/sys") {
        return true
    }
    if strings.HasPrefix(m.MountPoint, "/dev") {
        // /dev itself can be a real mount in some cases, but usually isn't useful for disk pressure.
        return true
    }
    return false
}

func statfsBlockUsedPct(st syscall.Statfs_t) (usedPct float64, freeBytes uint64) {
    // Mirror df(1) semantics closely:
    //   total = f_blocks
    //   used  = f_blocks - f_bfree
    //   avail = f_bavail (space available to unprivileged user)
    //   use%  = used / (used + avail)
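    //
    // Worked example (illustrative numbers): f_blocks=1000, f_bfree=100,
    // f_bavail=50, block size 4096 -> used=900, use% = 900/(900+50) ≈ 94.7%,
    // freeBytes = 50*4096 = 204800.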
    if st.Blocks == 0 {
        return 0, 0
    }
    bsize := uint64(st.Bsize)
    blocks := uint64(st.Blocks)
    bfree := uint64(st.Bfree)
    bavail := uint64(st.Bavail)
    usedBlocks := blocks - bfree
    denom := usedBlocks + bavail
    if denom == 0 {
        return 0, 0
    }
    freeBytes = bavail * bsize
    usedPct = (float64(usedBlocks) / float64(denom)) * 100.0
    return usedPct, freeBytes
}

// statfsInodeUsedPct returns inode used percent. If inodes are unavailable (f_files==0), returns -1.
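// For example, Files=1000 and Ffree=50 yields 95.0.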
func statfsInodeUsedPct(st syscall.Statfs_t) float64 {
    if st.Files == 0 {
        return -1
    }
    total := float64(st.Files)
    free := float64(st.Ffree)
    used := total - free
    return (used / total) * 100.0
}

func diskPriority(blockPct, inodePct float64) (model.Priority, bool) {
    // inodePct may be -1 if inodes are unsupported; ignore it in that case.
    maxPct := blockPct
    if inodePct >= 0 && inodePct > maxPct {
        maxPct = inodePct
    }
    switch {
    case maxPct >= 98.0:
        return model.PriorityP0, true
    case maxPct >= 92.0:
        return model.PriorityP1, true
    default:
        return "", false
    }
}
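
// Compile-time assertion that *DiskCollector satisfies collectors.Collector
// (presumably Name, Interval, and Collect, as implemented above).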
var _ collectors.Collector = (*DiskCollector)(nil)