package host

import (
	"bufio"
	"context"
	"fmt"
	"os"
	"strconv"
	"strings"
	"syscall"
	"time"

	"tower/internal/collectors"
	"tower/internal/model"
)

// DiskCollector checks filesystem block + inode pressure across mounts.
//
// It reads /proc/mounts to discover mounts and then uses statfs to compute usage.
// Pseudo filesystems are filtered out.
//
// Thresholds (PLAN.md):
//   - P1 if blocks OR inodes >= 92%
//   - P0 if blocks OR inodes >= 98%
//
// Issues are emitted per mount (one issue that includes both block+inode usage).
//
// NOTE: This collector is Linux-specific.
type DiskCollector struct {
	interval time.Duration

	// readFile and statfs are injectable seams; Collect falls back to
	// os.ReadFile and syscall.Statfs when they are nil.
	readFile func(string) ([]byte, error)
	statfs   func(path string, st *syscall.Statfs_t) error
}

// NewDiskCollector returns a DiskCollector wired to the real /proc/mounts
// reader and statfs syscall, polling every 10 seconds.
func NewDiskCollector() *DiskCollector {
	return &DiskCollector{
		interval: 10 * time.Second,
		readFile: os.ReadFile,
		statfs:   syscall.Statfs,
	}
}

// Name returns the collector's identifier.
func (c *DiskCollector) Name() string { return "host:disk" }

// Interval returns the configured polling interval, or 10 seconds when the
// configured value is zero or negative.
func (c *DiskCollector) Interval() time.Duration {
	if c.interval <= 0 {
		return 10 * time.Second
	}
	return c.interval
}

// Collect reads /proc/mounts, runs statfs on every mount that is not filtered
// out, and returns one issue per mount whose block or inode usage reaches the
// P1/P0 thresholds.
//
// Status and error reporting:
//   - an already-canceled context or an unreadable /proc/mounts yields a
//     HealthError status and the underlying error;
//   - an empty mount table yields HealthDegraded with a nil error;
//   - statfs failures on individual mounts are counted and reported as
//     HealthDegraded without failing the whole collection;
//   - cancellation observed mid-loop returns the issues gathered so far
//     together with the context error.
func (c *DiskCollector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
	if err := ctx.Err(); err != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err
	}

	// Fall back to the real implementations so a zero-value DiskCollector does
	// not panic on a nil function field (Interval already tolerates the zero
	// value in the same way).
	readFile := c.readFile
	if readFile == nil {
		readFile = os.ReadFile
	}
	statfs := c.statfs
	if statfs == nil {
		statfs = syscall.Statfs
	}

	b, err := readFile("/proc/mounts")
	if err != nil {
		return nil, collectors.Status{Health: collectors.HealthError, Message: "failed reading /proc/mounts"}, err
	}

	mounts := parseProcMounts(string(b))
	if len(mounts) == 0 {
		// Unusual but treat as degraded rather than hard error.
		return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "no mounts found"}, nil
	}

	issues := make([]model.Issue, 0, 8)
	seenMount := map[string]struct{}{}
	partialErrs := 0

	for _, m := range mounts {
		if err := ctx.Err(); err != nil {
			return issues, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err
		}
		if shouldSkipMount(m) {
			continue
		}
		// The same mount point can be listed more than once; only the first
		// non-skipped entry is inspected.
		if _, ok := seenMount[m.MountPoint]; ok {
			continue
		}
		seenMount[m.MountPoint] = struct{}{}

		var fs syscall.Statfs_t
		if err := statfs(m.MountPoint, &fs); err != nil {
			partialErrs++
			continue
		}

		blockPct, blockFreeBytes := statfsBlockUsedPct(fs)
		inodePct := statfsInodeUsedPct(fs)

		pri, ok := diskPriority(blockPct, inodePct)
		if !ok {
			continue
		}

		evidence := map[string]string{
			"mount":            m.MountPoint,
			"fstype":           m.FSType,
			"block_used_pct":   fmt.Sprintf("%.1f", blockPct),
			"block_free_bytes": strconv.FormatUint(blockFreeBytes, 10),
		}
		// A negative inode percentage means the filesystem reports no usable
		// inode counts, so the key is omitted.
		if inodePct >= 0 {
			evidence["inode_used_pct"] = fmt.Sprintf("%.1f", inodePct)
		}

		issues = append(issues, model.Issue{
			ID:       fmt.Sprintf("host:disk:%s:usage", m.MountPoint),
			Category: model.CategoryStorage,
			Priority: pri,
			Title:    fmt.Sprintf("Disk usage high on %s", m.MountPoint),
			Details:  "Filesystem space and/or inodes are nearly exhausted.",
			Evidence: evidence,
			SuggestedFix: fmt.Sprintf(
				"Inspect usage:\n df -h %s\n df -i %s\nFind large directories:\n sudo du -xh --max-depth=2 %s | sort -h | tail",
				m.MountPoint, m.MountPoint, m.MountPoint,
			),
		})
	}

	status := collectors.OKStatus()
	if partialErrs > 0 {
		status.Health = collectors.HealthDegraded
		status.Message = fmt.Sprintf("partial failures: %d mounts", partialErrs)
	}
	return issues, status, nil
}

// procMount is one parsed line of /proc/mounts.
type procMount struct {
	Device     string
	MountPoint string
	FSType     string
	Options    string
}

// parseProcMounts parses the text of /proc/mounts into one procMount per
// line. Blank lines and lines with fewer than three whitespace-separated
// fields are ignored. Device and mount point have their octal escapes
// decoded; the filesystem type and options are kept verbatim.
func parseProcMounts(content string) []procMount {
	s := bufio.NewScanner(strings.NewReader(content))
	// Raise the token limit to the input size so a very long line (for
	// example a mount with a huge option string) cannot stop the scan early
	// at bufio's default 64 KiB limit.
	s.Buffer(make([]byte, 0, 4096), len(content)+1)

	out := make([]procMount, 0, 32)
	for s.Scan() {
		fields := strings.Fields(s.Text())
		if len(fields) < 3 {
			continue
		}
		m := procMount{
			Device:     unescapeProcMountsField(fields[0]),
			MountPoint: unescapeProcMountsField(fields[1]),
			FSType:     fields[2],
		}
		if len(fields) >= 4 {
			m.Options = fields[3]
		}
		out = append(out, m)
	}
	return out
}

// unescapeProcMountsField decodes the octal escapes used in /proc/mounts.
//
// /proc/mounts escapes special characters as three-digit octal sequences;
// the most common one is a space as \040. Any \NNN sequence whose value fits
// in a byte is decoded in a single left-to-right pass, so decoded backslashes
// are never re-interpreted. Anything else is copied through unchanged.
func unescapeProcMountsField(s string) string {
	if !strings.Contains(s, `\`) {
		return s
	}

	var b strings.Builder
	b.Grow(len(s))
	for i := 0; i < len(s); i++ {
		if s[i] == '\\' && i+3 < len(s) {
			// bitSize 8 rejects values above 0377, and ParseUint rejects
			// non-octal digits, so malformed escapes fall through as text.
			if v, err := strconv.ParseUint(s[i+1:i+4], 8, 8); err == nil {
				b.WriteByte(byte(v))
				i += 3
				continue
			}
		}
		b.WriteByte(s[i])
	}
	return b.String()
}

var pseudoFSTypes = map[string]struct{}{
	"proc":            {},
	"sysfs":           {},
	"tmpfs":           {},
	"devtmpfs":        {},
	"devpts":          {},
	"cgroup":          {},
	"cgroup2":         {},
	"pstore":          {},
	"securityfs":      {},
	"debugfs":         {},
	"tracefs":         {},
	"configfs":        {},
	"hugetlbfs":       {},
	"mqueue":          {},
	"rpc_pipefs":      {},
	"fusectl":         {},
	"binfmt_misc":     {},
	"autofs":          {},
	"bpf":             {},
	"ramfs":           {},
	"nsfs":            {},
	"efivarfs":        {},
	"overlay":         {}, // common container overlay mounts
	"squashfs":        {}, // typically read-only images
	"selinuxfs":       {},
	"systemd-1":       {},
	"overlayfs":       {}, // (non-standard) conservative skip
	"cgroupfs":        {},
	"procfs":          {},
	"fuse.lxcfs":      {},
	"fuse.gvfsd-fuse": {},
}

// mountPointWithin reports whether mountPoint is root itself or a path
// beneath it, comparing on path-component boundaries.
func mountPointWithin(mountPoint, root string) bool {
	return mountPoint == root || strings.HasPrefix(mountPoint, root+"/")
}

// shouldSkipMount reports whether a mount should be excluded from disk
// pressure checks: it has no mount point, its filesystem type is listed in
// pseudoFSTypes, or it lives at or under /proc, /sys, or /dev.
func shouldSkipMount(m procMount) bool {
	if m.MountPoint == "" {
		return true
	}
	// Filter by fstype.
	if _, ok := pseudoFSTypes[m.FSType]; ok {
		return true
	}
	// Filter common pseudo mountpoints. Matching is done per path component
	// so that real mounts such as /sysroot or /devel are not skipped merely
	// for sharing a textual prefix.
	// /dev itself can be a real mount in some cases, but usually isn't useful
	// for disk pressure.
	for _, root := range [...]string{"/proc", "/sys", "/dev"} {
		if mountPointWithin(m.MountPoint, root) {
			return true
		}
	}
	return false
}

// statfsBlockUsedPct returns the percentage of blocks in use and the number
// of bytes available to unprivileged users. It returns (0, 0) when the
// filesystem reports no blocks or when used+available is zero.
func statfsBlockUsedPct(st syscall.Statfs_t) (usedPct float64, freeBytes uint64) {
	// Mirror df(1) semantics closely:
	//   total = f_blocks
	//   used  = f_blocks - f_bfree
	//   avail = f_bavail (space available to unprivileged user)
	//   use%  = used / (used + avail)
	if st.Blocks == 0 {
		return 0, 0
	}

	bsize := uint64(st.Bsize)
	blocks := uint64(st.Blocks)
	bfree := uint64(st.Bfree)
	bavail := uint64(st.Bavail)

	// Clamp instead of subtracting blindly: if a filesystem reports more free
	// blocks than total blocks, the unsigned subtraction would wrap around
	// and produce a bogus near-100% reading.
	var usedBlocks uint64
	if blocks > bfree {
		usedBlocks = blocks - bfree
	}

	denom := usedBlocks + bavail
	if denom == 0 {
		return 0, 0
	}

	freeBytes = bavail * bsize
	usedPct = (float64(usedBlocks) / float64(denom)) * 100.0
	return usedPct, freeBytes
}

// statfsInodeUsedPct returns inode used percent. If inodes are unavailable
// (f_files==0) or the counts are inconsistent (f_ffree > f_files), returns -1.
func statfsInodeUsedPct(st syscall.Statfs_t) float64 {
	if st.Files == 0 || st.Ffree > st.Files {
		return -1
	}
	total := float64(st.Files)
	used := total - float64(st.Ffree)
	return (used / total) * 100.0
}

// diskPriority maps block and inode usage percentages to an issue priority.
// The higher of the two percentages decides; a negative inodePct means inode
// usage is unavailable and is ignored. The boolean is false when usage is
// below the P1 threshold and no issue should be raised.
func diskPriority(blockPct, inodePct float64) (model.Priority, bool) {
	maxPct := blockPct
	if inodePct >= 0 && inodePct > maxPct {
		maxPct = inodePct
	}

	switch {
	case maxPct >= 98.0:
		return model.PriorityP0, true
	case maxPct >= 92.0:
		return model.PriorityP1, true
	default:
		return "", false
	}
}

var _ collectors.Collector = (*DiskCollector)(nil)