Files
porthole/internal/collectors/k8s/unreachable.go
OpenCode Test 1421b4659e feat: implement ControlTower TUI for cluster and host monitoring
Add complete TUI application for monitoring Kubernetes clusters and host
systems. Features include:

Core features:
- Collector framework with concurrent scheduling
- Host collectors: disk, memory, load, network
- Kubernetes collectors: pods, nodes, workloads, events with informers
- Issue deduplication, state management, and resolve-after logic
- Bubble Tea TUI with table view, details pane, and filtering
- JSON export functionality

UX improvements:
- Help overlay with keybindings
- Priority/category filters with visual indicators
- Direct priority jump (0/1/2/3)
- Bulk acknowledge (Shift+A)
- Clipboard copy (y)
- Theme toggle (T)
- Age format toggle (d)
- Wide title toggle (t)
- Vi-style navigation (j/k)
- Home/End jump (g/G)
- Rollup drill-down in details

Robustness:
- Grace period for unreachable clusters
- Rollups for high-volume issues
- Flap suppression
- RBAC error handling

Files: All core application code with tests for host collectors,
engine, store, model, and export packages.
2025-12-24 13:29:51 -08:00

134 lines
3.3 KiB
Go

package k8s
import (
"errors"
"fmt"
"regexp"
"strings"
"time"
"tower/internal/model"
)
// unreachableTracker implements the "10s continuous failure" grace requirement
// for Kubernetes connectivity.
//
// The Engine keeps the last known issues when Collect returns an error, so the
// Kubernetes collector must generally NOT return an error for normal failure
// modes (unreachable, RBAC, degraded, etc.). Instead it should return a health
// Status + issues.
//
// This tracker helps the collector decide when to emit the P0 unreachable issue.
// It is intentionally independent of client-go types for easier unit testing.
//
// The tracker records when the current run of failures began and the most
// recent error; a single observed success resets both.
type unreachableTracker struct {
// grace is how long failures must have been observed, measured from
// firstFailureAt, before shouldEmit reports true.
grace time.Duration
// firstFailureAt is the time of the first failure in the current run of
// failures. It is the zero time while no failure is being tracked.
firstFailureAt time.Time
// lastErr is the most recent non-nil error passed to observeFailure. It is
// nil after observeSuccess.
lastErr error
}
// newUnreachableTracker builds a tracker with the given grace period.
// A zero or negative grace is replaced by the 10-second default.
func newUnreachableTracker(grace time.Duration) *unreachableTracker {
	const defaultGrace = 10 * time.Second

	tracker := &unreachableTracker{grace: defaultGrace}
	if grace > 0 {
		tracker.grace = grace
	}
	return tracker
}
// observeSuccess records a successful contact: it clears the start of the
// failure run and forgets the last error.
func (t *unreachableTracker) observeSuccess() {
	t.firstFailureAt, t.lastErr = time.Time{}, nil
}
// observeFailure records a failed contact at time now. A nil err is ignored.
// The start of the failure run is set only if no run is already in progress,
// so repeated failures keep the original start time; the stored error is
// always replaced by the latest one.
func (t *unreachableTracker) observeFailure(now time.Time, err error) {
	if err == nil {
		return
	}
	if t.firstFailureAt.IsZero() {
		// First failure since the last success: this is where the run starts.
		t.firstFailureAt = now
	}
	t.lastErr = err
}
// failingFor returns how long the current failure run has lasted as of now.
// It returns 0 when no failure run is in progress, and also when now is
// earlier than the recorded start, so the result is never negative.
func (t *unreachableTracker) failingFor(now time.Time) time.Duration {
	start := t.firstFailureAt
	switch {
	case start.IsZero(), now.Before(start):
		return 0
	default:
		return now.Sub(start)
	}
}
// shouldEmit reports whether an error is recorded and the failure run has
// lasted at least the grace period as of now.
func (t *unreachableTracker) shouldEmit(now time.Time) bool {
	if t.lastErr == nil {
		return false
	}
	return t.failingFor(now) >= t.grace
}
// lastErrorString returns the last recorded error as a single line: it is
// passed through sanitizeError, newlines become spaces, and surrounding
// whitespace is trimmed. It returns "" when no error is recorded.
func (t *unreachableTracker) lastErrorString() string {
	if t.lastErr == nil {
		return ""
	}
	oneLine := strings.ReplaceAll(sanitizeError(t.lastErr), "\n", " ")
	return strings.TrimSpace(oneLine)
}
// unreachableIssue builds the P0 issue reported when the Kubernetes API cannot
// be reached or authentication fails. When err is non-nil its sanitized text
// is appended to Details; the Title stays fixed so long error text is not
// repeated there.
func unreachableIssue(err error) model.Issue {
	issue := model.Issue{
		ID:       "k8s:cluster:unreachable",
		Category: model.CategoryKubernetes,
		Priority: model.PriorityP0,
		Title:    "Kubernetes cluster unreachable / auth failed",
		Details:  "Kubernetes API is unreachable or credentials are invalid.",
		Evidence: map[string]string{
			"kind":   "Cluster",
			"reason": "Unreachable",
		},
		SuggestedFix: strings.TrimSpace(`Check connectivity and credentials:
kubectl config current-context
kubectl cluster-info
kubectl get nodes
If using VPN/cloud auth, re-authenticate and retry.`),
	}
	if err != nil {
		issue.Details = fmt.Sprintf("%s Last error: %s", issue.Details, sanitizeError(err))
	}
	return issue
}
// errorRedactions lists, in application order, the patterns that sanitizeError
// scrubs from error text and the placeholder substituted for each match.
// The patterns are compiled once at package initialization rather than on
// every sanitizeError call.
var errorRedactions = []struct {
	pattern     *regexp.Regexp
	replacement string
}{
	// Bearer credentials. The character class follows the RFC 6750 b64token
	// grammar, so dot-separated JWTs are redacted in full instead of only up
	// to the first ".".
	{regexp.MustCompile(`Bearer [A-Za-z0-9._~+/-]{20,}=*`), "Bearer [REDACTED]"},
	// key=value credentials, e.g. in query strings; the value ends at "&" or
	// whitespace.
	{regexp.MustCompile(`password=[^&\s]+`), "password=[REDACTED]"},
	{regexp.MustCompile(`token=[^&\s]+`), "token=[REDACTED]"},
	{regexp.MustCompile(`secret=[^&\s]+`), "secret=[REDACTED]"},
	// Any http(s) URL containing "k8s" after the scheme, including hosts that
	// begin with "k8s". The whole non-whitespace run is replaced.
	{regexp.MustCompile(`https?://[^\s]*k8s[^\s]*`), "[API_SERVER]"},
}

// sanitizeError returns err's message with credentials and API server URLs
// replaced by placeholders, so the text can be shown in issue details.
// It returns "" for a nil error. Text that matches no redaction pattern is
// returned unchanged.
func sanitizeError(err error) string {
	if err == nil {
		return ""
	}
	s := err.Error()
	for _, r := range errorRedactions {
		s = r.pattern.ReplaceAllString(s, r.replacement)
	}
	return s
}
// flattenErr renders err as a single trimmed line. If err wraps another
// error, the message of the directly wrapped error is used instead (one level
// only), which cuts nested "context deadline exceeded" noise. Newlines are
// replaced by spaces. It returns "" for a nil error.
func flattenErr(err error) string {
	if err == nil {
		return ""
	}
	cause := err
	if inner := errors.Unwrap(err); inner != nil {
		cause = inner
	}
	return strings.TrimSpace(strings.ReplaceAll(cause.Error(), "\n", " "))
}