Add complete TUI application for monitoring Kubernetes clusters and host systems.

Features include:

Core features:
- Collector framework with concurrent scheduling
- Host collectors: disk, memory, load, network
- Kubernetes collectors: pods, nodes, workloads, events with informers
- Issue deduplication, state management, and resolve-after logic
- Bubble Tea TUI with table view, details pane, and filtering
- JSON export functionality

UX improvements:
- Help overlay with keybindings
- Priority/category filters with visual indicators
- Direct priority jump (0/1/2/3)
- Bulk acknowledge (Shift+A)
- Clipboard copy (y)
- Theme toggle (T)
- Age format toggle (d)
- Wide title toggle (t)
- Vi-style navigation (j/k)
- Home/End jump (g/G)
- Rollup drill-down in details

Robustness:
- Grace period for unreachable clusters
- Rollups for high-volume issues
- Flap suppression
- RBAC error handling

Files: All core application code, with tests for the host collectors, engine, store, model, and export packages.
134 lines
3.3 KiB
Go
134 lines
3.3 KiB
Go
package k8s
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"tower/internal/model"
|
|
)
|
|
|
|
// unreachableTracker records how long Kubernetes connectivity has been failing
// so the collector can honor the "10s continuous failure" grace requirement.
//
// Because the Engine keeps the last known issues whenever Collect returns an
// error, the Kubernetes collector should generally NOT return an error for
// ordinary failure modes (unreachable, RBAC, degraded, etc.); it should return
// a health Status plus issues instead.
//
// The tracker tells the collector when the P0 unreachable issue is due. It
// deliberately avoids client-go types so it stays easy to unit test.
type unreachableTracker struct {
	// grace is how long a failure streak must last before shouldEmit
	// reports true.
	grace time.Duration

	// firstFailureAt is when the current failure streak began; the zero
	// value means no streak is being tracked.
	firstFailureAt time.Time

	// lastErr is the most recently observed failure; observeSuccess resets
	// it to nil.
	lastErr error
}
|
|
|
|
func newUnreachableTracker(grace time.Duration) *unreachableTracker {
|
|
if grace <= 0 {
|
|
grace = 10 * time.Second
|
|
}
|
|
return &unreachableTracker{grace: grace}
|
|
}
|
|
|
|
func (t *unreachableTracker) observeSuccess() {
|
|
t.firstFailureAt = time.Time{}
|
|
t.lastErr = nil
|
|
}
|
|
|
|
func (t *unreachableTracker) observeFailure(now time.Time, err error) {
|
|
if err == nil {
|
|
return
|
|
}
|
|
t.lastErr = err
|
|
if t.firstFailureAt.IsZero() {
|
|
t.firstFailureAt = now
|
|
}
|
|
}
|
|
|
|
func (t *unreachableTracker) failingFor(now time.Time) time.Duration {
|
|
if t.firstFailureAt.IsZero() {
|
|
return 0
|
|
}
|
|
if now.Before(t.firstFailureAt) {
|
|
return 0
|
|
}
|
|
return now.Sub(t.firstFailureAt)
|
|
}
|
|
|
|
func (t *unreachableTracker) shouldEmit(now time.Time) bool {
|
|
return t.lastErr != nil && t.failingFor(now) >= t.grace
|
|
}
|
|
|
|
func (t *unreachableTracker) lastErrorString() string {
|
|
if t.lastErr == nil {
|
|
return ""
|
|
}
|
|
s := sanitizeError(t.lastErr)
|
|
s = strings.ReplaceAll(s, "\n", " ")
|
|
s = strings.TrimSpace(s)
|
|
return s
|
|
}
|
|
|
|
func unreachableIssue(err error) model.Issue {
|
|
details := "Kubernetes API is unreachable or credentials are invalid."
|
|
if err != nil {
|
|
// Avoid duplicating very long errors in Title.
|
|
details = fmt.Sprintf("%s Last error: %s", details, sanitizeError(err))
|
|
}
|
|
|
|
return model.Issue{
|
|
ID: "k8s:cluster:unreachable",
|
|
Category: model.CategoryKubernetes,
|
|
Priority: model.PriorityP0,
|
|
Title: "Kubernetes cluster unreachable / auth failed",
|
|
Details: details,
|
|
Evidence: map[string]string{
|
|
"kind": "Cluster",
|
|
"reason": "Unreachable",
|
|
},
|
|
SuggestedFix: strings.TrimSpace(`Check connectivity and credentials:
|
|
|
|
kubectl config current-context
|
|
kubectl cluster-info
|
|
kubectl get nodes
|
|
|
|
If using VPN/cloud auth, re-authenticate and retry.`),
|
|
}
|
|
}
|
|
|
|
// Redaction patterns used by sanitizeError. They are compiled once at package
// initialization instead of on every call.
var (
	// bearerTokenRe matches an HTTP bearer credential of at least 20
	// characters. The class covers the RFC 6750 token alphabet, including
	// '.', so dot-separated tokens such as JWTs are redacted as a whole
	// rather than only up to the first dot.
	bearerTokenRe = regexp.MustCompile(`Bearer [A-Za-z0-9._~+/=-]{20,}`)

	// passwordParamRe, tokenParamRe and secretParamRe match key=value
	// pairs; the value runs up to the next '&' or whitespace.
	passwordParamRe = regexp.MustCompile(`password=[^&\s]+`)

	tokenParamRe = regexp.MustCompile(`token=[^&\s]+`)

	secretParamRe = regexp.MustCompile(`secret=[^&\s]+`)

	// apiServerURLRe matches any http(s) URL containing "k8s" after at
	// least one other character. A former second pattern for ".k8s." hosts
	// was dropped: every URL it matched is already matched by this one.
	apiServerURLRe = regexp.MustCompile(`https?://[^\s]+k8s[^\s]*`)
)

// sanitizeError returns err's message with credentials and API server URLs
// masked so it can be shown to the user.
//
// Bearer tokens and password=/token=/secret= values are replaced by
// "[REDACTED]"; URLs containing "k8s" are replaced by "[API_SERVER]". A nil
// err yields "".
func sanitizeError(err error) string {
	if err == nil {
		return ""
	}
	s := err.Error()

	s = bearerTokenRe.ReplaceAllString(s, "Bearer [REDACTED]")

	s = passwordParamRe.ReplaceAllString(s, "password=[REDACTED]")
	s = tokenParamRe.ReplaceAllString(s, "token=[REDACTED]")
	s = secretParamRe.ReplaceAllString(s, "secret=[REDACTED]")

	s = apiServerURLRe.ReplaceAllString(s, "[API_SERVER]")

	return s
}
|
|
|
|
// flattenErr renders err as a single trimmed line.
//
// If err wraps another error, the message of that directly wrapped error is
// used instead (one level of unwrapping only), which avoids nested
// "context deadline exceeded" noise. Newlines become spaces and surrounding
// whitespace is trimmed. A nil err yields "".
func flattenErr(err error) string {
	if err == nil {
		return ""
	}
	cause := errors.Unwrap(err)
	if cause == nil {
		cause = err
	}
	return strings.TrimSpace(strings.ReplaceAll(cause.Error(), "\n", " "))
}
|