package k8s

import (
	"errors"
	"fmt"
	"regexp"
	"strings"
	"time"

	"tower/internal/model"
)

// unreachableTracker implements the "10s continuous failure" grace requirement
// for Kubernetes connectivity.
//
// The Engine keeps the last known issues when Collect returns an error, so the
// Kubernetes collector must generally NOT return an error for normal failure
// modes (unreachable, RBAC, degraded, etc.). Instead it should return a health
// Status + issues.
//
// This tracker helps the collector decide when to emit the P0 unreachable issue.
// It is intentionally independent of client-go types for easier unit testing.
//
// The tracker does no locking of its own.
type unreachableTracker struct {
	// grace is how long failures must persist before shouldEmit reports true.
	grace time.Duration
	// firstFailureAt is the time of the first failure in the current failure
	// streak; the zero value means "not currently failing".
	firstFailureAt time.Time
	// lastErr is the most recent error passed to observeFailure, or nil after
	// a success.
	lastErr error
}

// newUnreachableTracker returns a tracker with the given grace period.
// A non-positive grace falls back to 10 seconds.
func newUnreachableTracker(grace time.Duration) *unreachableTracker {
	if grace <= 0 {
		grace = 10 * time.Second
	}
	return &unreachableTracker{grace: grace}
}

// observeSuccess ends the current failure streak and clears the last error.
func (t *unreachableTracker) observeSuccess() {
	t.firstFailureAt = time.Time{}
	t.lastErr = nil
}

// observeFailure records err as the latest failure seen at now. The start of
// the failure streak is only set on the first failure after a success (or
// after construction). A nil err is ignored.
func (t *unreachableTracker) observeFailure(now time.Time, err error) {
	if err == nil {
		return
	}
	t.lastErr = err
	if t.firstFailureAt.IsZero() {
		t.firstFailureAt = now
	}
}

// failingFor returns how long the current failure streak has lasted as of now.
// It returns 0 when there is no streak, or when now precedes the start of the
// streak (so a clock that moves backwards never yields a negative duration).
func (t *unreachableTracker) failingFor(now time.Time) time.Duration {
	if t.firstFailureAt.IsZero() {
		return 0
	}
	if now.Before(t.firstFailureAt) {
		return 0
	}
	return now.Sub(t.firstFailureAt)
}

// shouldEmit reports whether a failure has been recorded and the streak has
// lasted at least the grace period as of now.
func (t *unreachableTracker) shouldEmit(now time.Time) bool {
	return t.lastErr != nil && t.failingFor(now) >= t.grace
}

// lastErrorString returns the last recorded error as a sanitized, single-line,
// trimmed string, or "" when no error is recorded.
func (t *unreachableTracker) lastErrorString() string {
	if t.lastErr == nil {
		return ""
	}
	s := sanitizeError(t.lastErr)
	s = strings.ReplaceAll(s, "\n", " ")
	s = strings.TrimSpace(s)
	return s
}

// unreachableIssue builds the P0 "cluster unreachable" issue. When err is
// non-nil its sanitized text is appended to Details.
func unreachableIssue(err error) model.Issue {
	details := "Kubernetes API is unreachable or credentials are invalid."
	if err != nil {
		// Avoid duplicating very long errors in Title.
		details = fmt.Sprintf("%s Last error: %s", details, sanitizeError(err))
	}
	return model.Issue{
		ID:       "k8s:cluster:unreachable",
		Category: model.CategoryKubernetes,
		Priority: model.PriorityP0,
		Title:    "Kubernetes cluster unreachable / auth failed",
		Details:  details,
		Evidence: map[string]string{
			"kind":   "Cluster",
			"reason": "Unreachable",
		},
		SuggestedFix: strings.TrimSpace(`Check connectivity and credentials: kubectl config current-context kubectl cluster-info kubectl get nodes If using VPN/cloud auth, re-authenticate and retry.`),
	}
}

// sanitizeRules lists the redactions applied, in order, by sanitizeError.
// The patterns are compiled once at package initialization instead of on
// every call.
var sanitizeRules = []struct {
	pattern     *regexp.Regexp
	replacement string
}{
	// The character class covers the RFC 6750 b64token alphabet, so dotted
	// tokens such as JWTs are redacted in full rather than only up to the
	// first ".".
	{regexp.MustCompile(`Bearer [A-Za-z0-9._~+/=-]{20,}`), "Bearer [REDACTED]"},
	{regexp.MustCompile(`password=[^&\s]+`), "password=[REDACTED]"},
	{regexp.MustCompile(`token=[^&\s]+`), "token=[REDACTED]"},
	{regexp.MustCompile(`secret=[^&\s]+`), "secret=[REDACTED]"},
	// Any URL containing "k8s", including hosts that start with it
	// (e.g. https://k8s.example.com). This also covers ".k8s." hosts.
	{regexp.MustCompile(`https?://[^\s]*k8s[^\s]*`), "[API_SERVER]"},
}

// sanitizeError returns err's message with bearer tokens, password=/token=/
// secret= values, and http(s) URLs containing "k8s" replaced by placeholders.
// It returns "" for a nil error.
func sanitizeError(err error) string {
	if err == nil {
		return ""
	}
	s := err.Error()
	for _, rule := range sanitizeRules {
		s = rule.pattern.ReplaceAllString(s, rule.replacement)
	}
	return s
}

// flattenErr returns err's message as a single trimmed line, or "" for a nil
// error. If err wraps another error, the message of the wrapped error is used.
// Unlike lastErrorString, the result is not passed through sanitizeError.
func flattenErr(err error) string {
	if err == nil {
		return ""
	}
	// Unwrap once to avoid nested "context deadline exceeded" noise.
	if u := errors.Unwrap(err); u != nil {
		err = u
	}
	s := err.Error()
	s = strings.ReplaceAll(s, "\n", " ")
	s = strings.TrimSpace(s)
	return s
}