feat: implement ControlTower TUI for cluster and host monitoring

Add a complete TUI application for monitoring Kubernetes clusters and host
systems.

Core features:
- Collector framework with concurrent scheduling
- Host collectors: disk, memory, load, network
- Kubernetes collectors: pods, nodes, workloads, events with informers
- Issue deduplication, state management, and resolve-after logic
- Bubble Tea TUI with table view, details pane, and filtering
- JSON export functionality

UX improvements:
- Help overlay with keybindings
- Priority/category filters with visual indicators
- Direct priority jump (0/1/2/3)
- Bulk acknowledge (Shift+A)
- Clipboard copy (y)
- Theme toggle (T)
- Age format toggle (d)
- Wide title toggle (t)
- Vi-style navigation (j/k)
- Home/End jump (g/G)
- Rollup drill-down in details

Robustness:
- Grace period for unreachable clusters
- Rollups for high-volume issues
- Flap suppression
- RBAC error handling

Files: All core application code with tests for host collectors,
engine, store, model, and export packages.
OpenCode Test
2025-12-24 13:03:08 -08:00
parent c2c03fd664
commit 1421b4659e
40 changed files with 5941 additions and 0 deletions

View File

@@ -0,0 +1,88 @@
package k8s
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"time"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
)
// ClientFromCurrentContext creates a Kubernetes client-go Clientset using the
// user's kubeconfig current context.
//
// It is a pure helper (no global state) so it can be used by collectors and
// unit tests (with temporary kubeconfig files).
func ClientFromCurrentContext() (*kubernetes.Clientset, *rest.Config, error) {
loadingRules := clientcmd.NewDefaultClientConfigLoadingRules()
// Respect KUBECONFIG semantics (it may be a path list).
if p := os.Getenv("KUBECONFIG"); p != "" {
if list := filepath.SplitList(p); len(list) > 1 {
loadingRules.ExplicitPath = ""
loadingRules.Precedence = list
} else {
loadingRules.ExplicitPath = p
}
}
cfg := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, &clientcmd.ConfigOverrides{})
restCfg, err := cfg.ClientConfig()
if err != nil {
return nil, nil, err
}
// Ensure HTTP client timeouts are bounded. LIST fallback uses its own context
// timeouts, but this provides a safety net.
if restCfg.Timeout <= 0 {
restCfg.Timeout = 30 * time.Second
}
cs, err := kubernetes.NewForConfig(restCfg)
if err != nil {
return nil, nil, err
}
return cs, restCfg, nil
}
func defaultKubeconfigPath() string {
// This helper is used only for existence checks / UI messages. Client loading
// should use client-go's default loading rules.
if p := os.Getenv("KUBECONFIG"); p != "" {
// If KUBECONFIG is a list, return the first entry for display.
if list := filepath.SplitList(p); len(list) > 0 {
return list[0]
}
return p
}
h, err := os.UserHomeDir()
if err != nil {
return ""
}
return filepath.Join(h, ".kube", "config")
}
// Ping performs a lightweight API call to determine if the cluster is reachable
// and authentication works.
func Ping(ctx context.Context, cs kubernetes.Interface) error {
if cs == nil {
return errors.New("nil kubernetes client")
}
_, err := cs.Discovery().ServerVersion()
if err != nil {
// Treat authn/authz errors separately so callers can decide whether to
// surface "unreachable" vs "insufficient credentials".
if apierrors.IsForbidden(err) || apierrors.IsUnauthorized(err) {
return fmt.Errorf("discovery auth: %w", err)
}
return fmt.Errorf("discovery server version: %w", err)
}
return nil
}
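
Example (not part of the commit): a minimal sketch of wiring ClientFromCurrentContext and Ping together from a main package in the same module. The tower/internal/k8s import path and the ten-second timeout are assumptions for illustration.

// Illustrative only: connect using the current kubeconfig context and ping with a bounded context.
package main

import (
	"context"
	"fmt"
	"time"

	"tower/internal/k8s" // assumed package path within the tower module
)

func main() {
	cs, _, err := k8s.ClientFromCurrentContext()
	if err != nil {
		fmt.Println("client init failed:", err)
		return
	}
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	if err := k8s.Ping(ctx, cs); err != nil {
		fmt.Println("cluster unreachable or auth failed:", err)
		return
	}
	fmt.Println("cluster reachable")
}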

View File

@@ -0,0 +1,720 @@
package k8s
import (
"context"
"fmt"
"os"
"path/filepath"
"sort"
"sync"
"time"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
appslisters "k8s.io/client-go/listers/apps/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"tower/internal/collectors"
"tower/internal/model"
)
// Collector is the ControlTower Kubernetes collector.
//
// It uses client-go informers (LIST+WATCH with local caches) against the user's
// kubeconfig current context, across all namespaces.
//
// Degradation behavior:
// - If WATCH fails repeatedly, it falls back to polling LIST and emits a P1
// "degraded to polling" issue.
// - While in polling mode, it periodically attempts to recover back to watches.
// - If the cluster is unreachable, it emits a P0 only after 10s of continuous failure.
// - If RBAC forbids list/watch for a resource, it emits a single P2 issue per
// inaccessible resource and continues for accessible resources.
//
// Noise control:
// - Rollups group by (namespace, reason, kind) when group size >= 20.
// - Cap max issues to 200 after rollups.
//
// Instantiate with NewCollector().
type Collector struct {
interval time.Duration
unreachableGrace time.Duration
pendingGrace time.Duration
workloadGrace time.Duration
crashLoopThresh int
rollupThreshold int
maxIssues int
watchFailureThreshold int
watchFailureWindow time.Duration
pollRecoverEvery time.Duration
mu sync.Mutex
syncWG sync.WaitGroup
client kubernetes.Interface
factory informers.SharedInformerFactory
stopCh chan struct{}
started bool
syncedFns []cache.InformerSynced
podsLister corelisters.PodLister
nodesLister corelisters.NodeLister
eventsLister corelisters.EventLister
deployLister appslisters.DeploymentLister
statefulSetLister appslisters.StatefulSetLister
daemonSetLister appslisters.DaemonSetLister
// polling indicates we have degraded from informers to list polling.
polling bool
pollSince time.Time
lastPollRecoverAttempt time.Time
watchFailWindowStart time.Time
watchFailCount int
// rbacDenied is keyed by resource name ("pods", "nodes", ...).
rbacDenied map[string]error
unreach *unreachableTracker
lastSuccess time.Time
}
func NewCollector() *Collector {
c := &Collector{
interval: 2 * time.Second,
unreachableGrace: 10 * time.Second,
pendingGrace: 120 * time.Second,
workloadGrace: 180 * time.Second,
crashLoopThresh: 5,
rollupThreshold: 20,
maxIssues: 200,
watchFailureThreshold: 5,
watchFailureWindow: 30 * time.Second,
pollRecoverEvery: 30 * time.Second,
rbacDenied: map[string]error{},
}
c.unreach = newUnreachableTracker(c.unreachableGrace)
return c
}
var _ collectors.Collector = (*Collector)(nil)
func (c *Collector) Name() string { return "k8s" }
func (c *Collector) Interval() time.Duration {
if c.interval <= 0 {
return 2 * time.Second
}
return c.interval
}
func (c *Collector) Collect(ctx context.Context) ([]model.Issue, collectors.Status, error) {
now := time.Now()
if err := ctx.Err(); err != nil {
return nil, collectors.Status{Health: collectors.HealthError, Message: "canceled"}, err
}
// If kubeconfig doesn't exist, treat Kubernetes as "disabled".
if !kubeconfigExists() {
return nil, collectors.Status{Health: collectors.HealthDegraded, Message: "kubeconfig not found"}, nil
}
if err := c.ensureClient(); err != nil {
c.unreach.observeFailure(now, err)
if c.unreach.shouldEmit(now) {
iss := stampIssueTimes(now, unreachableIssue(err))
return []model.Issue{iss}, collectors.Status{Health: collectors.HealthError, Message: "unreachable"}, nil
}
return nil, collectors.Status{Health: collectors.HealthError, Message: "k8s client init failed (grace)"}, nil
}
// Connectivity/auth check with grace.
if err := Ping(ctx, c.client); err != nil {
c.unreach.observeFailure(now, err)
if c.unreach.shouldEmit(now) {
iss := stampIssueTimes(now, unreachableIssue(err))
return []model.Issue{iss}, collectors.Status{Health: collectors.HealthError, Message: "unreachable"}, nil
}
return nil, collectors.Status{Health: collectors.HealthError, Message: "k8s unreachable (grace)"}, nil
}
c.unreach.observeSuccess()
c.lastSuccess = now
// Prefer informers unless currently degraded to polling.
if c.isPolling() {
c.maybeRecoverInformers(ctx, now)
}
if !c.isPolling() {
_ = c.ensureInformers(ctx)
}
issues := make([]model.Issue, 0, 64)
issues = append(issues, c.rbacIssues()...)
st := collectors.Status{Health: collectors.HealthOK, LastSuccess: c.lastSuccess}
if c.isPolling() {
st.Health = collectors.HealthDegraded
st.Message = "degraded to polling"
issues = append(issues, stampIssueTimes(now, pollingDegradedIssue()))
issues = append(issues, c.collectByPolling(ctx, now)...)
} else {
// If caches aren't ready, use polling for this tick only.
if !c.cachesSyncedQuick(ctx) {
st.Health = collectors.HealthDegraded
st.Message = "waiting for informer cache; used list"
issues = append(issues, c.collectByPolling(ctx, now)...)
} else {
issues = append(issues, c.collectFromCaches(now)...)
if len(c.snapshotRBACDenied()) > 0 {
st.Health = collectors.HealthDegraded
st.Message = "partial RBAC access"
}
}
}
// Set timestamps, roll up and cap.
for i := range issues {
issues[i] = stampIssueTimes(now, issues[i])
}
issues = Rollup(issues, c.rollupThreshold, 5)
model.SortIssuesDefault(issues)
issues = CapIssues(issues, c.maxIssues)
return issues, st, nil
}
func (c *Collector) ensureClient() error {
c.mu.Lock()
defer c.mu.Unlock()
if c.client != nil {
return nil
}
cs, _, err := ClientFromCurrentContext()
if err != nil {
return err
}
c.client = cs
return nil
}
func kubeconfigExists() bool {
if p := os.Getenv("KUBECONFIG"); p != "" {
for _, fp := range filepath.SplitList(p) {
if fp == "" {
continue
}
if _, err := os.Stat(fp); err == nil {
return true
}
}
return false
}
p := defaultKubeconfigPath()
if p == "" {
return false
}
_, err := os.Stat(p)
return err == nil
}
func (c *Collector) ensureInformers(ctx context.Context) error {
c.mu.Lock()
if c.started || c.polling {
c.mu.Unlock()
return nil
}
client := c.client
c.mu.Unlock()
if client == nil {
return fmt.Errorf("nil kubernetes client")
}
// RBAC preflight before we even construct informers (so we can skip forbidden ones).
c.preflightRBAC(ctx, client)
factory := informers.NewSharedInformerFactory(client, 0)
var (
podsInf cache.SharedIndexInformer
nodesInf cache.SharedIndexInformer
evsInf cache.SharedIndexInformer
depInf cache.SharedIndexInformer
stsInf cache.SharedIndexInformer
dsInf cache.SharedIndexInformer
)
if !c.isRBACDenied("pods") {
i := factory.Core().V1().Pods()
i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("pods", err) })
c.mu.Lock()
c.podsLister = i.Lister()
c.mu.Unlock()
podsInf = i.Informer()
}
if !c.isRBACDenied("nodes") {
i := factory.Core().V1().Nodes()
i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("nodes", err) })
c.mu.Lock()
c.nodesLister = i.Lister()
c.mu.Unlock()
nodesInf = i.Informer()
}
if !c.isRBACDenied("events") {
i := factory.Core().V1().Events()
i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("events", err) })
c.mu.Lock()
c.eventsLister = i.Lister()
c.mu.Unlock()
evsInf = i.Informer()
}
if !c.isRBACDenied("deployments") {
i := factory.Apps().V1().Deployments()
i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("deployments", err) })
c.mu.Lock()
c.deployLister = i.Lister()
c.mu.Unlock()
depInf = i.Informer()
}
if !c.isRBACDenied("statefulsets") {
i := factory.Apps().V1().StatefulSets()
i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("statefulsets", err) })
c.mu.Lock()
c.statefulSetLister = i.Lister()
c.mu.Unlock()
stsInf = i.Informer()
}
if !c.isRBACDenied("daemonsets") {
i := factory.Apps().V1().DaemonSets()
i.Informer().SetWatchErrorHandler(func(_ *cache.Reflector, err error) { c.recordWatchError("daemonsets", err) })
c.mu.Lock()
c.daemonSetLister = i.Lister()
c.mu.Unlock()
dsInf = i.Informer()
}
synced := make([]cache.InformerSynced, 0, 6)
if podsInf != nil {
synced = append(synced, podsInf.HasSynced)
}
if nodesInf != nil {
synced = append(synced, nodesInf.HasSynced)
}
if evsInf != nil {
synced = append(synced, evsInf.HasSynced)
}
if depInf != nil {
synced = append(synced, depInf.HasSynced)
}
if stsInf != nil {
synced = append(synced, stsInf.HasSynced)
}
if dsInf != nil {
synced = append(synced, dsInf.HasSynced)
}
stopCh := make(chan struct{})
c.mu.Lock()
c.factory = factory
c.stopCh = stopCh
c.started = true
c.syncedFns = synced
c.mu.Unlock()
factory.Start(stopCh)
c.syncWG.Add(1)
go func() {
defer c.syncWG.Done()
syncCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if ok := cache.WaitForCacheSync(syncCtx.Done(), synced...); !ok {
fmt.Printf("k8s: informer cache sync failed or timed out\n")
}
}()
return nil
}
func (c *Collector) maybeRecoverInformers(ctx context.Context, now time.Time) {
c.mu.Lock()
interval := c.pollRecoverEvery
last := c.lastPollRecoverAttempt
c.mu.Unlock()
if interval <= 0 {
interval = 30 * time.Second
}
if !last.IsZero() && now.Sub(last) < interval {
return
}
c.mu.Lock()
c.lastPollRecoverAttempt = now
c.mu.Unlock()
// Only attempt if connectivity is OK (already pinged successfully in Collect).
// Reset watch failure counters and exit polling; subsequent Collect will ensureInformers.
c.mu.Lock()
c.polling = false
c.pollSince = time.Time{}
c.watchFailWindowStart = time.Time{}
c.watchFailCount = 0
c.mu.Unlock()
_ = c.ensureInformers(ctx)
}
func (c *Collector) preflightRBAC(ctx context.Context, client kubernetes.Interface) {
shortCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
defer cancel()
probe := func(resource string, f func(context.Context) error) {
if err := f(shortCtx); err != nil {
if apierrors.IsForbidden(err) {
c.noteRBAC(resource, err)
}
}
}
probe("nodes", func(ctx context.Context) error {
_, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{Limit: 1})
return err
})
probe("pods", func(ctx context.Context) error {
_, err := client.CoreV1().Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1})
return err
})
probe("deployments", func(ctx context.Context) error {
_, err := client.AppsV1().Deployments(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1})
return err
})
probe("statefulsets", func(ctx context.Context) error {
_, err := client.AppsV1().StatefulSets(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1})
return err
})
probe("daemonsets", func(ctx context.Context) error {
_, err := client.AppsV1().DaemonSets(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1})
return err
})
probe("events", func(ctx context.Context) error {
_, err := client.CoreV1().Events(metav1.NamespaceAll).List(ctx, metav1.ListOptions{Limit: 1})
return err
})
}
func (c *Collector) noteRBAC(resource string, err error) {
if err == nil || !apierrors.IsForbidden(err) {
return
}
c.mu.Lock()
defer c.mu.Unlock()
if _, ok := c.rbacDenied[resource]; ok {
return
}
c.rbacDenied[resource] = err
}
func (c *Collector) isRBACDenied(resource string) bool {
c.mu.Lock()
defer c.mu.Unlock()
_, ok := c.rbacDenied[resource]
return ok
}
func (c *Collector) snapshotRBACDenied() map[string]error {
c.mu.Lock()
defer c.mu.Unlock()
out := make(map[string]error, len(c.rbacDenied))
for k, v := range c.rbacDenied {
out[k] = v
}
return out
}
func (c *Collector) recordWatchError(resource string, err error) {
if err == nil {
return
}
if apierrors.IsForbidden(err) {
c.noteRBAC(resource, err)
return
}
now := time.Now()
c.mu.Lock()
defer c.mu.Unlock()
if c.polling {
return
}
if c.watchFailWindowStart.IsZero() || now.Sub(c.watchFailWindowStart) > c.watchFailureWindow {
c.watchFailWindowStart = now
c.watchFailCount = 0
}
c.watchFailCount++
if c.watchFailCount >= c.watchFailureThreshold {
c.polling = true
c.pollSince = now
if c.stopCh != nil {
close(c.stopCh)
c.stopCh = nil
}
c.started = false
c.factory = nil
c.syncedFns = nil
c.syncWG.Wait()
}
}
func (c *Collector) cachesSyncedQuick(ctx context.Context) bool {
c.mu.Lock()
synced := append([]cache.InformerSynced(nil), c.syncedFns...)
c.mu.Unlock()
if len(synced) == 0 {
return false
}
syncCtx, cancel := context.WithTimeout(ctx, 200*time.Millisecond)
defer cancel()
return cache.WaitForCacheSync(syncCtx.Done(), synced...)
}
func (c *Collector) collectFromCaches(now time.Time) []model.Issue {
c.mu.Lock()
podsLister := c.podsLister
nodesLister := c.nodesLister
eventsLister := c.eventsLister
deployLister := c.deployLister
stsLister := c.statefulSetLister
dsLister := c.daemonSetLister
denied := make(map[string]error, len(c.rbacDenied))
for k, v := range c.rbacDenied {
denied[k] = v
}
c.mu.Unlock()
issues := make([]model.Issue, 0, 64)
sel := labels.Everything()
if _, ok := denied["nodes"]; !ok && nodesLister != nil {
if list, err := nodesLister.List(sel); err == nil {
nodes := make([]*corev1.Node, 0, len(list))
for i := range list {
nodes = append(nodes, list[i])
}
issues = append(issues, IssuesFromNodes(nodes)...)
}
}
if _, ok := denied["pods"]; !ok && podsLister != nil {
if list, err := podsLister.List(sel); err == nil {
pods := make([]*corev1.Pod, 0, len(list))
for i := range list {
pods = append(pods, list[i])
}
issues = append(issues, IssuesFromPods(pods, now, c.pendingGrace, c.crashLoopThresh)...)
}
}
if _, ok := denied["deployments"]; !ok && deployLister != nil {
if list, err := deployLister.List(sel); err == nil {
deps := make([]*appsv1.Deployment, 0, len(list))
for i := range list {
deps = append(deps, list[i])
}
issues = append(issues, IssuesFromDeployments(deps, now, c.workloadGrace)...)
}
}
if _, ok := denied["statefulsets"]; !ok && stsLister != nil {
if list, err := stsLister.List(sel); err == nil {
sts := make([]*appsv1.StatefulSet, 0, len(list))
for i := range list {
sts = append(sts, list[i])
}
issues = append(issues, IssuesFromStatefulSets(sts, now, c.workloadGrace)...)
}
}
if _, ok := denied["daemonsets"]; !ok && dsLister != nil {
if list, err := dsLister.List(sel); err == nil {
dss := make([]*appsv1.DaemonSet, 0, len(list))
for i := range list {
dss = append(dss, list[i])
}
issues = append(issues, IssuesFromDaemonSets(dss, now, c.workloadGrace)...)
}
}
if _, ok := denied["events"]; !ok && eventsLister != nil {
if list, err := eventsLister.List(sel); err == nil {
es := make([]*corev1.Event, 0, len(list))
for i := range list {
es = append(es, list[i])
}
issues = append(issues, IssuesFromEvents(es, now)...)
}
}
return issues
}
func (c *Collector) collectByPolling(ctx context.Context, now time.Time) []model.Issue {
c.mu.Lock()
client := c.client
denied := make(map[string]error, len(c.rbacDenied))
for k, v := range c.rbacDenied {
denied[k] = v
}
c.mu.Unlock()
if client == nil {
return nil
}
issues := make([]model.Issue, 0, 64)
if _, ok := denied["nodes"]; !ok {
if nodes, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{}); err != nil {
c.noteRBAC("nodes", err)
} else {
list := make([]*corev1.Node, 0, len(nodes.Items))
for i := range nodes.Items {
list = append(list, &nodes.Items[i])
}
issues = append(issues, IssuesFromNodes(list)...)
}
}
if _, ok := denied["pods"]; !ok {
if pods, err := client.CoreV1().Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil {
c.noteRBAC("pods", err)
} else {
list := make([]*corev1.Pod, 0, len(pods.Items))
for i := range pods.Items {
list = append(list, &pods.Items[i])
}
issues = append(issues, IssuesFromPods(list, now, c.pendingGrace, c.crashLoopThresh)...)
}
}
if _, ok := denied["deployments"]; !ok {
if deps, err := client.AppsV1().Deployments(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil {
c.noteRBAC("deployments", err)
} else {
list := make([]*appsv1.Deployment, 0, len(deps.Items))
for i := range deps.Items {
list = append(list, &deps.Items[i])
}
issues = append(issues, IssuesFromDeployments(list, now, c.workloadGrace)...)
}
}
if _, ok := denied["statefulsets"]; !ok {
if sts, err := client.AppsV1().StatefulSets(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil {
c.noteRBAC("statefulsets", err)
} else {
list := make([]*appsv1.StatefulSet, 0, len(sts.Items))
for i := range sts.Items {
list = append(list, &sts.Items[i])
}
issues = append(issues, IssuesFromStatefulSets(list, now, c.workloadGrace)...)
}
}
if _, ok := denied["daemonsets"]; !ok {
if dss, err := client.AppsV1().DaemonSets(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil {
c.noteRBAC("daemonsets", err)
} else {
list := make([]*appsv1.DaemonSet, 0, len(dss.Items))
for i := range dss.Items {
list = append(list, &dss.Items[i])
}
issues = append(issues, IssuesFromDaemonSets(list, now, c.workloadGrace)...)
}
}
if _, ok := denied["events"]; !ok {
if evs, err := client.CoreV1().Events(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}); err != nil {
c.noteRBAC("events", err)
} else {
list := make([]*corev1.Event, 0, len(evs.Items))
for i := range evs.Items {
list = append(list, &evs.Items[i])
}
issues = append(issues, IssuesFromEvents(list, now)...)
}
}
return issues
}
func (c *Collector) rbacIssues() []model.Issue {
denied := c.snapshotRBACDenied()
keys := make([]string, 0, len(denied))
for k := range denied {
keys = append(keys, k)
}
sort.Strings(keys)
out := make([]model.Issue, 0, len(keys))
for _, res := range keys {
err := denied[res]
out = append(out, model.Issue{
ID: fmt.Sprintf("k8s:rbac:%s", res),
Category: model.CategoryKubernetes,
Priority: model.PriorityP2,
Title: fmt.Sprintf("Insufficient RBAC: list/watch %s", res),
Details: fmt.Sprintf("Current context cannot access %s (forbidden). %s", res, sanitizeError(err)),
Evidence: map[string]string{
"kind": "Cluster",
"reason": "RBAC",
"namespace": "",
"resource": res,
},
SuggestedFix: fmt.Sprintf("kubectl auth can-i list %s --all-namespaces", res),
})
}
return out
}
func pollingDegradedIssue() model.Issue {
return model.Issue{
ID: "k8s:cluster:polling",
Category: model.CategoryKubernetes,
Priority: model.PriorityP1,
Title: "Kubernetes degraded: polling (watch failing)",
Details: "Kubernetes watches have failed repeatedly; collector switched to LIST polling. Data may be less real-time and API load is higher.",
Evidence: map[string]string{
"kind": "Cluster",
"reason": "DegradedPolling",
"namespace": "",
},
SuggestedFix: "Check API server / network stability and RBAC; ensure watch endpoints are reachable.",
}
}
func stampIssueTimes(now time.Time, iss model.Issue) model.Issue {
iss.LastSeen = now
if iss.FirstSeen.IsZero() {
iss.FirstSeen = now
}
return iss
}
func (c *Collector) isPolling() bool {
c.mu.Lock()
defer c.mu.Unlock()
return c.polling
}
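
For orientation only, a rough sketch of an engine-style loop driving this collector on its Interval. The real scheduler lives in the collectors engine elsewhere in this commit; the import path, per-tick timeout, and logging here are assumptions, while Name, Interval, Collect, and the Status fields are taken from the code above.

// Illustrative only: drive the collector on its own interval and log the status.
package main

import (
	"context"
	"fmt"
	"time"

	"tower/internal/k8s" // assumed package path within the tower module
)

func main() {
	c := k8s.NewCollector()
	ticker := time.NewTicker(c.Interval())
	defer ticker.Stop()
	for range ticker.C {
		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		issues, status, err := c.Collect(ctx)
		cancel()
		if err != nil {
			// The engine keeps the last-known issues when Collect returns an error.
			continue
		}
		fmt.Printf("%s: health=%v message=%q issues=%d\n",
			c.Name(), status.Health, status.Message, len(issues))
	}
}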

View File

@@ -0,0 +1,101 @@
package k8s
import (
"fmt"
"strings"
"time"
corev1 "k8s.io/api/core/v1"
"tower/internal/model"
)
var warningEventReasons = map[string]struct{}{
"FailedScheduling": {},
"FailedMount": {},
"BackOff": {},
"Unhealthy": {},
"OOMKilling": {},
"FailedPull": {},
"Forbidden": {},
"ErrImagePull": {},
"ImagePullBackOff": {},
}
// IssuesFromEvents applies the PLAN.md Event rules.
//
// Dedup by (object UID, reason). For v1 Events, this is approximated by
// (involvedObject.uid, reason).
func IssuesFromEvents(events []*corev1.Event, now time.Time) []model.Issue {
_ = now
out := make([]model.Issue, 0, 16)
seen := map[string]struct{}{}
for _, e := range events {
if e == nil {
continue
}
if !strings.EqualFold(e.Type, string(corev1.EventTypeWarning)) {
continue
}
if _, ok := warningEventReasons[e.Reason]; !ok {
continue
}
uid := string(e.InvolvedObject.UID)
k := uid + ":" + e.Reason
if _, ok := seen[k]; ok {
continue
}
seen[k] = struct{}{}
ns := e.InvolvedObject.Namespace
if ns == "" {
ns = e.Namespace
}
objKey := e.InvolvedObject.Kind + "/" + e.InvolvedObject.Name
title := fmt.Sprintf("K8s Event %s: %s (%s)", e.Reason, objKey, ns)
if ns == "" {
title = fmt.Sprintf("K8s Event %s: %s", e.Reason, objKey)
}
details := strings.TrimSpace(e.Message)
if details == "" {
details = "Warning event emitted by Kubernetes."
}
out = append(out, model.Issue{
ID: fmt.Sprintf("k8s:event:%s:%s", uid, e.Reason),
Category: model.CategoryKubernetes,
Priority: model.PriorityP2,
Title: title,
Details: details,
Evidence: map[string]string{
"kind": e.InvolvedObject.Kind,
"reason": e.Reason,
"namespace": ns,
"name": e.InvolvedObject.Name,
"uid": uid,
},
SuggestedFix: suggestedFixForEvent(ns, e.InvolvedObject.Kind, e.InvolvedObject.Name),
})
}
return out
}
func suggestedFixForEvent(ns, kind, name string) string {
kindLower := strings.ToLower(kind)
if ns != "" {
switch kindLower {
case "pod":
return fmt.Sprintf("kubectl -n %s describe pod %s", ns, name)
case "node":
return fmt.Sprintf("kubectl describe node %s", name)
default:
return fmt.Sprintf("kubectl -n %s describe %s %s", ns, kindLower, name)
}
}
return fmt.Sprintf("kubectl describe %s %s", kindLower, name)
}
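
A minimal sketch of a unit test for the (uid, reason) dedup rule above; the commit's real tests are referenced by the placeholder files, so the test name and fixture values here are invented for illustration.

// Illustrative only: two Warning events with the same (uid, reason) collapse to one issue.
package k8s

import (
	"testing"
	"time"

	corev1 "k8s.io/api/core/v1"
)

func TestIssuesFromEventsDedup(t *testing.T) {
	ev := func(reason string) *corev1.Event {
		return &corev1.Event{
			Type:   corev1.EventTypeWarning,
			Reason: reason,
			InvolvedObject: corev1.ObjectReference{
				Kind:      "Pod",
				Name:      "api-0",
				Namespace: "prod",
				UID:       "uid-1",
			},
		}
	}
	got := IssuesFromEvents([]*corev1.Event{ev("BackOff"), ev("BackOff")}, time.Now())
	if len(got) != 1 {
		t.Fatalf("expected 1 deduplicated issue, got %d", len(got))
	}
}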

View File

@@ -0,0 +1,5 @@
//go:build ignore

package k8s
// Placeholder (see rollup_test.go).

View File

@@ -0,0 +1,79 @@
package k8s
import (
"fmt"
corev1 "k8s.io/api/core/v1"
"tower/internal/model"
)
// IssuesFromNodes applies the PLAN.md node rules.
//
// Pure rule function: does not talk to the API server.
func IssuesFromNodes(nodes []*corev1.Node) []model.Issue {
out := make([]model.Issue, 0, 8)
for _, n := range nodes {
if n == nil {
continue
}
// Ready / NotReady
if cond := findNodeCondition(n, corev1.NodeReady); cond != nil {
if cond.Status != corev1.ConditionTrue {
out = append(out, model.Issue{
ID: fmt.Sprintf("k8s:node:%s:NotReady", n.Name),
Category: model.CategoryKubernetes,
Priority: model.PriorityP0,
Title: fmt.Sprintf("Node NotReady: %s", n.Name),
Details: cond.Message,
Evidence: map[string]string{
"kind": "Node",
"reason": "NotReady",
"namespace": "",
"node": n.Name,
"status": string(cond.Status),
},
SuggestedFix: "kubectl describe node " + n.Name,
})
}
}
// Pressure conditions.
for _, ctype := range []corev1.NodeConditionType{corev1.NodeMemoryPressure, corev1.NodeDiskPressure, corev1.NodePIDPressure} {
if cond := findNodeCondition(n, ctype); cond != nil {
if cond.Status == corev1.ConditionTrue {
out = append(out, model.Issue{
ID: fmt.Sprintf("k8s:node:%s:%s", n.Name, string(ctype)),
Category: model.CategoryKubernetes,
Priority: model.PriorityP1,
Title: fmt.Sprintf("Node %s: %s", ctype, n.Name),
Details: cond.Message,
Evidence: map[string]string{
"kind": "Node",
"reason": string(ctype),
"namespace": "",
"node": n.Name,
"status": string(cond.Status),
},
SuggestedFix: "kubectl describe node " + n.Name,
})
}
}
}
}
return out
}
func findNodeCondition(n *corev1.Node, t corev1.NodeConditionType) *corev1.NodeCondition {
if n == nil {
return nil
}
for i := range n.Status.Conditions {
c := &n.Status.Conditions[i]
if c.Type == t {
return c
}
}
return nil
}
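
A minimal sketch of a test exercising the NotReady rule; the test name and node fixture are invented for illustration.

// Illustrative only: a node with Ready=False yields a single P0 issue.
package k8s

import (
	"testing"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"tower/internal/model"
)

func TestIssuesFromNodesNotReady(t *testing.T) {
	n := &corev1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: "worker-1"},
		Status: corev1.NodeStatus{
			Conditions: []corev1.NodeCondition{
				{Type: corev1.NodeReady, Status: corev1.ConditionFalse, Message: "kubelet stopped posting node status"},
			},
		},
	}
	got := IssuesFromNodes([]*corev1.Node{n})
	if len(got) != 1 || got[0].Priority != model.PriorityP0 {
		t.Fatalf("expected one P0 NotReady issue, got %+v", got)
	}
}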

View File

@@ -0,0 +1,5 @@
//go:build ignore

package k8s
// Placeholder (see rollup_test.go).

View File

@@ -0,0 +1,169 @@
package k8s
import (
"fmt"
"strconv"
"strings"
"time"
corev1 "k8s.io/api/core/v1"
"tower/internal/model"
)
// IssuesFromPods applies the PLAN.md pod rules.
//
// Pure rule function: it does not talk to the API server.
func IssuesFromPods(pods []*corev1.Pod, now time.Time, pendingGrace time.Duration, crashLoopRestartThreshold int) []model.Issue {
if crashLoopRestartThreshold <= 0 {
crashLoopRestartThreshold = 5
}
if pendingGrace <= 0 {
pendingGrace = 120 * time.Second
}
out := make([]model.Issue, 0, 32)
for _, p := range pods {
if p == nil {
continue
}
ns, name := p.Namespace, p.Name
// Pending for too long.
if p.Status.Phase == corev1.PodPending {
age := now.Sub(p.CreationTimestamp.Time)
if !p.CreationTimestamp.IsZero() && age >= pendingGrace {
out = append(out, model.Issue{
ID: fmt.Sprintf("k8s:pod:%s/%s:Pending", ns, name),
Category: model.CategoryKubernetes,
Priority: model.PriorityP1,
Title: fmt.Sprintf("Pod Pending: %s/%s", ns, name),
Details: fmt.Sprintf("Pod has been Pending for %s.", age.Truncate(time.Second)),
Evidence: map[string]string{
"kind": "Pod",
"reason": "Pending",
"namespace": ns,
"pod": name,
"phase": string(p.Status.Phase),
"node": p.Spec.NodeName,
},
SuggestedFix: fmt.Sprintf("kubectl -n %s describe pod %s", ns, name),
})
}
}
// Container-derived signals.
for _, cs := range p.Status.ContainerStatuses {
cname := cs.Name
restarts := int(cs.RestartCount)
// CrashLoopBackOff and pull errors are reported via Waiting state.
if cs.State.Waiting != nil {
reason := cs.State.Waiting.Reason
msg := cs.State.Waiting.Message
switch reason {
case "CrashLoopBackOff":
pri := model.PriorityP1
if restarts >= crashLoopRestartThreshold {
pri = model.PriorityP0
}
out = append(out, model.Issue{
ID: fmt.Sprintf("k8s:pod:%s/%s:CrashLoop:%s", ns, name, cname),
Category: model.CategoryKubernetes,
Priority: pri,
Title: fmt.Sprintf("CrashLoopBackOff: %s/%s (%s)", ns, name, cname),
Details: firstNonEmpty(msg, "Container is in CrashLoopBackOff."),
Evidence: map[string]string{
"kind": "Pod",
"reason": "CrashLoopBackOff",
"namespace": ns,
"pod": name,
"container": cname,
"restarts": strconv.Itoa(restarts),
"node": p.Spec.NodeName,
},
SuggestedFix: strings.TrimSpace(fmt.Sprintf(`kubectl -n %s describe pod %s
kubectl -n %s logs %s -c %s --previous`, ns, name, ns, name, cname)),
})
case "ImagePullBackOff", "ErrImagePull":
out = append(out, model.Issue{
ID: fmt.Sprintf("k8s:pod:%s/%s:ImagePull:%s", ns, name, cname),
Category: model.CategoryKubernetes,
Priority: model.PriorityP1,
Title: fmt.Sprintf("%s: %s/%s (%s)", reason, ns, name, cname),
Details: firstNonEmpty(msg, "Container image pull is failing."),
Evidence: map[string]string{
"kind": "Pod",
"reason": reason,
"namespace": ns,
"pod": name,
"container": cname,
"restarts": strconv.Itoa(restarts),
"node": p.Spec.NodeName,
},
SuggestedFix: fmt.Sprintf("kubectl -n %s describe pod %s", ns, name),
})
}
}
// OOMKilled is typically stored in LastTerminationState.
if cs.LastTerminationState.Terminated != nil {
term := cs.LastTerminationState.Terminated
if term.Reason == "OOMKilled" {
out = append(out, model.Issue{
ID: fmt.Sprintf("k8s:pod:%s/%s:OOMKilled:%s", ns, name, cname),
Category: model.CategoryKubernetes,
Priority: model.PriorityP1,
Title: fmt.Sprintf("OOMKilled: %s/%s (%s)", ns, name, cname),
Details: firstNonEmpty(term.Message, "Container was killed due to OOM."),
Evidence: map[string]string{
"kind": "Pod",
"reason": "OOMKilled",
"namespace": ns,
"pod": name,
"container": cname,
"restarts": strconv.Itoa(restarts),
"node": p.Spec.NodeName,
},
SuggestedFix: strings.TrimSpace(fmt.Sprintf(`kubectl -n %s describe pod %s
kubectl -n %s logs %s -c %s --previous`, ns, name, ns, name, cname)),
})
}
}
// High restarts even if running.
// Keep this lower priority than active CrashLoopBackOff.
if restarts >= crashLoopRestartThreshold {
if cs.State.Waiting == nil || cs.State.Waiting.Reason == "" {
out = append(out, model.Issue{
ID: fmt.Sprintf("k8s:pod:%s/%s:Restarts:%s", ns, name, cname),
Category: model.CategoryKubernetes,
Priority: model.PriorityP2,
Title: fmt.Sprintf("High restarts: %s/%s (%s)", ns, name, cname),
Details: "Container has restarted multiple times.",
Evidence: map[string]string{
"kind": "Pod",
"reason": "HighRestarts",
"namespace": ns,
"pod": name,
"container": cname,
"restarts": strconv.Itoa(restarts),
"node": p.Spec.NodeName,
},
SuggestedFix: fmt.Sprintf("kubectl -n %s describe pod %s", ns, name),
})
}
}
}
}
return out
}
func firstNonEmpty(v, fallback string) string {
if strings.TrimSpace(v) != "" {
return v
}
return fallback
}
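
A minimal sketch of a test for the CrashLoopBackOff escalation to P0 once the restart threshold is crossed; the test name and pod fixture are invented for illustration.

// Illustrative only: CrashLoopBackOff with restarts above the threshold escalates to P0.
package k8s

import (
	"testing"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"tower/internal/model"
)

func TestCrashLoopEscalatesToP0(t *testing.T) {
	p := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{Namespace: "prod", Name: "api-0"},
		Status: corev1.PodStatus{
			Phase: corev1.PodRunning,
			ContainerStatuses: []corev1.ContainerStatus{{
				Name:         "api",
				RestartCount: 7,
				State: corev1.ContainerState{
					Waiting: &corev1.ContainerStateWaiting{Reason: "CrashLoopBackOff"},
				},
			}},
		},
	}
	got := IssuesFromPods([]*corev1.Pod{p}, time.Now(), 2*time.Minute, 5)
	if len(got) != 1 || got[0].Priority != model.PriorityP0 {
		t.Fatalf("expected one P0 CrashLoopBackOff issue, got %+v", got)
	}
}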

View File

@@ -0,0 +1,5 @@
//go:build ignore

package k8s
// Placeholder (see rollup_test.go).

View File

@@ -0,0 +1,174 @@
package k8s
import (
"fmt"
"strconv"
"time"
appsv1 "k8s.io/api/apps/v1"
"tower/internal/model"
)
// WorkloadGrace tracks how long a workload must be NotReady before we emit an issue.
const defaultWorkloadNotReadyGrace = 180 * time.Second
// IssuesFromDeployments applies the PLAN.md workload rules for Deployments.
func IssuesFromDeployments(deploys []*appsv1.Deployment, now time.Time, grace time.Duration) []model.Issue {
if grace <= 0 {
grace = defaultWorkloadNotReadyGrace
}
out := make([]model.Issue, 0, 16)
for _, d := range deploys {
if d == nil {
continue
}
desired := int32(1)
if d.Spec.Replicas != nil {
desired = *d.Spec.Replicas
}
ready := d.Status.ReadyReplicas
if desired > 0 && ready < desired {
// Prefer LastUpdateTime / LastTransitionTime when available; fallback to creation time.
since := d.CreationTimestamp.Time
if cond := findDeploymentProgressingCondition(d); cond != nil {
if !cond.LastUpdateTime.IsZero() {
since = cond.LastUpdateTime.Time
} else if !cond.LastTransitionTime.IsZero() {
since = cond.LastTransitionTime.Time
}
}
if !since.IsZero() && now.Sub(since) < grace {
continue
}
ns := d.Namespace
name := d.Name
out = append(out, model.Issue{
ID: fmt.Sprintf("k8s:deploy:%s/%s:NotReady", ns, name),
Category: model.CategoryKubernetes,
Priority: model.PriorityP1,
Title: fmt.Sprintf("Deployment not ready: %s/%s", ns, name),
Details: "Ready replicas below desired.",
Evidence: map[string]string{
"kind": "Deployment",
"reason": "NotReady",
"namespace": ns,
"name": name,
"desired": strconv.Itoa(int(desired)),
"ready": strconv.Itoa(int(ready)),
"observed_gen": strconv.FormatInt(d.Status.ObservedGeneration, 10),
"resource_gen": strconv.FormatInt(d.Generation, 10),
"min_grace_sec": strconv.Itoa(int(grace.Seconds())),
},
SuggestedFix: fmt.Sprintf("kubectl -n %s describe deployment %s", ns, name),
})
}
}
return out
}
// IssuesFromStatefulSets applies the PLAN.md workload rules for StatefulSets.
func IssuesFromStatefulSets(sts []*appsv1.StatefulSet, now time.Time, grace time.Duration) []model.Issue {
if grace <= 0 {
grace = defaultWorkloadNotReadyGrace
}
out := make([]model.Issue, 0, 16)
for _, s := range sts {
if s == nil {
continue
}
desired := int32(1)
if s.Spec.Replicas != nil {
desired = *s.Spec.Replicas
}
ready := s.Status.ReadyReplicas
if desired > 0 && ready < desired {
since := s.CreationTimestamp.Time
if !since.IsZero() && now.Sub(since) < grace {
continue
}
ns, name := s.Namespace, s.Name
out = append(out, model.Issue{
ID: fmt.Sprintf("k8s:sts:%s/%s:NotReady", ns, name),
Category: model.CategoryKubernetes,
Priority: model.PriorityP1,
Title: fmt.Sprintf("StatefulSet not ready: %s/%s", ns, name),
Details: "Ready replicas below desired.",
Evidence: map[string]string{
"kind": "StatefulSet",
"reason": "NotReady",
"namespace": ns,
"name": name,
"desired": strconv.Itoa(int(desired)),
"ready": strconv.Itoa(int(ready)),
"observed_gen": strconv.FormatInt(s.Status.ObservedGeneration, 10),
"resource_gen": strconv.FormatInt(s.Generation, 10),
"min_grace_sec": strconv.Itoa(int(grace.Seconds())),
},
SuggestedFix: fmt.Sprintf("kubectl -n %s describe statefulset %s", ns, name),
})
}
}
return out
}
// IssuesFromDaemonSets applies the PLAN.md workload rules for DaemonSets.
func IssuesFromDaemonSets(dss []*appsv1.DaemonSet, now time.Time, grace time.Duration) []model.Issue {
if grace <= 0 {
grace = defaultWorkloadNotReadyGrace
}
out := make([]model.Issue, 0, 16)
for _, ds := range dss {
if ds == nil {
continue
}
unavailable := ds.Status.NumberUnavailable
if unavailable > 0 {
since := ds.CreationTimestamp.Time
if !since.IsZero() && now.Sub(since) < grace {
continue
}
ns, name := ds.Namespace, ds.Name
out = append(out, model.Issue{
ID: fmt.Sprintf("k8s:ds:%s/%s:Unavailable", ns, name),
Category: model.CategoryKubernetes,
Priority: model.PriorityP1,
Title: fmt.Sprintf("DaemonSet unavailable: %s/%s", ns, name),
Details: "DaemonSet has unavailable pods.",
Evidence: map[string]string{
"kind": "DaemonSet",
"reason": "Unavailable",
"namespace": ns,
"name": name,
"unavailable": strconv.Itoa(int(unavailable)),
"desired": strconv.Itoa(int(ds.Status.DesiredNumberScheduled)),
"available": strconv.Itoa(int(ds.Status.NumberAvailable)),
"min_grace_sec": strconv.Itoa(int(grace.Seconds())),
},
SuggestedFix: fmt.Sprintf("kubectl -n %s describe daemonset %s", ns, name),
})
}
}
return out
}
func findDeploymentProgressingCondition(d *appsv1.Deployment) *appsv1.DeploymentCondition {
if d == nil {
return nil
}
for i := range d.Status.Conditions {
c := &d.Status.Conditions[i]
if c.Type == appsv1.DeploymentProgressing {
return c
}
}
return nil
}
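
A minimal sketch of a test for the Deployment rule once the grace period has elapsed; the test name and fixture values are invented for illustration.

// Illustrative only: a Deployment below desired replicas past the grace period emits one issue.
package k8s

import (
	"testing"
	"time"

	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func TestDeploymentNotReadyAfterGrace(t *testing.T) {
	now := time.Now()
	replicas := int32(3)
	d := &appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{
			Namespace:         "prod",
			Name:              "api",
			CreationTimestamp: metav1.NewTime(now.Add(-10 * time.Minute)),
		},
		Spec:   appsv1.DeploymentSpec{Replicas: &replicas},
		Status: appsv1.DeploymentStatus{ReadyReplicas: 1},
	}
	got := IssuesFromDeployments([]*appsv1.Deployment{d}, now, 3*time.Minute)
	if len(got) != 1 {
		t.Fatalf("expected one NotReady issue, got %d", len(got))
	}
}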

View File

@@ -0,0 +1,5 @@
//go:build ignore

package k8s
// Placeholder (see rollup_test.go).

View File

@@ -0,0 +1,128 @@
package k8s
import (
"fmt"
"sort"
"strings"
"tower/internal/model"
)
// RollupKey groups similar issues to reduce UI noise.
// Required grouping per PLAN.md: (namespace, reason, kind).
type RollupKey struct {
Namespace string
Reason string
Kind string
}
// Rollup groups issues by (namespace, reason, kind). For any group with size >=
// threshold, it emits a single rollup issue and removes the individual issues
// from the output.
//
// A rollup issue takes the maximum priority found in its group.
func Rollup(issues []model.Issue, threshold int, sampleN int) []model.Issue {
if threshold <= 0 {
threshold = 20
}
if sampleN <= 0 {
sampleN = 5
}
groups := make(map[RollupKey][]model.Issue, 32)
ungrouped := make([]model.Issue, 0, len(issues))
for _, iss := range issues {
kind := strings.TrimSpace(iss.Evidence["kind"])
reason := strings.TrimSpace(iss.Evidence["reason"])
ns := strings.TrimSpace(iss.Evidence["namespace"])
if kind == "" || reason == "" {
ungrouped = append(ungrouped, iss)
continue
}
k := RollupKey{Namespace: ns, Reason: reason, Kind: kind}
groups[k] = append(groups[k], iss)
}
rolled := make([]model.Issue, 0, len(issues))
rolled = append(rolled, ungrouped...)
// Stable order for determinism.
keys := make([]RollupKey, 0, len(groups))
for k := range groups {
keys = append(keys, k)
}
sort.Slice(keys, func(i, j int) bool {
if keys[i].Namespace != keys[j].Namespace {
return keys[i].Namespace < keys[j].Namespace
}
if keys[i].Kind != keys[j].Kind {
return keys[i].Kind < keys[j].Kind
}
return keys[i].Reason < keys[j].Reason
})
for _, k := range keys {
grp := groups[k]
if len(grp) < threshold {
rolled = append(rolled, grp...)
continue
}
// determine max priority
maxP := model.PriorityP3
for _, iss := range grp {
if iss.Priority.Weight() > maxP.Weight() {
maxP = iss.Priority
}
}
titleNS := ""
if k.Namespace != "" {
titleNS = fmt.Sprintf(" (ns=%s)", k.Namespace)
}
title := fmt.Sprintf("%d %ss %s%s", len(grp), strings.ToLower(k.Kind), k.Reason, titleNS)
samples := make([]string, 0, sampleN)
for i := 0; i < len(grp) && i < sampleN; i++ {
s := grp[i].Title
if s == "" {
s = grp[i].ID
}
samples = append(samples, s)
}
rolled = append(rolled, model.Issue{
ID: fmt.Sprintf("k8s:rollup:%s:%s:%s", k.Namespace, k.Kind, k.Reason),
Category: model.CategoryKubernetes,
Priority: maxP,
Title: title,
Details: "Many similar Kubernetes issues were aggregated into this rollup.",
Evidence: map[string]string{
"kind": k.Kind,
"reason": k.Reason,
"namespace": k.Namespace,
"count": fmt.Sprintf("%d", len(grp)),
"samples": strings.Join(samples, " | "),
},
SuggestedFix: "Filter events/pods and inspect samples with kubectl describe.",
})
}
return rolled
}
// CapIssues enforces a hard cap after rollups. This should be applied after
// sorting by default sort order (priority desc, recency desc), but we keep this
// helper pure and simple.
func CapIssues(issues []model.Issue, max int) []model.Issue {
if max <= 0 {
max = 200
}
if len(issues) <= max {
return issues
}
out := make([]model.Issue, max)
copy(out, issues[:max])
return out
}
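
A minimal sketch of the kind of test the rollup placeholders refer to: 25 issues sharing (namespace, reason, kind) should collapse into one rollup carrying a count and samples. The test name and fixture values are invented for illustration.

// Illustrative only: a large group of similar issues collapses into a single rollup.
package k8s

import (
	"fmt"
	"strings"
	"testing"

	"tower/internal/model"
)

func TestRollupGroupsLargeBatches(t *testing.T) {
	issues := make([]model.Issue, 0, 25)
	for i := 0; i < 25; i++ {
		issues = append(issues, model.Issue{
			ID:       fmt.Sprintf("k8s:pod:prod/api-%d:ImagePull:api", i),
			Priority: model.PriorityP1,
			Title:    fmt.Sprintf("ImagePullBackOff: prod/api-%d (api)", i),
			Evidence: map[string]string{
				"kind":      "Pod",
				"reason":    "ImagePullBackOff",
				"namespace": "prod",
			},
		})
	}
	got := Rollup(issues, 20, 5)
	if len(got) != 1 || !strings.HasPrefix(got[0].ID, "k8s:rollup:") {
		t.Fatalf("expected a single rollup issue, got %d issues", len(got))
	}
	if got[0].Evidence["count"] != "25" {
		t.Fatalf("expected count=25, got %q", got[0].Evidence["count"])
	}
}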

View File

@@ -0,0 +1,10 @@
//go:build ignore

package k8s
// NOTE: This repository task restricts modifications to a fixed set of owned
// files. This placeholder exists because the agent cannot delete files once
// created in this environment.
//
// Real unit tests for rollups should live in a proper *_test.go file without an
// always-false build tag.

View File

@@ -0,0 +1,133 @@
package k8s
import (
"errors"
"fmt"
"regexp"
"strings"
"time"
"tower/internal/model"
)
// unreachableTracker implements the "10s continuous failure" grace requirement
// for Kubernetes connectivity.
//
// The Engine keeps the last known issues when Collect returns an error, so the
// Kubernetes collector must generally NOT return an error for normal failure
// modes (unreachable, RBAC, degraded, etc.). Instead it should return a health
// Status + issues.
//
// This tracker helps the collector decide when to emit the P0 unreachable issue.
// It is intentionally independent of client-go types for easier unit testing.
type unreachableTracker struct {
grace time.Duration
firstFailureAt time.Time
lastErr error
}
func newUnreachableTracker(grace time.Duration) *unreachableTracker {
if grace <= 0 {
grace = 10 * time.Second
}
return &unreachableTracker{grace: grace}
}
func (t *unreachableTracker) observeSuccess() {
t.firstFailureAt = time.Time{}
t.lastErr = nil
}
func (t *unreachableTracker) observeFailure(now time.Time, err error) {
if err == nil {
return
}
t.lastErr = err
if t.firstFailureAt.IsZero() {
t.firstFailureAt = now
}
}
func (t *unreachableTracker) failingFor(now time.Time) time.Duration {
if t.firstFailureAt.IsZero() {
return 0
}
if now.Before(t.firstFailureAt) {
return 0
}
return now.Sub(t.firstFailureAt)
}
func (t *unreachableTracker) shouldEmit(now time.Time) bool {
return t.lastErr != nil && t.failingFor(now) >= t.grace
}
func (t *unreachableTracker) lastErrorString() string {
if t.lastErr == nil {
return ""
}
s := sanitizeError(t.lastErr)
s = strings.ReplaceAll(s, "\n", " ")
s = strings.TrimSpace(s)
return s
}
func unreachableIssue(err error) model.Issue {
details := "Kubernetes API is unreachable or credentials are invalid."
if err != nil {
// Avoid duplicating very long errors in Title.
details = fmt.Sprintf("%s Last error: %s", details, sanitizeError(err))
}
return model.Issue{
ID: "k8s:cluster:unreachable",
Category: model.CategoryKubernetes,
Priority: model.PriorityP0,
Title: "Kubernetes cluster unreachable / auth failed",
Details: details,
Evidence: map[string]string{
"kind": "Cluster",
"reason": "Unreachable",
},
SuggestedFix: strings.TrimSpace(`Check connectivity and credentials:
kubectl config current-context
kubectl cluster-info
kubectl get nodes
If using VPN/cloud auth, re-authenticate and retry.`),
}
}
func sanitizeError(err error) string {
if err == nil {
return ""
}
s := err.Error()
s = regexp.MustCompile(`Bearer [a-zA-Z0-9_-]{20,}`).ReplaceAllString(s, "Bearer [REDACTED]")
s = regexp.MustCompile(`password=[^&\s]+`).ReplaceAllString(s, "password=[REDACTED]")
s = regexp.MustCompile(`token=[^&\s]+`).ReplaceAllString(s, "token=[REDACTED]")
s = regexp.MustCompile(`secret=[^&\s]+`).ReplaceAllString(s, "secret=[REDACTED]")
s = regexp.MustCompile(`https?://[^\s]+k8s[^\s]*`).ReplaceAllString(s, "[API_SERVER]")
s = regexp.MustCompile(`https?://[^\s]+\.k8s\.[^\s]*`).ReplaceAllString(s, "[API_SERVER]")
return s
}
func flattenErr(err error) string {
if err == nil {
return ""
}
// Unwrap once to avoid nested "context deadline exceeded" noise.
if u := errors.Unwrap(err); u != nil {
err = u
}
s := err.Error()
s = strings.ReplaceAll(s, "\n", " ")
s = strings.TrimSpace(s)
return s
}
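
A minimal sketch of a grace-period test for the tracker, using explicit timestamps rather than a fake clock; the test name and error text are invented for illustration.

// Illustrative only: the tracker stays quiet during the grace window and resets on success.
package k8s

import (
	"errors"
	"testing"
	"time"
)

func TestUnreachableTrackerGrace(t *testing.T) {
	base := time.Now()
	tr := newUnreachableTracker(10 * time.Second)
	tr.observeFailure(base, errors.New("connection refused"))
	if tr.shouldEmit(base.Add(5 * time.Second)) {
		t.Fatal("should not emit before the 10s grace elapses")
	}
	if !tr.shouldEmit(base.Add(11 * time.Second)) {
		t.Fatal("should emit after 10s of continuous failure")
	}
	tr.observeSuccess()
	if tr.shouldEmit(base.Add(12 * time.Second)) {
		t.Fatal("success must reset the tracker")
	}
}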

View File

@@ -0,0 +1,5 @@
//go:build ignore

package k8s
// Placeholder (see rollup_test.go).