From 73512e92a6fe85dcac4b9a019f5d205c0666d2f4 Mon Sep 17 00:00:00 2001 From: OpenCode Test Date: Sat, 27 Dec 2025 11:39:40 -0800 Subject: [PATCH] Update dashboard manifests and add automation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Updated deployment with correct Pi 3 tolerations - Updated ingress for cloudflare-tunnel - Added crontab example for systemd alternative - Updated go.sum 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- automation/crontab.example | 18 +++++++++++++++ dashboard/deploy/deployment.yaml | 10 ++++----- dashboard/deploy/ingress.yaml | 12 ++-------- dashboard/deploy/kustomization.yaml | 2 +- dashboard/deploy/pvc.yaml | 3 +-- dashboard/go.sum | 6 ++--- plans/pure-wishing-metcalfe.md | 34 +++++++++++++++++++++++++++++ 7 files changed, 64 insertions(+), 21 deletions(-) create mode 100644 automation/crontab.example create mode 100644 plans/pure-wishing-metcalfe.md diff --git a/automation/crontab.example b/automation/crontab.example new file mode 100644 index 0000000..1a151cb --- /dev/null +++ b/automation/crontab.example @@ -0,0 +1,18 @@ +# K8s Agent Scheduled Workflows +# Install with: crontab /home/will/.claude/automation/crontab.example +# Or add to existing: crontab -e + +# Environment +SHELL=/bin/bash +PATH=/usr/local/bin:/usr/bin:/bin +CLAUDE_DIR=/home/will/.claude +DASHBOARD_URL=http://k8s-agent-dashboard-k8s-agent.taildb3494.ts.net + +# Cluster health check - every 6 hours +0 */6 * * * /home/will/.claude/automation/scheduler.sh cluster-health-check >> /home/will/.claude/logs/cron.log 2>&1 + +# Daily cluster summary - 8am +0 8 * * * /home/will/.claude/automation/scheduler.sh cluster-health-check >> /home/will/.claude/logs/cron.log 2>&1 + +# Log rotation - weekly on Sunday at midnight +0 0 * * 0 find /home/will/.claude/logs -name "*.log" -mtime +7 -delete diff --git a/dashboard/deploy/deployment.yaml b/dashboard/deploy/deployment.yaml index 45ce267..be41161 100644 --- a/dashboard/deploy/deployment.yaml +++ b/dashboard/deploy/deployment.yaml @@ -18,12 +18,12 @@ spec: spec: # Target Pi 3 node (lightweight workload) tolerations: - - key: "node-type" + - key: "capacity" operator: "Equal" - value: "pi3" - effect: "NoSchedule" + value: "low" + effect: "NoExecute" nodeSelector: - kubernetes.io/arch: arm64 + kubernetes.io/hostname: pi3 # Security context securityContext: @@ -33,7 +33,7 @@ spec: containers: - name: dashboard - image: ghcr.io/will/k8s-agent-dashboard:latest + image: gitea-http.taildb3494.ts.net/will/k8s-agent-dashboard:latest imagePullPolicy: Always ports: - name: http diff --git a/dashboard/deploy/ingress.yaml b/dashboard/deploy/ingress.yaml index 3069030..bfbd31d 100644 --- a/dashboard/deploy/ingress.yaml +++ b/dashboard/deploy/ingress.yaml @@ -6,13 +6,10 @@ metadata: labels: app.kubernetes.io/name: k8s-agent-dashboard app.kubernetes.io/component: dashboard - annotations: - # Adjust annotations based on your ingress controller - # nginx.ingress.kubernetes.io/ssl-redirect: "false" spec: - ingressClassName: nginx # or traefik, etc. + ingressClassName: cloudflare-tunnel rules: - - host: k8s-agent.local # Adjust to your domain + - host: k8s-agent-dashboard-k8s-agent.taildb3494.ts.net http: paths: - path: / @@ -22,8 +19,3 @@ spec: name: k8s-agent-dashboard port: name: http - # Uncomment for TLS - # tls: - # - hosts: - # - k8s-agent.local - # secretName: k8s-agent-dashboard-tls diff --git a/dashboard/deploy/kustomization.yaml b/dashboard/deploy/kustomization.yaml index 56f76ce..b51bcad 100644 --- a/dashboard/deploy/kustomization.yaml +++ b/dashboard/deploy/kustomization.yaml @@ -15,5 +15,5 @@ commonLabels: app.kubernetes.io/managed-by: argocd images: - - name: ghcr.io/will/k8s-agent-dashboard + - name: gitea-http.taildb3494.ts.net/will/k8s-agent-dashboard newTag: latest diff --git a/dashboard/deploy/pvc.yaml b/dashboard/deploy/pvc.yaml index e480239..432a8fd 100644 --- a/dashboard/deploy/pvc.yaml +++ b/dashboard/deploy/pvc.yaml @@ -9,8 +9,7 @@ metadata: spec: accessModes: - ReadWriteOnce + storageClassName: local-path resources: requests: storage: 100Mi - # Adjust storageClassName based on your cluster - # storageClassName: local-path diff --git a/dashboard/go.sum b/dashboard/go.sum index 63f5df9..071bd33 100644 --- a/dashboard/go.sum +++ b/dashboard/go.sum @@ -1,4 +1,4 @@ -github.com/go-chi/chi/v5 v5.0.11 h1/BnpYbFZ3T3S1WMpD79r7R5ThWX40TaFB7L31Y8xqSwA= -github.com/go-chi/chi/v5 v5.0.11/go.mod h1/DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8= +github.com/go-chi/chi/v5 v5.0.11 h1:BnpYbFZ3T3S1WMpD79r7R5ThWX40TaFB7L31Y8xqSwA= +github.com/go-chi/chi/v5 v5.0.11/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8= github.com/go-chi/cors v1.2.1 h1:xEC8UT3Rlp2QuWNEr4Fs/c2EAGVKBwy/1vHx3bppil4= -github.com/go-chi/cors v1.2.1/go.mod h1:sSbTewc+6wYHBBCW7ytsFSn3rn0gOeEOrPIsEDqiK+0= +github.com/go-chi/cors v1.2.1/go.mod h1:sSbTewc+6wYHBBCW7ytsFSn836hqM7JxpglAy2Vzc58= diff --git a/plans/pure-wishing-metcalfe.md b/plans/pure-wishing-metcalfe.md new file mode 100644 index 0000000..1548ba5 --- /dev/null +++ b/plans/pure-wishing-metcalfe.md @@ -0,0 +1,34 @@ +# Cluster Issue Diagnosis Plan + +## Issues to Investigate + +1. **Critical Alerts** - KubeSchedulerDown, KubeControllerManagerDown + - Likely false positives (k0s bundles these in k0s-controller) + - Check if cluster is actually functional + +2. **CrashLooping Pod** - Find and diagnose + - Get pod status across all namespaces + - Check logs and events + +3. **Stuck Deployment** - Find and diagnose + - List deployments not at desired replica count + - Check events + +4. **Degraded kube-prometheus-stack** + - Check prometheus/alertmanager pods + +## Commands to Run + +```bash +# Find crash looping pods +kubectl get pods -A | grep -E 'CrashLoop|Error|ImagePull' + +# Find stuck deployments +kubectl get deploy -A -o wide | grep -v '1/1\|2/2\|3/3\|4/4' + +# Check prometheus stack +kubectl get pods -n monitoring + +# Check scheduler/controller (k0s specific) +kubectl get pods -n kube-system +```