Update dashboard manifests and add automation
- Updated deployment with correct Pi 3 tolerations
- Updated ingress for cloudflare-tunnel
- Added crontab example for systemd alternative
- Updated go.sum

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
18
automation/crontab.example
Normal file
18
automation/crontab.example
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# K8s Agent Scheduled Workflows
|
||||||
|
# Install with: crontab /home/will/.claude/automation/crontab.example (WARNING: this replaces your entire existing crontab)
|
||||||
|
# Or add to existing: crontab -e
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
SHELL=/bin/bash
|
||||||
|
PATH=/usr/local/bin:/usr/bin:/bin
|
||||||
|
CLAUDE_DIR=/home/will/.claude
|
||||||
|
DASHBOARD_URL=http://k8s-agent-dashboard-k8s-agent.taildb3494.ts.net
|
||||||
|
|
||||||
|
# Cluster health check - every 6 hours
|
||||||
|
0 */6 * * * /home/will/.claude/automation/scheduler.sh cluster-health-check >> /home/will/.claude/logs/cron.log 2>&1
|
||||||
|
|
||||||
|
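# Daily cluster summary - 8am (NOTE: this entry currently runs the same cluster-health-check workflow as the 6-hourly job; confirm the intended summary workflow name)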
|
||||||
|
0 8 * * * /home/will/.claude/automation/scheduler.sh cluster-health-check >> /home/will/.claude/logs/cron.log 2>&1
|
||||||
|
|
||||||
|
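# Log cleanup - weekly on Sunday at midnight (deletes *.log files last modified more than 7 days ago; files still being appended to are never removed)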
|
||||||
|
0 0 * * 0 find /home/will/.claude/logs -name "*.log" -mtime +7 -delete
|
||||||
@@ -18,12 +18,12 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
# Target Pi 3 node (lightweight workload)
|
# Target Pi 3 node (lightweight workload)
|
||||||
tolerations:
|
tolerations:
|
||||||
- key: "node-type"
|
- key: "capacity"
|
||||||
operator: "Equal"
|
operator: "Equal"
|
||||||
value: "pi3"
|
value: "low"
|
||||||
effect: "NoSchedule"
|
effect: "NoExecute"
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
kubernetes.io/arch: arm64
|
kubernetes.io/hostname: pi3
|
||||||
|
|
||||||
# Security context
|
# Security context
|
||||||
securityContext:
|
securityContext:
|
||||||
@@ -33,7 +33,7 @@ spec:
|
|||||||
|
|
||||||
containers:
|
containers:
|
||||||
- name: dashboard
|
- name: dashboard
|
||||||
image: ghcr.io/will/k8s-agent-dashboard:latest
|
image: gitea-http.taildb3494.ts.net/will/k8s-agent-dashboard:latest
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
|
|||||||
@@ -6,13 +6,10 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: k8s-agent-dashboard
|
app.kubernetes.io/name: k8s-agent-dashboard
|
||||||
app.kubernetes.io/component: dashboard
|
app.kubernetes.io/component: dashboard
|
||||||
annotations:
|
|
||||||
# Adjust annotations based on your ingress controller
|
|
||||||
# nginx.ingress.kubernetes.io/ssl-redirect: "false"
|
|
||||||
spec:
|
spec:
|
||||||
ingressClassName: nginx # or traefik, etc.
|
ingressClassName: cloudflare-tunnel
|
||||||
rules:
|
rules:
|
||||||
- host: k8s-agent.local # Adjust to your domain
|
- host: k8s-agent-dashboard-k8s-agent.taildb3494.ts.net
|
||||||
http:
|
http:
|
||||||
paths:
|
paths:
|
||||||
- path: /
|
- path: /
|
||||||
@@ -22,8 +19,3 @@ spec:
|
|||||||
name: k8s-agent-dashboard
|
name: k8s-agent-dashboard
|
||||||
port:
|
port:
|
||||||
name: http
|
name: http
|
||||||
# Uncomment for TLS
|
|
||||||
# tls:
|
|
||||||
# - hosts:
|
|
||||||
# - k8s-agent.local
|
|
||||||
# secretName: k8s-agent-dashboard-tls
|
|
||||||
|
|||||||
@@ -15,5 +15,5 @@ commonLabels:
|
|||||||
app.kubernetes.io/managed-by: argocd
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
|
||||||
images:
|
images:
|
||||||
- name: ghcr.io/will/k8s-agent-dashboard
|
- name: gitea-http.taildb3494.ts.net/will/k8s-agent-dashboard
|
||||||
newTag: latest
|
newTag: latest
|
||||||
|
|||||||
@@ -9,8 +9,7 @@ metadata:
|
|||||||
spec:
|
spec:
|
||||||
accessModes:
|
accessModes:
|
||||||
- ReadWriteOnce
|
- ReadWriteOnce
|
||||||
|
storageClassName: local-path
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
storage: 100Mi
|
storage: 100Mi
|
||||||
# Adjust storageClassName based on your cluster
|
|
||||||
# storageClassName: local-path
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
github.com/go-chi/chi/v5 v5.0.11 h1/BnpYbFZ3T3S1WMpD79r7R5ThWX40TaFB7L31Y8xqSwA=
|
github.com/go-chi/chi/v5 v5.0.11 h1:BnpYbFZ3T3S1WMpD79r7R5ThWX40TaFB7L31Y8xqSwA=
|
||||||
github.com/go-chi/chi/v5 v5.0.11/go.mod h1/DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
|
github.com/go-chi/chi/v5 v5.0.11/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
|
||||||
github.com/go-chi/cors v1.2.1 h1:xEC8UT3Rlp2QuWNEr4Fs/c2EAGVKBwy/1vHx3bppil4=
|
github.com/go-chi/cors v1.2.1 h1:xEC8UT3Rlp2QuWNEr4Fs/c2EAGVKBwy/1vHx3bppil4=
|
||||||
github.com/go-chi/cors v1.2.1/go.mod h1:sSbTewc+6wYHBBCW7ytsFSn3rn0gOeEOrPIsEDqiK+0=
|
github.com/go-chi/cors v1.2.1/go.mod h1:sSbTewc+6wYHBBCW7ytsFSn836hqM7JxpglAy2Vzc58=
|
||||||
|
|||||||
34
plans/pure-wishing-metcalfe.md
Normal file
34
plans/pure-wishing-metcalfe.md
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
# Cluster Issue Diagnosis Plan
|
||||||
|
|
||||||
|
## Issues to Investigate
|
||||||
|
|
||||||
|
1. **Critical Alerts** - KubeSchedulerDown, KubeControllerManagerDown
|
||||||
|
- Likely false positives (k0s bundles these in k0s-controller)
|
||||||
|
- Check if cluster is actually functional
|
||||||
|
|
||||||
|
2. **CrashLooping Pod** - Find and diagnose
|
||||||
|
- Get pod status across all namespaces
|
||||||
|
- Check logs and events
|
||||||
|
|
||||||
|
3. **Stuck Deployment** - Find and diagnose
|
||||||
|
- List deployments not at desired replica count
|
||||||
|
- Check events
|
||||||
|
|
||||||
|
4. **Degraded kube-prometheus-stack**
|
||||||
|
- Check prometheus/alertmanager pods
|
||||||
|
|
||||||
|
## Commands to Run
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Find crash-looping pods (also matches pods in Error or ImagePull* states)
|
||||||
|
kubectl get pods -A | grep -E 'CrashLoop|Error|ImagePull'
|
||||||
|
|
||||||
|
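# Find stuck deployments (heuristic: hides rows containing 1/1, 2/2, 3/3 or 4/4; the header row and healthy deployments with 5+ replicas still appear, and partial states such as 1/10 are hidden by the substring match)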
|
||||||
|
kubectl get deploy -A -o wide | grep -v '1/1\|2/2\|3/3\|4/4'
|
||||||
|
|
||||||
|
# Check prometheus stack
|
||||||
|
kubectl get pods -n monitoring
|
||||||
|
|
||||||
|
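# Check kube-system pods (k0s specific: scheduler/controller-manager are bundled in k0s-controller, so presumably they will not be listed as pods here - verify on the controller node)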
|
||||||
|
kubectl get pods -n kube-system
|
||||||
|
```
|
||||||
Reference in New Issue
Block a user