From 5646508adbd8b749b3c42f1e7413e849e68735cc Mon Sep 17 00:00:00 2001 From: OpenCode Test Date: Fri, 26 Dec 2025 11:34:36 -0800 Subject: [PATCH] feat: Implement Phase 2 dashboard for K8s agent system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lightweight Go-based dashboard for Raspberry Pi cluster: Backend: - chi router with REST API - Embedded static file serving - JSON file-based state storage - Health checks and CORS support Frontend: - Responsive dark theme UI - Status view with nodes, alerts, ArgoCD apps - Pending actions with approve/reject - Action history and audit trail - Workflow listing and manual triggers Deployment: - Multi-stage Dockerfile (small Alpine image) - Kubernetes manifests with Pi 3 tolerations - Resource limits: 32-64Mi memory, 10-100m CPU - ArgoCD application manifest - Kustomize configuration API endpoints: - GET /api/status - Cluster status - GET/POST /api/pending - Action management - GET /api/history - Action audit trail - GET/POST /api/workflows - Workflow management 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- dashboard/Dockerfile | 45 +++ dashboard/README.md | 97 +++++ dashboard/cmd/server/main.go | 81 ++++ dashboard/cmd/server/web/index.html | 112 ++++++ dashboard/cmd/server/web/static/css/style.css | 355 ++++++++++++++++++ dashboard/cmd/server/web/static/js/app.js | 306 +++++++++++++++ dashboard/deploy/argocd-application.yaml | 45 +++ dashboard/deploy/deployment.yaml | 90 +++++ dashboard/deploy/ingress.yaml | 29 ++ dashboard/deploy/kustomization.yaml | 19 + dashboard/deploy/namespace.yaml | 7 + dashboard/deploy/pvc.yaml | 16 + dashboard/deploy/service.yaml | 17 + dashboard/go.mod | 8 + dashboard/go.sum | 4 + dashboard/internal/api/handlers.go | 157 ++++++++ dashboard/internal/models/models.go | 80 ++++ dashboard/internal/store/store.go | 244 ++++++++++++ 18 files changed, 1712 insertions(+) create mode 100644 dashboard/Dockerfile 
create mode 100644 dashboard/README.md create mode 100644 dashboard/cmd/server/main.go create mode 100644 dashboard/cmd/server/web/index.html create mode 100644 dashboard/cmd/server/web/static/css/style.css create mode 100644 dashboard/cmd/server/web/static/js/app.js create mode 100644 dashboard/deploy/argocd-application.yaml create mode 100644 dashboard/deploy/deployment.yaml create mode 100644 dashboard/deploy/ingress.yaml create mode 100644 dashboard/deploy/kustomization.yaml create mode 100644 dashboard/deploy/namespace.yaml create mode 100644 dashboard/deploy/pvc.yaml create mode 100644 dashboard/deploy/service.yaml create mode 100644 dashboard/go.mod create mode 100644 dashboard/go.sum create mode 100644 dashboard/internal/api/handlers.go create mode 100644 dashboard/internal/models/models.go create mode 100644 dashboard/internal/store/store.go diff --git a/dashboard/Dockerfile b/dashboard/Dockerfile new file mode 100644 index 0000000..3d9e06f --- /dev/null +++ b/dashboard/Dockerfile @@ -0,0 +1,45 @@ +# Build stage +FROM golang:1.21-alpine AS builder + +WORKDIR /app + +# Install dependencies +RUN apk add --no-cache git + +# Copy go mod files +COPY go.mod go.sum* ./ +RUN go mod download + +# Copy source code +COPY . . 
+ +# Build binary +RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /server ./cmd/server + +# Final stage - minimal image +FROM alpine:3.19 + +RUN apk add --no-cache ca-certificates tzdata + +WORKDIR /app + +# Copy binary from builder +COPY --from=builder /server /app/server + +# Create data directory +RUN mkdir -p /data + +# Expose port +EXPOSE 8080 + +# Run as non-root user +RUN adduser -D -u 1000 appuser +RUN chown -R appuser:appuser /app /data +USER appuser + +# Health check +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD wget --no-verbose --tries=1 --spider http://localhost:8080/api/health || exit 1 + +ENTRYPOINT ["/app/server"] +CMD ["--port", "8080", "--data", "/data"] diff --git a/dashboard/README.md b/dashboard/README.md new file mode 100644 index 0000000..e940de0 --- /dev/null +++ b/dashboard/README.md @@ -0,0 +1,97 @@ +# K8s Agent Dashboard + +Lightweight web dashboard for the K8s Agent Orchestrator system. Designed to run on Raspberry Pi clusters with minimal resource usage. + +## Features + +- **Cluster Status** - View node health, resource usage, and conditions +- **Pending Actions** - Approve or reject actions requiring confirmation +- **Action History** - Audit trail of all agent actions +- **Workflows** - View and trigger defined workflows + +## Resource Requirements + +Optimized for Raspberry Pi 3B+ (1GB RAM): +- Memory: 32-64Mi +- CPU: 10-100m + +## Development + +### Prerequisites + +- Go 1.21+ +- Docker (for building images) + +### Local Development + +```bash +# Run locally +go run ./cmd/server --port 8080 --data ./data + +# Build binary +go build -o server ./cmd/server + +# Build Docker image +docker build -t k8s-agent-dashboard:dev . +``` + +### Build for ARM64 + +```bash +# Build an ARM64 image with buildx +docker buildx build --platform linux/arm64 -t ghcr.io/user/k8s-agent-dashboard:latest . +``` + +## Deployment + +### Using Kustomize + +```bash +kubectl apply -k deploy/ +``` + +### Using ArgoCD + +1. 
Copy `deploy/` contents to your GitOps repo under `apps/k8s-agent-dashboard/` +2. Apply the ArgoCD application: + ```bash + kubectl apply -f deploy/argocd-application.yaml + ``` + +## Configuration + +### Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| PORT | (unset) | Server port; when set, overrides the `--port` flag | + +### Command Line Flags + +| Flag | Default | Description | +|------|---------|-------------| +| --port | 8080 | Server port | +| --data | /data | Data directory for persistent state | + +## API Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| GET | /api/health | Health check | +| GET | /api/status | Cluster status | +| GET | /api/pending | Pending actions | +| POST | /api/pending/{id}/approve | Approve action | +| POST | /api/pending/{id}/reject | Reject action | +| GET | /api/history | Action history | +| GET | /api/workflows | List workflows | +| POST | /api/workflows/{name}/run | Trigger workflow | + +## Integration with Claude Code + +The dashboard reads/writes state files that can be shared with Claude Code agents: + +- `status.json` - Cluster status (written by agents) +- `pending.json` - Pending actions (read/write) +- `history.json` - Action history (appended by agents) + +To share state, mount the same PVC or directory as the data directory of both the dashboard and Claude Code. 
diff --git a/dashboard/cmd/server/main.go b/dashboard/cmd/server/main.go new file mode 100644 index 0000000..0fa2821 --- /dev/null +++ b/dashboard/cmd/server/main.go @@ -0,0 +1,81 @@ +package main + +import ( + "embed" + "flag" + "io/fs" + "log" + "net/http" + "os" + + "github.com/go-chi/chi/v5" + "github.com/go-chi/chi/v5/middleware" + "github.com/go-chi/cors" + "github.com/will/k8s-agent-dashboard/internal/api" + "github.com/will/k8s-agent-dashboard/internal/store" +) + +//go:embed all:web +var webFS embed.FS + +func main() { + port := flag.String("port", "8080", "Server port") + dataDir := flag.String("data", "/data", "Data directory for state") + flag.Parse() + + // Initialize store + s, err := store.New(*dataDir) + if err != nil { + log.Fatalf("Failed to initialize store: %v", err) + } + + // Create router + r := chi.NewRouter() + + // Middleware + r.Use(middleware.Logger) + r.Use(middleware.Recoverer) + r.Use(middleware.Compress(5)) + r.Use(cors.Handler(cors.Options{ + AllowedOrigins: []string{"*"}, + AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"}, + AllowedHeaders: []string{"Accept", "Content-Type"}, + ExposedHeaders: []string{"Link"}, + AllowCredentials: false, + MaxAge: 300, + })) + + // API routes + r.Route("/api", func(r chi.Router) { + r.Get("/health", api.HealthCheck) + r.Get("/status", api.GetClusterStatus(s)) + r.Get("/pending", api.GetPendingActions(s)) + r.Post("/pending/{id}/approve", api.ApproveAction(s)) + r.Post("/pending/{id}/reject", api.RejectAction(s)) + r.Get("/history", api.GetActionHistory(s)) + r.Get("/workflows", api.GetWorkflows(s)) + r.Post("/workflows/{name}/run", api.RunWorkflow(s)) + }) + + // Static files + webContent, err := fs.Sub(webFS, "web") + if err != nil { + log.Fatalf("Failed to get web content: %v", err) + } + + fileServer := http.FileServer(http.FS(webContent)) + r.Handle("/*", fileServer) + + // Start server + addr := ":" + *port + if envPort := os.Getenv("PORT"); envPort != "" { + addr = ":" + 
envPort + } + + log.Printf("Starting server on %s", addr) + log.Printf("Data directory: %s", *dataDir) + + if err := http.ListenAndServe(addr, r); err != nil { + log.Fatalf("Server failed: %v", err) + } +} diff --git a/dashboard/cmd/server/web/index.html b/dashboard/cmd/server/web/index.html new file mode 100644 index 0000000..7f29293 --- /dev/null +++ b/dashboard/cmd/server/web/index.html @@ -0,0 +1,112 @@ + + + + + + K8s Agent Dashboard + + + +
+

K8s Agent Dashboard

+
+ + Loading... +
+
+ + + +
+ +
+
+

Nodes

+ + + + + + + + + + + +
NodeStatusCPUMemoryConditions
+
+ +
+

Active Alerts

+
+

No active alerts

+
+
+ +
+

ArgoCD Applications

+ + + + + + + + + + +
ApplicationSyncHealthRevision
+
+
+ + +
+
+

Pending Actions

+
+

No pending actions

+
+
+
+ + +
+
+

Action History

+ + + + + + + + + + +
TimeAgentActionResult
+
+
+ + +
+
+

Workflows

+
+

Loading workflows...

+
+
+
+
+ +
+

K8s Agent Dashboard | Last updated: -

+
+ + + + diff --git a/dashboard/cmd/server/web/static/css/style.css b/dashboard/cmd/server/web/static/css/style.css new file mode 100644 index 0000000..224b441 --- /dev/null +++ b/dashboard/cmd/server/web/static/css/style.css @@ -0,0 +1,355 @@ +:root { + --bg-primary: #1a1a2e; + --bg-secondary: #16213e; + --bg-card: #0f3460; + --text-primary: #eaeaea; + --text-secondary: #a0a0a0; + --accent: #e94560; + --success: #4ade80; + --warning: #fbbf24; + --danger: #ef4444; + --info: #60a5fa; +} + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif; + background: var(--bg-primary); + color: var(--text-primary); + min-height: 100vh; + line-height: 1.6; +} + +header { + background: var(--bg-secondary); + padding: 1rem 2rem; + display: flex; + justify-content: space-between; + align-items: center; + border-bottom: 1px solid var(--bg-card); +} + +header h1 { + font-size: 1.5rem; + font-weight: 600; +} + +.cluster-health { + display: flex; + align-items: center; + gap: 0.5rem; + padding: 0.5rem 1rem; + background: var(--bg-card); + border-radius: 20px; +} + +.health-indicator { + width: 12px; + height: 12px; + border-radius: 50%; + background: var(--text-secondary); +} + +.health-indicator.healthy { background: var(--success); } +.health-indicator.degraded { background: var(--warning); } +.health-indicator.critical { background: var(--danger); } + +nav { + background: var(--bg-secondary); + padding: 0.5rem 2rem; + display: flex; + gap: 0.5rem; + border-bottom: 1px solid var(--bg-card); +} + +.nav-btn { + background: transparent; + border: none; + color: var(--text-secondary); + padding: 0.75rem 1.5rem; + cursor: pointer; + border-radius: 8px; + font-size: 0.9rem; + transition: all 0.2s; +} + +.nav-btn:hover { + background: var(--bg-card); + color: var(--text-primary); +} + +.nav-btn.active { + background: var(--accent); + color: white; +} + +.badge { + background: 
var(--danger); + color: white; + font-size: 0.75rem; + padding: 0.1rem 0.5rem; + border-radius: 10px; + margin-left: 0.25rem; +} + +main { + padding: 2rem; + max-width: 1400px; + margin: 0 auto; +} + +.view { + display: none; +} + +.view.active { + display: block; +} + +.card { + background: var(--bg-card); + border-radius: 12px; + padding: 1.5rem; + margin-bottom: 1.5rem; +} + +.card h2 { + font-size: 1.1rem; + margin-bottom: 1rem; + color: var(--text-primary); +} + +table { + width: 100%; + border-collapse: collapse; +} + +th, td { + text-align: left; + padding: 0.75rem; + border-bottom: 1px solid var(--bg-secondary); +} + +th { + color: var(--text-secondary); + font-weight: 500; + font-size: 0.85rem; + text-transform: uppercase; +} + +td { + font-size: 0.9rem; +} + +.status-badge { + padding: 0.25rem 0.75rem; + border-radius: 12px; + font-size: 0.8rem; + font-weight: 500; +} + +.status-ready, .status-healthy, .status-synced, .status-success { + background: rgba(74, 222, 128, 0.2); + color: var(--success); +} + +.status-notready, .status-degraded, .status-outofsync, .status-failed { + background: rgba(239, 68, 68, 0.2); + color: var(--danger); +} + +.status-progressing, .status-pending { + background: rgba(251, 191, 36, 0.2); + color: var(--warning); +} + +.status-rejected { + background: rgba(239, 68, 68, 0.2); + color: var(--danger); +} + +.status-approved { + background: rgba(74, 222, 128, 0.2); + color: var(--success); +} + +.alerts-list, .pending-list, .workflows-list { + display: flex; + flex-direction: column; + gap: 0.75rem; +} + +.alert-item { + display: flex; + align-items: center; + gap: 1rem; + padding: 0.75rem; + background: var(--bg-secondary); + border-radius: 8px; + border-left: 3px solid var(--danger); +} + +.alert-item.warning { + border-left-color: var(--warning); +} + +.pending-item { + background: var(--bg-secondary); + border-radius: 8px; + padding: 1rem; + border-left: 3px solid var(--warning); +} + +.pending-item .header { + display: flex; 
+ justify-content: space-between; + align-items: flex-start; + margin-bottom: 0.5rem; +} + +.pending-item .agent { + font-size: 0.8rem; + color: var(--text-secondary); +} + +.pending-item .action { + font-weight: 600; +} + +.pending-item .description { + color: var(--text-secondary); + font-size: 0.9rem; + margin-bottom: 1rem; +} + +.pending-item .buttons { + display: flex; + gap: 0.5rem; +} + +.btn { + padding: 0.5rem 1rem; + border: none; + border-radius: 6px; + cursor: pointer; + font-size: 0.85rem; + font-weight: 500; + transition: all 0.2s; +} + +.btn-approve { + background: var(--success); + color: var(--bg-primary); +} + +.btn-reject { + background: var(--danger); + color: white; +} + +.btn-run { + background: var(--info); + color: var(--bg-primary); +} + +.btn:hover { + opacity: 0.9; + transform: translateY(-1px); +} + +.workflow-item { + background: var(--bg-secondary); + border-radius: 8px; + padding: 1rem; + display: flex; + justify-content: space-between; + align-items: center; +} + +.workflow-item .info h3 { + font-size: 1rem; + margin-bottom: 0.25rem; +} + +.workflow-item .info p { + color: var(--text-secondary); + font-size: 0.85rem; +} + +.workflow-item .triggers { + display: flex; + gap: 0.5rem; + margin-top: 0.5rem; +} + +.trigger-tag { + background: var(--bg-card); + padding: 0.2rem 0.5rem; + border-radius: 4px; + font-size: 0.75rem; + color: var(--text-secondary); +} + +.empty-state { + color: var(--text-secondary); + text-align: center; + padding: 2rem; + font-style: italic; +} + +footer { + text-align: center; + padding: 1rem; + color: var(--text-secondary); + font-size: 0.85rem; + border-top: 1px solid var(--bg-card); +} + +/* Progress bar for resource usage */ +.progress-bar { + width: 100px; + height: 8px; + background: var(--bg-secondary); + border-radius: 4px; + overflow: hidden; +} + +.progress-bar .fill { + height: 100%; + background: var(--success); + transition: width 0.3s; +} + +.progress-bar .fill.warning { background: 
var(--warning); } +.progress-bar .fill.danger { background: var(--danger); } + +/* Responsive */ +@media (max-width: 768px) { + header { + flex-direction: column; + gap: 1rem; + text-align: center; + } + + nav { + flex-wrap: wrap; + justify-content: center; + } + + main { + padding: 1rem; + } + + table { + font-size: 0.85rem; + } + + th, td { + padding: 0.5rem; + } +} diff --git a/dashboard/cmd/server/web/static/js/app.js b/dashboard/cmd/server/web/static/js/app.js new file mode 100644 index 0000000..0e22d5d --- /dev/null +++ b/dashboard/cmd/server/web/static/js/app.js @@ -0,0 +1,306 @@ +// K8s Agent Dashboard - Frontend JavaScript + +const API_BASE = '/api'; + +// State +let currentView = 'status'; + +// Initialize +document.addEventListener('DOMContentLoaded', () => { + setupNavigation(); + loadAllData(); + // Refresh data every 30 seconds + setInterval(loadAllData, 30000); +}); + +// Navigation +function setupNavigation() { + document.querySelectorAll('.nav-btn').forEach(btn => { + btn.addEventListener('click', () => { + const view = btn.dataset.view; + switchView(view); + }); + }); +} + +function switchView(view) { + currentView = view; + + // Update nav buttons + document.querySelectorAll('.nav-btn').forEach(btn => { + btn.classList.toggle('active', btn.dataset.view === view); + }); + + // Update views + document.querySelectorAll('.view').forEach(v => { + v.classList.toggle('active', v.id === `${view}-view`); + }); +} + +// Data Loading +async function loadAllData() { + try { + await Promise.all([ + loadClusterStatus(), + loadPendingActions(), + loadHistory(), + loadWorkflows() + ]); + updateLastUpdate(); + } catch (error) { + console.error('Error loading data:', error); + } +} + +async function loadClusterStatus() { + try { + const response = await fetch(`${API_BASE}/status`); + const data = await response.json(); + renderClusterStatus(data); + } catch (error) { + console.error('Error loading status:', error); + } +} + +async function loadPendingActions() { + 
try { + const response = await fetch(`${API_BASE}/pending`); + const data = await response.json(); + renderPendingActions(data.actions || []); + document.getElementById('pending-count').textContent = data.count || 0; + } catch (error) { + console.error('Error loading pending:', error); + } +} + +async function loadHistory() { + try { + const response = await fetch(`${API_BASE}/history?limit=20`); + const data = await response.json(); + renderHistory(data.history || []); + } catch (error) { + console.error('Error loading history:', error); + } +} + +async function loadWorkflows() { + try { + const response = await fetch(`${API_BASE}/workflows`); + const data = await response.json(); + renderWorkflows(data.workflows || []); + } catch (error) { + console.error('Error loading workflows:', error); + } +} + +// Rendering +function renderClusterStatus(status) { + // Update health indicator + const healthEl = document.getElementById('cluster-health'); + const indicator = healthEl.querySelector('.health-indicator'); + const text = healthEl.querySelector('.health-text'); + + const health = (status.health || 'Unknown').toLowerCase(); + indicator.className = `health-indicator ${health}`; + text.textContent = status.health || 'Unknown'; + + // Render nodes + const nodesBody = document.querySelector('#nodes-table tbody'); + if (status.nodes && status.nodes.length > 0) { + nodesBody.innerHTML = status.nodes.map(node => ` + + ${node.name} + ${node.status} + +
+
+
+ ${node.cpu_percent.toFixed(0)}% + + +
+
+
+ ${node.memory_percent.toFixed(0)}% + + ${node.conditions} + + `).join(''); + } else { + nodesBody.innerHTML = 'No nodes data available'; + } + + // Render alerts + const alertsList = document.getElementById('alerts-list'); + if (status.alerts && status.alerts.length > 0) { + alertsList.innerHTML = status.alerts.map(alert => ` +
+ [${alert.severity.toUpperCase()}] + ${alert.name} + ${alert.description} +
+ `).join(''); + } else { + alertsList.innerHTML = '

No active alerts

'; + } + + // Render apps + const appsBody = document.querySelector('#apps-table tbody'); + if (status.apps && status.apps.length > 0) { + appsBody.innerHTML = status.apps.map(app => ` + + ${app.name} + ${app.sync_status} + ${app.health} + ${app.revision.substring(0, 7)} + + `).join(''); + } else { + appsBody.innerHTML = 'No ArgoCD apps data available'; + } +} + +function renderPendingActions(actions) { + const list = document.getElementById('pending-list'); + + if (actions.length === 0) { + list.innerHTML = '

No pending actions

'; + return; + } + + list.innerHTML = actions.map(action => ` +
+
+
+ ${action.agent} +
${action.action}
+
+ ${action.risk} risk +
+
${action.description}
+
+ + +
+
+ `).join(''); +} + +function renderHistory(history) { + const tbody = document.querySelector('#history-table tbody'); + + if (history.length === 0) { + tbody.innerHTML = 'No history available'; + return; + } + + tbody.innerHTML = history.map(entry => ` + + ${formatTime(entry.timestamp)} + ${entry.agent} + ${entry.action} + ${entry.result} + + `).join(''); +} + +function renderWorkflows(workflows) { + const list = document.getElementById('workflows-list'); + + if (workflows.length === 0) { + list.innerHTML = '

No workflows defined

'; + return; + } + + list.innerHTML = workflows.map(wf => ` +
+
+

${wf.name}

+

${wf.description}

+
+ ${wf.triggers.map(t => `${t}`).join('')} +
+
+ +
+ `).join(''); +} + +// Actions +async function approveAction(id) { + try { + const response = await fetch(`${API_BASE}/pending/${id}/approve`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({}) + }); + + if (response.ok) { + loadPendingActions(); + loadHistory(); + } else { + alert('Failed to approve action'); + } + } catch (error) { + console.error('Error approving action:', error); + alert('Error approving action'); + } +} + +async function rejectAction(id) { + const reason = prompt('Reason for rejection (optional):'); + + try { + const response = await fetch(`${API_BASE}/pending/${id}/reject`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ reason: reason || 'Rejected by user' }) + }); + + if (response.ok) { + loadPendingActions(); + loadHistory(); + } else { + alert('Failed to reject action'); + } + } catch (error) { + console.error('Error rejecting action:', error); + alert('Error rejecting action'); + } +} + +async function runWorkflow(name) { + try { + const response = await fetch(`${API_BASE}/workflows/${name}/run`, { + method: 'POST' + }); + + const data = await response.json(); + alert(data.message); + } catch (error) { + console.error('Error running workflow:', error); + alert('Error running workflow'); + } +} + +// Helpers +function getProgressClass(percent) { + if (percent >= 80) return 'danger'; + if (percent >= 60) return 'warning'; + return ''; +} + +function formatTime(timestamp) { + const date = new Date(timestamp); + return date.toLocaleString('en-US', { + month: 'short', + day: 'numeric', + hour: '2-digit', + minute: '2-digit' + }); +} + +function updateLastUpdate() { + const now = new Date(); + document.getElementById('last-update').textContent = now.toLocaleTimeString(); +} diff --git a/dashboard/deploy/argocd-application.yaml b/dashboard/deploy/argocd-application.yaml new file mode 100644 index 0000000..5cd8121 --- /dev/null +++ 
b/dashboard/deploy/argocd-application.yaml @@ -0,0 +1,45 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: k8s-agent-dashboard + namespace: argocd + labels: + app.kubernetes.io/name: k8s-agent-dashboard + app.kubernetes.io/part-of: k8s-agent-system + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + + source: + # Update this to your GitOps repo + repoURL: https://gitea.example.com/user/gitops-repo.git + targetRevision: HEAD + path: apps/k8s-agent-dashboard + + destination: + server: https://kubernetes.default.svc + namespace: k8s-agent + + syncPolicy: + automated: + prune: true + selfHeal: true + allowEmpty: false + syncOptions: + - CreateNamespace=true + - PrunePropagationPolicy=foreground + - PruneLast=true + retry: + limit: 5 + backoff: + duration: 5s + factor: 2 + maxDuration: 3m + + # Ignore replica-count drift so external scaling is not reverted by selfHeal + ignoreDifferences: + - group: apps + kind: Deployment + jsonPointers: + - /spec/replicas diff --git a/dashboard/deploy/deployment.yaml b/dashboard/deploy/deployment.yaml new file mode 100644 index 0000000..45ce267 --- /dev/null +++ b/dashboard/deploy/deployment.yaml @@ -0,0 +1,90 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: k8s-agent-dashboard + namespace: k8s-agent + labels: + app.kubernetes.io/name: k8s-agent-dashboard + app.kubernetes.io/component: dashboard +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: k8s-agent-dashboard + template: + metadata: + labels: + app.kubernetes.io/name: k8s-agent-dashboard + spec: + # Tolerate the Pi 3 taint so this lightweight workload may run there; any arm64 node is eligible + tolerations: + - key: "node-type" + operator: "Equal" + value: "pi3" + effect: "NoSchedule" + nodeSelector: + kubernetes.io/arch: arm64 + + # Security context + securityContext: + runAsNonRoot: true + runAsUser: 1000 + fsGroup: 1000 + + containers: + - name: dashboard + image: ghcr.io/will/k8s-agent-dashboard:latest + imagePullPolicy: Always + ports: + - name: http + containerPort: 8080 + protocol: TCP + args: 
+ - "--port" + - "8080" + - "--data" + - "/data" + + # Resource limits for Pi 3 (1GB RAM) + resources: + requests: + memory: "32Mi" + cpu: "10m" + limits: + memory: "64Mi" + cpu: "100m" + + # Health checks + livenessProbe: + httpGet: + path: /api/health + port: http + initialDelaySeconds: 5 + periodSeconds: 30 + timeoutSeconds: 3 + + readinessProbe: + httpGet: + path: /api/health + port: http + initialDelaySeconds: 3 + periodSeconds: 10 + timeoutSeconds: 3 + + # Volume mount for persistent data + volumeMounts: + - name: data + mountPath: /data + + # Security + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + + volumes: + - name: data + persistentVolumeClaim: + claimName: k8s-agent-dashboard-data diff --git a/dashboard/deploy/ingress.yaml b/dashboard/deploy/ingress.yaml new file mode 100644 index 0000000..3069030 --- /dev/null +++ b/dashboard/deploy/ingress.yaml @@ -0,0 +1,29 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: k8s-agent-dashboard + namespace: k8s-agent + labels: + app.kubernetes.io/name: k8s-agent-dashboard + app.kubernetes.io/component: dashboard + annotations: + # Adjust annotations based on your ingress controller + # nginx.ingress.kubernetes.io/ssl-redirect: "false" +spec: + ingressClassName: nginx # or traefik, etc. 
+ rules: + - host: k8s-agent.local # Adjust to your domain + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: k8s-agent-dashboard + port: + name: http + # Uncomment for TLS + # tls: + # - hosts: + # - k8s-agent.local + # secretName: k8s-agent-dashboard-tls diff --git a/dashboard/deploy/kustomization.yaml b/dashboard/deploy/kustomization.yaml new file mode 100644 index 0000000..56f76ce --- /dev/null +++ b/dashboard/deploy/kustomization.yaml @@ -0,0 +1,19 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: k8s-agent + +resources: + - namespace.yaml + - pvc.yaml + - deployment.yaml + - service.yaml + - ingress.yaml + +commonLabels: + app.kubernetes.io/part-of: k8s-agent-system + app.kubernetes.io/managed-by: argocd + +images: + - name: ghcr.io/will/k8s-agent-dashboard + newTag: latest diff --git a/dashboard/deploy/namespace.yaml b/dashboard/deploy/namespace.yaml new file mode 100644 index 0000000..498bc05 --- /dev/null +++ b/dashboard/deploy/namespace.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: k8s-agent + labels: + app.kubernetes.io/name: k8s-agent-dashboard + app.kubernetes.io/part-of: k8s-agent-system diff --git a/dashboard/deploy/pvc.yaml b/dashboard/deploy/pvc.yaml new file mode 100644 index 0000000..e480239 --- /dev/null +++ b/dashboard/deploy/pvc.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: k8s-agent-dashboard-data + namespace: k8s-agent + labels: + app.kubernetes.io/name: k8s-agent-dashboard + app.kubernetes.io/component: dashboard +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi + # Adjust storageClassName based on your cluster + # storageClassName: local-path diff --git a/dashboard/deploy/service.yaml b/dashboard/deploy/service.yaml new file mode 100644 index 0000000..6b4a229 --- /dev/null +++ b/dashboard/deploy/service.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + name: 
k8s-agent-dashboard + namespace: k8s-agent + labels: + app.kubernetes.io/name: k8s-agent-dashboard + app.kubernetes.io/component: dashboard +spec: + type: ClusterIP + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP + selector: + app.kubernetes.io/name: k8s-agent-dashboard diff --git a/dashboard/go.mod b/dashboard/go.mod new file mode 100644 index 0000000..b4ab15e --- /dev/null +++ b/dashboard/go.mod @@ -0,0 +1,8 @@ +module github.com/will/k8s-agent-dashboard + +go 1.21 + +require ( + github.com/go-chi/chi/v5 v5.0.11 + github.com/go-chi/cors v1.2.1 +) diff --git a/dashboard/go.sum b/dashboard/go.sum new file mode 100644 index 0000000..63f5df9 --- /dev/null +++ b/dashboard/go.sum @@ -0,0 +1,4 @@ +github.com/go-chi/chi/v5 v5.0.11 h1:BnpYbFZ3T3S1WMpD79r7R5ThWX40TaFB7L31Y8xqSwA= +github.com/go-chi/chi/v5 v5.0.11/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8= +github.com/go-chi/cors v1.2.1 h1:xEC8UT3Rlp2QuWNEr4Fs/c2EAGVKBwy/1vHx3bppil4= +github.com/go-chi/cors v1.2.1/go.mod h1:sSbTewc+6wYHBBCW7ytsFSn3rn0gOeEOrPIsEDqiK+0= diff --git a/dashboard/internal/api/handlers.go b/dashboard/internal/api/handlers.go new file mode 100644 index 0000000..fa83e1e --- /dev/null +++ b/dashboard/internal/api/handlers.go @@ -0,0 +1,157 @@ +package api + +import ( + "encoding/json" + "net/http" + "strconv" + + "github.com/go-chi/chi/v5" + "github.com/will/k8s-agent-dashboard/internal/store" +) + +// JSON helper +func respondJSON(w http.ResponseWriter, status int, data interface{}) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + json.NewEncoder(w).Encode(data) +} + +func respondError(w http.ResponseWriter, status int, message string) { + respondJSON(w, status, map[string]string{"error": message}) +} + +// HealthCheck returns API health status +func HealthCheck(w http.ResponseWriter, r *http.Request) { + respondJSON(w, http.StatusOK, map[string]string{ + "status": "ok", + "service": "k8s-agent-dashboard", + }) +} + +// 
GetClusterStatus returns current cluster status +func GetClusterStatus(s *store.Store) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + status := s.GetClusterStatus() + respondJSON(w, http.StatusOK, status) + } +} + +// GetPendingActions returns all pending actions +func GetPendingActions(s *store.Store) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + actions := s.GetPendingActions() + respondJSON(w, http.StatusOK, map[string]interface{}{ + "count": len(actions), + "actions": actions, + }) + } +} + +// ApproveAction approves a pending action +func ApproveAction(s *store.Store) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + id := chi.URLParam(r, "id") + if id == "" { + respondError(w, http.StatusBadRequest, "missing action id") + return + } + + var body struct { + Reason string `json:"reason"` + } + json.NewDecoder(r.Body).Decode(&body) + + action, err := s.ApproveAction(id, body.Reason) + if err != nil { + respondError(w, http.StatusNotFound, err.Error()) + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "status": "approved", + "action": action, + "message": "Action approved and ready for execution", + }) + } +} + +// RejectAction rejects a pending action +func RejectAction(s *store.Store) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + id := chi.URLParam(r, "id") + if id == "" { + respondError(w, http.StatusBadRequest, "missing action id") + return + } + + var body struct { + Reason string `json:"reason"` + } + json.NewDecoder(r.Body).Decode(&body) + + if body.Reason == "" { + body.Reason = "Rejected by user" + } + + action, err := s.RejectAction(id, body.Reason) + if err != nil { + respondError(w, http.StatusNotFound, err.Error()) + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "status": "rejected", + "action": action, + "message": "Action rejected", + }) + } +} + +// GetActionHistory returns action 
history
+func GetActionHistory(s *store.Store) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		limitStr := r.URL.Query().Get("limit")
+		limit := 50
+		if limitStr != "" {
+			if l, err := strconv.Atoi(limitStr); err == nil {
+				limit = l
+			}
+		}
+
+		history := s.GetActionHistory(limit)
+		respondJSON(w, http.StatusOK, map[string]interface{}{
+			"count":   len(history),
+			"history": history,
+		})
+	}
+}
+
+// GetWorkflows returns defined workflows
+func GetWorkflows(s *store.Store) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		workflows := s.GetWorkflows()
+		respondJSON(w, http.StatusOK, map[string]interface{}{
+			"count":     len(workflows),
+			"workflows": workflows,
+		})
+	}
+}
+
+// RunWorkflow triggers a workflow execution
+func RunWorkflow(s *store.Store) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		name := chi.URLParam(r, "name")
+		if name == "" {
+			respondError(w, http.StatusBadRequest, "missing workflow name")
+			return
+		}
+
+		// In Phase 2, we just acknowledge the request
+		// Phase 3 will implement actual execution via Claude Code
+		respondJSON(w, http.StatusAccepted, map[string]interface{}{
+			"status":   "queued",
+			"workflow": name,
+			"message":  "Workflow queued for execution. Use Claude Code CLI to run workflows.",
+		})
+	}
+}
diff --git a/dashboard/internal/models/models.go b/dashboard/internal/models/models.go
new file mode 100644
index 0000000..63be546
--- /dev/null
+++ b/dashboard/internal/models/models.go
@@ -0,0 +1,80 @@
+package models
+
+import "time"
+
+// ClusterStatus represents the overall cluster health
+type ClusterStatus struct {
+	Health    string       `json:"health"` // Healthy, Degraded, Critical
+	UpdatedAt time.Time    `json:"updated_at"`
+	Nodes     []NodeStatus `json:"nodes"`
+	Alerts    []Alert      `json:"alerts"`
+	Apps      []AppStatus  `json:"apps"`
+}
+
+// NodeStatus represents a single node's status
+type NodeStatus struct {
+	Name       string  `json:"name"`
+	Status     string  `json:"status"` // Ready, NotReady
+	CPU        float64 `json:"cpu_percent"`
+	Memory     float64 `json:"memory_percent"`
+	Conditions string  `json:"conditions"` // OK, MemoryPressure, DiskPressure, etc.
+}
+
+// Alert represents a Prometheus/Alertmanager alert
+type Alert struct {
+	Name        string    `json:"name"`
+	Severity    string    `json:"severity"` // warning, critical
+	Description string    `json:"description"`
+	FiringAt    time.Time `json:"firing_at"`
+}
+
+// AppStatus represents an ArgoCD application status
+type AppStatus struct {
+	Name       string `json:"name"`
+	SyncStatus string `json:"sync_status"` // Synced, OutOfSync
+	Health     string `json:"health"`      // Healthy, Progressing, Degraded
+	Revision   string `json:"revision"`
+}
+
+// PendingAction represents an action awaiting user approval
+type PendingAction struct {
+	ID          string                 `json:"id"`
+	CreatedAt   time.Time              `json:"created_at"`
+	Agent       string                 `json:"agent"`
+	Action      string                 `json:"action"`
+	Description string                 `json:"description"`
+	Details     map[string]interface{} `json:"details"`
+	Risk        string                 `json:"risk"` // low, medium, high
+	Workflow    string                 `json:"workflow,omitempty"`
+}
+
+// ActionDecision represents the user's decision on a pending action
+type ActionDecision struct {
+	ID        string    `json:"id"`
+	Decision  string    `json:"decision"` // approved, rejected
+	DecidedAt time.Time `json:"decided_at"`
+	DecidedBy string    `json:"decided_by,omitempty"`
+	Reason    string    `json:"reason,omitempty"`
+}
+
+// ActionHistory represents a completed action
+type ActionHistory struct {
+	ID           string                 `json:"id"`
+	Timestamp    time.Time              `json:"timestamp"`
+	Agent        string                 `json:"agent"`
+	Action       string                 `json:"action"`
+	Description  string                 `json:"description"`
+	Details      map[string]interface{} `json:"details,omitempty"`
+	Result       string                 `json:"result"` // approved, rejected, success, failed
+	AutoApproved bool                   `json:"auto_approved"`
+	Workflow     string                 `json:"workflow,omitempty"`
+}
+
+// Workflow represents a defined workflow
+type Workflow struct {
+	Name        string     `json:"name"`
+	Description string     `json:"description"`
+	Triggers    []string   `json:"triggers"`
+	LastRun     *time.Time `json:"last_run,omitempty"`
+	Status      string     `json:"status,omitempty"` // idle, running, completed, failed
+}
diff --git a/dashboard/internal/store/store.go b/dashboard/internal/store/store.go
new file mode 100644
index 0000000..5401973
--- /dev/null
+++ b/dashboard/internal/store/store.go
@@ -0,0 +1,286 @@
+package store
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sync"
+	"time"
+
+	"github.com/will/k8s-agent-dashboard/internal/models"
+)
+
+// maxHistory caps the number of entries kept in the action history.
+const maxHistory = 100
+
+// Store manages persistent state for the dashboard. State is cached in
+// memory and mirrored to JSON files under dataDir; mu guards the cache.
+type Store struct {
+	dataDir string
+	mu      sync.RWMutex
+
+	// In-memory cache
+	status    *models.ClusterStatus
+	pending   []models.PendingAction
+	history   []models.ActionHistory
+	workflows []models.Workflow
+}
+
+// New creates a store rooted at dataDir. The directory is created if it does
+// not exist, and any state files already present in it are loaded.
+func New(dataDir string) (*Store, error) {
+	// Ensure data directory exists
+	if err := os.MkdirAll(dataDir, 0755); err != nil {
+		return nil, fmt.Errorf("failed to create data dir: %w", err)
+	}
+
+	s := &Store{
+		dataDir:   dataDir,
+		pending:   make([]models.PendingAction, 0),
+		history:   make([]models.ActionHistory, 0),
+		workflows: make([]models.Workflow, 0),
+	}
+
+	// Load existing data
+	if err := s.load(); err != nil {
+		return nil, err
+	}
+
+	return s, nil
+}
+
+// readJSON decodes filename from the data directory into dst. It returns
+// false and a nil error when the file does not exist. Any other read or
+// parse failure is returned, so that state which merely failed to load is
+// not later overwritten by a save of the empty cache.
+func (s *Store) readJSON(filename string, dst interface{}) (bool, error) {
+	data, err := os.ReadFile(filepath.Join(s.dataDir, filename))
+	if err != nil {
+		if os.IsNotExist(err) {
+			return false, nil
+		}
+		return false, fmt.Errorf("failed to read %s: %w", filename, err)
+	}
+	if err := json.Unmarshal(data, dst); err != nil {
+		return false, fmt.Errorf("failed to parse %s: %w", filename, err)
+	}
+	return true, nil
+}
+
+// load fills the in-memory cache from pending.json, history.json and
+// status.json. Files that do not exist yet are skipped.
+func (s *Store) load() error {
+	if _, err := s.readJSON("pending.json", &s.pending); err != nil {
+		return err
+	}
+	if _, err := s.readJSON("history.json", &s.history); err != nil {
+		return err
+	}
+
+	status := &models.ClusterStatus{}
+	found, err := s.readJSON("status.json", status)
+	if err != nil {
+		return err
+	}
+	if found {
+		s.status = status
+	}
+	return nil
+}
+
+// save writes data as indented JSON to filename in the data directory. The
+// bytes are written to a temporary sibling file that is then renamed over
+// the target, so an interrupted write does not leave a truncated state file.
+func (s *Store) save(filename string, data interface{}) error {
+	path := filepath.Join(s.dataDir, filename)
+	bytes, err := json.MarshalIndent(data, "", " ")
+	if err != nil {
+		return err
+	}
+
+	tmp := path + ".tmp"
+	if err := os.WriteFile(tmp, bytes, 0644); err != nil {
+		return err
+	}
+	return os.Rename(tmp, path)
+}
+
+// GetClusterStatus returns the current cluster status, or a placeholder with
+// Health "Unknown" and empty lists when no status has been stored yet.
+func (s *Store) GetClusterStatus() *models.ClusterStatus {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	if s.status == nil {
+		// Return demo status if none exists
+		return &models.ClusterStatus{
+			Health:    "Unknown",
+			UpdatedAt: time.Now(),
+			Nodes:     []models.NodeStatus{},
+			Alerts:    []models.Alert{},
+			Apps:      []models.AppStatus{},
+		}
+	}
+	return s.status
+}
+
+// UpdateClusterStatus stamps status with the current time, persists it to
+// status.json and makes it the cached status. It returns an error, leaving
+// the cache untouched, if status is nil or cannot be saved.
+func (s *Store) UpdateClusterStatus(status *models.ClusterStatus) error {
+	if status == nil {
+		return fmt.Errorf("cluster status must not be nil")
+	}
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	status.UpdatedAt = time.Now()
+	if err := s.save("status.json", status); err != nil {
+		return err
+	}
+	s.status = status
+	return nil
+}
+
+// GetPendingActions returns a copy of the pending actions, so callers can
+// read the result after the store lock has been released. The copy is
+// shallow: Details maps are shared with the store.
+func (s *Store) GetPendingActions() []models.PendingAction {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	out := make([]models.PendingAction, len(s.pending))
+	copy(out, s.pending)
+	return out
+}
+
+// AddPendingAction stamps action with the current time and appends it to the
+// pending list. The cache is only updated once pending.json has been saved.
+func (s *Store) AddPendingAction(action models.PendingAction) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	action.CreatedAt = time.Now()
+	// s.pending keeps its old length until the save below has succeeded.
+	updated := append(s.pending, action)
+	if err := s.save("pending.json", updated); err != nil {
+		return err
+	}
+	s.pending = updated
+	return nil
+}
+
+// ApproveAction moves the pending action with the given id into the history
+// with result "approved" and returns the action. The reason argument is
+// currently not recorded.
+func (s *Store) ApproveAction(id string, reason string) (*models.PendingAction, error) {
+	return s.resolveAction(id, "approved", "")
+}
+
+// RejectAction moves the pending action with the given id into the history
+// with result "rejected", appending the reason to its description, and
+// returns the action.
+func (s *Store) RejectAction(id string, reason string) (*models.PendingAction, error) {
+	return s.resolveAction(id, "rejected", " (REJECTED: "+reason+")")
+}
+
+// resolveAction removes the pending action with the given id and prepends a
+// history entry for it, using result as the outcome and appending descSuffix
+// to the description. History is capped at maxHistory entries, newest first.
+// It returns an error if no such action exists or if either state file
+// cannot be saved; in both cases the in-memory state is left unchanged.
+func (s *Store) resolveAction(id, result, descSuffix string) (*models.PendingAction, error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	idx := -1
+	for i := range s.pending {
+		if s.pending[i].ID == id {
+			idx = i
+			break
+		}
+	}
+	if idx < 0 {
+		return nil, fmt.Errorf("action not found: %s", id)
+	}
+	action := s.pending[idx]
+
+	// Build new slices instead of shifting elements in place, so the current
+	// state stays intact until both files have been written.
+	remaining := make([]models.PendingAction, 0, len(s.pending)-1)
+	remaining = append(remaining, s.pending[:idx]...)
+	remaining = append(remaining, s.pending[idx+1:]...)
+
+	history := make([]models.ActionHistory, 0, len(s.history)+1)
+	history = append(history, models.ActionHistory{
+		ID:           action.ID,
+		Timestamp:    time.Now(),
+		Agent:        action.Agent,
+		Action:       action.Action,
+		Description:  action.Description + descSuffix,
+		Details:      action.Details,
+		Result:       result,
+		AutoApproved: false,
+		Workflow:     action.Workflow,
+	})
+	history = append(history, s.history...)
+	if len(history) > maxHistory {
+		history = history[:maxHistory]
+	}
+
+	// Write the history first: if the second write fails, the action is
+	// still pending on disk rather than missing from both files.
+	if err := s.save("history.json", history); err != nil {
+		return nil, fmt.Errorf("failed to save history.json: %w", err)
+	}
+	if err := s.save("pending.json", remaining); err != nil {
+		return nil, fmt.Errorf("failed to save pending.json: %w", err)
+	}
+
+	s.pending = remaining
+	s.history = history
+	return &action, nil
+}
+
+// GetActionHistory returns a copy of the newest history entries. At most
+// limit entries are returned; a limit of zero or less returns all of them.
+func (s *Store) GetActionHistory(limit int) []models.ActionHistory {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	n := len(s.history)
+	if limit > 0 && limit < n {
+		n = limit
+	}
+	out := make([]models.ActionHistory, n)
+	copy(out, s.history)
+	return out
+}
+
+// GetWorkflows returns all defined workflows
+func (s *Store) GetWorkflows() []models.Workflow {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	// Return predefined workflows based on what we have in ~/.claude/workflows
+	return []models.Workflow{
+		{
+			Name:        "cluster-health-check",
+			Description: "Comprehensive cluster health assessment",
+			Triggers:    []string{"schedule: 0 */6 * * *", "manual"},
+			Status:      "idle",
+		},
+		{
+			Name:        "deploy-app",
+			Description: "Deploy or update an application",
+			Triggers:    []string{"manual"},
+			Status:      "idle",
+		},
+		{
+			Name:        "pod-crashloop-remediation",
+			Description: "Diagnose and remediate pods in CrashLoopBackOff",
+			Triggers:    []string{"alert: KubePodCrashLooping", "manual"},
+			Status:      "idle",
+		},
+	}
+}