feat: Implement Phase 2 dashboard for K8s agent system

Lightweight Go-based dashboard for Raspberry Pi cluster:

Backend:
- chi router with REST API
- Embedded static file serving
- JSON file-based state storage
- Health checks and CORS support

Frontend:
- Responsive dark theme UI
- Status view with nodes, alerts, ArgoCD apps
- Pending actions with approve/reject
- Action history and audit trail
- Workflow listing and manual triggers

Deployment:
- Multi-stage Dockerfile (small Alpine image)
- Kubernetes manifests with Pi 3 tolerations
- Resource limits: 32-64Mi memory, 10-100m CPU
- ArgoCD application manifest
- Kustomize configuration

API endpoints:
- GET /api/status - Cluster status
- GET/POST /api/pending - Action management
- GET /api/history - Action audit trail
- GET/POST /api/workflows - Workflow management

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
OpenCode Test
2025-12-26 11:34:36 -08:00
parent a80f714fc2
commit 5646508adb
18 changed files with 1712 additions and 0 deletions

45
dashboard/Dockerfile Normal file
View File

@@ -0,0 +1,45 @@
# Build stage: compiles the server binary with the Go toolchain; nothing from
# this stage except /server is carried into the final image.
FROM golang:1.21-alpine AS builder
WORKDIR /app
# Install dependencies
# (git is presumably needed by `go mod download` for VCS-hosted modules — confirm)
RUN apk add --no-cache git
# Copy go mod files before the rest of the source so the module download layer
# stays cached until they change. The trailing * lets the COPY succeed when
# go.sum does not exist.
COPY go.mod go.sum* ./
RUN go mod download
# Copy source code
COPY . .
# Build binary: cgo disabled, targeting Linux, with the symbol table and DWARF
# debug info stripped (-s -w) to reduce its size.
RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /server ./cmd/server
# Final stage - minimal image
FROM alpine:3.19
RUN apk add --no-cache ca-certificates tzdata
WORKDIR /app
# Copy binary from builder
COPY --from=builder /server /app/server
# Create data directory (matches the --data default passed in CMD below)
RUN mkdir -p /data
# Expose port (matches the --port default passed in CMD below)
EXPOSE 8080
# Run as non-root user; the UID is fixed at 1000 and /app and /data are handed
# over to that user before switching.
RUN adduser -D -u 1000 appuser
RUN chown -R appuser:appuser /app /data
USER appuser
# Health check: polls the API health endpoint on the fixed port 8080, so it
# will not follow a port changed via --port or the PORT environment variable.
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD wget --no-verbose --tries=1 --spider http://localhost:8080/api/health || exit 1
# ENTRYPOINT is the binary; CMD only supplies default flags, so arguments given
# at `docker run` (or Kubernetes `args`) replace the flags, not the binary.
ENTRYPOINT ["/app/server"]
CMD ["--port", "8080", "--data", "/data"]

97
dashboard/README.md Normal file
View File

@@ -0,0 +1,97 @@
# K8s Agent Dashboard
Lightweight web dashboard for the K8s Agent Orchestrator system. Designed to run on Raspberry Pi clusters with minimal resource usage.
## Features
- **Cluster Status** - View node health, resource usage, and conditions
- **Pending Actions** - Approve or reject actions requiring confirmation
- **Action History** - Audit trail of all agent actions
- **Workflows** - View and trigger defined workflows
## Resource Requirements
Optimized for Raspberry Pi 3B+ (1GB RAM):
- Memory: 32-64Mi
- CPU: 10-100m
## Development
### Prerequisites
- Go 1.21+
- Docker (for building images)
### Local Development
```bash
# Run locally
go run ./cmd/server --port 8080 --data ./data
# Build binary
go build -o server ./cmd/server
# Build Docker image
docker build -t k8s-agent-dashboard:dev .
```
### Build for ARM64
```bash
# Build an ARM64 image (list more platforms in --platform for a true multi-arch build)
docker buildx build --platform linux/arm64 -t ghcr.io/user/k8s-agent-dashboard:latest .
```
## Deployment
### Using Kustomize
```bash
kubectl apply -k deploy/
```
### Using ArgoCD
1. Copy `deploy/` contents to your GitOps repo under `apps/k8s-agent-dashboard/`
2. Apply the ArgoCD application:
```bash
kubectl apply -f deploy/argocd-application.yaml
```
## Configuration
### Environment Variables
| Variable | Default | Description |
|----------|---------|-------------|
| PORT | (unset) | Server port; when set, it overrides the `--port` flag |
### Command Line Flags
| Flag | Default | Description |
|------|---------|-------------|
| --port | 8080 | Server port |
| --data | /data | Data directory for persistent state |
## API Endpoints
| Method | Path | Description |
|--------|------|-------------|
| GET | /api/health | Health check |
| GET | /api/status | Cluster status |
| GET | /api/pending | Pending actions |
| POST | /api/pending/{id}/approve | Approve action |
| POST | /api/pending/{id}/reject | Reject action |
| GET | /api/history | Action history |
| GET | /api/workflows | List workflows |
| POST | /api/workflows/{name}/run | Trigger workflow |
## Integration with Claude Code
The dashboard reads/writes state files that can be shared with Claude Code agents:
- `status.json` - Cluster status (written by agents)
- `pending.json` - Pending actions (read/write)
- `history.json` - Action history (append by agents)
To share state, mount the same PVC or directory in both the dashboard and Claude Code's data directory.

View File

@@ -0,0 +1,81 @@
package main
import (
	"embed"
	"flag"
	"io/fs"
	"log"
	"net/http"
	"os"
	"time"

	"github.com/go-chi/chi/v5"
	"github.com/go-chi/chi/v5/middleware"
	"github.com/go-chi/cors"
	"github.com/will/k8s-agent-dashboard/internal/api"
	"github.com/will/k8s-agent-dashboard/internal/store"
)
// webFS is the web/ directory compiled into the binary. main serves its
// contents as the static front end. The "all:" prefix asks the embed package
// to include files whose names start with '.' or '_' as well.
//
//go:embed all:web
var webFS embed.FS
// main parses the command-line flags, opens the state store, builds the chi
// router (middleware, /api routes and the embedded static front end) and then
// serves HTTP until the server stops with an error.
//
// Flags:
//
//	--port  TCP port to listen on (default "8080"); the PORT environment
//	        variable, when non-empty, takes precedence over this flag.
//	--data  directory the store reads and writes its state in (default "/data").
func main() {
	port := flag.String("port", "8080", "Server port")
	dataDir := flag.String("data", "/data", "Data directory for state")
	flag.Parse()

	// Initialize store
	s, err := store.New(*dataDir)
	if err != nil {
		log.Fatalf("Failed to initialize store: %v", err)
	}

	// Create router
	r := chi.NewRouter()

	// Middleware
	r.Use(middleware.Logger)
	r.Use(middleware.Recoverer)
	r.Use(middleware.Compress(5))
	// NOTE(review): every origin is allowed while the approve/reject/run
	// endpoints below are unauthenticated POSTs, so any web page the operator
	// visits can call them. Left unchanged here because other consumers may
	// depend on it; restrict AllowedOrigins once the intended callers are known.
	r.Use(cors.Handler(cors.Options{
		AllowedOrigins:   []string{"*"},
		AllowedMethods:   []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
		AllowedHeaders:   []string{"Accept", "Content-Type"},
		ExposedHeaders:   []string{"Link"},
		AllowCredentials: false,
		MaxAge:           300,
	}))

	// API routes
	r.Route("/api", func(r chi.Router) {
		r.Get("/health", api.HealthCheck)
		r.Get("/status", api.GetClusterStatus(s))
		r.Get("/pending", api.GetPendingActions(s))
		r.Post("/pending/{id}/approve", api.ApproveAction(s))
		r.Post("/pending/{id}/reject", api.RejectAction(s))
		r.Get("/history", api.GetActionHistory(s))
		r.Get("/workflows", api.GetWorkflows(s))
		r.Post("/workflows/{name}/run", api.RunWorkflow(s))
	})

	// Static files: everything that is not under /api is served from the
	// embedded web/ directory.
	webContent, err := fs.Sub(webFS, "web")
	if err != nil {
		log.Fatalf("Failed to get web content: %v", err)
	}
	r.Handle("/*", http.FileServer(http.FS(webContent)))

	// The PORT environment variable overrides the --port flag.
	addr := ":" + *port
	if envPort := os.Getenv("PORT"); envPort != "" {
		addr = ":" + envPort
	}

	// An explicit http.Server is used instead of http.ListenAndServe so that
	// slow or stalled clients cannot hold connections (and memory) forever.
	srv := &http.Server{
		Addr:              addr,
		Handler:           r,
		ReadHeaderTimeout: 5 * time.Second,
		ReadTimeout:       15 * time.Second,
		WriteTimeout:      30 * time.Second,
		IdleTimeout:       60 * time.Second,
	}

	log.Printf("Starting server on %s", addr)
	log.Printf("Data directory: %s", *dataDir)
	if err := srv.ListenAndServe(); err != nil {
		log.Fatalf("Server failed: %v", err)
	}
}

View File

@@ -0,0 +1,112 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>K8s Agent Dashboard</title>
<link rel="stylesheet" href="/static/css/style.css">
</head>
<body>
<header>
<h1>K8s Agent Dashboard</h1>
<div class="cluster-health" id="cluster-health">
<span class="health-indicator"></span>
<span class="health-text">Loading...</span>
</div>
</header>
<nav>
<button class="nav-btn active" data-view="status">Status</button>
<button class="nav-btn" data-view="pending">Pending <span id="pending-count" class="badge">0</span></button>
<button class="nav-btn" data-view="history">History</button>
<button class="nav-btn" data-view="workflows">Workflows</button>
</nav>
<main>
<!-- Status View -->
<section id="status-view" class="view active">
<div class="card">
<h2>Nodes</h2>
<table id="nodes-table">
<thead>
<tr>
<th>Node</th>
<th>Status</th>
<th>CPU</th>
<th>Memory</th>
<th>Conditions</th>
</tr>
</thead>
<tbody></tbody>
</table>
</div>
<div class="card">
<h2>Active Alerts</h2>
<div id="alerts-list" class="alerts-list">
<p class="empty-state">No active alerts</p>
</div>
</div>
<div class="card">
<h2>ArgoCD Applications</h2>
<table id="apps-table">
<thead>
<tr>
<th>Application</th>
<th>Sync</th>
<th>Health</th>
<th>Revision</th>
</tr>
</thead>
<tbody></tbody>
</table>
</div>
</section>
<!-- Pending Actions View -->
<section id="pending-view" class="view">
<div class="card">
<h2>Pending Actions</h2>
<div id="pending-list" class="pending-list">
<p class="empty-state">No pending actions</p>
</div>
</div>
</section>
<!-- History View -->
<section id="history-view" class="view">
<div class="card">
<h2>Action History</h2>
<table id="history-table">
<thead>
<tr>
<th>Time</th>
<th>Agent</th>
<th>Action</th>
<th>Result</th>
</tr>
</thead>
<tbody></tbody>
</table>
</div>
</section>
<!-- Workflows View -->
<section id="workflows-view" class="view">
<div class="card">
<h2>Workflows</h2>
<div id="workflows-list" class="workflows-list">
<p class="empty-state">Loading workflows...</p>
</div>
</div>
</section>
</main>
<footer>
<p>K8s Agent Dashboard | Last updated: <span id="last-update">-</span></p>
</footer>
<script src="/static/js/app.js"></script>
</body>
</html>

View File

@@ -0,0 +1,355 @@
:root {
--bg-primary: #1a1a2e;
--bg-secondary: #16213e;
--bg-card: #0f3460;
--text-primary: #eaeaea;
--text-secondary: #a0a0a0;
--accent: #e94560;
--success: #4ade80;
--warning: #fbbf24;
--danger: #ef4444;
--info: #60a5fa;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
background: var(--bg-primary);
color: var(--text-primary);
min-height: 100vh;
line-height: 1.6;
}
header {
background: var(--bg-secondary);
padding: 1rem 2rem;
display: flex;
justify-content: space-between;
align-items: center;
border-bottom: 1px solid var(--bg-card);
}
header h1 {
font-size: 1.5rem;
font-weight: 600;
}
.cluster-health {
display: flex;
align-items: center;
gap: 0.5rem;
padding: 0.5rem 1rem;
background: var(--bg-card);
border-radius: 20px;
}
.health-indicator {
width: 12px;
height: 12px;
border-radius: 50%;
background: var(--text-secondary);
}
.health-indicator.healthy { background: var(--success); }
.health-indicator.degraded { background: var(--warning); }
.health-indicator.critical { background: var(--danger); }
nav {
background: var(--bg-secondary);
padding: 0.5rem 2rem;
display: flex;
gap: 0.5rem;
border-bottom: 1px solid var(--bg-card);
}
.nav-btn {
background: transparent;
border: none;
color: var(--text-secondary);
padding: 0.75rem 1.5rem;
cursor: pointer;
border-radius: 8px;
font-size: 0.9rem;
transition: all 0.2s;
}
.nav-btn:hover {
background: var(--bg-card);
color: var(--text-primary);
}
.nav-btn.active {
background: var(--accent);
color: white;
}
.badge {
background: var(--danger);
color: white;
font-size: 0.75rem;
padding: 0.1rem 0.5rem;
border-radius: 10px;
margin-left: 0.25rem;
}
main {
padding: 2rem;
max-width: 1400px;
margin: 0 auto;
}
.view {
display: none;
}
.view.active {
display: block;
}
.card {
background: var(--bg-card);
border-radius: 12px;
padding: 1.5rem;
margin-bottom: 1.5rem;
}
.card h2 {
font-size: 1.1rem;
margin-bottom: 1rem;
color: var(--text-primary);
}
table {
width: 100%;
border-collapse: collapse;
}
th, td {
text-align: left;
padding: 0.75rem;
border-bottom: 1px solid var(--bg-secondary);
}
th {
color: var(--text-secondary);
font-weight: 500;
font-size: 0.85rem;
text-transform: uppercase;
}
td {
font-size: 0.9rem;
}
.status-badge {
padding: 0.25rem 0.75rem;
border-radius: 12px;
font-size: 0.8rem;
font-weight: 500;
}
.status-ready, .status-healthy, .status-synced, .status-success {
background: rgba(74, 222, 128, 0.2);
color: var(--success);
}
.status-notready, .status-degraded, .status-outofsync, .status-failed {
background: rgba(239, 68, 68, 0.2);
color: var(--danger);
}
.status-progressing, .status-pending {
background: rgba(251, 191, 36, 0.2);
color: var(--warning);
}
.status-rejected {
background: rgba(239, 68, 68, 0.2);
color: var(--danger);
}
.status-approved {
background: rgba(74, 222, 128, 0.2);
color: var(--success);
}
.alerts-list, .pending-list, .workflows-list {
display: flex;
flex-direction: column;
gap: 0.75rem;
}
.alert-item {
display: flex;
align-items: center;
gap: 1rem;
padding: 0.75rem;
background: var(--bg-secondary);
border-radius: 8px;
border-left: 3px solid var(--danger);
}
.alert-item.warning {
border-left-color: var(--warning);
}
.pending-item {
background: var(--bg-secondary);
border-radius: 8px;
padding: 1rem;
border-left: 3px solid var(--warning);
}
.pending-item .header {
display: flex;
justify-content: space-between;
align-items: flex-start;
margin-bottom: 0.5rem;
}
.pending-item .agent {
font-size: 0.8rem;
color: var(--text-secondary);
}
.pending-item .action {
font-weight: 600;
}
.pending-item .description {
color: var(--text-secondary);
font-size: 0.9rem;
margin-bottom: 1rem;
}
.pending-item .buttons {
display: flex;
gap: 0.5rem;
}
.btn {
padding: 0.5rem 1rem;
border: none;
border-radius: 6px;
cursor: pointer;
font-size: 0.85rem;
font-weight: 500;
transition: all 0.2s;
}
.btn-approve {
background: var(--success);
color: var(--bg-primary);
}
.btn-reject {
background: var(--danger);
color: white;
}
.btn-run {
background: var(--info);
color: var(--bg-primary);
}
.btn:hover {
opacity: 0.9;
transform: translateY(-1px);
}
.workflow-item {
background: var(--bg-secondary);
border-radius: 8px;
padding: 1rem;
display: flex;
justify-content: space-between;
align-items: center;
}
.workflow-item .info h3 {
font-size: 1rem;
margin-bottom: 0.25rem;
}
.workflow-item .info p {
color: var(--text-secondary);
font-size: 0.85rem;
}
.workflow-item .triggers {
display: flex;
gap: 0.5rem;
margin-top: 0.5rem;
}
.trigger-tag {
background: var(--bg-card);
padding: 0.2rem 0.5rem;
border-radius: 4px;
font-size: 0.75rem;
color: var(--text-secondary);
}
.empty-state {
color: var(--text-secondary);
text-align: center;
padding: 2rem;
font-style: italic;
}
footer {
text-align: center;
padding: 1rem;
color: var(--text-secondary);
font-size: 0.85rem;
border-top: 1px solid var(--bg-card);
}
/* Progress bar for resource usage */
.progress-bar {
width: 100px;
height: 8px;
background: var(--bg-secondary);
border-radius: 4px;
overflow: hidden;
}
.progress-bar .fill {
height: 100%;
background: var(--success);
transition: width 0.3s;
}
.progress-bar .fill.warning { background: var(--warning); }
.progress-bar .fill.danger { background: var(--danger); }
/* Responsive */
@media (max-width: 768px) {
header {
flex-direction: column;
gap: 1rem;
text-align: center;
}
nav {
flex-wrap: wrap;
justify-content: center;
}
main {
padding: 1rem;
}
table {
font-size: 0.85rem;
}
th, td {
padding: 0.5rem;
}
}

View File

@@ -0,0 +1,306 @@
// K8s Agent Dashboard - Frontend JavaScript
// Prefix shared by every backend request issued from this file.
const API_BASE = '/api';

// Name of the view that is currently displayed.
let currentView = 'status';

// Bootstrap: once the DOM has been parsed, wire the navigation buttons, load
// all data immediately and then reload it every 30 seconds.
document.addEventListener('DOMContentLoaded', function () {
    setupNavigation();
    loadAllData();
    setInterval(loadAllData, 30000);
});
// Navigation
// Registers a click listener on every .nav-btn element. A click switches to
// the view named by the button's data-view attribute.
function setupNavigation() {
    for (const button of document.querySelectorAll('.nav-btn')) {
        button.addEventListener('click', () => {
            switchView(button.dataset.view);
        });
    }
}
// Makes `view` the visible section: records it in currentView, marks the
// matching nav button as active and shows only the section whose id is
// "<view>-view".
function switchView(view) {
    currentView = view;

    const sectionId = `${view}-view`;

    // Highlight the button that belongs to the selected view.
    for (const button of document.querySelectorAll('.nav-btn')) {
        const selected = button.dataset.view === view;
        button.classList.toggle('active', selected);
    }

    // Show the selected section and hide all others.
    for (const section of document.querySelectorAll('.view')) {
        const visible = section.id === sectionId;
        section.classList.toggle('active', visible);
    }
}
// Data Loading
// Requests API_BASE + path and resolves with the decoded JSON body.
// Rejects when the request itself fails or the server answers with a non-2xx
// status, so an error payload is never handed to a render function.
async function fetchJSON(path) {
    const response = await fetch(`${API_BASE}${path}`);
    if (!response.ok) {
        throw new Error(`GET ${API_BASE}${path} failed with HTTP ${response.status}`);
    }
    return response.json();
}

// Refreshes every section in parallel. The "last updated" footer is only
// advanced when all four loaders succeeded, so a dead backend no longer looks
// freshly updated.
async function loadAllData() {
    try {
        const results = await Promise.all([
            loadClusterStatus(),
            loadPendingActions(),
            loadHistory(),
            loadWorkflows()
        ]);
        if (results.every(Boolean)) {
            updateLastUpdate();
        }
    } catch (error) {
        console.error('Error loading data:', error);
    }
}

// Loads /status and renders it. Resolves to true on success and false on
// failure; failures are logged and never thrown, so one broken section does
// not block the others.
async function loadClusterStatus() {
    try {
        renderClusterStatus(await fetchJSON('/status'));
        return true;
    } catch (error) {
        console.error('Error loading status:', error);
        return false;
    }
}

// Loads /pending, renders the list and updates the badge counter in the nav.
// Resolves to true on success, false on failure.
async function loadPendingActions() {
    try {
        const data = await fetchJSON('/pending');
        renderPendingActions(data.actions || []);
        document.getElementById('pending-count').textContent = data.count || 0;
        return true;
    } catch (error) {
        console.error('Error loading pending:', error);
        return false;
    }
}

// Loads the 20 most recent history entries and renders them.
// Resolves to true on success, false on failure.
async function loadHistory() {
    try {
        const data = await fetchJSON('/history?limit=20');
        renderHistory(data.history || []);
        return true;
    } catch (error) {
        console.error('Error loading history:', error);
        return false;
    }
}

// Loads the workflow list and renders it.
// Resolves to true on success, false on failure.
async function loadWorkflows() {
    try {
        const data = await fetchJSON('/workflows');
        renderWorkflows(data.workflows || []);
        return true;
    } catch (error) {
        console.error('Error loading workflows:', error);
        return false;
    }
}
// Rendering
// Escapes a value for interpolation into HTML text or a double-quoted
// attribute. null and undefined become the empty string. All data rendered
// below comes from state files and must not be trusted as markup.
function escapeHtml(value) {
    if (value === null || value === undefined) {
        return '';
    }
    return String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}

// Turns a status label such as "Out Of Sync" into the suffix used by the
// .status-* CSS classes: lower-cased, whitespace removed, HTML-escaped.
function statusClass(value) {
    const label = value === null || value === undefined ? '' : String(value);
    return escapeHtml(label.toLowerCase().replace(/\s+/g, ''));
}

// Coerces a percentage field to a finite number; missing or invalid input
// becomes 0 instead of throwing later in toFixed().
function toPercent(value) {
    const n = Number(value);
    return Number.isFinite(n) ? n : 0;
}

// Renders one usage cell: a progress bar (width clamped to 0-100%) followed by
// the rounded percentage.
function renderUsageCell(rawPercent) {
    const percent = toPercent(rawPercent);
    const width = Math.min(100, Math.max(0, percent));
    return `
                    <div class="progress-bar">
                        <div class="fill ${getProgressClass(percent)}" style="width: ${width}%"></div>
                    </div>
                    ${percent.toFixed(0)}%`;
}

// Renders the status view: header health indicator, node table, alert list
// and ArgoCD application table. `status` is the /api/status payload; a null
// payload or missing lists are shown as the corresponding empty state.
function renderClusterStatus(status) {
    status = status || {};

    // Update health indicator
    const healthEl = document.getElementById('cluster-health');
    const indicator = healthEl.querySelector('.health-indicator');
    const text = healthEl.querySelector('.health-text');
    const healthLabel = String(status.health || 'Unknown');
    indicator.className = `health-indicator ${healthLabel.toLowerCase()}`;
    text.textContent = healthLabel;

    // Render nodes
    const nodesBody = document.querySelector('#nodes-table tbody');
    if (status.nodes && status.nodes.length > 0) {
        nodesBody.innerHTML = status.nodes.map(node => `
            <tr>
                <td>${escapeHtml(node.name)}</td>
                <td><span class="status-badge status-${statusClass(node.status)}">${escapeHtml(node.status)}</span></td>
                <td>${renderUsageCell(node.cpu_percent)}
                </td>
                <td>${renderUsageCell(node.memory_percent)}
                </td>
                <td>${escapeHtml(node.conditions)}</td>
            </tr>
        `).join('');
    } else {
        nodesBody.innerHTML = '<tr><td colspan="5" class="empty-state">No nodes data available</td></tr>';
    }

    // Render alerts
    const alertsList = document.getElementById('alerts-list');
    if (status.alerts && status.alerts.length > 0) {
        alertsList.innerHTML = status.alerts.map(alert => {
            const severity = alert.severity === null || alert.severity === undefined ? '' : String(alert.severity);
            return `
            <div class="alert-item ${escapeHtml(severity)}">
                <strong>[${escapeHtml(severity.toUpperCase())}]</strong>
                <span>${escapeHtml(alert.name)}</span>
                <span class="description">${escapeHtml(alert.description)}</span>
            </div>
        `;
        }).join('');
    } else {
        alertsList.innerHTML = '<p class="empty-state">No active alerts</p>';
    }

    // Render apps
    const appsBody = document.querySelector('#apps-table tbody');
    if (status.apps && status.apps.length > 0) {
        appsBody.innerHTML = status.apps.map(app => `
            <tr>
                <td>${escapeHtml(app.name)}</td>
                <td><span class="status-badge status-${statusClass(app.sync_status)}">${escapeHtml(app.sync_status)}</span></td>
                <td><span class="status-badge status-${statusClass(app.health)}">${escapeHtml(app.health)}</span></td>
                <td><code>${escapeHtml(String(app.revision || '').substring(0, 7))}</code></td>
            </tr>
        `).join('');
    } else {
        appsBody.innerHTML = '<tr><td colspan="4" class="empty-state">No ArgoCD apps data available</td></tr>';
    }
}

// Renders the pending-actions list. The action id is carried in an escaped
// data-id attribute and read back by the button handlers, so an id can never
// break out of the inline handler's JavaScript string.
function renderPendingActions(actions) {
    const list = document.getElementById('pending-list');
    if (actions.length === 0) {
        list.innerHTML = '<p class="empty-state">No pending actions</p>';
        return;
    }
    list.innerHTML = actions.map(action => `
        <div class="pending-item" data-id="${escapeHtml(action.id)}">
            <div class="header">
                <div>
                    <span class="agent">${escapeHtml(action.agent)}</span>
                    <div class="action">${escapeHtml(action.action)}</div>
                </div>
                <span class="status-badge status-pending">${escapeHtml(action.risk)} risk</span>
            </div>
            <div class="description">${escapeHtml(action.description)}</div>
            <div class="buttons">
                <button class="btn btn-approve" onclick="approveAction(this.closest('.pending-item').dataset.id)">Approve</button>
                <button class="btn btn-reject" onclick="rejectAction(this.closest('.pending-item').dataset.id)">Reject</button>
            </div>
        </div>
    `).join('');
}

// Renders the action history table, one row per entry.
function renderHistory(history) {
    const tbody = document.querySelector('#history-table tbody');
    if (history.length === 0) {
        tbody.innerHTML = '<tr><td colspan="4" class="empty-state">No history available</td></tr>';
        return;
    }
    tbody.innerHTML = history.map(entry => `
        <tr>
            <td>${escapeHtml(formatTime(entry.timestamp))}</td>
            <td>${escapeHtml(entry.agent)}</td>
            <td>${escapeHtml(entry.action)}</td>
            <td><span class="status-badge status-${escapeHtml(entry.result)}">${escapeHtml(entry.result)}</span></td>
        </tr>
    `).join('');
}

// Renders the workflow list. A missing or null `triggers` field is treated as
// an empty list. The workflow name travels in an escaped data-name attribute
// that the Run button reads back.
function renderWorkflows(workflows) {
    const list = document.getElementById('workflows-list');
    if (workflows.length === 0) {
        list.innerHTML = '<p class="empty-state">No workflows defined</p>';
        return;
    }
    list.innerHTML = workflows.map(wf => `
        <div class="workflow-item">
            <div class="info">
                <h3>${escapeHtml(wf.name)}</h3>
                <p>${escapeHtml(wf.description)}</p>
                <div class="triggers">
                    ${(wf.triggers || []).map(t => `<span class="trigger-tag">${escapeHtml(t)}</span>`).join('')}
                </div>
            </div>
            <button class="btn btn-run" data-name="${escapeHtml(wf.name)}" onclick="runWorkflow(this.dataset.name)">Run</button>
        </div>
    `).join('');
}
// Actions
// Approves the pending action `id` via POST /pending/{id}/approve and, on
// success, reloads the pending list and the history. The id is URL-encoded so
// characters such as spaces, '?' or '#' cannot change the request path.
// Failures are reported to the user with alert().
async function approveAction(id) {
    try {
        const response = await fetch(`${API_BASE}/pending/${encodeURIComponent(id)}/approve`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({})
        });
        if (response.ok) {
            loadPendingActions();
            loadHistory();
        } else {
            alert('Failed to approve action');
        }
    } catch (error) {
        console.error('Error approving action:', error);
        alert('Error approving action');
    }
}
// Asks the user for an optional reason and rejects the pending action `id`
// via POST /pending/{id}/reject, then reloads the pending list and history.
// Pressing Cancel in the prompt aborts without contacting the server; an
// empty answer is sent as "Rejected by user". The id is URL-encoded so it
// cannot change the request path.
async function rejectAction(id) {
    const reason = prompt('Reason for rejection (optional):');
    // prompt() returns null only when the dialog was cancelled.
    if (reason === null) {
        return;
    }
    try {
        const response = await fetch(`${API_BASE}/pending/${encodeURIComponent(id)}/reject`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ reason: reason || 'Rejected by user' })
        });
        if (response.ok) {
            loadPendingActions();
            loadHistory();
        } else {
            alert('Failed to reject action');
        }
    } catch (error) {
        console.error('Error rejecting action:', error);
        alert('Error rejecting action');
    }
}
// Triggers the workflow `name` via POST /workflows/{name}/run and shows the
// server's answer. The name is URL-encoded so it cannot change the request
// path. A non-2xx answer shows the server's "error" field (or a generic
// message) instead of being presented as success.
async function runWorkflow(name) {
    try {
        const response = await fetch(`${API_BASE}/workflows/${encodeURIComponent(name)}/run`, {
            method: 'POST'
        });
        // Tolerate an empty or non-JSON body; the status code decides the outcome.
        const data = await response.json().catch(() => ({}));
        if (!response.ok) {
            alert(data.error || 'Failed to run workflow');
            return;
        }
        alert(data.message || 'Workflow request accepted');
    } catch (error) {
        console.error('Error running workflow:', error);
        alert('Error running workflow');
    }
}
// Helpers
// Maps a usage percentage to the progress-bar modifier class:
// 80 and above -> 'danger', 60 and above -> 'warning', anything else -> ''.
function getProgressClass(percent) {
    return percent >= 80 ? 'danger'
        : percent >= 60 ? 'warning'
        : '';
}
// Formats a timestamp accepted by the Date constructor as short month, day,
// and two-digit hour and minute, using the en-US locale.
function formatTime(timestamp) {
    const displayOptions = {
        month: 'short',
        day: 'numeric',
        hour: '2-digit',
        minute: '2-digit'
    };
    return new Date(timestamp).toLocaleString('en-US', displayOptions);
}
// Writes the current local time into the #last-update footer element.
function updateLastUpdate() {
    const stamp = new Date().toLocaleTimeString();
    document.getElementById('last-update').textContent = stamp;
}

View File

@@ -0,0 +1,45 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: k8s-agent-dashboard
namespace: argocd
labels:
app.kubernetes.io/name: k8s-agent-dashboard
app.kubernetes.io/part-of: k8s-agent-system
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
# Update this to your GitOps repo
repoURL: https://gitea.example.com/user/gitops-repo.git
targetRevision: HEAD
path: apps/k8s-agent-dashboard
destination:
server: https://kubernetes.default.svc
namespace: k8s-agent
syncPolicy:
automated:
prune: true
selfHeal: true
allowEmpty: false
syncOptions:
- CreateNamespace=true
- PrunePropagationPolicy=foreground
- PruneLast=true
retry:
limit: 5
backoff:
duration: 5s
factor: 2
maxDuration: 3m
# Ignore differences in the Deployment replica count when comparing against Git
ignoreDifferences:
- group: apps
kind: Deployment
jsonPointers:
- /spec/replicas

View File

@@ -0,0 +1,90 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: k8s-agent-dashboard
namespace: k8s-agent
labels:
app.kubernetes.io/name: k8s-agent-dashboard
app.kubernetes.io/component: dashboard
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: k8s-agent-dashboard
template:
metadata:
labels:
app.kubernetes.io/name: k8s-agent-dashboard
spec:
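# Allow scheduling onto nodes tainted node-type=pi3 (lightweight workload).
# The toleration only permits those nodes; the nodeSelector below merely
# restricts the pod to arm64, so it may also land on other arm64 nodes.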
tolerations:
- key: "node-type"
operator: "Equal"
value: "pi3"
effect: "NoSchedule"
nodeSelector:
kubernetes.io/arch: arm64
# Security context
securityContext:
runAsNonRoot: true
runAsUser: 1000
fsGroup: 1000
containers:
- name: dashboard
image: ghcr.io/will/k8s-agent-dashboard:latest
imagePullPolicy: Always
ports:
- name: http
containerPort: 8080
protocol: TCP
args:
- "--port"
- "8080"
- "--data"
- "/data"
# Resource limits for Pi 3 (1GB RAM)
resources:
requests:
memory: "32Mi"
cpu: "10m"
limits:
memory: "64Mi"
cpu: "100m"
# Health checks
livenessProbe:
httpGet:
path: /api/health
port: http
initialDelaySeconds: 5
periodSeconds: 30
timeoutSeconds: 3
readinessProbe:
httpGet:
path: /api/health
port: http
initialDelaySeconds: 3
periodSeconds: 10
timeoutSeconds: 3
# Volume mount for persistent data
volumeMounts:
- name: data
mountPath: /data
# Security
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities:
drop:
- ALL
volumes:
- name: data
persistentVolumeClaim:
claimName: k8s-agent-dashboard-data

View File

@@ -0,0 +1,29 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: k8s-agent-dashboard
namespace: k8s-agent
labels:
app.kubernetes.io/name: k8s-agent-dashboard
app.kubernetes.io/component: dashboard
annotations:
# Adjust annotations based on your ingress controller
# nginx.ingress.kubernetes.io/ssl-redirect: "false"
spec:
ingressClassName: nginx # or traefik, etc.
rules:
- host: k8s-agent.local # Adjust to your domain
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: k8s-agent-dashboard
port:
name: http
# Uncomment for TLS
# tls:
# - hosts:
# - k8s-agent.local
# secretName: k8s-agent-dashboard-tls

View File

@@ -0,0 +1,19 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: k8s-agent
resources:
- namespace.yaml
- pvc.yaml
- deployment.yaml
- service.yaml
- ingress.yaml
commonLabels:
app.kubernetes.io/part-of: k8s-agent-system
app.kubernetes.io/managed-by: argocd
images:
- name: ghcr.io/will/k8s-agent-dashboard
newTag: latest

View File

@@ -0,0 +1,7 @@
apiVersion: v1
kind: Namespace
metadata:
name: k8s-agent
labels:
app.kubernetes.io/name: k8s-agent-dashboard
app.kubernetes.io/part-of: k8s-agent-system

16
dashboard/deploy/pvc.yaml Normal file
View File

@@ -0,0 +1,16 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: k8s-agent-dashboard-data
namespace: k8s-agent
labels:
app.kubernetes.io/name: k8s-agent-dashboard
app.kubernetes.io/component: dashboard
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 100Mi
# Adjust storageClassName based on your cluster
# storageClassName: local-path

View File

@@ -0,0 +1,17 @@
apiVersion: v1
kind: Service
metadata:
name: k8s-agent-dashboard
namespace: k8s-agent
labels:
app.kubernetes.io/name: k8s-agent-dashboard
app.kubernetes.io/component: dashboard
spec:
type: ClusterIP
ports:
- name: http
port: 80
targetPort: http
protocol: TCP
selector:
app.kubernetes.io/name: k8s-agent-dashboard

8
dashboard/go.mod Normal file
View File

@@ -0,0 +1,8 @@
module github.com/will/k8s-agent-dashboard
go 1.21
require (
github.com/go-chi/chi/v5 v5.0.11
github.com/go-chi/cors v1.2.1
)

4
dashboard/go.sum Normal file
View File

@@ -0,0 +1,4 @@
github.com/go-chi/chi/v5 v5.0.11 h1:BnpYbFZ3T3S1WMpD79r7R5ThWX40TaFB7L31Y8xqSwA=
github.com/go-chi/chi/v5 v5.0.11/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
github.com/go-chi/cors v1.2.1 h1:xEC8UT3Rlp2QuWNEr4Fs/c2EAGVKBwy/1vHx3bppil4=
github.com/go-chi/cors v1.2.1/go.mod h1:sSbTewc+6wYHBBCW7ytsFSn3rn0gOeEOrPIsEDqiK+0=

View File

@@ -0,0 +1,157 @@
package api
import (
"encoding/json"
"net/http"
"strconv"
"github.com/go-chi/chi/v5"
"github.com/will/k8s-agent-dashboard/internal/store"
)
// JSON helper
// respondJSON writes data as a JSON response with the given HTTP status.
//
// The value is encoded before any header is sent. If encoding fails, the
// client receives a 500 with a JSON error body instead of the requested
// status followed by an empty or truncated body. The body ends with a
// newline, as json.Encoder output does.
func respondJSON(w http.ResponseWriter, status int, data interface{}) {
	payload, err := json.Marshal(data)
	if err != nil {
		status = http.StatusInternalServerError
		payload = []byte(`{"error":"failed to encode response"}`)
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	// A write error here means the client went away; there is nobody left to
	// report it to, so it is deliberately dropped.
	_, _ = w.Write(append(payload, '\n'))
}

// respondError writes a JSON object of the form {"error": message} with the
// given HTTP status.
func respondError(w http.ResponseWriter, status int, message string) {
	respondJSON(w, status, map[string]string{"error": message})
}
// HealthCheck answers every request with 200 and a fixed JSON body that names
// the service and reports status "ok". It does not inspect the request.
func HealthCheck(w http.ResponseWriter, r *http.Request) {
	payload := map[string]string{
		"service": "k8s-agent-dashboard",
		"status":  "ok",
	}
	respondJSON(w, http.StatusOK, payload)
}
// GetClusterStatus builds the handler for GET /api/status. Each request
// responds 200 with whatever s.GetClusterStatus returns, encoded as JSON.
func GetClusterStatus(s *store.Store) http.HandlerFunc {
	handler := func(w http.ResponseWriter, _ *http.Request) {
		respondJSON(w, http.StatusOK, s.GetClusterStatus())
	}
	return handler
}
// GetPendingActions builds the handler for GET /api/pending. Each request
// responds 200 with a JSON object holding the pending actions from the store
// under "actions" and their number under "count".
func GetPendingActions(s *store.Store) http.HandlerFunc {
	return func(w http.ResponseWriter, _ *http.Request) {
		pending := s.GetPendingActions()

		payload := map[string]interface{}{
			"actions": pending,
			"count":   len(pending),
		}
		respondJSON(w, http.StatusOK, payload)
	}
}
// ApproveAction builds the handler for POST /api/pending/{id}/approve.
//
// The request body is optional; when present it is expected to be a JSON
// object with a "reason" string. The handler responds:
//   - 400 when the {id} URL parameter is empty,
//   - 404 with the store's error message when s.ApproveAction fails,
//   - 200 with the approved action otherwise.
func ApproveAction(s *store.Store) http.HandlerFunc {
	// Upper bound on the request body. The only expected field is a short
	// reason, and the process runs with a small memory limit, so a client must
	// not be able to make the decoder buffer an arbitrarily large payload.
	const maxBodyBytes = 64 << 10

	return func(w http.ResponseWriter, r *http.Request) {
		id := chi.URLParam(r, "id")
		if id == "" {
			respondError(w, http.StatusBadRequest, "missing action id")
			return
		}

		var body struct {
			Reason string `json:"reason"`
		}
		r.Body = http.MaxBytesReader(w, r.Body, maxBodyBytes)
		// Best effort: an absent, malformed or oversized body simply leaves the
		// reason empty, so the decode error is intentionally discarded.
		_ = json.NewDecoder(r.Body).Decode(&body)

		action, err := s.ApproveAction(id, body.Reason)
		if err != nil {
			respondError(w, http.StatusNotFound, err.Error())
			return
		}

		respondJSON(w, http.StatusOK, map[string]interface{}{
			"status":  "approved",
			"action":  action,
			"message": "Action approved and ready for execution",
		})
	}
}
// RejectAction builds the handler for POST /api/pending/{id}/reject.
//
// The request body is optional; when present it is expected to be a JSON
// object with a "reason" string. An empty or missing reason is replaced by
// "Rejected by user". The handler responds:
//   - 400 when the {id} URL parameter is empty,
//   - 404 with the store's error message when s.RejectAction fails,
//   - 200 with the rejected action otherwise.
func RejectAction(s *store.Store) http.HandlerFunc {
	// Upper bound on the request body. The only expected field is a short
	// reason, and the process runs with a small memory limit, so a client must
	// not be able to make the decoder buffer an arbitrarily large payload.
	const maxBodyBytes = 64 << 10

	return func(w http.ResponseWriter, r *http.Request) {
		id := chi.URLParam(r, "id")
		if id == "" {
			respondError(w, http.StatusBadRequest, "missing action id")
			return
		}

		var body struct {
			Reason string `json:"reason"`
		}
		r.Body = http.MaxBytesReader(w, r.Body, maxBodyBytes)
		// Best effort: an absent, malformed or oversized body simply falls back
		// to the default reason, so the decode error is intentionally discarded.
		_ = json.NewDecoder(r.Body).Decode(&body)
		if body.Reason == "" {
			body.Reason = "Rejected by user"
		}

		action, err := s.RejectAction(id, body.Reason)
		if err != nil {
			respondError(w, http.StatusNotFound, err.Error())
			return
		}

		respondJSON(w, http.StatusOK, map[string]interface{}{
			"status":  "rejected",
			"action":  action,
			"message": "Action rejected",
		})
	}
}
// GetActionHistory builds the handler for GET /api/history. The optional
// "limit" query parameter is passed to s.GetActionHistory; it defaults to 50
// when absent or not an integer. The response is 200 with the entries under
// "history" and their number under "count".
//
// NOTE(review): zero and negative limits are forwarded unchanged — confirm
// that the store handles them.
func GetActionHistory(s *store.Store) http.HandlerFunc {
	const defaultLimit = 50

	return func(w http.ResponseWriter, r *http.Request) {
		limit := defaultLimit
		if raw := r.URL.Query().Get("limit"); raw != "" {
			parsed, err := strconv.Atoi(raw)
			if err == nil {
				limit = parsed
			}
		}

		entries := s.GetActionHistory(limit)
		payload := map[string]interface{}{
			"history": entries,
			"count":   len(entries),
		}
		respondJSON(w, http.StatusOK, payload)
	}
}
// GetWorkflows builds the handler for GET /api/workflows. Each request
// responds 200 with the workflows from the store under "workflows" and their
// number under "count".
func GetWorkflows(s *store.Store) http.HandlerFunc {
	return func(w http.ResponseWriter, _ *http.Request) {
		defined := s.GetWorkflows()

		payload := map[string]interface{}{
			"workflows": defined,
			"count":     len(defined),
		}
		respondJSON(w, http.StatusOK, payload)
	}
}
// RunWorkflow builds the handler for POST /api/workflows/{name}/run.
//
// In Phase 2 the request is only acknowledged: the handler answers 202 with
// status "queued" for any non-empty name and does not consult or modify the
// store. Phase 3 is meant to implement actual execution via Claude Code.
// An empty {name} URL parameter yields 400.
func RunWorkflow(s *store.Store) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		name := chi.URLParam(r, "name")
		if name != "" {
			ack := map[string]interface{}{
				"status":   "queued",
				"workflow": name,
				"message":  "Workflow queued for execution. Use Claude Code CLI to run workflows.",
			}
			respondJSON(w, http.StatusAccepted, ack)
			return
		}
		respondError(w, http.StatusBadRequest, "missing workflow name")
	}
}

View File

@@ -0,0 +1,80 @@
package models
import "time"
// ClusterStatus represents the overall cluster health: one health label, the
// time the snapshot was taken, and the per-node, alert and application lists
// shown on the dashboard's status view. Field order and JSON tags define the
// /api/status response shape.
type ClusterStatus struct {
Health string `json:"health"` // Healthy, Degraded, Critical
UpdatedAt time.Time `json:"updated_at"`
Nodes []NodeStatus `json:"nodes"`
Alerts []Alert `json:"alerts"`
Apps []AppStatus `json:"apps"`
}
// NodeStatus represents a single node's status.
// CPU and Memory are serialized as "cpu_percent" and "memory_percent"; the
// front end draws them as percentage bars.
type NodeStatus struct {
Name string `json:"name"`
Status string `json:"status"` // Ready, NotReady
CPU float64 `json:"cpu_percent"`
Memory float64 `json:"memory_percent"`
Conditions string `json:"conditions"` // OK, MemoryPressure, DiskPressure, etc.
}
// Alert represents a Prometheus/Alertmanager alert as shown in the status
// view: its name, severity, a description and the time recorded in FiringAt.
type Alert struct {
Name string `json:"name"`
Severity string `json:"severity"` // warning, critical
Description string `json:"description"`
FiringAt time.Time `json:"firing_at"`
}
// AppStatus represents an ArgoCD application status: sync state, health and
// the revision string (the front end displays its first seven characters).
type AppStatus struct {
Name string `json:"name"`
SyncStatus string `json:"sync_status"` // Synced, OutOfSync
Health string `json:"health"` // Healthy, Progressing, Degraded
Revision string `json:"revision"`
}
// PendingAction represents an action awaiting user approval.
// ID is the value used in the /api/pending/{id}/approve and
// /api/pending/{id}/reject routes. Details is free-form JSON; Workflow is
// omitted from the JSON output when empty.
type PendingAction struct {
ID string `json:"id"`
CreatedAt time.Time `json:"created_at"`
Agent string `json:"agent"`
Action string `json:"action"`
Description string `json:"description"`
Details map[string]interface{} `json:"details"`
Risk string `json:"risk"` // low, medium, high
Workflow string `json:"workflow,omitempty"`
}
// ActionDecision represents the user's decision on a pending action.
type ActionDecision struct {
	ID string `json:"id"` // ID of the pending action the decision applies to
	Decision string `json:"decision"` // approved, rejected
	DecidedAt time.Time `json:"decided_at"` // time the decision was made
	DecidedBy string `json:"decided_by,omitempty"` // who made the decision; omitted from JSON when empty
	Reason string `json:"reason,omitempty"` // optional explanation; omitted from JSON when empty
}
// ActionHistory represents a completed action in the audit trail. The store
// creates one entry each time a pending action is approved or rejected,
// copying the action's descriptive fields.
type ActionHistory struct {
	ID string `json:"id"` // ID of the action this entry records
	Timestamp time.Time `json:"timestamp"` // when the entry was recorded
	Agent string `json:"agent"` // agent that proposed the action
	Action string `json:"action"` // short name of the action
	Description string `json:"description"` // summary; the store appends " (REJECTED: <reason>)" for rejections
	Details map[string]interface{} `json:"details,omitempty"` // free-form, action-specific data; omitted when empty
	Result string `json:"result"` // success, failed; the store writes "approved" or "rejected" for user decisions
	AutoApproved bool `json:"auto_approved"` // false for entries created by a user approve/reject
	Workflow string `json:"workflow,omitempty"` // associated workflow name; omitted from JSON when empty
}
// Workflow represents a defined workflow that the dashboard can list and
// trigger.
type Workflow struct {
	Name string `json:"name"` // workflow identifier, e.g. "cluster-health-check"
	Description string `json:"description"` // human-readable summary
	Triggers []string `json:"triggers"` // trigger descriptors such as "manual", "schedule: <cron>" or "alert: <name>"
	LastRun *time.Time `json:"last_run,omitempty"` // pointer so it is omitted from JSON when nil
	Status string `json:"status,omitempty"` // idle, running, completed, failed
}

View File

@@ -0,0 +1,244 @@
package store
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync"
"time"
"github.com/will/k8s-agent-dashboard/internal/models"
)
// Store manages persistent state for the dashboard. State is cached in
// memory and written as JSON files under dataDir; every exported method
// takes mu before touching the cached fields.
type Store struct {
	dataDir string // directory containing pending.json, history.json and status.json
	mu sync.RWMutex // guards the cached fields below
	// In-memory cache
	status *models.ClusterStatus // nil until status.json is loaded or UpdateClusterStatus is called
	pending []models.PendingAction // actions awaiting a decision; persisted to pending.json
	history []models.ActionHistory // newest-first audit trail; persisted to history.json
	workflows []models.Workflow // initialized empty by New; GetWorkflows currently returns a fixed list instead
}
// New builds a Store rooted at dataDir. The directory is created (mode 0755)
// when it does not exist, and the in-memory cache is primed from whatever
// state files are already present. An error is returned when the directory
// cannot be created or the existing state cannot be loaded.
func New(dataDir string) (*Store, error) {
	mkErr := os.MkdirAll(dataDir, 0755)
	if mkErr != nil {
		return nil, fmt.Errorf("failed to create data dir: %w", mkErr)
	}

	// Begin with empty, non-nil collections; load replaces them when the
	// corresponding files exist.
	st := new(Store)
	st.dataDir = dataDir
	st.pending = []models.PendingAction{}
	st.history = []models.ActionHistory{}
	st.workflows = []models.Workflow{}

	loadErr := st.load()
	if loadErr != nil {
		return nil, loadErr
	}
	return st, nil
}
// load populates the in-memory cache from pending.json, history.json and
// status.json inside s.dataDir.
//
// A file that does not exist is skipped, leaving the corresponding field at
// its current value. Any other read failure (permissions, I/O error) is
// returned instead of being treated as "no data": otherwise the store would
// start empty and the next save would overwrite the unreadable file. A file
// that exists but does not contain valid JSON is also an error.
func (s *Store) load() error {
	// readOptional returns the contents of name, with found=false when the
	// file is simply absent.
	readOptional := func(name string) (data []byte, found bool, err error) {
		data, err = os.ReadFile(filepath.Join(s.dataDir, name))
		if err != nil {
			if os.IsNotExist(err) {
				return nil, false, nil
			}
			return nil, false, fmt.Errorf("failed to read %s: %w", name, err)
		}
		return data, true, nil
	}

	// Load pending actions.
	data, found, err := readOptional("pending.json")
	if err != nil {
		return err
	}
	if found {
		if err := json.Unmarshal(data, &s.pending); err != nil {
			return fmt.Errorf("failed to parse pending.json: %w", err)
		}
		// A JSON null decodes to a nil slice; keep the slice non-nil so it
		// is written back as [] rather than null.
		if s.pending == nil {
			s.pending = make([]models.PendingAction, 0)
		}
	}

	// Load history.
	data, found, err = readOptional("history.json")
	if err != nil {
		return err
	}
	if found {
		if err := json.Unmarshal(data, &s.history); err != nil {
			return fmt.Errorf("failed to parse history.json: %w", err)
		}
		if s.history == nil {
			s.history = make([]models.ActionHistory, 0)
		}
	}

	// Load status. Decode into a local value so s.status is only set once
	// the file has parsed successfully.
	data, found, err = readOptional("status.json")
	if err != nil {
		return err
	}
	if found {
		status := &models.ClusterStatus{}
		if err := json.Unmarshal(data, status); err != nil {
			return fmt.Errorf("failed to parse status.json: %w", err)
		}
		s.status = status
	}
	return nil
}
// save encodes data as indented JSON and writes it to filename inside the
// store's data directory with mode 0644, replacing any previous contents.
// It returns the marshalling or write error unchanged.
func (s *Store) save(filename string, data interface{}) error {
	encoded, err := json.MarshalIndent(data, "", " ")
	if err != nil {
		return err
	}
	target := filepath.Join(s.dataDir, filename)
	return os.WriteFile(target, encoded, 0644)
}
// GetClusterStatus returns the cached cluster status. When no status has
// been stored yet it returns a fresh placeholder whose Health is "Unknown",
// whose UpdatedAt is the current time, and whose lists are empty.
func (s *Store) GetClusterStatus() *models.ClusterStatus {
	s.mu.RLock()
	defer s.mu.RUnlock()

	if current := s.status; current != nil {
		return current
	}

	// Nothing recorded yet: hand back an empty placeholder.
	placeholder := new(models.ClusterStatus)
	placeholder.Health = "Unknown"
	placeholder.UpdatedAt = time.Now()
	placeholder.Nodes = make([]models.NodeStatus, 0)
	placeholder.Alerts = make([]models.Alert, 0)
	placeholder.Apps = make([]models.AppStatus, 0)
	return placeholder
}
// UpdateClusterStatus replaces the cached cluster status with status and
// persists it to status.json. The status's UpdatedAt field is overwritten
// with the current time.
//
// It returns an error if status is nil or if writing status.json fails. As
// before, the in-memory status is replaced even when the write fails.
func (s *Store) UpdateClusterStatus(status *models.ClusterStatus) error {
	// Reject nil up front; dereferencing it below would panic.
	if status == nil {
		return fmt.Errorf("cluster status is nil")
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	status.UpdatedAt = time.Now()
	s.status = status
	return s.save("status.json", status)
}
// GetPendingActions returns a snapshot of all pending actions.
//
// The result is a copy of the internal slice. The internal slice can be
// modified by other Store methods once the read lock is released, so handing
// out the slice itself would let callers read it while it is being changed.
// The returned slice is never nil. Elements are copied shallowly: each
// action's Details map is still shared with the store.
func (s *Store) GetPendingActions() []models.PendingAction {
	s.mu.RLock()
	defer s.mu.RUnlock()

	snapshot := make([]models.PendingAction, len(s.pending))
	copy(snapshot, s.pending)
	return snapshot
}
// AddPendingAction queues action for approval. Its CreatedAt is replaced
// with the current time, it is appended to the pending list, and the whole
// list is written to pending.json. The write error, if any, is returned.
func (s *Store) AddPendingAction(action models.PendingAction) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	queued := action
	queued.CreatedAt = time.Now()
	s.pending = append(s.pending, queued)

	err := s.save("pending.json", s.pending)
	return err
}
// ApproveAction approves the pending action with the given id: the action is
// removed from the pending list and a history entry with Result "approved"
// is recorded.
//
// reason is accepted for symmetry with RejectAction but is not recorded.
//
// It returns the approved action, or an error if no pending action has that
// id or if the updated state could not be written to disk.
func (s *Store) ApproveAction(id string, reason string) (*models.PendingAction, error) {
	return s.resolveAction(id, "approved", "")
}

// RejectAction rejects the pending action with the given id: the action is
// removed from the pending list and a history entry with Result "rejected"
// is recorded, with " (REJECTED: <reason>)" appended to its description.
//
// It returns the rejected action, or an error if no pending action has that
// id or if the updated state could not be written to disk.
func (s *Store) RejectAction(id string, reason string) (*models.PendingAction, error) {
	return s.resolveAction(id, "rejected", " (REJECTED: "+reason+")")
}

// resolveAction moves the pending action identified by id into the history
// with the given result, appending descriptionSuffix to its description.
//
// The new pending and history lists are built as fresh slices and written to
// disk first; the in-memory state is replaced only after both writes
// succeed. On a write failure the in-memory state is left untouched and the
// error is returned, so the caller can retry.
func (s *Store) resolveAction(id, result, descriptionSuffix string) (*models.PendingAction, error) {
	// Keep only the most recent entries in the history.
	const maxHistoryEntries = 100

	s.mu.Lock()
	defer s.mu.Unlock()

	idx := -1
	for i := range s.pending {
		if s.pending[i].ID == id {
			idx = i
			break
		}
	}
	if idx < 0 {
		return nil, fmt.Errorf("action not found: %s", id)
	}
	action := s.pending[idx]

	// Build the remaining pending list in a new backing array rather than
	// shifting elements of the existing one in place.
	remaining := make([]models.PendingAction, 0, len(s.pending)-1)
	remaining = append(remaining, s.pending[:idx]...)
	remaining = append(remaining, s.pending[idx+1:]...)

	// History is newest-first: the new entry goes at the front.
	history := make([]models.ActionHistory, 0, len(s.history)+1)
	history = append(history, models.ActionHistory{
		ID:           action.ID,
		Timestamp:    time.Now(),
		Agent:        action.Agent,
		Action:       action.Action,
		Description:  action.Description + descriptionSuffix,
		Details:      action.Details,
		Result:       result,
		AutoApproved: false,
		Workflow:     action.Workflow,
	})
	history = append(history, s.history...)
	if len(history) > maxHistoryEntries {
		history = history[:maxHistoryEntries]
	}

	// Write the history first: if the second write fails, the action is
	// still in pending.json and the decision is not lost from disk.
	if err := s.save("history.json", history); err != nil {
		return nil, fmt.Errorf("failed to save history.json: %w", err)
	}
	if err := s.save("pending.json", remaining); err != nil {
		return nil, fmt.Errorf("failed to save pending.json: %w", err)
	}

	s.pending = remaining
	s.history = history
	return &action, nil
}
// GetActionHistory returns the first limit entries of the action history,
// which is kept newest-first. A limit that is zero, negative, or larger than
// the number of stored entries yields the entire history.
func (s *Store) GetActionHistory(limit int) []models.ActionHistory {
	s.mu.RLock()
	defer s.mu.RUnlock()

	count := len(s.history)
	if limit > 0 && limit <= count {
		count = limit
	}
	return s.history[:count]
}
// GetWorkflows returns the list of known workflows. The list is fixed in
// code (mirroring the definitions kept in ~/.claude/workflows) and every
// entry is reported with status "idle".
func (s *Store) GetWorkflows() []models.Workflow {
	s.mu.RLock()
	defer s.mu.RUnlock()

	const idle = "idle"

	healthCheck := models.Workflow{
		Name:        "cluster-health-check",
		Description: "Comprehensive cluster health assessment",
		Triggers:    []string{"schedule: 0 */6 * * *", "manual"},
		Status:      idle,
	}
	deployApp := models.Workflow{
		Name:        "deploy-app",
		Description: "Deploy or update an application",
		Triggers:    []string{"manual"},
		Status:      idle,
	}
	crashLoop := models.Workflow{
		Name:        "pod-crashloop-remediation",
		Description: "Diagnose and remediate pods in CrashLoopBackOff",
		Triggers:    []string{"alert: KubePodCrashLooping", "manual"},
		Status:      idle,
	}

	return []models.Workflow{healthCheck, deployApp, crashLoop}
}