feat: Implement Phase 2 dashboard for K8s agent system

Lightweight Go-based dashboard for Raspberry Pi cluster:

Backend:
- chi router with REST API
- Embedded static file serving
- JSON file-based state storage
- Health checks and CORS support

Frontend:
- Responsive dark theme UI
- Status view with nodes, alerts, ArgoCD apps
- Pending actions with approve/reject
- Action history and audit trail
- Workflow listing and manual triggers

Deployment:
- Multi-stage Dockerfile (small Alpine image)
- Kubernetes manifests with Pi 3 tolerations
- Resource limits: 32-64Mi memory, 10-100m CPU
- ArgoCD application manifest
- Kustomize configuration

API endpoints:
- GET /api/status - Cluster status
- GET/POST /api/pending - Action management
- GET /api/history - Action audit trail
- GET/POST /api/workflows - Workflow management

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
OpenCode Test
2025-12-26 11:34:36 -08:00
parent a80f714fc2
commit 5646508adb
18 changed files with 1712 additions and 0 deletions

View File

@@ -0,0 +1,157 @@
package api
import (
"encoding/json"
"net/http"
"strconv"
"github.com/go-chi/chi/v5"
"github.com/will/k8s-agent-dashboard/internal/store"
)
// respondJSON writes data to w as a JSON document with the given HTTP status.
//
// The payload is marshalled before any header is sent, so a value that cannot
// be encoded produces a 500 with a JSON error body instead of the requested
// status followed by an empty body.
func respondJSON(w http.ResponseWriter, status int, data interface{}) {
	payload, err := json.Marshal(data)
	if err != nil {
		status = http.StatusInternalServerError
		payload = []byte(`{"error":"failed to encode response"}`)
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	// The trailing newline keeps the output identical to json.Encoder.Encode.
	// A write error here means the client went away; there is nobody left to
	// report it to, so it is deliberately dropped.
	_, _ = w.Write(append(payload, '\n'))
}
// respondError sends message to the client as a JSON object of the form
// {"error": message}, using status as the HTTP status code.
func respondError(w http.ResponseWriter, status int, message string) {
	payload := map[string]string{"error": message}
	respondJSON(w, status, payload)
}
// HealthCheck answers every request with 200 and a fixed JSON body that
// carries the service name and an "ok" status. It inspects nothing about the
// request.
func HealthCheck(w http.ResponseWriter, r *http.Request) {
	health := map[string]string{
		"service": "k8s-agent-dashboard",
		"status":  "ok",
	}
	respondJSON(w, http.StatusOK, health)
}
// GetClusterStatus builds a handler that replies with 200 and whatever
// cluster status the store currently reports, encoded as JSON.
func GetClusterStatus(s *store.Store) http.HandlerFunc {
	handler := func(w http.ResponseWriter, r *http.Request) {
		respondJSON(w, http.StatusOK, s.GetClusterStatus())
	}
	return handler
}
// GetPendingActions builds a handler that lists the store's pending actions.
// The JSON response carries the list under "actions" and its length under
// "count".
func GetPendingActions(s *store.Store) http.HandlerFunc {
	handler := func(w http.ResponseWriter, r *http.Request) {
		pending := s.GetPendingActions()
		payload := map[string]interface{}{
			"actions": pending,
			"count":   len(pending),
		}
		respondJSON(w, http.StatusOK, payload)
	}
	return handler
}
// ApproveAction returns a handler that approves the pending action named by
// the "id" URL parameter.
//
// The request body is optional. When one is sent it must be valid JSON and may
// carry a "reason" string, which is passed to the store. Responses:
//   - 400 when the id is missing or the body cannot be decoded
//   - 404, with the store's error text, when the store returns an error
//   - 200 with the approved action otherwise
func ApproveAction(s *store.Store) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		id := chi.URLParam(r, "id")
		if id == "" {
			respondError(w, http.StatusBadRequest, "missing action id")
			return
		}

		var body struct {
			Reason string `json:"reason"`
		}
		// More reports false when no JSON value follows (for example an empty
		// body), so a bare POST stays valid while malformed JSON is rejected
		// instead of being silently treated as "no reason given".
		if dec := json.NewDecoder(r.Body); dec.More() {
			if err := dec.Decode(&body); err != nil {
				respondError(w, http.StatusBadRequest, "invalid request body: "+err.Error())
				return
			}
		}

		action, err := s.ApproveAction(id, body.Reason)
		if err != nil {
			respondError(w, http.StatusNotFound, err.Error())
			return
		}

		respondJSON(w, http.StatusOK, map[string]interface{}{
			"status":  "approved",
			"action":  action,
			"message": "Action approved and ready for execution",
		})
	}
}
// RejectAction returns a handler that rejects the pending action named by the
// "id" URL parameter.
//
// The request body is optional. When one is sent it must be valid JSON and may
// carry a "reason" string; an absent or empty reason is replaced by
// "Rejected by user" before the store is called. Responses:
//   - 400 when the id is missing or the body cannot be decoded
//   - 404, with the store's error text, when the store returns an error
//   - 200 with the rejected action otherwise
func RejectAction(s *store.Store) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		id := chi.URLParam(r, "id")
		if id == "" {
			respondError(w, http.StatusBadRequest, "missing action id")
			return
		}

		var body struct {
			Reason string `json:"reason"`
		}
		// More reports false when no JSON value follows (for example an empty
		// body), so a bare POST stays valid while malformed JSON is rejected
		// instead of being silently replaced by the default reason.
		if dec := json.NewDecoder(r.Body); dec.More() {
			if err := dec.Decode(&body); err != nil {
				respondError(w, http.StatusBadRequest, "invalid request body: "+err.Error())
				return
			}
		}
		if body.Reason == "" {
			body.Reason = "Rejected by user"
		}

		action, err := s.RejectAction(id, body.Reason)
		if err != nil {
			respondError(w, http.StatusNotFound, err.Error())
			return
		}

		respondJSON(w, http.StatusOK, map[string]interface{}{
			"status":  "rejected",
			"action":  action,
			"message": "Action rejected",
		})
	}
}
// GetActionHistory builds a handler that returns action history from the
// store. The optional "limit" query parameter is forwarded to the store when
// it parses as an integer; when it is absent or unparsable the limit is 50.
// The JSON response carries the entries under "history" and their number
// under "count".
func GetActionHistory(s *store.Store) http.HandlerFunc {
	const defaultLimit = 50

	return func(w http.ResponseWriter, r *http.Request) {
		limit := defaultLimit
		if raw := r.URL.Query().Get("limit"); raw != "" {
			parsed, err := strconv.Atoi(raw)
			if err == nil {
				limit = parsed
			}
		}

		entries := s.GetActionHistory(limit)
		payload := map[string]interface{}{
			"history": entries,
			"count":   len(entries),
		}
		respondJSON(w, http.StatusOK, payload)
	}
}
// GetWorkflows builds a handler that lists the workflows reported by the
// store. The JSON response carries the list under "workflows" and its length
// under "count".
func GetWorkflows(s *store.Store) http.HandlerFunc {
	handler := func(w http.ResponseWriter, r *http.Request) {
		defined := s.GetWorkflows()
		payload := map[string]interface{}{
			"workflows": defined,
			"count":     len(defined),
		}
		respondJSON(w, http.StatusOK, payload)
	}
	return handler
}
// RunWorkflow builds a handler that acknowledges a request to run the
// workflow named by the "name" URL parameter. It replies 400 when the name is
// missing and 202 otherwise. Nothing is executed and the store argument is
// not consulted.
func RunWorkflow(s *store.Store) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		workflow := chi.URLParam(r, "name")
		if workflow == "" {
			respondError(w, http.StatusBadRequest, "missing workflow name")
			return
		}

		// Phase 2 only acknowledges the request; actual execution via
		// Claude Code is planned for Phase 3.
		ack := map[string]interface{}{
			"status":   "queued",
			"workflow": workflow,
			"message":  "Workflow queued for execution. Use Claude Code CLI to run workflows.",
		}
		respondJSON(w, http.StatusAccepted, ack)
	}
}

View File

@@ -0,0 +1,80 @@
package models
import "time"
// ClusterStatus is a snapshot of overall cluster health together with the
// per-node, per-alert and per-application detail behind it.
type ClusterStatus struct {
	// Health is the overall verdict: Healthy, Degraded, Critical.
	Health string `json:"health"`
	// UpdatedAt is when this snapshot was last updated.
	UpdatedAt time.Time `json:"updated_at"`
	// Nodes holds one entry per cluster node.
	Nodes []NodeStatus `json:"nodes"`
	// Alerts holds the alerts included in this snapshot.
	Alerts []Alert `json:"alerts"`
	// Apps holds one entry per ArgoCD application.
	Apps []AppStatus `json:"apps"`
}
// NodeStatus describes one node of the cluster.
type NodeStatus struct {
	// Name identifies the node.
	Name string `json:"name"`
	// Status is the node readiness: Ready, NotReady.
	Status string `json:"status"`
	// CPU is serialized as "cpu_percent".
	CPU float64 `json:"cpu_percent"`
	// Memory is serialized as "memory_percent".
	Memory float64 `json:"memory_percent"`
	// Conditions summarises node conditions: OK, MemoryPressure,
	// DiskPressure, etc.
	Conditions string `json:"conditions"`
}
// Alert describes a single Prometheus/Alertmanager alert.
type Alert struct {
	// Name is the alert name.
	Name string `json:"name"`
	// Severity is the alert level: warning, critical.
	Severity string `json:"severity"`
	// Description is the human-readable alert text.
	Description string `json:"description"`
	// FiringAt is serialized as "firing_at".
	FiringAt time.Time `json:"firing_at"`
}
// AppStatus describes the state of one ArgoCD application.
type AppStatus struct {
	// Name is the application name.
	Name string `json:"name"`
	// SyncStatus is the sync state: Synced, OutOfSync.
	SyncStatus string `json:"sync_status"`
	// Health is the application health: Healthy, Progressing, Degraded.
	Health string `json:"health"`
	// Revision is serialized as "revision".
	Revision string `json:"revision"`
}
// PendingAction is an action that is waiting for a user to approve or reject
// it.
type PendingAction struct {
	// ID identifies the action.
	ID string `json:"id"`
	// CreatedAt is serialized as "created_at".
	CreatedAt time.Time `json:"created_at"`
	// Agent is serialized as "agent".
	Agent string `json:"agent"`
	// Action is serialized as "action".
	Action string `json:"action"`
	// Description is the human-readable summary of the action.
	Description string `json:"description"`
	// Details carries free-form, action-specific data.
	Details map[string]interface{} `json:"details"`
	// Risk is the risk level: low, medium, high.
	Risk string `json:"risk"`
	// Workflow is left out of the JSON when empty.
	Workflow string `json:"workflow,omitempty"`
}
// ActionDecision records what a user decided about a pending action.
type ActionDecision struct {
	// ID is serialized as "id".
	ID string `json:"id"`
	// Decision is the outcome: approved, rejected.
	Decision string `json:"decision"`
	// DecidedAt is serialized as "decided_at".
	DecidedAt time.Time `json:"decided_at"`
	// DecidedBy is left out of the JSON when empty.
	DecidedBy string `json:"decided_by,omitempty"`
	// Reason is left out of the JSON when empty.
	Reason string `json:"reason,omitempty"`
}
// ActionHistory is one entry in the record of completed actions.
type ActionHistory struct {
	// ID is serialized as "id".
	ID string `json:"id"`
	// Timestamp is serialized as "timestamp".
	Timestamp time.Time `json:"timestamp"`
	// Agent is serialized as "agent".
	Agent string `json:"agent"`
	// Action is serialized as "action".
	Action string `json:"action"`
	// Description is the human-readable summary of the action.
	Description string `json:"description"`
	// Details carries free-form data and is left out of the JSON when empty.
	Details map[string]interface{} `json:"details,omitempty"`
	// Result is the outcome: success, failed.
	Result string `json:"result"`
	// AutoApproved is serialized as "auto_approved".
	AutoApproved bool `json:"auto_approved"`
	// Workflow is left out of the JSON when empty.
	Workflow string `json:"workflow,omitempty"`
}
// Workflow describes a defined workflow and, optionally, its run state.
type Workflow struct {
	// Name identifies the workflow.
	Name string `json:"name"`
	// Description is the human-readable summary of the workflow.
	Description string `json:"description"`
	// Triggers is serialized as "triggers".
	Triggers []string `json:"triggers"`
	// LastRun is left out of the JSON when nil.
	LastRun *time.Time `json:"last_run,omitempty"`
	// Status is the run state (idle, running, completed, failed) and is left
	// out of the JSON when empty.
	Status string `json:"status,omitempty"`
}

View File

@@ -0,0 +1,244 @@
package store
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync"
"time"
"github.com/will/k8s-agent-dashboard/internal/models"
)
// Store keeps the dashboard's state in memory and persists it as JSON files
// inside dataDir. The methods take mu before touching the cached fields.
type Store struct {
	// dataDir is the directory that holds the JSON state files.
	dataDir string
	// mu guards the cached fields below.
	mu sync.RWMutex

	// In-memory copies of the state.
	status    *models.ClusterStatus
	pending   []models.PendingAction
	history   []models.ActionHistory
	workflows []models.Workflow
}
// New returns a Store backed by dataDir. The directory is created (mode 0755)
// if it does not exist, and any state already saved there is loaded. An error
// is returned when the directory cannot be created or loading fails.
func New(dataDir string) (*Store, error) {
	if err := os.MkdirAll(dataDir, 0755); err != nil {
		return nil, fmt.Errorf("failed to create data dir: %w", err)
	}

	// Start from empty, non-nil slices; load replaces them when files exist.
	st := &Store{dataDir: dataDir}
	st.pending = []models.PendingAction{}
	st.history = []models.ActionHistory{}
	st.workflows = []models.Workflow{}

	if err := st.load(); err != nil {
		return nil, err
	}
	return st, nil
}
// load fills the in-memory state from pending.json, history.json and
// status.json in the data directory.
//
// A file that does not exist is skipped and leaves the corresponding field
// untouched. Any other read failure (for example a permission error) and any
// parse failure is returned, so the store never starts from empty state and
// later overwrites data it simply could not read.
func (s *Store) load() error {
	// readState decodes <dataDir>/<name> into dst. found is false when the
	// file does not exist.
	readState := func(name string, dst interface{}) (found bool, err error) {
		raw, err := os.ReadFile(filepath.Join(s.dataDir, name))
		if os.IsNotExist(err) {
			return false, nil
		}
		if err != nil {
			return false, fmt.Errorf("failed to read %s: %w", name, err)
		}
		if err := json.Unmarshal(raw, dst); err != nil {
			return false, fmt.Errorf("failed to parse %s: %w", name, err)
		}
		return true, nil
	}

	if _, err := readState("pending.json", &s.pending); err != nil {
		return err
	}
	if _, err := readState("history.json", &s.history); err != nil {
		return err
	}

	status := &models.ClusterStatus{}
	found, err := readState("status.json", status)
	if err != nil {
		return err
	}
	if found {
		s.status = status
	}

	// A file containing JSON null decodes to a nil slice; keep the slices
	// non-nil so they are encoded as [] rather than null.
	if s.pending == nil {
		s.pending = []models.PendingAction{}
	}
	if s.history == nil {
		s.history = []models.ActionHistory{}
	}
	return nil
}
// save writes data as indented JSON to filename inside the data directory.
//
// The content is written to a temporary file in the same directory and then
// renamed over the target, so a crash or a full disk part-way through never
// leaves a truncated file behind for the next start-up to choke on. The
// resulting file has mode 0644.
func (s *Store) save(filename string, data interface{}) error {
	encoded, err := json.MarshalIndent(data, "", " ")
	if err != nil {
		return err
	}

	// The temp file must live in dataDir so the rename stays on one filesystem.
	tmp, err := os.CreateTemp(s.dataDir, filename+".*.tmp")
	if err != nil {
		return err
	}
	tmpPath := tmp.Name()

	if _, err = tmp.Write(encoded); err == nil {
		// CreateTemp uses mode 0600; match what the file is expected to have.
		err = tmp.Chmod(0644)
	}
	if err == nil {
		// Flush before the rename so the new name never refers to data that
		// has not reached the disk yet.
		err = tmp.Sync()
	}
	if closeErr := tmp.Close(); err == nil {
		err = closeErr
	}
	if err == nil {
		err = os.Rename(tmpPath, filepath.Join(s.dataDir, filename))
	}
	if err != nil {
		// Best-effort cleanup; the original error is the one worth reporting.
		_ = os.Remove(tmpPath)
		return err
	}
	return nil
}
// GetClusterStatus returns the cached cluster status. When no status has been
// stored yet it returns a placeholder with Health "Unknown", the current time
// and empty node, alert and app lists.
func (s *Store) GetClusterStatus() *models.ClusterStatus {
	s.mu.RLock()
	defer s.mu.RUnlock()

	if current := s.status; current != nil {
		return current
	}

	// Nothing stored yet: hand out a placeholder instead of nil.
	placeholder := &models.ClusterStatus{
		Health:    "Unknown",
		UpdatedAt: time.Now(),
		Nodes:     []models.NodeStatus{},
		Alerts:    []models.Alert{},
		Apps:      []models.AppStatus{},
	}
	return placeholder
}
// UpdateClusterStatus replaces the cached cluster status with status and
// writes it to status.json. The UpdatedAt field of status is overwritten with
// the current time. It returns an error when status is nil or the file cannot
// be written.
func (s *Store) UpdateClusterStatus(status *models.ClusterStatus) error {
	// Reject nil up front; dereferencing it below would panic.
	if status == nil {
		return fmt.Errorf("cluster status must not be nil")
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	status.UpdatedAt = time.Now()
	s.status = status
	return s.save("status.json", status)
}
// GetPendingActions returns a snapshot of the pending actions.
//
// The result is a shallow copy of the store's slice (always non-nil), so the
// caller can keep reading it after the lock has been released without racing
// against later modifications of the store's own slice. Maps inside the
// elements are still shared with the store.
func (s *Store) GetPendingActions() []models.PendingAction {
	s.mu.RLock()
	defer s.mu.RUnlock()

	snapshot := make([]models.PendingAction, len(s.pending))
	copy(snapshot, s.pending)
	return snapshot
}
// AddPendingAction appends action to the pending list, with its CreatedAt set
// to the current time, and writes the list to pending.json. The error from
// that write is returned.
func (s *Store) AddPendingAction(action models.PendingAction) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	stamped := action
	stamped.CreatedAt = time.Now()
	s.pending = append(s.pending, stamped)

	return s.save("pending.json", s.pending)
}
// ApproveAction moves the pending action with the given id into the history
// with result "approved" and returns a copy of the action.
//
// A non-empty reason is recorded by appending " (APPROVED: <reason>)" to the
// history entry's description. The history is capped at the 100 most recent
// entries. Both history.json and pending.json are written before the
// in-memory state changes; if either write fails the error is returned and
// the action stays pending. An unknown id yields an "action not found" error.
func (s *Store) ApproveAction(id string, reason string) (*models.PendingAction, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	idx := -1
	for i := range s.pending {
		if s.pending[i].ID == id {
			idx = i
			break
		}
	}
	if idx < 0 {
		return nil, fmt.Errorf("action not found: %s", id)
	}
	action := s.pending[idx]

	description := action.Description
	if reason != "" {
		description += " (APPROVED: " + reason + ")"
	}
	entry := models.ActionHistory{
		ID:           action.ID,
		Timestamp:    time.Now(),
		Agent:        action.Agent,
		Action:       action.Action,
		Description:  description,
		Details:      action.Details,
		Result:       "approved",
		AutoApproved: false,
		Workflow:     action.Workflow,
	}

	// Build fresh slices instead of shifting elements in place, so nothing
	// that still refers to the old backing array sees it change.
	remaining := make([]models.PendingAction, 0, len(s.pending)-1)
	remaining = append(remaining, s.pending[:idx]...)
	remaining = append(remaining, s.pending[idx+1:]...)

	history := make([]models.ActionHistory, 0, len(s.history)+1)
	history = append(history, entry) // newest first
	history = append(history, s.history...)
	// Keep only the last 100 history entries.
	if len(history) > 100 {
		history = history[:100]
	}

	// Persist first and commit afterwards, so a failed write cannot leave the
	// in-memory state ahead of what is on disk.
	if err := s.save("history.json", history); err != nil {
		return nil, fmt.Errorf("failed to save history.json: %w", err)
	}
	if err := s.save("pending.json", remaining); err != nil {
		return nil, fmt.Errorf("failed to save pending.json: %w", err)
	}
	s.pending = remaining
	s.history = history

	return &action, nil
}
// RejectAction moves the pending action with the given id into the history
// with result "rejected" and returns a copy of the action.
//
// The history entry's description is the action's description followed by
// " (REJECTED: <reason>)". The history is capped at the 100 most recent
// entries. Both history.json and pending.json are written before the
// in-memory state changes; if either write fails the error is returned and
// the action stays pending. An unknown id yields an "action not found" error.
func (s *Store) RejectAction(id string, reason string) (*models.PendingAction, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	idx := -1
	for i := range s.pending {
		if s.pending[i].ID == id {
			idx = i
			break
		}
	}
	if idx < 0 {
		return nil, fmt.Errorf("action not found: %s", id)
	}
	action := s.pending[idx]

	entry := models.ActionHistory{
		ID:           action.ID,
		Timestamp:    time.Now(),
		Agent:        action.Agent,
		Action:       action.Action,
		Description:  action.Description + " (REJECTED: " + reason + ")",
		Details:      action.Details,
		Result:       "rejected",
		AutoApproved: false,
		Workflow:     action.Workflow,
	}

	// Build fresh slices instead of shifting elements in place, so nothing
	// that still refers to the old backing array sees it change.
	remaining := make([]models.PendingAction, 0, len(s.pending)-1)
	remaining = append(remaining, s.pending[:idx]...)
	remaining = append(remaining, s.pending[idx+1:]...)

	history := make([]models.ActionHistory, 0, len(s.history)+1)
	history = append(history, entry) // newest first
	history = append(history, s.history...)
	// Keep only the last 100 history entries.
	if len(history) > 100 {
		history = history[:100]
	}

	// Persist first and commit afterwards, so a failed write cannot leave the
	// in-memory state ahead of what is on disk.
	if err := s.save("history.json", history); err != nil {
		return nil, fmt.Errorf("failed to save history.json: %w", err)
	}
	if err := s.save("pending.json", remaining); err != nil {
		return nil, fmt.Errorf("failed to save pending.json: %w", err)
	}
	s.pending = remaining
	s.history = history

	return &action, nil
}
// GetActionHistory returns the first limit entries of the cached history.
// When limit is zero, negative, or larger than the number of entries, the
// whole history is returned.
func (s *Store) GetActionHistory(limit int) []models.ActionHistory {
	s.mu.RLock()
	defer s.mu.RUnlock()

	if limit > 0 && limit <= len(s.history) {
		return s.history[:limit]
	}
	return s.history
}
// GetWorkflows returns a fixed list of three predefined workflows, each with
// status "idle". The list is hard-coded here (mirroring ~/.claude/workflows)
// and is not read from the store's cached fields.
func (s *Store) GetWorkflows() []models.Workflow {
	s.mu.RLock()
	defer s.mu.RUnlock()

	const idle = "idle"

	healthCheck := models.Workflow{
		Name:        "cluster-health-check",
		Description: "Comprehensive cluster health assessment",
		Triggers:    []string{"schedule: 0 */6 * * *", "manual"},
		Status:      idle,
	}
	deployApp := models.Workflow{
		Name:        "deploy-app",
		Description: "Deploy or update an application",
		Triggers:    []string{"manual"},
		Status:      idle,
	}
	crashLoop := models.Workflow{
		Name:        "pod-crashloop-remediation",
		Description: "Diagnose and remediate pods in CrashLoopBackOff",
		Triggers:    []string{"alert: KubePodCrashLooping", "manual"},
		Status:      idle,
	}

	return []models.Workflow{healthCheck, deployApp, crashLoop}
}