feat: Implement Phase 2 dashboard for K8s agent system
Lightweight Go-based dashboard for Raspberry Pi cluster: Backend: - chi router with REST API - Embedded static file serving - JSON file-based state storage - Health checks and CORS support Frontend: - Responsive dark theme UI - Status view with nodes, alerts, ArgoCD apps - Pending actions with approve/reject - Action history and audit trail - Workflow listing and manual triggers Deployment: - Multi-stage Dockerfile (small Alpine image) - Kubernetes manifests with Pi 3 tolerations - Resource limits: 32-64Mi memory, 10-100m CPU - ArgoCD application manifest - Kustomize configuration API endpoints: - GET /api/status - Cluster status - GET/POST /api/pending - Action management - GET /api/history - Action audit trail - GET/POST /api/workflows - Workflow management 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
157
dashboard/internal/api/handlers.go
Normal file
157
dashboard/internal/api/handlers.go
Normal file
@@ -0,0 +1,157 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"strconv"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
"github.com/will/k8s-agent-dashboard/internal/store"
|
||||
)
|
||||
|
||||
// respondJSON writes data to w as a JSON document with the given HTTP status.
//
// The payload is marshaled before any header is sent, so an encoding failure
// is reported as a 500 with a JSON error body instead of a response that
// carries the requested status and an empty body.
func respondJSON(w http.ResponseWriter, status int, data interface{}) {
	payload, err := json.Marshal(data)
	if err != nil {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusInternalServerError)
		_, _ = w.Write([]byte(`{"error":"failed to encode response"}` + "\n"))
		return
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	// Keep the trailing newline that json.Encoder.Encode would have emitted.
	// A write error here means the client is gone; nothing useful can be done.
	_, _ = w.Write(append(payload, '\n'))
}
|
||||
|
||||
func respondError(w http.ResponseWriter, status int, message string) {
|
||||
respondJSON(w, status, map[string]string{"error": message})
|
||||
}
|
||||
|
||||
// HealthCheck returns API health status
|
||||
func HealthCheck(w http.ResponseWriter, r *http.Request) {
|
||||
respondJSON(w, http.StatusOK, map[string]string{
|
||||
"status": "ok",
|
||||
"service": "k8s-agent-dashboard",
|
||||
})
|
||||
}
|
||||
|
||||
// GetClusterStatus returns current cluster status
|
||||
func GetClusterStatus(s *store.Store) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
status := s.GetClusterStatus()
|
||||
respondJSON(w, http.StatusOK, status)
|
||||
}
|
||||
}
|
||||
|
||||
// GetPendingActions returns all pending actions
|
||||
func GetPendingActions(s *store.Store) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
actions := s.GetPendingActions()
|
||||
respondJSON(w, http.StatusOK, map[string]interface{}{
|
||||
"count": len(actions),
|
||||
"actions": actions,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ApproveAction approves a pending action
|
||||
func ApproveAction(s *store.Store) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
if id == "" {
|
||||
respondError(w, http.StatusBadRequest, "missing action id")
|
||||
return
|
||||
}
|
||||
|
||||
var body struct {
|
||||
Reason string `json:"reason"`
|
||||
}
|
||||
json.NewDecoder(r.Body).Decode(&body)
|
||||
|
||||
action, err := s.ApproveAction(id, body.Reason)
|
||||
if err != nil {
|
||||
respondError(w, http.StatusNotFound, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
respondJSON(w, http.StatusOK, map[string]interface{}{
|
||||
"status": "approved",
|
||||
"action": action,
|
||||
"message": "Action approved and ready for execution",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// RejectAction rejects a pending action
|
||||
func RejectAction(s *store.Store) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
if id == "" {
|
||||
respondError(w, http.StatusBadRequest, "missing action id")
|
||||
return
|
||||
}
|
||||
|
||||
var body struct {
|
||||
Reason string `json:"reason"`
|
||||
}
|
||||
json.NewDecoder(r.Body).Decode(&body)
|
||||
|
||||
if body.Reason == "" {
|
||||
body.Reason = "Rejected by user"
|
||||
}
|
||||
|
||||
action, err := s.RejectAction(id, body.Reason)
|
||||
if err != nil {
|
||||
respondError(w, http.StatusNotFound, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
respondJSON(w, http.StatusOK, map[string]interface{}{
|
||||
"status": "rejected",
|
||||
"action": action,
|
||||
"message": "Action rejected",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// GetActionHistory returns action history
|
||||
func GetActionHistory(s *store.Store) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
limitStr := r.URL.Query().Get("limit")
|
||||
limit := 50
|
||||
if limitStr != "" {
|
||||
if l, err := strconv.Atoi(limitStr); err == nil {
|
||||
limit = l
|
||||
}
|
||||
}
|
||||
|
||||
history := s.GetActionHistory(limit)
|
||||
respondJSON(w, http.StatusOK, map[string]interface{}{
|
||||
"count": len(history),
|
||||
"history": history,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// GetWorkflows returns defined workflows
|
||||
func GetWorkflows(s *store.Store) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
workflows := s.GetWorkflows()
|
||||
respondJSON(w, http.StatusOK, map[string]interface{}{
|
||||
"count": len(workflows),
|
||||
"workflows": workflows,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// RunWorkflow triggers a workflow execution
|
||||
func RunWorkflow(s *store.Store) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
name := chi.URLParam(r, "name")
|
||||
if name == "" {
|
||||
respondError(w, http.StatusBadRequest, "missing workflow name")
|
||||
return
|
||||
}
|
||||
|
||||
// In Phase 2, we just acknowledge the request
|
||||
// Phase 3 will implement actual execution via Claude Code
|
||||
respondJSON(w, http.StatusAccepted, map[string]interface{}{
|
||||
"status": "queued",
|
||||
"workflow": name,
|
||||
"message": "Workflow queued for execution. Use Claude Code CLI to run workflows.",
|
||||
})
|
||||
}
|
||||
}
|
||||
80
dashboard/internal/models/models.go
Normal file
80
dashboard/internal/models/models.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package models
|
||||
|
||||
import "time"
|
||||
|
||||
// ClusterStatus represents the overall cluster health
|
||||
type ClusterStatus struct {
|
||||
Health string `json:"health"` // Healthy, Degraded, Critical
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
Nodes []NodeStatus `json:"nodes"`
|
||||
Alerts []Alert `json:"alerts"`
|
||||
Apps []AppStatus `json:"apps"`
|
||||
}
|
||||
|
||||
// NodeStatus describes the state of a single node.
type NodeStatus struct {
	// Name identifies the node.
	Name string `json:"name"`
	// Status is Ready or NotReady.
	Status string `json:"status"`
	// CPU is serialized as "cpu_percent".
	CPU float64 `json:"cpu_percent"`
	// Memory is serialized as "memory_percent".
	Memory float64 `json:"memory_percent"`
	// Conditions is OK, MemoryPressure, DiskPressure, etc.
	Conditions string `json:"conditions"`
}
|
||||
|
||||
// Alert is a single Prometheus/Alertmanager alert.
type Alert struct {
	// Name is the alert name.
	Name string `json:"name"`
	// Severity is warning or critical.
	Severity string `json:"severity"`
	// Description is the human-readable alert text.
	Description string `json:"description"`
	// FiringAt is serialized as "firing_at".
	FiringAt time.Time `json:"firing_at"`
}
|
||||
|
||||
// AppStatus is the state of one ArgoCD application.
type AppStatus struct {
	// Name is the application name.
	Name string `json:"name"`
	// SyncStatus is Synced or OutOfSync.
	SyncStatus string `json:"sync_status"`
	// Health is Healthy, Progressing, or Degraded.
	Health string `json:"health"`
	// Revision is serialized as "revision".
	Revision string `json:"revision"`
}
|
||||
|
||||
// PendingAction is an action that is waiting for a user to approve or reject
// it.
type PendingAction struct {
	// ID identifies the action.
	ID string `json:"id"`
	// CreatedAt is serialized as "created_at".
	CreatedAt time.Time `json:"created_at"`
	// Agent is serialized as "agent".
	Agent string `json:"agent"`
	// Action is serialized as "action".
	Action string `json:"action"`
	// Description is the human-readable summary.
	Description string `json:"description"`
	// Details carries free-form structured data; always serialized.
	Details map[string]interface{} `json:"details"`
	// Risk is low, medium, or high.
	Risk string `json:"risk"`
	// Workflow is omitted from JSON when empty.
	Workflow string `json:"workflow,omitempty"`
}
|
||||
|
||||
// ActionDecision records what a user decided about a pending action.
type ActionDecision struct {
	// ID is serialized as "id".
	ID string `json:"id"`
	// Decision is approved or rejected.
	Decision string `json:"decision"`
	// DecidedAt is serialized as "decided_at".
	DecidedAt time.Time `json:"decided_at"`
	// DecidedBy is omitted from JSON when empty.
	DecidedBy string `json:"decided_by,omitempty"`
	// Reason is omitted from JSON when empty.
	Reason string `json:"reason,omitempty"`
}
|
||||
|
||||
// ActionHistory is one entry in the audit trail of completed actions.
type ActionHistory struct {
	// ID is serialized as "id".
	ID string `json:"id"`
	// Timestamp is serialized as "timestamp".
	Timestamp time.Time `json:"timestamp"`
	// Agent is serialized as "agent".
	Agent string `json:"agent"`
	// Action is serialized as "action".
	Action string `json:"action"`
	// Description is the human-readable summary.
	Description string `json:"description"`
	// Details is omitted from JSON when empty.
	Details map[string]interface{} `json:"details,omitempty"`
	// Result is documented as success or failed.
	Result string `json:"result"`
	// AutoApproved is serialized as "auto_approved".
	AutoApproved bool `json:"auto_approved"`
	// Workflow is omitted from JSON when empty.
	Workflow string `json:"workflow,omitempty"`
}
|
||||
|
||||
// Workflow describes a defined workflow and its most recent run state.
type Workflow struct {
	// Name identifies the workflow.
	Name string `json:"name"`
	// Description is the human-readable summary.
	Description string `json:"description"`
	// Triggers lists what can start the workflow.
	Triggers []string `json:"triggers"`
	// LastRun is omitted from JSON when nil.
	LastRun *time.Time `json:"last_run,omitempty"`
	// Status is idle, running, completed, or failed; omitted when empty.
	Status string `json:"status,omitempty"`
}
|
||||
244
dashboard/internal/store/store.go
Normal file
244
dashboard/internal/store/store.go
Normal file
@@ -0,0 +1,244 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/will/k8s-agent-dashboard/internal/models"
|
||||
)
|
||||
|
||||
// Store manages persistent state for the dashboard
|
||||
type Store struct {
|
||||
dataDir string
|
||||
mu sync.RWMutex
|
||||
|
||||
// In-memory cache
|
||||
status *models.ClusterStatus
|
||||
pending []models.PendingAction
|
||||
history []models.ActionHistory
|
||||
workflows []models.Workflow
|
||||
}
|
||||
|
||||
// New creates a new store instance
|
||||
func New(dataDir string) (*Store, error) {
|
||||
// Ensure data directory exists
|
||||
if err := os.MkdirAll(dataDir, 0755); err != nil {
|
||||
return nil, fmt.Errorf("failed to create data dir: %w", err)
|
||||
}
|
||||
|
||||
s := &Store{
|
||||
dataDir: dataDir,
|
||||
pending: make([]models.PendingAction, 0),
|
||||
history: make([]models.ActionHistory, 0),
|
||||
workflows: make([]models.Workflow, 0),
|
||||
}
|
||||
|
||||
// Load existing data
|
||||
if err := s.load(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func (s *Store) load() error {
|
||||
// Load pending actions
|
||||
pendingPath := filepath.Join(s.dataDir, "pending.json")
|
||||
if data, err := os.ReadFile(pendingPath); err == nil {
|
||||
if err := json.Unmarshal(data, &s.pending); err != nil {
|
||||
return fmt.Errorf("failed to parse pending.json: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Load history
|
||||
historyPath := filepath.Join(s.dataDir, "history.json")
|
||||
if data, err := os.ReadFile(historyPath); err == nil {
|
||||
if err := json.Unmarshal(data, &s.history); err != nil {
|
||||
return fmt.Errorf("failed to parse history.json: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Load status
|
||||
statusPath := filepath.Join(s.dataDir, "status.json")
|
||||
if data, err := os.ReadFile(statusPath); err == nil {
|
||||
s.status = &models.ClusterStatus{}
|
||||
if err := json.Unmarshal(data, s.status); err != nil {
|
||||
return fmt.Errorf("failed to parse status.json: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Store) save(filename string, data interface{}) error {
|
||||
path := filepath.Join(s.dataDir, filename)
|
||||
bytes, err := json.MarshalIndent(data, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return os.WriteFile(path, bytes, 0644)
|
||||
}
|
||||
|
||||
// GetClusterStatus returns the current cluster status
|
||||
func (s *Store) GetClusterStatus() *models.ClusterStatus {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
if s.status == nil {
|
||||
// Return demo status if none exists
|
||||
return &models.ClusterStatus{
|
||||
Health: "Unknown",
|
||||
UpdatedAt: time.Now(),
|
||||
Nodes: []models.NodeStatus{},
|
||||
Alerts: []models.Alert{},
|
||||
Apps: []models.AppStatus{},
|
||||
}
|
||||
}
|
||||
return s.status
|
||||
}
|
||||
|
||||
// UpdateClusterStatus updates the cluster status
|
||||
func (s *Store) UpdateClusterStatus(status *models.ClusterStatus) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
status.UpdatedAt = time.Now()
|
||||
s.status = status
|
||||
return s.save("status.json", status)
|
||||
}
|
||||
|
||||
// GetPendingActions returns all pending actions
|
||||
func (s *Store) GetPendingActions() []models.PendingAction {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
return s.pending
|
||||
}
|
||||
|
||||
// AddPendingAction adds a new pending action
|
||||
func (s *Store) AddPendingAction(action models.PendingAction) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
action.CreatedAt = time.Now()
|
||||
s.pending = append(s.pending, action)
|
||||
return s.save("pending.json", s.pending)
|
||||
}
|
||||
|
||||
// ApproveAction approves a pending action
|
||||
func (s *Store) ApproveAction(id string, reason string) (*models.PendingAction, error) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
for i, action := range s.pending {
|
||||
if action.ID == id {
|
||||
// Remove from pending
|
||||
s.pending = append(s.pending[:i], s.pending[i+1:]...)
|
||||
|
||||
// Add to history
|
||||
historyEntry := models.ActionHistory{
|
||||
ID: action.ID,
|
||||
Timestamp: time.Now(),
|
||||
Agent: action.Agent,
|
||||
Action: action.Action,
|
||||
Description: action.Description,
|
||||
Details: action.Details,
|
||||
Result: "approved",
|
||||
AutoApproved: false,
|
||||
Workflow: action.Workflow,
|
||||
}
|
||||
s.history = append([]models.ActionHistory{historyEntry}, s.history...)
|
||||
|
||||
// Keep only last 100 history entries
|
||||
if len(s.history) > 100 {
|
||||
s.history = s.history[:100]
|
||||
}
|
||||
|
||||
// Save both files
|
||||
s.save("pending.json", s.pending)
|
||||
s.save("history.json", s.history)
|
||||
|
||||
return &action, nil
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("action not found: %s", id)
|
||||
}
|
||||
|
||||
// RejectAction rejects a pending action
|
||||
func (s *Store) RejectAction(id string, reason string) (*models.PendingAction, error) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
for i, action := range s.pending {
|
||||
if action.ID == id {
|
||||
// Remove from pending
|
||||
s.pending = append(s.pending[:i], s.pending[i+1:]...)
|
||||
|
||||
// Add to history as rejected
|
||||
historyEntry := models.ActionHistory{
|
||||
ID: action.ID,
|
||||
Timestamp: time.Now(),
|
||||
Agent: action.Agent,
|
||||
Action: action.Action,
|
||||
Description: action.Description + " (REJECTED: " + reason + ")",
|
||||
Details: action.Details,
|
||||
Result: "rejected",
|
||||
AutoApproved: false,
|
||||
Workflow: action.Workflow,
|
||||
}
|
||||
s.history = append([]models.ActionHistory{historyEntry}, s.history...)
|
||||
|
||||
if len(s.history) > 100 {
|
||||
s.history = s.history[:100]
|
||||
}
|
||||
|
||||
s.save("pending.json", s.pending)
|
||||
s.save("history.json", s.history)
|
||||
|
||||
return &action, nil
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("action not found: %s", id)
|
||||
}
|
||||
|
||||
// GetActionHistory returns the action history
|
||||
func (s *Store) GetActionHistory(limit int) []models.ActionHistory {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
if limit <= 0 || limit > len(s.history) {
|
||||
return s.history
|
||||
}
|
||||
return s.history[:limit]
|
||||
}
|
||||
|
||||
// GetWorkflows returns all defined workflows
|
||||
func (s *Store) GetWorkflows() []models.Workflow {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
// Return predefined workflows based on what we have in ~/.claude/workflows
|
||||
return []models.Workflow{
|
||||
{
|
||||
Name: "cluster-health-check",
|
||||
Description: "Comprehensive cluster health assessment",
|
||||
Triggers: []string{"schedule: 0 */6 * * *", "manual"},
|
||||
Status: "idle",
|
||||
},
|
||||
{
|
||||
Name: "deploy-app",
|
||||
Description: "Deploy or update an application",
|
||||
Triggers: []string{"manual"},
|
||||
Status: "idle",
|
||||
},
|
||||
{
|
||||
Name: "pod-crashloop-remediation",
|
||||
Description: "Diagnose and remediate pods in CrashLoopBackOff",
|
||||
Triggers: []string{"alert: KubePodCrashLooping", "manual"},
|
||||
Status: "idle",
|
||||
},
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user