Automation components for scheduled and event-driven workflows:
Scheduler:
- scheduler.sh for cron-based workflow execution
- Logs workflow runs to ~/.claude/logs/workflows/
- Notifies dashboard on completion
Alertmanager Integration:
- webhook-receiver.sh for processing alerts
- Dashboard endpoint /api/webhooks/alertmanager
- Example alertmanager-config.yaml with routing rules
- Maps alerts to workflows (crashloop, node issues, resources)
New Incident Workflows:
- node-issue-response.yaml: Handle NotReady/unreachable nodes
- resource-pressure-response.yaml: Respond to memory/CPU overcommit
- argocd-sync-failure.yaml: Investigate and fix sync failures
Dashboard Updates:
- POST /api/webhooks/alertmanager endpoint
- POST /api/workflows/{name}/complete endpoint
- Alerts create pending actions for visibility
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
249 lines
6.5 KiB
Go
249 lines
6.5 KiB
Go
package api
|
|
|
|
import (
|
|
"encoding/json"
|
|
"net/http"
|
|
"strconv"
|
|
|
|
"github.com/go-chi/chi/v5"
|
|
"github.com/will/k8s-agent-dashboard/internal/models"
|
|
"github.com/will/k8s-agent-dashboard/internal/store"
|
|
)
|
|
|
|
// respondJSON writes data to w as a JSON document with the given HTTP status
// and a Content-Type of application/json.
//
// The value is marshalled before any header is sent. If it cannot be encoded,
// the client receives a 500 with a small JSON error body instead of the
// requested status followed by an empty or truncated body.
func respondJSON(w http.ResponseWriter, status int, data interface{}) {
	w.Header().Set("Content-Type", "application/json")

	payload, err := json.Marshal(data)
	if err != nil {
		w.WriteHeader(http.StatusInternalServerError)
		// Nothing useful can be done if this write fails: the status line
		// has already been sent.
		_, _ = w.Write([]byte("{\"error\":\"failed to encode response\"}\n"))
		return
	}

	w.WriteHeader(status)
	// json.Encoder.Encode ends every document with a newline; keep that
	// framing so response bodies stay byte-identical for existing clients.
	// A write error here means the client went away, so it is ignored.
	_, _ = w.Write(append(payload, '\n'))
}
|
|
|
|
func respondError(w http.ResponseWriter, status int, message string) {
|
|
respondJSON(w, status, map[string]string{"error": message})
|
|
}
|
|
|
|
// HealthCheck returns API health status
|
|
func HealthCheck(w http.ResponseWriter, r *http.Request) {
|
|
respondJSON(w, http.StatusOK, map[string]string{
|
|
"status": "ok",
|
|
"service": "k8s-agent-dashboard",
|
|
})
|
|
}
|
|
|
|
// GetClusterStatus returns current cluster status
|
|
func GetClusterStatus(s *store.Store) http.HandlerFunc {
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
|
status := s.GetClusterStatus()
|
|
respondJSON(w, http.StatusOK, status)
|
|
}
|
|
}
|
|
|
|
// GetPendingActions returns all pending actions
|
|
func GetPendingActions(s *store.Store) http.HandlerFunc {
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
|
actions := s.GetPendingActions()
|
|
respondJSON(w, http.StatusOK, map[string]interface{}{
|
|
"count": len(actions),
|
|
"actions": actions,
|
|
})
|
|
}
|
|
}
|
|
|
|
// ApproveAction approves a pending action
|
|
func ApproveAction(s *store.Store) http.HandlerFunc {
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
|
id := chi.URLParam(r, "id")
|
|
if id == "" {
|
|
respondError(w, http.StatusBadRequest, "missing action id")
|
|
return
|
|
}
|
|
|
|
var body struct {
|
|
Reason string `json:"reason"`
|
|
}
|
|
json.NewDecoder(r.Body).Decode(&body)
|
|
|
|
action, err := s.ApproveAction(id, body.Reason)
|
|
if err != nil {
|
|
respondError(w, http.StatusNotFound, err.Error())
|
|
return
|
|
}
|
|
|
|
respondJSON(w, http.StatusOK, map[string]interface{}{
|
|
"status": "approved",
|
|
"action": action,
|
|
"message": "Action approved and ready for execution",
|
|
})
|
|
}
|
|
}
|
|
|
|
// RejectAction rejects a pending action
|
|
func RejectAction(s *store.Store) http.HandlerFunc {
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
|
id := chi.URLParam(r, "id")
|
|
if id == "" {
|
|
respondError(w, http.StatusBadRequest, "missing action id")
|
|
return
|
|
}
|
|
|
|
var body struct {
|
|
Reason string `json:"reason"`
|
|
}
|
|
json.NewDecoder(r.Body).Decode(&body)
|
|
|
|
if body.Reason == "" {
|
|
body.Reason = "Rejected by user"
|
|
}
|
|
|
|
action, err := s.RejectAction(id, body.Reason)
|
|
if err != nil {
|
|
respondError(w, http.StatusNotFound, err.Error())
|
|
return
|
|
}
|
|
|
|
respondJSON(w, http.StatusOK, map[string]interface{}{
|
|
"status": "rejected",
|
|
"action": action,
|
|
"message": "Action rejected",
|
|
})
|
|
}
|
|
}
|
|
|
|
// GetActionHistory returns action history
|
|
func GetActionHistory(s *store.Store) http.HandlerFunc {
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
|
limitStr := r.URL.Query().Get("limit")
|
|
limit := 50
|
|
if limitStr != "" {
|
|
if l, err := strconv.Atoi(limitStr); err == nil {
|
|
limit = l
|
|
}
|
|
}
|
|
|
|
history := s.GetActionHistory(limit)
|
|
respondJSON(w, http.StatusOK, map[string]interface{}{
|
|
"count": len(history),
|
|
"history": history,
|
|
})
|
|
}
|
|
}
|
|
|
|
// GetWorkflows returns defined workflows
|
|
func GetWorkflows(s *store.Store) http.HandlerFunc {
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
|
workflows := s.GetWorkflows()
|
|
respondJSON(w, http.StatusOK, map[string]interface{}{
|
|
"count": len(workflows),
|
|
"workflows": workflows,
|
|
})
|
|
}
|
|
}
|
|
|
|
// RunWorkflow triggers a workflow execution
|
|
func RunWorkflow(s *store.Store) http.HandlerFunc {
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
|
name := chi.URLParam(r, "name")
|
|
if name == "" {
|
|
respondError(w, http.StatusBadRequest, "missing workflow name")
|
|
return
|
|
}
|
|
|
|
// Queue workflow for execution
|
|
respondJSON(w, http.StatusAccepted, map[string]interface{}{
|
|
"status": "queued",
|
|
"workflow": name,
|
|
"message": "Workflow queued for execution. Use Claude Code CLI to run workflows.",
|
|
})
|
|
}
|
|
}
|
|
|
|
// AlertmanagerWebhook receives alerts from Alertmanager
|
|
func AlertmanagerWebhook(s *store.Store) http.HandlerFunc {
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
|
var payload struct {
|
|
Alerts []struct {
|
|
Status string `json:"status"`
|
|
Labels map[string]string `json:"labels"`
|
|
Annotations map[string]string `json:"annotations"`
|
|
StartsAt string `json:"startsAt"`
|
|
EndsAt string `json:"endsAt"`
|
|
} `json:"alerts"`
|
|
}
|
|
|
|
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
|
|
respondError(w, http.StatusBadRequest, "invalid payload")
|
|
return
|
|
}
|
|
|
|
processed := 0
|
|
for _, alert := range payload.Alerts {
|
|
if alert.Status != "firing" {
|
|
continue
|
|
}
|
|
|
|
alertName := alert.Labels["alertname"]
|
|
namespace := alert.Labels["namespace"]
|
|
pod := alert.Labels["pod"]
|
|
|
|
// Map alerts to workflows and create pending actions
|
|
var workflow string
|
|
var description string
|
|
|
|
switch alertName {
|
|
case "KubePodCrashLooping", "KubePodNotReady":
|
|
workflow = "pod-crashloop-remediation"
|
|
description = "Pod " + pod + " in " + namespace + " is " + alertName
|
|
case "KubeNodeNotReady", "KubeNodeUnreachable":
|
|
workflow = "node-issue-response"
|
|
description = "Node issue: " + alertName
|
|
case "KubeMemoryOvercommit", "KubeCPUOvercommit":
|
|
workflow = "resource-pressure-response"
|
|
description = "Resource pressure: " + alertName
|
|
default:
|
|
continue
|
|
}
|
|
|
|
// Log the alert as a pending action for visibility
|
|
s.AddPendingAction(models.PendingAction{
|
|
ID: "alert-" + alertName + "-" + namespace + "-" + pod,
|
|
Agent: "alertmanager",
|
|
Action: "run-workflow:" + workflow,
|
|
Description: description,
|
|
Risk: "medium",
|
|
Workflow: workflow,
|
|
Details: map[string]interface{}{
|
|
"alertname": alertName,
|
|
"namespace": namespace,
|
|
"pod": pod,
|
|
"labels": alert.Labels,
|
|
},
|
|
})
|
|
processed++
|
|
}
|
|
|
|
respondJSON(w, http.StatusOK, map[string]interface{}{
|
|
"status": "received",
|
|
"processed": processed,
|
|
"total": len(payload.Alerts),
|
|
})
|
|
}
|
|
}
|
|
|
|
// CompleteWorkflow marks a workflow as completed
|
|
func CompleteWorkflow(s *store.Store) http.HandlerFunc {
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
|
name := chi.URLParam(r, "name")
|
|
|
|
var body struct {
|
|
LogFile string `json:"log_file"`
|
|
ExitCode int `json:"exit_code"`
|
|
}
|
|
json.NewDecoder(r.Body).Decode(&body)
|
|
|
|
respondJSON(w, http.StatusOK, map[string]interface{}{
|
|
"status": "completed",
|
|
"workflow": name,
|
|
"log_file": body.LogFile,
|
|
})
|
|
}
|
|
}
|