Automation components for scheduled and event-driven workflows:
Scheduler:
- scheduler.sh for cron-based workflow execution
- Logs workflow runs to ~/.claude/logs/workflows/
- Notifies dashboard on completion
Alertmanager Integration:
- webhook-receiver.sh for processing alerts
- Dashboard endpoint /api/webhooks/alertmanager
- Example alertmanager-config.yaml with routing rules
- Maps alerts to workflows (crashloop, node issues, resources)
New Incident Workflows:
- node-issue-response.yaml: Handle NotReady/unreachable nodes
- resource-pressure-response.yaml: Respond to memory/CPU overcommit
- argocd-sync-failure.yaml: Investigate and fix sync failures
Dashboard Updates:
- POST /api/webhooks/alertmanager endpoint
- POST /api/workflows/{name}/complete endpoint
- Alerts create pending actions for visibility
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
90 lines
2.4 KiB
Bash
Executable File
90 lines
2.4 KiB
Bash
Executable File
#!/bin/bash
# K8s Agent Workflow Scheduler
# Run workflows on a schedule using Claude Code CLI
#
# Usage:
#   ./scheduler.sh <workflow-name>
#   ./scheduler.sh cluster-health-check
#
# Cron examples:
#   # Health check every 6 hours
#   0 */6 * * * /home/will/.claude/automation/scheduler.sh cluster-health-check
#
#   # Daily resource report at 8am
#   0 8 * * * /home/will/.claude/automation/scheduler.sh daily-report
#
# Environment:
#   DASHBOARD_URL  Base URL of the dashboard that is notified on completion
#                  (default: http://localhost:8080).
#
# Exit status:
#   1 on a usage error or when the named workflow cannot be found.

# Abort on unhandled errors, unset variables, and failures in any pipeline stage.
set -euo pipefail

# Directory containing this script, resolved to an absolute path so the
# script behaves the same regardless of the caller's working directory (cron).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Parent of the script directory. Resolved with cd/pwd so that paths derived
# from it (log file names, dashboard payload) do not carry a literal "/..".
CLAUDE_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"

# Where per-run log files are written.
LOG_DIR="${CLAUDE_DIR}/logs/workflows"

# Root of the workflow definitions.
WORKFLOW_DIR="${CLAUDE_DIR}/workflows"

# Get workflow name (first positional argument; empty when omitted).
WORKFLOW="${1:-}"
if [[ -z "${WORKFLOW}" ]]; then
  # Usage text is a diagnostic, so it goes to stderr.
  {
    echo "Usage: $0 <workflow-name>"
    echo "Available workflows:"
    # List every workflow definition under WORKFLOW_DIR, using the same
    # extensions the lookup accepts (yaml, yml, md), with the extension removed.
    # The trailing "|| true" keeps a listing failure from pre-empting the
    # explicit "exit 1" below under set -e / pipefail.
    if [[ -d "${WORKFLOW_DIR}" ]]; then
      find "${WORKFLOW_DIR}" -type f \
        \( -name "*.yaml" -o -name "*.yml" -o -name "*.md" \) \
        | while IFS= read -r f; do
          name="${f##*/}"
          printf '%s\n' "${name%.*}"
        done \
        | sort -u || true
    fi
  } >&2
  exit 1
fi

# The name is spliced into file paths, the log file name and the dashboard
# URL, so a path separator would escape the intended directories.
if [[ "${WORKFLOW}" == */* ]]; then
  echo "Error: Invalid workflow name '${WORKFLOW}' (must not contain '/')" >&2
  exit 1
fi

# Ensure log directory exists (only once the arguments are known to be valid,
# so a usage error has no filesystem side effects).
mkdir -p "${LOG_DIR}"

# Find workflow file
# Search order: extension first (yaml, yml, md), then category directory
# (health, deploy, incidents). The first existing regular file wins, and
# WORKFLOW_FILE stays empty when nothing matches.
WORKFLOW_FILE=""
for ext in yaml yml md; do
  for dir in health deploy incidents; do
    candidate="${WORKFLOW_DIR}/${dir}/${WORKFLOW}.${ext}"
    if [[ -f "${candidate}" ]]; then
      WORKFLOW_FILE="${candidate}"
      break 2 # leave both loops as soon as a match is found
    fi
  done
done

if [[ -z "${WORKFLOW_FILE}" ]]; then
  # Errors go to stderr; the second line shows where the lookup was attempted.
  echo "Error: Workflow '${WORKFLOW}' not found" >&2
  echo "Searched: ${WORKFLOW_DIR}/{health,deploy,incidents}/${WORKFLOW}.{yaml,yml,md}" >&2
  exit 1
fi

# Generate log filename: <workflow>_<YYYY-MM-DD_HH-MM-SS>.log inside LOG_DIR.
TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S)
LOG_FILE="${LOG_DIR}/${WORKFLOW}_${TIMESTAMP}.log"

echo "Running workflow: ${WORKFLOW}"
echo "Workflow file: ${WORKFLOW_FILE}"
echo "Log file: ${LOG_FILE}"

# Read the workflow up front. A command substitution used as a command
# argument does not trip set -e, so an unreadable file would otherwise be
# passed on silently as an empty prompt.
WORKFLOW_BODY="$(cat "${WORKFLOW_FILE}")" || {
  echo "Error: Cannot read workflow file '${WORKFLOW_FILE}'" >&2
  exit 1
}

# Run Claude Code with the workflow
# Using --print to get output, --dangerously-skip-permissions for automation
#
# The brace group is the left-hand side of a pipeline, so it runs in a
# subshell: variables assigned inside it are not visible afterwards. The
# group therefore exits with Claude's status, and the parent reads that
# status back from PIPESTATUS. Putting the pipeline on the left of "||"
# keeps set -e from aborting the script before the status is recorded
# (this relies on pipefail being enabled, as it is at the top of the script).
PIPE_STATUS=(0 0)
{
  echo "=== Workflow: ${WORKFLOW} ==="
  echo "=== Started: $(date) ==="
  echo ""

  # Capture the status explicitly so the footer is always written,
  # including when Claude fails.
  run_status=0
  claude --print --dangerously-skip-permissions \
    "Run the following workflow: ${WORKFLOW_BODY}" \
    2>&1 || run_status=$?

  echo ""
  echo "=== Completed: $(date) ==="
  echo "=== Exit code: ${run_status} ==="
  exit "${run_status}"
} | tee "${LOG_FILE}" || PIPE_STATUS=("${PIPESTATUS[@]}")

# Stage 0 is the workflow run, stage 1 is tee. Report the run's status, or
# tee's status when the run succeeded but the log could not be written.
EXIT_CODE="${PIPE_STATUS[0]}"
if [[ "${EXIT_CODE}" -eq 0 && "${PIPE_STATUS[1]:-0}" -ne 0 ]]; then
  echo "Warning: failed to write log file '${LOG_FILE}'" >&2
  EXIT_CODE="${PIPE_STATUS[1]}"
fi

# Notify dashboard of completion (if running)
DASHBOARD_URL="${DASHBOARD_URL:-http://localhost:8080}"

# Escape backslashes and double quotes so the log path cannot break out of
# the JSON string it is embedded in.
json_log_file="${LOG_FILE//\\/\\\\}"
json_log_file="${json_log_file//\"/\\\"}"

# Probe the health endpoint first and only post when the dashboard answers.
# Both requests are time-bounded so an unresponsive dashboard cannot stall a
# scheduled run.
if curl -s --connect-timeout 5 --max-time 10 \
  "${DASHBOARD_URL}/api/health" > /dev/null 2>&1; then
  # Best effort: a failed notification must not change the script's result,
  # hence the deliberate "|| true".
  curl -s --connect-timeout 5 --max-time 30 \
    -X POST "${DASHBOARD_URL}/api/workflows/${WORKFLOW}/complete" \
    -H "Content-Type: application/json" \
    -d "{\"log_file\": \"${json_log_file}\", \"exit_code\": ${EXIT_CODE:-0}}" \
    > /dev/null 2>&1 || true
fi

# Exit with the recorded workflow status, or 0 when none was recorded.
exit "${EXIT_CODE:-0}"