From 5d5fc87c0183b3876dbeafc80ab3ae39a69117b1 Mon Sep 17 00:00:00 2001
From: zap
Date: Sat, 7 Mar 2026 19:03:13 +0000
Subject: [PATCH] feat(ops): add systemd checks for ollama and llama.cpp

---
 scripts/ops-sentinel.sh | 45 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 4 deletions(-)

diff --git a/scripts/ops-sentinel.sh b/scripts/ops-sentinel.sh
index ec03b50..b7b6a3c 100755
--- a/scripts/ops-sentinel.sh
+++ b/scripts/ops-sentinel.sh
@@ -13,6 +13,10 @@ MCP_URL="${MCP_URL:-http://192.168.153.113:18802/mcp}"
 OLLAMA_URL="${OLLAMA_URL:-http://127.0.0.1:11434}"
 OLLAMA_EMBED_MODEL="${OLLAMA_EMBED_MODEL:-nomic-embed-text:latest}"
 OLLAMA_REQUIRED="${OLLAMA_REQUIRED:-false}"
+OLLAMA_SERVICE="${OLLAMA_SERVICE:-ollama.service}"
+LLAMA_CPP_SERVICE="${LLAMA_CPP_SERVICE:-llama-server.service}"
+LLAMA_CPP_URL="${LLAMA_CPP_URL:-http://127.0.0.1:8081/health}"
+LLAMA_CPP_REQUIRED="${LLAMA_CPP_REQUIRED:-false}"
 MINIO_URL="${MINIO_URL:-http://192.168.153.253:9000}"
 DOCKER_EXPECTED_CONTAINERS="${DOCKER_EXPECTED_CONTAINERS:-searxng whisper-server brave-search}"
 DOCKER_REQUIRE_EXPECTED="${DOCKER_REQUIRE_EXPECTED:-false}"
@@ -128,7 +132,28 @@ else
   add_next "Create backup log or fix backup script path"
 fi
 
-# 3) Docker service health (containers + health state)
+# 3) systemd service health (local model runtimes)
+if command -v systemctl >/dev/null 2>&1; then
+  for unit in "$OLLAMA_SERVICE" "$LLAMA_CPP_SERVICE"; do
+    if systemctl is-active --quiet "$unit"; then
+      add_watch "P4 systemd service active: ${unit}"
+    else
+      if [[ "$unit" == "$OLLAMA_SERVICE" && "${OLLAMA_REQUIRED}" != "true" ]]; then
+        add_watch "P3 systemd service inactive: ${unit} (optional)"
+      elif [[ "$unit" == "$LLAMA_CPP_SERVICE" && "${LLAMA_CPP_REQUIRED}" != "true" ]]; then
+        add_watch "P3 systemd service inactive: ${unit} (optional)"
+      else
+        add_now "P1 systemd service inactive: ${unit}"
+        mark_p1
+        add_next "Check unit: systemctl status ${unit}"
+      fi
+    fi
+  done
+else
+  add_watch "P3 systemctl not available"
+fi
+
+# 4) Docker service health (containers + health state)
 if command -v docker >/dev/null 2>&1; then
   if docker ps --format '{{.Names}}\t{{.Status}}' >"$ARTIFACT_DIR/docker-ps.txt" 2>"$ARTIFACT_DIR/docker-ps.err"; then
     for svc in $DOCKER_EXPECTED_CONTAINERS; do
@@ -159,13 +184,25 @@ else
   add_watch "P3 docker CLI not available"
 fi
 
-# 4) Key LAN + local service probes
+# 5) Key LAN + local service probes
 http_probe "searxng" "$SEARX_URL" '^200$'
 http_probe "whisper" "$WHISPER_URL" '^200$'
 http_probe "brave-mcp" "$MCP_URL" '^(200|406)$'
 http_probe "minio-live" "${MINIO_URL%/}/minio/health/live" '^200$'
 
-# 5) Ollama embeddings availability + target model
+if curl -sS -m 6 "$LLAMA_CPP_URL" >"$ARTIFACT_DIR/llamacpp-health.txt" 2>"$ARTIFACT_DIR/llamacpp-health.err"; then
+  add_watch "P4 llama.cpp endpoint responsive (${LLAMA_CPP_URL})"
+else
+  if [[ "${LLAMA_CPP_REQUIRED}" == "true" ]]; then
+    add_now "P1 llama.cpp endpoint unreachable (${LLAMA_CPP_URL})"
+    mark_p1
+    add_next "Check llama.cpp bind address/port and service logs"
+  else
+    add_watch "P3 llama.cpp endpoint unreachable at configured URL (${LLAMA_CPP_URL})"
+  fi
+fi
+
+# 6) Ollama embeddings availability + target model
 if curl -sS -m 6 "${OLLAMA_URL%/}/api/tags" >"$ARTIFACT_DIR/ollama-tags.json" 2>"$ARTIFACT_DIR/ollama-tags.err"; then
   if jq -e --arg model "$OLLAMA_EMBED_MODEL" '.models[]? | select(.name == $model)' "$ARTIFACT_DIR/ollama-tags.json" >/dev/null 2>&1; then
     add_watch "P4 Ollama up; embedding model present (${OLLAMA_EMBED_MODEL})"
@@ -184,7 +221,7 @@ else
   fi
 fi
 
-# 6) Host pressure: disk + memory
+# 7) Host pressure: disk + memory
 root_disk_pct="$(df -P / | awk 'NR==2 {gsub(/%/,"",$5); print $5}' 2>/dev/null || echo 0)"
 if [[ "$root_disk_pct" =~ ^[0-9]+$ ]]; then
   if (( root_disk_pct >= 95 )); then