diff --git a/scripts/ops-sentinel.sh b/scripts/ops-sentinel.sh
index 81e2ee6..a1e0f98 100755
--- a/scripts/ops-sentinel.sh
+++ b/scripts/ops-sentinel.sh
@@ -10,13 +10,14 @@ BACKUP_MAX_AGE_HOURS="${BACKUP_MAX_AGE_HOURS:-8}"
 SEARX_URL="${SEARX_URL:-http://192.168.153.113:18803}"
 WHISPER_URL="${WHISPER_URL:-http://192.168.153.113:18801}"
 MCP_URL="${MCP_URL:-http://192.168.153.113:18802/mcp}"
-OLLAMA_URL="${OLLAMA_URL:-http://192.168.153.113:11434}"
+OLLAMA_URL="${OLLAMA_URL:-http://192.168.153.113:18807}"
 OLLAMA_EMBED_MODEL="${OLLAMA_EMBED_MODEL:-nomic-embed-text:latest}"
-OLLAMA_REQUIRED="${OLLAMA_REQUIRED:-false}"
+OLLAMA_REQUIRED="${OLLAMA_REQUIRED:-true}"
 OLLAMA_SERVICE="${OLLAMA_SERVICE:-ollama.service}"
 LLAMA_CPP_SERVICE="${LLAMA_CPP_SERVICE:-llama-server.service}"
-LLAMA_CPP_URL="${LLAMA_CPP_URL:-http://127.0.0.1:8081/health}"
-LLAMA_CPP_REQUIRED="${LLAMA_CPP_REQUIRED:-false}"
+LLAMA_CPP_URL="${LLAMA_CPP_URL:-http://192.168.153.113:18806/health}"
+LLAMA_CPP_REQUIRED="${LLAMA_CPP_REQUIRED:-true}"
+SYSTEMD_LOCAL_CHECKS="${SYSTEMD_LOCAL_CHECKS:-false}"
 MINIO_URL="${MINIO_URL:-http://192.168.153.253:9000}"
 DOCKER_EXPECTED_CONTAINERS="${DOCKER_EXPECTED_CONTAINERS:-searxng whisper-server brave-search}"
 DOCKER_REQUIRE_EXPECTED="${DOCKER_REQUIRE_EXPECTED:-false}"
@@ -133,24 +134,28 @@ else
 fi
 
 # 3) systemd service health (local model runtimes)
-if command -v systemctl >/dev/null 2>&1; then
-  for unit in "$OLLAMA_SERVICE" "$LLAMA_CPP_SERVICE"; do
-    if systemctl is-active --quiet "$unit"; then
-      add_watch "P4 systemd service active: ${unit}"
-    else
-      if [[ "$unit" == "$OLLAMA_SERVICE" && "${OLLAMA_REQUIRED}" != "true" ]]; then
-        add_watch "P3 systemd service inactive: ${unit} (optional)"
-      elif [[ "$unit" == "$LLAMA_CPP_SERVICE" && "${LLAMA_CPP_REQUIRED}" != "true" ]]; then
-        add_watch "P3 systemd service inactive: ${unit} (optional)"
+if [[ "${SYSTEMD_LOCAL_CHECKS}" == "true" ]]; then
+  if command -v systemctl >/dev/null 2>&1; then
+    for unit in "$OLLAMA_SERVICE" "$LLAMA_CPP_SERVICE"; do
+      if systemctl is-active --quiet "$unit"; then
+        add_watch "P4 systemd service active: ${unit}"
       else
-        add_now "P1 systemd service inactive: ${unit}"
-        mark_p1
-        add_next "Check unit: systemctl status ${unit}"
+        if [[ "$unit" == "$OLLAMA_SERVICE" && "${OLLAMA_REQUIRED}" != "true" ]]; then
+          add_watch "P3 systemd service inactive: ${unit} (optional)"
+        elif [[ "$unit" == "$LLAMA_CPP_SERVICE" && "${LLAMA_CPP_REQUIRED}" != "true" ]]; then
+          add_watch "P3 systemd service inactive: ${unit} (optional)"
+        else
+          add_now "P1 systemd service inactive: ${unit}"
+          mark_p1
+          add_next "Check unit: systemctl status ${unit}"
+        fi
       fi
-    fi
-  done
+    done
+  else
+    add_watch "P3 systemctl not available"
+  fi
 else
-  add_watch "P3 systemctl not available"
+  add_watch "P3 Skipping local systemd checks (SYSTEMD_LOCAL_CHECKS=false)"
 fi
 
 # 4) Docker service health (containers + health state)
@@ -190,14 +195,27 @@ http_probe "whisper" "$WHISPER_URL" '^200$'
 http_probe "brave-mcp" "$MCP_URL" '^(200|406)$'
 http_probe "minio-live" "${MINIO_URL%/}/minio/health/live" '^200$'
 
-if curl -sS -m 6 "$LLAMA_CPP_URL" >"$ARTIFACT_DIR/llamacpp-health.txt" 2>"$ARTIFACT_DIR/llamacpp-health.err"; then
-  add_watch "P4 llama.cpp endpoint responsive (${LLAMA_CPP_URL})"
+# llama.cpp health. Without --fail curl exits 0 on any HTTP response, so the
+# status code decides: only 2xx is healthy; "000"/empty means no HTTP response.
+llama_code="$(curl -sS -m 6 -o "$ARTIFACT_DIR/llamacpp-health.txt" -w '%{http_code}' "$LLAMA_CPP_URL" 2>"$ARTIFACT_DIR/llamacpp-health.err" || true)"
+if [[ "$llama_code" =~ ^2[0-9][0-9]$ ]]; then
+  add_watch "P4 llama.cpp endpoint responsive (${LLAMA_CPP_URL}, HTTP ${llama_code})"
+elif [[ -n "$llama_code" && "$llama_code" != "000" ]]; then
+  # Server answered, but not with success (e.g. 503 while a model is loading).
+  add_watch "P3 llama.cpp endpoint returned HTTP ${llama_code} (${LLAMA_CPP_URL})"
 else
-  if [[ "${LLAMA_CPP_REQUIRED}" == "true" ]]; then
-    add_now "P1 llama.cpp endpoint unreachable (${LLAMA_CPP_URL})"
-    mark_p1
-    add_next "Check llama.cpp bind address/port and service logs"
+  # No HTTP response from the health URL: probe the server root before alerting.
+  llama_root="${LLAMA_CPP_URL%/health}"
+  llama_code_root="$(curl -sS -m 6 -o "$ARTIFACT_DIR/llamacpp-root.txt" -w '%{http_code}' "$llama_root" 2>"$ARTIFACT_DIR/llamacpp-root.err" || true)"
+  if [[ -n "$llama_code_root" && "$llama_code_root" != "000" ]]; then
+    add_watch "P4 llama.cpp host reachable (${llama_root}, HTTP ${llama_code_root})"
   else
-    add_watch "P3 llama.cpp endpoint unreachable at configured URL (${LLAMA_CPP_URL})"
+    if [[ "${LLAMA_CPP_REQUIRED}" == "true" ]]; then
+      add_now "P1 llama.cpp endpoint unreachable (${LLAMA_CPP_URL})"
+      mark_p1
+      add_next "Check llama.cpp bind address/port and service logs"
+    else
+      add_watch "P3 llama.cpp endpoint unreachable at configured URL (${LLAMA_CPP_URL})"
+    fi
   fi
 fi