{
  "updatedAt": "2026-05-14T00:32:57.803Z",
  "createdAt": "2026-05-12T17:48:01.214Z",
  "id": "lDKocSFXBQWQrDd3",
  "name": "Swarm Health Watchdog",
  "description": "Every 15 minutes, checks core swarm endpoints from inside n8n. Alerts after two consecutive failures and reports recoveries to Telegram and Discord.",
  "active": true,
  "isArchived": false,
  "nodes": [
    { "parameters": {}, "type": "n8n-nodes-base.manualTrigger", "typeVersion": 1, "position": [ -620, -100 ], "id": "3759f3cd-fa90-49b6-ad08-322d21f3d727", "name": "Manual Trigger" },
    { "parameters": { "rule": { "interval": [ { "field": "minutes", "minutesInterval": 15 } ] } }, "type": "n8n-nodes-base.scheduleTrigger", "typeVersion": 1.3, "position": [ -620, 100 ], "id": "9d209ddb-8da7-48ad-850c-ec0e452760ca", "name": "Every 15 Minutes" },
    {
      "parameters": {
        "mode": "runOnceForAllItems",
        "jsCode": "const staticData = $getWorkflowStaticData('global');\nconst CONFIG = {\n  timeoutMs: 5000,\n  failureThreshold: 2,\n  reminderEveryFailedRuns: 6,\n};\nconst services = [\n  { key: 'brave', name: 'Brave MCP', port: 18802, url: 'http://172.19.0.1:18802/mcp', ok: [200, 400, 404, 405, 406], docker: 'brave-search' },\n  { key: 'searxng', name: 'SearXNG', port: 18803, url: 'http://172.19.0.1:18803/search?q=health&format=json', ok: [200], docker: 'searxng' },\n  { key: 'litellm', name: 'LiteLLM', port: 18804, url: 'http://172.19.0.1:18804/health/liveliness', ok: [200], docker: 'litellm' },\n  { key: 'kokoro', name: 'Kokoro TTS', port: 18805, url: 'http://172.19.0.1:18805/health', ok: [200], docker: 'kokoro-tts' },\n  { key: 'llamacpp', name: 'llama.cpp', port: 18806, url: 'http://172.19.0.1:18806/health', ok: [200] },\n  { key: 'ollama', name: 'Ollama embeddings', port: 18807, url: 'http://172.19.0.1:18807/api/version', ok: [200] },\n  { key: 'n8n', name: 'n8n', port: 18808, url: 'http://127.0.0.1:5678/healthz', ok: [200], docker: 'n8n-agent' },\n  { key: 'whisper', name: 'Whisper', port: 18811, url: 'http://172.19.0.1:18811/', ok: [200, 404], docker: 'whisper-server' },\n];\n\n// The Code node sandbox has no global fetch() (every probe was failing with\n// 'fetch is not defined'), so requests go through n8n's built-in HTTP helper.\n// Captured up front because `this` is not the node context inside nested functions.\nconst helpers = this.helpers;\n\n// GET a URL and resolve to { status, body } for any HTTP status code.\n// Rejects only on transport-level failures (connection refused, DNS, timeout).\nasync function httpGet(url, timeoutMs = CONFIG.timeoutMs) {\n  const res = await helpers.httpRequest({\n    method: 'GET',\n    url,\n    timeout: timeoutMs,\n    returnFullResponse: true, // expose statusCode alongside the body\n    ignoreHttpStatusErrors: true, // 4xx/5xx are checked against svc.ok, not thrown\n    encoding: 'text',\n  });\n  const raw = res.body;\n  // Normalise to a string in case the helper hands back a parsed object.\n  const body = raw == null ? '' : (typeof raw === 'string' ? raw : JSON.stringify(raw));\n  return { status: res.statusCode, body };\n}\n\n// Fetch Docker container health from host-side endpoint\nconst dockerHealth = {};\ntry {\n  const dhRes = await httpGet('http://172.19.0.1:18809/health', 3000);\n  if (dhRes.status >= 200 && dhRes.status < 300) {\n    const dhData = JSON.parse(dhRes.body);\n    for (const c of (dhData.containers || [])) {\n      dockerHealth[c.name] = c;\n    }\n  }\n} catch (_) {\n  // Docker health endpoint unavailable or not JSON - continue without it\n}\n\n// Container info for a service: the reported entry, an 'unknown' placeholder when\n// the endpoint had no entry, or null for services with no docker name configured.\nfunction dockerInfoFor(svc) {\n  if (!svc.docker) return null;\n  return dockerHealth[svc.docker] || { name: svc.docker, status: 'unknown', health: 'unknown', restarts: -1 };\n}\n\n// Probe one service; never throws, transport errors become status 'error'.\nasync function check(svc) {\n  const started = Date.now();\n  try {\n    const res = await httpGet(svc.url, CONFIG.timeoutMs);\n    return {\n      ...svc,\n      healthy: svc.ok.includes(res.status),\n      status: res.status,\n      ms: Date.now() - started,\n      detail: res.body.slice(0, 160).replace(/\\s+/g, ' ').trim(),\n      docker: dockerInfoFor(svc),\n    };\n  } catch (error) {\n    return {\n      ...svc,\n      healthy: false,\n      status: 'error',\n      ms: Date.now() - started,\n      detail: (error && error.message) || String(error),\n      docker: dockerInfoFor(svc),\n    };\n  }\n}\n\n// Operator hint for a failing service, with container state appended when known.\nfunction suggestedFix(r) {\n  const dh = r.docker;\n  const dockerInfo = dh ? ` [docker: ${dh.status}/${dh.health} restarts=${dh.restarts}]` : '';\n  if (r.key === 'llamacpp') return 'systemctl status llama-server.service; restart only if it is down.' + dockerInfo;\n  if (r.key === 'ollama') return 'systemctl --user status ollama.service; verify port 18807 and nomic-embed-text.' + dockerInfo;\n  if (r.key === 'n8n') return 'docker logs n8n-agent --tail 100; check database/API health.' + dockerInfo;\n  if (['searxng','litellm','kokoro','whisper','brave'].includes(r.key)) return `cd ~/lab/swarm && docker compose ps; inspect ${r.name} logs.${dockerInfo}`;\n  return 'Check service logs and port listener.' + dockerInfo;\n}\n\nconst results = await Promise.all(services.map(check));\nconst now = new Date().toISOString();\nstaticData.services = staticData.services || {};\nconst alerts = [];\nconst recoveries = [];\nfor (const r of results) {\n  const prev = staticData.services[r.key] || { failedRuns: 0, alerted: false };\n  if (r.healthy) {\n    // Only announce a recovery if an alert was actually sent for this outage.\n    if (prev.alerted) recoveries.push({ ...r, previousFailedRuns: prev.failedRuns });\n    staticData.services[r.key] = { failedRuns: 0, alerted: false, lastOk: now, lastStatus: r.status, lastDetail: r.detail };\n  } else {\n    const failedRuns = (prev.failedRuns || 0) + 1;\n    // First alert once the threshold is reached, then a reminder every N failed runs.\n    const shouldAlert = failedRuns >= CONFIG.failureThreshold && (!prev.alerted || (CONFIG.reminderEveryFailedRuns > 0 && failedRuns % CONFIG.reminderEveryFailedRuns === 0));\n    staticData.services[r.key] = { failedRuns, alerted: prev.alerted || shouldAlert, lastFailure: now, lastStatus: r.status, lastDetail: r.detail };\n    if (shouldAlert) alerts.push({ ...r, failedRuns, suggestedFix: suggestedFix(r) });\n  }\n}\n\n// Returning no items stops the workflow here, so nothing is sent when all is quiet.\nif (!alerts.length && !recoveries.length) return [];\nlet lines = [];\nif (alerts.length) {\n  lines.push('\\u{1F6A8} Swarm Health Watchdog');\n  for (const a of alerts) {\n    const dh = a.docker;\n    const dockerStr = dh ? ` | docker:${dh.status}/${dh.health}/restarts=${dh.restarts}` : '';\n    lines.push(`- ${a.name} :${a.port} failed ${a.failedRuns} checks; status=${a.status}${dockerStr}; detail=${a.detail || 'n/a'}; fix=${a.suggestedFix}`);\n  }\n}\nif (recoveries.length) {\n  lines.push('\\u2705 Swarm service recovered');\n  for (const r of recoveries) {\n    const dh = r.docker;\n    const dockerStr = dh ? ` | docker:${dh.status}/${dh.health}` : '';\n    lines.push(`- ${r.name} :${r.port} healthy again; status=${r.status}; latency=${r.ms}ms${dockerStr}`);\n  }\n}\nlines.push(`checked=${now}`);\nreturn [{ json: { text: lines.join('\\n'), alerts, recoveries, results, dockerHealth, checkedAt: now } }];"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [ -340, 0 ],
      "id": "b3f76d53-204b-45bb-9a48-8cf20262319d",
      "name": "Check Swarm Services"
    },
    { "parameters": { "chatId": "8367012007", "text": "={{$json.text}}", "additionalFields": { "parse_mode": "Markdown" } }, "type": "n8n-nodes-base.telegram", "typeVersion": 1.2, "position": [ -80, -80 ], "id": "32d7ad9f-80bb-4acf-b546-89f04db32a6a", "name": "Send Telegram Alert", "credentials": { "telegramApi": { "id": "aox4dyIWVSRdcH5z", "name": "Telegram Bot (OpenClaw)" } } },
    { "parameters": { "authentication": "predefinedCredentialType", "nodeCredentialType": "httpHeaderAuth", "method": "POST", "url": "https://discord.com/api/v10/channels/425781661268049931/messages", "sendBody": true, "specifyBody": "json", "jsonBody": "={{ { content: $json.text.slice(0, 2000) } }}", "options": {} }, "type": "n8n-nodes-base.httpRequest", "typeVersion": 4.2, "position": [ -80, 100 ], "id": "7eb589f5-6e50-4e1e-8a37-391f06785ad87", "name": "Send Discord Alert", "credentials": { "httpHeaderAuth": { "id": "UgPqYcoCNNIgr55m", "name": "Discord Bot Auth" } } }
  ],
  "connections": {
    "Manual Trigger": { "main": [ [ { "node": "Check Swarm Services", "type": "main", "index": 0 } ] ] },
    "Every 15 Minutes": { "main": [ [ { "node": "Check Swarm Services", "type": "main", "index": 0 } ] ] },
    "Check Swarm Services": { "main": [ [ { "node": "Send Telegram Alert", "type": "main", "index": 0 }, { "node": "Send Discord Alert", "type": "main", "index": 0 } ] ] }
  },
  "settings": { "executionOrder": "v1", "timezone": "America/Los_Angeles", "saveDataErrorExecution": "all", "saveDataSuccessExecution": "all", "callerPolicy": "workflowsFromSameOwner", "availableInMCP": false },
  "staticData": {
    "node:Every 15 Minutes": { "recurrenceRules": [] },
    "global": {
      "services": {
        "brave": { "failedRuns": 4, "alerted": true, "lastFailure": "2026-05-14T00:30:40.067Z", "lastStatus": "error", "lastDetail": "fetch is not defined" },
        "searxng": { "failedRuns": 4, "alerted": true, "lastFailure": "2026-05-14T00:30:40.067Z", "lastStatus": "error", "lastDetail": "fetch is not defined" },
        "litellm": { "failedRuns": 4, "alerted": true, "lastFailure": "2026-05-14T00:30:40.067Z", "lastStatus": "error", "lastDetail": "fetch is not defined" },
        "kokoro": { "failedRuns": 4, "alerted": true, "lastFailure": "2026-05-14T00:30:40.067Z", "lastStatus": "error", "lastDetail": "fetch is not defined" },
        "llamacpp": { "failedRuns": 4, "alerted": true, "lastFailure": "2026-05-14T00:30:40.067Z", "lastStatus": "error", "lastDetail": "fetch is not defined" },
        "ollama": { "failedRuns": 4, "alerted": true, "lastFailure": "2026-05-14T00:30:40.067Z", "lastStatus": "error", "lastDetail": "fetch is not defined" },
        "n8n": { "failedRuns": 4, "alerted": true, "lastFailure": "2026-05-14T00:30:40.067Z", "lastStatus": "error", "lastDetail": "fetch is not defined" },
        "whisper": { "failedRuns": 4, "alerted": true, "lastFailure": "2026-05-14T00:30:40.067Z", "lastStatus": "error", "lastDetail": "fetch is not defined" }
      }
    }
  },
  "meta": null,
  "versionId": "eec5521b-fb44-44ea-b238-aff842560f98",
  "activeVersionId": "eec5521b-fb44-44ea-b238-aff842560f98",
  "versionCounter": 52,
  "triggerCount": 1,
  "shared": [
    {
      "updatedAt": "2026-05-12T17:39:10.124Z",
      "createdAt": "2026-05-12T17:39:10.124Z",
      "role": "workflow:owner",
      "workflowId": "lDKocSFXBQWQrDd3",
      "projectId": "WGdp8QunI1tHpjXa",
      "project": { "updatedAt": "2026-03-11T21:08:10.005Z", "createdAt": "2026-03-11T21:05:11.541Z", "id": "WGdp8QunI1tHpjXa", "name": "will will ", "type": "personal", "icon": null, "description": null, "creatorId": "5ad50ead-6e6a-4d12-ab5b-e5db15835bb5" }
    }
  ],
  "tags": [],
  "activeVersion": {
    "updatedAt": "2026-05-14T00:09:09.316Z",
    "createdAt": "2026-05-14T00:09:09.316Z",
    "versionId": "eec5521b-fb44-44ea-b238-aff842560f98",
    "workflowId": "lDKocSFXBQWQrDd3",
    "nodes": [
      { "parameters": {}, "type": "n8n-nodes-base.manualTrigger", "typeVersion": 1, "position": [ -620, -100 ], "id": "3759f3cd-fa90-49b6-ad08-322d21f3d727", "name": "Manual Trigger" },
      { "parameters": { "rule": { "interval": [ { "field": "minutes", "minutesInterval": 15 } ] } }, "type": "n8n-nodes-base.scheduleTrigger", "typeVersion": 1.3, "position": [ -620, 100 ], "id": "9d209ddb-8da7-48ad-850c-ec0e452760ca", "name": "Every 15 Minutes" },
      {
        "parameters": {
          "mode": "runOnceForAllItems",
          "jsCode": "const staticData = $getWorkflowStaticData('global');\nconst CONFIG = {\n  timeoutMs: 5000,\n  failureThreshold: 2,\n  reminderEveryFailedRuns: 6,\n};\nconst services = [\n  { key: 'brave', name: 'Brave MCP', port: 18802, url: 'http://172.19.0.1:18802/mcp', ok: [200, 400, 404, 405, 406], docker: 'brave-search' },\n  { key: 'searxng', name: 'SearXNG', port: 18803, url: 'http://172.19.0.1:18803/search?q=health&format=json', ok: [200], docker: 'searxng' },\n  { key: 'litellm', name: 'LiteLLM', port: 18804, url: 'http://172.19.0.1:18804/health/liveliness', ok: [200], docker: 'litellm' },\n  { key: 'kokoro', name: 'Kokoro TTS', port: 18805, url: 'http://172.19.0.1:18805/health', ok: [200], docker: 'kokoro-tts' },\n  { key: 'llamacpp', name: 'llama.cpp', port: 18806, url: 'http://172.19.0.1:18806/health', ok: [200] },\n  { key: 'ollama', name: 'Ollama embeddings', port: 18807, url: 'http://172.19.0.1:18807/api/version', ok: [200] },\n  { key: 'n8n', name: 'n8n', port: 18808, url: 'http://127.0.0.1:5678/healthz', ok: [200], docker: 'n8n-agent' },\n  { key: 'whisper', name: 'Whisper', port: 18811, url: 'http://172.19.0.1:18811/', ok: [200, 404], docker: 'whisper-server' },\n];\n\n// The Code node sandbox has no global fetch() (every probe was failing with\n// 'fetch is not defined'), so requests go through n8n's built-in HTTP helper.\n// Captured up front because `this` is not the node context inside nested functions.\nconst helpers = this.helpers;\n\n// GET a URL and resolve to { status, body } for any HTTP status code.\n// Rejects only on transport-level failures (connection refused, DNS, timeout).\nasync function httpGet(url, timeoutMs = CONFIG.timeoutMs) {\n  const res = await helpers.httpRequest({\n    method: 'GET',\n    url,\n    timeout: timeoutMs,\n    returnFullResponse: true, // expose statusCode alongside the body\n    ignoreHttpStatusErrors: true, // 4xx/5xx are checked against svc.ok, not thrown\n    encoding: 'text',\n  });\n  const raw = res.body;\n  // Normalise to a string in case the helper hands back a parsed object.\n  const body = raw == null ? '' : (typeof raw === 'string' ? raw : JSON.stringify(raw));\n  return { status: res.statusCode, body };\n}\n\n// Fetch Docker container health from host-side endpoint\nconst dockerHealth = {};\ntry {\n  const dhRes = await httpGet('http://172.19.0.1:18809/health', 3000);\n  if (dhRes.status >= 200 && dhRes.status < 300) {\n    const dhData = JSON.parse(dhRes.body);\n    for (const c of (dhData.containers || [])) {\n      dockerHealth[c.name] = c;\n    }\n  }\n} catch (_) {\n  // Docker health endpoint unavailable or not JSON - continue without it\n}\n\n// Container info for a service: the reported entry, an 'unknown' placeholder when\n// the endpoint had no entry, or null for services with no docker name configured.\nfunction dockerInfoFor(svc) {\n  if (!svc.docker) return null;\n  return dockerHealth[svc.docker] || { name: svc.docker, status: 'unknown', health: 'unknown', restarts: -1 };\n}\n\n// Probe one service; never throws, transport errors become status 'error'.\nasync function check(svc) {\n  const started = Date.now();\n  try {\n    const res = await httpGet(svc.url, CONFIG.timeoutMs);\n    return {\n      ...svc,\n      healthy: svc.ok.includes(res.status),\n      status: res.status,\n      ms: Date.now() - started,\n      detail: res.body.slice(0, 160).replace(/\\s+/g, ' ').trim(),\n      docker: dockerInfoFor(svc),\n    };\n  } catch (error) {\n    return {\n      ...svc,\n      healthy: false,\n      status: 'error',\n      ms: Date.now() - started,\n      detail: (error && error.message) || String(error),\n      docker: dockerInfoFor(svc),\n    };\n  }\n}\n\n// Operator hint for a failing service, with container state appended when known.\nfunction suggestedFix(r) {\n  const dh = r.docker;\n  const dockerInfo = dh ? ` [docker: ${dh.status}/${dh.health} restarts=${dh.restarts}]` : '';\n  if (r.key === 'llamacpp') return 'systemctl status llama-server.service; restart only if it is down.' + dockerInfo;\n  if (r.key === 'ollama') return 'systemctl --user status ollama.service; verify port 18807 and nomic-embed-text.' + dockerInfo;\n  if (r.key === 'n8n') return 'docker logs n8n-agent --tail 100; check database/API health.' + dockerInfo;\n  if (['searxng','litellm','kokoro','whisper','brave'].includes(r.key)) return `cd ~/lab/swarm && docker compose ps; inspect ${r.name} logs.${dockerInfo}`;\n  return 'Check service logs and port listener.' + dockerInfo;\n}\n\nconst results = await Promise.all(services.map(check));\nconst now = new Date().toISOString();\nstaticData.services = staticData.services || {};\nconst alerts = [];\nconst recoveries = [];\nfor (const r of results) {\n  const prev = staticData.services[r.key] || { failedRuns: 0, alerted: false };\n  if (r.healthy) {\n    // Only announce a recovery if an alert was actually sent for this outage.\n    if (prev.alerted) recoveries.push({ ...r, previousFailedRuns: prev.failedRuns });\n    staticData.services[r.key] = { failedRuns: 0, alerted: false, lastOk: now, lastStatus: r.status, lastDetail: r.detail };\n  } else {\n    const failedRuns = (prev.failedRuns || 0) + 1;\n    // First alert once the threshold is reached, then a reminder every N failed runs.\n    const shouldAlert = failedRuns >= CONFIG.failureThreshold && (!prev.alerted || (CONFIG.reminderEveryFailedRuns > 0 && failedRuns % CONFIG.reminderEveryFailedRuns === 0));\n    staticData.services[r.key] = { failedRuns, alerted: prev.alerted || shouldAlert, lastFailure: now, lastStatus: r.status, lastDetail: r.detail };\n    if (shouldAlert) alerts.push({ ...r, failedRuns, suggestedFix: suggestedFix(r) });\n  }\n}\n\n// Returning no items stops the workflow here, so nothing is sent when all is quiet.\nif (!alerts.length && !recoveries.length) return [];\nlet lines = [];\nif (alerts.length) {\n  lines.push('\\u{1F6A8} Swarm Health Watchdog');\n  for (const a of alerts) {\n    const dh = a.docker;\n    const dockerStr = dh ? ` | docker:${dh.status}/${dh.health}/restarts=${dh.restarts}` : '';\n    lines.push(`- ${a.name} :${a.port} failed ${a.failedRuns} checks; status=${a.status}${dockerStr}; detail=${a.detail || 'n/a'}; fix=${a.suggestedFix}`);\n  }\n}\nif (recoveries.length) {\n  lines.push('\\u2705 Swarm service recovered');\n  for (const r of recoveries) {\n    const dh = r.docker;\n    const dockerStr = dh ? ` | docker:${dh.status}/${dh.health}` : '';\n    lines.push(`- ${r.name} :${r.port} healthy again; status=${r.status}; latency=${r.ms}ms${dockerStr}`);\n  }\n}\nlines.push(`checked=${now}`);\nreturn [{ json: { text: lines.join('\\n'), alerts, recoveries, results, dockerHealth, checkedAt: now } }];"
        },
        "type": "n8n-nodes-base.code",
        "typeVersion": 2,
        "position": [ -340, 0 ],
        "id": "b3f76d53-204b-45bb-9a48-8cf20262319d",
        "name": "Check Swarm Services"
      },
      { "parameters": { "chatId": "8367012007", "text": "={{$json.text}}", "additionalFields": { "parse_mode": "Markdown" } }, "type": "n8n-nodes-base.telegram", "typeVersion": 1.2, "position": [ -80, -80 ], "id": "32d7ad9f-80bb-4acf-b546-89f04db32a6a", "name": "Send Telegram Alert", "credentials": { "telegramApi": { "id": "aox4dyIWVSRdcH5z", "name": "Telegram Bot (OpenClaw)" } } },
      { "parameters": { "authentication": "predefinedCredentialType", "nodeCredentialType": "httpHeaderAuth", "method": "POST", "url": "https://discord.com/api/v10/channels/425781661268049931/messages", "sendBody": true, "specifyBody": "json", "jsonBody": "={{ { content: $json.text.slice(0, 2000) } }}", "options": {} }, "type": "n8n-nodes-base.httpRequest", "typeVersion": 4.2, "position": [ -80, 100 ], "id": "7eb589f5-6e50-4e1e-8a37-391f06785ad87", "name": "Send Discord Alert", "credentials": { "httpHeaderAuth": { "id": "UgPqYcoCNNIgr55m", "name": "Discord Bot Auth" } } }
    ],
    "connections": {
      "Manual Trigger": { "main": [ [ { "node": "Check Swarm Services", "type": "main", "index": 0 } ] ] },
      "Every 15 Minutes": { "main": [ [ { "node": "Check Swarm Services", "type": "main", "index": 0 } ] ] },
      "Check Swarm Services": { "main": [ [ { "node": "Send Telegram Alert", "type": "main", "index": 0 }, { "node": "Send Discord Alert", "type": "main", "index": 0 } ] ] }
    },
    "authors": "will will",
    "name": null,
    "description": null,
    "autosaved": false,
    "workflowPublishHistory": [
      { "createdAt": "2026-05-14T00:09:09.344Z", "id": 1489, "workflowId": "lDKocSFXBQWQrDd3", "versionId": "eec5521b-fb44-44ea-b238-aff842560f98", "event": "activated", "userId": "5ad50ead-6e6a-4d12-ab5b-e5db15835bb5" },
      { "createdAt": "2026-05-14T00:32:57.833Z", "id": 1495, "workflowId": "lDKocSFXBQWQrDd3", "versionId": "eec5521b-fb44-44ea-b238-aff842560f98", "event": "activated", "userId": "5ad50ead-6e6a-4d12-ab5b-e5db15835bb5" },
      { "createdAt": "2026-05-14T00:32:57.790Z", "id": 1494, "workflowId": "lDKocSFXBQWQrDd3", "versionId": "eec5521b-fb44-44ea-b238-aff842560f98", "event": "deactivated", "userId": "5ad50ead-6e6a-4d12-ab5b-e5db15835bb5" }
    ]
  }
}