feat(heartbeat): add provider error-rate spike check

This commit is contained in:
William Valentin
2026-02-16 13:52:40 -08:00
parent 07340ff0af
commit 71af3b5a42
8 changed files with 120 additions and 6 deletions
+3 -2
View File
@@ -7,7 +7,7 @@
"status": "completed",
"date": "2026-02-16",
"updated": "2026-02-16",
"summary": "Added first-class automation presets and scheduling upgrades: `automation.daily_briefing` now auto-registers an opinionated cron job for morning briefings, and backup scheduling now supports cron expressions via `backup.schedule` plus optional `backup.run_on_start` while preserving interval fallback. Added `BackupScheduler` with `backup.notify` channel alerts, configurable `backup.failure_threshold`, and recovery notifications (`backup.notify_recovery`) so backup failures/recoveries proactively notify operators. Extended heartbeat monitoring with `process_memory` and `backup` checks (with thresholds) so high RSS usage and backup failure streaks proactively trigger health alerts.",
"summary": "Added first-class automation presets and scheduling upgrades: `automation.daily_briefing` now auto-registers an opinionated cron job for morning briefings, and backup scheduling now supports cron expressions via `backup.schedule` plus optional `backup.run_on_start` while preserving interval fallback. Added `BackupScheduler` with `backup.notify` channel alerts, configurable `backup.failure_threshold`, and recovery notifications (`backup.notify_recovery`) so backup failures/recoveries proactively notify operators. Extended heartbeat monitoring with `process_memory`, `backup`, and `provider_errors` checks (with thresholds) so high RSS usage, backup failure streaks, and model-provider error spikes proactively trigger health alerts.",
"files_modified": [
"src/config/schema.ts",
"src/config/schema.test.ts",
@@ -24,6 +24,7 @@
"src/daemon/channels.ts",
"src/daemon/channels.test.ts",
"src/daemon/index.ts",
"src/daemon/services.ts",
"src/gateway/handlers/services.ts",
"src/gateway/handlers/services.test.ts",
"config/default.yaml",
@@ -3315,7 +3316,7 @@
}
},
"overall_progress": {
"total_test_count": 1830,
"total_test_count": 1832,
"all_tests_passing": true,
"p0_completion": "3/3 (100%)",
"p1_completion": "4/4 (100%)",