feat(ops): add setup operator pack, heartbeat alert cooldown, and doctor strict mode

This commit is contained in:
William Valentin
2026-02-16 14:57:56 -08:00
parent 030fb13a26
commit 3210e75c94
12 changed files with 274 additions and 17 deletions
+29
View File
@@ -8,6 +8,7 @@ function makeConfig(overrides?: Partial<HeartbeatConfig>): HeartbeatConfig {
return {
enabled: true,
interval: '5m',
notify_cooldown: '30m',
checks: ['gateway', 'model', 'channels', 'memory', 'disk', 'process_memory', 'backup'],
failure_threshold: 2,
disk_threshold_mb: 100,
@@ -227,6 +228,34 @@ describe('HeartbeatMonitor', () => {
expect(mockSend).toHaveBeenCalledTimes(1);
});
it('suppresses repeat failure notifications inside notify cooldown after recovery', async () => {
  // Each call to sendSpy corresponds to one notification delivered through the looked-up channel.
  const sendSpy = vi.fn().mockResolvedValue(undefined);
  const lookupSpy = vi.fn().mockReturnValue({ send: sendSpy });

  // Restrict the run to the 'model' check, alert on the first failure, and use a
  // one-hour cooldown so that every run in this test falls inside the cooldown window.
  const cooldownConfig = makeConfig({
    checks: ['model'],
    failure_threshold: 1,
    notify_cooldown: '1h',
    notify: { channel: 'telegram', peer: '123' },
  });
  const deps = makeDeps({
    config: cooldownConfig,
    modelRouter: undefined,
    channelLookup: { get: lookupSpy },
  });
  monitor = new HeartbeatMonitor(deps);

  // Mutates the same deps object that was handed to the monitor.
  // NOTE(review): relies on the monitor reading deps by reference — confirm in HeartbeatMonitor.
  const swapModelRouter = (modelRouter: unknown): void => {
    Object.assign(deps, { modelRouter });
  };

  // Run 1: no model router is configured; exactly one notification is expected.
  await monitor.runChecks();
  expect(sendSpy).toHaveBeenCalledTimes(1);

  // Run 2: a model router is present; a second notification is expected
  // (presumably the recovery message, per the test name).
  swapModelRouter({ getTier: () => 'default' });
  await monitor.runChecks();
  expect(sendSpy).toHaveBeenCalledTimes(2);

  // Run 3: the router is removed again; the call count must stay at two,
  // i.e. no further notification is sent while still inside the cooldown.
  swapModelRouter(undefined);
  await monitor.runChecks();
  expect(sendSpy).toHaveBeenCalledTimes(2);
});
it('recovery notification sent when checks pass after failures', async () => {
const mockSend = vi.fn().mockResolvedValue(undefined);
const mockGet = vi.fn().mockReturnValue({ send: mockSend });