// Source: flynn/src/automation/heartbeat.test.ts (TypeScript, 586 lines, 19 KiB)

import { describe, it, expect, vi, afterEach } from 'vitest';
import { HeartbeatMonitor, parseInterval } from './heartbeat.js';
import type { HeartbeatDeps } from './heartbeat.js';
import type { HeartbeatConfig } from '../config/schema.js';
import type { ChannelAdapter } from '../channels/types.js';
/**
 * Test factory for a heartbeat config.
 *
 * Starts from a fixed baseline (monitor enabled, 5m interval, 30m notify
 * cooldown, seven checks, explicit thresholds) and spreads `overrides` on
 * top, so any key present in `overrides` replaces the baseline value.
 *
 * @param overrides - Optional subset of config fields to replace.
 * @returns A freshly built config object.
 */
function makeConfig(overrides?: Partial<HeartbeatConfig>): HeartbeatConfig {
  const baseline: HeartbeatConfig = {
    enabled: true,
    interval: '5m',
    notify_cooldown: '30m',
    checks: ['gateway', 'model', 'channels', 'memory', 'disk', 'process_memory', 'backup'],
    failure_threshold: 2,
    disk_threshold_mb: 100,
    process_memory_threshold_mb: 1500,
    backup_failure_threshold: 1,
    provider_error_rate_threshold: 0.5,
    provider_error_min_calls: 5,
  };
  // Spread order matters: caller-supplied keys win over the baseline.
  return { ...baseline, ...overrides };
}
/**
 * Test factory for the dependency bag handed to `HeartbeatMonitor`.
 *
 * The defaults describe a healthy-looking setup: a model router reporting the
 * `default` tier, two connected channels, 256 MB of process memory, backups
 * disabled and no recorded model calls. `overrides` is spread last, so a key
 * explicitly set to `undefined` (e.g. `modelRouter: undefined`) replaces the
 * default with `undefined`.
 *
 * @param overrides - Optional subset of dependencies to replace.
 * @returns A freshly built dependency object.
 */
function makeDeps(overrides?: Partial<HeartbeatDeps>): HeartbeatDeps {
  const defaults: HeartbeatDeps = {
    config: makeConfig(),
    getGatewayPort: () => 18800,
    modelRouter: { getTier: () => 'default' },
    channelLister: {
      // New adapter objects are produced on every list() call.
      list: () =>
        ['telegram', 'webchat'].map((channelName) => makeChannelAdapter(channelName, 'connected')),
    },
    memoryDir: '/tmp/flynn-test-memory',
    dataDir: '/tmp',
    channelLookup: { get: vi.fn() },
    processMemoryUsageMb: () => 256,
    backupHealthProvider: () => ({
      enabled: false,
      hasRun: false,
      consecutiveFailures: 0,
    }),
    getModelCalls: () => [],
  };
  return { ...defaults, ...overrides };
}
/**
 * Builds a stub channel adapter with the given name and status.
 * All behaviour (connect, disconnect, send, onMessage) is a no-op.
 *
 * @param name - Adapter name reported to the monitor.
 * @param status - Connection status reported to the monitor.
 * @returns The stub adapter.
 */
function makeChannelAdapter(name: string, status: ChannelAdapter['status']): ChannelAdapter {
  const stub: ChannelAdapter = {
    name,
    status,
    async connect() {},
    async disconnect() {},
    async send() {},
    onMessage() {},
  };
  return stub;
}
describe('parseInterval', () => {
  // Asserts every [input, expected milliseconds] pair, in order.
  const expectMillis = (cases: Array<[string, number]>): void => {
    for (const [input, millis] of cases) {
      expect(parseInterval(input)).toBe(millis);
    }
  };

  it('parses seconds', () => {
    expectMillis([
      ['60s', 60000],
      ['1s', 1000],
    ]);
  });

  it('parses minutes', () => {
    expectMillis([
      ['5m', 300000],
      ['1m', 60000],
    ]);
  });

  it('parses hours', () => {
    expectMillis([
      ['1h', 3600000],
      ['2h', 7200000],
    ]);
  });

  it('treats bare numbers as seconds', () => {
    expectMillis([['30', 30000]]);
  });

  it('throws on invalid format', () => {
    // Both a non-numeric string and the empty string are rejected.
    for (const malformed of ['abc', '']) {
      expect(() => parseInterval(malformed)).toThrow('Invalid interval format');
    }
  });
});
describe('HeartbeatMonitor', () => {
// Shared by the tests in this suite; each test assigns its own instance.
let monitor: HeartbeatMonitor;

// Stop whatever monitor the last test assigned. The null guard covers the
// case where no test has assigned one yet.
afterEach(() => {
  if (monitor != null) {
    monitor.stop();
  }
});
// A disabled monitor must not schedule anything: start() should never reach setInterval.
it('start() does nothing when enabled: false', () => {
  const deps = makeDeps({ config: makeConfig({ enabled: false }) });
  monitor = new HeartbeatMonitor(deps);
  const setIntervalSpy = vi.spyOn(global, 'setInterval');
  try {
    monitor.start();
    expect(setIntervalSpy).not.toHaveBeenCalled();
  } finally {
    // Restore even when the assertion throws, so the spy cannot leak into later tests.
    setIntervalSpy.mockRestore();
  }
});
// An enabled monitor schedules its checks with the configured interval
// (makeConfig's default '5m' === 300000 ms). `checks: []` keeps any run trivial.
it('start() sets an interval when enabled', () => {
  const deps = makeDeps({ config: makeConfig({ enabled: true, checks: [] }) });
  monitor = new HeartbeatMonitor(deps);
  const setIntervalSpy = vi.spyOn(global, 'setInterval');
  try {
    monitor.start();
    expect(setIntervalSpy).toHaveBeenCalledWith(expect.any(Function), 300000);
  } finally {
    // Restore even when the assertion throws, so the spy cannot leak into later tests.
    setIntervalSpy.mockRestore();
  }
});
// stop() after start() must release the scheduled interval via clearInterval.
it('stop() clears the timer', () => {
  const deps = makeDeps({ config: makeConfig({ enabled: true, checks: [] }) });
  monitor = new HeartbeatMonitor(deps);
  const clearIntervalSpy = vi.spyOn(global, 'clearInterval');
  try {
    monitor.start();
    monitor.stop();
    expect(clearIntervalSpy).toHaveBeenCalled();
  } finally {
    // Restore even when the assertion throws, so the spy cannot leak into later tests.
    clearIntervalSpy.mockRestore();
  }
});
it('runChecks() runs all configured checks', async () => {
  monitor = new HeartbeatMonitor(
    makeDeps({ config: makeConfig({ checks: ['model', 'channels'] }) }),
  );
  const { checks } = await monitor.runChecks();
  // One result per configured check, reported in configuration order.
  expect(checks).toHaveLength(2);
  expect(checks.map((entry) => entry.name)).toEqual(['model', 'channels']);
});
it('returns healthy=true when all checks pass', async () => {
  monitor = new HeartbeatMonitor(
    makeDeps({ config: makeConfig({ checks: ['model', 'channels'] }) }),
  );
  const { healthy, checks } = await monitor.runChecks();
  expect(healthy).toBe(true);
  // No individual check may report unhealthy.
  expect(checks.some((entry) => !entry.healthy)).toBe(false);
});
it('returns healthy=false when any check fails', async () => {
  const config = makeConfig({ checks: ['model', 'channels'] });
  // Removing the model router makes the model check fail.
  monitor = new HeartbeatMonitor(makeDeps({ config, modelRouter: undefined }));
  const outcome = await monitor.runChecks();
  expect(outcome.healthy).toBe(false);
  const failedModel = outcome.checks.find((entry) => entry.name === 'model');
  expect(failedModel?.healthy).toBe(false);
});
it('getLastResult() returns most recent result', async () => {
  monitor = new HeartbeatMonitor(makeDeps({ config: makeConfig({ checks: ['model'] }) }));
  // Nothing has run yet, so no result is stored.
  expect(monitor.getLastResult()).toBeUndefined();
  await monitor.runChecks();
  // After one run the stored result carries the single check and a positive timestamp.
  const stored = monitor.getLastResult();
  expect(stored).toBeDefined();
  expect(stored?.checks).toHaveLength(1);
  expect(stored?.timestamp ?? 0).toBeGreaterThan(0);
});
it('notification sent after failure_threshold consecutive failures', async () => {
  const send = vi.fn().mockResolvedValue(undefined);
  const channelLookup = { get: vi.fn().mockReturnValue({ send }) };
  const config = makeConfig({
    checks: ['model'],
    failure_threshold: 2,
    notify: { channel: 'telegram', peer: '123' },
  });
  // No model router: the model check fails on every run.
  monitor = new HeartbeatMonitor(makeDeps({ config, modelRouter: undefined, channelLookup }));

  // Run 1: one failure, still under the threshold of 2, so nothing is sent.
  await monitor.runChecks();
  expect(send).not.toHaveBeenCalled();

  // Run 2: threshold reached, so exactly one FAILING message goes to the configured peer.
  await monitor.runChecks();
  expect(send).toHaveBeenCalledTimes(1);
  expect(send).toHaveBeenCalledWith(
    '123',
    expect.objectContaining({ text: expect.stringContaining('FAILING') }),
  );
});
it('does not send duplicate failure notifications', async () => {
  const send = vi.fn().mockResolvedValue(undefined);
  const channelLookup = { get: vi.fn().mockReturnValue({ send }) };
  const config = makeConfig({
    checks: ['model'],
    failure_threshold: 1,
    notify: { channel: 'telegram', peer: '123' },
  });
  monitor = new HeartbeatMonitor(makeDeps({ config, modelRouter: undefined, channelLookup }));

  // Three consecutive failing runs, executed one after another.
  for (let run = 0; run < 3; run += 1) {
    await monitor.runChecks();
  }

  // Only the first failing run produces a notification.
  expect(send).toHaveBeenCalledTimes(1);
});
it('suppresses repeat failure notifications inside notify cooldown after recovery', async () => {
  const send = vi.fn().mockResolvedValue(undefined);
  const channelLookup = { get: vi.fn().mockReturnValue({ send }) };
  const deps = makeDeps({
    config: makeConfig({
      checks: ['model'],
      failure_threshold: 1,
      notify_cooldown: '1h',
      notify: { channel: 'telegram', peer: '123' },
    }),
    modelRouter: undefined,
    channelLookup,
  });
  monitor = new HeartbeatMonitor(deps);

  // Step 1: the first failing run sends a message.
  await monitor.runChecks();
  expect(send).toHaveBeenCalledTimes(1);

  // Step 2: put a router back on the same deps object; the passing run sends a second message.
  Object.assign(deps, { modelRouter: { getTier: () => 'default' } });
  await monitor.runChecks();
  expect(send).toHaveBeenCalledTimes(2);

  // Step 3: fail again inside the 1h cooldown; the send count must stay at 2.
  Object.assign(deps, { modelRouter: undefined });
  await monitor.runChecks();
  expect(send).toHaveBeenCalledTimes(2);
});
// A monitor that has already reported a failure must send a RECOVERED message
// on the first passing run. The same monitor instance is used throughout so
// that its internal failure state carries over between runs.
it('recovery notification sent when checks pass after failures', async () => {
  const mockSend = vi.fn().mockResolvedValue(undefined);
  const mockGet = vi.fn().mockReturnValue({ send: mockSend });
  const deps = makeDeps({
    config: makeConfig({
      checks: ['model'],
      failure_threshold: 1,
      notify: { channel: 'telegram', peer: '123' },
    }),
    modelRouter: undefined, // model check fails until a router is provided
    channelLookup: { get: mockGet },
  });
  monitor = new HeartbeatMonitor(deps);

  // Fail once: with failure_threshold 1 this already triggers the failure notification.
  await monitor.runChecks();
  expect(mockSend).toHaveBeenCalledTimes(1);

  // "Recover" by mutating the deps object in place; the monitor reads the
  // updated router on its next run.
  Object.assign(deps, { modelRouter: { getTier: () => 'default' } });
  await monitor.runChecks();

  // The second message is the recovery notification, sent to the configured peer.
  expect(mockSend).toHaveBeenCalledTimes(2);
  expect(mockSend).toHaveBeenLastCalledWith('123', expect.objectContaining({
    text: expect.stringContaining('RECOVERED'),
  }));
});
// Without a `notify` section in the config, failing runs must neither throw
// nor send anything, even though a channel with a working send() is available.
it('no notification when notify config is not set', async () => {
  const mockSend = vi.fn().mockResolvedValue(undefined);
  const mockGet = vi.fn().mockReturnValue({ send: mockSend });
  const deps = makeDeps({
    config: makeConfig({
      checks: ['model'],
      failure_threshold: 1,
      // no notify
    }),
    modelRouter: undefined, // model check fails on every run
    channelLookup: { get: mockGet },
  });
  monitor = new HeartbeatMonitor(deps);

  // Should not throw; both runs reach the failure threshold.
  const first = await monitor.runChecks();
  const second = await monitor.runChecks();

  // Confirm the failing path was exercised, then that nothing was sent.
  expect(first.healthy).toBe(false);
  expect(second.healthy).toBe(false);
  expect(mockSend).not.toHaveBeenCalled();
});
// ── Individual check tests ───────────────────────────────────
describe('model check', () => {
  // Runs a monitor configured with only the `model` check and returns that check's result.
  const runModelCheck = async (depOverrides: Partial<HeartbeatDeps>) => {
    monitor = new HeartbeatMonitor(
      makeDeps({ config: makeConfig({ checks: ['model'] }), ...depOverrides }),
    );
    const { checks } = await monitor.runChecks();
    const found = checks.find((entry) => entry.name === 'model');
    if (!found) {throw new Error('Expected model check result');}
    return found;
  };

  it('passes when model router is available', async () => {
    const outcome = await runModelCheck({ modelRouter: { getTier: () => 'fast' } });
    expect(outcome.healthy).toBe(true);
    // The router's tier is echoed in the check message.
    expect(outcome.message).toContain('fast');
  });

  it('fails when model router is undefined', async () => {
    const outcome = await runModelCheck({ modelRouter: undefined });
    expect(outcome.healthy).toBe(false);
  });
});
describe('channels check', () => {
  // Runs a monitor configured with only the `channels` check and returns that check's result.
  const runChannelsCheck = async (depOverrides: Partial<HeartbeatDeps>) => {
    monitor = new HeartbeatMonitor(
      makeDeps({ config: makeConfig({ checks: ['channels'] }), ...depOverrides }),
    );
    const { checks } = await monitor.runChecks();
    const found = checks.find((entry) => entry.name === 'channels');
    if (!found) {throw new Error('Expected channels check result');}
    return found;
  };

  it('passes when at least one channel is connected', async () => {
    const outcome = await runChannelsCheck({
      channelLister: {
        list: () => [
          makeChannelAdapter('telegram', 'connected'),
          makeChannelAdapter('webchat', 'disconnected'),
        ],
      },
    });
    expect(outcome.healthy).toBe(true);
    // The message reports the connected ratio and mentions the disconnected channel.
    expect(outcome.message).toContain('1/2 connected');
    expect(outcome.message).toContain('webchat');
  });

  it('fails when no channels are connected', async () => {
    const outcome = await runChannelsCheck({
      channelLister: {
        list: () => [makeChannelAdapter('telegram', 'disconnected')],
      },
    });
    expect(outcome.healthy).toBe(false);
  });
});
describe('memory check', () => {
  // Runs a monitor configured with only the `memory` check for the given memoryDir.
  const runMemoryCheck = async (memoryDir: string | undefined) => {
    monitor = new HeartbeatMonitor(
      makeDeps({ config: makeConfig({ checks: ['memory'] }), memoryDir }),
    );
    const { checks } = await monitor.runChecks();
    const found = checks.find((entry) => entry.name === 'memory');
    if (!found) {throw new Error('Expected memory check result');}
    return found;
  };

  it('passes when memory is disabled', async () => {
    // An undefined memoryDir is reported as "disabled" and counts as healthy.
    const outcome = await runMemoryCheck(undefined);
    expect(outcome.healthy).toBe(true);
    expect(outcome.message).toContain('disabled');
  });

  it('fails when memory dir is not accessible', async () => {
    const outcome = await runMemoryCheck('/nonexistent/path/that/does/not/exist');
    expect(outcome.healthy).toBe(false);
  });
});
describe('disk check', () => {
  // Runs a monitor configured with only the `disk` check against `dataDir`;
  // `configOverrides` can adjust the threshold on top of the default config.
  const runDiskCheck = async (dataDir: string, configOverrides: Partial<HeartbeatConfig> = {}) => {
    monitor = new HeartbeatMonitor(
      makeDeps({ config: makeConfig({ checks: ['disk'], ...configOverrides }), dataDir }),
    );
    const { checks } = await monitor.runChecks();
    const found = checks.find((entry) => entry.name === 'disk');
    if (!found) {throw new Error('Expected disk check result');}
    return found;
  };

  it('passes when enough disk space available', async () => {
    // A 1 MB threshold is expected to be met on /tmp.
    const outcome = await runDiskCheck('/tmp', { disk_threshold_mb: 1 });
    expect(outcome.healthy).toBe(true);
    expect(outcome.message).toContain('MB available');
  });

  it('fails when disk space is below threshold', async () => {
    // An absurdly large threshold forces the low-space branch.
    const outcome = await runDiskCheck('/tmp', { disk_threshold_mb: 999999999 });
    expect(outcome.healthy).toBe(false);
    expect(outcome.message).toContain('Low disk space');
  });

  it('fails when dataDir does not exist', async () => {
    const outcome = await runDiskCheck('/nonexistent/path/that/does/not/exist');
    expect(outcome.healthy).toBe(false);
  });
});
describe('process_memory check', () => {
  // Runs a monitor configured with only the `process_memory` check, using the
  // given threshold and a stubbed memory reading (both in MB).
  const runProcessMemoryCheck = async (thresholdMb: number, reportedMb: number) => {
    monitor = new HeartbeatMonitor(
      makeDeps({
        config: makeConfig({ checks: ['process_memory'], process_memory_threshold_mb: thresholdMb }),
        processMemoryUsageMb: () => reportedMb,
      }),
    );
    const { checks } = await monitor.runChecks();
    const found = checks.find((entry) => entry.name === 'process_memory');
    if (!found) {throw new Error('Expected process_memory check result');}
    return found;
  };

  it('passes when RSS is below threshold', async () => {
    const outcome = await runProcessMemoryCheck(512, 200);
    expect(outcome.healthy).toBe(true);
  });

  it('fails when RSS is above threshold', async () => {
    const outcome = await runProcessMemoryCheck(128, 512);
    expect(outcome.healthy).toBe(false);
    expect(outcome.message).toContain('High memory usage');
  });
});
describe('backup check', () => {
  // Runs a monitor configured with only the `backup` check; `depOverrides`
  // supplies the backup health provider, `configOverrides` the threshold.
  const runBackupCheck = async (
    depOverrides: Partial<HeartbeatDeps>,
    configOverrides: Partial<HeartbeatConfig> = {},
  ) => {
    monitor = new HeartbeatMonitor(
      makeDeps({ config: makeConfig({ checks: ['backup'], ...configOverrides }), ...depOverrides }),
    );
    const { checks } = await monitor.runChecks();
    const found = checks.find((entry) => entry.name === 'backup');
    if (!found) {throw new Error('Expected backup check result');}
    return found;
  };

  it('passes when backup is disabled', async () => {
    const outcome = await runBackupCheck({
      backupHealthProvider: () => ({
        enabled: false,
        hasRun: false,
        consecutiveFailures: 0,
      }),
    });
    expect(outcome.healthy).toBe(true);
    expect(outcome.message).toContain('disabled');
  });

  it('fails when backup consecutive failures exceed threshold', async () => {
    // 3 consecutive failures against a threshold of 2.
    const outcome = await runBackupCheck(
      {
        backupHealthProvider: () => ({
          enabled: true,
          hasRun: true,
          consecutiveFailures: 3,
          lastError: 'minio unavailable',
        }),
      },
      { backup_failure_threshold: 2 },
    );
    expect(outcome.healthy).toBe(false);
    // The message names the failure and includes the provider's last error.
    expect(outcome.message).toContain('Backup failing');
    expect(outcome.message).toContain('minio unavailable');
  });
});
describe('provider_errors check', () => {
  // Runs a monitor configured with only the `provider_errors` check;
  // `depOverrides` supplies the recorded model calls, `configOverrides` the thresholds.
  const runProviderErrorsCheck = async (
    depOverrides: Partial<HeartbeatDeps>,
    configOverrides: Partial<HeartbeatConfig> = {},
  ) => {
    monitor = new HeartbeatMonitor(
      makeDeps({
        config: makeConfig({ checks: ['provider_errors'], ...configOverrides }),
        ...depOverrides,
      }),
    );
    const { checks } = await monitor.runChecks();
    const found = checks.find((entry) => entry.name === 'provider_errors');
    if (!found) {throw new Error('Expected provider_errors check result');}
    return found;
  };

  it('passes when no model calls are recorded', async () => {
    const outcome = await runProviderErrorsCheck({ getModelCalls: () => [] });
    expect(outcome.healthy).toBe(true);
    expect(outcome.message).toContain('No model calls');
  });

  it('fails when a provider error rate breaches threshold', async () => {
    // openai: 2 errors out of 4 calls; anthropic: 0 errors out of 2 calls.
    const outcome = await runProviderErrorsCheck(
      {
        getModelCalls: () => [
          { provider: 'openai', error: 'rate limited' },
          { provider: 'openai', error: 'timeout' },
          { provider: 'openai' },
          { provider: 'openai' },
          { provider: 'anthropic' },
          { provider: 'anthropic' },
        ],
      },
      { provider_error_min_calls: 4, provider_error_rate_threshold: 0.5 },
    );
    expect(outcome.healthy).toBe(false);
    // The offending provider is named in the message.
    expect(outcome.message).toContain('openai');
  });
});
});