// Source listing metadata: 586 lines, 19 KiB, TypeScript.
import { describe, it, expect, vi, afterEach } from 'vitest';
|
|
import { HeartbeatMonitor, parseInterval } from './heartbeat.js';
|
|
import type { HeartbeatDeps } from './heartbeat.js';
|
|
import type { HeartbeatConfig } from '../config/schema.js';
|
|
import type { ChannelAdapter } from '../channels/types.js';
|
|
|
|
/**
 * Builds a heartbeat configuration for tests.
 *
 * Starts from a fixed baseline and lets the caller replace any subset of it;
 * keys present in `overrides` take precedence over the baseline values.
 *
 * @param overrides - Optional partial configuration merged over the baseline.
 * @returns The merged configuration object.
 */
function makeConfig(overrides?: Partial<HeartbeatConfig>): HeartbeatConfig {
  const baseline: HeartbeatConfig = {
    enabled: true,
    interval: '5m',
    notify_cooldown: '30m',
    checks: ['gateway', 'model', 'channels', 'memory', 'disk', 'process_memory', 'backup'],
    failure_threshold: 2,
    disk_threshold_mb: 100,
    process_memory_threshold_mb: 1500,
    backup_failure_threshold: 1,
    provider_error_rate_threshold: 0.5,
    provider_error_min_calls: 5,
  };

  // Spread order matters: overrides come last so they win.
  return { ...baseline, ...overrides };
}
|
|
|
|
/**
 * Builds the dependency bag handed to `HeartbeatMonitor` in tests.
 *
 * The baseline describes a healthy setup: a model router is present, two
 * channels report `connected`, process memory is 256 MB, backups are disabled
 * and no model calls are recorded. Keys present in `overrides` replace the
 * corresponding baseline entries.
 *
 * @param overrides - Optional partial dependency set merged over the baseline.
 * @returns The merged dependency object.
 */
function makeDeps(overrides?: Partial<HeartbeatDeps>): HeartbeatDeps {
  // Fresh adapter objects are produced on every list() call.
  const listConnectedChannels = (): ChannelAdapter[] => [
    makeChannelAdapter('telegram', 'connected'),
    makeChannelAdapter('webchat', 'connected'),
  ];

  const baseline: HeartbeatDeps = {
    config: makeConfig(),
    getGatewayPort: () => 18800,
    modelRouter: { getTier: () => 'default' },
    channelLister: { list: listConnectedChannels },
    memoryDir: '/tmp/flynn-test-memory',
    dataDir: '/tmp',
    channelLookup: { get: vi.fn() },
    processMemoryUsageMb: () => 256,
    backupHealthProvider: () => ({
      enabled: false,
      hasRun: false,
      consecutiveFailures: 0,
    }),
    getModelCalls: () => [],
  };

  // Overrides are spread last so they take precedence.
  return { ...baseline, ...overrides };
}
|
|
|
|
/**
 * Creates a stub channel adapter with the given name and status.
 *
 * All behavioural members are no-ops: the async ones resolve immediately and
 * `onMessage` ignores its arguments.
 *
 * @param name - Adapter name to expose.
 * @param status - Connection status to expose.
 * @returns The stub adapter.
 */
function makeChannelAdapter(name: string, status: ChannelAdapter['status']): ChannelAdapter {
  const stub: ChannelAdapter = {
    name,
    status,
    async connect() {},
    async disconnect() {},
    async send() {},
    onMessage() {},
  };
  return stub;
}
|
|
|
|
describe('parseInterval', () => {
  /** Asserts that every `[input, expectedMs]` pair parses to the expected millisecond value. */
  const expectParsed = (pairs: ReadonlyArray<readonly [string, number]>): void => {
    for (const [text, expectedMs] of pairs) {
      expect(parseInterval(text)).toBe(expectedMs);
    }
  };

  it('parses seconds', () => {
    expectParsed([
      ['60s', 60000],
      ['1s', 1000],
    ]);
  });

  it('parses minutes', () => {
    expectParsed([
      ['5m', 300000],
      ['1m', 60000],
    ]);
  });

  it('parses hours', () => {
    expectParsed([
      ['1h', 3600000],
      ['2h', 7200000],
    ]);
  });

  it('treats bare numbers as seconds', () => {
    expectParsed([['30', 30000]]);
  });

  it('throws on invalid format', () => {
    // Non-numeric text and the empty string are both rejected.
    for (const malformed of ['abc', '']) {
      expect(() => parseInterval(malformed)).toThrow('Invalid interval format');
    }
  });
});
|
|
|
|
describe('HeartbeatMonitor', () => {
|
|
// Monitor under test; tests assign their instance here so it can be stopped afterwards.
let monitor: HeartbeatMonitor;

// Stop the most recently assigned monitor (if any) once each test finishes.
afterEach(() => {
  if (monitor != null) {
    monitor.stop();
  }
});
|
|
|
|
// A disabled heartbeat must not schedule any interval when started.
it('start() does nothing when enabled: false', () => {
  const deps = makeDeps({ config: makeConfig({ enabled: false }) });
  monitor = new HeartbeatMonitor(deps);

  const setIntervalSpy = vi.spyOn(global, 'setInterval');
  try {
    monitor.start();

    expect(setIntervalSpy).not.toHaveBeenCalled();
  } finally {
    // Restore even when the assertion throws, so the spy on the global timer
    // cannot leak into later tests.
    setIntervalSpy.mockRestore();
  }
});
|
|
|
|
// An enabled heartbeat schedules its interval using the configured period.
it('start() sets an interval when enabled', () => {
  // No checks are configured, so only the scheduling itself is exercised.
  const deps = makeDeps({ config: makeConfig({ enabled: true, checks: [] }) });
  monitor = new HeartbeatMonitor(deps);

  const setIntervalSpy = vi.spyOn(global, 'setInterval');
  try {
    monitor.start();

    // 300000 ms corresponds to the '5m' interval that makeConfig supplies.
    expect(setIntervalSpy).toHaveBeenCalledWith(expect.any(Function), 300000);
  } finally {
    // Restore even when the assertion throws, so the spy on the global timer
    // cannot leak into later tests.
    setIntervalSpy.mockRestore();
  }
});
|
|
|
|
// Stopping a started monitor must clear its interval timer.
it('stop() clears the timer', () => {
  const deps = makeDeps({ config: makeConfig({ enabled: true, checks: [] }) });
  monitor = new HeartbeatMonitor(deps);

  const clearIntervalSpy = vi.spyOn(global, 'clearInterval');
  try {
    monitor.start();
    monitor.stop();

    expect(clearIntervalSpy).toHaveBeenCalled();
  } finally {
    // Restore even when the assertion throws, so the spy on the global timer
    // cannot leak into later tests.
    clearIntervalSpy.mockRestore();
  }
});
|
|
|
|
// The result contains one entry per configured check, in configuration order.
it('runChecks() runs all configured checks', async () => {
  const config = makeConfig({ checks: ['model', 'channels'] });
  monitor = new HeartbeatMonitor(makeDeps({ config }));

  const { checks } = await monitor.runChecks();

  expect(checks).toHaveLength(2);
  expect(checks[0].name).toBe('model');
  expect(checks[1].name).toBe('channels');
});
|
|
|
|
// With the default (healthy) dependencies, the overall flag and every entry are healthy.
it('returns healthy=true when all checks pass', async () => {
  const config = makeConfig({ checks: ['model', 'channels'] });
  monitor = new HeartbeatMonitor(makeDeps({ config }));

  const { healthy, checks } = await monitor.runChecks();

  expect(healthy).toBe(true);
  expect(checks.every((entry) => entry.healthy)).toBe(true);
});
|
|
|
|
// A single failing check is enough to mark the overall result unhealthy.
it('returns healthy=false when any check fails', async () => {
  const config = makeConfig({ checks: ['model', 'channels'] });
  monitor = new HeartbeatMonitor(
    makeDeps({
      config,
      modelRouter: undefined, // no router, so the model check fails
    }),
  );

  const { healthy, checks } = await monitor.runChecks();

  expect(healthy).toBe(false);
  const modelEntry = checks.find((entry) => entry.name === 'model');
  expect(modelEntry?.healthy).toBe(false);
});
|
|
|
|
// getLastResult() is undefined before the first run and populated afterwards.
it('getLastResult() returns most recent result', async () => {
  const config = makeConfig({ checks: ['model'] });
  monitor = new HeartbeatMonitor(makeDeps({ config }));

  // Nothing has run yet.
  expect(monitor.getLastResult()).toBeUndefined();

  await monitor.runChecks();

  const latest = monitor.getLastResult();
  expect(latest).toBeDefined();
  expect(latest?.checks).toHaveLength(1);
  expect(latest?.timestamp ?? 0).toBeGreaterThan(0);
});
|
|
|
|
// With failure_threshold 2, the first failing run stays silent and the second one notifies.
it('notification sent after failure_threshold consecutive failures', async () => {
  const sendSpy = vi.fn().mockResolvedValue(undefined);
  const lookupSpy = vi.fn().mockReturnValue({ send: sendSpy });
  const config = makeConfig({
    checks: ['model'],
    failure_threshold: 2,
    notify: { channel: 'telegram', peer: '123' },
  });

  monitor = new HeartbeatMonitor(
    makeDeps({
      config,
      modelRouter: undefined, // no router, so the model check fails
      channelLookup: { get: lookupSpy },
    }),
  );

  // Run 1: a single failure is still below the threshold.
  await monitor.runChecks();
  expect(sendSpy).not.toHaveBeenCalled();

  // Run 2: the second consecutive failure reaches the threshold.
  await monitor.runChecks();
  expect(sendSpy).toHaveBeenCalledTimes(1);

  const failingMessage = expect.objectContaining({
    text: expect.stringContaining('FAILING'),
  });
  expect(sendSpy).toHaveBeenCalledWith('123', failingMessage);
});
|
|
|
|
// Repeated failing runs produce exactly one failure notification.
it('does not send duplicate failure notifications', async () => {
  const sendSpy = vi.fn().mockResolvedValue(undefined);
  const lookupSpy = vi.fn().mockReturnValue({ send: sendSpy });
  const config = makeConfig({
    checks: ['model'],
    failure_threshold: 1,
    notify: { channel: 'telegram', peer: '123' },
  });

  monitor = new HeartbeatMonitor(
    makeDeps({
      config,
      modelRouter: undefined,
      channelLookup: { get: lookupSpy },
    }),
  );

  // Three consecutive failing runs.
  for (let run = 0; run < 3; run += 1) {
    await monitor.runChecks();
  }

  // Only one failure notification sent
  expect(sendSpy).toHaveBeenCalledTimes(1);
});
|
|
|
|
// Fail -> recover -> fail again: the second failure falls inside the 1h cooldown and is not sent.
it('suppresses repeat failure notifications inside notify cooldown after recovery', async () => {
  const sendSpy = vi.fn().mockResolvedValue(undefined);
  const lookupSpy = vi.fn().mockReturnValue({ send: sendSpy });
  const config = makeConfig({
    checks: ['model'],
    failure_threshold: 1,
    notify_cooldown: '1h',
    notify: { channel: 'telegram', peer: '123' },
  });
  const deps = makeDeps({
    config,
    modelRouter: undefined,
    channelLookup: { get: lookupSpy },
  });
  monitor = new HeartbeatMonitor(deps);

  // First failure: notification #1.
  await monitor.runChecks();
  expect(sendSpy).toHaveBeenCalledTimes(1);

  // Recovery: the deps object is mutated in place to restore the router; notification #2.
  Object.assign(deps, { modelRouter: { getTier: () => 'default' } });
  await monitor.runChecks();
  expect(sendSpy).toHaveBeenCalledTimes(2);

  // Failure again: the count stays at 2, so nothing new was sent.
  Object.assign(deps, { modelRouter: undefined });
  await monitor.runChecks();
  expect(sendSpy).toHaveBeenCalledTimes(2);
});
|
|
|
|
// A failing run followed by a passing run sends a failure message and then a
// recovery message through the configured notify channel.
it('recovery notification sent when checks pass after failures', async () => {
  const mockSend = vi.fn().mockResolvedValue(undefined);
  const mockGet = vi.fn().mockReturnValue({ send: mockSend });

  const deps = makeDeps({
    config: makeConfig({
      checks: ['model'],
      failure_threshold: 1,
      notify: { channel: 'telegram', peer: '123' },
    }),
    modelRouter: undefined, // no router, so the model check fails
    channelLookup: { get: mockGet },
  });
  monitor = new HeartbeatMonitor(deps);

  // Fail: threshold is 1, so the first failing run notifies.
  await monitor.runChecks();
  expect(mockSend).toHaveBeenCalledTimes(1); // failure notification

  // "Recover" by mutating the same deps object the monitor was constructed
  // with. This relies on the monitor reading deps by reference, the same
  // pattern the cooldown test in this file depends on.
  Object.assign(deps, { modelRouter: { getTier: () => 'default' } });

  await monitor.runChecks();
  expect(mockSend).toHaveBeenCalledTimes(2); // recovery notification
  expect(mockSend).toHaveBeenLastCalledWith('123', expect.objectContaining({
    text: expect.stringContaining('RECOVERED'),
  }));
});
|
|
|
|
// Without a notify target, failing runs must still complete without throwing.
it('no notification when notify config is not set', async () => {
  // `notify` is intentionally left out of the configuration.
  const config = makeConfig({ checks: ['model'], failure_threshold: 1 });
  monitor = new HeartbeatMonitor(makeDeps({ config, modelRouter: undefined }));

  // Two failing runs; the test passes as long as neither rejects.
  for (let run = 0; run < 2; run += 1) {
    await monitor.runChecks();
  }
});
|
|
|
|
// ── Individual check tests ───────────────────────────────────
|
|
|
|
describe('model check', () => {
  /** Runs the monitor built from `deps` and returns the entry named 'model'. */
  const modelCheckFor = async (deps: HeartbeatDeps) => {
    monitor = new HeartbeatMonitor(deps);
    const { checks } = await monitor.runChecks();
    const entry = checks.find((candidate) => candidate.name === 'model');
    if (entry === undefined) {
      throw new Error('Expected model check result');
    }
    return entry;
  };

  it('passes when model router is available', async () => {
    const entry = await modelCheckFor(
      makeDeps({
        config: makeConfig({ checks: ['model'] }),
        modelRouter: { getTier: () => 'fast' },
      }),
    );

    expect(entry.healthy).toBe(true);
    // The tier reported by the router shows up in the message.
    expect(entry.message).toContain('fast');
  });

  it('fails when model router is undefined', async () => {
    const entry = await modelCheckFor(
      makeDeps({
        config: makeConfig({ checks: ['model'] }),
        modelRouter: undefined,
      }),
    );

    expect(entry.healthy).toBe(false);
  });
});
|
|
|
|
describe('channels check', () => {
  /** Runs the monitor built from `deps` and returns the entry named 'channels'. */
  const channelsCheckFor = async (deps: HeartbeatDeps) => {
    monitor = new HeartbeatMonitor(deps);
    const { checks } = await monitor.runChecks();
    const entry = checks.find((candidate) => candidate.name === 'channels');
    if (entry === undefined) {
      throw new Error('Expected channels check result');
    }
    return entry;
  };

  it('passes when at least one channel is connected', async () => {
    const entry = await channelsCheckFor(
      makeDeps({
        config: makeConfig({ checks: ['channels'] }),
        channelLister: {
          list: () => [
            makeChannelAdapter('telegram', 'connected'),
            makeChannelAdapter('webchat', 'disconnected'),
          ],
        },
      }),
    );

    expect(entry.healthy).toBe(true);
    // The message reports the connected ratio and mentions the disconnected channel.
    expect(entry.message).toContain('1/2 connected');
    expect(entry.message).toContain('webchat');
  });

  it('fails when no channels are connected', async () => {
    const entry = await channelsCheckFor(
      makeDeps({
        config: makeConfig({ checks: ['channels'] }),
        channelLister: {
          list: () => [makeChannelAdapter('telegram', 'disconnected')],
        },
      }),
    );

    expect(entry.healthy).toBe(false);
  });
});
|
|
|
|
describe('memory check', () => {
  /** Runs the monitor built from `deps` and returns the entry named 'memory'. */
  const memoryCheckFor = async (deps: HeartbeatDeps) => {
    monitor = new HeartbeatMonitor(deps);
    const { checks } = await monitor.runChecks();
    const entry = checks.find((candidate) => candidate.name === 'memory');
    if (entry === undefined) {
      throw new Error('Expected memory check result');
    }
    return entry;
  };

  it('passes when memory is disabled', async () => {
    const entry = await memoryCheckFor(
      makeDeps({
        config: makeConfig({ checks: ['memory'] }),
        memoryDir: undefined, // no directory configured
      }),
    );

    expect(entry.healthy).toBe(true);
    expect(entry.message).toContain('disabled');
  });

  it('fails when memory dir is not accessible', async () => {
    const entry = await memoryCheckFor(
      makeDeps({
        config: makeConfig({ checks: ['memory'] }),
        memoryDir: '/nonexistent/path/that/does/not/exist',
      }),
    );

    expect(entry.healthy).toBe(false);
  });
});
|
|
|
|
describe('disk check', () => {
  /** Runs the monitor built from `deps` and returns the entry named 'disk'. */
  const diskCheckFor = async (deps: HeartbeatDeps) => {
    monitor = new HeartbeatMonitor(deps);
    const { checks } = await monitor.runChecks();
    const entry = checks.find((candidate) => candidate.name === 'disk');
    if (entry === undefined) {
      throw new Error('Expected disk check result');
    }
    return entry;
  };

  it('passes when enough disk space available', async () => {
    // A 1 MB threshold is low enough that the check is expected to pass.
    const entry = await diskCheckFor(
      makeDeps({
        config: makeConfig({ checks: ['disk'], disk_threshold_mb: 1 }),
        dataDir: '/tmp',
      }),
    );

    expect(entry.healthy).toBe(true);
    expect(entry.message).toContain('MB available');
  });

  it('fails when disk space is below threshold', async () => {
    // An enormous threshold forces the low-space branch.
    const entry = await diskCheckFor(
      makeDeps({
        config: makeConfig({ checks: ['disk'], disk_threshold_mb: 999999999 }),
        dataDir: '/tmp',
      }),
    );

    expect(entry.healthy).toBe(false);
    expect(entry.message).toContain('Low disk space');
  });

  it('fails when dataDir does not exist', async () => {
    const entry = await diskCheckFor(
      makeDeps({
        config: makeConfig({ checks: ['disk'] }),
        dataDir: '/nonexistent/path/that/does/not/exist',
      }),
    );

    expect(entry.healthy).toBe(false);
  });
});
|
|
|
|
describe('process_memory check', () => {
  /** Runs the monitor built from `deps` and returns the entry named 'process_memory'. */
  const processMemoryCheckFor = async (deps: HeartbeatDeps) => {
    monitor = new HeartbeatMonitor(deps);
    const { checks } = await monitor.runChecks();
    const entry = checks.find((candidate) => candidate.name === 'process_memory');
    if (entry === undefined) {
      throw new Error('Expected process_memory check result');
    }
    return entry;
  };

  it('passes when RSS is below threshold', async () => {
    // Reported usage of 200 MB against a 512 MB threshold.
    const entry = await processMemoryCheckFor(
      makeDeps({
        config: makeConfig({ checks: ['process_memory'], process_memory_threshold_mb: 512 }),
        processMemoryUsageMb: () => 200,
      }),
    );

    expect(entry.healthy).toBe(true);
  });

  it('fails when RSS is above threshold', async () => {
    // Reported usage of 512 MB against a 128 MB threshold.
    const entry = await processMemoryCheckFor(
      makeDeps({
        config: makeConfig({ checks: ['process_memory'], process_memory_threshold_mb: 128 }),
        processMemoryUsageMb: () => 512,
      }),
    );

    expect(entry.healthy).toBe(false);
    expect(entry.message).toContain('High memory usage');
  });
});
|
|
|
|
describe('backup check', () => {
  /** Runs the monitor built from `deps` and returns the entry named 'backup'. */
  const backupCheckFor = async (deps: HeartbeatDeps) => {
    monitor = new HeartbeatMonitor(deps);
    const { checks } = await monitor.runChecks();
    const entry = checks.find((candidate) => candidate.name === 'backup');
    if (entry === undefined) {
      throw new Error('Expected backup check result');
    }
    return entry;
  };

  it('passes when backup is disabled', async () => {
    const entry = await backupCheckFor(
      makeDeps({
        config: makeConfig({ checks: ['backup'] }),
        backupHealthProvider: () => ({
          enabled: false,
          hasRun: false,
          consecutiveFailures: 0,
        }),
      }),
    );

    expect(entry.healthy).toBe(true);
    expect(entry.message).toContain('disabled');
  });

  it('fails when backup consecutive failures exceed threshold', async () => {
    // Three consecutive failures against a threshold of two.
    const entry = await backupCheckFor(
      makeDeps({
        config: makeConfig({ checks: ['backup'], backup_failure_threshold: 2 }),
        backupHealthProvider: () => ({
          enabled: true,
          hasRun: true,
          consecutiveFailures: 3,
          lastError: 'minio unavailable',
        }),
      }),
    );

    expect(entry.healthy).toBe(false);
    expect(entry.message).toContain('Backup failing');
    // The provider's last error text is surfaced in the message.
    expect(entry.message).toContain('minio unavailable');
  });
});
|
|
|
|
describe('provider_errors check', () => {
  /** Runs the monitor built from `deps` and returns the entry named 'provider_errors'. */
  const providerErrorsCheckFor = async (deps: HeartbeatDeps) => {
    monitor = new HeartbeatMonitor(deps);
    const { checks } = await monitor.runChecks();
    const entry = checks.find((candidate) => candidate.name === 'provider_errors');
    if (entry === undefined) {
      throw new Error('Expected provider_errors check result');
    }
    return entry;
  };

  it('passes when no model calls are recorded', async () => {
    const entry = await providerErrorsCheckFor(
      makeDeps({
        config: makeConfig({ checks: ['provider_errors'] }),
        getModelCalls: () => [],
      }),
    );

    expect(entry.healthy).toBe(true);
    expect(entry.message).toContain('No model calls');
  });

  it('fails when a provider error rate breaches threshold', async () => {
    // openai: 2 errors out of 4 calls (rate 0.5, with min_calls 4); anthropic: 0 errors out of 2.
    const entry = await providerErrorsCheckFor(
      makeDeps({
        config: makeConfig({
          checks: ['provider_errors'],
          provider_error_min_calls: 4,
          provider_error_rate_threshold: 0.5,
        }),
        getModelCalls: () => [
          { provider: 'openai', error: 'rate limited' },
          { provider: 'openai', error: 'timeout' },
          { provider: 'openai' },
          { provider: 'openai' },
          { provider: 'anthropic' },
          { provider: 'anthropic' },
        ],
      }),
    );

    expect(entry.healthy).toBe(false);
    expect(entry.message).toContain('openai');
  });
});
|
|
});
|