feat(heartbeat): add process memory and backup health checks

This commit is contained in:
William Valentin
2026-02-16 13:50:39 -08:00
parent 8684c3a07d
commit 07340ff0af
11 changed files with 282 additions and 8 deletions
+78 -1
View File
@@ -8,9 +8,11 @@ function makeConfig(overrides?: Partial<HeartbeatConfig>): HeartbeatConfig {
return {
enabled: true,
interval: '5m',
checks: ['gateway', 'model', 'channels', 'memory', 'disk'],
checks: ['gateway', 'model', 'channels', 'memory', 'disk', 'process_memory', 'backup'],
failure_threshold: 2,
disk_threshold_mb: 100,
process_memory_threshold_mb: 1500,
backup_failure_threshold: 1,
...overrides,
};
}
@@ -29,6 +31,12 @@ function makeDeps(overrides?: Partial<HeartbeatDeps>): HeartbeatDeps {
memoryDir: '/tmp/flynn-test-memory',
dataDir: '/tmp',
channelLookup: { get: vi.fn() },
processMemoryUsageMb: () => 256,
backupHealthProvider: () => ({
enabled: false,
hasRun: false,
consecutiveFailures: 0,
}),
...overrides,
};
}
@@ -436,4 +444,73 @@ describe('HeartbeatMonitor', () => {
expect(check.healthy).toBe(false);
});
});
describe('process_memory check', () => {
  /**
   * Builds a monitor that runs only the `process_memory` check, using the
   * given threshold and a stubbed memory reading, then returns that check's
   * entry from the run result.
   *
   * Throws when the result contains no `process_memory` entry so the calling
   * test fails with a clear message rather than on an undefined access.
   */
  const runProcessMemoryCheck = async (thresholdMb: number, reportedMb: number) => {
    monitor = new HeartbeatMonitor(
      makeDeps({
        config: makeConfig({
          checks: ['process_memory'],
          process_memory_threshold_mb: thresholdMb,
        }),
        processMemoryUsageMb: () => reportedMb,
      }),
    );
    const outcome = await monitor.runChecks();
    const entry = outcome.checks.find((candidate) => candidate.name === 'process_memory');
    if (!entry) {
      throw new Error('Expected process_memory check result');
    }
    return entry;
  };

  it('passes when RSS is below threshold', async () => {
    // 200 MB reported against a 512 MB limit.
    const entry = await runProcessMemoryCheck(512, 200);
    expect(entry.healthy).toBe(true);
  });

  it('fails when RSS is above threshold', async () => {
    // 512 MB reported against a 128 MB limit.
    const entry = await runProcessMemoryCheck(128, 512);
    expect(entry.healthy).toBe(false);
    expect(entry.message).toContain('High memory usage');
  });
});
describe('backup check', () => {
  /**
   * Runs the monitor against the supplied dependencies and returns the
   * `backup` entry from the run result.
   *
   * Throws when the result contains no `backup` entry so the calling test
   * fails with a clear message rather than on an undefined access.
   */
  const backupEntryFor = async (deps: ReturnType<typeof makeDeps>) => {
    monitor = new HeartbeatMonitor(deps);
    const outcome = await monitor.runChecks();
    const entry = outcome.checks.find((candidate) => candidate.name === 'backup');
    if (!entry) {
      throw new Error('Expected backup check result');
    }
    return entry;
  };

  it('passes when backup is disabled', async () => {
    // Provider reports backups as disabled and never run.
    const entry = await backupEntryFor(
      makeDeps({
        config: makeConfig({ checks: ['backup'] }),
        backupHealthProvider: () => ({
          enabled: false,
          hasRun: false,
          consecutiveFailures: 0,
        }),
      }),
    );

    expect(entry.healthy).toBe(true);
    expect(entry.message).toContain('disabled');
  });

  it('fails when backup consecutive failures exceed threshold', async () => {
    // Three consecutive failures reported against a configured threshold of two.
    const entry = await backupEntryFor(
      makeDeps({
        config: makeConfig({ checks: ['backup'], backup_failure_threshold: 2 }),
        backupHealthProvider: () => ({
          enabled: true,
          hasRun: true,
          consecutiveFailures: 3,
          lastError: 'minio unavailable',
        }),
      }),
    );

    expect(entry.healthy).toBe(false);
    expect(entry.message).toContain('Backup failing');
    expect(entry.message).toContain('minio unavailable');
  });
});
});