feat(heartbeat): add provider error-rate spike check
This commit is contained in:
@@ -13,6 +13,8 @@ function makeConfig(overrides?: Partial<HeartbeatConfig>): HeartbeatConfig {
|
||||
disk_threshold_mb: 100,
|
||||
process_memory_threshold_mb: 1500,
|
||||
backup_failure_threshold: 1,
|
||||
provider_error_rate_threshold: 0.5,
|
||||
provider_error_min_calls: 5,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
@@ -37,6 +39,7 @@ function makeDeps(overrides?: Partial<HeartbeatDeps>): HeartbeatDeps {
|
||||
hasRun: false,
|
||||
consecutiveFailures: 0,
|
||||
}),
|
||||
getModelCalls: () => [],
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
@@ -513,4 +516,41 @@ describe('HeartbeatMonitor', () => {
|
||||
expect(check.message).toContain('minio unavailable');
|
||||
});
|
||||
});
|
||||
|
||||
describe('provider_errors check', () => {
  /**
   * Builds the dependencies from `overrides`, stores a fresh monitor in the
   * shared `monitor` variable, calls `runChecks()` once and returns the entry
   * named `provider_errors`.
   *
   * Throws when that entry is missing so the assertions in each test never
   * dereference `undefined`.
   */
  const runProviderErrorsCheck = async (overrides: Partial<HeartbeatDeps>) => {
    monitor = new HeartbeatMonitor(makeDeps(overrides));

    const { checks } = await monitor.runChecks();
    const found = checks.find(({ name }) => name === 'provider_errors');
    if (found === undefined) {
      throw new Error('Expected provider_errors check result');
    }
    return found;
  };

  it('passes when no model calls are recorded', async () => {
    // An empty call history is expected to be reported as healthy.
    const outcome = await runProviderErrorsCheck({
      config: makeConfig({ checks: ['provider_errors'] }),
      getModelCalls: () => [],
    });

    expect(outcome.healthy).toBe(true);
    expect(outcome.message).toContain('No model calls');
  });

  it('fails when a provider error rate breaches threshold', async () => {
    // openai: 2 errors out of 4 calls (0.5), with min calls 4 and threshold 0.5.
    // anthropic: 2 calls, none of them errors.
    const recordedCalls = [
      { provider: 'openai', error: 'rate limited' },
      { provider: 'openai', error: 'timeout' },
      { provider: 'openai' },
      { provider: 'openai' },
      { provider: 'anthropic' },
      { provider: 'anthropic' },
    ];

    const outcome = await runProviderErrorsCheck({
      config: makeConfig({
        checks: ['provider_errors'],
        provider_error_min_calls: 4,
        provider_error_rate_threshold: 0.5,
      }),
      getModelCalls: () => recordedCalls,
    });

    expect(outcome.healthy).toBe(false);
    expect(outcome.message).toContain('openai');
  });
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user