feat: add heartbeat monitor and vector memory search (Tier 2)
Heartbeat:
- HeartbeatMonitor with 5 checks: gateway, model, channels, memory, disk
- Configurable interval, failure threshold, notification channel
- Recovery notifications when health is restored
- 25 new tests

Vector Memory Search:
- EmbeddingProvider interface with OpenAI, Gemini, Ollama, LlamaCpp backends
- SQLite-backed VectorStore with cosine similarity search
- Text chunker with paragraph-aware splitting and overlap
- HybridSearch merging keyword + vector results with configurable weight
- Background indexer with dirty-namespace tracking
- Graceful fallback to keyword search when embeddings are unavailable
- 51 new tests

Config: automation.heartbeat + memory.embedding schema sections

Total: 950 tests passing, all types clean
This commit is contained in:
@@ -0,0 +1,418 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { HeartbeatMonitor, parseInterval } from './heartbeat.js';
|
||||
import type { HeartbeatDeps } from './heartbeat.js';
|
||||
import type { HeartbeatConfig } from '../config/schema.js';
|
||||
|
||||
/**
 * Build a HeartbeatConfig for tests.
 *
 * Starts from an enabled monitor that runs all five checks every five
 * minutes, then applies `overrides` on top (later keys win).
 */
function makeConfig(overrides: Partial<HeartbeatConfig> = {}): HeartbeatConfig {
  const defaults: HeartbeatConfig = {
    enabled: true,
    interval: '5m',
    checks: ['gateway', 'model', 'channels', 'memory', 'disk'],
    failure_threshold: 2,
    disk_threshold_mb: 100,
  };

  return { ...defaults, ...overrides };
}
|
||||
|
||||
/**
 * Build a full HeartbeatDeps fixture for tests.
 *
 * The baseline has a default config, a model router reporting the 'default'
 * tier, two connected channels, and a `channelLookup.get` mock with no
 * configured return value. Any field in `overrides` replaces the baseline
 * value wholesale.
 */
function makeDeps(overrides: Partial<HeartbeatDeps> = {}): HeartbeatDeps {
  const connectedChannels = [
    { name: 'telegram', status: 'connected' } as any,
    { name: 'webchat', status: 'connected' } as any,
  ];

  const baseline: HeartbeatDeps = {
    config: makeConfig(),
    getGatewayPort: () => 18800,
    modelRouter: { getTier: () => 'default' },
    channelLister: { list: () => connectedChannels },
    memoryDir: '/tmp/flynn-test-memory',
    dataDir: '/tmp',
    channelLookup: { get: vi.fn() },
  };

  return { ...baseline, ...overrides };
}
|
||||
|
||||
describe('parseInterval', () => {
  /** Assert that every input string in `cases` parses to its paired millisecond value. */
  const expectParsed = (cases: ReadonlyArray<readonly [string, number]>): void => {
    for (const [input, expectedMs] of cases) {
      expect(parseInterval(input)).toBe(expectedMs);
    }
  };

  it('parses seconds', () => {
    expectParsed([
      ['60s', 60000],
      ['1s', 1000],
    ]);
  });

  it('parses minutes', () => {
    expectParsed([
      ['5m', 300000],
      ['1m', 60000],
    ]);
  });

  it('parses hours', () => {
    expectParsed([
      ['1h', 3600000],
      ['2h', 7200000],
    ]);
  });

  it('treats bare numbers as seconds', () => {
    expectParsed([['30', 30000]]);
  });

  it('throws on invalid format', () => {
    for (const bad of ['abc', '']) {
      expect(() => parseInterval(bad)).toThrow('Invalid interval format');
    }
  });
});
|
||||
|
||||
describe('HeartbeatMonitor', () => {
  // Undefined until a test assigns it; afterEach tolerates tests that never do.
  let monitor: HeartbeatMonitor | undefined;

  afterEach(() => {
    monitor?.stop();
    monitor = undefined;
    // Restore spies here rather than inline so a failed assertion cannot
    // leak a spied-on global timer function into the following tests.
    vi.restoreAllMocks();
  });

  /** Notification fixture: a recording `send` mock plus a lookup that always returns it. */
  function makeNotifier() {
    const send = vi.fn().mockResolvedValue(undefined);
    const channelLookup = { get: vi.fn().mockReturnValue({ send }) };
    return { send, channelLookup };
  }

  /**
   * Create the shared monitor from `deps`, run one check cycle and return the
   * result for the check called `name`. Throws when that check is missing so
   * a mis-configured test fails with a clear message instead of a TypeError.
   */
  async function runAndFind(deps: HeartbeatDeps, name: string) {
    monitor = new HeartbeatMonitor(deps);
    const { checks } = await monitor.runChecks();
    const found = checks.find((c) => c.name === name);
    if (!found) {
      throw new Error(`No '${name}' check in result: [${checks.map((c) => c.name).join(', ')}]`);
    }
    return found;
  }

  it('start() does nothing when enabled: false', () => {
    monitor = new HeartbeatMonitor(makeDeps({ config: makeConfig({ enabled: false }) }));
    const setIntervalSpy = vi.spyOn(globalThis, 'setInterval');

    monitor.start();

    expect(setIntervalSpy).not.toHaveBeenCalled();
  });

  it('start() sets an interval when enabled', () => {
    // No checks configured, so the immediate run triggered by start() does no I/O.
    monitor = new HeartbeatMonitor(makeDeps({ config: makeConfig({ enabled: true, checks: [] }) }));
    const setIntervalSpy = vi.spyOn(globalThis, 'setInterval');

    monitor.start();

    expect(setIntervalSpy).toHaveBeenCalledWith(expect.any(Function), 300000);
  });

  it('stop() clears the timer', () => {
    monitor = new HeartbeatMonitor(makeDeps({ config: makeConfig({ enabled: true, checks: [] }) }));
    const clearIntervalSpy = vi.spyOn(globalThis, 'clearInterval');

    monitor.start();
    monitor.stop();

    expect(clearIntervalSpy).toHaveBeenCalled();
  });

  it('runChecks() runs all configured checks', async () => {
    monitor = new HeartbeatMonitor(
      makeDeps({ config: makeConfig({ checks: ['model', 'channels'] }) }),
    );

    const result = await monitor.runChecks();

    // Results come back in the order the checks were configured.
    expect(result.checks).toHaveLength(2);
    expect(result.checks[0].name).toBe('model');
    expect(result.checks[1].name).toBe('channels');
  });

  it('returns healthy=true when all checks pass', async () => {
    monitor = new HeartbeatMonitor(
      makeDeps({ config: makeConfig({ checks: ['model', 'channels'] }) }),
    );

    const result = await monitor.runChecks();

    expect(result.healthy).toBe(true);
    expect(result.checks.every((c) => c.healthy)).toBe(true);
  });

  it('returns healthy=false when any check fails', async () => {
    monitor = new HeartbeatMonitor(
      makeDeps({
        config: makeConfig({ checks: ['model', 'channels'] }),
        modelRouter: undefined, // model check will fail
      }),
    );

    const result = await monitor.runChecks();

    expect(result.healthy).toBe(false);
    const modelCheck = result.checks.find((c) => c.name === 'model');
    expect(modelCheck?.healthy).toBe(false);
  });

  it('getLastResult() returns most recent result', async () => {
    monitor = new HeartbeatMonitor(makeDeps({ config: makeConfig({ checks: ['model'] }) }));

    expect(monitor.getLastResult()).toBeUndefined();

    await monitor.runChecks();

    const lastResult = monitor.getLastResult();
    expect(lastResult).toBeDefined();
    expect(lastResult?.checks).toHaveLength(1);
    expect(lastResult?.timestamp).toBeGreaterThan(0);
  });

  it('notification sent after failure_threshold consecutive failures', async () => {
    const { send, channelLookup } = makeNotifier();
    monitor = new HeartbeatMonitor(
      makeDeps({
        config: makeConfig({
          checks: ['model'],
          failure_threshold: 2,
          notify: { channel: 'telegram', peer: '123' },
        }),
        modelRouter: undefined, // will fail
        channelLookup,
      }),
    );

    // First failure — below threshold, no notification
    await monitor.runChecks();
    expect(send).not.toHaveBeenCalled();

    // Second failure — meets threshold, should notify
    await monitor.runChecks();
    expect(send).toHaveBeenCalledTimes(1);
    expect(send).toHaveBeenCalledWith(
      '123',
      expect.objectContaining({ text: expect.stringContaining('FAILING') }),
    );
  });

  it('does not send duplicate failure notifications', async () => {
    const { send, channelLookup } = makeNotifier();
    monitor = new HeartbeatMonitor(
      makeDeps({
        config: makeConfig({
          checks: ['model'],
          failure_threshold: 1,
          notify: { channel: 'telegram', peer: '123' },
        }),
        modelRouter: undefined,
        channelLookup,
      }),
    );

    await monitor.runChecks();
    await monitor.runChecks();
    await monitor.runChecks();

    // Only one failure notification sent
    expect(send).toHaveBeenCalledTimes(1);
  });

  it('recovery notification sent when checks pass after failures', async () => {
    const { send, channelLookup } = makeNotifier();
    const deps = makeDeps({
      config: makeConfig({
        checks: ['model'],
        failure_threshold: 1,
        notify: { channel: 'telegram', peer: '123' },
      }),
      modelRouter: undefined, // start out failing
      channelLookup,
    });
    monitor = new HeartbeatMonitor(deps);

    // Fail once: threshold is 1, so the failure notification goes out immediately.
    await monitor.runChecks();
    expect(send).toHaveBeenCalledTimes(1);

    // The monitor keeps a reference to `deps` (not a copy), so restoring the
    // router on the same object makes the next cycle pass on the SAME monitor,
    // which is what carries the "already notified" state needed for recovery.
    deps.modelRouter = { getTier: () => 'default' };

    await monitor.runChecks();
    expect(send).toHaveBeenCalledTimes(2);
    expect(send).toHaveBeenLastCalledWith(
      '123',
      expect.objectContaining({ text: expect.stringContaining('RECOVERED') }),
    );
  });

  it('no notification when notify config is not set', async () => {
    const deps = makeDeps({
      config: makeConfig({
        checks: ['model'],
        failure_threshold: 1,
        // no notify
      }),
      modelRouter: undefined,
    });
    monitor = new HeartbeatMonitor(deps);

    // Must not throw, must still report the failure, and must never even
    // look up a channel because there is nowhere to send to.
    const first = await monitor.runChecks();
    const second = await monitor.runChecks();

    expect(first.healthy).toBe(false);
    expect(second.healthy).toBe(false);
    expect(deps.channelLookup.get).not.toHaveBeenCalled();
  });

  // ── Individual check tests ───────────────────────────────────

  describe('model check', () => {
    it('passes when model router is available', async () => {
      const check = await runAndFind(
        makeDeps({
          config: makeConfig({ checks: ['model'] }),
          modelRouter: { getTier: () => 'fast' },
        }),
        'model',
      );

      expect(check.healthy).toBe(true);
      expect(check.message).toContain('fast');
    });

    it('fails when model router is undefined', async () => {
      const check = await runAndFind(
        makeDeps({
          config: makeConfig({ checks: ['model'] }),
          modelRouter: undefined,
        }),
        'model',
      );

      expect(check.healthy).toBe(false);
    });
  });

  describe('channels check', () => {
    it('passes when at least one channel is connected', async () => {
      const check = await runAndFind(
        makeDeps({
          config: makeConfig({ checks: ['channels'] }),
          channelLister: {
            list: () => [
              { name: 'telegram', status: 'connected' } as any,
              { name: 'webchat', status: 'disconnected' } as any,
            ],
          },
        }),
        'channels',
      );

      expect(check.healthy).toBe(true);
      expect(check.message).toContain('1/2 connected');
      expect(check.message).toContain('webchat');
    });

    it('fails when no channels are connected', async () => {
      const check = await runAndFind(
        makeDeps({
          config: makeConfig({ checks: ['channels'] }),
          channelLister: {
            list: () => [{ name: 'telegram', status: 'disconnected' } as any],
          },
        }),
        'channels',
      );

      expect(check.healthy).toBe(false);
    });
  });

  describe('memory check', () => {
    it('passes when memory is disabled', async () => {
      const check = await runAndFind(
        makeDeps({
          config: makeConfig({ checks: ['memory'] }),
          memoryDir: undefined,
        }),
        'memory',
      );

      expect(check.healthy).toBe(true);
      expect(check.message).toContain('disabled');
    });

    it('fails when memory dir is not accessible', async () => {
      const check = await runAndFind(
        makeDeps({
          config: makeConfig({ checks: ['memory'] }),
          memoryDir: '/nonexistent/path/that/does/not/exist',
        }),
        'memory',
      );

      expect(check.healthy).toBe(false);
    });
  });

  describe('disk check', () => {
    it('passes when enough disk space available', async () => {
      const check = await runAndFind(
        makeDeps({
          config: makeConfig({ checks: ['disk'], disk_threshold_mb: 1 }),
          dataDir: '/tmp',
        }),
        'disk',
      );

      expect(check.healthy).toBe(true);
      expect(check.message).toContain('MB available');
    });

    it('fails when disk space is below threshold', async () => {
      const check = await runAndFind(
        makeDeps({
          config: makeConfig({ checks: ['disk'], disk_threshold_mb: 999999999 }),
          dataDir: '/tmp',
        }),
        'disk',
      );

      expect(check.healthy).toBe(false);
      expect(check.message).toContain('Low disk space');
    });

    it('fails when dataDir does not exist', async () => {
      const check = await runAndFind(
        makeDeps({
          config: makeConfig({ checks: ['disk'] }),
          dataDir: '/nonexistent/path/that/does/not/exist',
        }),
        'disk',
      );

      expect(check.healthy).toBe(false);
    });
  });
});
|
||||
@@ -0,0 +1,307 @@
|
||||
import { statfsSync, accessSync, constants as fsConstants } from 'fs';
|
||||
import { request } from 'http';
|
||||
import type { HeartbeatConfig, HeartbeatCheck } from '../config/schema.js';
|
||||
import type { ChannelAdapter, ChannelStatus, OutboundMessage } from '../channels/types.js';
|
||||
|
||||
/**
 * Outcome of one individual health check within a heartbeat cycle.
 */
export interface CheckResult {
  /** Which configured check produced this result. */
  name: HeartbeatCheck;
  /** `true` when the check passed. */
  healthy: boolean;
  /** Human-readable detail: a status summary on success, the reason on failure. */
  message: string;
  /** Elapsed wall-clock time of the check, in milliseconds. */
  durationMs: number;
}
|
||||
|
||||
/**
 * Aggregate outcome of one complete heartbeat cycle.
 */
export interface HeartbeatResult {
  /** `true` only when every entry in `checks` is healthy. */
  healthy: boolean;
  /** Per-check results, in the order the checks were configured. */
  checks: CheckResult[];
  /** `Date.now()` value captured when the cycle finished. */
  timestamp: number;
}
|
||||
|
||||
/**
 * The narrow slice of a channel registry the monitor needs in order to send
 * notifications: resolve a channel by name to something that can `send`.
 */
interface ChannelLookup {
  /** Returns the sender registered under `name`, or `undefined` when there is none. */
  get(
    name: string,
  ): { send(peerId: string, message: OutboundMessage): Promise<void> } | undefined;
}
|
||||
|
||||
/**
 * The narrow slice of a channel registry the monitor needs in order to
 * inspect adapter connection state.
 */
interface ChannelLister {
  /** Returns the channel adapters whose `status` the channels check inspects. */
  list(): ChannelAdapter[];
}
|
||||
|
||||
/**
 * Everything HeartbeatMonitor needs from the outside world, injected so the
 * monitor can be exercised in isolation.
 */
export interface HeartbeatDeps {
  /** Heartbeat settings: enablement, interval, checks, thresholds, notify target. */
  config: HeartbeatConfig;
  /** Supplies the local gateway port probed by the gateway check. */
  getGatewayPort: () => number;
  /** Model router handle; `undefined` makes the model check fail. */
  modelRouter: { getTier(): string } | undefined;
  /** Source of channel adapters for the channels check. */
  channelLister: ChannelLister;
  /** Memory store directory; `undefined` means the memory store is disabled. */
  memoryDir: string | undefined;
  /** Directory whose filesystem is measured by the disk check. */
  dataDir: string;
  /** Resolves the configured notification channel by name. */
  channelLookup: ChannelLookup;
}
|
||||
|
||||
/**
 * Parse a human-friendly interval string into milliseconds.
 *
 * Accepted form: a non-negative decimal number optionally followed by a unit
 * (`s` seconds, `m` minutes, `h` hours; case-insensitive). Surrounding
 * whitespace and whitespace between the number and the unit are ignored.
 * A bare number is treated as seconds.
 *
 * Examples: `'60s'` → 60000, `'5m'` → 300000, `'1h'` → 3600000,
 * `'30'` → 30000, `'1.5m'` → 90000.
 *
 * Note that `'0'` is syntactically valid and yields 0; callers that feed the
 * result to a timer must reject or clamp it themselves.
 *
 * @param interval - The interval string to parse.
 * @returns The interval in milliseconds, rounded to the nearest integer.
 * @throws Error with a message starting "Invalid interval format" when the
 *         string does not match the accepted form.
 */
export function parseInterval(interval: string): number {
  const match = /^(\d+(?:\.\d+)?)\s*(s|m|h)?$/i.exec(interval.trim());
  if (!match) {
    throw new Error(`Invalid interval format: '${interval}'. Use e.g. '60s', '5m', '1h'.`);
  }

  // Group 1 always participates in a successful match; group 2 is optional,
  // and a missing unit means seconds.
  const [, amount = '', rawUnit = 's'] = match;
  const value = parseFloat(amount);

  // The regex only admits s/m/h, so anything that is not h or m is seconds.
  switch (rawUnit.toLowerCase()) {
    case 'h':
      return Math.round(value * 60 * 60 * 1000);
    case 'm':
      return Math.round(value * 60 * 1000);
    default:
      return Math.round(value * 1000);
  }
}
|
||||
|
||||
/**
 * Runs the configured health checks on a fixed interval, remembers the most
 * recent result, and sends a notification when the system has been failing
 * for `failure_threshold` consecutive cycles and again when it recovers.
 *
 * The `deps` object is held by reference, so later changes the owner makes to
 * it (for example swapping `modelRouter`) are seen by subsequent cycles.
 */
export class HeartbeatMonitor {
  /**
   * Largest delay a Node timer honours. A delay above this value (or below 1)
   * is silently coerced to 1 ms, which would turn a very long interval into a
   * hot loop.
   */
  private static readonly MAX_TIMER_DELAY_MS = 2147483647;

  private timer: ReturnType<typeof setInterval> | undefined;
  private lastResult: HeartbeatResult | undefined;
  /** Number of unhealthy cycles since the last healthy one. */
  private consecutiveFailures = 0;
  /** Whether a failure notification has been issued for the current outage. */
  private notifiedFailure = false;
  private readonly deps: HeartbeatDeps;

  constructor(deps: HeartbeatDeps) {
    this.deps = deps;
  }

  /**
   * Start the heartbeat monitor: schedule periodic cycles and kick one off
   * immediately. Does nothing if the monitor is disabled or already running.
   *
   * @throws Error when the configured interval is malformed or shorter than 1 ms.
   */
  start(): void {
    const { config } = this.deps;
    if (!config.enabled) return;

    // A second setInterval would overwrite `this.timer` and orphan the first
    // handle, which stop() could then never clear.
    if (this.timer !== undefined) return;

    const requestedMs = parseInterval(config.interval);
    if (requestedMs < 1) {
      // Node coerces delays below 1 to 1 ms; refuse rather than spin.
      throw new Error(`Invalid heartbeat interval '${config.interval}': must be greater than zero.`);
    }

    const intervalMs = Math.min(requestedMs, HeartbeatMonitor.MAX_TIMER_DELAY_MS);
    if (intervalMs !== requestedMs) {
      console.warn(
        `HeartbeatMonitor: interval '${config.interval}' exceeds the maximum timer delay; clamping to ${intervalMs}ms`,
      );
    }

    console.log(`HeartbeatMonitor: starting (interval=${config.interval}, checks=[${config.checks.join(', ')}])`);

    this.timer = setInterval(() => {
      this.runChecks().catch((err) => {
        console.error('HeartbeatMonitor: unexpected error during check cycle:', err);
      });
    }, intervalMs);

    // Also run immediately on start
    this.runChecks().catch((err) => {
      console.error('HeartbeatMonitor: unexpected error during initial check:', err);
    });
  }

  /** Stop the heartbeat monitor. Safe to call when it is not running. */
  stop(): void {
    if (this.timer === undefined) return;
    clearInterval(this.timer);
    this.timer = undefined;
  }

  /**
   * Run all configured checks, one after another in configured order, record
   * the result, update failure tracking (possibly sending a notification) and
   * return the result.
   */
  async runChecks(): Promise<HeartbeatResult> {
    const checks: CheckResult[] = [];
    for (const check of this.deps.config.checks) {
      checks.push(await this.runCheck(check));
    }

    const heartbeatResult: HeartbeatResult = {
      healthy: checks.every((c) => c.healthy),
      checks,
      timestamp: Date.now(),
    };

    this.lastResult = heartbeatResult;
    await this.recordOutcome(heartbeatResult);

    return heartbeatResult;
  }

  /** Get the most recent heartbeat result, or `undefined` before the first cycle completes. */
  getLastResult(): HeartbeatResult | undefined {
    return this.lastResult;
  }

  /** Dispatch one check by name; anything a check throws becomes an unhealthy result. */
  private async runCheck(check: HeartbeatCheck): Promise<CheckResult> {
    const start = Date.now();

    try {
      switch (check) {
        case 'gateway':
          // `return await` so a rejection is caught by the surrounding try.
          return await this.checkGateway(start);
        case 'model':
          return this.checkModel(start);
        case 'channels':
          return this.checkChannels(start);
        case 'memory':
          return this.checkMemory(start);
        case 'disk':
          return this.checkDisk(start);
        default:
          return { name: check, healthy: false, message: `Unknown check: ${check}`, durationMs: Date.now() - start };
      }
    } catch (err) {
      return {
        name: check,
        healthy: false,
        message: err instanceof Error ? err.message : 'Unknown error',
        durationMs: Date.now() - start,
      };
    }
  }

  /** Update the consecutive-failure state for a finished cycle and send FAILING / RECOVERED notifications. */
  private async recordOutcome(result: HeartbeatResult): Promise<void> {
    if (result.healthy) {
      if (this.notifiedFailure) {
        // Recovery notification — only sent if the outage itself was announced.
        await this.notify(`Heartbeat RECOVERED after ${this.consecutiveFailures} consecutive failure(s). All checks passing.`);
      }
      this.consecutiveFailures = 0;
      this.notifiedFailure = false;
      return;
    }

    this.consecutiveFailures++;

    const thresholdReached = this.consecutiveFailures >= this.deps.config.failure_threshold;
    if (!thresholdReached || this.notifiedFailure) return;

    // Flag first so an ongoing outage produces exactly one failure notification.
    this.notifiedFailure = true;
    const failedChecks = result.checks
      .filter((c) => !c.healthy)
      .map((c) => `${c.name}: ${c.message}`);
    await this.notify(`Heartbeat FAILING (${this.consecutiveFailures} consecutive failures):\n${failedChecks.join('\n')}`);
  }

  // ── Individual checks ──────────────────────────────────────────

  /**
   * Probe `GET /api/health` on the local gateway port. Healthy when the
   * response status is in the 200–399 range. Never rejects: connection
   * errors and the 5 s timeout resolve to an unhealthy result.
   */
  private async checkGateway(start: number): Promise<CheckResult> {
    const port = this.deps.getGatewayPort();

    return new Promise<CheckResult>((resolve) => {
      // Only the first call has any effect; later calls hit an already-settled promise.
      const finish = (healthy: boolean, message: string): void => {
        resolve({ name: 'gateway', healthy, message, durationMs: Date.now() - start });
      };

      const req = request(
        { hostname: '127.0.0.1', port, path: '/api/health', method: 'GET', timeout: 5000 },
        (res) => {
          // Consume the response body
          res.resume();
          const status = res.statusCode;
          const healthy = status !== undefined && status >= 200 && status < 400;
          finish(healthy, `HTTP ${status ?? 'no response'}`);
        },
      );

      req.on('error', (err) => {
        finish(false, err.message);
      });

      req.on('timeout', () => {
        // Settle before destroying: destroy() may surface an 'error' event,
        // and the timeout message is the one that should be reported.
        finish(false, 'Request timed out');
        req.destroy();
      });

      req.end();
    });
  }

  /** Lightweight check: the model router must be present; its active tier is reported. */
  private checkModel(start: number): CheckResult {
    const router = this.deps.modelRouter;
    if (!router) {
      return { name: 'model', healthy: false, message: 'Model router not available', durationMs: Date.now() - start };
    }

    return {
      name: 'model',
      healthy: true,
      message: `Active tier: ${router.getTier()}`,
      durationMs: Date.now() - start,
    };
  }

  /** Healthy if at least one adapter is connected; the message names the ones that are not. */
  private checkChannels(start: number): CheckResult {
    const adapters = this.deps.channelLister.list();
    const disconnectedNames = adapters
      .filter((a) => a.status !== 'connected')
      .map((a) => a.name);
    const connectedCount = adapters.length - disconnectedNames.length;

    const summary = `${connectedCount}/${adapters.length} connected`;
    const message =
      disconnectedNames.length > 0
        ? `${summary} (disconnected: ${disconnectedNames.join(', ')})`
        : summary;

    return { name: 'channels', healthy: connectedCount > 0, message, durationMs: Date.now() - start };
  }

  /** Healthy when the memory store is disabled, or its directory is readable and writable. */
  private checkMemory(start: number): CheckResult {
    const memoryDir = this.deps.memoryDir;
    if (!memoryDir) {
      return { name: 'memory', healthy: true, message: 'Memory store disabled', durationMs: Date.now() - start };
    }

    try {
      accessSync(memoryDir, fsConstants.R_OK | fsConstants.W_OK);
    } catch (err) {
      return {
        name: 'memory',
        healthy: false,
        message: err instanceof Error ? err.message : 'Directory not accessible',
        durationMs: Date.now() - start,
      };
    }

    return { name: 'memory', healthy: true, message: 'Directory accessible', durationMs: Date.now() - start };
  }

  /** Healthy when the filesystem holding `dataDir` has at least `disk_threshold_mb` MB available. */
  private checkDisk(start: number): CheckResult {
    try {
      const stats = statfsSync(this.deps.dataDir);
      // Available block count × block size gives bytes; convert to MiB.
      const availableMb = (stats.bavail * stats.bsize) / (1024 * 1024);
      const thresholdMb = this.deps.config.disk_threshold_mb;
      const healthy = availableMb >= thresholdMb;
      const rounded = Math.round(availableMb);

      return {
        name: 'disk',
        healthy,
        message: healthy
          ? `${rounded} MB available`
          : `Low disk space: ${rounded} MB available (threshold: ${thresholdMb} MB)`,
        durationMs: Date.now() - start,
      };
    } catch (err) {
      return {
        name: 'disk',
        healthy: false,
        message: err instanceof Error ? err.message : 'Failed to check disk',
        durationMs: Date.now() - start,
      };
    }
  }

  // ── Notification ───────────────────────────────────────────────

  /**
   * Best-effort delivery of `text` to the configured notify target. A missing
   * notify config is a silent no-op; an unknown channel or a failed send is
   * logged and swallowed so notification problems never fail a check cycle.
   */
  private async notify(text: string): Promise<void> {
    const notifyConfig = this.deps.config.notify;
    if (!notifyConfig) return;

    const adapter = this.deps.channelLookup.get(notifyConfig.channel);
    if (!adapter) {
      console.warn(`HeartbeatMonitor: notification channel '${notifyConfig.channel}' not found`);
      return;
    }

    try {
      await adapter.send(notifyConfig.peer, { text });
    } catch (err) {
      console.error('HeartbeatMonitor: failed to send notification:', err);
    }
  }
}
|
||||
@@ -1,2 +1,4 @@
|
||||
export { CronScheduler } from './cron.js';
|
||||
export { WebhookHandler } from './webhooks.js';
|
||||
export { HeartbeatMonitor, parseInterval } from './heartbeat.js';
|
||||
export type { HeartbeatResult, HeartbeatDeps, CheckResult } from './heartbeat.js';
|
||||
|
||||
Reference in New Issue
Block a user