feat(ops): add setup operator pack, heartbeat alert cooldown, and doctor strict mode
This commit is contained in:
@@ -89,6 +89,9 @@ flynn send "What's the weather in London?"
|
|||||||
# Check system health
|
# Check system health
|
||||||
flynn doctor --config ~/.config/flynn/config.yaml
|
flynn doctor --config ~/.config/flynn/config.yaml
|
||||||
|
|
||||||
|
# Treat warnings as failures (useful in CI)
|
||||||
|
flynn doctor --strict
|
||||||
|
|
||||||
# Show current config (secrets masked)
|
# Show current config (secrets masked)
|
||||||
flynn config
|
flynn config
|
||||||
|
|
||||||
@@ -705,6 +708,7 @@ automation:
|
|||||||
heartbeat:
|
heartbeat:
|
||||||
enabled: true
|
enabled: true
|
||||||
interval: "5m" # Check every 5 minutes
|
interval: "5m" # Check every 5 minutes
|
||||||
|
notify_cooldown: "30m" # Suppress repeated alerts inside cooldown window
|
||||||
checks: [gateway, model, channels, memory, disk, process_memory, backup, provider_errors]
|
checks: [gateway, model, channels, memory, disk, process_memory, backup, provider_errors]
|
||||||
notify:
|
notify:
|
||||||
channel: telegram
|
channel: telegram
|
||||||
@@ -731,6 +735,7 @@ automation:
|
|||||||
| `provider_errors` | Model provider error rates stay below threshold |
|
| `provider_errors` | Model provider error rates stay below threshold |
|
||||||
|
|
||||||
The monitor sends a notification when failures reach the configured threshold and a recovery notification when all checks pass again.
|
The monitor sends a notification when failures reach the configured threshold and a recovery notification when all checks pass again.
|
||||||
|
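Failure and recovery notifications are throttled separately by `notify_cooldown`: a notification of the same type is suppressed if the previous one was sent less than `notify_cooldown` ago. A failure alert is still sent immediately when the set of failing checks (or their messages) differs from the last alert.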
|
||||||
|
|
||||||
### Heartbeat Config Fields
|
### Heartbeat Config Fields
|
||||||
|
|
||||||
@@ -738,7 +743,8 @@ The monitor sends a notification when failures reach the configured threshold an
|
|||||||
|-------|----------|-------------|
|
|-------|----------|-------------|
|
||||||
| `enabled` | no | Enable the heartbeat monitor (default: `false`) |
|
| `enabled` | no | Enable the heartbeat monitor (default: `false`) |
|
||||||
| `interval` | no | Check interval: `60s`, `5m`, `1h` (default: `5m`) |
|
| `interval` | no | Check interval: `60s`, `5m`, `1h` (default: `5m`) |
|
||||||
| `checks` | no | Which checks to run (default: all five) |
|
| `notify_cooldown` | no | Minimum time between repeated heartbeat notifications of the same type (default: `30m`) |
|
||||||
|
| `checks` | no | Which checks to run (default: `gateway, model, channels, memory, disk, process_memory, backup, provider_errors`) |
|
||||||
| `notify.channel` | no | Channel to send failure/recovery notifications |
|
| `notify.channel` | no | Channel to send failure/recovery notifications |
|
||||||
| `notify.peer` | no | Peer/chat ID for notifications |
|
| `notify.peer` | no | Peer/chat ID for notifications |
|
||||||
| `failure_threshold` | no | Consecutive failures before notifying (default: `2`) |
|
| `failure_threshold` | no | Consecutive failures before notifying (default: `2`) |
|
||||||
@@ -748,6 +754,23 @@ The monitor sends a notification when failures reach the configured threshold an
|
|||||||
| `provider_error_rate_threshold` | no | Error-rate threshold (0..1) for `provider_errors` check (default: `0.5`) |
|
| `provider_error_rate_threshold` | no | Error-rate threshold (0..1) for `provider_errors` check (default: `0.5`) |
|
||||||
| `provider_error_min_calls` | no | Minimum provider calls before applying error-rate threshold (default: `5`) |
|
| `provider_error_min_calls` | no | Minimum provider calls before applying error-rate threshold (default: `5`) |
|
||||||
|
|
||||||
|
### Common Schedules and Routing
|
||||||
|
|
||||||
|
- Nightly backups to Telegram alerts:
|
||||||
|
- `backup.schedule: "0 2 * * *"`
|
||||||
|
- `backup.notify.channel: telegram`
|
||||||
|
- Weekday daily briefing to Discord:
|
||||||
|
- `automation.daily_briefing.schedule: "0 8 * * 1-5"`
|
||||||
|
- `automation.daily_briefing.output.channel: discord`
|
||||||
|
- High-frequency heartbeat to Slack:
|
||||||
|
- `automation.heartbeat.interval: "2m"`
|
||||||
|
- `automation.heartbeat.notify.channel: slack`
|
||||||
|
- MinIO sync every 6h to WebChat:
|
||||||
|
- `automation.minio_sync.interval: "6h"`
|
||||||
|
- `automation.minio_sync.notify.channel: webchat`
|
||||||
|
|
||||||
|
`flynn setup` now includes an Operator Pack option in Automation that preconfigures scheduled backups, heartbeat alerts, a daily briefing, and a default MinIO sync task.
|
||||||
|
|
||||||
## Gmail Pub/Sub Watcher
|
## Gmail Pub/Sub Watcher
|
||||||
|
|
||||||
Monitor a Gmail inbox and forward new messages into the agent pipeline.
|
Monitor a Gmail inbox and forward new messages into the agent pipeline.
|
||||||
|
|||||||
@@ -317,6 +317,7 @@ hooks:
|
|||||||
# heartbeat:
|
# heartbeat:
|
||||||
# enabled: false
|
# enabled: false
|
||||||
# interval: "5m"
|
# interval: "5m"
|
||||||
|
# notify_cooldown: "30m"
|
||||||
# checks: [gateway, model, channels, memory, disk, process_memory, backup, provider_errors]
|
# checks: [gateway, model, channels, memory, disk, process_memory, backup, provider_errors]
|
||||||
# notify:
|
# notify:
|
||||||
# channel: telegram
|
# channel: telegram
|
||||||
|
|||||||
+22
-1
@@ -189,6 +189,27 @@
|
|||||||
],
|
],
|
||||||
"test_status": "pnpm test:run src/cli/minioExtractors.test.ts src/cli/doctor.test.ts + pnpm typecheck passing"
|
"test_status": "pnpm test:run src/cli/minioExtractors.test.ts src/cli/doctor.test.ts + pnpm typecheck passing"
|
||||||
},
|
},
|
||||||
|
"operator-pack-heartbeat-throttle-and-doctor-strict": {
|
||||||
|
"status": "completed",
|
||||||
|
"date": "2026-02-16",
|
||||||
|
"updated": "2026-02-16",
|
||||||
|
"summary": "Implemented operator-focused hardening and onboarding polish: added a setup Automation operator-pack path that preconfigures scheduled backups, heartbeat alerts, daily briefing, and default MinIO sync; added heartbeat notification throttling via `automation.heartbeat.notify_cooldown`; and added `flynn doctor --strict` to treat warnings as failures. Updated docs/default config examples accordingly.",
|
||||||
|
"files_modified": [
|
||||||
|
"src/cli/setup/config.ts",
|
||||||
|
"src/cli/setup/config.test.ts",
|
||||||
|
"src/cli/setup/automation.ts",
|
||||||
|
"src/automation/heartbeat.ts",
|
||||||
|
"src/automation/heartbeat.test.ts",
|
||||||
|
"src/config/schema.ts",
|
||||||
|
"src/config/schema.test.ts",
|
||||||
|
"src/cli/doctor.ts",
|
||||||
|
"src/cli/doctor.test.ts",
|
||||||
|
"config/default.yaml",
|
||||||
|
"README.md",
|
||||||
|
"docs/plans/state.json"
|
||||||
|
],
|
||||||
|
"test_status": "pnpm test:run src/cli/setup/config.test.ts src/automation/heartbeat.test.ts src/config/schema.test.ts src/cli/doctor.test.ts + pnpm typecheck passing"
|
||||||
|
},
|
||||||
"backup-session-summary-audit-trail": {
|
"backup-session-summary-audit-trail": {
|
||||||
"status": "completed",
|
"status": "completed",
|
||||||
"date": "2026-02-16",
|
"date": "2026-02-16",
|
||||||
@@ -3473,7 +3494,7 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"overall_progress": {
|
"overall_progress": {
|
||||||
"total_test_count": 1859,
|
"total_test_count": 1863,
|
||||||
"all_tests_passing": true,
|
"all_tests_passing": true,
|
||||||
"p0_completion": "3/3 (100%)",
|
"p0_completion": "3/3 (100%)",
|
||||||
"p1_completion": "4/4 (100%)",
|
"p1_completion": "4/4 (100%)",
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ function makeConfig(overrides?: Partial<HeartbeatConfig>): HeartbeatConfig {
|
|||||||
return {
|
return {
|
||||||
enabled: true,
|
enabled: true,
|
||||||
interval: '5m',
|
interval: '5m',
|
||||||
|
notify_cooldown: '30m',
|
||||||
checks: ['gateway', 'model', 'channels', 'memory', 'disk', 'process_memory', 'backup'],
|
checks: ['gateway', 'model', 'channels', 'memory', 'disk', 'process_memory', 'backup'],
|
||||||
failure_threshold: 2,
|
failure_threshold: 2,
|
||||||
disk_threshold_mb: 100,
|
disk_threshold_mb: 100,
|
||||||
@@ -227,6 +228,34 @@ describe('HeartbeatMonitor', () => {
|
|||||||
expect(mockSend).toHaveBeenCalledTimes(1);
|
expect(mockSend).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('suppresses repeat failure notifications inside notify cooldown after recovery', async () => {
|
||||||
|
const mockSend = vi.fn().mockResolvedValue(undefined);
|
||||||
|
const mockGet = vi.fn().mockReturnValue({ send: mockSend });
|
||||||
|
|
||||||
|
const deps = makeDeps({
|
||||||
|
config: makeConfig({
|
||||||
|
checks: ['model'],
|
||||||
|
failure_threshold: 1,
|
||||||
|
notify_cooldown: '1h',
|
||||||
|
notify: { channel: 'telegram', peer: '123' },
|
||||||
|
}),
|
||||||
|
modelRouter: undefined,
|
||||||
|
channelLookup: { get: mockGet },
|
||||||
|
});
|
||||||
|
monitor = new HeartbeatMonitor(deps);
|
||||||
|
|
||||||
|
await monitor.runChecks();
|
||||||
|
expect(mockSend).toHaveBeenCalledTimes(1);
|
||||||
|
|
||||||
|
Object.assign(deps, { modelRouter: { getTier: () => 'default' } });
|
||||||
|
await monitor.runChecks();
|
||||||
|
expect(mockSend).toHaveBeenCalledTimes(2);
|
||||||
|
|
||||||
|
Object.assign(deps, { modelRouter: undefined });
|
||||||
|
await monitor.runChecks();
|
||||||
|
expect(mockSend).toHaveBeenCalledTimes(2);
|
||||||
|
});
|
||||||
|
|
||||||
it('recovery notification sent when checks pass after failures', async () => {
|
it('recovery notification sent when checks pass after failures', async () => {
|
||||||
const mockSend = vi.fn().mockResolvedValue(undefined);
|
const mockSend = vi.fn().mockResolvedValue(undefined);
|
||||||
const mockGet = vi.fn().mockReturnValue({ send: mockSend });
|
const mockGet = vi.fn().mockReturnValue({ send: mockSend });
|
||||||
|
|||||||
+56
-12
@@ -69,7 +69,11 @@ export class HeartbeatMonitor {
|
|||||||
private timer: ReturnType<typeof setInterval> | undefined;
|
private timer: ReturnType<typeof setInterval> | undefined;
|
||||||
private lastResult: HeartbeatResult | undefined;
|
private lastResult: HeartbeatResult | undefined;
|
||||||
private consecutiveFailures = 0;
|
private consecutiveFailures = 0;
|
||||||
private notifiedFailure = false;
|
private failureAlertSentForCurrentIncident = false;
|
||||||
|
private failureAlertProcessedForCurrentIncident = false;
|
||||||
|
private lastFailureNotificationAt = 0;
|
||||||
|
private lastFailureSignature = '';
|
||||||
|
private lastRecoveryNotificationAt = 0;
|
||||||
private readonly deps: HeartbeatDeps;
|
private readonly deps: HeartbeatDeps;
|
||||||
|
|
||||||
constructor(deps: HeartbeatDeps) {
|
constructor(deps: HeartbeatDeps) {
|
||||||
@@ -172,28 +176,38 @@ export class HeartbeatMonitor {
|
|||||||
// Failure tracking and notification
|
// Failure tracking and notification
|
||||||
if (!healthy) {
|
if (!healthy) {
|
||||||
this.consecutiveFailures++;
|
this.consecutiveFailures++;
|
||||||
if (this.consecutiveFailures >= this.deps.config.failure_threshold && !this.notifiedFailure) {
|
if (this.consecutiveFailures >= this.deps.config.failure_threshold && !this.failureAlertProcessedForCurrentIncident) {
|
||||||
this.notifiedFailure = true;
|
this.failureAlertProcessedForCurrentIncident = true;
|
||||||
const failedChecks = checks.filter((c) => !c.healthy).map((c) => `${c.name}: ${c.message}`);
|
const failedChecks = checks.filter((c) => !c.healthy).map((c) => `${c.name}: ${c.message}`);
|
||||||
await this.notify(`Heartbeat FAILING (${this.consecutiveFailures} consecutive failures):\n${failedChecks.join('\n')}`);
|
const signature = failedChecks.join('|');
|
||||||
|
const sent = await this.notifyFailureWithCooldown(
|
||||||
|
`Heartbeat FAILING (${this.consecutiveFailures} consecutive failures):\n${failedChecks.join('\n')}`,
|
||||||
|
signature,
|
||||||
|
);
|
||||||
|
this.failureAlertSentForCurrentIncident = sent;
|
||||||
|
|
||||||
auditLogger?.heartbeatFail({
|
if (sent) {
|
||||||
checks_failed: failedChecks,
|
auditLogger?.heartbeatFail({
|
||||||
consecutive_failures: this.consecutiveFailures,
|
checks_failed: failedChecks,
|
||||||
threshold: this.deps.config.failure_threshold,
|
consecutive_failures: this.consecutiveFailures,
|
||||||
});
|
threshold: this.deps.config.failure_threshold,
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (this.notifiedFailure) {
|
if (this.failureAlertSentForCurrentIncident) {
|
||||||
// Recovery notification
|
// Recovery notification
|
||||||
await this.notify(`Heartbeat RECOVERED after ${this.consecutiveFailures} consecutive failure(s). All checks passing.`);
|
await this.notifyRecoveryWithCooldown(
|
||||||
|
`Heartbeat RECOVERED after ${this.consecutiveFailures} consecutive failure(s). All checks passing.`,
|
||||||
|
);
|
||||||
|
|
||||||
auditLogger?.heartbeatRecover({
|
auditLogger?.heartbeatRecover({
|
||||||
consecutive_failures_before: this.consecutiveFailures,
|
consecutive_failures_before: this.consecutiveFailures,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
this.consecutiveFailures = 0;
|
this.consecutiveFailures = 0;
|
||||||
this.notifiedFailure = false;
|
this.failureAlertSentForCurrentIncident = false;
|
||||||
|
this.failureAlertProcessedForCurrentIncident = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
auditLogger?.heartbeatCycle({
|
auditLogger?.heartbeatCycle({
|
||||||
@@ -466,4 +480,34 @@ export class HeartbeatMonitor {
|
|||||||
console.error('HeartbeatMonitor: failed to send notification:', err);
|
console.error('HeartbeatMonitor: failed to send notification:', err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether at least `cooldownMs` milliseconds have elapsed since `lastAt`.
 *
 * @param lastAt - Epoch-millisecond timestamp of the previous notification.
 * @param cooldownMs - Length of the cooldown window in milliseconds.
 * @returns `true` when the elapsed time is greater than or equal to the window.
 */
private shouldNotifyByCooldown(lastAt: number, cooldownMs: number): boolean {
  const elapsedMs = Date.now() - lastAt;
  return elapsedMs >= cooldownMs;
}
|
||||||
|
|
||||||
|
/**
 * Sends a failure notification unless it repeats the previous alert inside the
 * cooldown window.
 *
 * The alert is suppressed only when BOTH conditions hold: `signature` equals the
 * signature of the last failure alert that was sent, and the cooldown window
 * (`notify_cooldown`, falling back to `'30m'`) has not yet elapsed since then.
 * A changed signature therefore bypasses the cooldown.
 *
 * NOTE(review): the visible caller flags the incident as processed before
 * calling this method, so a suppressed alert does not appear to be retried once
 * the cooldown elapses during the same incident — confirm this is intended.
 *
 * @param text - Message body handed to `notify`.
 * @param signature - Identity of the current failure set, compared against the last sent alert.
 * @returns `true` if `notify` was invoked, `false` if the alert was suppressed.
 */
private async notifyFailureWithCooldown(text: string, signature: string): Promise<boolean> {
  const windowMs = parseInterval(this.deps.config.notify_cooldown ?? '30m');
  const sameAsLastAlert = signature === this.lastFailureSignature;
  const stillCoolingDown = !this.shouldNotifyByCooldown(this.lastFailureNotificationAt, windowMs);

  if (sameAsLastAlert && stillCoolingDown) {
    return false;
  }

  await this.notify(text);
  // Record the send time and signature only after notify() has completed.
  this.lastFailureNotificationAt = Date.now();
  this.lastFailureSignature = signature;
  return true;
}
|
||||||
|
|
||||||
|
/**
 * Sends a recovery notification unless one was already sent inside the cooldown
 * window (`notify_cooldown`, falling back to `'30m'`).
 *
 * Recovery throttling uses its own timestamp, independent of failure alerts.
 *
 * @param text - Message body handed to `notify`.
 * @returns `true` if `notify` was invoked, `false` if the notification was suppressed.
 */
private async notifyRecoveryWithCooldown(text: string): Promise<boolean> {
  const windowMs = parseInterval(this.deps.config.notify_cooldown ?? '30m');
  const windowElapsed = this.shouldNotifyByCooldown(this.lastRecoveryNotificationAt, windowMs);

  if (windowElapsed) {
    await this.notify(text);
    this.lastRecoveryNotificationAt = Date.now();
    return true;
  }

  return false;
}
|
||||||
}
|
}
|
||||||
|
|||||||
+17
-1
@@ -1,5 +1,5 @@
|
|||||||
import { describe, it, expect, afterEach } from 'vitest';
|
import { describe, it, expect, afterEach } from 'vitest';
|
||||||
import { runChecks, type CheckResult, type DoctorContext } from './doctor.js';
|
import { computeDoctorExitCode, runChecks, type CheckResult, type DoctorContext } from './doctor.js';
|
||||||
import { writeFileSync, mkdirSync, rmSync } from 'fs';
|
import { writeFileSync, mkdirSync, rmSync } from 'fs';
|
||||||
import { join } from 'path';
|
import { join } from 'path';
|
||||||
import { tmpdir } from 'os';
|
import { tmpdir } from 'os';
|
||||||
@@ -11,6 +11,22 @@ describe('doctor checks', () => {
|
|||||||
try { rmSync(testDir, { recursive: true }); } catch {}
|
try { rmSync(testDir, { recursive: true }); } catch {}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('computeDoctorExitCode returns 0 with warnings in non-strict mode', () => {
|
||||||
|
const results: CheckResult[] = [
|
||||||
|
{ status: 'pass', label: 'a' },
|
||||||
|
{ status: 'warn', label: 'b' },
|
||||||
|
];
|
||||||
|
expect(computeDoctorExitCode(results, false)).toBe(0);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('computeDoctorExitCode returns 1 with warnings in strict mode', () => {
|
||||||
|
const results: CheckResult[] = [
|
||||||
|
{ status: 'pass', label: 'a' },
|
||||||
|
{ status: 'warn', label: 'b' },
|
||||||
|
];
|
||||||
|
expect(computeDoctorExitCode(results, true)).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
it('reports PASS when config file exists and is valid', async () => {
|
it('reports PASS when config file exists and is valid', async () => {
|
||||||
mkdirSync(testDir, { recursive: true });
|
mkdirSync(testDir, { recursive: true });
|
||||||
const configPath = join(testDir, 'config.yaml');
|
const configPath = join(testDir, 'config.yaml');
|
||||||
|
|||||||
+18
-2
@@ -632,12 +632,25 @@ export async function runChecks(ctx: DoctorContext): Promise<CheckResult[]> {
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Maps a set of doctor check results to a process exit code.
 *
 * @param results - Check results to evaluate; the array is only read.
 * @param strict - When `true`, any result with status `'warn'` is treated as a failure.
 * @returns `1` if any result has status `'fail'`, or if `strict` is set and any
 *   result has status `'warn'`; otherwise `0` (including for an empty list).
 */
export function computeDoctorExitCode(results: readonly CheckResult[], strict: boolean): number {
  // A hard failure fails the run regardless of mode; `some` stops at the first hit
  // instead of building a filtered array just to inspect its length.
  if (results.some((r) => r.status === 'fail')) {
    return 1;
  }
  // Warnings only affect the exit code in strict mode.
  if (strict && results.some((r) => r.status === 'warn')) {
    return 1;
  }
  return 0;
}
|
||||||
|
|
||||||
export function registerDoctorCommand(program: Command): void {
|
export function registerDoctorCommand(program: Command): void {
|
||||||
program
|
program
|
||||||
.command('doctor')
|
.command('doctor')
|
||||||
.description('Validate configuration and check system health')
|
.description('Validate configuration and check system health')
|
||||||
.option('-c, --config <path>', 'Config file path')
|
.option('-c, --config <path>', 'Config file path')
|
||||||
.action(async (opts: { config?: string }) => {
|
.option('--strict', 'Treat warnings as failures')
|
||||||
|
.action(async (opts: { config?: string; strict?: boolean }) => {
|
||||||
const configPath = opts.config ?? getConfigPath();
|
const configPath = opts.config ?? getConfigPath();
|
||||||
const dataDir = getDataDir();
|
const dataDir = getDataDir();
|
||||||
|
|
||||||
@@ -662,7 +675,10 @@ export function registerDoctorCommand(program: Command): void {
|
|||||||
};
|
};
|
||||||
|
|
||||||
console.log(`Results: ${counts.pass} passed, ${counts.fail} failed, ${counts.warn} warnings, ${counts.skip} skipped`);
|
console.log(`Results: ${counts.pass} passed, ${counts.fail} failed, ${counts.warn} warnings, ${counts.skip} skipped`);
|
||||||
|
if (opts.strict && counts.warn > 0) {
|
||||||
|
console.log('Strict mode enabled: warnings are treated as failures.');
|
||||||
|
}
|
||||||
|
|
||||||
process.exit(counts.fail > 0 ? 1 : 0);
|
process.exit(computeDoctorExitCode(results, Boolean(opts.strict)));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -57,6 +57,30 @@ const GOOGLE_SERVICES: GoogleService[] = [
|
|||||||
];
|
];
|
||||||
|
|
||||||
export async function setupAutomation(p: Prompter, builder: ConfigBuilder): Promise<void> {
|
export async function setupAutomation(p: Prompter, builder: ConfigBuilder): Promise<void> {
|
||||||
|
const enableOperatorPack = await p.confirm(
|
||||||
|
'Enable operator automation pack (scheduled backups + heartbeat alerts + daily briefing + MinIO sync)?',
|
||||||
|
false,
|
||||||
|
);
|
||||||
|
if (enableOperatorPack) {
|
||||||
|
const config = builder.build();
|
||||||
|
const telegramPeer = config.telegram?.allowed_chat_ids?.[0];
|
||||||
|
const defaultOutputChannel = telegramPeer ? 'telegram' : 'webchat';
|
||||||
|
const defaultOutputPeer = telegramPeer ? String(telegramPeer) : 'operator';
|
||||||
|
|
||||||
|
const backupSchedule = await p.ask('Backup cron schedule', '0 2 * * *');
|
||||||
|
const dailyBriefingSchedule = await p.ask('Daily briefing cron schedule', '0 8 * * *');
|
||||||
|
const enableMinioSync = await p.confirm('Include default MinIO sync task?', true);
|
||||||
|
|
||||||
|
builder.applyOperatorPack({
|
||||||
|
outputChannel: defaultOutputChannel,
|
||||||
|
outputPeer: defaultOutputPeer,
|
||||||
|
backupSchedule,
|
||||||
|
dailyBriefingSchedule,
|
||||||
|
enableMinioSync,
|
||||||
|
});
|
||||||
|
p.println(`✓ Operator pack enabled (alerts routed to ${defaultOutputChannel}/${defaultOutputPeer})`);
|
||||||
|
}
|
||||||
|
|
||||||
const cron = await p.confirm('Enable cron scheduler?', false);
|
const cron = await p.confirm('Enable cron scheduler?', false);
|
||||||
if (cron) {
|
if (cron) {
|
||||||
builder.setCronEnabled();
|
builder.setCronEnabled();
|
||||||
|
|||||||
@@ -84,4 +84,23 @@ describe('ConfigBuilder', () => {
|
|||||||
const obj = builder.build();
|
const obj = builder.build();
|
||||||
expect(obj.server.token).toBe('my-secret-token');
|
expect(obj.server.token).toBe('my-secret-token');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('applies operator automation pack defaults', () => {
|
||||||
|
const builder = new ConfigBuilder();
|
||||||
|
builder.applyOperatorPack({
|
||||||
|
outputChannel: 'telegram',
|
||||||
|
outputPeer: '123',
|
||||||
|
backupSchedule: '0 2 * * *',
|
||||||
|
dailyBriefingSchedule: '0 8 * * *',
|
||||||
|
enableMinioSync: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
const obj = builder.build();
|
||||||
|
expect(obj.backup?.enabled).toBe(true);
|
||||||
|
expect(obj.backup?.schedule).toBe('0 2 * * *');
|
||||||
|
expect(obj.backup?.run_on_start).toBe(true);
|
||||||
|
expect((obj.automation as Record<string, unknown>)?.heartbeat).toBeDefined();
|
||||||
|
expect((obj.automation as Record<string, unknown>)?.daily_briefing).toBeDefined();
|
||||||
|
expect((obj.automation as Record<string, unknown>)?.minio_sync).toBeDefined();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -43,9 +43,23 @@ export interface SetupConfig {
|
|||||||
gtasks?: { enabled?: boolean };
|
gtasks?: { enabled?: boolean };
|
||||||
heartbeat?: { enabled?: boolean };
|
heartbeat?: { enabled?: boolean };
|
||||||
} & Record<string, unknown>;
|
} & Record<string, unknown>;
|
||||||
|
backup?: {
|
||||||
|
enabled?: boolean;
|
||||||
|
schedule?: string;
|
||||||
|
run_on_start?: boolean;
|
||||||
|
notify?: { channel: string; peer: string };
|
||||||
|
} & Record<string, unknown>;
|
||||||
[key: string]: unknown;
|
[key: string]: unknown;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Inputs for `ConfigBuilder.applyOperatorPack`.
 *
 * All fields are `readonly`: the options object is treated as input only.
 */
interface OperatorPackOptions {
  /** Channel name written into every notify/output target of the pack. */
  readonly outputChannel: string;
  /** Peer/chat identifier written into every notify/output target of the pack. */
  readonly outputPeer: string;
  /** Schedule string stored as `backup.schedule`. */
  readonly backupSchedule: string;
  /** Schedule string stored as `automation.daily_briefing.schedule`. */
  readonly dailyBriefingSchedule: string;
  /** Whether to add the default MinIO sync task; treated as `true` when omitted. */
  readonly enableMinioSync?: boolean;
}
|
||||||
|
|
||||||
export class ConfigBuilder {
|
export class ConfigBuilder {
|
||||||
private config: SetupConfig;
|
private config: SetupConfig;
|
||||||
|
|
||||||
@@ -187,6 +201,54 @@ export class ConfigBuilder {
|
|||||||
this.config.automation = automation;
|
this.config.automation = automation;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Applies the operator automation pack to the config being built.
 *
 * Enables scheduled backups (merged into any existing `backup` section) and
 * replaces `automation.heartbeat` and `automation.daily_briefing` with pack
 * defaults. Unless `options.enableMinioSync` is `false`, it also replaces
 * `automation.minio_sync` with a single default task. Every notify/output
 * target is set to `options.outputChannel` / `options.outputPeer`.
 *
 * @param options - Routing target, schedules, and the MinIO sync toggle.
 */
applyOperatorPack(options: OperatorPackOptions): void {
  // Build a fresh target object per consumer so no two sections share a reference.
  const target = (): { channel: string; peer: string } => ({
    channel: options.outputChannel,
    peer: options.outputPeer,
  });

  const automationSection = (this.config.automation ?? {}) as Record<string, unknown>;
  const backupSection = (this.config.backup ?? {}) as Record<string, unknown>;

  // Backup keys are assigned individually so unrelated existing keys survive.
  Object.assign(backupSection, {
    enabled: true,
    schedule: options.backupSchedule,
    run_on_start: true,
    notify: target(),
  });

  automationSection.heartbeat = {
    enabled: true,
    notify: target(),
    interval: '5m',
    failure_threshold: 2,
    notify_cooldown: '30m',
  };

  automationSection.daily_briefing = {
    enabled: true,
    schedule: options.dailyBriefingSchedule,
    output: target(),
    dedupe_per_local_day: true,
    model_tier: 'fast',
  };

  // MinIO sync is opt-out: an omitted flag counts as enabled.
  const includeMinioSync = options.enableMinioSync ?? true;
  if (includeMinioSync) {
    const defaultTask = {
      prefix: 'knowledge/',
      namespace_base: 'global/knowledge/minio',
      mode: 'append',
      max_objects: 20,
      max_chars_per_object: 8000,
      force: false,
    };
    automationSection.minio_sync = {
      enabled: true,
      interval: '6h',
      run_on_start: true,
      notify: target(),
      tasks: [defaultTask],
    };
  }

  // Write back in the same order as before: automation first, then backup.
  this.config.automation = automationSection;
  this.config.backup = backupSection;
}
|
||||||
|
|
||||||
build(): SetupConfig {
|
build(): SetupConfig {
|
||||||
return structuredClone(this.config) as SetupConfig;
|
return structuredClone(this.config) as SetupConfig;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1002,6 +1002,7 @@ describe('configSchema automation', () => {
|
|||||||
|
|
||||||
it('defaults heartbeat extended thresholds and checks', () => {
|
it('defaults heartbeat extended thresholds and checks', () => {
|
||||||
const result = configSchema.parse(baseConfig);
|
const result = configSchema.parse(baseConfig);
|
||||||
|
expect(result.automation.heartbeat.notify_cooldown).toBe('30m');
|
||||||
expect(result.automation.heartbeat.process_memory_threshold_mb).toBe(1500);
|
expect(result.automation.heartbeat.process_memory_threshold_mb).toBe(1500);
|
||||||
expect(result.automation.heartbeat.backup_failure_threshold).toBe(1);
|
expect(result.automation.heartbeat.backup_failure_threshold).toBe(1);
|
||||||
expect(result.automation.heartbeat.provider_error_rate_threshold).toBe(0.5);
|
expect(result.automation.heartbeat.provider_error_rate_threshold).toBe(0.5);
|
||||||
|
|||||||
@@ -308,6 +308,7 @@ const heartbeatCheckSchema = z.enum(['gateway', 'model', 'channels', 'memory', '
|
|||||||
const heartbeatSchema = z.object({
|
const heartbeatSchema = z.object({
|
||||||
enabled: z.boolean().default(false),
|
enabled: z.boolean().default(false),
|
||||||
interval: z.string().default('5m'),
|
interval: z.string().default('5m'),
|
||||||
|
notify_cooldown: z.string().default('30m'),
|
||||||
checks: z.array(heartbeatCheckSchema).default(['gateway', 'model', 'channels', 'memory', 'disk', 'process_memory', 'backup', 'provider_errors']),
|
checks: z.array(heartbeatCheckSchema).default(['gateway', 'model', 'channels', 'memory', 'disk', 'process_memory', 'backup', 'provider_errors']),
|
||||||
notify: z.object({
|
notify: z.object({
|
||||||
channel: z.string().min(1),
|
channel: z.string().min(1),
|
||||||
|
|||||||
Reference in New Issue
Block a user