feat(audit): add guard-coverage thresholds to canary gate
This commit is contained in:
@@ -206,6 +206,52 @@ describe('summarizeBackendCanary', () => {
|
||||
{ category: 'pi_module_interface', count: 1, pct: 50 },
|
||||
]);
|
||||
});
|
||||
|
||||
it('tracks forced-native guard reasons', () => {
  // Builds one session: a `backend.route` event routed to the native backend by
  // a forced-native guard, followed 10 time units later by an assistant message.
  const guardedSession = <R extends string>(
    at: number,
    sessionId: string,
    senderId: string,
    guardReason: R,
  ) => [
    makeEvent(at, 'backend.route', {
      session_id: sessionId,
      channel: 'telegram',
      sender: senderId,
      selected_backend: 'native',
      source: 'forced_native_guard',
      guard_reason: guardReason,
    }),
    makeEvent(at + 10, 'session.message', {
      session_id: sessionId,
      role: 'assistant',
      content_length: 10,
    }),
  ];

  // Two sessions, each hitting a different guard reason exactly once.
  const events: AuditEvent[] = [
    ...guardedSession(1000, 's1', '1', 'pi_no_tools_mode'),
    ...guardedSession(2000, 's2', '2', 'capability_query'),
  ];

  const summary = summarizeBackendCanary(events, {
    targetBackend: 'pi_embedded',
    baselineBackend: 'native',
  });

  // Each guard reason must be counted once in the route statistics.
  const guardCounts = summary.route_stats.forced_native_guards;
  expect(guardCounts.pi_no_tools_mode).toBe(1);
  expect(guardCounts.capability_query).toBe(1);

  // The rendered report must include the guard section and a seen reason.
  const markdown = renderBackendCanaryMarkdown(summary, {
    targetBackend: 'pi_embedded',
    baselineBackend: 'native',
  });
  for (const fragment of ['Forced Native Guards', 'pi_no_tools_mode']) {
    expect(markdown).toContain(fragment);
  }
});
|
||||
});
|
||||
|
||||
describe('evaluateBackendCanaryGate', () => {
|
||||
@@ -322,4 +368,65 @@ describe('evaluateBackendCanaryGate', () => {
|
||||
actual: '1',
|
||||
}));
|
||||
});
|
||||
|
||||
it('evaluates guard coverage thresholds', () => {
  // Builds one session: a `backend.route` event routed to the native backend by
  // a forced-native guard, followed 10 time units later by an assistant message.
  const guardedSession = <R extends string>(
    at: number,
    sessionId: string,
    senderId: string,
    guardReason: R,
  ) => [
    makeEvent(at, 'backend.route', {
      session_id: sessionId,
      channel: 'telegram',
      sender: senderId,
      selected_backend: 'native',
      source: 'forced_native_guard',
      guard_reason: guardReason,
    }),
    makeEvent(at + 10, 'session.message', {
      session_id: sessionId,
      role: 'assistant',
      content_length: 20,
    }),
  ];

  // The fixture hits pi_no_tools_mode and attachments_present once each;
  // it deliberately contains no capability_query guard event.
  const events: AuditEvent[] = [
    ...guardedSession(1000, 's1', '1', 'pi_no_tools_mode'),
    ...guardedSession(2000, 's2', '2', 'attachments_present'),
  ];

  const summary = summarizeBackendCanary(events, {
    targetBackend: 'pi_embedded',
    baselineBackend: 'native',
  });

  // Require at least one hit for each of the three guard reasons.
  const thresholds = {
    minGuardPiNoToolsCount: 1,
    minGuardCapabilityQueryCount: 1,
    minGuardAttachmentsPresentCount: 1,
  };
  const gate = evaluateBackendCanaryGate(summary, thresholds);

  // Expected criteria, in order: [criterion label, pass flag, reported actual].
  const expectedRows = [
    ['Minimum pi_no_tools_mode guard hits', true, '1'],
    ['Minimum capability_query guard hits', false, '0'],
    ['Minimum attachments_present guard hits', true, '1'],
  ] as const;

  // The test expects the gate to fail overall, with only the
  // capability_query criterion reported as unmet.
  expect(gate.pass).toBe(false);
  expect(gate.criteria).toEqual(
    expectedRows.map(([criterion, pass, actual]) =>
      expect.objectContaining({ criterion, pass, actual }),
    ),
  );
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user