feat(audit): add router-based guard probe harness for pi canary

This commit is contained in:
William Valentin
2026-02-23 22:44:26 -08:00
parent 7b80c1e7a4
commit 959216ac5c
2 changed files with 282 additions and 1 deletions
+2 -1
View File
@@ -20,7 +20,8 @@
"typecheck": "tsc --noEmit",
"config:profiles:generate": "node scripts/generate-config-profiles.mjs",
"config:profiles:check": "node scripts/generate-config-profiles.mjs --check",
"audit:backend-canary": "node --import tsx/esm scripts/summarize-backend-canary.ts"
"audit:backend-canary": "node --import tsx/esm scripts/summarize-backend-canary.ts",
"audit:backend-canary:probes": "node --import tsx/esm scripts/run-pi-canary-guard-probes.ts"
},
"keywords": [
"ai",
+280
View File
@@ -0,0 +1,280 @@
#!/usr/bin/env node
import { mkdir, writeFile } from 'node:fs/promises';
import { dirname, resolve } from 'node:path';
import { parseArgs } from 'node:util';
import { AgentOrchestrator } from '../src/backends/index.js';
import type { Attachment } from '../src/channels/types.js';
import type { InboundMessage, OutboundMessage } from '../src/channels/index.js';
import { initAuditLogger, auditLogger } from '../src/audit/index.js';
import type { AuditEventType } from '../src/audit/types.js';
import { createMessageRouter } from '../src/daemon/routing.js';
/**
 * One entry of the in-memory conversation history kept by the probe harness.
 */
interface SessionMessage {
  /** Epoch milliseconds; the stub session in `main` fills this from `Date.now()`. */
  timestamp: number;
  /** Which side of the conversation produced the message. */
  role: 'user' | 'assistant';
  /** Message payload; left as `unknown` because the harness never inspects its shape. */
  content: unknown;
}
/**
 * Minimal session surface implemented by the in-memory stub that `main`
 * hands to the router through `sessionManager.getSession`.
 */
interface SessionLike {
  /** Session identifier; `main` sets it to the raw `<channel>:<sender>` string. */
  id: string;

  // --- conversation history ---

  /** Returns the messages currently held by the session. */
  getHistory(): SessionMessage[];
  /** Appends a message; the implementation is responsible for the timestamp. */
  addMessage(msg: { role: 'user' | 'assistant'; content: unknown }): void;
  /** Swaps the whole history for `messages`. */
  replaceHistory(messages: SessionMessage[]): void;
  /** Drops every stored message. */
  clear(): void;

  // --- per-session key/value configuration ---

  /** Looks up a config value; `undefined` when the key is not set. */
  getConfig(key: string): string | undefined;
  /** Stores `value` under `key`. */
  setConfig(key: string, value: string): void;
  /** Removes `key` from the session config. */
  deleteConfig(key: string): void;
}
/**
 * A single synthetic inbound turn that the harness sends through the router.
 */
interface ProbeCase {
  /** Probe identifier; `main` reuses it as the inbound message id. */
  id: string;
  /** Optional attachments forwarded with the message when present. */
  attachments?: Attachment[];
  /** Message text delivered to the router. */
  text: string;
}
/**
 * Builds the command-line help text for this script.
 *
 * Lists every option accepted by the `parseArgs` call in `main`, including
 * `-h/--help`, which the parser accepts but the help text previously omitted.
 *
 * @returns The help text, lines joined with `\n` and without a trailing newline.
 */
function usage(): string {
  return [
    'Usage: node --import tsx/esm scripts/run-pi-canary-guard-probes.ts [options]',
    '',
    'Options:',
    ' --out-log <path> Output JSONL log path',
    ' --session <id> Session id in <channel>:<sender> format (default: telegram:8367012007)',
    ' -h, --help Show this help text and exit',
  ].join('\n');
}
/**
 * Splits a `<channel>:<sender>` session id at its first colon.
 *
 * Only the first colon separates the two parts, so the sender may itself
 * contain colons (`a:b:c` yields channel `a` and sender `b:c`).
 *
 * @param raw - Session id as given on the command line.
 * @returns The channel and sender parts.
 * @throws Error when there is no colon, or when either side of the first colon is empty.
 */
function parseSessionId(raw: string): { channel: string; senderId: string } {
  // Everything after the first colon belongs to the sender, so re-join the tail.
  const [channel = '', ...tail] = raw.split(':');
  const senderId = tail.join(':');

  // An empty tail (no colon at all) and an empty sender both join to ''.
  const malformed = channel.length === 0 || senderId.length === 0;
  if (malformed) {
    throw new Error(`Invalid --session "${raw}". Expected <channel>:<sender>.`);
  }

  return { channel, senderId };
}
/**
 * Computes the `content_length` value recorded for a session message.
 *
 * Strings are measured directly; anything else is measured by its JSON
 * serialization. `JSON.stringify` returns `undefined` (not a string) for
 * `undefined`, functions and symbols, so those values are reported as 0
 * instead of raising a TypeError on `.length`.
 *
 * @param content - Message payload of unknown shape.
 * @returns Length in UTF-16 code units, or 0 when the value has no JSON form.
 */
function measureContentLength(content: unknown): number {
  if (typeof content === 'string') {
    return content.length;
  }
  const serialized: string | undefined = JSON.stringify(content);
  return serialized?.length ?? 0;
}

/**
 * Entry point of the guard-probe harness.
 *
 * Parses the CLI options, builds a message router around an in-memory session
 * and a stubbed `pi_embedded` external backend, sends a fixed list of probe
 * messages through `router.handler`, and writes every captured event to a
 * JSONL file (one JSON object per line).
 *
 * While the probes run, three `AgentOrchestrator.prototype` methods and the
 * audit logger are replaced with local stubs; the `finally` block puts the
 * previous values back whether or not the probes succeed.
 *
 * @returns Resolves once the log file is written (or immediately after printing help).
 * @throws Propagates errors from argument parsing, session-id validation,
 *   the router, and file-system writes.
 */
async function main(): Promise<void> {
  const { values } = parseArgs({
    options: {
      'out-log': { type: 'string' },
      session: { type: 'string' },
      help: { type: 'boolean', short: 'h' },
    },
    strict: true,
    allowPositionals: false,
  });

  if (values.help) {
    process.stdout.write(`${usage()}\n`);
    return;
  }

  const sessionId = values.session ?? 'telegram:8367012007';
  const { channel, senderId } = parseSessionId(sessionId);
  const defaultOut = 'docs/plans/artifacts/pi_embedded_eval_window_c_guard_probes.jsonl';
  const outLog = resolve(values['out-log'] ?? defaultOut);

  type LogLevel = 'debug' | 'info' | 'warn' | 'error';

  // Every captured event is buffered here and flushed to disk after all probes ran.
  const events: Array<{
    timestamp: number;
    level: LogLevel;
    event_type: AuditEventType;
    event: Record<string, unknown>;
  }> = [];

  const pushEvent = (
    level: LogLevel,
    eventType: AuditEventType,
    event: Record<string, unknown>,
  ): void => {
    events.push({
      timestamp: Date.now(),
      level,
      event_type: eventType,
      event,
    });
  };

  // In-memory session: history and config live only for the duration of this run.
  const sessionConfigs = new Map<string, string>();
  const history: SessionMessage[] = [];
  const session: SessionLike = {
    id: sessionId,
    addMessage(msg) {
      history.push({ ...msg, timestamp: Date.now() });
      pushEvent('debug', 'session.message', {
        session_id: sessionId,
        role: msg.role,
        content_length: measureContentLength(msg.content),
      });
    },
    getHistory() {
      // Hand out a copy so callers cannot mutate the internal array.
      return [...history];
    },
    clear() {
      history.length = 0;
    },
    replaceHistory(messages) {
      history.length = 0;
      history.push(...messages);
    },
    getConfig(key) {
      return sessionConfigs.get(key);
    },
    setConfig(key, value) {
      sessionConfigs.set(key, value);
    },
    deleteConfig(key) {
      sessionConfigs.delete(key);
    },
  };

  // Remember the currently exported logger so it can be reinstalled afterwards.
  // NOTE(review): assumes `auditLogger` already holds a value that
  // `initAuditLogger` accepts at this point — confirm against src/audit.
  const previousAuditLogger = auditLogger;

  // Logger replacement that records the four event kinds into `events`.
  const captureLogger = {
    userAction(event: Record<string, unknown>) {
      pushEvent('info', 'user.action', event);
    },
    backendRoute(event: Record<string, unknown>) {
      pushEvent('info', 'backend.route', event);
    },
    backendSuccess(event: Record<string, unknown>) {
      pushEvent('info', 'backend.success', event);
    },
    backendFallback(event: Record<string, unknown>) {
      pushEvent('warn', 'backend.fallback', event);
    },
  };

  // Keep the real prototype methods so the `finally` block can restore them.
  const originalProcess = AgentOrchestrator.prototype.process;
  const originalSetModelTier = AgentOrchestrator.prototype.setModelTier;
  const originalGetModelTier = AgentOrchestrator.prototype.getModelTier;

  // Replace orchestrator behaviour with canned stubs for the duration of the probes.
  (AgentOrchestrator.prototype.process as unknown as (messageText: string, attachments?: Attachment[]) => Promise<string>)
    = async function stubbedProcess(messageText: string): Promise<string> {
      return `native stub response: ${messageText.slice(0, 40)}`;
    };
  (AgentOrchestrator.prototype.setModelTier as unknown as (tier: string) => void)
    = function stubbedSetModelTier(): void {};
  (AgentOrchestrator.prototype.getModelTier as unknown as () => string)
    = function stubbedGetModelTier(): string {
      return 'default';
    };

  initAuditLogger(captureLogger as unknown as Parameters<typeof initAuditLogger>[0]);

  try {
    // All router dependencies are minimal stand-ins cast to the expected parameter types.
    const router = createMessageRouter({
      sessionManager: {
        getSession: () => session,
      } as Parameters<typeof createMessageRouter>[0]['sessionManager'],
      modelRouter: {
        getAvailableTiers: () => ['fast', 'default', 'complex', 'local'],
        getAllLabels: () => ({ fast: 'fast', default: 'default', complex: 'complex', local: 'local' }),
        getLabel: (tier: string) => tier,
      } as Parameters<typeof createMessageRouter>[0]['modelRouter'],
      systemPrompt: 'test prompt',
      toolRegistry: {
        clone() { return this; },
        register() {},
        list() { return []; },
      } as unknown as Parameters<typeof createMessageRouter>[0]['toolRegistry'],
      toolExecutor: {} as Parameters<typeof createMessageRouter>[0]['toolExecutor'],
      config: {
        agents: {
          primary_tier: 'default',
          delegation: {
            compaction: 'fast',
            memory_extraction: 'fast',
            classification: 'fast',
            tool_summarisation: 'fast',
            complex_reasoning: 'complex',
          },
          max_delegation_depth: 2,
          max_iterations: 5,
          auto_escalate: false,
          autonomy_level: 'standard',
          immutable_denylist: [],
        },
        backends: {
          pi_embedded: { no_tools_mode: true },
        },
        compaction: { enabled: false },
        models: { default: { provider: 'anthropic', model: 'claude' } },
      } as Parameters<typeof createMessageRouter>[0]['config'],
      externalBackends: {
        pi_embedded: {
          name: 'pi_embedded',
          process: async () => 'pi embedded probe response',
        },
      },
      defaultName: 'pi_embedded',
    });

    // Reply sink: nothing is sent anywhere, only the reply length is logged.
    const reply = async (response: OutboundMessage): Promise<void> => {
      if (response.text) {
        pushEvent('debug', 'session.message', {
          session_id: sessionId,
          role: 'assistant',
          content_length: response.text.length,
        });
      }
    };

    const probes: ProbeCase[] = [
      {
        id: 'probe-plain',
        text: 'hello plain canary turn',
      },
      {
        id: 'probe-pi-no-tools',
        text: 'please run a shell command and read a file',
      },
      {
        id: 'probe-capability',
        text: 'what tools are available?',
      },
      {
        id: 'probe-attachments',
        text: 'normal chat with an attachment only',
        attachments: [
          {
            mimeType: 'image/png',
            data: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII=',
            filename: 'pixel.png',
          },
        ],
      },
    ];

    // Probes run strictly one after another so the event log keeps their order.
    for (const probe of probes) {
      const inbound: InboundMessage = {
        id: probe.id,
        channel,
        senderId,
        text: probe.text,
        timestamp: Date.now(),
        ...(probe.attachments ? { attachments: probe.attachments } : {}),
      };
      await router.handler(inbound, reply);
    }

    await mkdir(dirname(outLog), { recursive: true });
    const jsonl = events.map((event) => JSON.stringify(event)).join('\n');
    await writeFile(outLog, `${jsonl}\n`, 'utf-8');
    process.stdout.write(`Wrote ${events.length} events to ${outLog}\n`);
  } finally {
    // Undo the logger swap and the prototype patches, even when a probe failed.
    initAuditLogger(previousAuditLogger as unknown as Parameters<typeof initAuditLogger>[0]);
    (AgentOrchestrator.prototype.process as unknown as (messageText: string, attachments?: Attachment[]) => Promise<string>)
      = originalProcess as unknown as (messageText: string, attachments?: Attachment[]) => Promise<string>;
    (AgentOrchestrator.prototype.setModelTier as unknown as (tier: string) => void)
      = originalSetModelTier as unknown as (tier: string) => void;
    (AgentOrchestrator.prototype.getModelTier as unknown as () => string)
      = originalGetModelTier as unknown as () => string;
  }
}
// Script entry: run the harness; on any failure print the error message
// followed by the usage text to stderr and mark the process as failed.
void main().then(undefined, (failure: unknown): void => {
  let detail: string;
  if (failure instanceof Error) {
    detail = failure.message;
  } else {
    detail = String(failure);
  }
  process.stderr.write([detail, '', usage(), ''].join('\n'));
  process.exitCode = 1;
});