feat(audit): add router-based guard probe harness for pi canary
This commit is contained in:
Executable
+280
@@ -0,0 +1,280 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { dirname, resolve } from 'node:path';
|
||||
import { parseArgs } from 'node:util';
|
||||
import { AgentOrchestrator } from '../src/backends/index.js';
|
||||
import type { Attachment } from '../src/channels/types.js';
|
||||
import type { InboundMessage, OutboundMessage } from '../src/channels/index.js';
|
||||
import { initAuditLogger, auditLogger } from '../src/audit/index.js';
|
||||
import type { AuditEventType } from '../src/audit/types.js';
|
||||
import { createMessageRouter } from '../src/daemon/routing.js';
|
||||
|
||||
/** One entry in the in-memory conversation history kept by the probe harness. */
interface SessionMessage {
  /** Which side of the conversation produced the message. */
  role: 'user' | 'assistant';
  /** Message payload; left opaque here, so it may be a string or any structured value. */
  content: unknown;
  /** Numeric timestamp; the harness stamps entries with `Date.now()` (epoch milliseconds). */
  timestamp: number;
}
|
||||
|
||||
/**
 * Shape of the in-memory session stub that the harness returns from its fake
 * `sessionManager.getSession`.
 */
interface SessionLike {
  /** Session identifier (the raw `<channel>:<sender>` string in this harness). */
  id: string;
  /** Appends a message to the history; the timestamp is assigned by the implementation. */
  addMessage(msg: Pick<SessionMessage, 'role' | 'content'>): void;
  /** Returns the messages recorded so far. */
  getHistory(): SessionMessage[];
  /** Drops every recorded message. */
  clear(): void;
  /** Discards the current history and stores `messages` instead. */
  replaceHistory(messages: SessionMessage[]): void;
  /** Reads a per-session config value, or `undefined` when the key is not set. */
  getConfig(key: string): string | undefined;
  /** Stores a per-session config value under `key`. */
  setConfig(key: string, value: string): void;
  /** Removes the per-session config value stored under `key`, if any. */
  deleteConfig(key: string): void;
}
|
||||
|
||||
/** A single scripted inbound message that the harness pushes through the router. */
interface ProbeCase {
  /** Probe identifier; reused as the inbound message id. */
  id: string;
  /** Text body of the inbound message. */
  text: string;
  /** Optional attachments forwarded with the inbound message. */
  attachments?: Attachment[];
}
|
||||
|
||||
/**
 * Builds the command-line help text for this script.
 *
 * @returns The usage banner as a newline-joined string with no trailing newline.
 */
function usage(): string {
  const helpLines = [
    'Usage: node --import tsx/esm scripts/run-pi-canary-guard-probes.ts [options]',
    '',
    'Options:',
    ' --out-log <path> Output JSONL log path',
    ' --session <id> Session id in <channel>:<sender> format (default: telegram:8367012007)',
  ];
  return helpLines.join('\n');
}
|
||||
|
||||
/**
 * Splits a `<channel>:<sender>` session id at its first colon.
 *
 * Only the first colon is treated as the separator, so the sender part may
 * itself contain colons.
 *
 * @param raw - Session id exactly as supplied via `--session`.
 * @returns The channel (text before the first colon) and sender id (everything after it).
 * @throws Error when there is no colon, or when the channel or the sender part is empty.
 */
function parseSessionId(raw: string): { channel: string; senderId: string } {
  const separatorIndex = raw.indexOf(':');
  // A separator at index 0 means an empty channel; -1 means no separator at all.
  const channelMissing = separatorIndex <= 0;
  // A separator in the last position leaves nothing for the sender.
  const senderMissing = separatorIndex === raw.length - 1;

  if (channelMissing || senderMissing) {
    throw new Error(`Invalid --session "${raw}". Expected <channel>:<sender>.`);
  }

  const channel = raw.slice(0, separatorIndex);
  const senderId = raw.slice(separatorIndex + 1);
  return { channel, senderId };
}
|
||||
|
||||
/**
 * Runs the guard probe harness end to end.
 *
 * Steps:
 *  1. Parse `--out-log`, `--session` and `--help` from the command line.
 *  2. Build an in-memory session and an event buffer.
 *  3. Temporarily stub `AgentOrchestrator.prototype.{process,setModelTier,getModelTier}`
 *     and install a capturing audit logger.
 *  4. Send a fixed list of probe messages through a router built by
 *     `createMessageRouter`, with `pi_embedded` as the default external backend.
 *  5. Write every captured event to the output path as JSONL.
 *  6. Restore the patched prototype methods and the previous audit logger.
 *
 * @returns Resolves once the log file has been written (or immediately after
 *          printing usage when `--help` is given).
 * @throws Propagates argument-parsing errors, an invalid `--session`, router
 *         failures and file-system errors to the caller.
 */
async function main(): Promise<void> {
  type AuditLevel = 'debug' | 'info' | 'warn' | 'error';
  interface CapturedEvent {
    timestamp: number;
    level: AuditLevel;
    event_type: AuditEventType;
    event: Record<string, unknown>;
  }

  const { values } = parseArgs({
    options: {
      'out-log': { type: 'string' },
      session: { type: 'string' },
      help: { type: 'boolean', short: 'h' },
    },
    strict: true,
    allowPositionals: false,
  });

  if (values.help) {
    process.stdout.write(`${usage()}\n`);
    return;
  }

  const sessionId = values.session ?? 'telegram:8367012007';
  const { channel, senderId } = parseSessionId(sessionId);

  const defaultOut = 'docs/plans/artifacts/pi_embedded_eval_window_c_guard_probes.jsonl';
  const outLog = resolve(values['out-log'] ?? defaultOut);

  // Everything the harness observes is buffered here and flushed once at the end.
  const events: CapturedEvent[] = [];

  const pushEvent = (
    level: AuditLevel,
    eventType: AuditEventType,
    event: Record<string, unknown>,
  ): void => {
    events.push({
      timestamp: Date.now(),
      level,
      event_type: eventType,
      event,
    });
  };

  // Minimal in-memory session: history and config live only for this run.
  const sessionConfigs = new Map<string, string>();
  const history: SessionMessage[] = [];
  const session: SessionLike = {
    id: sessionId,
    addMessage(msg) {
      history.push({ ...msg, timestamp: Date.now() });
      // JSON.stringify returns undefined at runtime for undefined, functions and
      // symbols (despite its declared return type), so guard before reading length.
      const serialized: string | undefined =
        typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
      pushEvent('debug', 'session.message', {
        session_id: sessionId,
        role: msg.role,
        content_length: serialized?.length ?? 0,
      });
    },
    getHistory() {
      // Hand out a copy so callers cannot mutate the backing array.
      return [...history];
    },
    clear() {
      history.length = 0;
    },
    replaceHistory(messages) {
      history.length = 0;
      history.push(...messages);
    },
    getConfig(key) {
      return sessionConfigs.get(key);
    },
    setConfig(key, value) {
      sessionConfigs.set(key, value);
    },
    deleteConfig(key) {
      sessionConfigs.delete(key);
    },
  };

  // Audit logger stand-in that records the four event kinds into the buffer.
  const previousAuditLogger = auditLogger;
  const captureLogger = {
    userAction(event: Record<string, unknown>) {
      pushEvent('info', 'user.action', event);
    },
    backendRoute(event: Record<string, unknown>) {
      pushEvent('info', 'backend.route', event);
    },
    backendSuccess(event: Record<string, unknown>) {
      pushEvent('info', 'backend.success', event);
    },
    backendFallback(event: Record<string, unknown>) {
      pushEvent('warn', 'backend.fallback', event);
    },
  };

  // Remember the real implementations so they can be put back in `finally`.
  const originalProcess = AgentOrchestrator.prototype.process;
  const originalSetModelTier = AgentOrchestrator.prototype.setModelTier;
  const originalGetModelTier = AgentOrchestrator.prototype.getModelTier;

  try {
    // Stubs are installed inside the `try` so that any failure from here on
    // still reaches the restore logic below.
    (AgentOrchestrator.prototype.process as unknown as (messageText: string, attachments?: Attachment[]) => Promise<string>)
      = async function stubbedProcess(messageText: string): Promise<string> {
        return `native stub response: ${messageText.slice(0, 40)}`;
      };
    (AgentOrchestrator.prototype.setModelTier as unknown as (tier: string) => void)
      = function stubbedSetModelTier(): void {};
    (AgentOrchestrator.prototype.getModelTier as unknown as () => string)
      = function stubbedGetModelTier(): string {
        return 'default';
      };

    initAuditLogger(captureLogger as unknown as Parameters<typeof initAuditLogger>[0]);

    const router = createMessageRouter({
      sessionManager: {
        // Every lookup resolves to the single in-memory session above.
        getSession: () => session,
      } as Parameters<typeof createMessageRouter>[0]['sessionManager'],
      modelRouter: {
        getAvailableTiers: () => ['fast', 'default', 'complex', 'local'],
        getAllLabels: () => ({ fast: 'fast', default: 'default', complex: 'complex', local: 'local' }),
        getLabel: (tier: string) => tier,
      } as Parameters<typeof createMessageRouter>[0]['modelRouter'],
      systemPrompt: 'test prompt',
      toolRegistry: {
        clone() { return this; },
        register() {},
        list() { return []; },
      } as unknown as Parameters<typeof createMessageRouter>[0]['toolRegistry'],
      toolExecutor: {} as Parameters<typeof createMessageRouter>[0]['toolExecutor'],
      config: {
        agents: {
          primary_tier: 'default',
          delegation: {
            compaction: 'fast',
            memory_extraction: 'fast',
            classification: 'fast',
            tool_summarisation: 'fast',
            complex_reasoning: 'complex',
          },
          max_delegation_depth: 2,
          max_iterations: 5,
          auto_escalate: false,
          autonomy_level: 'standard',
          immutable_denylist: [],
        },
        backends: {
          pi_embedded: { no_tools_mode: true },
        },
        compaction: { enabled: false },
        models: { default: { provider: 'anthropic', model: 'claude' } },
      } as Parameters<typeof createMessageRouter>[0]['config'],
      externalBackends: {
        pi_embedded: {
          name: 'pi_embedded',
          process: async () => 'pi embedded probe response',
        },
      },
      defaultName: 'pi_embedded',
    });

    // Outbound replies are not delivered anywhere; only their text length is logged.
    const reply = async (response: OutboundMessage): Promise<void> => {
      if (response.text) {
        pushEvent('debug', 'session.message', {
          session_id: sessionId,
          role: 'assistant',
          content_length: response.text.length,
        });
      }
    };

    const probes: ProbeCase[] = [
      {
        id: 'probe-plain',
        text: 'hello plain canary turn',
      },
      {
        id: 'probe-pi-no-tools',
        text: 'please run a shell command and read a file',
      },
      {
        id: 'probe-capability',
        text: 'what tools are available?',
      },
      {
        id: 'probe-attachments',
        text: 'normal chat with an attachment only',
        attachments: [
          {
            mimeType: 'image/png',
            data: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII=',
            filename: 'pixel.png',
          },
        ],
      },
    ];

    // Probes run strictly one after another: they share the same session state,
    // and the order of captured events follows the order of the probes.
    for (const probe of probes) {
      const inbound: InboundMessage = {
        id: probe.id,
        channel,
        senderId,
        text: probe.text,
        timestamp: Date.now(),
        ...(probe.attachments ? { attachments: probe.attachments } : {}),
      };
      await router.handler(inbound, reply);
    }

    await mkdir(dirname(outLog), { recursive: true });
    const jsonl = events.map((event) => JSON.stringify(event)).join('\n');
    await writeFile(outLog, `${jsonl}\n`, 'utf-8');

    process.stdout.write(`Wrote ${events.length} events to ${outLog}\n`);
  } finally {
    // Restore the prototype first: these plain assignments cannot fail, so the
    // class is never left patched even if re-installing the logger throws.
    (AgentOrchestrator.prototype.process as unknown as (messageText: string, attachments?: Attachment[]) => Promise<string>)
      = originalProcess as unknown as (messageText: string, attachments?: Attachment[]) => Promise<string>;
    (AgentOrchestrator.prototype.setModelTier as unknown as (tier: string) => void)
      = originalSetModelTier as unknown as (tier: string) => void;
    (AgentOrchestrator.prototype.getModelTier as unknown as () => string)
      = originalGetModelTier as unknown as () => string;
    // NOTE(review): assumes initAuditLogger accepts a previously exported logger
    // instance as its argument — confirm against src/audit.
    initAuditLogger(previousAuditLogger as unknown as Parameters<typeof initAuditLogger>[0]);
  }
}
|
||||
|
||||
// Script entry point: run the harness and, on any failure, print the error
// message followed by the usage text to stderr and mark the process as failed.
// `process.exitCode` is set instead of calling `process.exit()`.
main().catch((error: unknown) => {
  const message = error instanceof Error ? error.message : String(error);
  process.stderr.write(`${message}\n\n${usage()}\n`);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user