281 lines
8.9 KiB
JavaScript
Executable File
281 lines
8.9 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
|
|
import { mkdir, writeFile } from 'node:fs/promises';
|
|
import { dirname, resolve } from 'node:path';
|
|
import { parseArgs } from 'node:util';
|
|
import { AgentOrchestrator } from '../src/backends/index.js';
|
|
import type { Attachment } from '../src/channels/types.js';
|
|
import type { InboundMessage, OutboundMessage } from '../src/channels/index.js';
|
|
import { initAuditLogger, auditLogger } from '../src/audit/index.js';
|
|
import type { AuditEventType } from '../src/audit/types.js';
|
|
import { createMessageRouter } from '../src/daemon/routing.js';
|
|
|
|
/**
 * A single entry in the in-memory conversation history kept by this script.
 */
interface SessionMessage {
  /** Who produced the message. */
  role: 'user' | 'assistant';
  /** Message payload; left as `unknown` because the script never inspects its shape. */
  content: unknown;
  /** Value of `Date.now()` at the moment the message was recorded. */
  timestamp: number;
}
|
|
|
|
/**
 * Shape of the in-memory session object that `main` builds and hands to the
 * message router through its `sessionManager.getSession` stub.
 */
interface SessionLike {
  /** Session identifier; `main` sets it to the `<channel>:<sender>` string. */
  id: string;
  /** Appends a message to the history. */
  addMessage(msg: { role: 'user' | 'assistant'; content: unknown }): void;
  /** Returns the recorded history. */
  getHistory(): SessionMessage[];
  /** Removes every recorded message. */
  clear(): void;
  /** Discards the current history and stores `messages` in its place. */
  replaceHistory(messages: SessionMessage[]): void;
  /** Reads a per-session config value, or `undefined` when the key is not set. */
  getConfig(key: string): string | undefined;
  /** Stores a per-session config value. */
  setConfig(key: string, value: string): void;
  /** Removes a per-session config value. */
  deleteConfig(key: string): void;
}
|
|
|
|
/**
 * One probe message that `main` sends through the message router.
 */
interface ProbeCase {
  /** Used as the inbound message id. */
  id: string;
  /** Used as the inbound message text. */
  text: string;
  /** Optional attachments; only copied onto the inbound message when present. */
  attachments?: Attachment[];
}
|
|
|
|
/**
 * Builds the command-line help text for this script.
 *
 * @returns The usage message as a newline-joined string without a trailing newline.
 */
function usage(): string {
  return [
    'Usage: node --import tsx/esm scripts/run-pi-canary-guard-probes.ts [options]',
    '',
    'Options:',
    ' --out-log <path> Output JSONL log path',
    ' --session <id> Session id in <channel>:<sender> format (default: telegram:8367012007)',
    // The argument parser accepts -h/--help, so it is listed with the other options.
    ' -h, --help Show this help message',
  ].join('\n');
}
|
|
|
|
/**
 * Splits a `<channel>:<sender>` session id at its first colon.
 *
 * Everything after the first colon belongs to the sender, so a sender id may
 * itself contain colons.
 *
 * @param raw - The session id string, e.g. `telegram:8367012007`.
 * @returns The channel and sender parts.
 * @throws Error when there is no colon, or when the channel or sender part is empty.
 */
function parseSessionId(raw: string): { channel: string; senderId: string } {
  const idx = raw.indexOf(':');
  // idx <= 0 covers "no colon" (-1) and "empty channel" (0);
  // a colon in the last position means the sender part is empty.
  if (idx <= 0 || idx === raw.length - 1) {
    throw new Error(`Invalid --session "${raw}". Expected <channel>:<sender>.`);
  }
  return {
    channel: raw.slice(0, idx),
    senderId: raw.slice(idx + 1),
  };
}
|
|
|
|
/**
 * Script entry point.
 *
 * Parses the CLI options, builds an in-memory session, temporarily replaces
 * three `AgentOrchestrator.prototype` methods with stubs, installs a capturing
 * audit logger, sends a fixed list of probe messages through a message router,
 * and writes every captured event to the output log as JSONL.
 *
 * The stubs and the previous audit logger are restored in a `finally` block.
 *
 * @throws Error when `--session` is not in `<channel>:<sender>` format; errors
 *   from argument parsing, the router, or the file system propagate unchanged.
 */
async function main(): Promise<void> {
  const { values } = parseArgs({
    options: {
      'out-log': { type: 'string' },
      session: { type: 'string' },
      help: { type: 'boolean', short: 'h' },
    },
    strict: true,
    allowPositionals: false,
  });

  if (values.help) {
    process.stdout.write(`${usage()}\n`);
    return;
  }

  const sessionId = values.session ?? 'telegram:8367012007';
  const { channel, senderId } = parseSessionId(sessionId);

  const defaultOut = 'docs/plans/artifacts/pi_embedded_eval_window_c_guard_probes.jsonl';
  const outLog = resolve(values['out-log'] ?? defaultOut);

  // Every captured event ends up here and is serialized once all probes have run.
  const events: Array<{
    timestamp: number;
    level: 'debug' | 'info' | 'warn' | 'error';
    event_type: AuditEventType;
    event: Record<string, unknown>;
  }> = [];

  const pushEvent = (
    level: 'debug' | 'info' | 'warn' | 'error',
    eventType: AuditEventType,
    event: Record<string, unknown>,
  ): void => {
    events.push({
      timestamp: Date.now(),
      level,
      event_type: eventType,
      event,
    });
  };

  // In-memory session backing store.
  const sessionConfigs = new Map<string, string>();
  const history: SessionMessage[] = [];
  const session: SessionLike = {
    id: sessionId,
    addMessage(msg) {
      history.push({ ...msg, timestamp: Date.now() });
      // JSON.stringify returns undefined at runtime for values such as
      // `undefined` or a function, so the length is read defensively.
      const serialized: string | undefined =
        typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
      pushEvent('debug', 'session.message', {
        session_id: sessionId,
        role: msg.role,
        content_length: serialized?.length ?? 0,
      });
    },
    getHistory() {
      // Hand out a copy so callers cannot mutate the stored history.
      return [...history];
    },
    clear() {
      history.length = 0;
    },
    replaceHistory(messages) {
      history.length = 0;
      history.push(...messages);
    },
    getConfig(key) {
      return sessionConfigs.get(key);
    },
    setConfig(key, value) {
      sessionConfigs.set(key, value);
    },
    deleteConfig(key) {
      sessionConfigs.delete(key);
    },
  };

  // Logger stand-in that records each call as an event instead of logging it.
  const previousAuditLogger = auditLogger;
  const captureLogger = {
    userAction(event: Record<string, unknown>) {
      pushEvent('info', 'user.action', event);
    },
    backendRoute(event: Record<string, unknown>) {
      pushEvent('info', 'backend.route', event);
    },
    backendSuccess(event: Record<string, unknown>) {
      pushEvent('info', 'backend.success', event);
    },
    backendFallback(event: Record<string, unknown>) {
      pushEvent('warn', 'backend.fallback', event);
    },
  };

  // Single typed view of the prototype so the stubs can be assigned without
  // repeating a cast on every assignment target.
  const orchestratorProto = AgentOrchestrator.prototype as unknown as {
    process: (messageText: string, attachments?: Attachment[]) => Promise<string>;
    setModelTier: (tier: string) => void;
    getModelTier: () => string;
  };
  const originalProcess = orchestratorProto.process;
  const originalSetModelTier = orchestratorProto.setModelTier;
  const originalGetModelTier = orchestratorProto.getModelTier;

  let captureLoggerInstalled = false;

  try {
    // Patching happens inside the try so the finally block restores the
    // prototype even when installing the capture logger throws.
    orchestratorProto.process = async function stubbedProcess(messageText: string): Promise<string> {
      return `native stub response: ${messageText.slice(0, 40)}`;
    };
    orchestratorProto.setModelTier = function stubbedSetModelTier(): void {};
    orchestratorProto.getModelTier = function stubbedGetModelTier(): string {
      return 'default';
    };

    initAuditLogger(captureLogger as unknown as Parameters<typeof initAuditLogger>[0]);
    captureLoggerInstalled = true;

    // The router receives minimal stand-ins cast to its parameter types.
    const router = createMessageRouter({
      sessionManager: {
        getSession: () => session,
      } as Parameters<typeof createMessageRouter>[0]['sessionManager'],
      modelRouter: {
        getAvailableTiers: () => ['fast', 'default', 'complex', 'local'],
        getAllLabels: () => ({ fast: 'fast', default: 'default', complex: 'complex', local: 'local' }),
        getLabel: (tier: string) => tier,
      } as Parameters<typeof createMessageRouter>[0]['modelRouter'],
      systemPrompt: 'test prompt',
      toolRegistry: {
        clone() { return this; },
        register() {},
        list() { return []; },
      } as unknown as Parameters<typeof createMessageRouter>[0]['toolRegistry'],
      toolExecutor: {} as Parameters<typeof createMessageRouter>[0]['toolExecutor'],
      config: {
        agents: {
          primary_tier: 'default',
          delegation: {
            compaction: 'fast',
            memory_extraction: 'fast',
            classification: 'fast',
            tool_summarisation: 'fast',
            complex_reasoning: 'complex',
          },
          max_delegation_depth: 2,
          max_iterations: 5,
          auto_escalate: false,
          autonomy_level: 'standard',
          immutable_denylist: [],
        },
        backends: {
          pi_embedded: { no_tools_mode: true },
        },
        compaction: { enabled: false },
        models: { default: { provider: 'anthropic', model: 'claude' } },
      } as Parameters<typeof createMessageRouter>[0]['config'],
      externalBackends: {
        pi_embedded: {
          name: 'pi_embedded',
          process: async () => 'pi embedded probe response',
        },
      },
      defaultName: 'pi_embedded',
    });

    // Reply callback handed to the router: records the size of each text reply.
    const reply = async (response: OutboundMessage): Promise<void> => {
      if (response.text) {
        pushEvent('debug', 'session.message', {
          session_id: sessionId,
          role: 'assistant',
          content_length: response.text.length,
        });
      }
    };

    const probes: ProbeCase[] = [
      {
        id: 'probe-plain',
        text: 'hello plain canary turn',
      },
      {
        id: 'probe-pi-no-tools',
        text: 'please run a shell command and read a file',
      },
      {
        id: 'probe-capability',
        text: 'what tools are available?',
      },
      {
        id: 'probe-attachments',
        text: 'normal chat with an attachment only',
        attachments: [
          {
            mimeType: 'image/png',
            data: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII=',
            filename: 'pixel.png',
          },
        ],
      },
    ];

    // Probes run one at a time, in list order.
    for (const probe of probes) {
      const inbound: InboundMessage = {
        id: probe.id,
        channel,
        senderId,
        text: probe.text,
        timestamp: Date.now(),
        // Only add the key when the probe has attachments.
        ...(probe.attachments ? { attachments: probe.attachments } : {}),
      };
      await router.handler(inbound, reply);
    }

    await mkdir(dirname(outLog), { recursive: true });
    // One JSON document per line, each newline-terminated; an empty event list
    // yields an empty file rather than a single blank line.
    const jsonl = events.map((event) => `${JSON.stringify(event)}\n`).join('');
    await writeFile(outLog, jsonl, 'utf-8');

    process.stdout.write(`Wrote ${events.length} events to ${outLog}\n`);
  } finally {
    // Restore the prototype first: plain assignments that should not throw, so
    // a failing logger restore cannot leave the stubs in place.
    orchestratorProto.process = originalProcess;
    orchestratorProto.setModelTier = originalSetModelTier;
    orchestratorProto.getModelTier = originalGetModelTier;
    if (captureLoggerInstalled) {
      initAuditLogger(previousAuditLogger as unknown as Parameters<typeof initAuditLogger>[0]);
    }
  }
}
|
|
|
|
// Run the script. Any failure is reported on stderr, followed by the usage
// text, and the exit code is set to 1 without forcing an immediate exit.
main().catch((error: unknown) => {
  const message = error instanceof Error ? error.message : String(error);
  process.stderr.write(`${message}\n\n${usage()}\n`);
  process.exitCode = 1;
});
|