#!/usr/bin/env node
/**
 * Probe runner: pushes a fixed list of probe messages through the message
 * router with `pi_embedded` as the default external backend, captures the
 * audit/session events emitted along the way, and writes them to a JSONL file.
 *
 * While running, `AgentOrchestrator.prototype.{process,setModelTier,getModelTier}`
 * and the global audit logger are replaced with in-memory stubs; all of them
 * are restored in a `finally` block.
 */
import { mkdir, writeFile } from 'node:fs/promises';
import { dirname, resolve } from 'node:path';
import { parseArgs } from 'node:util';

import { AgentOrchestrator } from '../src/backends/index.js';
import type { Attachment } from '../src/channels/types.js';
import type { InboundMessage, OutboundMessage } from '../src/channels/index.js';
import { initAuditLogger, auditLogger } from '../src/audit/index.js';
import type { AuditEventType } from '../src/audit/types.js';
import { createMessageRouter } from '../src/daemon/routing.js';

/** Session id used when `--session` is not supplied. */
const DEFAULT_SESSION_ID = 'telegram:8367012007';

/** Output path (relative to the cwd) used when `--out-log` is not supplied. */
const DEFAULT_OUT_LOG = 'docs/plans/artifacts/pi_embedded_eval_window_c_guard_probes.jsonl';

/** A message as stored in the in-memory session history. */
interface SessionMessage {
  role: 'user' | 'assistant';
  content: unknown;
  timestamp: number;
}

/** Minimal session surface implemented by the in-memory fake handed to the router. */
interface SessionLike {
  id: string;
  addMessage(msg: { role: 'user' | 'assistant'; content: unknown }): void;
  getHistory(): SessionMessage[];
  clear(): void;
  replaceHistory(messages: SessionMessage[]): void;
  getConfig(key: string): string | undefined;
  setConfig(key: string, value: string): void;
  deleteConfig(key: string): void;
}

/** One probe message to send through the router. */
interface ProbeCase {
  id: string;
  text: string;
  attachments?: Attachment[];
}

type LogLevel = 'debug' | 'info' | 'warn' | 'error';

/** One line of the output JSONL file. */
interface CapturedEvent {
  timestamp: number;
  level: LogLevel;
  event_type: AuditEventType;
  event: Record<string, unknown>;
}

// Derived from the real signatures so the fakes below are cast to exactly the
// parameter types the router and audit initialiser declare.
type RouterOptions = Parameters<typeof createMessageRouter>[0];
type AuditLoggerArg = Parameters<typeof initAuditLogger>[0];

// Signatures used when monkey-patching AgentOrchestrator.prototype.
type ProcessFn = (messageText: string, attachments?: Attachment[]) => Promise<string>;
type SetModelTierFn = (tier: string) => void;
type GetModelTierFn = () => string;

/** Returns the CLI help text (without a trailing newline). */
function usage(): string {
  return [
    'Usage: node --import tsx/esm scripts/run-pi-canary-guard-probes.ts [options]',
    '',
    'Options:',
    '  --out-log <path>  Output JSONL log path',
    `  --session <id>    Session id in <channel>:<sender> format (default: ${DEFAULT_SESSION_ID})`,
  ].join('\n');
}

/**
 * Splits a `<channel>:<sender>` session id at the first colon.
 *
 * @param raw - Value of `--session`.
 * @returns The text before the first colon as `channel` and everything after it as `senderId`.
 * @throws Error if there is no colon, or the colon is the first or last character.
 */
function parseSessionId(raw: string): { channel: string; senderId: string } {
  const idx = raw.indexOf(':');
  if (idx <= 0 || idx === raw.length - 1) {
    throw new Error(`Invalid --session "${raw}". Expected <channel>:<sender>.`);
  }
  return {
    channel: raw.slice(0, idx),
    senderId: raw.slice(idx + 1),
  };
}

/**
 * Entry point: parses CLI options, installs the stubs, runs every probe
 * sequentially through the router, then writes the captured events as JSONL.
 */
async function main(): Promise<void> {
  const { values } = parseArgs({
    options: {
      'out-log': { type: 'string' },
      session: { type: 'string' },
      help: { type: 'boolean', short: 'h' },
    },
    strict: true,
    allowPositionals: false,
  });

  if (values.help) {
    process.stdout.write(`${usage()}\n`);
    return;
  }

  const sessionId = values.session ?? DEFAULT_SESSION_ID;
  const { channel, senderId } = parseSessionId(sessionId);
  const outLog = resolve(values['out-log'] ?? DEFAULT_OUT_LOG);

  // Everything observed during the run accumulates here, in emission order.
  const events: CapturedEvent[] = [];
  const pushEvent = (
    level: LogLevel,
    eventType: AuditEventType,
    event: Record<string, unknown>,
  ): void => {
    events.push({
      timestamp: Date.now(),
      level,
      event_type: eventType,
      event,
    });
  };

  // In-memory session fake: history and config live only for this process.
  const sessionConfigs = new Map<string, string>();
  const history: SessionMessage[] = [];
  const session: SessionLike = {
    id: sessionId,
    addMessage(msg) {
      history.push({ ...msg, timestamp: Date.now() });
      // JSON.stringify returns undefined for undefined/function/symbol input;
      // fall back to '' so measuring the length cannot throw.
      const serialized =
        typeof msg.content === 'string' ? msg.content : (JSON.stringify(msg.content) ?? '');
      pushEvent('debug', 'session.message', {
        session_id: sessionId,
        role: msg.role,
        content_length: serialized.length,
      });
    },
    getHistory() {
      // Copy so callers cannot mutate the backing array.
      return [...history];
    },
    clear() {
      history.length = 0;
    },
    replaceHistory(messages) {
      history.length = 0;
      history.push(...messages);
    },
    getConfig(key) {
      return sessionConfigs.get(key);
    },
    setConfig(key, value) {
      sessionConfigs.set(key, value);
    },
    deleteConfig(key) {
      sessionConfigs.delete(key);
    },
  };

  // Remember the current logger so it can be reinstalled in `finally`.
  const previousAuditLogger = auditLogger;
  // Audit logger fake: each method records its event instead of logging it.
  const captureLogger = {
    userAction(event: Record<string, unknown>) {
      pushEvent('info', 'user.action', event);
    },
    backendRoute(event: Record<string, unknown>) {
      pushEvent('info', 'backend.route', event);
    },
    backendSuccess(event: Record<string, unknown>) {
      pushEvent('info', 'backend.success', event);
    },
    backendFallback(event: Record<string, unknown>) {
      pushEvent('warn', 'backend.fallback', event);
    },
  };

  // Save the real prototype methods, then swap in deterministic stubs.
  // NOTE(review): presumably this keeps the native orchestrator path from
  // calling a real model during probes — confirm against AgentOrchestrator.
  const originalProcess = AgentOrchestrator.prototype.process;
  const originalSetModelTier = AgentOrchestrator.prototype.setModelTier;
  const originalGetModelTier = AgentOrchestrator.prototype.getModelTier;

  (AgentOrchestrator.prototype.process as unknown as ProcessFn) = async function stubbedProcess(
    messageText: string,
  ): Promise<string> {
    return `native stub response: ${messageText.slice(0, 40)}`;
  };
  (AgentOrchestrator.prototype.setModelTier as unknown as SetModelTierFn) =
    function stubbedSetModelTier(): void {};
  (AgentOrchestrator.prototype.getModelTier as unknown as GetModelTierFn) =
    function stubbedGetModelTier(): string {
      return 'default';
    };

  initAuditLogger(captureLogger as unknown as AuditLoggerArg);

  try {
    const router = createMessageRouter({
      sessionManager: {
        // Every lookup resolves to the single in-memory session.
        getSession: () => session,
      } as RouterOptions['sessionManager'],
      modelRouter: {
        getAvailableTiers: () => ['fast', 'default', 'complex', 'local'],
        getAllLabels: () => ({ fast: 'fast', default: 'default', complex: 'complex', local: 'local' }),
        getLabel: (tier: string) => tier,
      } as RouterOptions['modelRouter'],
      systemPrompt: 'test prompt',
      toolRegistry: {
        clone() {
          return this;
        },
        register() {},
        list() {
          return [];
        },
      } as unknown as RouterOptions['toolRegistry'],
      toolExecutor: {} as RouterOptions['toolExecutor'],
      config: {
        agents: {
          primary_tier: 'default',
          delegation: {
            compaction: 'fast',
            memory_extraction: 'fast',
            classification: 'fast',
            tool_summarisation: 'fast',
            complex_reasoning: 'complex',
          },
          max_delegation_depth: 2,
          max_iterations: 5,
          auto_escalate: false,
          autonomy_level: 'standard',
          immutable_denylist: [],
        },
        backends: {
          pi_embedded: { no_tools_mode: true },
        },
        compaction: { enabled: false },
        models: { default: { provider: 'anthropic', model: 'claude' } },
      } as RouterOptions['config'],
      externalBackends: {
        pi_embedded: {
          name: 'pi_embedded',
          process: async () => 'pi embedded probe response',
        },
      },
      defaultName: 'pi_embedded',
    });

    // Reply callback handed to the router: records the length of any non-empty reply text.
    const reply = async (response: OutboundMessage): Promise<void> => {
      if (response.text) {
        pushEvent('debug', 'session.message', {
          session_id: sessionId,
          role: 'assistant',
          content_length: response.text.length,
        });
      }
    };

    const probes: ProbeCase[] = [
      {
        id: 'probe-plain',
        text: 'hello plain canary turn',
      },
      {
        id: 'probe-pi-no-tools',
        text: 'please run a shell command and read a file',
      },
      {
        id: 'probe-capability',
        text: 'what tools are available?',
      },
      {
        id: 'probe-attachments',
        text: 'normal chat with an attachment only',
        attachments: [
          {
            mimeType: 'image/png',
            data: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII=',
            filename: 'pixel.png',
          },
        ],
      },
    ];

    // Probes run one at a time so the captured events stay in probe order.
    for (const probe of probes) {
      const inbound: InboundMessage = {
        id: probe.id,
        channel,
        senderId,
        text: probe.text,
        timestamp: Date.now(),
        // Only add the key when the probe actually carries attachments.
        ...(probe.attachments ? { attachments: probe.attachments } : {}),
      };
      await router.handler(inbound, reply);
    }

    await mkdir(dirname(outLog), { recursive: true });
    const jsonl = events.map((event) => JSON.stringify(event)).join('\n');
    await writeFile(outLog, `${jsonl}\n`, 'utf-8');
    process.stdout.write(`Wrote ${events.length} events to ${outLog}\n`);
  } finally {
    // Undo every global patch, whether the run succeeded or threw.
    initAuditLogger(previousAuditLogger as unknown as AuditLoggerArg);
    (AgentOrchestrator.prototype.process as unknown as ProcessFn) =
      originalProcess as unknown as ProcessFn;
    (AgentOrchestrator.prototype.setModelTier as unknown as SetModelTierFn) =
      originalSetModelTier as unknown as SetModelTierFn;
    (AgentOrchestrator.prototype.getModelTier as unknown as GetModelTierFn) =
      originalGetModelTier as unknown as GetModelTierFn;
  }
}

// Any failure prints the message followed by the usage text and sets a non-zero exit code.
main().catch((error) => {
  const message = error instanceof Error ? error.message : String(error);
  process.stderr.write(`${message}\n\n${usage()}\n`);
  process.exitCode = 1;
});