diff --git a/README.md b/README.md index 732cfd7..0dee735 100644 --- a/README.md +++ b/README.md @@ -522,7 +522,7 @@ pnpm tui:fs | `/status` | Show session info | | `/tools` | Show authoritative runtime tool list for this session | | `/research ` | Delegate a task to `agent_configs.research` | -| `/council ` | Run dual D/P councils pipeline with bridge+meta merge | +| `/council ` | Run dual D/P councils pipeline with bridge+meta merge (brief in TUI, full artifacts saved to disk) | | `/compact` | Compact conversation context | | `/usage` | Show token usage and cost | | `/context` | Show estimated context-window usage | @@ -631,6 +631,22 @@ Run from chat: /council design a 30-day plan to cut CI flakiness by 50% ``` +By default, `/council` prints a concise execution brief in the TUI and persists full artifacts under: + +```text +~/.local/share/flynn/councils/ +``` + +When `FLYNN_DATA_DIR` is set, artifacts are written to: + +```text +${FLYNN_DATA_DIR}/councils/ +``` + +Each run saves: +- `*.md` summary report (brief + full conversation trace + raw JSON block) +- `*.json` full structured `CouncilRunResult` + Or via tools: ```json diff --git a/config/default.yaml b/config/default.yaml index 33ec925..e92b3ab 100644 --- a/config/default.yaml +++ b/config/default.yaml @@ -153,9 +153,10 @@ models: # 4. Global fallback_chain (tried in order) # default: - provider: anthropic - model: claude-sonnet-4-20250514 - # auth_mode: auto # auto | api_key | oauth (provider-specific) + provider: openai + model: gpt-5.2 + auth_mode: api_key # auto | api_key | oauth (provider-specific) + api_key: ${OPENAI_API_KEY} # use_oauth: false # compat alias for auth_mode: oauth # api_keys: ["${ANTHROPIC_API_KEY_PRIMARY}", "${ANTHROPIC_API_KEY_SECONDARY}"] # Optional rotation pool # supports_audio: false # Override native audio detection per tier diff --git a/docs/api/COUNCILS.md b/docs/api/COUNCILS.md index 214f7bf..b196019 100644 --- a/docs/api/COUNCILS.md +++ b/docs/api/COUNCILS.md @@ -38,6 +38,16 @@ Flynn supports a deterministic dual-council orchestration pipeline (`council.run } ``` +`council.run` output behavior: + +- Tool output returns a concise execution brief suitable for TUI/chat surfaces. +- Full artifacts are persisted to disk: + - `${FLYNN_DATA_DIR}/councils/` when `FLYNN_DATA_DIR` is set + - otherwise `~/.local/share/flynn/councils/` +- Per run, Flynn writes: + - `-.md` summary report with conversation trace and embedded raw JSON + - `-.json` full structured `CouncilRunResult` + ## Determinism Notes - `prompt_payload_hash` is computed from canonical JSON payload passed to model. diff --git a/src/backends/native/orchestrator.ts b/src/backends/native/orchestrator.ts index 1bec32e..4fc0da1 100644 --- a/src/backends/native/orchestrator.ts +++ b/src/backends/native/orchestrator.ts @@ -1,5 +1,5 @@ import type { ModelRouter, ModelTier } from '../../models/router.js'; -import type { ChatRequest, Message, ModelClient, TokenUsage } from '../../models/types.js'; +import type { ChatRequest, ChatResponseFormat, Message, ModelClient, TokenUsage } from '../../models/types.js'; import type { Session } from '../../session/index.js'; import type { ToolRegistry } from '../../tools/registry.js'; import type { ToolExecutor } from '../../tools/executor.js'; @@ -28,6 +28,8 @@ export interface SubAgentRequest { maxTokens?: number; /** When true, include tools from the toolRegistry in the request. */ tools?: boolean; + /** Optional provider-level response format request (e.g., JSON schema). */ + responseFormat?: ChatResponseFormat; } /** Result returned from a sub-agent delegation call. */ @@ -279,6 +281,7 @@ export class AgentOrchestrator { messages, system: request.systemPrompt, maxTokens: request.maxTokens, + responseFormat: request.responseFormat, }; // Optionally include tools from the registry (filtered by policy) diff --git a/src/cli/tui.ts b/src/cli/tui.ts index 8d508e2..d987eb3 100644 --- a/src/cli/tui.ts +++ b/src/cli/tui.ts @@ -1,5 +1,6 @@ import type { Command } from 'commander'; import type { Config, CouncilsConfig, ModelConfig, ModelProvider } from '../config/index.js'; +import type { ChatResponseFormat } from '../models/types.js'; import { loadConfigSafe, getConfigPath } from './shared.js'; import { existsSync, mkdirSync, readFileSync } from 'fs'; import { resolve } from 'path'; @@ -118,6 +119,7 @@ export function registerTuiCommand(program: Command): void { const { createModelRouter } = await import('../daemon/index.js'); const { AgentConfigRegistry } = await import('../agents/index.js'); const { loadCouncilScaffoldSafe } = await import('../councils/scaffold.js'); + const { buildCouncilPreflightReport, shouldRunCouncilPreflight } = await import('../councils/preflight.js'); const dataDir = process.env.FLYNN_DATA_DIR ?? resolve(homedir(), '.local/share/flynn'); mkdirSync(dataDir, { recursive: true }); @@ -190,11 +192,13 @@ export function registerTuiCommand(program: Command): void { systemPrompt: string; message: string; maxTokens?: number; + responseFormat?: ChatResponseFormat; }) { const response = await modelRouter.chat({ messages: [{ role: 'user', content: request.message }], system: request.systemPrompt, maxTokens: request.maxTokens, + responseFormat: request.responseFormat, }, request.tier); return { content: response.content, @@ -325,6 +329,7 @@ export function registerTuiCommand(program: Command): void { if (!message) { return 'Usage: /research '; } + modelRouter.clearAbort(); const agentConfig = agentConfigRegistry.get('research'); if (!agentConfig) { return 'Agent "research" not found. Configure agent_configs.research first.'; @@ -344,11 +349,21 @@ export function registerTuiCommand(program: Command): void { const runCouncilTask = async (task: string): Promise => { const message = task.trim(); if (!message) { - return 'Usage: /council '; + return 'Usage: /council | /council preflight'; } + modelRouter.clearAbort(); if (!config.councils?.enabled) { return 'Councils are disabled. Set councils.enabled: true in config.'; } + if (shouldRunCouncilPreflight(message)) { + return buildCouncilPreflightReport({ + config, + registry: agentConfigRegistry, + delegateRunner, + activeTier: modelRouter.getTier(), + includeLiveProbe: true, + }); + } const tool = toolRegistry.get('council.run'); if (!tool) { return 'Council tool is not registered. Verify councils config and restart Flynn.'; diff --git a/src/commands/builtin/index.test.ts b/src/commands/builtin/index.test.ts index 1c6c7c7..71792a0 100644 --- a/src/commands/builtin/index.test.ts +++ b/src/commands/builtin/index.test.ts @@ -90,7 +90,7 @@ describe('builtin /council command', () => { rawInput: '/council', services: {}, }); - expect(result).toEqual({ handled: true, text: 'Usage: /council ' }); + expect(result).toEqual({ handled: true, text: 'Usage: /council | /council preflight' }); }); }); diff --git a/src/commands/builtin/index.ts b/src/commands/builtin/index.ts index a36196e..77b0d55 100644 --- a/src/commands/builtin/index.ts +++ b/src/commands/builtin/index.ts @@ -260,7 +260,7 @@ export function createCouncilCommand(): CommandDefinition { if (!task) { return { handled: true, - text: 'Usage: /council ', + text: 'Usage: /council | /council preflight', }; } if (!ctx.services?.runCouncil) { diff --git a/src/councils/orchestrator.test.ts b/src/councils/orchestrator.test.ts index a727853..82a8b88 100644 --- a/src/councils/orchestrator.test.ts +++ b/src/councils/orchestrator.test.ts @@ -152,6 +152,7 @@ describe('CouncilsOrchestrator', () => { expect(result.pipeline_version).toBe('1.0.0'); expect(result.brief_D_v1.ideas[0].idea_id).toBe('D.r1.01'); expect(result.brief_P_v1.ideas[0].idea_id).toBe('P.r1.01'); + expect(typeof result.trace[0]?.latency_ms).toBe('number'); expect(result.stop_snapshot.stop_reason).toBe('convergence'); const callIds = result.trace.map((e) => e.call_id); expect(callIds).toEqual([...callIds].sort((a, b) => { @@ -244,6 +245,102 @@ describe('CouncilsOrchestrator', () => { expect(JSON.stringify(pRound2Payload.peer_bridge)).toContain('D-MARKER'); }); + it('requests json_schema response format for council structured phases', async () => { + const delegate = vi.fn(async ({ message }: { message: string }) => { + const payload = JSON.parse(message); + + if (payload.brief_D && payload.brief_P) { + return { + content: JSON.stringify({ + schema_version: '1.0.0', + selected_primary: [payload.brief_D.shortlist[0]], + selected_secondary: [payload.brief_P.shortlist[0]], + merges: [], + rejections: [], + open_questions: ['q1'], + next_experiments: ['e1'], + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + if (payload.shortlisted_ideas) { + return { + content: JSON.stringify({ + grounded: payload.shortlisted_ideas.map((idea: { idea_id: string }) => ({ + idea_id: idea.idea_id, + mve: `mve-${idea.idea_id}`, + constraints: ['c1'], + falsifiability_checks: ['f1'], + })), + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + if (payload.ideas) { + return { + content: JSON.stringify({ + assessments: payload.ideas.map((idea: { idea_id: string }, idx: number) => ({ + idea_id: idea.idea_id, + scores: { novelty: 60, feasibility: 70, impact: 80, testability: 90 }, + decision: idx === 0 ? 'shortlist' : 'hold', + notes: 'ok', + })), + assumptions: ['a1'], + risks: ['r1'], + asks: ['q1'], + what_to_steal: ['w1'], + convergence_signal: false, + novelty_score: 60, + repetition_rate: 10, + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + return { + content: JSON.stringify({ + ideas: [ + { title: 't1', hypothesis: 'h1', mechanism: 'm1', expected_outcome: 'o1' }, + { title: 't2', hypothesis: 'h2', mechanism: 'm2', expected_outcome: 'o2' }, + ], + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + }); + + const orchestrator = new CouncilsOrchestrator({ + registry: createRegistry(), + orchestrator: { delegate }, + config: createConfig({ defaults: { ...createConfig().defaults, max_rounds: 1 } }), + }); + + await orchestrator.run({ task: 'json schema check' }); + + const schemaNames = delegate.mock.calls + .map((call) => (call[0] as { responseFormat?: { type: string; name?: string } }).responseFormat) + .filter((rf): rf is { type: string; name?: string } => Boolean(rf)) + .filter((rf) => rf.type === 'json_schema') + .map((rf) => rf.name); + + expect(schemaNames).toContain('council_ideation'); + expect(schemaNames).toContain('council_assessment'); + expect(schemaNames).toContain('council_grounding'); + expect(schemaNames).toContain('council_meta'); + + const metaResponseFormat = delegate.mock.calls + .map((call) => (call[0] as { responseFormat?: Record }).responseFormat) + .find((rf) => rf && rf.type === 'json_schema' && rf.name === 'council_meta') as + | { schema?: { required?: string[] } } + | undefined; + expect(metaResponseFormat?.schema?.required).toContain('merges'); + }); + it('fails closed on bridge cap overflow before phase 2 executes', async () => { const delegate = vi.fn(async ({ message }: { message: string }) => { const payload = JSON.parse(message); @@ -314,4 +411,265 @@ describe('CouncilsOrchestrator', () => { expect(result.stop_snapshot.stop_reason).toBe('bridge_validation_failed'); expect(result.stop_snapshot.round_reached).toBe(1); }); + + it('recovers from wrapped or mildly malformed model JSON responses', async () => { + const delegate = vi.fn(async ({ message }: { message: string }) => { + const payload = JSON.parse(message); + if (payload.brief_D && payload.brief_P) { + return { + content: [ + '```json', + '{result: {schema_version: \'1.0.0\', selected_primary: [\'D.r1.01\'], selected_secondary: [\'P.r1.01\'], merges: [], rejections: [], open_questions: [\'q1\'], next_experiments: [\'e1\']}}', + '```', + ].join('\n'), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + if (payload.shortlisted_ideas) { + return { + content: '{"output":{"grounded":[{"idea_id":"D.r1.01","mve":"m","constraints":["c"],"falsifiability_checks":["f"]}]}}', + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + if (payload.ideas) { + return { + content: [ + 'Assessment follows.', + '```json', + '{data: {assessments: [{idea_id: "D.r1.01", scores: {novelty: 60, feasibility: 70, impact: 80, testability: 90}, decision: "shortlist", notes: "ok"}], assumptions: ["a"], risks: ["r"], asks: ["k"], what_to_steal: ["w"], convergence_signal: false, novelty_score: 60, repetition_rate: 20}}', + '```', + ].join('\n'), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + if (payload.group && payload.round) { + return { + content: [ + 'Here is the draft:', + '```json', + '{output: {ideas: [{title: \'t1\', hypothesis: \'h1\', mechanism: \'m1\', expected_outcome: \'o1\'}]}}', + '```', + ].join('\n'), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + throw new Error('Unexpected payload'); + }); + + const orchestrator = new CouncilsOrchestrator({ + registry: createRegistry(), + orchestrator: { delegate }, + config: createConfig(), + }); + + const result = await orchestrator.run({ task: 'x', max_rounds: 1 }); + expect(result.stop_snapshot.round_reached).toBe(1); + expect(result.meta.selected_primary[0]).toBe('D.r1.01'); + expect(result.meta.selected_secondary[0]).toBe('P.r1.01'); + }); + + it('retries once with agent-assisted JSON repair when initial output is invalid', async () => { + const repairPayloads: Array> = []; + const seenIdeation = new Set(); + + const delegate = vi.fn(async ({ message }: { message: string }) => { + const payload = JSON.parse(message) as Record; + + if (payload.task === 'normalize_json_output') { + repairPayloads.push(payload); + const schema = String(payload.required_schema ?? ''); + if (schema.includes('"ideas"')) { + return { + content: JSON.stringify({ + ideas: [ + { + title: 'repaired idea', + hypothesis: 'h', + mechanism: 'm', + expected_outcome: 'o', + }, + ], + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + throw new Error('Unexpected repair schema'); + } + + if (payload.brief_D && payload.brief_P) { + return { + content: JSON.stringify({ + schema_version: '1.0.0', + selected_primary: [payload.brief_D.shortlist[0]], + selected_secondary: [payload.brief_P.shortlist[0]], + merges: [], + rejections: [], + open_questions: ['q1'], + next_experiments: ['e1'], + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + if (payload.shortlisted_ideas) { + return { + content: JSON.stringify({ + grounded: payload.shortlisted_ideas.map((idea: any) => ({ + idea_id: idea.idea_id, + mve: 'm', + constraints: ['c'], + falsifiability_checks: ['f'], + })), + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + if (payload.ideas) { + return { + content: JSON.stringify({ + assessments: payload.ideas.map((idea: any, idx: number) => ({ + idea_id: idea.idea_id, + scores: { novelty: 50, feasibility: 50, impact: 50, testability: 50 }, + decision: idx === 0 ? 'shortlist' : 'hold', + notes: 'ok', + })), + assumptions: [], + risks: [], + asks: [], + what_to_steal: [], + convergence_signal: false, + novelty_score: 60, + repetition_rate: 20, + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + if (payload.group && payload.round) { + const key = `${payload.group}:${payload.round}`; + if (!seenIdeation.has(key)) { + seenIdeation.add(key); + return { + content: 'this is not JSON', + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + return { + content: JSON.stringify({ + ideas: [ + { + title: `${payload.group} idea`, + hypothesis: 'h', + mechanism: 'm', + expected_outcome: 'o', + }, + ], + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + throw new Error('Unexpected payload'); + }); + + const orchestrator = new CouncilsOrchestrator({ + registry: createRegistry(), + orchestrator: { delegate }, + config: createConfig({ defaults: { ...createConfig().defaults, max_rounds: 1 } }), + }); + + const result = await orchestrator.run({ task: 'x', max_rounds: 1 }); + expect(result.stop_snapshot.round_reached).toBe(1); + expect(repairPayloads.length).toBeGreaterThan(0); + }); + + it('falls back instead of throwing when repair output is still invalid', async () => { + const delegate = vi.fn(async ({ message }: { message: string }) => { + const payload = JSON.parse(message) as Record; + + if (payload.task === 'normalize_json_output') { + return { + content: 'still invalid json', + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + if (payload.brief_D && payload.brief_P) { + return { + content: JSON.stringify({ + schema_version: '1.0.0', + selected_primary: [payload.brief_D.shortlist[0]], + selected_secondary: [payload.brief_P.shortlist[0]], + merges: [], + rejections: [], + open_questions: ['q1'], + next_experiments: ['e1'], + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + if (payload.shortlisted_ideas) { + return { + content: JSON.stringify({ grounded: [] }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + if (payload.ideas) { + return { + content: JSON.stringify({ + assessments: payload.ideas.map((idea: any, idx: number) => ({ + idea_id: idea.idea_id, + scores: { novelty: 50, feasibility: 50, impact: 50, testability: 50 }, + decision: idx === 0 ? 'shortlist' : 'hold', + notes: 'ok', + })), + assumptions: [], + risks: [], + asks: [], + what_to_steal: [], + convergence_signal: false, + novelty_score: 60, + repetition_rate: 20, + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + if (payload.group && payload.round) { + return { + content: 'not json at all', + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + throw new Error('Unexpected payload'); + }); + + const orchestrator = new CouncilsOrchestrator({ + registry: createRegistry(), + orchestrator: { delegate }, + config: createConfig({ defaults: { ...createConfig().defaults, max_rounds: 1 } }), + }); + + const result = await orchestrator.run({ task: 'x', max_rounds: 1 }); + expect(result.stop_snapshot.round_reached).toBe(1); + expect(result.brief_D_v1.ideas.length).toBeGreaterThan(0); + expect(result.brief_P_v1.ideas.length).toBeGreaterThan(0); + expect(result.trace.some((event) => event.validation_failure === 'repair_failed')).toBe(true); + }); }); diff --git a/src/councils/orchestrator.ts b/src/councils/orchestrator.ts index 9bb4cbf..d34fd17 100644 --- a/src/councils/orchestrator.ts +++ b/src/councils/orchestrator.ts @@ -1,7 +1,7 @@ import type { AgentConfigRegistry } from '../agents/registry.js'; import type { AgentOrchestrator } from '../backends/native/orchestrator.js'; import type { ModelTier } from '../models/router.js'; -import type { TokenUsage } from '../models/types.js'; +import type { ChatResponseFormat, TokenUsage } from '../models/types.js'; import type { CouncilScaffold } from './scaffold.js'; import { COUNCIL_PIPELINE_VERSION, @@ -36,6 +36,7 @@ interface DelegateRunner { systemPrompt: string; message: string; maxTokens?: number; + responseFormat?: ChatResponseFormat; }): Promise<{ content: string; usage: TokenUsage; @@ -90,24 +91,171 @@ interface AgentCallResult { usage: TokenUsage; } +interface JsonRecoveryOptions { + rawContent: string; + parser: (value: unknown) => T; + agentName: string; + callId: string; + phaseIndex: number; + promptPayload: unknown; + modeDirective: string; + schemaHint: string; + group?: CouncilGroup; + round?: number; + scaffoldPrompt?: string; + tierOverride?: ModelTier; + maxTokens?: number; + responseFormat?: ChatResponseFormat; + fallback?: () => T; +} + function deterministicJsonRepair(raw: string): string | null { const trimmed = raw.trim(); const noFence = trimmed .replace(/^```json\s*/i, '') .replace(/^```\s*/i, '') .replace(/```$/, '') - .trim(); + .trim() + .replace(/[\u201C\u201D]/g, '"') + .replace(/[\u2018\u2019]/g, '\''); const extracted = extractFirstJsonContainer(noFence); if (!extracted) { return null; } - return extracted + const withoutComments = stripJsonLikeComments(extracted); + const withQuotedKeys = withoutComments.replace( + /([{,]\s*)([A-Za-z_][A-Za-z0-9_-]*)(\s*:)/g, + '$1"$2"$3', + ); + const withDoubleQuotedStrings = convertSingleQuotedStrings(withQuotedKeys); + + return withDoubleQuotedStrings .replace(/,\s*([}\]])/g, '$1') .trim(); } +function stripJsonLikeComments(input: string): string { + let output = ''; + let i = 0; + let inString = false; + let stringDelimiter: '"' | '\'' | null = null; + let escaped = false; + + while (i < input.length) { + const ch = input[i]; + const next = i + 1 < input.length ? input[i + 1] : ''; + + if (inString) { + output += ch; + if (escaped) { + escaped = false; + } else if (ch === '\\') { + escaped = true; + } else if (ch === stringDelimiter) { + inString = false; + stringDelimiter = null; + } + i += 1; + continue; + } + + if (ch === '"' || ch === '\'') { + inString = true; + stringDelimiter = ch; + output += ch; + i += 1; + continue; + } + + if (ch === '/' && next === '/') { + i += 2; + while (i < input.length && input[i] !== '\n') { + i += 1; + } + continue; + } + + if (ch === '/' && next === '*') { + i += 2; + while (i + 1 < input.length && !(input[i] === '*' && input[i + 1] === '/')) { + i += 1; + } + i = Math.min(i + 2, input.length); + continue; + } + + output += ch; + i += 1; + } + + return output; +} + +function convertSingleQuotedStrings(input: string): string { + let output = ''; + let inSingle = false; + let inDouble = false; + let escaped = false; + + for (let i = 0; i < input.length; i++) { + const ch = input[i]; + + if (inSingle) { + if (escaped) { + output += ch; + escaped = false; + continue; + } + if (ch === '\\') { + output += ch; + escaped = true; + continue; + } + if (ch === '\'') { + inSingle = false; + output += '"'; + continue; + } + if (ch === '"') { + output += '\\"'; + continue; + } + output += ch; + continue; + } + + if (inDouble) { + output += ch; + if (escaped) { + escaped = false; + } else if (ch === '\\') { + escaped = true; + } else if (ch === '"') { + inDouble = false; + } + continue; + } + + if (ch === '\'') { + inSingle = true; + output += '"'; + continue; + } + + if (ch === '"') { + inDouble = true; + output += ch; + continue; + } + + output += ch; + } + + return output; +} + function extractFirstJsonContainer(input: string): string | null { const start = input.search(/[\[{]/); if (start < 0) { @@ -152,16 +300,82 @@ function extractFirstJsonContainer(input: string): string | null { return null; } +function parseWithFallbackCandidates(value: unknown, parser: (value: unknown) => T): T { + const queue: unknown[] = [value]; + const seenObjects = new Set(); + const wrapperKeys = ['output', 'result', 'data', 'response', 'json', 'payload', 'content']; + let firstError: Error | null = null; + + while (queue.length > 0) { + const current = queue.shift(); + if (current === undefined) { + continue; + } + + if (current && typeof current === 'object') { + const objectRef = current as object; + if (seenObjects.has(objectRef)) { + continue; + } + seenObjects.add(objectRef); + } + + try { + return parser(current); + } catch (error) { + if (!firstError) { + firstError = error instanceof Error ? error : new Error(String(error)); + } + } + + if (typeof current === 'string') { + try { + queue.push(JSON.parse(current)); + continue; + } catch { + const repaired = deterministicJsonRepair(current); + if (repaired) { + try { + queue.push(JSON.parse(repaired)); + continue; + } catch { + // Continue searching additional candidates below. + } + } + } + continue; + } + + if (Array.isArray(current)) { + for (const item of current) { + queue.push(item); + } + continue; + } + + if (current && typeof current === 'object') { + const record = current as Record; + for (const key of wrapperKeys) { + if (record[key] !== undefined) { + queue.push(record[key]); + } + } + } + } + + throw firstError ?? new Error('parse_failed'); +} + function parseJsonWithRepair(raw: string, parser: (value: unknown) => T): T { try { - return parser(JSON.parse(raw)); + return parseWithFallbackCandidates(JSON.parse(raw), parser); } catch { const repaired = deterministicJsonRepair(raw); if (!repaired) { throw new Error('parse_failed'); } try { - return parser(JSON.parse(repaired)); + return parseWithFallbackCandidates(JSON.parse(repaired), parser); } catch { throw new Error('repair_failed'); } @@ -172,6 +386,149 @@ function uniq(values: string[]): string[] { return Array.from(new Set(values)); } +function jsonSchemaFormat(name: string, schema: Record): ChatResponseFormat { + return { type: 'json_schema', name, schema, strict: true }; +} + +const councilIdeaContentJsonSchema: Record = { + type: 'object', + additionalProperties: false, + required: ['title', 'hypothesis', 'mechanism', 'expected_outcome'], + properties: { + title: { type: 'string', minLength: 1 }, + hypothesis: { type: 'string', minLength: 1 }, + mechanism: { type: 'string', minLength: 1 }, + expected_outcome: { type: 'string', minLength: 1 }, + }, +}; + +const councilIdeationJsonSchema: Record = { + type: 'object', + additionalProperties: false, + required: ['ideas'], + properties: { + ideas: { + type: 'array', + minItems: 1, + items: councilIdeaContentJsonSchema, + }, + }, +}; + +const councilAssessmentJsonSchema: Record = { + type: 'object', + additionalProperties: false, + required: ['assessments', 'assumptions', 'risks', 'asks', 'what_to_steal', 'convergence_signal', 'novelty_score', 'repetition_rate'], + properties: { + assessments: { + type: 'array', + items: { + type: 'object', + additionalProperties: false, + required: ['idea_id', 'scores', 'decision', 'notes'], + properties: { + idea_id: { type: 'string', minLength: 1 }, + scores: { + type: 'object', + additionalProperties: false, + required: ['novelty', 'feasibility', 'impact', 'testability'], + properties: { + novelty: { type: 'integer', minimum: 0, maximum: 100 }, + feasibility: { type: 'integer', minimum: 0, maximum: 100 }, + impact: { type: 'integer', minimum: 0, maximum: 100 }, + testability: { type: 'integer', minimum: 0, maximum: 100 }, + }, + }, + decision: { type: 'string', enum: ['shortlist', 'hold', 'reject'] }, + notes: { type: 'string', minLength: 1 }, + }, + }, + }, + assumptions: { type: 'array', items: { type: 'string', minLength: 1 } }, + risks: { type: 'array', items: { type: 'string', minLength: 1 } }, + asks: { type: 'array', items: { type: 'string', minLength: 1 } }, + what_to_steal: { type: 'array', items: { type: 'string', minLength: 1 } }, + convergence_signal: { type: 'boolean' }, + novelty_score: { type: 'integer', minimum: 0, maximum: 100 }, + repetition_rate: { type: 'integer', minimum: 0, maximum: 100 }, + }, +}; + +const councilGroundingJsonSchema: Record = { + type: 'object', + additionalProperties: false, + required: ['grounded'], + properties: { + grounded: { + type: 'array', + items: { + type: 'object', + additionalProperties: false, + required: ['idea_id', 'mve', 'constraints', 'falsifiability_checks'], + properties: { + idea_id: { type: 'string', minLength: 1 }, + mve: { type: 'string', minLength: 1 }, + constraints: { + type: 'array', + minItems: 1, + items: { type: 'string', minLength: 1 }, + }, + falsifiability_checks: { + type: 'array', + minItems: 1, + items: { type: 'string', minLength: 1 }, + }, + }, + }, + }, + }, +}; + +const councilMetaJsonSchema: Record = { + type: 'object', + additionalProperties: false, + required: ['schema_version', 'selected_primary', 'selected_secondary', 'merges', 'rejections', 'open_questions', 'next_experiments'], + properties: { + schema_version: { type: 'string', const: COUNCIL_SCHEMA_VERSION }, + selected_primary: { type: 'array', items: { type: 'string', minLength: 1 } }, + selected_secondary: { type: 'array', items: { type: 'string', minLength: 1 } }, + merges: { + type: 'array', + items: { + type: 'object', + additionalProperties: false, + required: ['sources', 'result_title', 'rationale'], + properties: { + sources: { + type: 'array', + minItems: 2, + items: { type: 'string', minLength: 1 }, + }, + result_title: { type: 'string', minLength: 1 }, + rationale: { type: 'string', minLength: 1 }, + }, + }, + }, + rejections: { + type: 'array', + items: { + type: 'object', + additionalProperties: false, + required: ['idea_id', 'reason_code'], + properties: { + idea_id: { type: 'string', minLength: 1 }, + reason_code: { + type: 'string', + enum: ['low_score', 'high_risk', 'insufficient_grounding', 'duplicate', 'out_of_scope', 'unknown_id', 'other'], + }, + }, + }, + }, + open_questions: { type: 'array', items: { type: 'string', minLength: 1 } }, + next_experiments: { type: 'array', items: { type: 'string', minLength: 1 } }, + }, +}; + function computeTotalScore(assessment: IdeaAssessment): number { const s = assessment.scores; return s.feasibility + s.impact + s.novelty + s.testability; @@ -403,19 +760,23 @@ export class CouncilsOrchestrator { scaffoldPrompt?: string; tierOverride?: ModelTier; maxTokens?: number; + responseFormat?: ChatResponseFormat; }): Promise { const agent = this.getAgent(opts.agentName); const message = canonicalStringify(opts.promptPayload); const promptHash = hashCanonical(opts.promptPayload); const scaffoldPrompt = opts.scaffoldPrompt ? `${opts.scaffoldPrompt}\n\n` : ''; const systemPrompt = `${scaffoldPrompt}${agent.systemPrompt}\n\n${opts.modeDirective}`; + const startedAt = Date.now(); const result = await this._delegateRunner.delegate({ tier: opts.tierOverride ?? agent.tier, systemPrompt, message, maxTokens: opts.maxTokens ?? 4096, + responseFormat: opts.responseFormat, }); + const latencyMs = Math.max(0, Date.now() - startedAt); this._trace.push(councilTraceEventSchema.parse({ schema_version: COUNCIL_SCHEMA_VERSION, @@ -427,6 +788,7 @@ export class CouncilsOrchestrator { prompt_payload_hash: promptHash, artifact_hash: hashCanonical(result.content), token_usage: result.usage, + latency_ms: latencyMs, })); this._conversations.push({ schema_version: COUNCIL_SCHEMA_VERSION, @@ -444,6 +806,63 @@ export class CouncilsOrchestrator { return result; } + private async parseWithAgentRecovery(opts: JsonRecoveryOptions): Promise { + try { + return parseJsonWithRepair(opts.rawContent, opts.parser); + } catch { + const recoveryPayload = { + task: 'normalize_json_output', + required_schema: opts.schemaHint, + original_prompt_payload: opts.promptPayload, + invalid_output: opts.rawContent, + }; + + const recoveryRaw = await this.callAgent({ + agentName: opts.agentName, + callId: `${opts.callId}.repair`, + phaseIndex: opts.phaseIndex, + group: opts.group, + round: opts.round, + promptPayload: recoveryPayload, + modeDirective: [ + 'Your previous response did not validate.', + 'Return ONLY valid JSON matching the required schema.', + 'Do not add prose, markdown fences, or commentary.', + ].join(' '), + scaffoldPrompt: opts.scaffoldPrompt, + tierOverride: opts.tierOverride, + maxTokens: opts.maxTokens, + responseFormat: opts.responseFormat, + }); + + try { + return parseJsonWithRepair(recoveryRaw.content, opts.parser); + } catch { + if (opts.fallback) { + this.recordValidationFailure(opts, 'repair_failed'); + return opts.fallback(); + } + throw new Error('repair_failed'); + } + } + } + + private recordValidationFailure( + opts: Pick, 'callId' | 'phaseIndex' | 'group' | 'round' | 'promptPayload'>, + reason: 'repair_failed' | 'parse_failed', + ): void { + this._trace.push(councilTraceEventSchema.parse({ + schema_version: COUNCIL_SCHEMA_VERSION, + event_id: `${opts.phaseIndex}:${opts.callId}.validation`, + phase_index: opts.phaseIndex, + call_id: `${opts.callId}.validation`, + group: opts.group, + round: opts.round, + prompt_payload_hash: hashCanonical(opts.promptPayload), + validation_failure: reason, + })); + } + private allocateIdeaId(group: CouncilGroup, round: number, index: number): string { return `${group}.r${round}.${String(index + 1).padStart(2, '0')}`; } @@ -481,11 +900,37 @@ export class CouncilsOrchestrator { round, promptPayload: ideationPayload, modeDirective: 'Return JSON only: {"ideas":[IdeaContent,...]}. Do not include IDs. No prose.', + responseFormat: jsonSchemaFormat('council_ideation', councilIdeationJsonSchema), scaffoldPrompt: this._scaffold?.prompts[group].free_thinker, tierOverride: groupTier, }); - const ideaOutput = parseJsonWithRepair(ideation.content, (value) => ideationOutputSchema.parse(value)); + const ideaOutput = await this.parseWithAgentRecovery({ + rawContent: ideation.content, + parser: (value) => ideationOutputSchema.parse(value), + agentName: groupConfig.freethinker_agent, + callId: `${group}.r${round}.ft.ideation`, + phaseIndex: phaseBase, + group, + round, + promptPayload: ideationPayload, + modeDirective: 'Return JSON only: {"ideas":[IdeaContent,...]}. Do not include IDs. No prose.', + schemaHint: + '{"ideas":[{"title":"string","hypothesis":"string","mechanism":"string","expected_outcome":"string"}]}', + responseFormat: jsonSchemaFormat('council_ideation', councilIdeationJsonSchema), + scaffoldPrompt: this._scaffold?.prompts[group].free_thinker, + tierOverride: groupTier, + fallback: () => ({ + ideas: [ + { + title: `${group} fallback idea`, + hypothesis: `Fallback hypothesis for: ${input.task}`, + mechanism: 'Run a minimum-viable experiment and measure signal within one iteration.', + expected_outcome: 'A concrete go/no-go signal to reduce uncertainty.', + }, + ], + }), + }); const ideaCards: IdeaCard[] = ideaOutput.ideas .slice(0, this._config.defaults.ideas_per_round) .map((idea, index) => ({ @@ -514,11 +959,48 @@ export class CouncilsOrchestrator { promptPayload: assessmentPayload, modeDirective: 'Return JSON only. Assess provided idea IDs only. No new IDs. Include convergence_signal/novelty_score/repetition_rate.', + responseFormat: jsonSchemaFormat('council_assessment', councilAssessmentJsonSchema), scaffoldPrompt: this._scaffold?.prompts[group].arbiter, tierOverride: groupTier, }); - const assessmentOutput = parseJsonWithRepair(assessmentRaw.content, (value) => assessmentOutputSchema.parse(value)); + const assessmentOutput = await this.parseWithAgentRecovery({ + rawContent: assessmentRaw.content, + parser: (value) => assessmentOutputSchema.parse(value), + agentName: groupConfig.arbiter_agent, + callId: `${group}.r${round}.arb.assess`, + phaseIndex: phaseBase + 1, + group, + round, + promptPayload: assessmentPayload, + modeDirective: + 'Return JSON only. Assess provided idea IDs only. No new IDs. Include convergence_signal/novelty_score/repetition_rate.', + schemaHint: + '{"assessments":[{"idea_id":"string","scores":{"novelty":0,"feasibility":0,"impact":0,"testability":0},"decision":"shortlist|hold|reject","notes":"string"}],"assumptions":["string"],"risks":["string"],"asks":["string"],"what_to_steal":["string"],"convergence_signal":false,"novelty_score":0,"repetition_rate":0}', + responseFormat: jsonSchemaFormat('council_assessment', councilAssessmentJsonSchema), + scaffoldPrompt: this._scaffold?.prompts[group].arbiter, + tierOverride: groupTier, + fallback: () => ({ + assessments: ideaCards.map((idea, index) => ({ + idea_id: idea.idea_id, + scores: { + novelty: 40, + feasibility: 40, + impact: 40, + testability: 40, + }, + decision: index === 0 ? 'shortlist' as const : 'hold' as const, + notes: 'Fallback assessment used because model output could not be repaired into valid JSON.', + })), + assumptions: ['Model output formatting instability; assumptions are provisional.'], + risks: ['Assessment quality degraded due to invalid structured output.'], + asks: ['Re-run with a different provider or tier to confirm shortlist quality.'], + what_to_steal: [], + convergence_signal: false, + novelty_score: 0, + repetition_rate: 100, + }), + }); const validIdeaIds = new Set(ideaCards.map((i) => i.idea_id)); const assessments: IdeaAssessment[] = uniq(assessmentOutput.assessments.map((a) => a.idea_id)) .map((ideaId) => assessmentOutput.assessments.find((a) => a.idea_id === ideaId)!) @@ -571,10 +1053,27 @@ export class CouncilsOrchestrator { promptPayload: groundingPayload, modeDirective: 'Grounder mode. Return JSON only: {"grounded":[{"idea_id", "mve", "constraints", "falsifiability_checks"}]}. No prose.', + responseFormat: jsonSchemaFormat('council_grounding', councilGroundingJsonSchema), + scaffoldPrompt: this._scaffold?.prompts[group].grounder ?? this._scaffold?.prompts[group].free_thinker, + tierOverride: groupTier, + }); + grounding = await this.parseWithAgentRecovery({ + rawContent: groundingRaw.content, + parser: (value) => groundingOutputSchema.parse(value), + agentName: groundingAgent, + callId: `${group}.r${round}.ft.ground`, + phaseIndex: phaseBase + 2, + group, + round, + promptPayload: groundingPayload, + modeDirective: + 'Grounder mode. Return JSON only: {"grounded":[{"idea_id", "mve", "constraints", "falsifiability_checks"}]}. No prose.', + schemaHint: + '{"grounded":[{"idea_id":"string","mve":"string","constraints":["string"],"falsifiability_checks":["string"]}]}', + responseFormat: jsonSchemaFormat('council_grounding', councilGroundingJsonSchema), scaffoldPrompt: this._scaffold?.prompts[group].grounder ?? this._scaffold?.prompts[group].free_thinker, tierOverride: groupTier, }); - grounding = parseJsonWithRepair(groundingRaw.content, (value) => groundingOutputSchema.parse(value)); } catch { groundingFailures = shortlist.length; if (this._config.strict_grounding) { @@ -704,7 +1203,11 @@ export class CouncilsOrchestrator { return packet; } - private async runMetaMerge(input: CouncilRunInput, briefD: CouncilBrief, briefP: CouncilBrief) { + private async runMetaMerge( + input: CouncilRunInput, + briefD: CouncilBrief, + briefP: CouncilBrief, + ): Promise> { const metaAgentName = this._config.meta_arbiter_agent; const payload = { input, @@ -723,11 +1226,37 @@ export class CouncilsOrchestrator { promptPayload: payload, modeDirective: 'Return JSON only following schema with selected_primary/selected_secondary/merges/rejections/open_questions/next_experiments. Use only known idea IDs.', + responseFormat: jsonSchemaFormat('council_meta', councilMetaJsonSchema), scaffoldPrompt: this._scaffold?.prompts.meta_arbiter, tierOverride: this._config.meta_model_tier, }); - return parseJsonWithRepair(metaRaw.content, (value) => metaSelectionSchema.parse(value)); + return this.parseWithAgentRecovery>({ + rawContent: metaRaw.content, + parser: (value) => metaSelectionSchema.parse(value), + agentName: metaAgentName, + callId: 'meta.merge', + phaseIndex: 999, + promptPayload: payload, + modeDirective: + 'Return JSON only following schema with selected_primary/selected_secondary/merges/rejections/open_questions/next_experiments. Use only known idea IDs.', + schemaHint: + '{"schema_version":"1.0.0","selected_primary":["string"],"selected_secondary":["string"],"merges":[{"sources":["string","string"],"result_title":"string","rationale":"string"}],"rejections":[{"idea_id":"string","reason_code":"low_score|high_risk|insufficient_grounding|duplicate|out_of_scope|unknown_id|other"}],"open_questions":["string"],"next_experiments":["string"]}', + responseFormat: jsonSchemaFormat('council_meta', councilMetaJsonSchema), + scaffoldPrompt: this._scaffold?.prompts.meta_arbiter, + tierOverride: this._config.meta_model_tier, + fallback: () => metaSelectionSchema.parse({ + schema_version: COUNCIL_SCHEMA_VERSION, + selected_primary: briefD.shortlist.slice(0, 1), + selected_secondary: briefP.shortlist.slice(0, 1), + merges: [], + rejections: [], + open_questions: ['Meta merge used fallback due to invalid structured output.'], + next_experiments: [ + 'Re-run council with a lower-latency/stronger JSON model and compare selected ideas.', + ], + }), + }); } private validateMetaSelection(meta: ReturnType, knownIds: Set): boolean { @@ -763,6 +1292,7 @@ export class CouncilsOrchestrator { group: normalizeOptional(event.group), round: normalizeOptional(event.round), token_usage: normalizeOptional(event.token_usage), + latency_ms: normalizeOptional(event.latency_ms), dropped_reason: normalizeOptional(event.dropped_reason), validation_failure: normalizeOptional(event.validation_failure), })); diff --git a/src/councils/preflight.test.ts b/src/councils/preflight.test.ts new file mode 100644 index 0000000..eca0b34 --- /dev/null +++ b/src/councils/preflight.test.ts @@ -0,0 +1,93 @@ +import { describe, expect, it, vi } from 'vitest'; +import { AgentConfigRegistry } from '../agents/registry.js'; +import { configSchema } from '../config/schema.js'; +import { buildCouncilPreflightReport } from './preflight.js'; + +function makeConfig() { + const config = configSchema.parse({ + models: { + default: { + provider: 'zhipuai', + model: 'glm-4.7', + use_oauth: true, + }, + }, + }); + config.councils.enabled = true; + config.councils.groups.D.arbiter_agent = 'comms'; + config.councils.groups.D.freethinker_agent = 'research'; + config.councils.groups.D.model_tier = 'default'; + config.councils.groups.P.arbiter_agent = 'comms'; + config.councils.groups.P.freethinker_agent = 'research'; + config.councils.groups.P.model_tier = 'default'; + config.councils.meta_arbiter_agent = 'code'; + config.councils.meta_model_tier = 'default'; + return config; +} + +function makeRegistry(): AgentConfigRegistry { + const registry = new AgentConfigRegistry(); + registry.register({ name: 'comms', modelTier: 'fast' }); + registry.register({ name: 'research', modelTier: 'default' }); + registry.register({ name: 'code', modelTier: 'complex' }); + return registry; +} + +describe('buildCouncilPreflightReport', () => { + it('reports zhipu endpoint/auth and stale oauth flag note', async () => { + const config = makeConfig(); + const report = await buildCouncilPreflightReport({ + config, + registry: makeRegistry(), + delegateRunner: { + delegate: vi.fn(async () => ({ + content: '{"ok":true}', + usage: { inputTokens: 1, outputTokens: 1 }, + tier: 'default' as const, + })), + }, + activeTier: 'default', + includeLiveProbe: false, + credentialState: { + openaiOAuth: false, + openaiApiKeyStored: false, + anthropicApiKeyStored: false, + anthropicAuthTokenStored: false, + zaiStored: true, + }, + }); + + expect(report).toContain('D.arbiter: agent=comms'); + expect(report).toContain('effective_tier=default model=zhipuai/glm-4.7'); + expect(report).toContain('endpoint=https://api.z.ai/api/paas/v4'); + expect(report).toContain('auth_mode=bearer_credential'); + expect(report).toContain('auth_source=auth.json:zai'); + expect(report).toContain('note=use_oauth/auth_mode=oauth is ignored for zhipuai'); + }); + + it('includes live probe failures by tier', async () => { + const config = makeConfig(); + const delegate = vi.fn(async () => { + throw new Error('Connection error'); + }); + const report = await buildCouncilPreflightReport({ + config, + registry: makeRegistry(), + delegateRunner: { delegate }, + activeTier: 'default', + includeLiveProbe: true, + credentialState: { + openaiOAuth: false, + openaiApiKeyStored: false, + anthropicApiKeyStored: false, + anthropicAuthTokenStored: false, + zaiStored: false, + }, + }); + + expect(report).toContain('Live tier probes:'); + expect(report).toContain('default: failed'); + expect(report).toContain('Connection error'); + expect(delegate).toHaveBeenCalled(); + }); +}); diff --git a/src/councils/preflight.ts b/src/councils/preflight.ts new file mode 100644 index 0000000..15d4d87 --- /dev/null +++ b/src/councils/preflight.ts @@ -0,0 +1,347 @@ +import type { AgentConfigRegistry } from '../agents/registry.js'; +import { + loadStoredAnthropicAuth, + loadStoredAnthropicAuthToken, + loadStoredOpenAIApiKey, + loadStoredOpenAIAuth, + loadStoredZaiAuth, +} from '../auth/index.js'; +import type { Config, ModelConfig, ModelProvider } from '../config/index.js'; +import type { ModelTier } from '../models/router.js'; +import type { ChatResponseFormat } from '../models/types.js'; + +interface DelegateRunner { + delegate(request: { + tier: ModelTier; + systemPrompt: string; + message: string; + maxTokens?: number; + responseFormat?: ChatResponseFormat; + }): Promise<{ + content: string; + usage: { inputTokens: number; outputTokens: number }; + tier: ModelTier; + }>; +} + +interface CredentialState { + openaiOAuth: boolean; + openaiApiKeyStored: boolean; + anthropicApiKeyStored: boolean; + anthropicAuthTokenStored: boolean; + zaiStored: boolean; +} + +export interface CouncilPreflightOptions { + config: Config; + registry: AgentConfigRegistry; + delegateRunner: DelegateRunner; + activeTier?: ModelTier; + includeLiveProbe?: boolean; + credentialState?: CredentialState; +} + +interface TierConfigResolution { + modelConfig: ModelConfig; + fellBackToDefault: boolean; +} + +interface AuthResolution { + mode: string; + source: string; + note?: string; +} + +const DEFAULT_ENDPOINTS: Partial> = { + openai: 'https://api.openai.com/v1', + openrouter: 'https://openrouter.ai/api/v1', + vercel: 'https://ai-gateway.vercel.sh/v1', + zhipuai: 'https://api.z.ai/api/paas/v4', + xai: 'https://api.x.ai/v1', + minimax: 'https://api.minimax.io/v1', + moonshot: 'https://api.moonshot.cn/v1', + ollama: 'http://localhost:11434', + llamacpp: 'http://localhost:8080', +}; + +function getCredentialStateFromSystem(): CredentialState { + return { + openaiOAuth: Boolean(loadStoredOpenAIAuth()), + openaiApiKeyStored: Boolean(loadStoredOpenAIApiKey()), + anthropicApiKeyStored: Boolean(loadStoredAnthropicAuth()), + anthropicAuthTokenStored: Boolean(loadStoredAnthropicAuthToken()), + zaiStored: Boolean(loadStoredZaiAuth()), + }; +} + +function getEffectiveAuthMode(cfg: ModelConfig): 'auto' | 'api_key' | 'oauth' { + if (cfg.auth_mode) { + return cfg.auth_mode; + } + if (cfg.use_oauth) { + return 'oauth'; + } + return 'auto'; +} + +function resolveTierConfig(config: Config, tier: ModelTier): TierConfigResolution { + if (tier === 'default') { + return { modelConfig: config.models.default, fellBackToDefault: false }; + } + const direct = config.models[tier]; + if (direct) { + return { modelConfig: direct, fellBackToDefault: false }; + } + return { modelConfig: config.models.default, fellBackToDefault: true }; +} + +function firstTruthySource(sources: Array<[boolean, string]>): string { + for (const [present, label] of sources) { + if (present) { + return label; + } + } + return 'missing'; +} + +function resolveAuth(cfg: ModelConfig, credentialState: CredentialState): AuthResolution { + if (cfg.provider === 'zhipuai') { + const source = firstTruthySource([ + [Boolean(cfg.api_key?.trim()), 'config.api_key'], + [Boolean(cfg.auth_token?.trim()), 'config.auth_token'], + [Boolean(process.env.ZAI_API_KEY?.trim()), 'env:ZAI_API_KEY'], + [Boolean(process.env.ZHIPUAI_API_KEY?.trim()), 'env:ZHIPUAI_API_KEY'], + [Boolean(process.env.ZHIPUAI_AUTH_TOKEN?.trim()), 'env:ZHIPUAI_AUTH_TOKEN'], + [credentialState.zaiStored, 'auth.json:zai'], + ]); + const note = cfg.use_oauth || cfg.auth_mode === 'oauth' + ? 'use_oauth/auth_mode=oauth is ignored for zhipuai' + : undefined; + return { + mode: 'bearer_credential', + source, + note, + }; + } + + if (cfg.provider === 'openai') { + const authMode = getEffectiveAuthMode(cfg); + const apiSource = firstTruthySource([ + [Boolean(cfg.api_keys && cfg.api_keys.length > 0), 'config.api_keys'], + [Boolean(cfg.api_key?.trim()), 'config.api_key'], + [Boolean(process.env.OPENAI_API_KEY?.trim()), 'env:OPENAI_API_KEY'], + [credentialState.openaiApiKeyStored, 'auth.json:openai.api_key'], + ]); + const oauthSource = credentialState.openaiOAuth ? 'auth.json:openai.oauth' : 'missing'; + + if (authMode === 'oauth') { + return { mode: 'oauth', source: oauthSource }; + } + if (authMode === 'api_key') { + return { mode: 'api_key', source: apiSource }; + } + if (apiSource !== 'missing') { + return { mode: 'auto->api_key', source: apiSource }; + } + return { mode: 'auto->oauth', source: oauthSource }; + } + + if (cfg.provider === 'anthropic') { + const authMode = getEffectiveAuthMode(cfg); + const apiSource = firstTruthySource([ + [Boolean(cfg.api_keys && cfg.api_keys.length > 0), 'config.api_keys'], + [Boolean(cfg.api_key?.trim()), 'config.api_key'], + [Boolean(process.env.ANTHROPIC_API_KEY?.trim()), 'env:ANTHROPIC_API_KEY'], + [credentialState.anthropicApiKeyStored, 'auth.json:anthropic.api_key'], + ]); + const oauthSource = firstTruthySource([ + [Boolean(cfg.auth_token?.trim()), 'config.auth_token'], + [Boolean(process.env.ANTHROPIC_AUTH_TOKEN?.trim()), 'env:ANTHROPIC_AUTH_TOKEN'], + [credentialState.anthropicAuthTokenStored, 'auth.json:anthropic.auth_token'], + ]); + + if (authMode === 'oauth') { + return { mode: 'oauth', source: oauthSource }; + } + if (authMode === 'api_key') { + return { mode: 'api_key', source: apiSource }; + } + if (apiSource !== 'missing') { + return { mode: 'auto->api_key', source: apiSource }; + } + return { mode: 'auto->oauth', source: oauthSource }; + } + + if (cfg.provider === 'gemini') { + const source = firstTruthySource([ + [Boolean(cfg.api_key?.trim()), 'config.api_key'], + [Boolean(process.env.GEMINI_API_KEY?.trim()), 'env:GEMINI_API_KEY'], + [Boolean(process.env.GOOGLE_API_KEY?.trim()), 'env:GOOGLE_API_KEY'], + ]); + return { mode: 'api_key', source }; + } + + if (cfg.provider === 'bedrock') { + const source = firstTruthySource([ + [Boolean(cfg.api_key?.trim() && cfg.auth_token?.trim()), 'config.api_key+auth_token'], + [Boolean(process.env.AWS_ACCESS_KEY_ID?.trim() && process.env.AWS_SECRET_ACCESS_KEY?.trim()), 'env:AWS_ACCESS_KEY_ID+AWS_SECRET_ACCESS_KEY'], + ]); + return { mode: 'aws_credentials', source }; + } + + if (cfg.provider === 'ollama' || cfg.provider === 'llamacpp' || cfg.provider === 'synthetic') { + return { mode: 'local', source: 'none' }; + } + + const source = firstTruthySource([ + [Boolean(cfg.api_keys && cfg.api_keys.length > 0), 'config.api_keys'], + [Boolean(cfg.api_key?.trim()), 'config.api_key'], + ]); + return { mode: 'api_key', source }; +} + +function resolveEndpoint(cfg: ModelConfig): string { + if (cfg.endpoint?.trim()) { + return cfg.endpoint.trim(); + } + return DEFAULT_ENDPOINTS[cfg.provider] ?? '(provider default)'; +} + +function normalizeProbeError(error: unknown): string { + if (error instanceof Error) { + return error.message; + } + return String(error); +} + +function isPreflightRequest(task: string): boolean { + return task.trim().toLowerCase() === 'preflight'; +} + +export function shouldRunCouncilPreflight(task: string): boolean { + return isPreflightRequest(task); +} + +export async function buildCouncilPreflightReport(options: CouncilPreflightOptions): Promise { + const { config, registry, delegateRunner } = options; + const councils = config.councils; + const credentialState = options.credentialState ?? getCredentialStateFromSystem(); + + const roles = [ + { role: 'D.arbiter', agent: councils.groups.D.arbiter_agent, tierOverride: councils.groups.D.model_tier }, + { role: 'D.freethinker', agent: councils.groups.D.freethinker_agent, tierOverride: councils.groups.D.model_tier }, + ...(councils.groups.D.grounder_agent + ? [{ role: 'D.grounder', agent: councils.groups.D.grounder_agent, tierOverride: councils.groups.D.model_tier }] + : []), + ...(councils.groups.D.writer_agent + ? [{ role: 'D.writer', agent: councils.groups.D.writer_agent, tierOverride: councils.groups.D.model_tier }] + : []), + { role: 'P.arbiter', agent: councils.groups.P.arbiter_agent, tierOverride: councils.groups.P.model_tier }, + { role: 'P.freethinker', agent: councils.groups.P.freethinker_agent, tierOverride: councils.groups.P.model_tier }, + ...(councils.groups.P.grounder_agent + ? [{ role: 'P.grounder', agent: councils.groups.P.grounder_agent, tierOverride: councils.groups.P.model_tier }] + : []), + ...(councils.groups.P.writer_agent + ? [{ role: 'P.writer', agent: councils.groups.P.writer_agent, tierOverride: councils.groups.P.model_tier }] + : []), + { role: 'Meta.arbiter', agent: councils.meta_arbiter_agent, tierOverride: councils.meta_model_tier }, + ...(councils.meta_writer_agent + ? [{ role: 'Meta.writer', agent: councils.meta_writer_agent, tierOverride: councils.meta_model_tier }] + : []), + ]; + + const roleLines: string[] = []; + const tiersToProbe = new Set(); + + for (const role of roles) { + const agentConfig = registry.get(role.agent); + const agentTier = agentConfig?.modelTier ?? 'default'; + const effectiveTier = role.tierOverride ?? agentTier; + const { modelConfig, fellBackToDefault } = resolveTierConfig(config, effectiveTier); + const modelLabel = `${modelConfig.provider}/${modelConfig.model}`; + const missingSuffix = agentConfig ? '' : ' [agent_missing]'; + const fallbackSuffix = fellBackToDefault ? ' [tier_unconfigured->default]' : ''; + roleLines.push( + `- ${role.role}: agent=${role.agent}${missingSuffix} ` + + `agent_tier=${agentTier} override_tier=${role.tierOverride ?? 'none'} ` + + `effective_tier=${effectiveTier} model=${modelLabel}${fallbackSuffix}`, + ); + tiersToProbe.add(effectiveTier); + } + + const tierLines: string[] = []; + for (const tier of [...tiersToProbe].sort()) { + const { modelConfig, fellBackToDefault } = resolveTierConfig(config, tier); + const auth = resolveAuth(modelConfig, credentialState); + const endpoint = resolveEndpoint(modelConfig); + tierLines.push( + `- ${tier}: provider=${modelConfig.provider} model=${modelConfig.model} ` + + `endpoint=${endpoint} auth_mode=${auth.mode} auth_source=${auth.source}` + + `${auth.note ? ` note=${auth.note}` : ''}` + + `${fellBackToDefault ? ' fallback=default' : ''}`, + ); + } + + const probeLines: string[] = []; + if (options.includeLiveProbe ?? true) { + const probeResults = await Promise.all( + [...tiersToProbe].sort().map(async (tier) => { + const startedAt = Date.now(); + try { + await delegateRunner.delegate({ + tier, + systemPrompt: 'Return exactly {"ok":true}.', + message: 'Return exactly {"ok":true}.', + maxTokens: 64, + }); + return { + tier, + ok: true, + latencyMs: Math.max(0, Date.now() - startedAt), + error: '', + }; + } catch (error) { + return { + tier, + ok: false, + latencyMs: Math.max(0, Date.now() - startedAt), + error: normalizeProbeError(error), + }; + } + }), + ); + + for (const result of probeResults) { + if (result.ok) { + probeLines.push(`- ${result.tier}: ok (${result.latencyMs}ms)`); + } else { + probeLines.push(`- ${result.tier}: failed (${result.latencyMs}ms) ${result.error}`); + } + } + } else { + probeLines.push('- skipped'); + } + + const activeTier = options.activeTier ?? 'default'; + const activeModel = resolveTierConfig(config, activeTier).modelConfig; + + return [ + '[Council preflight]', + `Councils enabled: ${councils.enabled ? 'yes' : 'no'}`, + `Active interactive tier: ${activeTier} (${activeModel.provider}/${activeModel.model})`, + `Scaffold path: ${councils.scaffold_path?.trim() ? councils.scaffold_path : '(none)'}`, + '', + 'Role routing:', + ...roleLines, + '', + 'Tier provider/auth:', + ...tierLines, + '', + 'Live tier probes:', + ...probeLines, + '', + 'Path comparison:', + '- Interactive TUI messages use the active tier above.', + '- Council calls use per-role effective tiers (group override, then agent tier).', + ].join('\n'); +} diff --git a/src/councils/types.ts b/src/councils/types.ts index 4d542a1..721e5c3 100644 --- a/src/councils/types.ts +++ b/src/councils/types.ts @@ -166,6 +166,7 @@ export const councilTraceEventSchema = z.object({ inputTokens: z.number().int().min(0), outputTokens: z.number().int().min(0), }).strict().optional(), + latency_ms: z.number().int().min(0).optional(), dropped_reason: droppedReasonSchema.optional(), validation_failure: validationFailureReasonSchema.optional(), }).strict(); diff --git a/src/daemon/routing.ts b/src/daemon/routing.ts index ba4577b..2b68757 100644 --- a/src/daemon/routing.ts +++ b/src/daemon/routing.ts @@ -29,6 +29,7 @@ import { auditLogger } from '../audit/index.js'; import { getElevationStatusMessage, setElevationFromInput } from '../security/elevation.js'; import { dirname, resolve } from 'path'; import { loadCouncilScaffoldSafe } from '../councils/scaffold.js'; +import { buildCouncilPreflightReport, shouldRunCouncilPreflight } from '../councils/preflight.js'; function buildProviderConfigMap(config: Config): Partial> { const providerConfigs: Partial> = {}; @@ -866,14 +867,23 @@ export function createMessageRouter(deps: { runCouncil: async (task: string) => { const message = task.trim(); if (!message) { - return 'Usage: /council '; + return 'Usage: /council | /council preflight'; + } + if (!deps.config.councils?.enabled) { + return 'Councils are disabled. Set councils.enabled: true in config.'; } - if (!deps.config.councils?.enabled) { - return 'Councils are disabled. Set councils.enabled: true in config.'; - } if (!deps.agentConfigRegistry || deps.agentConfigRegistry.list().length === 0) { return 'No agent configurations are registered. Add council_* agent_configs first.'; } + if (shouldRunCouncilPreflight(message)) { + return buildCouncilPreflightReport({ + config: deps.config, + registry: deps.agentConfigRegistry, + delegateRunner: agent, + activeTier: agent.getModelTier(), + includeLiveProbe: true, + }); + } const tool = createCouncilRunTool({ registry: deps.agentConfigRegistry, orchestrator: agent, diff --git a/src/frontends/tui/commands.test.ts b/src/frontends/tui/commands.test.ts index 3919c54..9ceca9b 100644 --- a/src/frontends/tui/commands.test.ts +++ b/src/frontends/tui/commands.test.ts @@ -33,9 +33,16 @@ describe('parseCommand', () => { it('parses /council command', () => { expect(parseCommand('/council design backup plan')).toEqual({ type: 'council', task: 'design backup plan' }); + expect(parseCommand('/council preflight')).toEqual({ type: 'council', task: 'preflight' }); expect(parseCommand('/council')).toEqual({ type: 'council', task: '' }); }); + it('parses natural-language council shortcut', () => { + expect(parseCommand('run council design backup plan')).toEqual({ type: 'council', task: 'design backup plan' }); + expect(parseCommand('yes run the council')).toEqual({ type: 'council', task: '' }); + expect(parseCommand('Run council on #2')).toEqual({ type: 'council', task: '#2' }); + }); + it('parses /fullscreen command', () => { expect(parseCommand('/fullscreen')).toEqual({ type: 'fullscreen' }); expect(parseCommand('/fs')).toEqual({ type: 'fullscreen' }); @@ -197,6 +204,11 @@ describe('getCommandCompletions', () => { const completions = getCommandCompletions('/model fast x'); expect(completions).toEqual(['/model fast xai']); }); + + it('completes /council preflight shortcut', () => { + const completions = getCommandCompletions('/council pre'); + expect(completions).toEqual(['/council preflight']); + }); }); describe('isToolInventoryQuery', () => { diff --git a/src/frontends/tui/commands.ts b/src/frontends/tui/commands.ts index 2770478..173ca50 100644 --- a/src/frontends/tui/commands.ts +++ b/src/frontends/tui/commands.ts @@ -88,6 +88,11 @@ export function parseCommand(input: string): Command | null { if (trimmed === '/council') { return { type: 'council', task: '' }; } + // Natural-language council shortcut for common flows. + const councilShortcut = trimmed.match(/^(?:yes\s+)?run\s+(?:the\s+)?council(?:\s+on)?(?:\s+(.+))?$/i); + if (councilShortcut) { + return { type: 'council', task: (councilShortcut[1] ?? '').trim() }; + } // Fullscreen if (trimmed === '/fullscreen' || trimmed === '/fs') { @@ -211,6 +216,7 @@ Commands: /backend [provider] Show or switch local backend (ollama, llamacpp) /research Delegate a task to the configured research agent /council Run the councils pipeline for a task + /council preflight Check council tier routing, endpoint/auth mode, and probe latency /login [provider] Authenticate with GitHub, OpenAI, Anthropic, or Z.AI /pair List pending pairing codes and approved senders /pair generate [label] Generate a new DM pairing code @@ -275,7 +281,7 @@ export const COMMAND_TOOLTIPS: Record = { '/model': 'Show or switch model (local, default, fast, complex)', '/backend': 'Show or switch local backend (ollama, llamacpp)', '/research': 'Delegate a task to the configured research agent', - '/council': 'Run the councils pipeline for a task', + '/council': 'Run the councils pipeline for a task; use "/council preflight" for route/auth checks', '/reset': 'Clear conversation history', '/clear': 'Clear conversation history', '/new': 'Start a new conversation', @@ -319,6 +325,14 @@ export const MODEL_TOOLTIPS: Record = { export function getCommandCompletions(partial: string): string[] { const trimmed = partial.trim(); + if (trimmed.startsWith('/council ')) { + const suffix = trimmed.slice('/council '.length).toLowerCase(); + if ('preflight'.startsWith(suffix)) { + return ['/council preflight']; + } + return []; + } + // Complete /model if (trimmed.startsWith('/model ')) { const args = trimmed.slice('/model '.length).trim(); diff --git a/src/frontends/tui/components/App.tsx b/src/frontends/tui/components/App.tsx index ec6df0e..55a0f27 100644 --- a/src/frontends/tui/components/App.tsx +++ b/src/frontends/tui/components/App.tsx @@ -633,7 +633,7 @@ export function App({ case 'council': { if (!command.task.trim()) { - pushAssistantMessage('Usage: /council '); + pushAssistantMessage('Usage: /council | /council preflight'); return; } if (!onCouncil) { diff --git a/src/tools/builtin/council-run.test.ts b/src/tools/builtin/council-run.test.ts index 9dc9805..3c4b9aa 100644 --- a/src/tools/builtin/council-run.test.ts +++ b/src/tools/builtin/council-run.test.ts @@ -1,6 +1,9 @@ import { describe, it, expect, vi } from 'vitest'; import { createCouncilRunTool } from './council-run.js'; import type { AgentConfigRegistry } from '../../agents/registry.js'; +import { mkdtempSync, readFileSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; function createRegistry(): AgentConfigRegistry { const configs = new Map([ @@ -53,6 +56,10 @@ const config = { describe('council.run tool', () => { it('runs council pipeline and returns output summary', async () => { + const previousDataDir = process.env.FLYNN_DATA_DIR; + const testDataDir = mkdtempSync(join(tmpdir(), 'flynn-council-run-')); + process.env.FLYNN_DATA_DIR = testDataDir; + const delegate = vi.fn(async ({ message }: { message: string }) => { const payload = JSON.parse(message); if (payload.brief_D && payload.brief_P) { @@ -117,9 +124,31 @@ describe('council.run tool', () => { }); const result = await tool.execute({ task: 'plan migration' }); - expect(result.success).toBe(true); - expect(result.output).toContain('Council pipeline v1.0.0'); - expect(result.output).toContain('Meta selection'); + try { + expect(result.success).toBe(true); + expect(result.output).toContain('Council pipeline v1.0.0'); + expect(result.output).toContain('Meta selection'); + expect(result.output).toContain('Timing:'); + expect(result.output).toContain('Slowest calls:'); + expect(result.output).toContain('Artifacts:'); + + const markdownLine = result.output.split('\n').find((line) => line.startsWith('- Summary report: ')); + const jsonLine = result.output.split('\n').find((line) => line.startsWith('- Full JSON: ')); + expect(markdownLine).toBeDefined(); + expect(jsonLine).toBeDefined(); + + const markdownPath = markdownLine!.replace('- Summary report: ', '').trim(); + const jsonPath = jsonLine!.replace('- Full JSON: ', '').trim(); + expect(readFileSync(markdownPath, 'utf-8')).toContain('Council pipeline v1.0.0'); + expect(readFileSync(jsonPath, 'utf-8')).toContain('"pipeline_version": "1.0.0"'); + } finally { + if (previousDataDir !== undefined) { + process.env.FLYNN_DATA_DIR = previousDataDir; + } else { + delete process.env.FLYNN_DATA_DIR; + } + rmSync(testDataDir, { recursive: true, force: true }); + } }); it('returns error on invalid input', async () => { @@ -132,4 +161,56 @@ describe('council.run tool', () => { expect(result.success).toBe(false); expect(result.error).toBeDefined(); }); + + it('classifies network/latency failures with hint and council config summary', async () => { + const tool = createCouncilRunTool({ + registry: createRegistry(), + orchestrator: { + delegate: vi.fn(async () => { + throw new Error('Connection error.'); + }) as any, + }, + config: config as any, + }); + + const result = await tool.execute({ task: 'x' }); + expect(result.success).toBe(false); + expect(result.error).toContain('Likely root cause: network_or_latency'); + expect(result.error).toContain('Hint: Likely network/provider latency issue.'); + expect(result.error).toContain('Council config: D='); + }); + + it('classifies cap_exceeded failures with cap_overflow hint', async () => { + const tool = createCouncilRunTool({ + registry: createRegistry(), + orchestrator: { + delegate: vi.fn(async () => { + throw new Error('cap_exceeded'); + }) as any, + }, + config: config as any, + }); + + const result = await tool.execute({ task: 'x' }); + expect(result.success).toBe(false); + expect(result.error).toContain('Likely root cause: cap_overflow'); + expect(result.error).toContain('Bridge payload cap exceeded.'); + }); + + it('classifies cancellation failures with user_cancelled hint', async () => { + const tool = createCouncilRunTool({ + registry: createRegistry(), + orchestrator: { + delegate: vi.fn(async () => { + throw new Error('Operation cancelled by user.'); + }) as any, + }, + config: config as any, + }); + + const result = await tool.execute({ task: 'x' }); + expect(result.success).toBe(false); + expect(result.error).toContain('Likely root cause: user_cancelled'); + expect(result.error).toContain('Run was cancelled by user input'); + }); }); diff --git a/src/tools/builtin/council-run.ts b/src/tools/builtin/council-run.ts index 3e21515..912b9bc 100644 --- a/src/tools/builtin/council-run.ts +++ b/src/tools/builtin/council-run.ts @@ -1,4 +1,8 @@ import type { AgentConfigRegistry } from '../../agents/registry.js'; +import { mkdirSync, writeFileSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { resolve } from 'node:path'; +import type { ChatResponseFormat } from '../../models/types.js'; import type { Tool, ToolResult } from '../types.js'; import { CouncilsOrchestrator, type CouncilsConfig } from '../../councils/orchestrator.js'; import type { CouncilScaffold } from '../../councils/scaffold.js'; @@ -10,6 +14,7 @@ interface DelegateRunner { systemPrompt: string; message: string; maxTokens?: number; + responseFormat?: ChatResponseFormat; }): Promise<{ content: string; usage: { inputTokens: number; outputTokens: number }; @@ -24,6 +29,124 @@ export interface CouncilRunDeps { scaffold?: CouncilScaffold; } +function slugifyTask(task: string): string { + const trimmed = task.trim().toLowerCase(); + if (!trimmed) { + return 'council-run'; + } + const slug = trimmed + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, '') + .slice(0, 80); + return slug || 'council-run'; +} + +function formatTimestamp(date: Date): string { + return date.toISOString().replace(/[:.]/g, '-'); +} + +function getCouncilsDir(): string { + const dataDir = process.env.FLYNN_DATA_DIR ?? resolve(homedir(), '.local/share/flynn'); + return resolve(dataDir, 'councils'); +} + +function writeCouncilArtifacts( + task: string, + summaryLines: string[], + conversationLog: string, + resultJson: string, +): { jsonPath: string; markdownPath: string } { + const dir = getCouncilsDir(); + mkdirSync(dir, { recursive: true }); + + const stamp = formatTimestamp(new Date()); + const base = `${stamp}-${slugifyTask(task)}`; + const jsonPath = resolve(dir, `${base}.json`); + const markdownPath = resolve(dir, `${base}.md`); + + const markdown = [ + ...summaryLines, + '', + 'Conversations:', + '', + conversationLog || '(none)', + '', + 'Raw Result JSON:', + '```json', + resultJson, + '```', + '', + ].join('\n'); + + writeFileSync(jsonPath, `${resultJson}\n`, 'utf-8'); + writeFileSync(markdownPath, markdown, 'utf-8'); + return { jsonPath, markdownPath }; +} + +function classifyCouncilFailure(message: string): 'network_or_latency' | 'json_format' | 'config' | 'cap_overflow' | 'user_cancelled' | 'unknown' { + const lower = message.toLowerCase(); + if ( + lower.includes('operation cancelled by user') + || lower.includes('aborterror') + || lower.includes('aborted') + || lower.includes('cancelled') + ) { + return 'user_cancelled'; + } + if ( + lower.includes('connection error') + || lower.includes('timed out') + || lower.includes('econn') + || lower.includes('enotfound') + || lower.includes('all model providers failed') + ) { + return 'network_or_latency'; + } + if ( + lower.includes('repair_failed') + || lower.includes('parse_failed') + || lower.includes('json') + ) { + return 'json_format'; + } + if ( + lower.includes('not configured') + || lower.includes('disabled') + || lower.includes('meta_validation_failed') + || lower.includes('grounding_failed') + || lower.includes('bridge_validation_failed') + ) { + return 'config'; + } + if ( + lower.includes('cap_exceeded') + || lower.includes('cap_top_ideas') + || lower.includes('cap_field_bullets') + || lower.includes('cap_entry_chars') + || lower.includes('cap_total_chars') + ) { + return 'cap_overflow'; + } + return 'unknown'; +} + +function buildFailureHint(kind: ReturnType): string { + switch (kind) { + case 'network_or_latency': + return 'Likely network/provider latency issue. Check endpoint reachability and consider faster council tiers.'; + case 'json_format': + return 'Likely model output-format issue. Council JSON repair/retry was unable to normalize output.'; + case 'config': + return 'Likely councils/agent configuration issue. Verify council agent names, tiers, and strict validation settings.'; + case 'cap_overflow': + return 'Bridge payload cap exceeded. Reduce task breadth, lower max rounds, or raise councils.defaults bridge cap settings.'; + case 'user_cancelled': + return 'Run was cancelled by user input (Esc/Ctrl+C). Re-run the council task when ready.'; + default: + return 'No deterministic diagnosis from error text.'; + } +} + export function createCouncilRunTool(deps: CouncilRunDeps): Tool { return { name: 'council.run', @@ -53,6 +176,20 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool { scaffold: deps.scaffold, }); const result = await runner.run(args); + const timedTrace = result.trace.filter((event) => typeof event.latency_ms === 'number'); + const totalLatencyMs = timedTrace.reduce((sum, event) => sum + (event.latency_ms ?? 0), 0); + const phaseLatency = new Map(); + for (const event of timedTrace) { + const phase = event.phase_index; + phaseLatency.set(phase, (phaseLatency.get(phase) ?? 0) + (event.latency_ms ?? 0)); + } + const phaseLatencyLines = [...phaseLatency.entries()] + .sort((a, b) => a[0] - b[0]) + .map(([phase, latency]) => `- Phase ${phase}: ${latency}ms`); + const slowestCalls = [...timedTrace] + .sort((a, b) => (b.latency_ms ?? 0) - (a.latency_ms ?? 0)) + .slice(0, 5) + .map((event) => `- ${event.call_id}: ${event.latency_ms ?? 0}ms`); const lines = [ `[Council pipeline v${result.pipeline_version}]`, @@ -68,6 +205,16 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool { `- Open questions: ${result.meta.open_questions.length}`, `- Next experiments: ${result.meta.next_experiments.length}`, '', + 'Timing:', + `- Timed calls: ${timedTrace.length}`, + `- Total model latency (summed): ${totalLatencyMs}ms`, + ...phaseLatencyLines, + ...( + slowestCalls.length > 0 + ? ['', 'Slowest calls:', ...slowestCalls] + : [] + ), + '', `Agent conversations: ${result.conversations.length}`, ]; @@ -82,15 +229,34 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool { }) .join('\n\n'); + const resultJson = JSON.stringify(result, null, 2); + const artifacts = writeCouncilArtifacts(args.task, lines, conversationLog, resultJson); + return { success: true, - output: `${lines.join('\n')}\n\n${conversationLog}\n\n${JSON.stringify(result)}`, + output: [ + ...lines, + '', + 'Artifacts:', + `- Summary report: ${artifacts.markdownPath}`, + `- Full JSON: ${artifacts.jsonPath}`, + ].join('\n'), }; } catch (error) { + const message = error instanceof Error ? error.message : String(error); + const kind = classifyCouncilFailure(message); + const hint = buildFailureHint(kind); return { success: false, output: '', - error: error instanceof Error ? error.message : String(error), + error: [ + message, + `Likely root cause: ${kind}`, + `Hint: ${hint}`, + `Council config: D=${deps.config.groups.D.arbiter_agent}/${deps.config.groups.D.freethinker_agent}@${deps.config.groups.D.model_tier ?? 'agent-tier'}, ` + + `P=${deps.config.groups.P.arbiter_agent}/${deps.config.groups.P.freethinker_agent}@${deps.config.groups.P.model_tier ?? 'agent-tier'}, ` + + `meta=${deps.config.meta_arbiter_agent}@${deps.config.meta_model_tier ?? 'agent-tier'}`, + ].join('\n'), }; } },