feat(councils): add preflight, schema-driven outputs, and artifact reporting
This commit is contained in:
@@ -522,7 +522,7 @@ pnpm tui:fs
|
|||||||
| `/status` | Show session info |
|
| `/status` | Show session info |
|
||||||
| `/tools` | Show authoritative runtime tool list for this session |
|
| `/tools` | Show authoritative runtime tool list for this session |
|
||||||
| `/research <task>` | Delegate a task to `agent_configs.research` |
|
| `/research <task>` | Delegate a task to `agent_configs.research` |
|
||||||
| `/council <task>` | Run dual D/P councils pipeline with bridge+meta merge |
|
| `/council <task>` | Run dual D/P councils pipeline with bridge+meta merge (brief in TUI, full artifacts saved to disk) |
|
||||||
| `/compact` | Compact conversation context |
|
| `/compact` | Compact conversation context |
|
||||||
| `/usage` | Show token usage and cost |
|
| `/usage` | Show token usage and cost |
|
||||||
| `/context` | Show estimated context-window usage |
|
| `/context` | Show estimated context-window usage |
|
||||||
@@ -631,6 +631,22 @@ Run from chat:
|
|||||||
/council design a 30-day plan to cut CI flakiness by 50%
|
/council design a 30-day plan to cut CI flakiness by 50%
|
||||||
```
|
```
|
||||||
|
|
||||||
|
By default, `/council` prints a concise execution brief in the TUI and persists full artifacts under:
|
||||||
|
|
||||||
|
```text
|
||||||
|
~/.local/share/flynn/councils/
|
||||||
|
```
|
||||||
|
|
||||||
|
When `FLYNN_DATA_DIR` is set, artifacts are written to:
|
||||||
|
|
||||||
|
```text
|
||||||
|
${FLYNN_DATA_DIR}/councils/
|
||||||
|
```
|
||||||
|
|
||||||
|
Each run saves:
|
||||||
|
- `*.md` summary report (brief + full conversation trace + raw JSON block)
|
||||||
|
- `*.json` full structured `CouncilRunResult`
|
||||||
|
|
||||||
Or via tools:
|
Or via tools:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
|
|||||||
+4
-3
@@ -153,9 +153,10 @@ models:
|
|||||||
# 4. Global fallback_chain (tried in order)
|
# 4. Global fallback_chain (tried in order)
|
||||||
#
|
#
|
||||||
default:
|
default:
|
||||||
provider: anthropic
|
provider: openai
|
||||||
model: claude-sonnet-4-20250514
|
model: gpt-5.2
|
||||||
# auth_mode: auto # auto | api_key | oauth (provider-specific)
|
auth_mode: api_key # auto | api_key | oauth (provider-specific)
|
||||||
|
api_key: ${OPENAI_API_KEY}
|
||||||
# use_oauth: false # compat alias for auth_mode: oauth
|
# use_oauth: false # compat alias for auth_mode: oauth
|
||||||
# api_keys: ["${ANTHROPIC_API_KEY_PRIMARY}", "${ANTHROPIC_API_KEY_SECONDARY}"] # Optional rotation pool
|
# api_keys: ["${ANTHROPIC_API_KEY_PRIMARY}", "${ANTHROPIC_API_KEY_SECONDARY}"] # Optional rotation pool
|
||||||
# supports_audio: false # Override native audio detection per tier
|
# supports_audio: false # Override native audio detection per tier
|
||||||
|
|||||||
@@ -38,6 +38,16 @@ Flynn supports a deterministic dual-council orchestration pipeline (`council.run
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
`council.run` output behavior:
|
||||||
|
|
||||||
|
- Tool output returns a concise execution brief suitable for TUI/chat surfaces.
|
||||||
|
- Full artifacts are persisted to disk:
|
||||||
|
- `${FLYNN_DATA_DIR}/councils/` when `FLYNN_DATA_DIR` is set
|
||||||
|
- otherwise `~/.local/share/flynn/councils/`
|
||||||
|
- Per run, Flynn writes:
|
||||||
|
- `<timestamp>-<slug>.md` summary report with conversation trace and embedded raw JSON
|
||||||
|
- `<timestamp>-<slug>.json` full structured `CouncilRunResult`
|
||||||
|
|
||||||
## Determinism Notes
|
## Determinism Notes
|
||||||
|
|
||||||
- `prompt_payload_hash` is computed from canonical JSON payload passed to model.
|
- `prompt_payload_hash` is computed from canonical JSON payload passed to model.
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import type { ModelRouter, ModelTier } from '../../models/router.js';
|
import type { ModelRouter, ModelTier } from '../../models/router.js';
|
||||||
import type { ChatRequest, Message, ModelClient, TokenUsage } from '../../models/types.js';
|
import type { ChatRequest, ChatResponseFormat, Message, ModelClient, TokenUsage } from '../../models/types.js';
|
||||||
import type { Session } from '../../session/index.js';
|
import type { Session } from '../../session/index.js';
|
||||||
import type { ToolRegistry } from '../../tools/registry.js';
|
import type { ToolRegistry } from '../../tools/registry.js';
|
||||||
import type { ToolExecutor } from '../../tools/executor.js';
|
import type { ToolExecutor } from '../../tools/executor.js';
|
||||||
@@ -28,6 +28,8 @@ export interface SubAgentRequest {
|
|||||||
maxTokens?: number;
|
maxTokens?: number;
|
||||||
/** When true, include tools from the toolRegistry in the request. */
|
/** When true, include tools from the toolRegistry in the request. */
|
||||||
tools?: boolean;
|
tools?: boolean;
|
||||||
|
/** Optional provider-level response format request (e.g., JSON schema). */
|
||||||
|
responseFormat?: ChatResponseFormat;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Result returned from a sub-agent delegation call. */
|
/** Result returned from a sub-agent delegation call. */
|
||||||
@@ -279,6 +281,7 @@ export class AgentOrchestrator {
|
|||||||
messages,
|
messages,
|
||||||
system: request.systemPrompt,
|
system: request.systemPrompt,
|
||||||
maxTokens: request.maxTokens,
|
maxTokens: request.maxTokens,
|
||||||
|
responseFormat: request.responseFormat,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Optionally include tools from the registry (filtered by policy)
|
// Optionally include tools from the registry (filtered by policy)
|
||||||
|
|||||||
+16
-1
@@ -1,5 +1,6 @@
|
|||||||
import type { Command } from 'commander';
|
import type { Command } from 'commander';
|
||||||
import type { Config, CouncilsConfig, ModelConfig, ModelProvider } from '../config/index.js';
|
import type { Config, CouncilsConfig, ModelConfig, ModelProvider } from '../config/index.js';
|
||||||
|
import type { ChatResponseFormat } from '../models/types.js';
|
||||||
import { loadConfigSafe, getConfigPath } from './shared.js';
|
import { loadConfigSafe, getConfigPath } from './shared.js';
|
||||||
import { existsSync, mkdirSync, readFileSync } from 'fs';
|
import { existsSync, mkdirSync, readFileSync } from 'fs';
|
||||||
import { resolve } from 'path';
|
import { resolve } from 'path';
|
||||||
@@ -118,6 +119,7 @@ export function registerTuiCommand(program: Command): void {
|
|||||||
const { createModelRouter } = await import('../daemon/index.js');
|
const { createModelRouter } = await import('../daemon/index.js');
|
||||||
const { AgentConfigRegistry } = await import('../agents/index.js');
|
const { AgentConfigRegistry } = await import('../agents/index.js');
|
||||||
const { loadCouncilScaffoldSafe } = await import('../councils/scaffold.js');
|
const { loadCouncilScaffoldSafe } = await import('../councils/scaffold.js');
|
||||||
|
const { buildCouncilPreflightReport, shouldRunCouncilPreflight } = await import('../councils/preflight.js');
|
||||||
|
|
||||||
const dataDir = process.env.FLYNN_DATA_DIR ?? resolve(homedir(), '.local/share/flynn');
|
const dataDir = process.env.FLYNN_DATA_DIR ?? resolve(homedir(), '.local/share/flynn');
|
||||||
mkdirSync(dataDir, { recursive: true });
|
mkdirSync(dataDir, { recursive: true });
|
||||||
@@ -190,11 +192,13 @@ export function registerTuiCommand(program: Command): void {
|
|||||||
systemPrompt: string;
|
systemPrompt: string;
|
||||||
message: string;
|
message: string;
|
||||||
maxTokens?: number;
|
maxTokens?: number;
|
||||||
|
responseFormat?: ChatResponseFormat;
|
||||||
}) {
|
}) {
|
||||||
const response = await modelRouter.chat({
|
const response = await modelRouter.chat({
|
||||||
messages: [{ role: 'user', content: request.message }],
|
messages: [{ role: 'user', content: request.message }],
|
||||||
system: request.systemPrompt,
|
system: request.systemPrompt,
|
||||||
maxTokens: request.maxTokens,
|
maxTokens: request.maxTokens,
|
||||||
|
responseFormat: request.responseFormat,
|
||||||
}, request.tier);
|
}, request.tier);
|
||||||
return {
|
return {
|
||||||
content: response.content,
|
content: response.content,
|
||||||
@@ -325,6 +329,7 @@ export function registerTuiCommand(program: Command): void {
|
|||||||
if (!message) {
|
if (!message) {
|
||||||
return 'Usage: /research <question or task>';
|
return 'Usage: /research <question or task>';
|
||||||
}
|
}
|
||||||
|
modelRouter.clearAbort();
|
||||||
const agentConfig = agentConfigRegistry.get('research');
|
const agentConfig = agentConfigRegistry.get('research');
|
||||||
if (!agentConfig) {
|
if (!agentConfig) {
|
||||||
return 'Agent "research" not found. Configure agent_configs.research first.';
|
return 'Agent "research" not found. Configure agent_configs.research first.';
|
||||||
@@ -344,11 +349,21 @@ export function registerTuiCommand(program: Command): void {
|
|||||||
const runCouncilTask = async (task: string): Promise<string> => {
|
const runCouncilTask = async (task: string): Promise<string> => {
|
||||||
const message = task.trim();
|
const message = task.trim();
|
||||||
if (!message) {
|
if (!message) {
|
||||||
return 'Usage: /council <question or task>';
|
return 'Usage: /council <question or task> | /council preflight';
|
||||||
}
|
}
|
||||||
|
modelRouter.clearAbort();
|
||||||
if (!config.councils?.enabled) {
|
if (!config.councils?.enabled) {
|
||||||
return 'Councils are disabled. Set councils.enabled: true in config.';
|
return 'Councils are disabled. Set councils.enabled: true in config.';
|
||||||
}
|
}
|
||||||
|
if (shouldRunCouncilPreflight(message)) {
|
||||||
|
return buildCouncilPreflightReport({
|
||||||
|
config,
|
||||||
|
registry: agentConfigRegistry,
|
||||||
|
delegateRunner,
|
||||||
|
activeTier: modelRouter.getTier(),
|
||||||
|
includeLiveProbe: true,
|
||||||
|
});
|
||||||
|
}
|
||||||
const tool = toolRegistry.get('council.run');
|
const tool = toolRegistry.get('council.run');
|
||||||
if (!tool) {
|
if (!tool) {
|
||||||
return 'Council tool is not registered. Verify councils config and restart Flynn.';
|
return 'Council tool is not registered. Verify councils config and restart Flynn.';
|
||||||
|
|||||||
@@ -90,7 +90,7 @@ describe('builtin /council command', () => {
|
|||||||
rawInput: '/council',
|
rawInput: '/council',
|
||||||
services: {},
|
services: {},
|
||||||
});
|
});
|
||||||
expect(result).toEqual({ handled: true, text: 'Usage: /council <question or task>' });
|
expect(result).toEqual({ handled: true, text: 'Usage: /council <question or task> | /council preflight' });
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -260,7 +260,7 @@ export function createCouncilCommand(): CommandDefinition {
|
|||||||
if (!task) {
|
if (!task) {
|
||||||
return {
|
return {
|
||||||
handled: true,
|
handled: true,
|
||||||
text: 'Usage: /council <question or task>',
|
text: 'Usage: /council <question or task> | /council preflight',
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
if (!ctx.services?.runCouncil) {
|
if (!ctx.services?.runCouncil) {
|
||||||
|
|||||||
@@ -152,6 +152,7 @@ describe('CouncilsOrchestrator', () => {
|
|||||||
expect(result.pipeline_version).toBe('1.0.0');
|
expect(result.pipeline_version).toBe('1.0.0');
|
||||||
expect(result.brief_D_v1.ideas[0].idea_id).toBe('D.r1.01');
|
expect(result.brief_D_v1.ideas[0].idea_id).toBe('D.r1.01');
|
||||||
expect(result.brief_P_v1.ideas[0].idea_id).toBe('P.r1.01');
|
expect(result.brief_P_v1.ideas[0].idea_id).toBe('P.r1.01');
|
||||||
|
expect(typeof result.trace[0]?.latency_ms).toBe('number');
|
||||||
expect(result.stop_snapshot.stop_reason).toBe('convergence');
|
expect(result.stop_snapshot.stop_reason).toBe('convergence');
|
||||||
const callIds = result.trace.map((e) => e.call_id);
|
const callIds = result.trace.map((e) => e.call_id);
|
||||||
expect(callIds).toEqual([...callIds].sort((a, b) => {
|
expect(callIds).toEqual([...callIds].sort((a, b) => {
|
||||||
@@ -244,6 +245,102 @@ describe('CouncilsOrchestrator', () => {
|
|||||||
expect(JSON.stringify(pRound2Payload.peer_bridge)).toContain('D-MARKER');
|
expect(JSON.stringify(pRound2Payload.peer_bridge)).toContain('D-MARKER');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('requests json_schema response format for council structured phases', async () => {
|
||||||
|
const delegate = vi.fn(async ({ message }: { message: string }) => {
|
||||||
|
const payload = JSON.parse(message);
|
||||||
|
|
||||||
|
if (payload.brief_D && payload.brief_P) {
|
||||||
|
return {
|
||||||
|
content: JSON.stringify({
|
||||||
|
schema_version: '1.0.0',
|
||||||
|
selected_primary: [payload.brief_D.shortlist[0]],
|
||||||
|
selected_secondary: [payload.brief_P.shortlist[0]],
|
||||||
|
merges: [],
|
||||||
|
rejections: [],
|
||||||
|
open_questions: ['q1'],
|
||||||
|
next_experiments: ['e1'],
|
||||||
|
}),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (payload.shortlisted_ideas) {
|
||||||
|
return {
|
||||||
|
content: JSON.stringify({
|
||||||
|
grounded: payload.shortlisted_ideas.map((idea: { idea_id: string }) => ({
|
||||||
|
idea_id: idea.idea_id,
|
||||||
|
mve: `mve-${idea.idea_id}`,
|
||||||
|
constraints: ['c1'],
|
||||||
|
falsifiability_checks: ['f1'],
|
||||||
|
})),
|
||||||
|
}),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (payload.ideas) {
|
||||||
|
return {
|
||||||
|
content: JSON.stringify({
|
||||||
|
assessments: payload.ideas.map((idea: { idea_id: string }, idx: number) => ({
|
||||||
|
idea_id: idea.idea_id,
|
||||||
|
scores: { novelty: 60, feasibility: 70, impact: 80, testability: 90 },
|
||||||
|
decision: idx === 0 ? 'shortlist' : 'hold',
|
||||||
|
notes: 'ok',
|
||||||
|
})),
|
||||||
|
assumptions: ['a1'],
|
||||||
|
risks: ['r1'],
|
||||||
|
asks: ['q1'],
|
||||||
|
what_to_steal: ['w1'],
|
||||||
|
convergence_signal: false,
|
||||||
|
novelty_score: 60,
|
||||||
|
repetition_rate: 10,
|
||||||
|
}),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
content: JSON.stringify({
|
||||||
|
ideas: [
|
||||||
|
{ title: 't1', hypothesis: 'h1', mechanism: 'm1', expected_outcome: 'o1' },
|
||||||
|
{ title: 't2', hypothesis: 'h2', mechanism: 'm2', expected_outcome: 'o2' },
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
const orchestrator = new CouncilsOrchestrator({
|
||||||
|
registry: createRegistry(),
|
||||||
|
orchestrator: { delegate },
|
||||||
|
config: createConfig({ defaults: { ...createConfig().defaults, max_rounds: 1 } }),
|
||||||
|
});
|
||||||
|
|
||||||
|
await orchestrator.run({ task: 'json schema check' });
|
||||||
|
|
||||||
|
const schemaNames = delegate.mock.calls
|
||||||
|
.map((call) => (call[0] as { responseFormat?: { type: string; name?: string } }).responseFormat)
|
||||||
|
.filter((rf): rf is { type: string; name?: string } => Boolean(rf))
|
||||||
|
.filter((rf) => rf.type === 'json_schema')
|
||||||
|
.map((rf) => rf.name);
|
||||||
|
|
||||||
|
expect(schemaNames).toContain('council_ideation');
|
||||||
|
expect(schemaNames).toContain('council_assessment');
|
||||||
|
expect(schemaNames).toContain('council_grounding');
|
||||||
|
expect(schemaNames).toContain('council_meta');
|
||||||
|
|
||||||
|
const metaResponseFormat = delegate.mock.calls
|
||||||
|
.map((call) => (call[0] as { responseFormat?: Record<string, unknown> }).responseFormat)
|
||||||
|
.find((rf) => rf && rf.type === 'json_schema' && rf.name === 'council_meta') as
|
||||||
|
| { schema?: { required?: string[] } }
|
||||||
|
| undefined;
|
||||||
|
expect(metaResponseFormat?.schema?.required).toContain('merges');
|
||||||
|
});
|
||||||
|
|
||||||
it('fails closed on bridge cap overflow before phase 2 executes', async () => {
|
it('fails closed on bridge cap overflow before phase 2 executes', async () => {
|
||||||
const delegate = vi.fn(async ({ message }: { message: string }) => {
|
const delegate = vi.fn(async ({ message }: { message: string }) => {
|
||||||
const payload = JSON.parse(message);
|
const payload = JSON.parse(message);
|
||||||
@@ -314,4 +411,265 @@ describe('CouncilsOrchestrator', () => {
|
|||||||
expect(result.stop_snapshot.stop_reason).toBe('bridge_validation_failed');
|
expect(result.stop_snapshot.stop_reason).toBe('bridge_validation_failed');
|
||||||
expect(result.stop_snapshot.round_reached).toBe(1);
|
expect(result.stop_snapshot.round_reached).toBe(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('recovers from wrapped or mildly malformed model JSON responses', async () => {
|
||||||
|
const delegate = vi.fn(async ({ message }: { message: string }) => {
|
||||||
|
const payload = JSON.parse(message);
|
||||||
|
if (payload.brief_D && payload.brief_P) {
|
||||||
|
return {
|
||||||
|
content: [
|
||||||
|
'```json',
|
||||||
|
'{result: {schema_version: \'1.0.0\', selected_primary: [\'D.r1.01\'], selected_secondary: [\'P.r1.01\'], merges: [], rejections: [], open_questions: [\'q1\'], next_experiments: [\'e1\']}}',
|
||||||
|
'```',
|
||||||
|
].join('\n'),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (payload.shortlisted_ideas) {
|
||||||
|
return {
|
||||||
|
content: '{"output":{"grounded":[{"idea_id":"D.r1.01","mve":"m","constraints":["c"],"falsifiability_checks":["f"]}]}}',
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (payload.ideas) {
|
||||||
|
return {
|
||||||
|
content: [
|
||||||
|
'Assessment follows.',
|
||||||
|
'```json',
|
||||||
|
'{data: {assessments: [{idea_id: "D.r1.01", scores: {novelty: 60, feasibility: 70, impact: 80, testability: 90}, decision: "shortlist", notes: "ok"}], assumptions: ["a"], risks: ["r"], asks: ["k"], what_to_steal: ["w"], convergence_signal: false, novelty_score: 60, repetition_rate: 20}}',
|
||||||
|
'```',
|
||||||
|
].join('\n'),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (payload.group && payload.round) {
|
||||||
|
return {
|
||||||
|
content: [
|
||||||
|
'Here is the draft:',
|
||||||
|
'```json',
|
||||||
|
'{output: {ideas: [{title: \'t1\', hypothesis: \'h1\', mechanism: \'m1\', expected_outcome: \'o1\'}]}}',
|
||||||
|
'```',
|
||||||
|
].join('\n'),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
throw new Error('Unexpected payload');
|
||||||
|
});
|
||||||
|
|
||||||
|
const orchestrator = new CouncilsOrchestrator({
|
||||||
|
registry: createRegistry(),
|
||||||
|
orchestrator: { delegate },
|
||||||
|
config: createConfig(),
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await orchestrator.run({ task: 'x', max_rounds: 1 });
|
||||||
|
expect(result.stop_snapshot.round_reached).toBe(1);
|
||||||
|
expect(result.meta.selected_primary[0]).toBe('D.r1.01');
|
||||||
|
expect(result.meta.selected_secondary[0]).toBe('P.r1.01');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('retries once with agent-assisted JSON repair when initial output is invalid', async () => {
|
||||||
|
const repairPayloads: Array<Record<string, unknown>> = [];
|
||||||
|
const seenIdeation = new Set<string>();
|
||||||
|
|
||||||
|
const delegate = vi.fn(async ({ message }: { message: string }) => {
|
||||||
|
const payload = JSON.parse(message) as Record<string, any>;
|
||||||
|
|
||||||
|
if (payload.task === 'normalize_json_output') {
|
||||||
|
repairPayloads.push(payload);
|
||||||
|
const schema = String(payload.required_schema ?? '');
|
||||||
|
if (schema.includes('"ideas"')) {
|
||||||
|
return {
|
||||||
|
content: JSON.stringify({
|
||||||
|
ideas: [
|
||||||
|
{
|
||||||
|
title: 'repaired idea',
|
||||||
|
hypothesis: 'h',
|
||||||
|
mechanism: 'm',
|
||||||
|
expected_outcome: 'o',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
throw new Error('Unexpected repair schema');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (payload.brief_D && payload.brief_P) {
|
||||||
|
return {
|
||||||
|
content: JSON.stringify({
|
||||||
|
schema_version: '1.0.0',
|
||||||
|
selected_primary: [payload.brief_D.shortlist[0]],
|
||||||
|
selected_secondary: [payload.brief_P.shortlist[0]],
|
||||||
|
merges: [],
|
||||||
|
rejections: [],
|
||||||
|
open_questions: ['q1'],
|
||||||
|
next_experiments: ['e1'],
|
||||||
|
}),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (payload.shortlisted_ideas) {
|
||||||
|
return {
|
||||||
|
content: JSON.stringify({
|
||||||
|
grounded: payload.shortlisted_ideas.map((idea: any) => ({
|
||||||
|
idea_id: idea.idea_id,
|
||||||
|
mve: 'm',
|
||||||
|
constraints: ['c'],
|
||||||
|
falsifiability_checks: ['f'],
|
||||||
|
})),
|
||||||
|
}),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (payload.ideas) {
|
||||||
|
return {
|
||||||
|
content: JSON.stringify({
|
||||||
|
assessments: payload.ideas.map((idea: any, idx: number) => ({
|
||||||
|
idea_id: idea.idea_id,
|
||||||
|
scores: { novelty: 50, feasibility: 50, impact: 50, testability: 50 },
|
||||||
|
decision: idx === 0 ? 'shortlist' : 'hold',
|
||||||
|
notes: 'ok',
|
||||||
|
})),
|
||||||
|
assumptions: [],
|
||||||
|
risks: [],
|
||||||
|
asks: [],
|
||||||
|
what_to_steal: [],
|
||||||
|
convergence_signal: false,
|
||||||
|
novelty_score: 60,
|
||||||
|
repetition_rate: 20,
|
||||||
|
}),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (payload.group && payload.round) {
|
||||||
|
const key = `${payload.group}:${payload.round}`;
|
||||||
|
if (!seenIdeation.has(key)) {
|
||||||
|
seenIdeation.add(key);
|
||||||
|
return {
|
||||||
|
content: 'this is not JSON',
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
content: JSON.stringify({
|
||||||
|
ideas: [
|
||||||
|
{
|
||||||
|
title: `${payload.group} idea`,
|
||||||
|
hypothesis: 'h',
|
||||||
|
mechanism: 'm',
|
||||||
|
expected_outcome: 'o',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new Error('Unexpected payload');
|
||||||
|
});
|
||||||
|
|
||||||
|
const orchestrator = new CouncilsOrchestrator({
|
||||||
|
registry: createRegistry(),
|
||||||
|
orchestrator: { delegate },
|
||||||
|
config: createConfig({ defaults: { ...createConfig().defaults, max_rounds: 1 } }),
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await orchestrator.run({ task: 'x', max_rounds: 1 });
|
||||||
|
expect(result.stop_snapshot.round_reached).toBe(1);
|
||||||
|
expect(repairPayloads.length).toBeGreaterThan(0);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('falls back instead of throwing when repair output is still invalid', async () => {
|
||||||
|
const delegate = vi.fn(async ({ message }: { message: string }) => {
|
||||||
|
const payload = JSON.parse(message) as Record<string, any>;
|
||||||
|
|
||||||
|
if (payload.task === 'normalize_json_output') {
|
||||||
|
return {
|
||||||
|
content: 'still invalid json',
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (payload.brief_D && payload.brief_P) {
|
||||||
|
return {
|
||||||
|
content: JSON.stringify({
|
||||||
|
schema_version: '1.0.0',
|
||||||
|
selected_primary: [payload.brief_D.shortlist[0]],
|
||||||
|
selected_secondary: [payload.brief_P.shortlist[0]],
|
||||||
|
merges: [],
|
||||||
|
rejections: [],
|
||||||
|
open_questions: ['q1'],
|
||||||
|
next_experiments: ['e1'],
|
||||||
|
}),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (payload.shortlisted_ideas) {
|
||||||
|
return {
|
||||||
|
content: JSON.stringify({ grounded: [] }),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (payload.ideas) {
|
||||||
|
return {
|
||||||
|
content: JSON.stringify({
|
||||||
|
assessments: payload.ideas.map((idea: any, idx: number) => ({
|
||||||
|
idea_id: idea.idea_id,
|
||||||
|
scores: { novelty: 50, feasibility: 50, impact: 50, testability: 50 },
|
||||||
|
decision: idx === 0 ? 'shortlist' : 'hold',
|
||||||
|
notes: 'ok',
|
||||||
|
})),
|
||||||
|
assumptions: [],
|
||||||
|
risks: [],
|
||||||
|
asks: [],
|
||||||
|
what_to_steal: [],
|
||||||
|
convergence_signal: false,
|
||||||
|
novelty_score: 60,
|
||||||
|
repetition_rate: 20,
|
||||||
|
}),
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (payload.group && payload.round) {
|
||||||
|
return {
|
||||||
|
content: 'not json at all',
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new Error('Unexpected payload');
|
||||||
|
});
|
||||||
|
|
||||||
|
const orchestrator = new CouncilsOrchestrator({
|
||||||
|
registry: createRegistry(),
|
||||||
|
orchestrator: { delegate },
|
||||||
|
config: createConfig({ defaults: { ...createConfig().defaults, max_rounds: 1 } }),
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await orchestrator.run({ task: 'x', max_rounds: 1 });
|
||||||
|
expect(result.stop_snapshot.round_reached).toBe(1);
|
||||||
|
expect(result.brief_D_v1.ideas.length).toBeGreaterThan(0);
|
||||||
|
expect(result.brief_P_v1.ideas.length).toBeGreaterThan(0);
|
||||||
|
expect(result.trace.some((event) => event.validation_failure === 'repair_failed')).toBe(true);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
+540
-10
@@ -1,7 +1,7 @@
|
|||||||
import type { AgentConfigRegistry } from '../agents/registry.js';
|
import type { AgentConfigRegistry } from '../agents/registry.js';
|
||||||
import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
|
import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
|
||||||
import type { ModelTier } from '../models/router.js';
|
import type { ModelTier } from '../models/router.js';
|
||||||
import type { TokenUsage } from '../models/types.js';
|
import type { ChatResponseFormat, TokenUsage } from '../models/types.js';
|
||||||
import type { CouncilScaffold } from './scaffold.js';
|
import type { CouncilScaffold } from './scaffold.js';
|
||||||
import {
|
import {
|
||||||
COUNCIL_PIPELINE_VERSION,
|
COUNCIL_PIPELINE_VERSION,
|
||||||
@@ -36,6 +36,7 @@ interface DelegateRunner {
|
|||||||
systemPrompt: string;
|
systemPrompt: string;
|
||||||
message: string;
|
message: string;
|
||||||
maxTokens?: number;
|
maxTokens?: number;
|
||||||
|
responseFormat?: ChatResponseFormat;
|
||||||
}): Promise<{
|
}): Promise<{
|
||||||
content: string;
|
content: string;
|
||||||
usage: TokenUsage;
|
usage: TokenUsage;
|
||||||
@@ -90,24 +91,171 @@ interface AgentCallResult {
|
|||||||
usage: TokenUsage;
|
usage: TokenUsage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interface JsonRecoveryOptions<T> {
|
||||||
|
rawContent: string;
|
||||||
|
parser: (value: unknown) => T;
|
||||||
|
agentName: string;
|
||||||
|
callId: string;
|
||||||
|
phaseIndex: number;
|
||||||
|
promptPayload: unknown;
|
||||||
|
modeDirective: string;
|
||||||
|
schemaHint: string;
|
||||||
|
group?: CouncilGroup;
|
||||||
|
round?: number;
|
||||||
|
scaffoldPrompt?: string;
|
||||||
|
tierOverride?: ModelTier;
|
||||||
|
maxTokens?: number;
|
||||||
|
responseFormat?: ChatResponseFormat;
|
||||||
|
fallback?: () => T;
|
||||||
|
}
|
||||||
|
|
||||||
function deterministicJsonRepair(raw: string): string | null {
|
function deterministicJsonRepair(raw: string): string | null {
|
||||||
const trimmed = raw.trim();
|
const trimmed = raw.trim();
|
||||||
const noFence = trimmed
|
const noFence = trimmed
|
||||||
.replace(/^```json\s*/i, '')
|
.replace(/^```json\s*/i, '')
|
||||||
.replace(/^```\s*/i, '')
|
.replace(/^```\s*/i, '')
|
||||||
.replace(/```$/, '')
|
.replace(/```$/, '')
|
||||||
.trim();
|
.trim()
|
||||||
|
.replace(/[\u201C\u201D]/g, '"')
|
||||||
|
.replace(/[\u2018\u2019]/g, '\'');
|
||||||
|
|
||||||
const extracted = extractFirstJsonContainer(noFence);
|
const extracted = extractFirstJsonContainer(noFence);
|
||||||
if (!extracted) {
|
if (!extracted) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return extracted
|
const withoutComments = stripJsonLikeComments(extracted);
|
||||||
|
const withQuotedKeys = withoutComments.replace(
|
||||||
|
/([{,]\s*)([A-Za-z_][A-Za-z0-9_-]*)(\s*:)/g,
|
||||||
|
'$1"$2"$3',
|
||||||
|
);
|
||||||
|
const withDoubleQuotedStrings = convertSingleQuotedStrings(withQuotedKeys);
|
||||||
|
|
||||||
|
return withDoubleQuotedStrings
|
||||||
.replace(/,\s*([}\]])/g, '$1')
|
.replace(/,\s*([}\]])/g, '$1')
|
||||||
.trim();
|
.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function stripJsonLikeComments(input: string): string {
|
||||||
|
let output = '';
|
||||||
|
let i = 0;
|
||||||
|
let inString = false;
|
||||||
|
let stringDelimiter: '"' | '\'' | null = null;
|
||||||
|
let escaped = false;
|
||||||
|
|
||||||
|
while (i < input.length) {
|
||||||
|
const ch = input[i];
|
||||||
|
const next = i + 1 < input.length ? input[i + 1] : '';
|
||||||
|
|
||||||
|
if (inString) {
|
||||||
|
output += ch;
|
||||||
|
if (escaped) {
|
||||||
|
escaped = false;
|
||||||
|
} else if (ch === '\\') {
|
||||||
|
escaped = true;
|
||||||
|
} else if (ch === stringDelimiter) {
|
||||||
|
inString = false;
|
||||||
|
stringDelimiter = null;
|
||||||
|
}
|
||||||
|
i += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch === '"' || ch === '\'') {
|
||||||
|
inString = true;
|
||||||
|
stringDelimiter = ch;
|
||||||
|
output += ch;
|
||||||
|
i += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch === '/' && next === '/') {
|
||||||
|
i += 2;
|
||||||
|
while (i < input.length && input[i] !== '\n') {
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch === '/' && next === '*') {
|
||||||
|
i += 2;
|
||||||
|
while (i + 1 < input.length && !(input[i] === '*' && input[i + 1] === '/')) {
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
i = Math.min(i + 2, input.length);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
output += ch;
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
function convertSingleQuotedStrings(input: string): string {
|
||||||
|
let output = '';
|
||||||
|
let inSingle = false;
|
||||||
|
let inDouble = false;
|
||||||
|
let escaped = false;
|
||||||
|
|
||||||
|
for (let i = 0; i < input.length; i++) {
|
||||||
|
const ch = input[i];
|
||||||
|
|
||||||
|
if (inSingle) {
|
||||||
|
if (escaped) {
|
||||||
|
output += ch;
|
||||||
|
escaped = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (ch === '\\') {
|
||||||
|
output += ch;
|
||||||
|
escaped = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (ch === '\'') {
|
||||||
|
inSingle = false;
|
||||||
|
output += '"';
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (ch === '"') {
|
||||||
|
output += '\\"';
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
output += ch;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inDouble) {
|
||||||
|
output += ch;
|
||||||
|
if (escaped) {
|
||||||
|
escaped = false;
|
||||||
|
} else if (ch === '\\') {
|
||||||
|
escaped = true;
|
||||||
|
} else if (ch === '"') {
|
||||||
|
inDouble = false;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch === '\'') {
|
||||||
|
inSingle = true;
|
||||||
|
output += '"';
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch === '"') {
|
||||||
|
inDouble = true;
|
||||||
|
output += ch;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
output += ch;
|
||||||
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
function extractFirstJsonContainer(input: string): string | null {
|
function extractFirstJsonContainer(input: string): string | null {
|
||||||
const start = input.search(/[\[{]/);
|
const start = input.search(/[\[{]/);
|
||||||
if (start < 0) {
|
if (start < 0) {
|
||||||
@@ -152,16 +300,82 @@ function extractFirstJsonContainer(input: string): string | null {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function parseWithFallbackCandidates<T>(value: unknown, parser: (value: unknown) => T): T {
|
||||||
|
const queue: unknown[] = [value];
|
||||||
|
const seenObjects = new Set<object>();
|
||||||
|
const wrapperKeys = ['output', 'result', 'data', 'response', 'json', 'payload', 'content'];
|
||||||
|
let firstError: Error | null = null;
|
||||||
|
|
||||||
|
while (queue.length > 0) {
|
||||||
|
const current = queue.shift();
|
||||||
|
if (current === undefined) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (current && typeof current === 'object') {
|
||||||
|
const objectRef = current as object;
|
||||||
|
if (seenObjects.has(objectRef)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
seenObjects.add(objectRef);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
return parser(current);
|
||||||
|
} catch (error) {
|
||||||
|
if (!firstError) {
|
||||||
|
firstError = error instanceof Error ? error : new Error(String(error));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (typeof current === 'string') {
|
||||||
|
try {
|
||||||
|
queue.push(JSON.parse(current));
|
||||||
|
continue;
|
||||||
|
} catch {
|
||||||
|
const repaired = deterministicJsonRepair(current);
|
||||||
|
if (repaired) {
|
||||||
|
try {
|
||||||
|
queue.push(JSON.parse(repaired));
|
||||||
|
continue;
|
||||||
|
} catch {
|
||||||
|
// Continue searching additional candidates below.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Array.isArray(current)) {
|
||||||
|
for (const item of current) {
|
||||||
|
queue.push(item);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (current && typeof current === 'object') {
|
||||||
|
const record = current as Record<string, unknown>;
|
||||||
|
for (const key of wrapperKeys) {
|
||||||
|
if (record[key] !== undefined) {
|
||||||
|
queue.push(record[key]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
throw firstError ?? new Error('parse_failed');
|
||||||
|
}
|
||||||
|
|
||||||
function parseJsonWithRepair<T>(raw: string, parser: (value: unknown) => T): T {
|
function parseJsonWithRepair<T>(raw: string, parser: (value: unknown) => T): T {
|
||||||
try {
|
try {
|
||||||
return parser(JSON.parse(raw));
|
return parseWithFallbackCandidates(JSON.parse(raw), parser);
|
||||||
} catch {
|
} catch {
|
||||||
const repaired = deterministicJsonRepair(raw);
|
const repaired = deterministicJsonRepair(raw);
|
||||||
if (!repaired) {
|
if (!repaired) {
|
||||||
throw new Error('parse_failed');
|
throw new Error('parse_failed');
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
return parser(JSON.parse(repaired));
|
return parseWithFallbackCandidates(JSON.parse(repaired), parser);
|
||||||
} catch {
|
} catch {
|
||||||
throw new Error('repair_failed');
|
throw new Error('repair_failed');
|
||||||
}
|
}
|
||||||
@@ -172,6 +386,149 @@ function uniq(values: string[]): string[] {
|
|||||||
return Array.from(new Set(values));
|
return Array.from(new Set(values));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function jsonSchemaFormat(name: string, schema: Record<string, unknown>): ChatResponseFormat {
|
||||||
|
return { type: 'json_schema', name, schema, strict: true };
|
||||||
|
}
|
||||||
|
|
||||||
|
const councilIdeaContentJsonSchema: Record<string, unknown> = {
|
||||||
|
type: 'object',
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ['title', 'hypothesis', 'mechanism', 'expected_outcome'],
|
||||||
|
properties: {
|
||||||
|
title: { type: 'string', minLength: 1 },
|
||||||
|
hypothesis: { type: 'string', minLength: 1 },
|
||||||
|
mechanism: { type: 'string', minLength: 1 },
|
||||||
|
expected_outcome: { type: 'string', minLength: 1 },
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const councilIdeationJsonSchema: Record<string, unknown> = {
|
||||||
|
type: 'object',
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ['ideas'],
|
||||||
|
properties: {
|
||||||
|
ideas: {
|
||||||
|
type: 'array',
|
||||||
|
minItems: 1,
|
||||||
|
items: councilIdeaContentJsonSchema,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const councilAssessmentJsonSchema: Record<string, unknown> = {
|
||||||
|
type: 'object',
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ['assessments', 'assumptions', 'risks', 'asks', 'what_to_steal', 'convergence_signal', 'novelty_score', 'repetition_rate'],
|
||||||
|
properties: {
|
||||||
|
assessments: {
|
||||||
|
type: 'array',
|
||||||
|
items: {
|
||||||
|
type: 'object',
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ['idea_id', 'scores', 'decision', 'notes'],
|
||||||
|
properties: {
|
||||||
|
idea_id: { type: 'string', minLength: 1 },
|
||||||
|
scores: {
|
||||||
|
type: 'object',
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ['novelty', 'feasibility', 'impact', 'testability'],
|
||||||
|
properties: {
|
||||||
|
novelty: { type: 'integer', minimum: 0, maximum: 100 },
|
||||||
|
feasibility: { type: 'integer', minimum: 0, maximum: 100 },
|
||||||
|
impact: { type: 'integer', minimum: 0, maximum: 100 },
|
||||||
|
testability: { type: 'integer', minimum: 0, maximum: 100 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
decision: { type: 'string', enum: ['shortlist', 'hold', 'reject'] },
|
||||||
|
notes: { type: 'string', minLength: 1 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
assumptions: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||||
|
risks: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||||
|
asks: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||||
|
what_to_steal: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||||
|
convergence_signal: { type: 'boolean' },
|
||||||
|
novelty_score: { type: 'integer', minimum: 0, maximum: 100 },
|
||||||
|
repetition_rate: { type: 'integer', minimum: 0, maximum: 100 },
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const councilGroundingJsonSchema: Record<string, unknown> = {
|
||||||
|
type: 'object',
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ['grounded'],
|
||||||
|
properties: {
|
||||||
|
grounded: {
|
||||||
|
type: 'array',
|
||||||
|
items: {
|
||||||
|
type: 'object',
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ['idea_id', 'mve', 'constraints', 'falsifiability_checks'],
|
||||||
|
properties: {
|
||||||
|
idea_id: { type: 'string', minLength: 1 },
|
||||||
|
mve: { type: 'string', minLength: 1 },
|
||||||
|
constraints: {
|
||||||
|
type: 'array',
|
||||||
|
minItems: 1,
|
||||||
|
items: { type: 'string', minLength: 1 },
|
||||||
|
},
|
||||||
|
falsifiability_checks: {
|
||||||
|
type: 'array',
|
||||||
|
minItems: 1,
|
||||||
|
items: { type: 'string', minLength: 1 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const councilMetaJsonSchema: Record<string, unknown> = {
|
||||||
|
type: 'object',
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ['schema_version', 'selected_primary', 'selected_secondary', 'merges', 'rejections', 'open_questions', 'next_experiments'],
|
||||||
|
properties: {
|
||||||
|
schema_version: { type: 'string', const: COUNCIL_SCHEMA_VERSION },
|
||||||
|
selected_primary: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||||
|
selected_secondary: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||||
|
merges: {
|
||||||
|
type: 'array',
|
||||||
|
items: {
|
||||||
|
type: 'object',
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ['sources', 'result_title', 'rationale'],
|
||||||
|
properties: {
|
||||||
|
sources: {
|
||||||
|
type: 'array',
|
||||||
|
minItems: 2,
|
||||||
|
items: { type: 'string', minLength: 1 },
|
||||||
|
},
|
||||||
|
result_title: { type: 'string', minLength: 1 },
|
||||||
|
rationale: { type: 'string', minLength: 1 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
rejections: {
|
||||||
|
type: 'array',
|
||||||
|
items: {
|
||||||
|
type: 'object',
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ['idea_id', 'reason_code'],
|
||||||
|
properties: {
|
||||||
|
idea_id: { type: 'string', minLength: 1 },
|
||||||
|
reason_code: {
|
||||||
|
type: 'string',
|
||||||
|
enum: ['low_score', 'high_risk', 'insufficient_grounding', 'duplicate', 'out_of_scope', 'unknown_id', 'other'],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
open_questions: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||||
|
next_experiments: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
function computeTotalScore(assessment: IdeaAssessment): number {
|
function computeTotalScore(assessment: IdeaAssessment): number {
|
||||||
const s = assessment.scores;
|
const s = assessment.scores;
|
||||||
return s.feasibility + s.impact + s.novelty + s.testability;
|
return s.feasibility + s.impact + s.novelty + s.testability;
|
||||||
@@ -403,19 +760,23 @@ export class CouncilsOrchestrator {
|
|||||||
scaffoldPrompt?: string;
|
scaffoldPrompt?: string;
|
||||||
tierOverride?: ModelTier;
|
tierOverride?: ModelTier;
|
||||||
maxTokens?: number;
|
maxTokens?: number;
|
||||||
|
responseFormat?: ChatResponseFormat;
|
||||||
}): Promise<AgentCallResult> {
|
}): Promise<AgentCallResult> {
|
||||||
const agent = this.getAgent(opts.agentName);
|
const agent = this.getAgent(opts.agentName);
|
||||||
const message = canonicalStringify(opts.promptPayload);
|
const message = canonicalStringify(opts.promptPayload);
|
||||||
const promptHash = hashCanonical(opts.promptPayload);
|
const promptHash = hashCanonical(opts.promptPayload);
|
||||||
const scaffoldPrompt = opts.scaffoldPrompt ? `${opts.scaffoldPrompt}\n\n` : '';
|
const scaffoldPrompt = opts.scaffoldPrompt ? `${opts.scaffoldPrompt}\n\n` : '';
|
||||||
const systemPrompt = `${scaffoldPrompt}${agent.systemPrompt}\n\n${opts.modeDirective}`;
|
const systemPrompt = `${scaffoldPrompt}${agent.systemPrompt}\n\n${opts.modeDirective}`;
|
||||||
|
const startedAt = Date.now();
|
||||||
|
|
||||||
const result = await this._delegateRunner.delegate({
|
const result = await this._delegateRunner.delegate({
|
||||||
tier: opts.tierOverride ?? agent.tier,
|
tier: opts.tierOverride ?? agent.tier,
|
||||||
systemPrompt,
|
systemPrompt,
|
||||||
message,
|
message,
|
||||||
maxTokens: opts.maxTokens ?? 4096,
|
maxTokens: opts.maxTokens ?? 4096,
|
||||||
|
responseFormat: opts.responseFormat,
|
||||||
});
|
});
|
||||||
|
const latencyMs = Math.max(0, Date.now() - startedAt);
|
||||||
|
|
||||||
this._trace.push(councilTraceEventSchema.parse({
|
this._trace.push(councilTraceEventSchema.parse({
|
||||||
schema_version: COUNCIL_SCHEMA_VERSION,
|
schema_version: COUNCIL_SCHEMA_VERSION,
|
||||||
@@ -427,6 +788,7 @@ export class CouncilsOrchestrator {
|
|||||||
prompt_payload_hash: promptHash,
|
prompt_payload_hash: promptHash,
|
||||||
artifact_hash: hashCanonical(result.content),
|
artifact_hash: hashCanonical(result.content),
|
||||||
token_usage: result.usage,
|
token_usage: result.usage,
|
||||||
|
latency_ms: latencyMs,
|
||||||
}));
|
}));
|
||||||
this._conversations.push({
|
this._conversations.push({
|
||||||
schema_version: COUNCIL_SCHEMA_VERSION,
|
schema_version: COUNCIL_SCHEMA_VERSION,
|
||||||
@@ -444,6 +806,63 @@ export class CouncilsOrchestrator {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private async parseWithAgentRecovery<T>(opts: JsonRecoveryOptions<T>): Promise<T> {
|
||||||
|
try {
|
||||||
|
return parseJsonWithRepair(opts.rawContent, opts.parser);
|
||||||
|
} catch {
|
||||||
|
const recoveryPayload = {
|
||||||
|
task: 'normalize_json_output',
|
||||||
|
required_schema: opts.schemaHint,
|
||||||
|
original_prompt_payload: opts.promptPayload,
|
||||||
|
invalid_output: opts.rawContent,
|
||||||
|
};
|
||||||
|
|
||||||
|
const recoveryRaw = await this.callAgent({
|
||||||
|
agentName: opts.agentName,
|
||||||
|
callId: `${opts.callId}.repair`,
|
||||||
|
phaseIndex: opts.phaseIndex,
|
||||||
|
group: opts.group,
|
||||||
|
round: opts.round,
|
||||||
|
promptPayload: recoveryPayload,
|
||||||
|
modeDirective: [
|
||||||
|
'Your previous response did not validate.',
|
||||||
|
'Return ONLY valid JSON matching the required schema.',
|
||||||
|
'Do not add prose, markdown fences, or commentary.',
|
||||||
|
].join(' '),
|
||||||
|
scaffoldPrompt: opts.scaffoldPrompt,
|
||||||
|
tierOverride: opts.tierOverride,
|
||||||
|
maxTokens: opts.maxTokens,
|
||||||
|
responseFormat: opts.responseFormat,
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
return parseJsonWithRepair(recoveryRaw.content, opts.parser);
|
||||||
|
} catch {
|
||||||
|
if (opts.fallback) {
|
||||||
|
this.recordValidationFailure(opts, 'repair_failed');
|
||||||
|
return opts.fallback();
|
||||||
|
}
|
||||||
|
throw new Error('repair_failed');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private recordValidationFailure(
|
||||||
|
opts: Pick<JsonRecoveryOptions<unknown>, 'callId' | 'phaseIndex' | 'group' | 'round' | 'promptPayload'>,
|
||||||
|
reason: 'repair_failed' | 'parse_failed',
|
||||||
|
): void {
|
||||||
|
this._trace.push(councilTraceEventSchema.parse({
|
||||||
|
schema_version: COUNCIL_SCHEMA_VERSION,
|
||||||
|
event_id: `${opts.phaseIndex}:${opts.callId}.validation`,
|
||||||
|
phase_index: opts.phaseIndex,
|
||||||
|
call_id: `${opts.callId}.validation`,
|
||||||
|
group: opts.group,
|
||||||
|
round: opts.round,
|
||||||
|
prompt_payload_hash: hashCanonical(opts.promptPayload),
|
||||||
|
validation_failure: reason,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
private allocateIdeaId(group: CouncilGroup, round: number, index: number): string {
|
private allocateIdeaId(group: CouncilGroup, round: number, index: number): string {
|
||||||
return `${group}.r${round}.${String(index + 1).padStart(2, '0')}`;
|
return `${group}.r${round}.${String(index + 1).padStart(2, '0')}`;
|
||||||
}
|
}
|
||||||
@@ -481,11 +900,37 @@ export class CouncilsOrchestrator {
|
|||||||
round,
|
round,
|
||||||
promptPayload: ideationPayload,
|
promptPayload: ideationPayload,
|
||||||
modeDirective: 'Return JSON only: {"ideas":[IdeaContent,...]}. Do not include IDs. No prose.',
|
modeDirective: 'Return JSON only: {"ideas":[IdeaContent,...]}. Do not include IDs. No prose.',
|
||||||
|
responseFormat: jsonSchemaFormat('council_ideation', councilIdeationJsonSchema),
|
||||||
scaffoldPrompt: this._scaffold?.prompts[group].free_thinker,
|
scaffoldPrompt: this._scaffold?.prompts[group].free_thinker,
|
||||||
tierOverride: groupTier,
|
tierOverride: groupTier,
|
||||||
});
|
});
|
||||||
|
|
||||||
const ideaOutput = parseJsonWithRepair(ideation.content, (value) => ideationOutputSchema.parse(value));
|
const ideaOutput = await this.parseWithAgentRecovery({
|
||||||
|
rawContent: ideation.content,
|
||||||
|
parser: (value) => ideationOutputSchema.parse(value),
|
||||||
|
agentName: groupConfig.freethinker_agent,
|
||||||
|
callId: `${group}.r${round}.ft.ideation`,
|
||||||
|
phaseIndex: phaseBase,
|
||||||
|
group,
|
||||||
|
round,
|
||||||
|
promptPayload: ideationPayload,
|
||||||
|
modeDirective: 'Return JSON only: {"ideas":[IdeaContent,...]}. Do not include IDs. No prose.',
|
||||||
|
schemaHint:
|
||||||
|
'{"ideas":[{"title":"string","hypothesis":"string","mechanism":"string","expected_outcome":"string"}]}',
|
||||||
|
responseFormat: jsonSchemaFormat('council_ideation', councilIdeationJsonSchema),
|
||||||
|
scaffoldPrompt: this._scaffold?.prompts[group].free_thinker,
|
||||||
|
tierOverride: groupTier,
|
||||||
|
fallback: () => ({
|
||||||
|
ideas: [
|
||||||
|
{
|
||||||
|
title: `${group} fallback idea`,
|
||||||
|
hypothesis: `Fallback hypothesis for: ${input.task}`,
|
||||||
|
mechanism: 'Run a minimum-viable experiment and measure signal within one iteration.',
|
||||||
|
expected_outcome: 'A concrete go/no-go signal to reduce uncertainty.',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
});
|
||||||
const ideaCards: IdeaCard[] = ideaOutput.ideas
|
const ideaCards: IdeaCard[] = ideaOutput.ideas
|
||||||
.slice(0, this._config.defaults.ideas_per_round)
|
.slice(0, this._config.defaults.ideas_per_round)
|
||||||
.map((idea, index) => ({
|
.map((idea, index) => ({
|
||||||
@@ -514,11 +959,48 @@ export class CouncilsOrchestrator {
|
|||||||
promptPayload: assessmentPayload,
|
promptPayload: assessmentPayload,
|
||||||
modeDirective:
|
modeDirective:
|
||||||
'Return JSON only. Assess provided idea IDs only. No new IDs. Include convergence_signal/novelty_score/repetition_rate.',
|
'Return JSON only. Assess provided idea IDs only. No new IDs. Include convergence_signal/novelty_score/repetition_rate.',
|
||||||
|
responseFormat: jsonSchemaFormat('council_assessment', councilAssessmentJsonSchema),
|
||||||
scaffoldPrompt: this._scaffold?.prompts[group].arbiter,
|
scaffoldPrompt: this._scaffold?.prompts[group].arbiter,
|
||||||
tierOverride: groupTier,
|
tierOverride: groupTier,
|
||||||
});
|
});
|
||||||
|
|
||||||
const assessmentOutput = parseJsonWithRepair(assessmentRaw.content, (value) => assessmentOutputSchema.parse(value));
|
const assessmentOutput = await this.parseWithAgentRecovery({
|
||||||
|
rawContent: assessmentRaw.content,
|
||||||
|
parser: (value) => assessmentOutputSchema.parse(value),
|
||||||
|
agentName: groupConfig.arbiter_agent,
|
||||||
|
callId: `${group}.r${round}.arb.assess`,
|
||||||
|
phaseIndex: phaseBase + 1,
|
||||||
|
group,
|
||||||
|
round,
|
||||||
|
promptPayload: assessmentPayload,
|
||||||
|
modeDirective:
|
||||||
|
'Return JSON only. Assess provided idea IDs only. No new IDs. Include convergence_signal/novelty_score/repetition_rate.',
|
||||||
|
schemaHint:
|
||||||
|
'{"assessments":[{"idea_id":"string","scores":{"novelty":0,"feasibility":0,"impact":0,"testability":0},"decision":"shortlist|hold|reject","notes":"string"}],"assumptions":["string"],"risks":["string"],"asks":["string"],"what_to_steal":["string"],"convergence_signal":false,"novelty_score":0,"repetition_rate":0}',
|
||||||
|
responseFormat: jsonSchemaFormat('council_assessment', councilAssessmentJsonSchema),
|
||||||
|
scaffoldPrompt: this._scaffold?.prompts[group].arbiter,
|
||||||
|
tierOverride: groupTier,
|
||||||
|
fallback: () => ({
|
||||||
|
assessments: ideaCards.map((idea, index) => ({
|
||||||
|
idea_id: idea.idea_id,
|
||||||
|
scores: {
|
||||||
|
novelty: 40,
|
||||||
|
feasibility: 40,
|
||||||
|
impact: 40,
|
||||||
|
testability: 40,
|
||||||
|
},
|
||||||
|
decision: index === 0 ? 'shortlist' as const : 'hold' as const,
|
||||||
|
notes: 'Fallback assessment used because model output could not be repaired into valid JSON.',
|
||||||
|
})),
|
||||||
|
assumptions: ['Model output formatting instability; assumptions are provisional.'],
|
||||||
|
risks: ['Assessment quality degraded due to invalid structured output.'],
|
||||||
|
asks: ['Re-run with a different provider or tier to confirm shortlist quality.'],
|
||||||
|
what_to_steal: [],
|
||||||
|
convergence_signal: false,
|
||||||
|
novelty_score: 0,
|
||||||
|
repetition_rate: 100,
|
||||||
|
}),
|
||||||
|
});
|
||||||
const validIdeaIds = new Set(ideaCards.map((i) => i.idea_id));
|
const validIdeaIds = new Set(ideaCards.map((i) => i.idea_id));
|
||||||
const assessments: IdeaAssessment[] = uniq(assessmentOutput.assessments.map((a) => a.idea_id))
|
const assessments: IdeaAssessment[] = uniq(assessmentOutput.assessments.map((a) => a.idea_id))
|
||||||
.map((ideaId) => assessmentOutput.assessments.find((a) => a.idea_id === ideaId)!)
|
.map((ideaId) => assessmentOutput.assessments.find((a) => a.idea_id === ideaId)!)
|
||||||
@@ -571,10 +1053,27 @@ export class CouncilsOrchestrator {
|
|||||||
promptPayload: groundingPayload,
|
promptPayload: groundingPayload,
|
||||||
modeDirective:
|
modeDirective:
|
||||||
'Grounder mode. Return JSON only: {"grounded":[{"idea_id", "mve", "constraints", "falsifiability_checks"}]}. No prose.',
|
'Grounder mode. Return JSON only: {"grounded":[{"idea_id", "mve", "constraints", "falsifiability_checks"}]}. No prose.',
|
||||||
|
responseFormat: jsonSchemaFormat('council_grounding', councilGroundingJsonSchema),
|
||||||
|
scaffoldPrompt: this._scaffold?.prompts[group].grounder ?? this._scaffold?.prompts[group].free_thinker,
|
||||||
|
tierOverride: groupTier,
|
||||||
|
});
|
||||||
|
grounding = await this.parseWithAgentRecovery({
|
||||||
|
rawContent: groundingRaw.content,
|
||||||
|
parser: (value) => groundingOutputSchema.parse(value),
|
||||||
|
agentName: groundingAgent,
|
||||||
|
callId: `${group}.r${round}.ft.ground`,
|
||||||
|
phaseIndex: phaseBase + 2,
|
||||||
|
group,
|
||||||
|
round,
|
||||||
|
promptPayload: groundingPayload,
|
||||||
|
modeDirective:
|
||||||
|
'Grounder mode. Return JSON only: {"grounded":[{"idea_id", "mve", "constraints", "falsifiability_checks"}]}. No prose.',
|
||||||
|
schemaHint:
|
||||||
|
'{"grounded":[{"idea_id":"string","mve":"string","constraints":["string"],"falsifiability_checks":["string"]}]}',
|
||||||
|
responseFormat: jsonSchemaFormat('council_grounding', councilGroundingJsonSchema),
|
||||||
scaffoldPrompt: this._scaffold?.prompts[group].grounder ?? this._scaffold?.prompts[group].free_thinker,
|
scaffoldPrompt: this._scaffold?.prompts[group].grounder ?? this._scaffold?.prompts[group].free_thinker,
|
||||||
tierOverride: groupTier,
|
tierOverride: groupTier,
|
||||||
});
|
});
|
||||||
grounding = parseJsonWithRepair(groundingRaw.content, (value) => groundingOutputSchema.parse(value));
|
|
||||||
} catch {
|
} catch {
|
||||||
groundingFailures = shortlist.length;
|
groundingFailures = shortlist.length;
|
||||||
if (this._config.strict_grounding) {
|
if (this._config.strict_grounding) {
|
||||||
@@ -704,7 +1203,11 @@ export class CouncilsOrchestrator {
|
|||||||
return packet;
|
return packet;
|
||||||
}
|
}
|
||||||
|
|
||||||
private async runMetaMerge(input: CouncilRunInput, briefD: CouncilBrief, briefP: CouncilBrief) {
|
private async runMetaMerge(
|
||||||
|
input: CouncilRunInput,
|
||||||
|
briefD: CouncilBrief,
|
||||||
|
briefP: CouncilBrief,
|
||||||
|
): Promise<ReturnType<typeof metaSelectionSchema.parse>> {
|
||||||
const metaAgentName = this._config.meta_arbiter_agent;
|
const metaAgentName = this._config.meta_arbiter_agent;
|
||||||
const payload = {
|
const payload = {
|
||||||
input,
|
input,
|
||||||
@@ -723,11 +1226,37 @@ export class CouncilsOrchestrator {
|
|||||||
promptPayload: payload,
|
promptPayload: payload,
|
||||||
modeDirective:
|
modeDirective:
|
||||||
'Return JSON only following schema with selected_primary/selected_secondary/merges/rejections/open_questions/next_experiments. Use only known idea IDs.',
|
'Return JSON only following schema with selected_primary/selected_secondary/merges/rejections/open_questions/next_experiments. Use only known idea IDs.',
|
||||||
|
responseFormat: jsonSchemaFormat('council_meta', councilMetaJsonSchema),
|
||||||
scaffoldPrompt: this._scaffold?.prompts.meta_arbiter,
|
scaffoldPrompt: this._scaffold?.prompts.meta_arbiter,
|
||||||
tierOverride: this._config.meta_model_tier,
|
tierOverride: this._config.meta_model_tier,
|
||||||
});
|
});
|
||||||
|
|
||||||
return parseJsonWithRepair(metaRaw.content, (value) => metaSelectionSchema.parse(value));
|
return this.parseWithAgentRecovery<ReturnType<typeof metaSelectionSchema.parse>>({
|
||||||
|
rawContent: metaRaw.content,
|
||||||
|
parser: (value) => metaSelectionSchema.parse(value),
|
||||||
|
agentName: metaAgentName,
|
||||||
|
callId: 'meta.merge',
|
||||||
|
phaseIndex: 999,
|
||||||
|
promptPayload: payload,
|
||||||
|
modeDirective:
|
||||||
|
'Return JSON only following schema with selected_primary/selected_secondary/merges/rejections/open_questions/next_experiments. Use only known idea IDs.',
|
||||||
|
schemaHint:
|
||||||
|
'{"schema_version":"1.0.0","selected_primary":["string"],"selected_secondary":["string"],"merges":[{"sources":["string","string"],"result_title":"string","rationale":"string"}],"rejections":[{"idea_id":"string","reason_code":"low_score|high_risk|insufficient_grounding|duplicate|out_of_scope|unknown_id|other"}],"open_questions":["string"],"next_experiments":["string"]}',
|
||||||
|
responseFormat: jsonSchemaFormat('council_meta', councilMetaJsonSchema),
|
||||||
|
scaffoldPrompt: this._scaffold?.prompts.meta_arbiter,
|
||||||
|
tierOverride: this._config.meta_model_tier,
|
||||||
|
fallback: () => metaSelectionSchema.parse({
|
||||||
|
schema_version: COUNCIL_SCHEMA_VERSION,
|
||||||
|
selected_primary: briefD.shortlist.slice(0, 1),
|
||||||
|
selected_secondary: briefP.shortlist.slice(0, 1),
|
||||||
|
merges: [],
|
||||||
|
rejections: [],
|
||||||
|
open_questions: ['Meta merge used fallback due to invalid structured output.'],
|
||||||
|
next_experiments: [
|
||||||
|
'Re-run council with a lower-latency/stronger JSON model and compare selected ideas.',
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private validateMetaSelection(meta: ReturnType<typeof metaSelectionSchema.parse>, knownIds: Set<string>): boolean {
|
private validateMetaSelection(meta: ReturnType<typeof metaSelectionSchema.parse>, knownIds: Set<string>): boolean {
|
||||||
@@ -763,6 +1292,7 @@ export class CouncilsOrchestrator {
|
|||||||
group: normalizeOptional(event.group),
|
group: normalizeOptional(event.group),
|
||||||
round: normalizeOptional(event.round),
|
round: normalizeOptional(event.round),
|
||||||
token_usage: normalizeOptional(event.token_usage),
|
token_usage: normalizeOptional(event.token_usage),
|
||||||
|
latency_ms: normalizeOptional(event.latency_ms),
|
||||||
dropped_reason: normalizeOptional(event.dropped_reason),
|
dropped_reason: normalizeOptional(event.dropped_reason),
|
||||||
validation_failure: normalizeOptional(event.validation_failure),
|
validation_failure: normalizeOptional(event.validation_failure),
|
||||||
}));
|
}));
|
||||||
|
|||||||
@@ -0,0 +1,93 @@
|
|||||||
|
import { describe, expect, it, vi } from 'vitest';
|
||||||
|
import { AgentConfigRegistry } from '../agents/registry.js';
|
||||||
|
import { configSchema } from '../config/schema.js';
|
||||||
|
import { buildCouncilPreflightReport } from './preflight.js';
|
||||||
|
|
||||||
|
function makeConfig() {
|
||||||
|
const config = configSchema.parse({
|
||||||
|
models: {
|
||||||
|
default: {
|
||||||
|
provider: 'zhipuai',
|
||||||
|
model: 'glm-4.7',
|
||||||
|
use_oauth: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
config.councils.enabled = true;
|
||||||
|
config.councils.groups.D.arbiter_agent = 'comms';
|
||||||
|
config.councils.groups.D.freethinker_agent = 'research';
|
||||||
|
config.councils.groups.D.model_tier = 'default';
|
||||||
|
config.councils.groups.P.arbiter_agent = 'comms';
|
||||||
|
config.councils.groups.P.freethinker_agent = 'research';
|
||||||
|
config.councils.groups.P.model_tier = 'default';
|
||||||
|
config.councils.meta_arbiter_agent = 'code';
|
||||||
|
config.councils.meta_model_tier = 'default';
|
||||||
|
return config;
|
||||||
|
}
|
||||||
|
|
||||||
|
function makeRegistry(): AgentConfigRegistry {
|
||||||
|
const registry = new AgentConfigRegistry();
|
||||||
|
registry.register({ name: 'comms', modelTier: 'fast' });
|
||||||
|
registry.register({ name: 'research', modelTier: 'default' });
|
||||||
|
registry.register({ name: 'code', modelTier: 'complex' });
|
||||||
|
return registry;
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('buildCouncilPreflightReport', () => {
|
||||||
|
it('reports zhipu endpoint/auth and stale oauth flag note', async () => {
|
||||||
|
const config = makeConfig();
|
||||||
|
const report = await buildCouncilPreflightReport({
|
||||||
|
config,
|
||||||
|
registry: makeRegistry(),
|
||||||
|
delegateRunner: {
|
||||||
|
delegate: vi.fn(async () => ({
|
||||||
|
content: '{"ok":true}',
|
||||||
|
usage: { inputTokens: 1, outputTokens: 1 },
|
||||||
|
tier: 'default' as const,
|
||||||
|
})),
|
||||||
|
},
|
||||||
|
activeTier: 'default',
|
||||||
|
includeLiveProbe: false,
|
||||||
|
credentialState: {
|
||||||
|
openaiOAuth: false,
|
||||||
|
openaiApiKeyStored: false,
|
||||||
|
anthropicApiKeyStored: false,
|
||||||
|
anthropicAuthTokenStored: false,
|
||||||
|
zaiStored: true,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(report).toContain('D.arbiter: agent=comms');
|
||||||
|
expect(report).toContain('effective_tier=default model=zhipuai/glm-4.7');
|
||||||
|
expect(report).toContain('endpoint=https://api.z.ai/api/paas/v4');
|
||||||
|
expect(report).toContain('auth_mode=bearer_credential');
|
||||||
|
expect(report).toContain('auth_source=auth.json:zai');
|
||||||
|
expect(report).toContain('note=use_oauth/auth_mode=oauth is ignored for zhipuai');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('includes live probe failures by tier', async () => {
|
||||||
|
const config = makeConfig();
|
||||||
|
const delegate = vi.fn(async () => {
|
||||||
|
throw new Error('Connection error');
|
||||||
|
});
|
||||||
|
const report = await buildCouncilPreflightReport({
|
||||||
|
config,
|
||||||
|
registry: makeRegistry(),
|
||||||
|
delegateRunner: { delegate },
|
||||||
|
activeTier: 'default',
|
||||||
|
includeLiveProbe: true,
|
||||||
|
credentialState: {
|
||||||
|
openaiOAuth: false,
|
||||||
|
openaiApiKeyStored: false,
|
||||||
|
anthropicApiKeyStored: false,
|
||||||
|
anthropicAuthTokenStored: false,
|
||||||
|
zaiStored: false,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(report).toContain('Live tier probes:');
|
||||||
|
expect(report).toContain('default: failed');
|
||||||
|
expect(report).toContain('Connection error');
|
||||||
|
expect(delegate).toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -0,0 +1,347 @@
|
|||||||
|
import type { AgentConfigRegistry } from '../agents/registry.js';
|
||||||
|
import {
|
||||||
|
loadStoredAnthropicAuth,
|
||||||
|
loadStoredAnthropicAuthToken,
|
||||||
|
loadStoredOpenAIApiKey,
|
||||||
|
loadStoredOpenAIAuth,
|
||||||
|
loadStoredZaiAuth,
|
||||||
|
} from '../auth/index.js';
|
||||||
|
import type { Config, ModelConfig, ModelProvider } from '../config/index.js';
|
||||||
|
import type { ModelTier } from '../models/router.js';
|
||||||
|
import type { ChatResponseFormat } from '../models/types.js';
|
||||||
|
|
||||||
|
interface DelegateRunner {
|
||||||
|
delegate(request: {
|
||||||
|
tier: ModelTier;
|
||||||
|
systemPrompt: string;
|
||||||
|
message: string;
|
||||||
|
maxTokens?: number;
|
||||||
|
responseFormat?: ChatResponseFormat;
|
||||||
|
}): Promise<{
|
||||||
|
content: string;
|
||||||
|
usage: { inputTokens: number; outputTokens: number };
|
||||||
|
tier: ModelTier;
|
||||||
|
}>;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface CredentialState {
|
||||||
|
openaiOAuth: boolean;
|
||||||
|
openaiApiKeyStored: boolean;
|
||||||
|
anthropicApiKeyStored: boolean;
|
||||||
|
anthropicAuthTokenStored: boolean;
|
||||||
|
zaiStored: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface CouncilPreflightOptions {
|
||||||
|
config: Config;
|
||||||
|
registry: AgentConfigRegistry;
|
||||||
|
delegateRunner: DelegateRunner;
|
||||||
|
activeTier?: ModelTier;
|
||||||
|
includeLiveProbe?: boolean;
|
||||||
|
credentialState?: CredentialState;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface TierConfigResolution {
|
||||||
|
modelConfig: ModelConfig;
|
||||||
|
fellBackToDefault: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface AuthResolution {
|
||||||
|
mode: string;
|
||||||
|
source: string;
|
||||||
|
note?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
const DEFAULT_ENDPOINTS: Partial<Record<ModelProvider, string>> = {
|
||||||
|
openai: 'https://api.openai.com/v1',
|
||||||
|
openrouter: 'https://openrouter.ai/api/v1',
|
||||||
|
vercel: 'https://ai-gateway.vercel.sh/v1',
|
||||||
|
zhipuai: 'https://api.z.ai/api/paas/v4',
|
||||||
|
xai: 'https://api.x.ai/v1',
|
||||||
|
minimax: 'https://api.minimax.io/v1',
|
||||||
|
moonshot: 'https://api.moonshot.cn/v1',
|
||||||
|
ollama: 'http://localhost:11434',
|
||||||
|
llamacpp: 'http://localhost:8080',
|
||||||
|
};
|
||||||
|
|
||||||
|
function getCredentialStateFromSystem(): CredentialState {
|
||||||
|
return {
|
||||||
|
openaiOAuth: Boolean(loadStoredOpenAIAuth()),
|
||||||
|
openaiApiKeyStored: Boolean(loadStoredOpenAIApiKey()),
|
||||||
|
anthropicApiKeyStored: Boolean(loadStoredAnthropicAuth()),
|
||||||
|
anthropicAuthTokenStored: Boolean(loadStoredAnthropicAuthToken()),
|
||||||
|
zaiStored: Boolean(loadStoredZaiAuth()),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function getEffectiveAuthMode(cfg: ModelConfig): 'auto' | 'api_key' | 'oauth' {
|
||||||
|
if (cfg.auth_mode) {
|
||||||
|
return cfg.auth_mode;
|
||||||
|
}
|
||||||
|
if (cfg.use_oauth) {
|
||||||
|
return 'oauth';
|
||||||
|
}
|
||||||
|
return 'auto';
|
||||||
|
}
|
||||||
|
|
||||||
|
function resolveTierConfig(config: Config, tier: ModelTier): TierConfigResolution {
|
||||||
|
if (tier === 'default') {
|
||||||
|
return { modelConfig: config.models.default, fellBackToDefault: false };
|
||||||
|
}
|
||||||
|
const direct = config.models[tier];
|
||||||
|
if (direct) {
|
||||||
|
return { modelConfig: direct, fellBackToDefault: false };
|
||||||
|
}
|
||||||
|
return { modelConfig: config.models.default, fellBackToDefault: true };
|
||||||
|
}
|
||||||
|
|
||||||
|
function firstTruthySource(sources: Array<[boolean, string]>): string {
|
||||||
|
for (const [present, label] of sources) {
|
||||||
|
if (present) {
|
||||||
|
return label;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 'missing';
|
||||||
|
}
|
||||||
|
|
||||||
|
function resolveAuth(cfg: ModelConfig, credentialState: CredentialState): AuthResolution {
|
||||||
|
if (cfg.provider === 'zhipuai') {
|
||||||
|
const source = firstTruthySource([
|
||||||
|
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
|
||||||
|
[Boolean(cfg.auth_token?.trim()), 'config.auth_token'],
|
||||||
|
[Boolean(process.env.ZAI_API_KEY?.trim()), 'env:ZAI_API_KEY'],
|
||||||
|
[Boolean(process.env.ZHIPUAI_API_KEY?.trim()), 'env:ZHIPUAI_API_KEY'],
|
||||||
|
[Boolean(process.env.ZHIPUAI_AUTH_TOKEN?.trim()), 'env:ZHIPUAI_AUTH_TOKEN'],
|
||||||
|
[credentialState.zaiStored, 'auth.json:zai'],
|
||||||
|
]);
|
||||||
|
const note = cfg.use_oauth || cfg.auth_mode === 'oauth'
|
||||||
|
? 'use_oauth/auth_mode=oauth is ignored for zhipuai'
|
||||||
|
: undefined;
|
||||||
|
return {
|
||||||
|
mode: 'bearer_credential',
|
||||||
|
source,
|
||||||
|
note,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cfg.provider === 'openai') {
|
||||||
|
const authMode = getEffectiveAuthMode(cfg);
|
||||||
|
const apiSource = firstTruthySource([
|
||||||
|
[Boolean(cfg.api_keys && cfg.api_keys.length > 0), 'config.api_keys'],
|
||||||
|
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
|
||||||
|
[Boolean(process.env.OPENAI_API_KEY?.trim()), 'env:OPENAI_API_KEY'],
|
||||||
|
[credentialState.openaiApiKeyStored, 'auth.json:openai.api_key'],
|
||||||
|
]);
|
||||||
|
const oauthSource = credentialState.openaiOAuth ? 'auth.json:openai.oauth' : 'missing';
|
||||||
|
|
||||||
|
if (authMode === 'oauth') {
|
||||||
|
return { mode: 'oauth', source: oauthSource };
|
||||||
|
}
|
||||||
|
if (authMode === 'api_key') {
|
||||||
|
return { mode: 'api_key', source: apiSource };
|
||||||
|
}
|
||||||
|
if (apiSource !== 'missing') {
|
||||||
|
return { mode: 'auto->api_key', source: apiSource };
|
||||||
|
}
|
||||||
|
return { mode: 'auto->oauth', source: oauthSource };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cfg.provider === 'anthropic') {
|
||||||
|
const authMode = getEffectiveAuthMode(cfg);
|
||||||
|
const apiSource = firstTruthySource([
|
||||||
|
[Boolean(cfg.api_keys && cfg.api_keys.length > 0), 'config.api_keys'],
|
||||||
|
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
|
||||||
|
[Boolean(process.env.ANTHROPIC_API_KEY?.trim()), 'env:ANTHROPIC_API_KEY'],
|
||||||
|
[credentialState.anthropicApiKeyStored, 'auth.json:anthropic.api_key'],
|
||||||
|
]);
|
||||||
|
const oauthSource = firstTruthySource([
|
||||||
|
[Boolean(cfg.auth_token?.trim()), 'config.auth_token'],
|
||||||
|
[Boolean(process.env.ANTHROPIC_AUTH_TOKEN?.trim()), 'env:ANTHROPIC_AUTH_TOKEN'],
|
||||||
|
[credentialState.anthropicAuthTokenStored, 'auth.json:anthropic.auth_token'],
|
||||||
|
]);
|
||||||
|
|
||||||
|
if (authMode === 'oauth') {
|
||||||
|
return { mode: 'oauth', source: oauthSource };
|
||||||
|
}
|
||||||
|
if (authMode === 'api_key') {
|
||||||
|
return { mode: 'api_key', source: apiSource };
|
||||||
|
}
|
||||||
|
if (apiSource !== 'missing') {
|
||||||
|
return { mode: 'auto->api_key', source: apiSource };
|
||||||
|
}
|
||||||
|
return { mode: 'auto->oauth', source: oauthSource };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cfg.provider === 'gemini') {
|
||||||
|
const source = firstTruthySource([
|
||||||
|
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
|
||||||
|
[Boolean(process.env.GEMINI_API_KEY?.trim()), 'env:GEMINI_API_KEY'],
|
||||||
|
[Boolean(process.env.GOOGLE_API_KEY?.trim()), 'env:GOOGLE_API_KEY'],
|
||||||
|
]);
|
||||||
|
return { mode: 'api_key', source };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cfg.provider === 'bedrock') {
|
||||||
|
const source = firstTruthySource([
|
||||||
|
[Boolean(cfg.api_key?.trim() && cfg.auth_token?.trim()), 'config.api_key+auth_token'],
|
||||||
|
[Boolean(process.env.AWS_ACCESS_KEY_ID?.trim() && process.env.AWS_SECRET_ACCESS_KEY?.trim()), 'env:AWS_ACCESS_KEY_ID+AWS_SECRET_ACCESS_KEY'],
|
||||||
|
]);
|
||||||
|
return { mode: 'aws_credentials', source };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cfg.provider === 'ollama' || cfg.provider === 'llamacpp' || cfg.provider === 'synthetic') {
|
||||||
|
return { mode: 'local', source: 'none' };
|
||||||
|
}
|
||||||
|
|
||||||
|
const source = firstTruthySource([
|
||||||
|
[Boolean(cfg.api_keys && cfg.api_keys.length > 0), 'config.api_keys'],
|
||||||
|
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
|
||||||
|
]);
|
||||||
|
return { mode: 'api_key', source };
|
||||||
|
}
|
||||||
|
|
||||||
|
function resolveEndpoint(cfg: ModelConfig): string {
|
||||||
|
if (cfg.endpoint?.trim()) {
|
||||||
|
return cfg.endpoint.trim();
|
||||||
|
}
|
||||||
|
return DEFAULT_ENDPOINTS[cfg.provider] ?? '(provider default)';
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalizeProbeError(error: unknown): string {
|
||||||
|
if (error instanceof Error) {
|
||||||
|
return error.message;
|
||||||
|
}
|
||||||
|
return String(error);
|
||||||
|
}
|
||||||
|
|
||||||
|
function isPreflightRequest(task: string): boolean {
|
||||||
|
return task.trim().toLowerCase() === 'preflight';
|
||||||
|
}
|
||||||
|
|
||||||
|
export function shouldRunCouncilPreflight(task: string): boolean {
|
||||||
|
return isPreflightRequest(task);
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function buildCouncilPreflightReport(options: CouncilPreflightOptions): Promise<string> {
|
||||||
|
const { config, registry, delegateRunner } = options;
|
||||||
|
const councils = config.councils;
|
||||||
|
const credentialState = options.credentialState ?? getCredentialStateFromSystem();
|
||||||
|
|
||||||
|
const roles = [
|
||||||
|
{ role: 'D.arbiter', agent: councils.groups.D.arbiter_agent, tierOverride: councils.groups.D.model_tier },
|
||||||
|
{ role: 'D.freethinker', agent: councils.groups.D.freethinker_agent, tierOverride: councils.groups.D.model_tier },
|
||||||
|
...(councils.groups.D.grounder_agent
|
||||||
|
? [{ role: 'D.grounder', agent: councils.groups.D.grounder_agent, tierOverride: councils.groups.D.model_tier }]
|
||||||
|
: []),
|
||||||
|
...(councils.groups.D.writer_agent
|
||||||
|
? [{ role: 'D.writer', agent: councils.groups.D.writer_agent, tierOverride: councils.groups.D.model_tier }]
|
||||||
|
: []),
|
||||||
|
{ role: 'P.arbiter', agent: councils.groups.P.arbiter_agent, tierOverride: councils.groups.P.model_tier },
|
||||||
|
{ role: 'P.freethinker', agent: councils.groups.P.freethinker_agent, tierOverride: councils.groups.P.model_tier },
|
||||||
|
...(councils.groups.P.grounder_agent
|
||||||
|
? [{ role: 'P.grounder', agent: councils.groups.P.grounder_agent, tierOverride: councils.groups.P.model_tier }]
|
||||||
|
: []),
|
||||||
|
...(councils.groups.P.writer_agent
|
||||||
|
? [{ role: 'P.writer', agent: councils.groups.P.writer_agent, tierOverride: councils.groups.P.model_tier }]
|
||||||
|
: []),
|
||||||
|
{ role: 'Meta.arbiter', agent: councils.meta_arbiter_agent, tierOverride: councils.meta_model_tier },
|
||||||
|
...(councils.meta_writer_agent
|
||||||
|
? [{ role: 'Meta.writer', agent: councils.meta_writer_agent, tierOverride: councils.meta_model_tier }]
|
||||||
|
: []),
|
||||||
|
];
|
||||||
|
|
||||||
|
const roleLines: string[] = [];
|
||||||
|
const tiersToProbe = new Set<ModelTier>();
|
||||||
|
|
||||||
|
for (const role of roles) {
|
||||||
|
const agentConfig = registry.get(role.agent);
|
||||||
|
const agentTier = agentConfig?.modelTier ?? 'default';
|
||||||
|
const effectiveTier = role.tierOverride ?? agentTier;
|
||||||
|
const { modelConfig, fellBackToDefault } = resolveTierConfig(config, effectiveTier);
|
||||||
|
const modelLabel = `${modelConfig.provider}/${modelConfig.model}`;
|
||||||
|
const missingSuffix = agentConfig ? '' : ' [agent_missing]';
|
||||||
|
const fallbackSuffix = fellBackToDefault ? ' [tier_unconfigured->default]' : '';
|
||||||
|
roleLines.push(
|
||||||
|
`- ${role.role}: agent=${role.agent}${missingSuffix} ` +
|
||||||
|
`agent_tier=${agentTier} override_tier=${role.tierOverride ?? 'none'} ` +
|
||||||
|
`effective_tier=${effectiveTier} model=${modelLabel}${fallbackSuffix}`,
|
||||||
|
);
|
||||||
|
tiersToProbe.add(effectiveTier);
|
||||||
|
}
|
||||||
|
|
||||||
|
const tierLines: string[] = [];
|
||||||
|
for (const tier of [...tiersToProbe].sort()) {
|
||||||
|
const { modelConfig, fellBackToDefault } = resolveTierConfig(config, tier);
|
||||||
|
const auth = resolveAuth(modelConfig, credentialState);
|
||||||
|
const endpoint = resolveEndpoint(modelConfig);
|
||||||
|
tierLines.push(
|
||||||
|
`- ${tier}: provider=${modelConfig.provider} model=${modelConfig.model} ` +
|
||||||
|
`endpoint=${endpoint} auth_mode=${auth.mode} auth_source=${auth.source}` +
|
||||||
|
`${auth.note ? ` note=${auth.note}` : ''}` +
|
||||||
|
`${fellBackToDefault ? ' fallback=default' : ''}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const probeLines: string[] = [];
|
||||||
|
if (options.includeLiveProbe ?? true) {
|
||||||
|
const probeResults = await Promise.all(
|
||||||
|
[...tiersToProbe].sort().map(async (tier) => {
|
||||||
|
const startedAt = Date.now();
|
||||||
|
try {
|
||||||
|
await delegateRunner.delegate({
|
||||||
|
tier,
|
||||||
|
systemPrompt: 'Return exactly {"ok":true}.',
|
||||||
|
message: 'Return exactly {"ok":true}.',
|
||||||
|
maxTokens: 64,
|
||||||
|
});
|
||||||
|
return {
|
||||||
|
tier,
|
||||||
|
ok: true,
|
||||||
|
latencyMs: Math.max(0, Date.now() - startedAt),
|
||||||
|
error: '',
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
return {
|
||||||
|
tier,
|
||||||
|
ok: false,
|
||||||
|
latencyMs: Math.max(0, Date.now() - startedAt),
|
||||||
|
error: normalizeProbeError(error),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
for (const result of probeResults) {
|
||||||
|
if (result.ok) {
|
||||||
|
probeLines.push(`- ${result.tier}: ok (${result.latencyMs}ms)`);
|
||||||
|
} else {
|
||||||
|
probeLines.push(`- ${result.tier}: failed (${result.latencyMs}ms) ${result.error}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
probeLines.push('- skipped');
|
||||||
|
}
|
||||||
|
|
||||||
|
const activeTier = options.activeTier ?? 'default';
|
||||||
|
const activeModel = resolveTierConfig(config, activeTier).modelConfig;
|
||||||
|
|
||||||
|
return [
|
||||||
|
'[Council preflight]',
|
||||||
|
`Councils enabled: ${councils.enabled ? 'yes' : 'no'}`,
|
||||||
|
`Active interactive tier: ${activeTier} (${activeModel.provider}/${activeModel.model})`,
|
||||||
|
`Scaffold path: ${councils.scaffold_path?.trim() ? councils.scaffold_path : '(none)'}`,
|
||||||
|
'',
|
||||||
|
'Role routing:',
|
||||||
|
...roleLines,
|
||||||
|
'',
|
||||||
|
'Tier provider/auth:',
|
||||||
|
...tierLines,
|
||||||
|
'',
|
||||||
|
'Live tier probes:',
|
||||||
|
...probeLines,
|
||||||
|
'',
|
||||||
|
'Path comparison:',
|
||||||
|
'- Interactive TUI messages use the active tier above.',
|
||||||
|
'- Council calls use per-role effective tiers (group override, then agent tier).',
|
||||||
|
].join('\n');
|
||||||
|
}
|
||||||
@@ -166,6 +166,7 @@ export const councilTraceEventSchema = z.object({
|
|||||||
inputTokens: z.number().int().min(0),
|
inputTokens: z.number().int().min(0),
|
||||||
outputTokens: z.number().int().min(0),
|
outputTokens: z.number().int().min(0),
|
||||||
}).strict().optional(),
|
}).strict().optional(),
|
||||||
|
latency_ms: z.number().int().min(0).optional(),
|
||||||
dropped_reason: droppedReasonSchema.optional(),
|
dropped_reason: droppedReasonSchema.optional(),
|
||||||
validation_failure: validationFailureReasonSchema.optional(),
|
validation_failure: validationFailureReasonSchema.optional(),
|
||||||
}).strict();
|
}).strict();
|
||||||
|
|||||||
+14
-4
@@ -29,6 +29,7 @@ import { auditLogger } from '../audit/index.js';
|
|||||||
import { getElevationStatusMessage, setElevationFromInput } from '../security/elevation.js';
|
import { getElevationStatusMessage, setElevationFromInput } from '../security/elevation.js';
|
||||||
import { dirname, resolve } from 'path';
|
import { dirname, resolve } from 'path';
|
||||||
import { loadCouncilScaffoldSafe } from '../councils/scaffold.js';
|
import { loadCouncilScaffoldSafe } from '../councils/scaffold.js';
|
||||||
|
import { buildCouncilPreflightReport, shouldRunCouncilPreflight } from '../councils/preflight.js';
|
||||||
|
|
||||||
function buildProviderConfigMap(config: Config): Partial<Record<ModelProvider, ModelConfig>> {
|
function buildProviderConfigMap(config: Config): Partial<Record<ModelProvider, ModelConfig>> {
|
||||||
const providerConfigs: Partial<Record<ModelProvider, ModelConfig>> = {};
|
const providerConfigs: Partial<Record<ModelProvider, ModelConfig>> = {};
|
||||||
@@ -866,14 +867,23 @@ export function createMessageRouter(deps: {
|
|||||||
runCouncil: async (task: string) => {
|
runCouncil: async (task: string) => {
|
||||||
const message = task.trim();
|
const message = task.trim();
|
||||||
if (!message) {
|
if (!message) {
|
||||||
return 'Usage: /council <question or task>';
|
return 'Usage: /council <question or task> | /council preflight';
|
||||||
|
}
|
||||||
|
if (!deps.config.councils?.enabled) {
|
||||||
|
return 'Councils are disabled. Set councils.enabled: true in config.';
|
||||||
}
|
}
|
||||||
if (!deps.config.councils?.enabled) {
|
|
||||||
return 'Councils are disabled. Set councils.enabled: true in config.';
|
|
||||||
}
|
|
||||||
if (!deps.agentConfigRegistry || deps.agentConfigRegistry.list().length === 0) {
|
if (!deps.agentConfigRegistry || deps.agentConfigRegistry.list().length === 0) {
|
||||||
return 'No agent configurations are registered. Add council_* agent_configs first.';
|
return 'No agent configurations are registered. Add council_* agent_configs first.';
|
||||||
}
|
}
|
||||||
|
if (shouldRunCouncilPreflight(message)) {
|
||||||
|
return buildCouncilPreflightReport({
|
||||||
|
config: deps.config,
|
||||||
|
registry: deps.agentConfigRegistry,
|
||||||
|
delegateRunner: agent,
|
||||||
|
activeTier: agent.getModelTier(),
|
||||||
|
includeLiveProbe: true,
|
||||||
|
});
|
||||||
|
}
|
||||||
const tool = createCouncilRunTool({
|
const tool = createCouncilRunTool({
|
||||||
registry: deps.agentConfigRegistry,
|
registry: deps.agentConfigRegistry,
|
||||||
orchestrator: agent,
|
orchestrator: agent,
|
||||||
|
|||||||
@@ -33,9 +33,16 @@ describe('parseCommand', () => {
|
|||||||
|
|
||||||
it('parses /council command', () => {
|
it('parses /council command', () => {
|
||||||
expect(parseCommand('/council design backup plan')).toEqual({ type: 'council', task: 'design backup plan' });
|
expect(parseCommand('/council design backup plan')).toEqual({ type: 'council', task: 'design backup plan' });
|
||||||
|
expect(parseCommand('/council preflight')).toEqual({ type: 'council', task: 'preflight' });
|
||||||
expect(parseCommand('/council')).toEqual({ type: 'council', task: '' });
|
expect(parseCommand('/council')).toEqual({ type: 'council', task: '' });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('parses natural-language council shortcut', () => {
|
||||||
|
expect(parseCommand('run council design backup plan')).toEqual({ type: 'council', task: 'design backup plan' });
|
||||||
|
expect(parseCommand('yes run the council')).toEqual({ type: 'council', task: '' });
|
||||||
|
expect(parseCommand('Run council on #2')).toEqual({ type: 'council', task: '#2' });
|
||||||
|
});
|
||||||
|
|
||||||
it('parses /fullscreen command', () => {
|
it('parses /fullscreen command', () => {
|
||||||
expect(parseCommand('/fullscreen')).toEqual({ type: 'fullscreen' });
|
expect(parseCommand('/fullscreen')).toEqual({ type: 'fullscreen' });
|
||||||
expect(parseCommand('/fs')).toEqual({ type: 'fullscreen' });
|
expect(parseCommand('/fs')).toEqual({ type: 'fullscreen' });
|
||||||
@@ -197,6 +204,11 @@ describe('getCommandCompletions', () => {
|
|||||||
const completions = getCommandCompletions('/model fast x');
|
const completions = getCommandCompletions('/model fast x');
|
||||||
expect(completions).toEqual(['/model fast xai']);
|
expect(completions).toEqual(['/model fast xai']);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('completes /council preflight shortcut', () => {
|
||||||
|
const completions = getCommandCompletions('/council pre');
|
||||||
|
expect(completions).toEqual(['/council preflight']);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('isToolInventoryQuery', () => {
|
describe('isToolInventoryQuery', () => {
|
||||||
|
|||||||
@@ -88,6 +88,11 @@ export function parseCommand(input: string): Command | null {
|
|||||||
if (trimmed === '/council') {
|
if (trimmed === '/council') {
|
||||||
return { type: 'council', task: '' };
|
return { type: 'council', task: '' };
|
||||||
}
|
}
|
||||||
|
// Natural-language council shortcut for common flows.
|
||||||
|
const councilShortcut = trimmed.match(/^(?:yes\s+)?run\s+(?:the\s+)?council(?:\s+on)?(?:\s+(.+))?$/i);
|
||||||
|
if (councilShortcut) {
|
||||||
|
return { type: 'council', task: (councilShortcut[1] ?? '').trim() };
|
||||||
|
}
|
||||||
|
|
||||||
// Fullscreen
|
// Fullscreen
|
||||||
if (trimmed === '/fullscreen' || trimmed === '/fs') {
|
if (trimmed === '/fullscreen' || trimmed === '/fs') {
|
||||||
@@ -211,6 +216,7 @@ Commands:
|
|||||||
/backend [provider] Show or switch local backend (ollama, llamacpp)
|
/backend [provider] Show or switch local backend (ollama, llamacpp)
|
||||||
/research <task> Delegate a task to the configured research agent
|
/research <task> Delegate a task to the configured research agent
|
||||||
/council <task> Run the councils pipeline for a task
|
/council <task> Run the councils pipeline for a task
|
||||||
|
/council preflight Check council tier routing, endpoint/auth mode, and probe latency
|
||||||
/login [provider] Authenticate with GitHub, OpenAI, Anthropic, or Z.AI
|
/login [provider] Authenticate with GitHub, OpenAI, Anthropic, or Z.AI
|
||||||
/pair List pending pairing codes and approved senders
|
/pair List pending pairing codes and approved senders
|
||||||
/pair generate [label] Generate a new DM pairing code
|
/pair generate [label] Generate a new DM pairing code
|
||||||
@@ -275,7 +281,7 @@ export const COMMAND_TOOLTIPS: Record<string, string> = {
|
|||||||
'/model': 'Show or switch model (local, default, fast, complex)',
|
'/model': 'Show or switch model (local, default, fast, complex)',
|
||||||
'/backend': 'Show or switch local backend (ollama, llamacpp)',
|
'/backend': 'Show or switch local backend (ollama, llamacpp)',
|
||||||
'/research': 'Delegate a task to the configured research agent',
|
'/research': 'Delegate a task to the configured research agent',
|
||||||
'/council': 'Run the councils pipeline for a task',
|
'/council': 'Run the councils pipeline for a task; use "/council preflight" for route/auth checks',
|
||||||
'/reset': 'Clear conversation history',
|
'/reset': 'Clear conversation history',
|
||||||
'/clear': 'Clear conversation history',
|
'/clear': 'Clear conversation history',
|
||||||
'/new': 'Start a new conversation',
|
'/new': 'Start a new conversation',
|
||||||
@@ -319,6 +325,14 @@ export const MODEL_TOOLTIPS: Record<string, string> = {
|
|||||||
export function getCommandCompletions(partial: string): string[] {
|
export function getCommandCompletions(partial: string): string[] {
|
||||||
const trimmed = partial.trim();
|
const trimmed = partial.trim();
|
||||||
|
|
||||||
|
if (trimmed.startsWith('/council ')) {
|
||||||
|
const suffix = trimmed.slice('/council '.length).toLowerCase();
|
||||||
|
if ('preflight'.startsWith(suffix)) {
|
||||||
|
return ['/council preflight'];
|
||||||
|
}
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
// Complete /model <tier> <provider/model>
|
// Complete /model <tier> <provider/model>
|
||||||
if (trimmed.startsWith('/model ')) {
|
if (trimmed.startsWith('/model ')) {
|
||||||
const args = trimmed.slice('/model '.length).trim();
|
const args = trimmed.slice('/model '.length).trim();
|
||||||
|
|||||||
@@ -633,7 +633,7 @@ export function App({
|
|||||||
|
|
||||||
case 'council': {
|
case 'council': {
|
||||||
if (!command.task.trim()) {
|
if (!command.task.trim()) {
|
||||||
pushAssistantMessage('Usage: /council <question or task>');
|
pushAssistantMessage('Usage: /council <question or task> | /council preflight');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!onCouncil) {
|
if (!onCouncil) {
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
import { describe, it, expect, vi } from 'vitest';
|
import { describe, it, expect, vi } from 'vitest';
|
||||||
import { createCouncilRunTool } from './council-run.js';
|
import { createCouncilRunTool } from './council-run.js';
|
||||||
import type { AgentConfigRegistry } from '../../agents/registry.js';
|
import type { AgentConfigRegistry } from '../../agents/registry.js';
|
||||||
|
import { mkdtempSync, readFileSync, rmSync } from 'node:fs';
|
||||||
|
import { tmpdir } from 'node:os';
|
||||||
|
import { join } from 'node:path';
|
||||||
|
|
||||||
function createRegistry(): AgentConfigRegistry {
|
function createRegistry(): AgentConfigRegistry {
|
||||||
const configs = new Map<string, { name: string; modelTier?: 'fast' | 'default' | 'complex'; systemPrompt?: string }>([
|
const configs = new Map<string, { name: string; modelTier?: 'fast' | 'default' | 'complex'; systemPrompt?: string }>([
|
||||||
@@ -53,6 +56,10 @@ const config = {
|
|||||||
|
|
||||||
describe('council.run tool', () => {
|
describe('council.run tool', () => {
|
||||||
it('runs council pipeline and returns output summary', async () => {
|
it('runs council pipeline and returns output summary', async () => {
|
||||||
|
const previousDataDir = process.env.FLYNN_DATA_DIR;
|
||||||
|
const testDataDir = mkdtempSync(join(tmpdir(), 'flynn-council-run-'));
|
||||||
|
process.env.FLYNN_DATA_DIR = testDataDir;
|
||||||
|
|
||||||
const delegate = vi.fn(async ({ message }: { message: string }) => {
|
const delegate = vi.fn(async ({ message }: { message: string }) => {
|
||||||
const payload = JSON.parse(message);
|
const payload = JSON.parse(message);
|
||||||
if (payload.brief_D && payload.brief_P) {
|
if (payload.brief_D && payload.brief_P) {
|
||||||
@@ -117,9 +124,31 @@ describe('council.run tool', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const result = await tool.execute({ task: 'plan migration' });
|
const result = await tool.execute({ task: 'plan migration' });
|
||||||
expect(result.success).toBe(true);
|
try {
|
||||||
expect(result.output).toContain('Council pipeline v1.0.0');
|
expect(result.success).toBe(true);
|
||||||
expect(result.output).toContain('Meta selection');
|
expect(result.output).toContain('Council pipeline v1.0.0');
|
||||||
|
expect(result.output).toContain('Meta selection');
|
||||||
|
expect(result.output).toContain('Timing:');
|
||||||
|
expect(result.output).toContain('Slowest calls:');
|
||||||
|
expect(result.output).toContain('Artifacts:');
|
||||||
|
|
||||||
|
const markdownLine = result.output.split('\n').find((line) => line.startsWith('- Summary report: '));
|
||||||
|
const jsonLine = result.output.split('\n').find((line) => line.startsWith('- Full JSON: '));
|
||||||
|
expect(markdownLine).toBeDefined();
|
||||||
|
expect(jsonLine).toBeDefined();
|
||||||
|
|
||||||
|
const markdownPath = markdownLine!.replace('- Summary report: ', '').trim();
|
||||||
|
const jsonPath = jsonLine!.replace('- Full JSON: ', '').trim();
|
||||||
|
expect(readFileSync(markdownPath, 'utf-8')).toContain('Council pipeline v1.0.0');
|
||||||
|
expect(readFileSync(jsonPath, 'utf-8')).toContain('"pipeline_version": "1.0.0"');
|
||||||
|
} finally {
|
||||||
|
if (previousDataDir !== undefined) {
|
||||||
|
process.env.FLYNN_DATA_DIR = previousDataDir;
|
||||||
|
} else {
|
||||||
|
delete process.env.FLYNN_DATA_DIR;
|
||||||
|
}
|
||||||
|
rmSync(testDataDir, { recursive: true, force: true });
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns error on invalid input', async () => {
|
it('returns error on invalid input', async () => {
|
||||||
@@ -132,4 +161,56 @@ describe('council.run tool', () => {
|
|||||||
expect(result.success).toBe(false);
|
expect(result.success).toBe(false);
|
||||||
expect(result.error).toBeDefined();
|
expect(result.error).toBeDefined();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('classifies network/latency failures with hint and council config summary', async () => {
|
||||||
|
const tool = createCouncilRunTool({
|
||||||
|
registry: createRegistry(),
|
||||||
|
orchestrator: {
|
||||||
|
delegate: vi.fn(async () => {
|
||||||
|
throw new Error('Connection error.');
|
||||||
|
}) as any,
|
||||||
|
},
|
||||||
|
config: config as any,
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await tool.execute({ task: 'x' });
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
expect(result.error).toContain('Likely root cause: network_or_latency');
|
||||||
|
expect(result.error).toContain('Hint: Likely network/provider latency issue.');
|
||||||
|
expect(result.error).toContain('Council config: D=');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('classifies cap_exceeded failures with cap_overflow hint', async () => {
|
||||||
|
const tool = createCouncilRunTool({
|
||||||
|
registry: createRegistry(),
|
||||||
|
orchestrator: {
|
||||||
|
delegate: vi.fn(async () => {
|
||||||
|
throw new Error('cap_exceeded');
|
||||||
|
}) as any,
|
||||||
|
},
|
||||||
|
config: config as any,
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await tool.execute({ task: 'x' });
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
expect(result.error).toContain('Likely root cause: cap_overflow');
|
||||||
|
expect(result.error).toContain('Bridge payload cap exceeded.');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('classifies cancellation failures with user_cancelled hint', async () => {
|
||||||
|
const tool = createCouncilRunTool({
|
||||||
|
registry: createRegistry(),
|
||||||
|
orchestrator: {
|
||||||
|
delegate: vi.fn(async () => {
|
||||||
|
throw new Error('Operation cancelled by user.');
|
||||||
|
}) as any,
|
||||||
|
},
|
||||||
|
config: config as any,
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await tool.execute({ task: 'x' });
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
expect(result.error).toContain('Likely root cause: user_cancelled');
|
||||||
|
expect(result.error).toContain('Run was cancelled by user input');
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,4 +1,8 @@
|
|||||||
import type { AgentConfigRegistry } from '../../agents/registry.js';
|
import type { AgentConfigRegistry } from '../../agents/registry.js';
|
||||||
|
import { mkdirSync, writeFileSync } from 'node:fs';
|
||||||
|
import { homedir } from 'node:os';
|
||||||
|
import { resolve } from 'node:path';
|
||||||
|
import type { ChatResponseFormat } from '../../models/types.js';
|
||||||
import type { Tool, ToolResult } from '../types.js';
|
import type { Tool, ToolResult } from '../types.js';
|
||||||
import { CouncilsOrchestrator, type CouncilsConfig } from '../../councils/orchestrator.js';
|
import { CouncilsOrchestrator, type CouncilsConfig } from '../../councils/orchestrator.js';
|
||||||
import type { CouncilScaffold } from '../../councils/scaffold.js';
|
import type { CouncilScaffold } from '../../councils/scaffold.js';
|
||||||
@@ -10,6 +14,7 @@ interface DelegateRunner {
|
|||||||
systemPrompt: string;
|
systemPrompt: string;
|
||||||
message: string;
|
message: string;
|
||||||
maxTokens?: number;
|
maxTokens?: number;
|
||||||
|
responseFormat?: ChatResponseFormat;
|
||||||
}): Promise<{
|
}): Promise<{
|
||||||
content: string;
|
content: string;
|
||||||
usage: { inputTokens: number; outputTokens: number };
|
usage: { inputTokens: number; outputTokens: number };
|
||||||
@@ -24,6 +29,124 @@ export interface CouncilRunDeps {
|
|||||||
scaffold?: CouncilScaffold;
|
scaffold?: CouncilScaffold;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function slugifyTask(task: string): string {
|
||||||
|
const trimmed = task.trim().toLowerCase();
|
||||||
|
if (!trimmed) {
|
||||||
|
return 'council-run';
|
||||||
|
}
|
||||||
|
const slug = trimmed
|
||||||
|
.replace(/[^a-z0-9]+/g, '-')
|
||||||
|
.replace(/^-+|-+$/g, '')
|
||||||
|
.slice(0, 80);
|
||||||
|
return slug || 'council-run';
|
||||||
|
}
|
||||||
|
|
||||||
|
function formatTimestamp(date: Date): string {
|
||||||
|
return date.toISOString().replace(/[:.]/g, '-');
|
||||||
|
}
|
||||||
|
|
||||||
|
function getCouncilsDir(): string {
|
||||||
|
const dataDir = process.env.FLYNN_DATA_DIR ?? resolve(homedir(), '.local/share/flynn');
|
||||||
|
return resolve(dataDir, 'councils');
|
||||||
|
}
|
||||||
|
|
||||||
|
function writeCouncilArtifacts(
|
||||||
|
task: string,
|
||||||
|
summaryLines: string[],
|
||||||
|
conversationLog: string,
|
||||||
|
resultJson: string,
|
||||||
|
): { jsonPath: string; markdownPath: string } {
|
||||||
|
const dir = getCouncilsDir();
|
||||||
|
mkdirSync(dir, { recursive: true });
|
||||||
|
|
||||||
|
const stamp = formatTimestamp(new Date());
|
||||||
|
const base = `${stamp}-${slugifyTask(task)}`;
|
||||||
|
const jsonPath = resolve(dir, `${base}.json`);
|
||||||
|
const markdownPath = resolve(dir, `${base}.md`);
|
||||||
|
|
||||||
|
const markdown = [
|
||||||
|
...summaryLines,
|
||||||
|
'',
|
||||||
|
'Conversations:',
|
||||||
|
'',
|
||||||
|
conversationLog || '(none)',
|
||||||
|
'',
|
||||||
|
'Raw Result JSON:',
|
||||||
|
'```json',
|
||||||
|
resultJson,
|
||||||
|
'```',
|
||||||
|
'',
|
||||||
|
].join('\n');
|
||||||
|
|
||||||
|
writeFileSync(jsonPath, `${resultJson}\n`, 'utf-8');
|
||||||
|
writeFileSync(markdownPath, markdown, 'utf-8');
|
||||||
|
return { jsonPath, markdownPath };
|
||||||
|
}
|
||||||
|
|
||||||
|
function classifyCouncilFailure(message: string): 'network_or_latency' | 'json_format' | 'config' | 'cap_overflow' | 'user_cancelled' | 'unknown' {
|
||||||
|
const lower = message.toLowerCase();
|
||||||
|
if (
|
||||||
|
lower.includes('operation cancelled by user')
|
||||||
|
|| lower.includes('aborterror')
|
||||||
|
|| lower.includes('aborted')
|
||||||
|
|| lower.includes('cancelled')
|
||||||
|
) {
|
||||||
|
return 'user_cancelled';
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
lower.includes('connection error')
|
||||||
|
|| lower.includes('timed out')
|
||||||
|
|| lower.includes('econn')
|
||||||
|
|| lower.includes('enotfound')
|
||||||
|
|| lower.includes('all model providers failed')
|
||||||
|
) {
|
||||||
|
return 'network_or_latency';
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
lower.includes('repair_failed')
|
||||||
|
|| lower.includes('parse_failed')
|
||||||
|
|| lower.includes('json')
|
||||||
|
) {
|
||||||
|
return 'json_format';
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
lower.includes('not configured')
|
||||||
|
|| lower.includes('disabled')
|
||||||
|
|| lower.includes('meta_validation_failed')
|
||||||
|
|| lower.includes('grounding_failed')
|
||||||
|
|| lower.includes('bridge_validation_failed')
|
||||||
|
) {
|
||||||
|
return 'config';
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
lower.includes('cap_exceeded')
|
||||||
|
|| lower.includes('cap_top_ideas')
|
||||||
|
|| lower.includes('cap_field_bullets')
|
||||||
|
|| lower.includes('cap_entry_chars')
|
||||||
|
|| lower.includes('cap_total_chars')
|
||||||
|
) {
|
||||||
|
return 'cap_overflow';
|
||||||
|
}
|
||||||
|
return 'unknown';
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildFailureHint(kind: ReturnType<typeof classifyCouncilFailure>): string {
|
||||||
|
switch (kind) {
|
||||||
|
case 'network_or_latency':
|
||||||
|
return 'Likely network/provider latency issue. Check endpoint reachability and consider faster council tiers.';
|
||||||
|
case 'json_format':
|
||||||
|
return 'Likely model output-format issue. Council JSON repair/retry was unable to normalize output.';
|
||||||
|
case 'config':
|
||||||
|
return 'Likely councils/agent configuration issue. Verify council agent names, tiers, and strict validation settings.';
|
||||||
|
case 'cap_overflow':
|
||||||
|
return 'Bridge payload cap exceeded. Reduce task breadth, lower max rounds, or raise councils.defaults bridge cap settings.';
|
||||||
|
case 'user_cancelled':
|
||||||
|
return 'Run was cancelled by user input (Esc/Ctrl+C). Re-run the council task when ready.';
|
||||||
|
default:
|
||||||
|
return 'No deterministic diagnosis from error text.';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
|
export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
|
||||||
return {
|
return {
|
||||||
name: 'council.run',
|
name: 'council.run',
|
||||||
@@ -53,6 +176,20 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
|
|||||||
scaffold: deps.scaffold,
|
scaffold: deps.scaffold,
|
||||||
});
|
});
|
||||||
const result = await runner.run(args);
|
const result = await runner.run(args);
|
||||||
|
const timedTrace = result.trace.filter((event) => typeof event.latency_ms === 'number');
|
||||||
|
const totalLatencyMs = timedTrace.reduce((sum, event) => sum + (event.latency_ms ?? 0), 0);
|
||||||
|
const phaseLatency = new Map<number, number>();
|
||||||
|
for (const event of timedTrace) {
|
||||||
|
const phase = event.phase_index;
|
||||||
|
phaseLatency.set(phase, (phaseLatency.get(phase) ?? 0) + (event.latency_ms ?? 0));
|
||||||
|
}
|
||||||
|
const phaseLatencyLines = [...phaseLatency.entries()]
|
||||||
|
.sort((a, b) => a[0] - b[0])
|
||||||
|
.map(([phase, latency]) => `- Phase ${phase}: ${latency}ms`);
|
||||||
|
const slowestCalls = [...timedTrace]
|
||||||
|
.sort((a, b) => (b.latency_ms ?? 0) - (a.latency_ms ?? 0))
|
||||||
|
.slice(0, 5)
|
||||||
|
.map((event) => `- ${event.call_id}: ${event.latency_ms ?? 0}ms`);
|
||||||
|
|
||||||
const lines = [
|
const lines = [
|
||||||
`[Council pipeline v${result.pipeline_version}]`,
|
`[Council pipeline v${result.pipeline_version}]`,
|
||||||
@@ -68,6 +205,16 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
|
|||||||
`- Open questions: ${result.meta.open_questions.length}`,
|
`- Open questions: ${result.meta.open_questions.length}`,
|
||||||
`- Next experiments: ${result.meta.next_experiments.length}`,
|
`- Next experiments: ${result.meta.next_experiments.length}`,
|
||||||
'',
|
'',
|
||||||
|
'Timing:',
|
||||||
|
`- Timed calls: ${timedTrace.length}`,
|
||||||
|
`- Total model latency (summed): ${totalLatencyMs}ms`,
|
||||||
|
...phaseLatencyLines,
|
||||||
|
...(
|
||||||
|
slowestCalls.length > 0
|
||||||
|
? ['', 'Slowest calls:', ...slowestCalls]
|
||||||
|
: []
|
||||||
|
),
|
||||||
|
'',
|
||||||
`Agent conversations: ${result.conversations.length}`,
|
`Agent conversations: ${result.conversations.length}`,
|
||||||
];
|
];
|
||||||
|
|
||||||
@@ -82,15 +229,34 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
|
|||||||
})
|
})
|
||||||
.join('\n\n');
|
.join('\n\n');
|
||||||
|
|
||||||
|
const resultJson = JSON.stringify(result, null, 2);
|
||||||
|
const artifacts = writeCouncilArtifacts(args.task, lines, conversationLog, resultJson);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
output: `${lines.join('\n')}\n\n${conversationLog}\n\n${JSON.stringify(result)}`,
|
output: [
|
||||||
|
...lines,
|
||||||
|
'',
|
||||||
|
'Artifacts:',
|
||||||
|
`- Summary report: ${artifacts.markdownPath}`,
|
||||||
|
`- Full JSON: ${artifacts.jsonPath}`,
|
||||||
|
].join('\n'),
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
const message = error instanceof Error ? error.message : String(error);
|
||||||
|
const kind = classifyCouncilFailure(message);
|
||||||
|
const hint = buildFailureHint(kind);
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
output: '',
|
output: '',
|
||||||
error: error instanceof Error ? error.message : String(error),
|
error: [
|
||||||
|
message,
|
||||||
|
`Likely root cause: ${kind}`,
|
||||||
|
`Hint: ${hint}`,
|
||||||
|
`Council config: D=${deps.config.groups.D.arbiter_agent}/${deps.config.groups.D.freethinker_agent}@${deps.config.groups.D.model_tier ?? 'agent-tier'}, ` +
|
||||||
|
`P=${deps.config.groups.P.arbiter_agent}/${deps.config.groups.P.freethinker_agent}@${deps.config.groups.P.model_tier ?? 'agent-tier'}, ` +
|
||||||
|
`meta=${deps.config.meta_arbiter_agent}@${deps.config.meta_model_tier ?? 'agent-tier'}`,
|
||||||
|
].join('\n'),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user