feat(councils): add preflight, schema-driven outputs, and artifact reporting

This commit is contained in:
William Valentin
2026-02-22 15:56:30 -08:00
parent dafe9b4d3d
commit 44c7409a20
18 changed files with 1686 additions and 29 deletions
+4 -1
View File
@@ -1,5 +1,5 @@
import type { ModelRouter, ModelTier } from '../../models/router.js';
import type { ChatRequest, Message, ModelClient, TokenUsage } from '../../models/types.js';
import type { ChatRequest, ChatResponseFormat, Message, ModelClient, TokenUsage } from '../../models/types.js';
import type { Session } from '../../session/index.js';
import type { ToolRegistry } from '../../tools/registry.js';
import type { ToolExecutor } from '../../tools/executor.js';
@@ -28,6 +28,8 @@ export interface SubAgentRequest {
maxTokens?: number;
/** When true, include tools from the toolRegistry in the request. */
tools?: boolean;
/** Optional provider-level response format request (e.g., JSON schema). */
responseFormat?: ChatResponseFormat;
}
/** Result returned from a sub-agent delegation call. */
@@ -279,6 +281,7 @@ export class AgentOrchestrator {
messages,
system: request.systemPrompt,
maxTokens: request.maxTokens,
responseFormat: request.responseFormat,
};
// Optionally include tools from the registry (filtered by policy)
+16 -1
View File
@@ -1,5 +1,6 @@
import type { Command } from 'commander';
import type { Config, CouncilsConfig, ModelConfig, ModelProvider } from '../config/index.js';
import type { ChatResponseFormat } from '../models/types.js';
import { loadConfigSafe, getConfigPath } from './shared.js';
import { existsSync, mkdirSync, readFileSync } from 'fs';
import { resolve } from 'path';
@@ -118,6 +119,7 @@ export function registerTuiCommand(program: Command): void {
const { createModelRouter } = await import('../daemon/index.js');
const { AgentConfigRegistry } = await import('../agents/index.js');
const { loadCouncilScaffoldSafe } = await import('../councils/scaffold.js');
const { buildCouncilPreflightReport, shouldRunCouncilPreflight } = await import('../councils/preflight.js');
const dataDir = process.env.FLYNN_DATA_DIR ?? resolve(homedir(), '.local/share/flynn');
mkdirSync(dataDir, { recursive: true });
@@ -190,11 +192,13 @@ export function registerTuiCommand(program: Command): void {
systemPrompt: string;
message: string;
maxTokens?: number;
responseFormat?: ChatResponseFormat;
}) {
const response = await modelRouter.chat({
messages: [{ role: 'user', content: request.message }],
system: request.systemPrompt,
maxTokens: request.maxTokens,
responseFormat: request.responseFormat,
}, request.tier);
return {
content: response.content,
@@ -325,6 +329,7 @@ export function registerTuiCommand(program: Command): void {
if (!message) {
return 'Usage: /research <question or task>';
}
modelRouter.clearAbort();
const agentConfig = agentConfigRegistry.get('research');
if (!agentConfig) {
return 'Agent "research" not found. Configure agent_configs.research first.';
@@ -344,11 +349,21 @@ export function registerTuiCommand(program: Command): void {
const runCouncilTask = async (task: string): Promise<string> => {
const message = task.trim();
if (!message) {
return 'Usage: /council <question or task>';
return 'Usage: /council <question or task> | /council preflight';
}
modelRouter.clearAbort();
if (!config.councils?.enabled) {
return 'Councils are disabled. Set councils.enabled: true in config.';
}
if (shouldRunCouncilPreflight(message)) {
return buildCouncilPreflightReport({
config,
registry: agentConfigRegistry,
delegateRunner,
activeTier: modelRouter.getTier(),
includeLiveProbe: true,
});
}
const tool = toolRegistry.get('council.run');
if (!tool) {
return 'Council tool is not registered. Verify councils config and restart Flynn.';
+1 -1
View File
@@ -90,7 +90,7 @@ describe('builtin /council command', () => {
rawInput: '/council',
services: {},
});
expect(result).toEqual({ handled: true, text: 'Usage: /council <question or task>' });
expect(result).toEqual({ handled: true, text: 'Usage: /council <question or task> | /council preflight' });
});
});
+1 -1
View File
@@ -260,7 +260,7 @@ export function createCouncilCommand(): CommandDefinition {
if (!task) {
return {
handled: true,
text: 'Usage: /council <question or task>',
text: 'Usage: /council <question or task> | /council preflight',
};
}
if (!ctx.services?.runCouncil) {
+358
View File
@@ -152,6 +152,7 @@ describe('CouncilsOrchestrator', () => {
expect(result.pipeline_version).toBe('1.0.0');
expect(result.brief_D_v1.ideas[0].idea_id).toBe('D.r1.01');
expect(result.brief_P_v1.ideas[0].idea_id).toBe('P.r1.01');
expect(typeof result.trace[0]?.latency_ms).toBe('number');
expect(result.stop_snapshot.stop_reason).toBe('convergence');
const callIds = result.trace.map((e) => e.call_id);
expect(callIds).toEqual([...callIds].sort((a, b) => {
@@ -244,6 +245,102 @@ describe('CouncilsOrchestrator', () => {
expect(JSON.stringify(pRound2Payload.peer_bridge)).toContain('D-MARKER');
});
it('requests json_schema response format for council structured phases', async () => {
const delegate = vi.fn(async ({ message }: { message: string }) => {
const payload = JSON.parse(message);
if (payload.brief_D && payload.brief_P) {
return {
content: JSON.stringify({
schema_version: '1.0.0',
selected_primary: [payload.brief_D.shortlist[0]],
selected_secondary: [payload.brief_P.shortlist[0]],
merges: [],
rejections: [],
open_questions: ['q1'],
next_experiments: ['e1'],
}),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
if (payload.shortlisted_ideas) {
return {
content: JSON.stringify({
grounded: payload.shortlisted_ideas.map((idea: { idea_id: string }) => ({
idea_id: idea.idea_id,
mve: `mve-${idea.idea_id}`,
constraints: ['c1'],
falsifiability_checks: ['f1'],
})),
}),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
if (payload.ideas) {
return {
content: JSON.stringify({
assessments: payload.ideas.map((idea: { idea_id: string }, idx: number) => ({
idea_id: idea.idea_id,
scores: { novelty: 60, feasibility: 70, impact: 80, testability: 90 },
decision: idx === 0 ? 'shortlist' : 'hold',
notes: 'ok',
})),
assumptions: ['a1'],
risks: ['r1'],
asks: ['q1'],
what_to_steal: ['w1'],
convergence_signal: false,
novelty_score: 60,
repetition_rate: 10,
}),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
return {
content: JSON.stringify({
ideas: [
{ title: 't1', hypothesis: 'h1', mechanism: 'm1', expected_outcome: 'o1' },
{ title: 't2', hypothesis: 'h2', mechanism: 'm2', expected_outcome: 'o2' },
],
}),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
});
const orchestrator = new CouncilsOrchestrator({
registry: createRegistry(),
orchestrator: { delegate },
config: createConfig({ defaults: { ...createConfig().defaults, max_rounds: 1 } }),
});
await orchestrator.run({ task: 'json schema check' });
const schemaNames = delegate.mock.calls
.map((call) => (call[0] as { responseFormat?: { type: string; name?: string } }).responseFormat)
.filter((rf): rf is { type: string; name?: string } => Boolean(rf))
.filter((rf) => rf.type === 'json_schema')
.map((rf) => rf.name);
expect(schemaNames).toContain('council_ideation');
expect(schemaNames).toContain('council_assessment');
expect(schemaNames).toContain('council_grounding');
expect(schemaNames).toContain('council_meta');
const metaResponseFormat = delegate.mock.calls
.map((call) => (call[0] as { responseFormat?: Record<string, unknown> }).responseFormat)
.find((rf) => rf && rf.type === 'json_schema' && rf.name === 'council_meta') as
| { schema?: { required?: string[] } }
| undefined;
expect(metaResponseFormat?.schema?.required).toContain('merges');
});
it('fails closed on bridge cap overflow before phase 2 executes', async () => {
const delegate = vi.fn(async ({ message }: { message: string }) => {
const payload = JSON.parse(message);
@@ -314,4 +411,265 @@ describe('CouncilsOrchestrator', () => {
expect(result.stop_snapshot.stop_reason).toBe('bridge_validation_failed');
expect(result.stop_snapshot.round_reached).toBe(1);
});
it('recovers from wrapped or mildly malformed model JSON responses', async () => {
const delegate = vi.fn(async ({ message }: { message: string }) => {
const payload = JSON.parse(message);
if (payload.brief_D && payload.brief_P) {
return {
content: [
'```json',
'{result: {schema_version: \'1.0.0\', selected_primary: [\'D.r1.01\'], selected_secondary: [\'P.r1.01\'], merges: [], rejections: [], open_questions: [\'q1\'], next_experiments: [\'e1\']}}',
'```',
].join('\n'),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
if (payload.shortlisted_ideas) {
return {
content: '{"output":{"grounded":[{"idea_id":"D.r1.01","mve":"m","constraints":["c"],"falsifiability_checks":["f"]}]}}',
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
if (payload.ideas) {
return {
content: [
'Assessment follows.',
'```json',
'{data: {assessments: [{idea_id: "D.r1.01", scores: {novelty: 60, feasibility: 70, impact: 80, testability: 90}, decision: "shortlist", notes: "ok"}], assumptions: ["a"], risks: ["r"], asks: ["k"], what_to_steal: ["w"], convergence_signal: false, novelty_score: 60, repetition_rate: 20}}',
'```',
].join('\n'),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
if (payload.group && payload.round) {
return {
content: [
'Here is the draft:',
'```json',
'{output: {ideas: [{title: \'t1\', hypothesis: \'h1\', mechanism: \'m1\', expected_outcome: \'o1\'}]}}',
'```',
].join('\n'),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
throw new Error('Unexpected payload');
});
const orchestrator = new CouncilsOrchestrator({
registry: createRegistry(),
orchestrator: { delegate },
config: createConfig(),
});
const result = await orchestrator.run({ task: 'x', max_rounds: 1 });
expect(result.stop_snapshot.round_reached).toBe(1);
expect(result.meta.selected_primary[0]).toBe('D.r1.01');
expect(result.meta.selected_secondary[0]).toBe('P.r1.01');
});
it('retries once with agent-assisted JSON repair when initial output is invalid', async () => {
const repairPayloads: Array<Record<string, unknown>> = [];
const seenIdeation = new Set<string>();
const delegate = vi.fn(async ({ message }: { message: string }) => {
const payload = JSON.parse(message) as Record<string, any>;
if (payload.task === 'normalize_json_output') {
repairPayloads.push(payload);
const schema = String(payload.required_schema ?? '');
if (schema.includes('"ideas"')) {
return {
content: JSON.stringify({
ideas: [
{
title: 'repaired idea',
hypothesis: 'h',
mechanism: 'm',
expected_outcome: 'o',
},
],
}),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
throw new Error('Unexpected repair schema');
}
if (payload.brief_D && payload.brief_P) {
return {
content: JSON.stringify({
schema_version: '1.0.0',
selected_primary: [payload.brief_D.shortlist[0]],
selected_secondary: [payload.brief_P.shortlist[0]],
merges: [],
rejections: [],
open_questions: ['q1'],
next_experiments: ['e1'],
}),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
if (payload.shortlisted_ideas) {
return {
content: JSON.stringify({
grounded: payload.shortlisted_ideas.map((idea: any) => ({
idea_id: idea.idea_id,
mve: 'm',
constraints: ['c'],
falsifiability_checks: ['f'],
})),
}),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
if (payload.ideas) {
return {
content: JSON.stringify({
assessments: payload.ideas.map((idea: any, idx: number) => ({
idea_id: idea.idea_id,
scores: { novelty: 50, feasibility: 50, impact: 50, testability: 50 },
decision: idx === 0 ? 'shortlist' : 'hold',
notes: 'ok',
})),
assumptions: [],
risks: [],
asks: [],
what_to_steal: [],
convergence_signal: false,
novelty_score: 60,
repetition_rate: 20,
}),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
if (payload.group && payload.round) {
const key = `${payload.group}:${payload.round}`;
if (!seenIdeation.has(key)) {
seenIdeation.add(key);
return {
content: 'this is not JSON',
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
return {
content: JSON.stringify({
ideas: [
{
title: `${payload.group} idea`,
hypothesis: 'h',
mechanism: 'm',
expected_outcome: 'o',
},
],
}),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
throw new Error('Unexpected payload');
});
const orchestrator = new CouncilsOrchestrator({
registry: createRegistry(),
orchestrator: { delegate },
config: createConfig({ defaults: { ...createConfig().defaults, max_rounds: 1 } }),
});
const result = await orchestrator.run({ task: 'x', max_rounds: 1 });
expect(result.stop_snapshot.round_reached).toBe(1);
expect(repairPayloads.length).toBeGreaterThan(0);
});
it('falls back instead of throwing when repair output is still invalid', async () => {
const delegate = vi.fn(async ({ message }: { message: string }) => {
const payload = JSON.parse(message) as Record<string, any>;
if (payload.task === 'normalize_json_output') {
return {
content: 'still invalid json',
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
if (payload.brief_D && payload.brief_P) {
return {
content: JSON.stringify({
schema_version: '1.0.0',
selected_primary: [payload.brief_D.shortlist[0]],
selected_secondary: [payload.brief_P.shortlist[0]],
merges: [],
rejections: [],
open_questions: ['q1'],
next_experiments: ['e1'],
}),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
if (payload.shortlisted_ideas) {
return {
content: JSON.stringify({ grounded: [] }),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
if (payload.ideas) {
return {
content: JSON.stringify({
assessments: payload.ideas.map((idea: any, idx: number) => ({
idea_id: idea.idea_id,
scores: { novelty: 50, feasibility: 50, impact: 50, testability: 50 },
decision: idx === 0 ? 'shortlist' : 'hold',
notes: 'ok',
})),
assumptions: [],
risks: [],
asks: [],
what_to_steal: [],
convergence_signal: false,
novelty_score: 60,
repetition_rate: 20,
}),
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
if (payload.group && payload.round) {
return {
content: 'not json at all',
usage: { inputTokens: 10, outputTokens: 5 },
tier: 'default' as const,
};
}
throw new Error('Unexpected payload');
});
const orchestrator = new CouncilsOrchestrator({
registry: createRegistry(),
orchestrator: { delegate },
config: createConfig({ defaults: { ...createConfig().defaults, max_rounds: 1 } }),
});
const result = await orchestrator.run({ task: 'x', max_rounds: 1 });
expect(result.stop_snapshot.round_reached).toBe(1);
expect(result.brief_D_v1.ideas.length).toBeGreaterThan(0);
expect(result.brief_P_v1.ideas.length).toBeGreaterThan(0);
expect(result.trace.some((event) => event.validation_failure === 'repair_failed')).toBe(true);
});
});
+540 -10
View File
@@ -1,7 +1,7 @@
import type { AgentConfigRegistry } from '../agents/registry.js';
import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
import type { ModelTier } from '../models/router.js';
import type { TokenUsage } from '../models/types.js';
import type { ChatResponseFormat, TokenUsage } from '../models/types.js';
import type { CouncilScaffold } from './scaffold.js';
import {
COUNCIL_PIPELINE_VERSION,
@@ -36,6 +36,7 @@ interface DelegateRunner {
systemPrompt: string;
message: string;
maxTokens?: number;
responseFormat?: ChatResponseFormat;
}): Promise<{
content: string;
usage: TokenUsage;
@@ -90,24 +91,171 @@ interface AgentCallResult {
usage: TokenUsage;
}
interface JsonRecoveryOptions<T> {
rawContent: string;
parser: (value: unknown) => T;
agentName: string;
callId: string;
phaseIndex: number;
promptPayload: unknown;
modeDirective: string;
schemaHint: string;
group?: CouncilGroup;
round?: number;
scaffoldPrompt?: string;
tierOverride?: ModelTier;
maxTokens?: number;
responseFormat?: ChatResponseFormat;
fallback?: () => T;
}
function deterministicJsonRepair(raw: string): string | null {
const trimmed = raw.trim();
const noFence = trimmed
.replace(/^```json\s*/i, '')
.replace(/^```\s*/i, '')
.replace(/```$/, '')
.trim();
.trim()
.replace(/[\u201C\u201D]/g, '"')
.replace(/[\u2018\u2019]/g, '\'');
const extracted = extractFirstJsonContainer(noFence);
if (!extracted) {
return null;
}
return extracted
const withoutComments = stripJsonLikeComments(extracted);
const withQuotedKeys = withoutComments.replace(
/([{,]\s*)([A-Za-z_][A-Za-z0-9_-]*)(\s*:)/g,
'$1"$2"$3',
);
const withDoubleQuotedStrings = convertSingleQuotedStrings(withQuotedKeys);
return withDoubleQuotedStrings
.replace(/,\s*([}\]])/g, '$1')
.trim();
}
function stripJsonLikeComments(input: string): string {
let output = '';
let i = 0;
let inString = false;
let stringDelimiter: '"' | '\'' | null = null;
let escaped = false;
while (i < input.length) {
const ch = input[i];
const next = i + 1 < input.length ? input[i + 1] : '';
if (inString) {
output += ch;
if (escaped) {
escaped = false;
} else if (ch === '\\') {
escaped = true;
} else if (ch === stringDelimiter) {
inString = false;
stringDelimiter = null;
}
i += 1;
continue;
}
if (ch === '"' || ch === '\'') {
inString = true;
stringDelimiter = ch;
output += ch;
i += 1;
continue;
}
if (ch === '/' && next === '/') {
i += 2;
while (i < input.length && input[i] !== '\n') {
i += 1;
}
continue;
}
if (ch === '/' && next === '*') {
i += 2;
while (i + 1 < input.length && !(input[i] === '*' && input[i + 1] === '/')) {
i += 1;
}
i = Math.min(i + 2, input.length);
continue;
}
output += ch;
i += 1;
}
return output;
}
function convertSingleQuotedStrings(input: string): string {
let output = '';
let inSingle = false;
let inDouble = false;
let escaped = false;
for (let i = 0; i < input.length; i++) {
const ch = input[i];
if (inSingle) {
if (escaped) {
output += ch;
escaped = false;
continue;
}
if (ch === '\\') {
output += ch;
escaped = true;
continue;
}
if (ch === '\'') {
inSingle = false;
output += '"';
continue;
}
if (ch === '"') {
output += '\\"';
continue;
}
output += ch;
continue;
}
if (inDouble) {
output += ch;
if (escaped) {
escaped = false;
} else if (ch === '\\') {
escaped = true;
} else if (ch === '"') {
inDouble = false;
}
continue;
}
if (ch === '\'') {
inSingle = true;
output += '"';
continue;
}
if (ch === '"') {
inDouble = true;
output += ch;
continue;
}
output += ch;
}
return output;
}
function extractFirstJsonContainer(input: string): string | null {
const start = input.search(/[\[{]/);
if (start < 0) {
@@ -152,16 +300,82 @@ function extractFirstJsonContainer(input: string): string | null {
return null;
}
function parseWithFallbackCandidates<T>(value: unknown, parser: (value: unknown) => T): T {
const queue: unknown[] = [value];
const seenObjects = new Set<object>();
const wrapperKeys = ['output', 'result', 'data', 'response', 'json', 'payload', 'content'];
let firstError: Error | null = null;
while (queue.length > 0) {
const current = queue.shift();
if (current === undefined) {
continue;
}
if (current && typeof current === 'object') {
const objectRef = current as object;
if (seenObjects.has(objectRef)) {
continue;
}
seenObjects.add(objectRef);
}
try {
return parser(current);
} catch (error) {
if (!firstError) {
firstError = error instanceof Error ? error : new Error(String(error));
}
}
if (typeof current === 'string') {
try {
queue.push(JSON.parse(current));
continue;
} catch {
const repaired = deterministicJsonRepair(current);
if (repaired) {
try {
queue.push(JSON.parse(repaired));
continue;
} catch {
// Continue searching additional candidates below.
}
}
}
continue;
}
if (Array.isArray(current)) {
for (const item of current) {
queue.push(item);
}
continue;
}
if (current && typeof current === 'object') {
const record = current as Record<string, unknown>;
for (const key of wrapperKeys) {
if (record[key] !== undefined) {
queue.push(record[key]);
}
}
}
}
throw firstError ?? new Error('parse_failed');
}
function parseJsonWithRepair<T>(raw: string, parser: (value: unknown) => T): T {
try {
return parser(JSON.parse(raw));
return parseWithFallbackCandidates(JSON.parse(raw), parser);
} catch {
const repaired = deterministicJsonRepair(raw);
if (!repaired) {
throw new Error('parse_failed');
}
try {
return parser(JSON.parse(repaired));
return parseWithFallbackCandidates(JSON.parse(repaired), parser);
} catch {
throw new Error('repair_failed');
}
@@ -172,6 +386,149 @@ function uniq(values: string[]): string[] {
return Array.from(new Set(values));
}
function jsonSchemaFormat(name: string, schema: Record<string, unknown>): ChatResponseFormat {
return { type: 'json_schema', name, schema, strict: true };
}
const councilIdeaContentJsonSchema: Record<string, unknown> = {
type: 'object',
additionalProperties: false,
required: ['title', 'hypothesis', 'mechanism', 'expected_outcome'],
properties: {
title: { type: 'string', minLength: 1 },
hypothesis: { type: 'string', minLength: 1 },
mechanism: { type: 'string', minLength: 1 },
expected_outcome: { type: 'string', minLength: 1 },
},
};
const councilIdeationJsonSchema: Record<string, unknown> = {
type: 'object',
additionalProperties: false,
required: ['ideas'],
properties: {
ideas: {
type: 'array',
minItems: 1,
items: councilIdeaContentJsonSchema,
},
},
};
const councilAssessmentJsonSchema: Record<string, unknown> = {
type: 'object',
additionalProperties: false,
required: ['assessments', 'assumptions', 'risks', 'asks', 'what_to_steal', 'convergence_signal', 'novelty_score', 'repetition_rate'],
properties: {
assessments: {
type: 'array',
items: {
type: 'object',
additionalProperties: false,
required: ['idea_id', 'scores', 'decision', 'notes'],
properties: {
idea_id: { type: 'string', minLength: 1 },
scores: {
type: 'object',
additionalProperties: false,
required: ['novelty', 'feasibility', 'impact', 'testability'],
properties: {
novelty: { type: 'integer', minimum: 0, maximum: 100 },
feasibility: { type: 'integer', minimum: 0, maximum: 100 },
impact: { type: 'integer', minimum: 0, maximum: 100 },
testability: { type: 'integer', minimum: 0, maximum: 100 },
},
},
decision: { type: 'string', enum: ['shortlist', 'hold', 'reject'] },
notes: { type: 'string', minLength: 1 },
},
},
},
assumptions: { type: 'array', items: { type: 'string', minLength: 1 } },
risks: { type: 'array', items: { type: 'string', minLength: 1 } },
asks: { type: 'array', items: { type: 'string', minLength: 1 } },
what_to_steal: { type: 'array', items: { type: 'string', minLength: 1 } },
convergence_signal: { type: 'boolean' },
novelty_score: { type: 'integer', minimum: 0, maximum: 100 },
repetition_rate: { type: 'integer', minimum: 0, maximum: 100 },
},
};
const councilGroundingJsonSchema: Record<string, unknown> = {
type: 'object',
additionalProperties: false,
required: ['grounded'],
properties: {
grounded: {
type: 'array',
items: {
type: 'object',
additionalProperties: false,
required: ['idea_id', 'mve', 'constraints', 'falsifiability_checks'],
properties: {
idea_id: { type: 'string', minLength: 1 },
mve: { type: 'string', minLength: 1 },
constraints: {
type: 'array',
minItems: 1,
items: { type: 'string', minLength: 1 },
},
falsifiability_checks: {
type: 'array',
minItems: 1,
items: { type: 'string', minLength: 1 },
},
},
},
},
},
};
const councilMetaJsonSchema: Record<string, unknown> = {
type: 'object',
additionalProperties: false,
required: ['schema_version', 'selected_primary', 'selected_secondary', 'merges', 'rejections', 'open_questions', 'next_experiments'],
properties: {
schema_version: { type: 'string', const: COUNCIL_SCHEMA_VERSION },
selected_primary: { type: 'array', items: { type: 'string', minLength: 1 } },
selected_secondary: { type: 'array', items: { type: 'string', minLength: 1 } },
merges: {
type: 'array',
items: {
type: 'object',
additionalProperties: false,
required: ['sources', 'result_title', 'rationale'],
properties: {
sources: {
type: 'array',
minItems: 2,
items: { type: 'string', minLength: 1 },
},
result_title: { type: 'string', minLength: 1 },
rationale: { type: 'string', minLength: 1 },
},
},
},
rejections: {
type: 'array',
items: {
type: 'object',
additionalProperties: false,
required: ['idea_id', 'reason_code'],
properties: {
idea_id: { type: 'string', minLength: 1 },
reason_code: {
type: 'string',
enum: ['low_score', 'high_risk', 'insufficient_grounding', 'duplicate', 'out_of_scope', 'unknown_id', 'other'],
},
},
},
},
open_questions: { type: 'array', items: { type: 'string', minLength: 1 } },
next_experiments: { type: 'array', items: { type: 'string', minLength: 1 } },
},
};
function computeTotalScore(assessment: IdeaAssessment): number {
const s = assessment.scores;
return s.feasibility + s.impact + s.novelty + s.testability;
@@ -403,19 +760,23 @@ export class CouncilsOrchestrator {
scaffoldPrompt?: string;
tierOverride?: ModelTier;
maxTokens?: number;
responseFormat?: ChatResponseFormat;
}): Promise<AgentCallResult> {
const agent = this.getAgent(opts.agentName);
const message = canonicalStringify(opts.promptPayload);
const promptHash = hashCanonical(opts.promptPayload);
const scaffoldPrompt = opts.scaffoldPrompt ? `${opts.scaffoldPrompt}\n\n` : '';
const systemPrompt = `${scaffoldPrompt}${agent.systemPrompt}\n\n${opts.modeDirective}`;
const startedAt = Date.now();
const result = await this._delegateRunner.delegate({
tier: opts.tierOverride ?? agent.tier,
systemPrompt,
message,
maxTokens: opts.maxTokens ?? 4096,
responseFormat: opts.responseFormat,
});
const latencyMs = Math.max(0, Date.now() - startedAt);
this._trace.push(councilTraceEventSchema.parse({
schema_version: COUNCIL_SCHEMA_VERSION,
@@ -427,6 +788,7 @@ export class CouncilsOrchestrator {
prompt_payload_hash: promptHash,
artifact_hash: hashCanonical(result.content),
token_usage: result.usage,
latency_ms: latencyMs,
}));
this._conversations.push({
schema_version: COUNCIL_SCHEMA_VERSION,
@@ -444,6 +806,63 @@ export class CouncilsOrchestrator {
return result;
}
private async parseWithAgentRecovery<T>(opts: JsonRecoveryOptions<T>): Promise<T> {
try {
return parseJsonWithRepair(opts.rawContent, opts.parser);
} catch {
const recoveryPayload = {
task: 'normalize_json_output',
required_schema: opts.schemaHint,
original_prompt_payload: opts.promptPayload,
invalid_output: opts.rawContent,
};
const recoveryRaw = await this.callAgent({
agentName: opts.agentName,
callId: `${opts.callId}.repair`,
phaseIndex: opts.phaseIndex,
group: opts.group,
round: opts.round,
promptPayload: recoveryPayload,
modeDirective: [
'Your previous response did not validate.',
'Return ONLY valid JSON matching the required schema.',
'Do not add prose, markdown fences, or commentary.',
].join(' '),
scaffoldPrompt: opts.scaffoldPrompt,
tierOverride: opts.tierOverride,
maxTokens: opts.maxTokens,
responseFormat: opts.responseFormat,
});
try {
return parseJsonWithRepair(recoveryRaw.content, opts.parser);
} catch {
if (opts.fallback) {
this.recordValidationFailure(opts, 'repair_failed');
return opts.fallback();
}
throw new Error('repair_failed');
}
}
}
private recordValidationFailure(
opts: Pick<JsonRecoveryOptions<unknown>, 'callId' | 'phaseIndex' | 'group' | 'round' | 'promptPayload'>,
reason: 'repair_failed' | 'parse_failed',
): void {
this._trace.push(councilTraceEventSchema.parse({
schema_version: COUNCIL_SCHEMA_VERSION,
event_id: `${opts.phaseIndex}:${opts.callId}.validation`,
phase_index: opts.phaseIndex,
call_id: `${opts.callId}.validation`,
group: opts.group,
round: opts.round,
prompt_payload_hash: hashCanonical(opts.promptPayload),
validation_failure: reason,
}));
}
private allocateIdeaId(group: CouncilGroup, round: number, index: number): string {
return `${group}.r${round}.${String(index + 1).padStart(2, '0')}`;
}
@@ -481,11 +900,37 @@ export class CouncilsOrchestrator {
round,
promptPayload: ideationPayload,
modeDirective: 'Return JSON only: {"ideas":[IdeaContent,...]}. Do not include IDs. No prose.',
responseFormat: jsonSchemaFormat('council_ideation', councilIdeationJsonSchema),
scaffoldPrompt: this._scaffold?.prompts[group].free_thinker,
tierOverride: groupTier,
});
const ideaOutput = parseJsonWithRepair(ideation.content, (value) => ideationOutputSchema.parse(value));
const ideaOutput = await this.parseWithAgentRecovery({
rawContent: ideation.content,
parser: (value) => ideationOutputSchema.parse(value),
agentName: groupConfig.freethinker_agent,
callId: `${group}.r${round}.ft.ideation`,
phaseIndex: phaseBase,
group,
round,
promptPayload: ideationPayload,
modeDirective: 'Return JSON only: {"ideas":[IdeaContent,...]}. Do not include IDs. No prose.',
schemaHint:
'{"ideas":[{"title":"string","hypothesis":"string","mechanism":"string","expected_outcome":"string"}]}',
responseFormat: jsonSchemaFormat('council_ideation', councilIdeationJsonSchema),
scaffoldPrompt: this._scaffold?.prompts[group].free_thinker,
tierOverride: groupTier,
fallback: () => ({
ideas: [
{
title: `${group} fallback idea`,
hypothesis: `Fallback hypothesis for: ${input.task}`,
mechanism: 'Run a minimum-viable experiment and measure signal within one iteration.',
expected_outcome: 'A concrete go/no-go signal to reduce uncertainty.',
},
],
}),
});
const ideaCards: IdeaCard[] = ideaOutput.ideas
.slice(0, this._config.defaults.ideas_per_round)
.map((idea, index) => ({
@@ -514,11 +959,48 @@ export class CouncilsOrchestrator {
promptPayload: assessmentPayload,
modeDirective:
'Return JSON only. Assess provided idea IDs only. No new IDs. Include convergence_signal/novelty_score/repetition_rate.',
responseFormat: jsonSchemaFormat('council_assessment', councilAssessmentJsonSchema),
scaffoldPrompt: this._scaffold?.prompts[group].arbiter,
tierOverride: groupTier,
});
const assessmentOutput = parseJsonWithRepair(assessmentRaw.content, (value) => assessmentOutputSchema.parse(value));
const assessmentOutput = await this.parseWithAgentRecovery({
rawContent: assessmentRaw.content,
parser: (value) => assessmentOutputSchema.parse(value),
agentName: groupConfig.arbiter_agent,
callId: `${group}.r${round}.arb.assess`,
phaseIndex: phaseBase + 1,
group,
round,
promptPayload: assessmentPayload,
modeDirective:
'Return JSON only. Assess provided idea IDs only. No new IDs. Include convergence_signal/novelty_score/repetition_rate.',
schemaHint:
'{"assessments":[{"idea_id":"string","scores":{"novelty":0,"feasibility":0,"impact":0,"testability":0},"decision":"shortlist|hold|reject","notes":"string"}],"assumptions":["string"],"risks":["string"],"asks":["string"],"what_to_steal":["string"],"convergence_signal":false,"novelty_score":0,"repetition_rate":0}',
responseFormat: jsonSchemaFormat('council_assessment', councilAssessmentJsonSchema),
scaffoldPrompt: this._scaffold?.prompts[group].arbiter,
tierOverride: groupTier,
fallback: () => ({
assessments: ideaCards.map((idea, index) => ({
idea_id: idea.idea_id,
scores: {
novelty: 40,
feasibility: 40,
impact: 40,
testability: 40,
},
decision: index === 0 ? 'shortlist' as const : 'hold' as const,
notes: 'Fallback assessment used because model output could not be repaired into valid JSON.',
})),
assumptions: ['Model output formatting instability; assumptions are provisional.'],
risks: ['Assessment quality degraded due to invalid structured output.'],
asks: ['Re-run with a different provider or tier to confirm shortlist quality.'],
what_to_steal: [],
convergence_signal: false,
novelty_score: 0,
repetition_rate: 100,
}),
});
const validIdeaIds = new Set(ideaCards.map((i) => i.idea_id));
const assessments: IdeaAssessment[] = uniq(assessmentOutput.assessments.map((a) => a.idea_id))
.map((ideaId) => assessmentOutput.assessments.find((a) => a.idea_id === ideaId)!)
@@ -571,10 +1053,27 @@ export class CouncilsOrchestrator {
promptPayload: groundingPayload,
modeDirective:
'Grounder mode. Return JSON only: {"grounded":[{"idea_id", "mve", "constraints", "falsifiability_checks"}]}. No prose.',
responseFormat: jsonSchemaFormat('council_grounding', councilGroundingJsonSchema),
scaffoldPrompt: this._scaffold?.prompts[group].grounder ?? this._scaffold?.prompts[group].free_thinker,
tierOverride: groupTier,
});
grounding = await this.parseWithAgentRecovery({
rawContent: groundingRaw.content,
parser: (value) => groundingOutputSchema.parse(value),
agentName: groundingAgent,
callId: `${group}.r${round}.ft.ground`,
phaseIndex: phaseBase + 2,
group,
round,
promptPayload: groundingPayload,
modeDirective:
'Grounder mode. Return JSON only: {"grounded":[{"idea_id", "mve", "constraints", "falsifiability_checks"}]}. No prose.',
schemaHint:
'{"grounded":[{"idea_id":"string","mve":"string","constraints":["string"],"falsifiability_checks":["string"]}]}',
responseFormat: jsonSchemaFormat('council_grounding', councilGroundingJsonSchema),
scaffoldPrompt: this._scaffold?.prompts[group].grounder ?? this._scaffold?.prompts[group].free_thinker,
tierOverride: groupTier,
});
grounding = parseJsonWithRepair(groundingRaw.content, (value) => groundingOutputSchema.parse(value));
} catch {
groundingFailures = shortlist.length;
if (this._config.strict_grounding) {
@@ -704,7 +1203,11 @@ export class CouncilsOrchestrator {
return packet;
}
private async runMetaMerge(input: CouncilRunInput, briefD: CouncilBrief, briefP: CouncilBrief) {
private async runMetaMerge(
input: CouncilRunInput,
briefD: CouncilBrief,
briefP: CouncilBrief,
): Promise<ReturnType<typeof metaSelectionSchema.parse>> {
const metaAgentName = this._config.meta_arbiter_agent;
const payload = {
input,
@@ -723,11 +1226,37 @@ export class CouncilsOrchestrator {
promptPayload: payload,
modeDirective:
'Return JSON only following schema with selected_primary/selected_secondary/merges/rejections/open_questions/next_experiments. Use only known idea IDs.',
responseFormat: jsonSchemaFormat('council_meta', councilMetaJsonSchema),
scaffoldPrompt: this._scaffold?.prompts.meta_arbiter,
tierOverride: this._config.meta_model_tier,
});
return parseJsonWithRepair(metaRaw.content, (value) => metaSelectionSchema.parse(value));
return this.parseWithAgentRecovery<ReturnType<typeof metaSelectionSchema.parse>>({
rawContent: metaRaw.content,
parser: (value) => metaSelectionSchema.parse(value),
agentName: metaAgentName,
callId: 'meta.merge',
phaseIndex: 999,
promptPayload: payload,
modeDirective:
'Return JSON only following schema with selected_primary/selected_secondary/merges/rejections/open_questions/next_experiments. Use only known idea IDs.',
schemaHint:
'{"schema_version":"1.0.0","selected_primary":["string"],"selected_secondary":["string"],"merges":[{"sources":["string","string"],"result_title":"string","rationale":"string"}],"rejections":[{"idea_id":"string","reason_code":"low_score|high_risk|insufficient_grounding|duplicate|out_of_scope|unknown_id|other"}],"open_questions":["string"],"next_experiments":["string"]}',
responseFormat: jsonSchemaFormat('council_meta', councilMetaJsonSchema),
scaffoldPrompt: this._scaffold?.prompts.meta_arbiter,
tierOverride: this._config.meta_model_tier,
fallback: () => metaSelectionSchema.parse({
schema_version: COUNCIL_SCHEMA_VERSION,
selected_primary: briefD.shortlist.slice(0, 1),
selected_secondary: briefP.shortlist.slice(0, 1),
merges: [],
rejections: [],
open_questions: ['Meta merge used fallback due to invalid structured output.'],
next_experiments: [
'Re-run council with a lower-latency/stronger JSON model and compare selected ideas.',
],
}),
});
}
private validateMetaSelection(meta: ReturnType<typeof metaSelectionSchema.parse>, knownIds: Set<string>): boolean {
@@ -763,6 +1292,7 @@ export class CouncilsOrchestrator {
group: normalizeOptional(event.group),
round: normalizeOptional(event.round),
token_usage: normalizeOptional(event.token_usage),
latency_ms: normalizeOptional(event.latency_ms),
dropped_reason: normalizeOptional(event.dropped_reason),
validation_failure: normalizeOptional(event.validation_failure),
}));
+93
View File
@@ -0,0 +1,93 @@
import { describe, expect, it, vi } from 'vitest';
import { AgentConfigRegistry } from '../agents/registry.js';
import { configSchema } from '../config/schema.js';
import { buildCouncilPreflightReport } from './preflight.js';
function makeConfig() {
const config = configSchema.parse({
models: {
default: {
provider: 'zhipuai',
model: 'glm-4.7',
use_oauth: true,
},
},
});
config.councils.enabled = true;
config.councils.groups.D.arbiter_agent = 'comms';
config.councils.groups.D.freethinker_agent = 'research';
config.councils.groups.D.model_tier = 'default';
config.councils.groups.P.arbiter_agent = 'comms';
config.councils.groups.P.freethinker_agent = 'research';
config.councils.groups.P.model_tier = 'default';
config.councils.meta_arbiter_agent = 'code';
config.councils.meta_model_tier = 'default';
return config;
}
function makeRegistry(): AgentConfigRegistry {
const registry = new AgentConfigRegistry();
registry.register({ name: 'comms', modelTier: 'fast' });
registry.register({ name: 'research', modelTier: 'default' });
registry.register({ name: 'code', modelTier: 'complex' });
return registry;
}
describe('buildCouncilPreflightReport', () => {
it('reports zhipu endpoint/auth and stale oauth flag note', async () => {
const config = makeConfig();
const report = await buildCouncilPreflightReport({
config,
registry: makeRegistry(),
delegateRunner: {
delegate: vi.fn(async () => ({
content: '{"ok":true}',
usage: { inputTokens: 1, outputTokens: 1 },
tier: 'default' as const,
})),
},
activeTier: 'default',
includeLiveProbe: false,
credentialState: {
openaiOAuth: false,
openaiApiKeyStored: false,
anthropicApiKeyStored: false,
anthropicAuthTokenStored: false,
zaiStored: true,
},
});
expect(report).toContain('D.arbiter: agent=comms');
expect(report).toContain('effective_tier=default model=zhipuai/glm-4.7');
expect(report).toContain('endpoint=https://api.z.ai/api/paas/v4');
expect(report).toContain('auth_mode=bearer_credential');
expect(report).toContain('auth_source=auth.json:zai');
expect(report).toContain('note=use_oauth/auth_mode=oauth is ignored for zhipuai');
});
it('includes live probe failures by tier', async () => {
const config = makeConfig();
const delegate = vi.fn(async () => {
throw new Error('Connection error');
});
const report = await buildCouncilPreflightReport({
config,
registry: makeRegistry(),
delegateRunner: { delegate },
activeTier: 'default',
includeLiveProbe: true,
credentialState: {
openaiOAuth: false,
openaiApiKeyStored: false,
anthropicApiKeyStored: false,
anthropicAuthTokenStored: false,
zaiStored: false,
},
});
expect(report).toContain('Live tier probes:');
expect(report).toContain('default: failed');
expect(report).toContain('Connection error');
expect(delegate).toHaveBeenCalled();
});
});
+347
View File
@@ -0,0 +1,347 @@
import type { AgentConfigRegistry } from '../agents/registry.js';
import {
loadStoredAnthropicAuth,
loadStoredAnthropicAuthToken,
loadStoredOpenAIApiKey,
loadStoredOpenAIAuth,
loadStoredZaiAuth,
} from '../auth/index.js';
import type { Config, ModelConfig, ModelProvider } from '../config/index.js';
import type { ModelTier } from '../models/router.js';
import type { ChatResponseFormat } from '../models/types.js';
interface DelegateRunner {
delegate(request: {
tier: ModelTier;
systemPrompt: string;
message: string;
maxTokens?: number;
responseFormat?: ChatResponseFormat;
}): Promise<{
content: string;
usage: { inputTokens: number; outputTokens: number };
tier: ModelTier;
}>;
}
interface CredentialState {
openaiOAuth: boolean;
openaiApiKeyStored: boolean;
anthropicApiKeyStored: boolean;
anthropicAuthTokenStored: boolean;
zaiStored: boolean;
}
export interface CouncilPreflightOptions {
config: Config;
registry: AgentConfigRegistry;
delegateRunner: DelegateRunner;
activeTier?: ModelTier;
includeLiveProbe?: boolean;
credentialState?: CredentialState;
}
interface TierConfigResolution {
modelConfig: ModelConfig;
fellBackToDefault: boolean;
}
interface AuthResolution {
mode: string;
source: string;
note?: string;
}
const DEFAULT_ENDPOINTS: Partial<Record<ModelProvider, string>> = {
openai: 'https://api.openai.com/v1',
openrouter: 'https://openrouter.ai/api/v1',
vercel: 'https://ai-gateway.vercel.sh/v1',
zhipuai: 'https://api.z.ai/api/paas/v4',
xai: 'https://api.x.ai/v1',
minimax: 'https://api.minimax.io/v1',
moonshot: 'https://api.moonshot.cn/v1',
ollama: 'http://localhost:11434',
llamacpp: 'http://localhost:8080',
};
function getCredentialStateFromSystem(): CredentialState {
return {
openaiOAuth: Boolean(loadStoredOpenAIAuth()),
openaiApiKeyStored: Boolean(loadStoredOpenAIApiKey()),
anthropicApiKeyStored: Boolean(loadStoredAnthropicAuth()),
anthropicAuthTokenStored: Boolean(loadStoredAnthropicAuthToken()),
zaiStored: Boolean(loadStoredZaiAuth()),
};
}
function getEffectiveAuthMode(cfg: ModelConfig): 'auto' | 'api_key' | 'oauth' {
if (cfg.auth_mode) {
return cfg.auth_mode;
}
if (cfg.use_oauth) {
return 'oauth';
}
return 'auto';
}
function resolveTierConfig(config: Config, tier: ModelTier): TierConfigResolution {
if (tier === 'default') {
return { modelConfig: config.models.default, fellBackToDefault: false };
}
const direct = config.models[tier];
if (direct) {
return { modelConfig: direct, fellBackToDefault: false };
}
return { modelConfig: config.models.default, fellBackToDefault: true };
}
function firstTruthySource(sources: Array<[boolean, string]>): string {
for (const [present, label] of sources) {
if (present) {
return label;
}
}
return 'missing';
}
function resolveAuth(cfg: ModelConfig, credentialState: CredentialState): AuthResolution {
if (cfg.provider === 'zhipuai') {
const source = firstTruthySource([
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
[Boolean(cfg.auth_token?.trim()), 'config.auth_token'],
[Boolean(process.env.ZAI_API_KEY?.trim()), 'env:ZAI_API_KEY'],
[Boolean(process.env.ZHIPUAI_API_KEY?.trim()), 'env:ZHIPUAI_API_KEY'],
[Boolean(process.env.ZHIPUAI_AUTH_TOKEN?.trim()), 'env:ZHIPUAI_AUTH_TOKEN'],
[credentialState.zaiStored, 'auth.json:zai'],
]);
const note = cfg.use_oauth || cfg.auth_mode === 'oauth'
? 'use_oauth/auth_mode=oauth is ignored for zhipuai'
: undefined;
return {
mode: 'bearer_credential',
source,
note,
};
}
if (cfg.provider === 'openai') {
const authMode = getEffectiveAuthMode(cfg);
const apiSource = firstTruthySource([
[Boolean(cfg.api_keys && cfg.api_keys.length > 0), 'config.api_keys'],
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
[Boolean(process.env.OPENAI_API_KEY?.trim()), 'env:OPENAI_API_KEY'],
[credentialState.openaiApiKeyStored, 'auth.json:openai.api_key'],
]);
const oauthSource = credentialState.openaiOAuth ? 'auth.json:openai.oauth' : 'missing';
if (authMode === 'oauth') {
return { mode: 'oauth', source: oauthSource };
}
if (authMode === 'api_key') {
return { mode: 'api_key', source: apiSource };
}
if (apiSource !== 'missing') {
return { mode: 'auto->api_key', source: apiSource };
}
return { mode: 'auto->oauth', source: oauthSource };
}
if (cfg.provider === 'anthropic') {
const authMode = getEffectiveAuthMode(cfg);
const apiSource = firstTruthySource([
[Boolean(cfg.api_keys && cfg.api_keys.length > 0), 'config.api_keys'],
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
[Boolean(process.env.ANTHROPIC_API_KEY?.trim()), 'env:ANTHROPIC_API_KEY'],
[credentialState.anthropicApiKeyStored, 'auth.json:anthropic.api_key'],
]);
const oauthSource = firstTruthySource([
[Boolean(cfg.auth_token?.trim()), 'config.auth_token'],
[Boolean(process.env.ANTHROPIC_AUTH_TOKEN?.trim()), 'env:ANTHROPIC_AUTH_TOKEN'],
[credentialState.anthropicAuthTokenStored, 'auth.json:anthropic.auth_token'],
]);
if (authMode === 'oauth') {
return { mode: 'oauth', source: oauthSource };
}
if (authMode === 'api_key') {
return { mode: 'api_key', source: apiSource };
}
if (apiSource !== 'missing') {
return { mode: 'auto->api_key', source: apiSource };
}
return { mode: 'auto->oauth', source: oauthSource };
}
if (cfg.provider === 'gemini') {
const source = firstTruthySource([
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
[Boolean(process.env.GEMINI_API_KEY?.trim()), 'env:GEMINI_API_KEY'],
[Boolean(process.env.GOOGLE_API_KEY?.trim()), 'env:GOOGLE_API_KEY'],
]);
return { mode: 'api_key', source };
}
if (cfg.provider === 'bedrock') {
const source = firstTruthySource([
[Boolean(cfg.api_key?.trim() && cfg.auth_token?.trim()), 'config.api_key+auth_token'],
[Boolean(process.env.AWS_ACCESS_KEY_ID?.trim() && process.env.AWS_SECRET_ACCESS_KEY?.trim()), 'env:AWS_ACCESS_KEY_ID+AWS_SECRET_ACCESS_KEY'],
]);
return { mode: 'aws_credentials', source };
}
if (cfg.provider === 'ollama' || cfg.provider === 'llamacpp' || cfg.provider === 'synthetic') {
return { mode: 'local', source: 'none' };
}
const source = firstTruthySource([
[Boolean(cfg.api_keys && cfg.api_keys.length > 0), 'config.api_keys'],
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
]);
return { mode: 'api_key', source };
}
function resolveEndpoint(cfg: ModelConfig): string {
if (cfg.endpoint?.trim()) {
return cfg.endpoint.trim();
}
return DEFAULT_ENDPOINTS[cfg.provider] ?? '(provider default)';
}
function normalizeProbeError(error: unknown): string {
if (error instanceof Error) {
return error.message;
}
return String(error);
}
function isPreflightRequest(task: string): boolean {
return task.trim().toLowerCase() === 'preflight';
}
export function shouldRunCouncilPreflight(task: string): boolean {
return isPreflightRequest(task);
}
export async function buildCouncilPreflightReport(options: CouncilPreflightOptions): Promise<string> {
const { config, registry, delegateRunner } = options;
const councils = config.councils;
const credentialState = options.credentialState ?? getCredentialStateFromSystem();
const roles = [
{ role: 'D.arbiter', agent: councils.groups.D.arbiter_agent, tierOverride: councils.groups.D.model_tier },
{ role: 'D.freethinker', agent: councils.groups.D.freethinker_agent, tierOverride: councils.groups.D.model_tier },
...(councils.groups.D.grounder_agent
? [{ role: 'D.grounder', agent: councils.groups.D.grounder_agent, tierOverride: councils.groups.D.model_tier }]
: []),
...(councils.groups.D.writer_agent
? [{ role: 'D.writer', agent: councils.groups.D.writer_agent, tierOverride: councils.groups.D.model_tier }]
: []),
{ role: 'P.arbiter', agent: councils.groups.P.arbiter_agent, tierOverride: councils.groups.P.model_tier },
{ role: 'P.freethinker', agent: councils.groups.P.freethinker_agent, tierOverride: councils.groups.P.model_tier },
...(councils.groups.P.grounder_agent
? [{ role: 'P.grounder', agent: councils.groups.P.grounder_agent, tierOverride: councils.groups.P.model_tier }]
: []),
...(councils.groups.P.writer_agent
? [{ role: 'P.writer', agent: councils.groups.P.writer_agent, tierOverride: councils.groups.P.model_tier }]
: []),
{ role: 'Meta.arbiter', agent: councils.meta_arbiter_agent, tierOverride: councils.meta_model_tier },
...(councils.meta_writer_agent
? [{ role: 'Meta.writer', agent: councils.meta_writer_agent, tierOverride: councils.meta_model_tier }]
: []),
];
const roleLines: string[] = [];
const tiersToProbe = new Set<ModelTier>();
for (const role of roles) {
const agentConfig = registry.get(role.agent);
const agentTier = agentConfig?.modelTier ?? 'default';
const effectiveTier = role.tierOverride ?? agentTier;
const { modelConfig, fellBackToDefault } = resolveTierConfig(config, effectiveTier);
const modelLabel = `${modelConfig.provider}/${modelConfig.model}`;
const missingSuffix = agentConfig ? '' : ' [agent_missing]';
const fallbackSuffix = fellBackToDefault ? ' [tier_unconfigured->default]' : '';
roleLines.push(
`- ${role.role}: agent=${role.agent}${missingSuffix} ` +
`agent_tier=${agentTier} override_tier=${role.tierOverride ?? 'none'} ` +
`effective_tier=${effectiveTier} model=${modelLabel}${fallbackSuffix}`,
);
tiersToProbe.add(effectiveTier);
}
const tierLines: string[] = [];
for (const tier of [...tiersToProbe].sort()) {
const { modelConfig, fellBackToDefault } = resolveTierConfig(config, tier);
const auth = resolveAuth(modelConfig, credentialState);
const endpoint = resolveEndpoint(modelConfig);
tierLines.push(
`- ${tier}: provider=${modelConfig.provider} model=${modelConfig.model} ` +
`endpoint=${endpoint} auth_mode=${auth.mode} auth_source=${auth.source}` +
`${auth.note ? ` note=${auth.note}` : ''}` +
`${fellBackToDefault ? ' fallback=default' : ''}`,
);
}
const probeLines: string[] = [];
if (options.includeLiveProbe ?? true) {
const probeResults = await Promise.all(
[...tiersToProbe].sort().map(async (tier) => {
const startedAt = Date.now();
try {
await delegateRunner.delegate({
tier,
systemPrompt: 'Return exactly {"ok":true}.',
message: 'Return exactly {"ok":true}.',
maxTokens: 64,
});
return {
tier,
ok: true,
latencyMs: Math.max(0, Date.now() - startedAt),
error: '',
};
} catch (error) {
return {
tier,
ok: false,
latencyMs: Math.max(0, Date.now() - startedAt),
error: normalizeProbeError(error),
};
}
}),
);
for (const result of probeResults) {
if (result.ok) {
probeLines.push(`- ${result.tier}: ok (${result.latencyMs}ms)`);
} else {
probeLines.push(`- ${result.tier}: failed (${result.latencyMs}ms) ${result.error}`);
}
}
} else {
probeLines.push('- skipped');
}
const activeTier = options.activeTier ?? 'default';
const activeModel = resolveTierConfig(config, activeTier).modelConfig;
return [
'[Council preflight]',
`Councils enabled: ${councils.enabled ? 'yes' : 'no'}`,
`Active interactive tier: ${activeTier} (${activeModel.provider}/${activeModel.model})`,
`Scaffold path: ${councils.scaffold_path?.trim() ? councils.scaffold_path : '(none)'}`,
'',
'Role routing:',
...roleLines,
'',
'Tier provider/auth:',
...tierLines,
'',
'Live tier probes:',
...probeLines,
'',
'Path comparison:',
'- Interactive TUI messages use the active tier above.',
'- Council calls use per-role effective tiers (group override, then agent tier).',
].join('\n');
}
+1
View File
@@ -166,6 +166,7 @@ export const councilTraceEventSchema = z.object({
inputTokens: z.number().int().min(0),
outputTokens: z.number().int().min(0),
}).strict().optional(),
latency_ms: z.number().int().min(0).optional(),
dropped_reason: droppedReasonSchema.optional(),
validation_failure: validationFailureReasonSchema.optional(),
}).strict();
+14 -4
View File
@@ -29,6 +29,7 @@ import { auditLogger } from '../audit/index.js';
import { getElevationStatusMessage, setElevationFromInput } from '../security/elevation.js';
import { dirname, resolve } from 'path';
import { loadCouncilScaffoldSafe } from '../councils/scaffold.js';
import { buildCouncilPreflightReport, shouldRunCouncilPreflight } from '../councils/preflight.js';
function buildProviderConfigMap(config: Config): Partial<Record<ModelProvider, ModelConfig>> {
const providerConfigs: Partial<Record<ModelProvider, ModelConfig>> = {};
@@ -866,14 +867,23 @@ export function createMessageRouter(deps: {
runCouncil: async (task: string) => {
const message = task.trim();
if (!message) {
return 'Usage: /council <question or task>';
return 'Usage: /council <question or task> | /council preflight';
}
if (!deps.config.councils?.enabled) {
return 'Councils are disabled. Set councils.enabled: true in config.';
}
if (!deps.config.councils?.enabled) {
return 'Councils are disabled. Set councils.enabled: true in config.';
}
if (!deps.agentConfigRegistry || deps.agentConfigRegistry.list().length === 0) {
return 'No agent configurations are registered. Add council_* agent_configs first.';
}
if (shouldRunCouncilPreflight(message)) {
return buildCouncilPreflightReport({
config: deps.config,
registry: deps.agentConfigRegistry,
delegateRunner: agent,
activeTier: agent.getModelTier(),
includeLiveProbe: true,
});
}
const tool = createCouncilRunTool({
registry: deps.agentConfigRegistry,
orchestrator: agent,
+12
View File
@@ -33,9 +33,16 @@ describe('parseCommand', () => {
it('parses /council command', () => {
expect(parseCommand('/council design backup plan')).toEqual({ type: 'council', task: 'design backup plan' });
expect(parseCommand('/council preflight')).toEqual({ type: 'council', task: 'preflight' });
expect(parseCommand('/council')).toEqual({ type: 'council', task: '' });
});
it('parses natural-language council shortcut', () => {
expect(parseCommand('run council design backup plan')).toEqual({ type: 'council', task: 'design backup plan' });
expect(parseCommand('yes run the council')).toEqual({ type: 'council', task: '' });
expect(parseCommand('Run council on #2')).toEqual({ type: 'council', task: '#2' });
});
it('parses /fullscreen command', () => {
expect(parseCommand('/fullscreen')).toEqual({ type: 'fullscreen' });
expect(parseCommand('/fs')).toEqual({ type: 'fullscreen' });
@@ -197,6 +204,11 @@ describe('getCommandCompletions', () => {
const completions = getCommandCompletions('/model fast x');
expect(completions).toEqual(['/model fast xai']);
});
it('completes /council preflight shortcut', () => {
const completions = getCommandCompletions('/council pre');
expect(completions).toEqual(['/council preflight']);
});
});
describe('isToolInventoryQuery', () => {
+15 -1
View File
@@ -88,6 +88,11 @@ export function parseCommand(input: string): Command | null {
if (trimmed === '/council') {
return { type: 'council', task: '' };
}
// Natural-language council shortcut for common flows.
const councilShortcut = trimmed.match(/^(?:yes\s+)?run\s+(?:the\s+)?council(?:\s+on)?(?:\s+(.+))?$/i);
if (councilShortcut) {
return { type: 'council', task: (councilShortcut[1] ?? '').trim() };
}
// Fullscreen
if (trimmed === '/fullscreen' || trimmed === '/fs') {
@@ -211,6 +216,7 @@ Commands:
/backend [provider] Show or switch local backend (ollama, llamacpp)
/research <task> Delegate a task to the configured research agent
/council <task> Run the councils pipeline for a task
/council preflight Check council tier routing, endpoint/auth mode, and probe latency
/login [provider] Authenticate with GitHub, OpenAI, Anthropic, or Z.AI
/pair List pending pairing codes and approved senders
/pair generate [label] Generate a new DM pairing code
@@ -275,7 +281,7 @@ export const COMMAND_TOOLTIPS: Record<string, string> = {
'/model': 'Show or switch model (local, default, fast, complex)',
'/backend': 'Show or switch local backend (ollama, llamacpp)',
'/research': 'Delegate a task to the configured research agent',
'/council': 'Run the councils pipeline for a task',
'/council': 'Run the councils pipeline for a task; use "/council preflight" for route/auth checks',
'/reset': 'Clear conversation history',
'/clear': 'Clear conversation history',
'/new': 'Start a new conversation',
@@ -319,6 +325,14 @@ export const MODEL_TOOLTIPS: Record<string, string> = {
export function getCommandCompletions(partial: string): string[] {
const trimmed = partial.trim();
if (trimmed.startsWith('/council ')) {
const suffix = trimmed.slice('/council '.length).toLowerCase();
if ('preflight'.startsWith(suffix)) {
return ['/council preflight'];
}
return [];
}
// Complete /model <tier> <provider/model>
if (trimmed.startsWith('/model ')) {
const args = trimmed.slice('/model '.length).trim();
+1 -1
View File
@@ -633,7 +633,7 @@ export function App({
case 'council': {
if (!command.task.trim()) {
pushAssistantMessage('Usage: /council <question or task>');
pushAssistantMessage('Usage: /council <question or task> | /council preflight');
return;
}
if (!onCouncil) {
+84 -3
View File
@@ -1,6 +1,9 @@
import { describe, it, expect, vi } from 'vitest';
import { createCouncilRunTool } from './council-run.js';
import type { AgentConfigRegistry } from '../../agents/registry.js';
import { mkdtempSync, readFileSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
function createRegistry(): AgentConfigRegistry {
const configs = new Map<string, { name: string; modelTier?: 'fast' | 'default' | 'complex'; systemPrompt?: string }>([
@@ -53,6 +56,10 @@ const config = {
describe('council.run tool', () => {
it('runs council pipeline and returns output summary', async () => {
const previousDataDir = process.env.FLYNN_DATA_DIR;
const testDataDir = mkdtempSync(join(tmpdir(), 'flynn-council-run-'));
process.env.FLYNN_DATA_DIR = testDataDir;
const delegate = vi.fn(async ({ message }: { message: string }) => {
const payload = JSON.parse(message);
if (payload.brief_D && payload.brief_P) {
@@ -117,9 +124,31 @@ describe('council.run tool', () => {
});
const result = await tool.execute({ task: 'plan migration' });
expect(result.success).toBe(true);
expect(result.output).toContain('Council pipeline v1.0.0');
expect(result.output).toContain('Meta selection');
try {
expect(result.success).toBe(true);
expect(result.output).toContain('Council pipeline v1.0.0');
expect(result.output).toContain('Meta selection');
expect(result.output).toContain('Timing:');
expect(result.output).toContain('Slowest calls:');
expect(result.output).toContain('Artifacts:');
const markdownLine = result.output.split('\n').find((line) => line.startsWith('- Summary report: '));
const jsonLine = result.output.split('\n').find((line) => line.startsWith('- Full JSON: '));
expect(markdownLine).toBeDefined();
expect(jsonLine).toBeDefined();
const markdownPath = markdownLine!.replace('- Summary report: ', '').trim();
const jsonPath = jsonLine!.replace('- Full JSON: ', '').trim();
expect(readFileSync(markdownPath, 'utf-8')).toContain('Council pipeline v1.0.0');
expect(readFileSync(jsonPath, 'utf-8')).toContain('"pipeline_version": "1.0.0"');
} finally {
if (previousDataDir !== undefined) {
process.env.FLYNN_DATA_DIR = previousDataDir;
} else {
delete process.env.FLYNN_DATA_DIR;
}
rmSync(testDataDir, { recursive: true, force: true });
}
});
it('returns error on invalid input', async () => {
@@ -132,4 +161,56 @@ describe('council.run tool', () => {
expect(result.success).toBe(false);
expect(result.error).toBeDefined();
});
it('classifies network/latency failures with hint and council config summary', async () => {
const tool = createCouncilRunTool({
registry: createRegistry(),
orchestrator: {
delegate: vi.fn(async () => {
throw new Error('Connection error.');
}) as any,
},
config: config as any,
});
const result = await tool.execute({ task: 'x' });
expect(result.success).toBe(false);
expect(result.error).toContain('Likely root cause: network_or_latency');
expect(result.error).toContain('Hint: Likely network/provider latency issue.');
expect(result.error).toContain('Council config: D=');
});
it('classifies cap_exceeded failures with cap_overflow hint', async () => {
const tool = createCouncilRunTool({
registry: createRegistry(),
orchestrator: {
delegate: vi.fn(async () => {
throw new Error('cap_exceeded');
}) as any,
},
config: config as any,
});
const result = await tool.execute({ task: 'x' });
expect(result.success).toBe(false);
expect(result.error).toContain('Likely root cause: cap_overflow');
expect(result.error).toContain('Bridge payload cap exceeded.');
});
it('classifies cancellation failures with user_cancelled hint', async () => {
const tool = createCouncilRunTool({
registry: createRegistry(),
orchestrator: {
delegate: vi.fn(async () => {
throw new Error('Operation cancelled by user.');
}) as any,
},
config: config as any,
});
const result = await tool.execute({ task: 'x' });
expect(result.success).toBe(false);
expect(result.error).toContain('Likely root cause: user_cancelled');
expect(result.error).toContain('Run was cancelled by user input');
});
});
+168 -2
View File
@@ -1,4 +1,8 @@
import type { AgentConfigRegistry } from '../../agents/registry.js';
import { mkdirSync, writeFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import type { ChatResponseFormat } from '../../models/types.js';
import type { Tool, ToolResult } from '../types.js';
import { CouncilsOrchestrator, type CouncilsConfig } from '../../councils/orchestrator.js';
import type { CouncilScaffold } from '../../councils/scaffold.js';
@@ -10,6 +14,7 @@ interface DelegateRunner {
systemPrompt: string;
message: string;
maxTokens?: number;
responseFormat?: ChatResponseFormat;
}): Promise<{
content: string;
usage: { inputTokens: number; outputTokens: number };
@@ -24,6 +29,124 @@ export interface CouncilRunDeps {
scaffold?: CouncilScaffold;
}
function slugifyTask(task: string): string {
const trimmed = task.trim().toLowerCase();
if (!trimmed) {
return 'council-run';
}
const slug = trimmed
.replace(/[^a-z0-9]+/g, '-')
.replace(/^-+|-+$/g, '')
.slice(0, 80);
return slug || 'council-run';
}
function formatTimestamp(date: Date): string {
return date.toISOString().replace(/[:.]/g, '-');
}
function getCouncilsDir(): string {
const dataDir = process.env.FLYNN_DATA_DIR ?? resolve(homedir(), '.local/share/flynn');
return resolve(dataDir, 'councils');
}
function writeCouncilArtifacts(
task: string,
summaryLines: string[],
conversationLog: string,
resultJson: string,
): { jsonPath: string; markdownPath: string } {
const dir = getCouncilsDir();
mkdirSync(dir, { recursive: true });
const stamp = formatTimestamp(new Date());
const base = `${stamp}-${slugifyTask(task)}`;
const jsonPath = resolve(dir, `${base}.json`);
const markdownPath = resolve(dir, `${base}.md`);
const markdown = [
...summaryLines,
'',
'Conversations:',
'',
conversationLog || '(none)',
'',
'Raw Result JSON:',
'```json',
resultJson,
'```',
'',
].join('\n');
writeFileSync(jsonPath, `${resultJson}\n`, 'utf-8');
writeFileSync(markdownPath, markdown, 'utf-8');
return { jsonPath, markdownPath };
}
function classifyCouncilFailure(message: string): 'network_or_latency' | 'json_format' | 'config' | 'cap_overflow' | 'user_cancelled' | 'unknown' {
const lower = message.toLowerCase();
if (
lower.includes('operation cancelled by user')
|| lower.includes('aborterror')
|| lower.includes('aborted')
|| lower.includes('cancelled')
) {
return 'user_cancelled';
}
if (
lower.includes('connection error')
|| lower.includes('timed out')
|| lower.includes('econn')
|| lower.includes('enotfound')
|| lower.includes('all model providers failed')
) {
return 'network_or_latency';
}
if (
lower.includes('repair_failed')
|| lower.includes('parse_failed')
|| lower.includes('json')
) {
return 'json_format';
}
if (
lower.includes('not configured')
|| lower.includes('disabled')
|| lower.includes('meta_validation_failed')
|| lower.includes('grounding_failed')
|| lower.includes('bridge_validation_failed')
) {
return 'config';
}
if (
lower.includes('cap_exceeded')
|| lower.includes('cap_top_ideas')
|| lower.includes('cap_field_bullets')
|| lower.includes('cap_entry_chars')
|| lower.includes('cap_total_chars')
) {
return 'cap_overflow';
}
return 'unknown';
}
function buildFailureHint(kind: ReturnType<typeof classifyCouncilFailure>): string {
switch (kind) {
case 'network_or_latency':
return 'Likely network/provider latency issue. Check endpoint reachability and consider faster council tiers.';
case 'json_format':
return 'Likely model output-format issue. Council JSON repair/retry was unable to normalize output.';
case 'config':
return 'Likely councils/agent configuration issue. Verify council agent names, tiers, and strict validation settings.';
case 'cap_overflow':
return 'Bridge payload cap exceeded. Reduce task breadth, lower max rounds, or raise councils.defaults bridge cap settings.';
case 'user_cancelled':
return 'Run was cancelled by user input (Esc/Ctrl+C). Re-run the council task when ready.';
default:
return 'No deterministic diagnosis from error text.';
}
}
export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
return {
name: 'council.run',
@@ -53,6 +176,20 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
scaffold: deps.scaffold,
});
const result = await runner.run(args);
const timedTrace = result.trace.filter((event) => typeof event.latency_ms === 'number');
const totalLatencyMs = timedTrace.reduce((sum, event) => sum + (event.latency_ms ?? 0), 0);
const phaseLatency = new Map<number, number>();
for (const event of timedTrace) {
const phase = event.phase_index;
phaseLatency.set(phase, (phaseLatency.get(phase) ?? 0) + (event.latency_ms ?? 0));
}
const phaseLatencyLines = [...phaseLatency.entries()]
.sort((a, b) => a[0] - b[0])
.map(([phase, latency]) => `- Phase ${phase}: ${latency}ms`);
const slowestCalls = [...timedTrace]
.sort((a, b) => (b.latency_ms ?? 0) - (a.latency_ms ?? 0))
.slice(0, 5)
.map((event) => `- ${event.call_id}: ${event.latency_ms ?? 0}ms`);
const lines = [
`[Council pipeline v${result.pipeline_version}]`,
@@ -68,6 +205,16 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
`- Open questions: ${result.meta.open_questions.length}`,
`- Next experiments: ${result.meta.next_experiments.length}`,
'',
'Timing:',
`- Timed calls: ${timedTrace.length}`,
`- Total model latency (summed): ${totalLatencyMs}ms`,
...phaseLatencyLines,
...(
slowestCalls.length > 0
? ['', 'Slowest calls:', ...slowestCalls]
: []
),
'',
`Agent conversations: ${result.conversations.length}`,
];
@@ -82,15 +229,34 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
})
.join('\n\n');
const resultJson = JSON.stringify(result, null, 2);
const artifacts = writeCouncilArtifacts(args.task, lines, conversationLog, resultJson);
return {
success: true,
output: `${lines.join('\n')}\n\n${conversationLog}\n\n${JSON.stringify(result)}`,
output: [
...lines,
'',
'Artifacts:',
`- Summary report: ${artifacts.markdownPath}`,
`- Full JSON: ${artifacts.jsonPath}`,
].join('\n'),
};
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
const kind = classifyCouncilFailure(message);
const hint = buildFailureHint(kind);
return {
success: false,
output: '',
error: error instanceof Error ? error.message : String(error),
error: [
message,
`Likely root cause: ${kind}`,
`Hint: ${hint}`,
`Council config: D=${deps.config.groups.D.arbiter_agent}/${deps.config.groups.D.freethinker_agent}@${deps.config.groups.D.model_tier ?? 'agent-tier'}, ` +
`P=${deps.config.groups.P.arbiter_agent}/${deps.config.groups.P.freethinker_agent}@${deps.config.groups.P.model_tier ?? 'agent-tier'}, ` +
`meta=${deps.config.meta_arbiter_agent}@${deps.config.meta_model_tier ?? 'agent-tier'}`,
].join('\n'),
};
}
},