feat(councils): add preflight, schema-driven outputs, and artifact reporting
This commit is contained in:
@@ -1,6 +1,9 @@
|
||||
import { describe, it, expect, vi } from 'vitest';
|
||||
import { createCouncilRunTool } from './council-run.js';
|
||||
import type { AgentConfigRegistry } from '../../agents/registry.js';
|
||||
import { mkdtempSync, readFileSync, rmSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
|
||||
function createRegistry(): AgentConfigRegistry {
|
||||
const configs = new Map<string, { name: string; modelTier?: 'fast' | 'default' | 'complex'; systemPrompt?: string }>([
|
||||
@@ -53,6 +56,10 @@ const config = {
|
||||
|
||||
describe('council.run tool', () => {
|
||||
it('runs council pipeline and returns output summary', async () => {
|
||||
const previousDataDir = process.env.FLYNN_DATA_DIR;
|
||||
const testDataDir = mkdtempSync(join(tmpdir(), 'flynn-council-run-'));
|
||||
process.env.FLYNN_DATA_DIR = testDataDir;
|
||||
|
||||
const delegate = vi.fn(async ({ message }: { message: string }) => {
|
||||
const payload = JSON.parse(message);
|
||||
if (payload.brief_D && payload.brief_P) {
|
||||
@@ -117,9 +124,31 @@ describe('council.run tool', () => {
|
||||
});
|
||||
|
||||
const result = await tool.execute({ task: 'plan migration' });
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.output).toContain('Council pipeline v1.0.0');
|
||||
expect(result.output).toContain('Meta selection');
|
||||
try {
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.output).toContain('Council pipeline v1.0.0');
|
||||
expect(result.output).toContain('Meta selection');
|
||||
expect(result.output).toContain('Timing:');
|
||||
expect(result.output).toContain('Slowest calls:');
|
||||
expect(result.output).toContain('Artifacts:');
|
||||
|
||||
const markdownLine = result.output.split('\n').find((line) => line.startsWith('- Summary report: '));
|
||||
const jsonLine = result.output.split('\n').find((line) => line.startsWith('- Full JSON: '));
|
||||
expect(markdownLine).toBeDefined();
|
||||
expect(jsonLine).toBeDefined();
|
||||
|
||||
const markdownPath = markdownLine!.replace('- Summary report: ', '').trim();
|
||||
const jsonPath = jsonLine!.replace('- Full JSON: ', '').trim();
|
||||
expect(readFileSync(markdownPath, 'utf-8')).toContain('Council pipeline v1.0.0');
|
||||
expect(readFileSync(jsonPath, 'utf-8')).toContain('"pipeline_version": "1.0.0"');
|
||||
} finally {
|
||||
if (previousDataDir !== undefined) {
|
||||
process.env.FLYNN_DATA_DIR = previousDataDir;
|
||||
} else {
|
||||
delete process.env.FLYNN_DATA_DIR;
|
||||
}
|
||||
rmSync(testDataDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('returns error on invalid input', async () => {
|
||||
@@ -132,4 +161,56 @@ describe('council.run tool', () => {
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toBeDefined();
|
||||
});
|
||||
|
||||
// A delegate that always rejects with a connection error must surface as a
// failed tool result whose error text carries the diagnosis, the hint, and
// the council configuration summary.
it('classifies network/latency failures with hint and council config summary', async () => {
  const failingDelegate = vi.fn(async () => {
    throw new Error('Connection error.');
  }) as any;
  const tool = createCouncilRunTool({
    registry: createRegistry(),
    orchestrator: { delegate: failingDelegate },
    config: config as any,
  });

  const outcome = await tool.execute({ task: 'x' });

  expect(outcome.success).toBe(false);
  for (const fragment of [
    'Likely root cause: network_or_latency',
    'Hint: Likely network/provider latency issue.',
    'Council config: D=',
  ]) {
    expect(outcome.error).toContain(fragment);
  }
});
|
||||
|
||||
// A delegate that rejects with a `cap_exceeded` error must be reported as a
// cap_overflow failure together with the bridge-cap hint.
it('classifies cap_exceeded failures with cap_overflow hint', async () => {
  const failingDelegate = vi.fn(async () => {
    throw new Error('cap_exceeded');
  }) as any;
  const tool = createCouncilRunTool({
    registry: createRegistry(),
    orchestrator: { delegate: failingDelegate },
    config: config as any,
  });

  const outcome = await tool.execute({ task: 'x' });

  expect(outcome.success).toBe(false);
  for (const fragment of [
    'Likely root cause: cap_overflow',
    'Bridge payload cap exceeded.',
  ]) {
    expect(outcome.error).toContain(fragment);
  }
});
|
||||
|
||||
// A delegate that rejects with a user-cancellation error must be reported as
// a user_cancelled failure together with the matching hint.
it('classifies cancellation failures with user_cancelled hint', async () => {
  const failingDelegate = vi.fn(async () => {
    throw new Error('Operation cancelled by user.');
  }) as any;
  const tool = createCouncilRunTool({
    registry: createRegistry(),
    orchestrator: { delegate: failingDelegate },
    config: config as any,
  });

  const outcome = await tool.execute({ task: 'x' });

  expect(outcome.success).toBe(false);
  for (const fragment of [
    'Likely root cause: user_cancelled',
    'Run was cancelled by user input',
  ]) {
    expect(outcome.error).toContain(fragment);
  }
});
|
||||
});
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
import type { AgentConfigRegistry } from '../../agents/registry.js';
|
||||
import { mkdirSync, writeFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import type { ChatResponseFormat } from '../../models/types.js';
|
||||
import type { Tool, ToolResult } from '../types.js';
|
||||
import { CouncilsOrchestrator, type CouncilsConfig } from '../../councils/orchestrator.js';
|
||||
import type { CouncilScaffold } from '../../councils/scaffold.js';
|
||||
@@ -10,6 +14,7 @@ interface DelegateRunner {
|
||||
systemPrompt: string;
|
||||
message: string;
|
||||
maxTokens?: number;
|
||||
responseFormat?: ChatResponseFormat;
|
||||
}): Promise<{
|
||||
content: string;
|
||||
usage: { inputTokens: number; outputTokens: number };
|
||||
@@ -24,6 +29,124 @@ export interface CouncilRunDeps {
|
||||
scaffold?: CouncilScaffold;
|
||||
}
|
||||
|
||||
/**
 * Turns a free-form task description into a filename-friendly slug.
 *
 * The task is lower-cased, every run of characters outside `[a-z0-9]` is
 * collapsed into a single hyphen, and the result is limited to 80 characters.
 * Leading and trailing hyphens are removed.
 *
 * @param task - Raw task text supplied by the caller.
 * @returns The slug, or `'council-run'` when nothing usable remains
 *   (empty, whitespace-only, or symbol-only input).
 */
function slugifyTask(task: string): string {
  const slug = task
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+/, '')
    .slice(0, 80)
    // Strip the trailing hyphen AFTER truncating: cutting at 80 characters can
    // land right on a separator, which would otherwise leave a dangling '-'.
    .replace(/-+$/, '');
  return slug || 'council-run';
}
|
||||
|
||||
/**
 * Renders a date as its ISO-8601 string with every `:` and `.` replaced by
 * `-`, e.g. `1970-01-01T00-00-00-000Z`.
 *
 * @param date - The instant to format.
 * @returns The ISO string with colons and dots swapped for hyphens.
 */
function formatTimestamp(date: Date): string {
  const iso = date.toISOString();
  return iso.split(/[:.]/).join('-');
}
|
||||
|
||||
/**
 * Resolves the directory that holds council artifacts.
 *
 * The base directory is taken from the `FLYNN_DATA_DIR` environment variable
 * when it is set; otherwise it falls back to `.local/share/flynn` under the
 * current user's home directory.
 *
 * @returns Absolute path of the `councils` sub-directory of the base directory.
 */
function getCouncilsDir(): string {
  let baseDir = process.env.FLYNN_DATA_DIR;
  if (baseDir == null) {
    baseDir = resolve(homedir(), '.local/share/flynn');
  }
  return resolve(baseDir, 'councils');
}
|
||||
|
||||
/**
 * Persists the outcome of a council run as two sibling files in the councils
 * directory: a raw JSON dump and a Markdown report.
 *
 * Both files share the base name `<timestamp>-<task slug>`. The Markdown file
 * contains the summary lines, a "Conversations:" section (or `(none)` when the
 * log is empty) and the JSON wrapped in a fenced code block.
 *
 * @param task - Task text; used only to derive the file-name slug.
 * @param summaryLines - Summary lines placed at the top of the Markdown report.
 * @param conversationLog - Pre-rendered conversation log text.
 * @param resultJson - Serialized result; written verbatim plus a trailing newline.
 * @returns Paths of the JSON file and the Markdown file that were written.
 */
function writeCouncilArtifacts(
  task: string,
  summaryLines: string[],
  conversationLog: string,
  resultJson: string,
): { jsonPath: string; markdownPath: string } {
  const outputDir = getCouncilsDir();
  // Create the target directory (and any missing parents) before writing.
  mkdirSync(outputDir, { recursive: true });

  const fileStem = [formatTimestamp(new Date()), slugifyTask(task)].join('-');
  const jsonPath = resolve(outputDir, fileStem + '.json');
  const markdownPath = resolve(outputDir, fileStem + '.md');

  const reportLines = [...summaryLines];
  reportLines.push('', 'Conversations:', '');
  reportLines.push(conversationLog || '(none)');
  reportLines.push('', 'Raw Result JSON:', '```json', resultJson, '```');
  // The final empty entry makes the joined report end with a newline.
  reportLines.push('');

  writeFileSync(jsonPath, `${resultJson}\n`, 'utf-8');
  writeFileSync(markdownPath, reportLines.join('\n'), 'utf-8');

  return { jsonPath, markdownPath };
}
|
||||
|
||||
/**
 * Maps an error message to a coarse failure category by case-insensitive
 * substring matching.
 *
 * Categories are tested in a fixed order and the first match wins:
 * cancellation, network/latency, JSON format, configuration, cap overflow.
 * A message matching none of them yields `'unknown'`.
 *
 * @param message - Error text to inspect.
 * @returns The first matching failure category, or `'unknown'`.
 */
function classifyCouncilFailure(message: string): 'network_or_latency' | 'json_format' | 'config' | 'cap_overflow' | 'user_cancelled' | 'unknown' {
  const haystack = message.toLowerCase();
  const mentionsAny = (needles: readonly string[]): boolean =>
    needles.some((needle) => haystack.includes(needle));

  if (mentionsAny(['operation cancelled by user', 'aborterror', 'aborted', 'cancelled'])) {
    return 'user_cancelled';
  }
  if (mentionsAny(['connection error', 'timed out', 'econn', 'enotfound', 'all model providers failed'])) {
    return 'network_or_latency';
  }
  if (mentionsAny(['repair_failed', 'parse_failed', 'json'])) {
    return 'json_format';
  }
  if (mentionsAny(['not configured', 'disabled', 'meta_validation_failed', 'grounding_failed', 'bridge_validation_failed'])) {
    return 'config';
  }
  if (mentionsAny(['cap_exceeded', 'cap_top_ideas', 'cap_field_bullets', 'cap_entry_chars', 'cap_total_chars'])) {
    return 'cap_overflow';
  }
  return 'unknown';
}
|
||||
|
||||
/**
 * Returns the human-readable hint that accompanies a failure category.
 *
 * @param kind - Category as produced by `classifyCouncilFailure`.
 * @returns The hint for that category; `'unknown'` (or any value without a
 *   dedicated entry) gets the generic "no deterministic diagnosis" text.
 */
function buildFailureHint(kind: ReturnType<typeof classifyCouncilFailure>): string {
  const hintByKind = new Map<string, string>([
    ['network_or_latency', 'Likely network/provider latency issue. Check endpoint reachability and consider faster council tiers.'],
    ['json_format', 'Likely model output-format issue. Council JSON repair/retry was unable to normalize output.'],
    ['config', 'Likely councils/agent configuration issue. Verify council agent names, tiers, and strict validation settings.'],
    ['cap_overflow', 'Bridge payload cap exceeded. Reduce task breadth, lower max rounds, or raise councils.defaults bridge cap settings.'],
    ['user_cancelled', 'Run was cancelled by user input (Esc/Ctrl+C). Re-run the council task when ready.'],
  ]);
  return hintByKind.get(kind) ?? 'No deterministic diagnosis from error text.';
}
|
||||
|
||||
export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
|
||||
return {
|
||||
name: 'council.run',
|
||||
@@ -53,6 +176,20 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
|
||||
scaffold: deps.scaffold,
|
||||
});
|
||||
const result = await runner.run(args);
|
||||
const timedTrace = result.trace.filter((event) => typeof event.latency_ms === 'number');
|
||||
const totalLatencyMs = timedTrace.reduce((sum, event) => sum + (event.latency_ms ?? 0), 0);
|
||||
const phaseLatency = new Map<number, number>();
|
||||
for (const event of timedTrace) {
|
||||
const phase = event.phase_index;
|
||||
phaseLatency.set(phase, (phaseLatency.get(phase) ?? 0) + (event.latency_ms ?? 0));
|
||||
}
|
||||
const phaseLatencyLines = [...phaseLatency.entries()]
|
||||
.sort((a, b) => a[0] - b[0])
|
||||
.map(([phase, latency]) => `- Phase ${phase}: ${latency}ms`);
|
||||
const slowestCalls = [...timedTrace]
|
||||
.sort((a, b) => (b.latency_ms ?? 0) - (a.latency_ms ?? 0))
|
||||
.slice(0, 5)
|
||||
.map((event) => `- ${event.call_id}: ${event.latency_ms ?? 0}ms`);
|
||||
|
||||
const lines = [
|
||||
`[Council pipeline v${result.pipeline_version}]`,
|
||||
@@ -68,6 +205,16 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
|
||||
`- Open questions: ${result.meta.open_questions.length}`,
|
||||
`- Next experiments: ${result.meta.next_experiments.length}`,
|
||||
'',
|
||||
'Timing:',
|
||||
`- Timed calls: ${timedTrace.length}`,
|
||||
`- Total model latency (summed): ${totalLatencyMs}ms`,
|
||||
...phaseLatencyLines,
|
||||
...(
|
||||
slowestCalls.length > 0
|
||||
? ['', 'Slowest calls:', ...slowestCalls]
|
||||
: []
|
||||
),
|
||||
'',
|
||||
`Agent conversations: ${result.conversations.length}`,
|
||||
];
|
||||
|
||||
@@ -82,15 +229,34 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
|
||||
})
|
||||
.join('\n\n');
|
||||
|
||||
const resultJson = JSON.stringify(result, null, 2);
|
||||
const artifacts = writeCouncilArtifacts(args.task, lines, conversationLog, resultJson);
|
||||
|
||||
return {
|
||||
success: true,
|
||||
output: `${lines.join('\n')}\n\n${conversationLog}\n\n${JSON.stringify(result)}`,
|
||||
output: [
|
||||
...lines,
|
||||
'',
|
||||
'Artifacts:',
|
||||
`- Summary report: ${artifacts.markdownPath}`,
|
||||
`- Full JSON: ${artifacts.jsonPath}`,
|
||||
].join('\n'),
|
||||
};
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
const kind = classifyCouncilFailure(message);
|
||||
const hint = buildFailureHint(kind);
|
||||
return {
|
||||
success: false,
|
||||
output: '',
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
error: [
|
||||
message,
|
||||
`Likely root cause: ${kind}`,
|
||||
`Hint: ${hint}`,
|
||||
`Council config: D=${deps.config.groups.D.arbiter_agent}/${deps.config.groups.D.freethinker_agent}@${deps.config.groups.D.model_tier ?? 'agent-tier'}, ` +
|
||||
`P=${deps.config.groups.P.arbiter_agent}/${deps.config.groups.P.freethinker_agent}@${deps.config.groups.P.model_tier ?? 'agent-tier'}, ` +
|
||||
`meta=${deps.config.meta_arbiter_agent}@${deps.config.meta_model_tier ?? 'agent-tier'}`,
|
||||
].join('\n'),
|
||||
};
|
||||
}
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user