feat(councils): add preflight, schema-driven outputs, and artifact reporting

2026-02-22 15:56:30 -08:00
parent dafe9b4d3d
commit 44c7409a20
18 changed files with 1686 additions and 29 deletions
@@ -1,6 +1,9 @@
 import { describe, it, expect, vi } from 'vitest';
 import { createCouncilRunTool } from './council-run.js';
 import type { AgentConfigRegistry } from '../../agents/registry.js';
+import { mkdtempSync, readFileSync, rmSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';

 function createRegistry(): AgentConfigRegistry {
  const configs = new Map<string, { name: string; modelTier?: 'fast' | 'default' | 'complex'; systemPrompt?: string }>([
@@ -53,6 +56,10 @@ const config = {

 describe('council.run tool', () => {
  it('runs council pipeline and returns output summary', async () => {
+    const previousDataDir = process.env.FLYNN_DATA_DIR;
+    const testDataDir = mkdtempSync(join(tmpdir(), 'flynn-council-run-'));
+    process.env.FLYNN_DATA_DIR = testDataDir;
+
    const delegate = vi.fn(async ({ message }: { message: string }) => {
      const payload = JSON.parse(message);
      if (payload.brief_D && payload.brief_P) {
@@ -117,9 +124,31 @@ describe('council.run tool', () => {
    });

    const result = await tool.execute({ task: 'plan migration' });
-    expect(result.success).toBe(true);
-    expect(result.output).toContain('Council pipeline v1.0.0');
-    expect(result.output).toContain('Meta selection');
+    try {
+      expect(result.success).toBe(true);
+      expect(result.output).toContain('Council pipeline v1.0.0');
+      expect(result.output).toContain('Meta selection');
+      expect(result.output).toContain('Timing:');
+      expect(result.output).toContain('Slowest calls:');
+      expect(result.output).toContain('Artifacts:');
+
+      const markdownLine = result.output.split('\n').find((line) => line.startsWith('- Summary report: '));
+      const jsonLine = result.output.split('\n').find((line) => line.startsWith('- Full JSON: '));
+      expect(markdownLine).toBeDefined();
+      expect(jsonLine).toBeDefined();
+
+      const markdownPath = markdownLine!.replace('- Summary report: ', '').trim();
+      const jsonPath = jsonLine!.replace('- Full JSON: ', '').trim();
+      expect(readFileSync(markdownPath, 'utf-8')).toContain('Council pipeline v1.0.0');
+      expect(readFileSync(jsonPath, 'utf-8')).toContain('"pipeline_version": "1.0.0"');
+    } finally {
+      if (previousDataDir !== undefined) {
+        process.env.FLYNN_DATA_DIR = previousDataDir;
+      } else {
+        delete process.env.FLYNN_DATA_DIR;
+      }
+      rmSync(testDataDir, { recursive: true, force: true });
+    }
  });

  it('returns error on invalid input', async () => {
@@ -132,4 +161,56 @@ describe('council.run tool', () => {
    expect(result.success).toBe(false);
    expect(result.error).toBeDefined();
  });
+
+  it('classifies network/latency failures with hint and council config summary', async () => {
+    const tool = createCouncilRunTool({
+      registry: createRegistry(),
+      orchestrator: {
+        delegate: vi.fn(async () => {
+          throw new Error('Connection error.');
+        }) as any,
+      },
+      config: config as any,
+    });
+
+    const result = await tool.execute({ task: 'x' });
+    expect(result.success).toBe(false);
+    expect(result.error).toContain('Likely root cause: network_or_latency');
+    expect(result.error).toContain('Hint: Likely network/provider latency issue.');
+    expect(result.error).toContain('Council config: D=');
+  });
+
+  it('classifies cap_exceeded failures with cap_overflow hint', async () => {
+    const tool = createCouncilRunTool({
+      registry: createRegistry(),
+      orchestrator: {
+        delegate: vi.fn(async () => {
+          throw new Error('cap_exceeded');
+        }) as any,
+      },
+      config: config as any,
+    });
+
+    const result = await tool.execute({ task: 'x' });
+    expect(result.success).toBe(false);
+    expect(result.error).toContain('Likely root cause: cap_overflow');
+    expect(result.error).toContain('Bridge payload cap exceeded.');
+  });
+
+  it('classifies cancellation failures with user_cancelled hint', async () => {
+    const tool = createCouncilRunTool({
+      registry: createRegistry(),
+      orchestrator: {
+        delegate: vi.fn(async () => {
+          throw new Error('Operation cancelled by user.');
+        }) as any,
+      },
+      config: config as any,
+    });
+
+    const result = await tool.execute({ task: 'x' });
+    expect(result.success).toBe(false);
+    expect(result.error).toContain('Likely root cause: user_cancelled');
+    expect(result.error).toContain('Run was cancelled by user input');
+  });
 });
@@ -1,4 +1,8 @@
 import type { AgentConfigRegistry } from '../../agents/registry.js';
+import { mkdirSync, writeFileSync } from 'node:fs';
+import { homedir } from 'node:os';
+import { resolve } from 'node:path';
+import type { ChatResponseFormat } from '../../models/types.js';
 import type { Tool, ToolResult } from '../types.js';
 import { CouncilsOrchestrator, type CouncilsConfig } from '../../councils/orchestrator.js';
 import type { CouncilScaffold } from '../../councils/scaffold.js';
@@ -10,6 +14,7 @@ interface DelegateRunner {
    systemPrompt: string;
    message: string;
    maxTokens?: number;
+    responseFormat?: ChatResponseFormat;
  }): Promise<{
    content: string;
    usage: { inputTokens: number; outputTokens: number };
@@ -24,6 +29,124 @@ export interface CouncilRunDeps {
  scaffold?: CouncilScaffold;
 }

/**
 * Converts a free-form task description into a lowercase, hyphen-separated
 * slug suitable for use inside a filename.
 *
 * Every run of characters outside `[a-z0-9]` collapses into a single `-`,
 * leading/trailing hyphens are removed, and the result is capped at 80
 * characters.
 *
 * @param task - Raw task text supplied by the caller.
 * @returns The slug, or `'council-run'` when nothing usable remains
 *   (empty, whitespace-only, or purely non-alphanumeric input).
 */
function slugifyTask(task: string): string {
  const fallback = 'council-run';
  const slug = task
    .trim()
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, 80)
    // Truncation can cut right after a separator; strip it again so the
    // slug never ends in '-'.
    .replace(/-+$/, '');
  return slug || fallback;
}
+
/**
 * Renders a date as its ISO-8601 string with every `:` and `.` swapped for
 * `-`, so the value can be embedded in a filename.
 *
 * @param date - The instant to format.
 * @returns For example `2026-02-22T23-56-30-123Z`.
 */
function formatTimestamp(date: Date): string {
  const iso = date.toISOString();
  // Splitting on the two separators and re-joining with '-' substitutes each
  // occurrence individually.
  const pieces = iso.split(/[:.]/);
  return pieces.join('-');
}
+
/**
 * Resolves the directory that holds council run artifacts.
 *
 * The base data directory comes from the `FLYNN_DATA_DIR` environment
 * variable; when that variable is unset, empty, or whitespace-only, it falls
 * back to `~/.local/share/flynn`. The `councils` subdirectory of that base is
 * returned as an absolute path.
 *
 * @returns Absolute path of the councils artifact directory.
 */
function getCouncilsDir(): string {
  const configured = process.env.FLYNN_DATA_DIR;
  // An empty value (e.g. `FLYNN_DATA_DIR=`) must not count as configured:
  // path.resolve ignores empty segments, so it would silently resolve
  // against the current working directory.
  const hasConfiguredDir = configured !== undefined && configured.trim() !== '';
  const dataDir = hasConfiguredDir ? configured : resolve(homedir(), '.local/share/flynn');
  return resolve(dataDir, 'councils');
}
+
/**
 * Persists one council run to disk as a pair of files that share a
 * `<timestamp>-<task-slug>` base name inside the councils directory:
 * a `.json` file holding `resultJson` plus a trailing newline, and a `.md`
 * report made of the summary lines, the conversation log (or `(none)` when
 * it is empty), and the JSON inside a fenced code block.
 *
 * The target directory is created recursively if it does not exist. Errors
 * raised by the synchronous filesystem calls are not caught here.
 *
 * @param task - Task text; used only to derive the filename slug.
 * @param summaryLines - Lines placed at the top of the markdown report.
 * @param conversationLog - Pre-rendered conversation transcript.
 * @param resultJson - Serialized run result.
 * @returns Absolute paths of the two files that were written.
 */
function writeCouncilArtifacts(
  task: string,
  summaryLines: string[],
  conversationLog: string,
  resultJson: string,
): { jsonPath: string; markdownPath: string } {
  const outputDir = getCouncilsDir();
  mkdirSync(outputDir, { recursive: true });

  // Both artifacts share one base name so they sort next to each other.
  const baseName = [formatTimestamp(new Date()), slugifyTask(task)].join('-');
  const jsonPath = resolve(outputDir, baseName + '.json');
  const markdownPath = resolve(outputDir, baseName + '.md');

  const conversationsSection = ['Conversations:', '', conversationLog || '(none)', ''];
  const rawJsonSection = ['Raw Result JSON:', '```json', resultJson, '```', ''];
  const report = summaryLines
    .concat([''], conversationsSection, rawJsonSection)
    .join('\n');

  writeFileSync(jsonPath, resultJson + '\n', 'utf-8');
  writeFileSync(markdownPath, report, 'utf-8');

  return { jsonPath, markdownPath };
}
+
/**
 * Maps a council failure message to a coarse root-cause category using
 * case-insensitive substring matching.
 *
 * Checks run from most specific to most generic, and the first match wins:
 *  1. the explicit "operation cancelled by user" phrase;
 *  2. transport/latency markers — checked before the generic abort markers
 *     because codes such as `ECONNABORTED` and timeout-driven aborts contain
 *     the word "aborted" yet are not user cancellations;
 *  3. generic abort/cancel markers;
 *  4. bridge cap tokens — checked before the catch-all `json` substring and
 *     the validation-failure tokens, which may appear in the same message;
 *  5. JSON parse/repair markers;
 *  6. configuration/validation markers.
 *
 * @param message - Error text to classify.
 * @returns The matching category, or `'unknown'` when nothing matches.
 */
function classifyCouncilFailure(message: string): 'network_or_latency' | 'json_format' | 'config' | 'cap_overflow' | 'user_cancelled' | 'unknown' {
  const lower = message.toLowerCase();
  const mentionsAny = (needles: readonly string[]): boolean =>
    needles.some((needle) => lower.includes(needle));

  if (lower.includes('operation cancelled by user')) {
    return 'user_cancelled';
  }
  if (
    mentionsAny([
      'connection error',
      'timed out',
      'timeout',
      'etimedout',
      'econn',
      'enotfound',
      'all model providers failed',
    ])
  ) {
    return 'network_or_latency';
  }
  if (mentionsAny(['aborterror', 'aborted', 'cancelled', 'canceled'])) {
    return 'user_cancelled';
  }
  if (
    mentionsAny([
      'cap_exceeded',
      'cap_top_ideas',
      'cap_field_bullets',
      'cap_entry_chars',
      'cap_total_chars',
    ])
  ) {
    return 'cap_overflow';
  }
  if (mentionsAny(['repair_failed', 'parse_failed', 'json'])) {
    return 'json_format';
  }
  if (
    mentionsAny([
      'not configured',
      'disabled',
      'meta_validation_failed',
      'grounding_failed',
      'bridge_validation_failed',
    ])
  ) {
    return 'config';
  }
  return 'unknown';
}
+
/**
 * Returns the remediation hint shown to the user for a classified council
 * failure.
 *
 * @param kind - Category produced by `classifyCouncilFailure`.
 * @returns The hint text for that category; any category without a dedicated
 *   hint (including `'unknown'`) yields the generic "no diagnosis" sentence.
 */
function buildFailureHint(kind: ReturnType<typeof classifyCouncilFailure>): string {
  // A Map rather than a plain object so that lookups never reach inherited
  // prototype properties.
  const hintsByKind = new Map<string, string>([
    [
      'network_or_latency',
      'Likely network/provider latency issue. Check endpoint reachability and consider faster council tiers.',
    ],
    [
      'json_format',
      'Likely model output-format issue. Council JSON repair/retry was unable to normalize output.',
    ],
    [
      'config',
      'Likely councils/agent configuration issue. Verify council agent names, tiers, and strict validation settings.',
    ],
    [
      'cap_overflow',
      'Bridge payload cap exceeded. Reduce task breadth, lower max rounds, or raise councils.defaults bridge cap settings.',
    ],
    [
      'user_cancelled',
      'Run was cancelled by user input (Esc/Ctrl+C). Re-run the council task when ready.',
    ],
  ]);

  const hint = hintsByKind.get(kind);
  if (hint !== undefined) {
    return hint;
  }
  return 'No deterministic diagnosis from error text.';
}
+
 export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
  return {
    name: 'council.run',
@@ -53,6 +176,20 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
          scaffold: deps.scaffold,
        });
        const result = await runner.run(args);
+        const timedTrace = result.trace.filter((event) => typeof event.latency_ms === 'number');
+        const totalLatencyMs = timedTrace.reduce((sum, event) => sum + (event.latency_ms ?? 0), 0);
+        const phaseLatency = new Map<number, number>();
+        for (const event of timedTrace) {
+          const phase = event.phase_index;
+          phaseLatency.set(phase, (phaseLatency.get(phase) ?? 0) + (event.latency_ms ?? 0));
+        }
+        const phaseLatencyLines = [...phaseLatency.entries()]
+          .sort((a, b) => a[0] - b[0])
+          .map(([phase, latency]) => `- Phase ${phase}: ${latency}ms`);
+        const slowestCalls = [...timedTrace]
+          .sort((a, b) => (b.latency_ms ?? 0) - (a.latency_ms ?? 0))
+          .slice(0, 5)
+          .map((event) => `- ${event.call_id}: ${event.latency_ms ?? 0}ms`);

        const lines = [
          `[Council pipeline v${result.pipeline_version}]`,
@@ -68,6 +205,16 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
          `- Open questions: ${result.meta.open_questions.length}`,
          `- Next experiments: ${result.meta.next_experiments.length}`,
          '',
+          'Timing:',
+          `- Timed calls: ${timedTrace.length}`,
+          `- Total model latency (summed): ${totalLatencyMs}ms`,
+          ...phaseLatencyLines,
+          ...(
+            slowestCalls.length > 0
+              ? ['', 'Slowest calls:', ...slowestCalls]
+              : []
+          ),
+          '',
          `Agent conversations: ${result.conversations.length}`,
        ];

@@ -82,15 +229,34 @@ export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
          })
          .join('\n\n');

+        const resultJson = JSON.stringify(result, null, 2);
+        const artifacts = writeCouncilArtifacts(args.task, lines, conversationLog, resultJson);
+
        return {
          success: true,
-          output: `${lines.join('\n')}\n\n${conversationLog}\n\n${JSON.stringify(result)}`,
+          output: [
+            ...lines,
+            '',
+            'Artifacts:',
+            `- Summary report: ${artifacts.markdownPath}`,
+            `- Full JSON: ${artifacts.jsonPath}`,
+          ].join('\n'),
        };
      } catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        const kind = classifyCouncilFailure(message);
+        const hint = buildFailureHint(kind);
        return {
          success: false,
          output: '',
-          error: error instanceof Error ? error.message : String(error),
+          error: [
+            message,
+            `Likely root cause: ${kind}`,
+            `Hint: ${hint}`,
+            `Council config: D=${deps.config.groups.D.arbiter_agent}/${deps.config.groups.D.freethinker_agent}@${deps.config.groups.D.model_tier ?? 'agent-tier'}, ` +
+              `P=${deps.config.groups.P.arbiter_agent}/${deps.config.groups.P.freethinker_agent}@${deps.config.groups.P.model_tier ?? 'agent-tier'}, ` +
+              `meta=${deps.config.meta_arbiter_agent}@${deps.config.meta_model_tier ?? 'agent-tier'}`,
+          ].join('\n'),
        };
      }
    },