import { createHash } from 'crypto';

/**
 * Recursively rewrites `value` into the canonical shape used for hashing.
 *
 * - Arrays keep their order; each element is canonicalized in place
 *   (elements are NOT dropped, even when null or empty).
 * - Plain objects get their keys sorted, and properties whose value is
 *   "optional-empty" (see {@link normalizeOptional}) are omitted.
 * - Non-finite numbers (NaN, +/-Infinity) collapse to 0; non-integers are
 *   rounded to 6 decimal places so float noise does not change the hash.
 * - Everything else is returned untouched.
 */
function sortValue(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map((item) => sortValue(item));
  }

  if (value !== null && typeof value === 'object') {
    const source = value as Record<string, unknown>;
    const entries: Array<[string, unknown]> = [];
    for (const key of Object.keys(source).sort()) {
      const normalized = normalizeOptional(source[key]);
      if (normalized !== undefined) {
        entries.push([key, sortValue(normalized)]);
      }
    }
    // Object.fromEntries defines own data properties, so a key literally
    // named "__proto__" (possible after JSON.parse) is kept instead of being
    // swallowed by the prototype setter as a plain `out[key] = ...` would do.
    return Object.fromEntries(entries);
  }

  if (typeof value === 'number') {
    // Number.isFinite is false for NaN as well as +/-Infinity.
    if (!Number.isFinite(value)) {
      return 0;
    }
    // Keep integer-like values stable; round everything else.
    return Number.isInteger(value) ? value : Number(value.toFixed(6));
  }

  return value;
}

/**
 * Maps "empty" optional values to `undefined` so they can be omitted.
 *
 * @param value - Any value.
 * @returns `undefined` for `undefined`, `null`, an empty array, or a string
 *   that is empty after trimming; otherwise `value` unchanged.
 */
export function normalizeOptional<T>(value: T): T | undefined {
  if (value === undefined || value === null) {
    return undefined;
  }
  if (Array.isArray(value) && value.length === 0) {
    return undefined;
  }
  if (typeof value === 'string' && value.trim() === '') {
    return undefined;
  }
  return value;
}

/**
 * Serializes `value` as JSON after canonicalization (sorted keys, empty
 * optional properties removed, numbers normalized).
 *
 * @returns Always a string. Inputs that JSON cannot represent at the top
 *   level (for example `undefined`) serialize as `"null"`; previously
 *   `JSON.stringify` leaked `undefined` through the `string` return type.
 */
export function canonicalStringify(value: unknown): string {
  return JSON.stringify(sortValue(value)) ?? 'null';
}

/**
 * Computes the SHA-256 digest of the canonical JSON form of `value`.
 *
 * @returns Lower-case hexadecimal digest (64 characters).
 */
export function hashCanonical(value: unknown): string {
  return createHash('sha256').update(canonicalStringify(value)).digest('hex');
}
['council_p_freethinker', { name: 'council_p_freethinker', modelTier: 'default', systemPrompt: 'P FT' }], + ['council_meta_arbiter', { name: 'council_meta_arbiter', modelTier: 'default', systemPrompt: 'Meta' }], + ]); + + return { + get: (name: string) => configs.get(name), + list: () => [...configs.values()], + } as unknown as AgentConfigRegistry; +} + +function createConfig(overrides?: Partial): CouncilsConfig { + return { + enabled: true, + defaults: { + max_rounds: 2, + ideas_per_round: 3, + top_ideas_for_bridge: 2, + bridge_packet_max_chars: 4000, + bridge_field_max_bullets: 6, + bridge_entry_max_chars: 300, + novelty_delta_threshold: 10, + repetition_threshold: 70, + }, + strict_grounding: false, + strict_meta_validation: true, + groups: { + D: { + arbiter_agent: 'council_d_arbiter', + freethinker_agent: 'council_d_freethinker', + group_prompt_prefix: 'D prefix', + novelty_bias: 'low', + risk_tolerance: 'low', + forbidden_approaches: ['moonshots'], + }, + P: { + arbiter_agent: 'council_p_arbiter', + freethinker_agent: 'council_p_freethinker', + group_prompt_prefix: 'P prefix', + novelty_bias: 'high', + risk_tolerance: 'high', + forbidden_approaches: ['incremental'], + }, + }, + meta_arbiter_agent: 'council_meta_arbiter', + ...overrides, + }; +} + +describe('CouncilsOrchestrator', () => { + it('runs D/P pipeline with deterministic IDs and trace ordering', async () => { + const delegate = vi.fn(async ({ message }: { message: string }) => { + const payload = JSON.parse(message); + + if (payload.brief_D && payload.brief_P) { + return { + content: JSON.stringify({ + schema_version: '1.0.0', + selected_primary: [payload.brief_D.shortlist[0]], + selected_secondary: [payload.brief_P.shortlist[0]], + merges: [], + rejections: [], + open_questions: ['q1'], + next_experiments: ['e1'], + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + if (payload.shortlisted_ideas) { + return { + content: JSON.stringify({ + grounded: 
payload.shortlisted_ideas.map((idea: { idea_id: string }) => ({ + idea_id: idea.idea_id, + mve: `test-${idea.idea_id}`, + constraints: ['c1'], + falsifiability_checks: ['f1'], + })), + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + if (payload.ideas) { + const ids = payload.ideas.map((idea: { idea_id: string }) => idea.idea_id); + const round = payload.round; + return { + content: JSON.stringify({ + assessments: ids.map((idea_id: string, i: number) => ({ + idea_id, + scores: { novelty: 60 - i, feasibility: 80, impact: 70, testability: 75 }, + decision: i === 0 ? 'shortlist' : 'hold', + notes: `note-${idea_id}`, + })), + assumptions: [`assume-${payload.group}-${round}`], + risks: [`risk-${payload.group}-${round}`], + asks: [`ask-${payload.group}-${round}`], + what_to_steal: [`steal-${payload.group}-${round}`], + convergence_signal: round >= 2, + novelty_score: round >= 2 ? 50 : 62, + repetition_rate: round >= 2 ? 72 : 20, + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + if (payload.group && payload.round) { + const marker = payload.group === 'D' ? 
'D-MARKER' : 'P-MARKER'; + return { + content: JSON.stringify({ + ideas: [ + { + title: `${payload.group} idea 1`, + hypothesis: `${marker} hypothesis`, + mechanism: `${marker} mechanism`, + expected_outcome: 'Outcome 1', + }, + { + title: `${payload.group} idea 2`, + hypothesis: `h2-${payload.group}`, + mechanism: `m2-${payload.group}`, + expected_outcome: 'Outcome 2', + }, + ], + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + throw new Error('Unexpected payload'); + }); + + const orchestrator = new CouncilsOrchestrator({ + registry: createRegistry(), + orchestrator: { delegate }, + config: createConfig(), + }); + + const result = await orchestrator.run({ task: 'design test harness' }); + expect(result.pipeline_version).toBe('1.0.0'); + expect(result.brief_D_v1.ideas[0].idea_id).toBe('D.r1.01'); + expect(result.brief_P_v1.ideas[0].idea_id).toBe('P.r1.01'); + expect(result.stop_snapshot.stop_reason).toBe('convergence'); + const callIds = result.trace.map((e) => e.call_id); + expect(callIds).toEqual([...callIds].sort((a, b) => { + const pa = result.trace.find((e) => e.call_id === a)?.phase_index ?? 0; + const pb = result.trace.find((e) => e.call_id === b)?.phase_index ?? 
0; + return pa - pb || a.localeCompare(b); + })); + }); + + it('keeps cross-council leakage constrained to bridge fields', async () => { + const observedPayloads: unknown[] = []; + const delegate = vi.fn(async ({ message }: { message: string }) => { + const payload = JSON.parse(message); + observedPayloads.push(payload); + + if (payload.brief_D && payload.brief_P) { + return { + content: JSON.stringify({ + schema_version: '1.0.0', + selected_primary: [payload.brief_D.shortlist[0]], + selected_secondary: [payload.brief_P.shortlist[0]], + merges: [], + rejections: [], + open_questions: ['q1'], + next_experiments: ['e1'], + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + if (payload.shortlisted_ideas) { + return { + content: JSON.stringify({ grounded: [] }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + if (payload.ideas) { + return { + content: JSON.stringify({ + assessments: payload.ideas.map((idea: { idea_id: string }, idx: number) => ({ + idea_id: idea.idea_id, + scores: { novelty: 50, feasibility: 50, impact: 50, testability: 50 }, + decision: idx === 0 ? 'shortlist' : 'hold', + notes: 'ok', + })), + assumptions: [], + risks: [], + asks: [], + what_to_steal: [], + convergence_signal: true, + novelty_score: 50, + repetition_rate: 90, + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + + return { + content: JSON.stringify({ + ideas: [ + { + title: 't', + hypothesis: payload.group === 'D' ? 'D-MARKER hypothesis' : 'plain hypothesis', + mechanism: payload.group === 'D' ? 
'D-MARKER mechanism' : 'plain mechanism', + expected_outcome: 'o', + }, + ], + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + }); + + const orchestrator = new CouncilsOrchestrator({ + registry: createRegistry(), + orchestrator: { delegate }, + config: createConfig(), + }); + + await orchestrator.run({ task: 'x' }); + + const pRound1Payloads = observedPayloads.filter((p: any) => p.group === 'P' && p.round === 1); + expect(JSON.stringify(pRound1Payloads)).not.toContain('D-MARKER'); + + const pRound2Payload = observedPayloads.find((p: any) => p.group === 'P' && p.round === 2) as any; + expect(JSON.stringify(pRound2Payload.peer_bridge)).toContain('D-MARKER'); + }); + + it('fails closed on bridge cap overflow before phase 2 executes', async () => { + const delegate = vi.fn(async ({ message }: { message: string }) => { + const payload = JSON.parse(message); + if (payload.brief_D && payload.brief_P) { + return { + content: JSON.stringify({ + schema_version: '1.0.0', + selected_primary: [payload.brief_D.shortlist[0]], + selected_secondary: [payload.brief_P.shortlist[0]], + merges: [], + rejections: [], + open_questions: ['q1'], + next_experiments: ['e1'], + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + if (payload.shortlisted_ideas) { + return { + content: JSON.stringify({ grounded: payload.shortlisted_ideas.map((idea: any) => ({ idea_id: idea.idea_id, mve: 'm', constraints: ['c'], falsifiability_checks: ['f'] })) }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + if (payload.ideas) { + return { + content: JSON.stringify({ + assessments: payload.ideas.map((idea: any, idx: number) => ({ + idea_id: idea.idea_id, + scores: { novelty: 50, feasibility: 50, impact: 50, testability: 50 }, + decision: idx === 0 ? 
'shortlist' : 'hold', + notes: 'very-long-note-that-will-overflow-bridge-limits', + })), + assumptions: ['a'], + risks: ['r'], + asks: ['k'], + what_to_steal: ['this-is-way-too-long-for-cap'], + convergence_signal: false, + novelty_score: 60, + repetition_rate: 10, + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + return { + content: JSON.stringify({ + ideas: [{ title: 't', hypothesis: 'h', mechanism: 'm', expected_outcome: 'o' }], + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + }); + + const orchestrator = new CouncilsOrchestrator({ + registry: createRegistry(), + orchestrator: { delegate }, + config: createConfig({ + defaults: { + ...createConfig().defaults, + bridge_entry_max_chars: 5, + }, + }), + }); + + const result = await orchestrator.run({ task: 'x', max_rounds: 2 }); + expect(result.stop_snapshot.stop_reason).toBe('bridge_validation_failed'); + expect(result.stop_snapshot.round_reached).toBe(1); + }); +}); diff --git a/src/councils/orchestrator.ts b/src/councils/orchestrator.ts new file mode 100644 index 0000000..6770579 --- /dev/null +++ b/src/councils/orchestrator.ts @@ -0,0 +1,743 @@ +import type { AgentConfigRegistry } from '../agents/registry.js'; +import type { AgentOrchestrator } from '../backends/native/orchestrator.js'; +import type { ModelTier } from '../models/router.js'; +import type { TokenUsage } from '../models/types.js'; +import { + COUNCIL_PIPELINE_VERSION, + COUNCIL_SCHEMA_VERSION, + assessmentOutputSchema, + bridgePacketSchema, + councilBriefSchema, + councilDiffSchema, + councilRunInputSchema, + councilRunResultSchema, + councilTraceEventSchema, + groundingOutputSchema, + ideationOutputSchema, + metaSelectionSchema, + type BridgePacket, + type CouncilBrief, + type CouncilDiff, + type CouncilGroup, + type CouncilRunInput, + type CouncilRunResult, + type CouncilTraceEvent, + type IdeaAssessment, + type IdeaCard, + type StopReason, +} from './types.js'; 
import { canonicalStringify, hashCanonical, normalizeOptional } from './canonical.js';

/** Minimal delegate surface the councils engine needs from an orchestrator. */
interface DelegateRunner {
  delegate(request: {
    tier: ModelTier;
    systemPrompt: string;
    message: string;
    maxTokens?: number;
  }): Promise<{
    content: string;
    usage: TokenUsage;
    tier: ModelTier;
  }>;
}

/** Static configuration for a councils run. */
export interface CouncilsConfig {
  enabled: boolean;
  defaults: {
    max_rounds: number;
    ideas_per_round: number;
    top_ideas_for_bridge: number;
    bridge_packet_max_chars: number;
    bridge_field_max_bullets: number;
    bridge_entry_max_chars: number;
    novelty_delta_threshold: number;
    repetition_threshold: number;
  };
  strict_grounding: boolean;
  strict_meta_validation: boolean;
  groups: {
    D: CouncilGroupConfig;
    P: CouncilGroupConfig;
  };
  meta_arbiter_agent: string;
}

/** Per-group agent names and prompt profile. */
interface CouncilGroupConfig {
  arbiter_agent: string;
  freethinker_agent: string;
  group_prompt_prefix: string;
  novelty_bias: 'low' | 'medium' | 'high';
  risk_tolerance: 'low' | 'medium' | 'high';
  forbidden_approaches: string[];
}

/** Outcome of one ideation/assessment/grounding round for one group. */
interface GroupRoundResult {
  brief: CouncilBrief;
  convergenceQualified: boolean;
  groundingFailures: number;
}

/** The parts of a delegate response the engine consumes. */
interface AgentCallResult {
  content: string;
  usage: TokenUsage;
}

/**
 * Removes commas that directly precede a closing `}` or `]` (optionally
 * separated by whitespace), skipping over JSON string literals so commas that
 * are part of string content are never touched.
 */
function stripTrailingCommas(json: string): string {
  const whitespace = /\s/;
  let out = '';
  let inString = false;
  let escaped = false;

  for (let i = 0; i < json.length; i++) {
    const ch = json.charAt(i);

    if (inString) {
      out += ch;
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    if (ch === '"') {
      inString = true;
      out += ch;
      continue;
    }

    if (ch === ',') {
      let next = i + 1;
      while (next < json.length && whitespace.test(json.charAt(next))) {
        next++;
      }
      const follower = json.charAt(next);
      if (follower === '}' || follower === ']') {
        // Drop the comma together with the whitespace before the closer.
        i = next - 1;
        continue;
      }
    }

    out += ch;
  }

  return out;
}

/**
 * Best-effort, deterministic clean-up of model output that failed to parse:
 * strips a surrounding Markdown code fence, cuts out the first balanced JSON
 * object/array, and removes trailing commas.
 *
 * @returns The repaired JSON text, or `null` when no balanced container exists.
 */
function deterministicJsonRepair(raw: string): string | null {
  const trimmed = raw.trim();
  const noFence = trimmed
    .replace(/^```json\s*/i, '')
    .replace(/^```\s*/i, '')
    .replace(/```$/, '')
    .trim();

  const extracted = extractFirstJsonContainer(noFence);
  if (!extracted) {
    return null;
  }

  // String-aware on purpose: a plain regex replace would also rewrite text
  // such as "x, ]" inside string values and silently change the payload.
  return stripTrailingCommas(extracted).trim();
}

/**
 * Returns the first balanced `{...}` or `[...]` span in `input`.
 *
 * Only brackets of the same kind as the opening one are counted, and
 * brackets inside double-quoted strings are ignored.
 *
 * @returns The span including its delimiters, or `null` when there is no
 *   opener or the container is never closed.
 */
function extractFirstJsonContainer(input: string): string | null {
  const start = input.search(/[\[{]/);
  if (start < 0) {
    return null;
  }
  const opener = input.charAt(start);
  const closer = opener === '{' ? '}' : ']';
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < input.length; i++) {
    const ch = input.charAt(i);
    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === opener) {
      depth++;
    } else if (ch === closer) {
      depth--;
      if (depth === 0) {
        return input.slice(start, i + 1);
      }
    }
  }

  return null;
}

/**
 * Parses `raw` as JSON and feeds the result to `parser`; if either step
 * throws, retries once on the deterministically repaired text.
 *
 * @param raw - Raw model output.
 * @param parser - Validator/transformer applied to the parsed JSON value.
 * @throws Error('parse_failed') when the first attempt fails and no JSON
 *   container can be extracted.
 * @throws Error('repair_failed') when the repaired text still fails to parse
 *   or is rejected by `parser`.
 */
function parseJsonWithRepair<T>(raw: string, parser: (value: unknown) => T): T {
  try {
    return parser(JSON.parse(raw));
  } catch {
    const repaired = deterministicJsonRepair(raw);
    if (!repaired) {
      throw new Error('parse_failed');
    }
    try {
      return parser(JSON.parse(repaired));
    } catch {
      throw new Error('repair_failed');
    }
  }
}

/** Removes duplicates while keeping first-occurrence order. */
function uniq(values: string[]): string[] {
  return Array.from(new Set(values));
}

/** Sum of the four assessment scores. */
function computeTotalScore(assessment: IdeaAssessment): number {
  const s = assessment.scores;
  return s.feasibility + s.impact + s.novelty + s.testability;
}

/**
 * Builds the diff between two briefs of the same group: ideas and shortlist
 * entries added/removed, total-score changes, assumption changes, and ideas
 * whose grounded MVE text changed. All lists are sorted for determinism.
 *
 * NOTE(review): the orchestrator's idea IDs appear to embed the round number,
 * so briefs from different rounds may share no IDs, leaving `score_changes`
 * and `mve_changed` empty — confirm this is intended.
 */
function buildDiff(group: CouncilGroup, fromBrief: CouncilBrief, toBrief: CouncilBrief): CouncilDiff {
  const fromIdeaIds = new Set(fromBrief.ideas.map((i) => i.idea_id));
  const toIdeaIds = new Set(toBrief.ideas.map((i) => i.idea_id));
  const fromShortlist = new Set(fromBrief.shortlist);
  const toShortlist = new Set(toBrief.shortlist);
  const fromAssumptions = new Set(fromBrief.assumptions);
  const toAssumptions = new Set(toBrief.assumptions);

  const ideaAdded = [...toIdeaIds].filter((id) => !fromIdeaIds.has(id));
  const ideaRemoved = [...fromIdeaIds].filter((id) => !toIdeaIds.has(id));

  const shortlistAdded = toBrief.shortlist.filter((id) => !fromShortlist.has(id));
  const shortlistRemoved = fromBrief.shortlist.filter((id) => !toShortlist.has(id));

  const fromAssessmentMap = new Map(fromBrief.assessments.map((a) => [a.idea_id, a]));
  const scoreChanges = toBrief.assessments
    .flatMap((current) => {
      const previous = fromAssessmentMap.get(current.idea_id);
      if (!previous) {
        return [];
      }
      const fromTotal = computeTotalScore(previous);
      const toTotal = computeTotalScore(current);
      return fromTotal === toTotal
        ? []
        : [{ idea_id: current.idea_id, from_total: fromTotal, to_total: toTotal }];
    })
    .sort((a, b) => a.idea_id.localeCompare(b.idea_id));

  const assumptionsAdded = toBrief.assumptions.filter((a) => !fromAssumptions.has(a));
  const assumptionsRemoved = fromBrief.assumptions.filter((a) => !toAssumptions.has(a));

  const fromGroundingMap = new Map(fromBrief.ideas.map((i) => [i.idea_id, i.grounding?.mve]));
  const mveChanged = toBrief.ideas
    .filter(
      (idea) =>
        fromGroundingMap.has(idea.idea_id) &&
        fromGroundingMap.get(idea.idea_id) !== idea.grounding?.mve,
    )
    .map((idea) => idea.idea_id)
    .sort();

  return councilDiffSchema.parse({
    schema_version: COUNCIL_SCHEMA_VERSION,
    group,
    from_round: fromBrief.round,
    to_round: toBrief.round,
    idea_added: ideaAdded.sort(),
    idea_removed: ideaRemoved.sort(),
    shortlist_added: shortlistAdded.sort(),
    shortlist_removed: shortlistRemoved.sort(),
    score_changes: scoreChanges,
    assumptions_added: assumptionsAdded.sort(),
    assumptions_removed: assumptionsRemoved.sort(),
    mve_changed: mveChanged,
  });
}
from_total: computeTotalScore(prev), + to_total: computeTotalScore(a), + }; + }) + .filter((entry) => entry.from_total !== entry.to_total) + .sort((a, b) => a.idea_id.localeCompare(b.idea_id)); + + const assumptionsAdded = toBrief.assumptions.filter((a) => !fromBrief.assumptions.includes(a)); + const assumptionsRemoved = fromBrief.assumptions.filter((a) => !toBrief.assumptions.includes(a)); + + const fromGroundingMap = new Map(fromBrief.ideas.map((i) => [i.idea_id, i.grounding?.mve])); + const mveChanged = toBrief.ideas + .filter((idea) => fromGroundingMap.has(idea.idea_id)) + .filter((idea) => fromGroundingMap.get(idea.idea_id) !== idea.grounding?.mve) + .map((idea) => idea.idea_id) + .sort(); + + return councilDiffSchema.parse({ + schema_version: COUNCIL_SCHEMA_VERSION, + group, + from_round: fromBrief.round, + to_round: toBrief.round, + idea_added: ideaAdded.sort(), + idea_removed: ideaRemoved.sort(), + shortlist_added: shortlistAdded.sort(), + shortlist_removed: shortlistRemoved.sort(), + score_changes: scoreChanges, + assumptions_added: assumptionsAdded.sort(), + assumptions_removed: assumptionsRemoved.sort(), + mve_changed: mveChanged, + }); +} + +export class CouncilsOrchestrator { + private readonly _registry: AgentConfigRegistry; + private readonly _delegateRunner: DelegateRunner; + private readonly _config: CouncilsConfig; + private readonly _trace: CouncilTraceEvent[] = []; + + constructor(deps: { + registry: AgentConfigRegistry; + orchestrator: DelegateRunner; + config: CouncilsConfig; + }) { + this._registry = deps.registry; + this._delegateRunner = deps.orchestrator; + this._config = deps.config; + } + + async run(rawInput: unknown): Promise { + const input = councilRunInputSchema.parse(rawInput); + if (!this._config.enabled) { + throw new Error('Councils are disabled in config'); + } + + this._trace.length = 0; + const inputHash = hashCanonical(input); + const maxRounds = input.max_rounds ?? 
this._config.defaults.max_rounds; + + const phase1 = await Promise.all([ + this.runGroupRound('D', 1, input), + this.runGroupRound('P', 1, input), + ]); + const briefD1 = phase1[0].brief; + const briefP1 = phase1[1].brief; + + let bridgeDToP: BridgePacket; + let bridgePToD: BridgePacket; + let bridgeValidated = true; + try { + bridgeDToP = this.buildBridgePacket(briefD1, 'P'); + bridgePToD = this.buildBridgePacket(briefP1, 'D'); + } catch { + bridgeValidated = false; + bridgeDToP = bridgePacketSchema.parse({ + schema_version: COUNCIL_SCHEMA_VERSION, + from_group: 'D', + to_group: 'P', + round: 1, + top_ideas: [], + assumptions: [], + risks: [], + asks: [], + what_to_steal: [], + }); + bridgePToD = bridgePacketSchema.parse({ + schema_version: COUNCIL_SCHEMA_VERSION, + from_group: 'P', + to_group: 'D', + round: 1, + top_ideas: [], + assumptions: [], + risks: [], + asks: [], + what_to_steal: [], + }); + } + + let currentD = briefD1; + let currentP = briefP1; + let lastRound = 1; + let groundingFailuresCount = phase1[0].groundingFailures + phase1[1].groundingFailures; + let stopReason: StopReason = bridgeValidated ? 
'max_rounds' : 'bridge_validation_failed'; + + for (let round = 2; round <= maxRounds; round++) { + if (!bridgeValidated) { + break; + } + try { + this.enforceBridgeCaps(bridgeDToP); + this.enforceBridgeCaps(bridgePToD); + } catch { + bridgeValidated = false; + stopReason = 'bridge_validation_failed'; + break; + } + + const [dRound, pRound] = await Promise.all([ + this.runGroupRound('D', round, input, bridgePToD, currentD), + this.runGroupRound('P', round, input, bridgeDToP, currentP), + ]); + + currentD = dRound.brief; + currentP = pRound.brief; + groundingFailuresCount += dRound.groundingFailures + pRound.groundingFailures; + lastRound = round; + + bridgeDToP = this.buildBridgePacket(currentD, 'P'); + bridgePToD = this.buildBridgePacket(currentP, 'D'); + + const bothConverged = dRound.convergenceQualified && pRound.convergenceQualified; + if (bothConverged) { + stopReason = 'convergence'; + break; + } + } + + if (groundingFailuresCount > 0 && this._config.strict_grounding) { + stopReason = 'grounding_failed'; + } + + const diffD = buildDiff('D', briefD1, currentD); + const diffP = buildDiff('P', briefP1, currentP); + + const meta = await this.runMetaMerge(input, currentD, currentP); + const allKnownIds = new Set([...currentD.ideas, ...currentP.ideas].map((idea) => idea.idea_id)); + if (!this.validateMetaSelection(meta, allKnownIds)) { + if (this._config.strict_meta_validation) { + stopReason = 'meta_validation_failed'; + throw new Error('meta_validation_failed'); + } + } + + const stopSnapshot = { + stop_reason: stopReason, + round_reached: lastRound, + final_shortlist_D: currentD.shortlist, + final_shortlist_P: currentP.shortlist, + bridge_validated: bridgeValidated, + grounding_failures_count: groundingFailuresCount, + }; + + const result = councilRunResultSchema.parse({ + pipeline_version: COUNCIL_PIPELINE_VERSION, + input_hash: inputHash, + brief_D_v1: briefD1, + brief_P_v1: briefP1, + brief_D_v2: currentD, + brief_P_v2: currentP, + diff_D: diffD, + diff_P: 
diffP, + bridge_D_to_P: bridgeDToP, + bridge_P_to_D: bridgePToD, + meta, + stop_snapshot: stopSnapshot, + trace: this.getSortedTrace(), + }); + + return result; + } + + private getAgent(name: string): { tier: ModelTier; systemPrompt: string } { + const agent = this._registry.get(name); + if (!agent) { + throw new Error(`Council agent "${name}" is not configured in agent_configs`); + } + return { + tier: agent.modelTier ?? 'default', + systemPrompt: agent.systemPrompt ?? `You are ${name}.`, + }; + } + + private async callAgent(opts: { + agentName: string; + callId: string; + phaseIndex: number; + group?: CouncilGroup; + round?: number; + promptPayload: unknown; + modeDirective: string; + maxTokens?: number; + }): Promise { + const agent = this.getAgent(opts.agentName); + const message = canonicalStringify(opts.promptPayload); + const promptHash = hashCanonical(opts.promptPayload); + const systemPrompt = `${agent.systemPrompt}\n\n${opts.modeDirective}`; + + const result = await this._delegateRunner.delegate({ + tier: agent.tier, + systemPrompt, + message, + maxTokens: opts.maxTokens ?? 4096, + }); + + this._trace.push(councilTraceEventSchema.parse({ + schema_version: COUNCIL_SCHEMA_VERSION, + event_id: `${opts.phaseIndex}:${opts.callId}`, + phase_index: opts.phaseIndex, + call_id: opts.callId, + group: opts.group, + round: opts.round, + prompt_payload_hash: promptHash, + artifact_hash: hashCanonical(result.content), + token_usage: result.usage, + })); + + return result; + } + + private allocateIdeaId(group: CouncilGroup, round: number, index: number): string { + return `${group}.r${round}.${String(index + 1).padStart(2, '0')}`; + } + + private async runGroupRound( + group: CouncilGroup, + round: number, + input: CouncilRunInput, + peerBridge?: BridgePacket, + previousBrief?: CouncilBrief, + ): Promise { + const groupConfig = this._config.groups[group]; + const phaseBase = round * 10 + (group === 'D' ? 
1 : 2); + + const ideationPayload = { + input, + group, + round, + profile: { + group_prompt_prefix: groupConfig.group_prompt_prefix, + novelty_bias: groupConfig.novelty_bias, + risk_tolerance: groupConfig.risk_tolerance, + forbidden_approaches: groupConfig.forbidden_approaches, + }, + peer_bridge: peerBridge, + previous_shortlist: previousBrief?.shortlist, + }; + + const ideation = await this.callAgent({ + agentName: groupConfig.freethinker_agent, + callId: `${group}.r${round}.ft.ideation`, + phaseIndex: phaseBase, + group, + round, + promptPayload: ideationPayload, + modeDirective: 'Return JSON only: {"ideas":[IdeaContent,...]}. Do not include IDs. No prose.', + }); + + const ideaOutput = parseJsonWithRepair(ideation.content, (value) => ideationOutputSchema.parse(value)); + const ideaCards: IdeaCard[] = ideaOutput.ideas + .slice(0, this._config.defaults.ideas_per_round) + .map((idea, index) => ({ + schema_version: COUNCIL_SCHEMA_VERSION, + idea_id: this.allocateIdeaId(group, round, index), + group, + round, + content: idea, + })); + + const assessmentPayload = { + input, + group, + round, + ideas: ideaCards.map((idea) => ({ idea_id: idea.idea_id, ...idea.content })), + peer_bridge: peerBridge, + previous_shortlist: previousBrief?.shortlist, + }; + + const assessmentRaw = await this.callAgent({ + agentName: groupConfig.arbiter_agent, + callId: `${group}.r${round}.arb.assess`, + phaseIndex: phaseBase + 1, + group, + round, + promptPayload: assessmentPayload, + modeDirective: + 'Return JSON only. Assess provided idea IDs only. No new IDs. Include convergence_signal/novelty_score/repetition_rate.', + }); + + const assessmentOutput = parseJsonWithRepair(assessmentRaw.content, (value) => assessmentOutputSchema.parse(value)); + const validIdeaIds = new Set(ideaCards.map((i) => i.idea_id)); + const assessments: IdeaAssessment[] = uniq(assessmentOutput.assessments.map((a) => a.idea_id)) + .map((ideaId) => assessmentOutput.assessments.find((a) => a.idea_id === ideaId)!) 
+ .filter((assessment) => validIdeaIds.has(assessment.idea_id)) + .map((assessment) => ({ + schema_version: COUNCIL_SCHEMA_VERSION, + ...assessment, + })); + + for (const idea of ideaCards) { + if (!assessments.find((a) => a.idea_id === idea.idea_id)) { + assessments.push({ + schema_version: COUNCIL_SCHEMA_VERSION, + idea_id: idea.idea_id, + scores: { novelty: 0, feasibility: 0, impact: 0, testability: 0 }, + decision: 'hold', + notes: 'Missing assessment from arbiter output.', + }); + } + } + + const shortlist = assessments + .filter((a) => a.decision === 'shortlist') + .sort((a, b) => computeTotalScore(b) - computeTotalScore(a) || a.idea_id.localeCompare(b.idea_id)) + .map((a) => a.idea_id); + + const groundingPayload = { + group, + round, + shortlisted_ideas: ideaCards + .filter((idea) => shortlist.includes(idea.idea_id)) + .map((idea) => ({ + idea_id: idea.idea_id, + ...idea.content, + })), + success_definition: input.success_definition, + constraints: input.constraints, + }; + + let groundingFailures = 0; + let grounding = { grounded: [] as Array<{ idea_id: string; mve: string; constraints: string[]; falsifiability_checks: string[] }> }; + try { + const groundingRaw = await this.callAgent({ + agentName: groupConfig.freethinker_agent, + callId: `${group}.r${round}.ft.ground`, + phaseIndex: phaseBase + 2, + group, + round, + promptPayload: groundingPayload, + modeDirective: + 'Grounder mode. Return JSON only: {"grounded":[{"idea_id", "mve", "constraints", "falsifiability_checks"}]}. 
No prose.', + }); + grounding = parseJsonWithRepair(groundingRaw.content, (value) => groundingOutputSchema.parse(value)); + } catch { + groundingFailures = shortlist.length; + if (this._config.strict_grounding) { + throw new Error('grounding_failed'); + } + } + + const groundingMap = new Map(grounding.grounded.map((item) => [item.idea_id, item])); + const groundedIdeas = ideaCards.map((idea) => { + const grounded = groundingMap.get(idea.idea_id); + if (!grounded) { + if (shortlist.includes(idea.idea_id)) { + groundingFailures += 1; + } + return { + ...idea, + grounding_failed: shortlist.includes(idea.idea_id), + }; + } + return { + ...idea, + grounding: { + mve: grounded.mve, + constraints: grounded.constraints, + falsifiability_checks: grounded.falsifiability_checks, + }, + }; + }); + + const brief = councilBriefSchema.parse({ + schema_version: COUNCIL_SCHEMA_VERSION, + group, + round, + ideas: groundedIdeas, + assessments, + shortlist, + assumptions: assessmentOutput.assumptions, + risks: assessmentOutput.risks, + asks: assessmentOutput.asks, + what_to_steal: assessmentOutput.what_to_steal, + convergence_signal: assessmentOutput.convergence_signal, + novelty_score: assessmentOutput.novelty_score, + repetition_rate: assessmentOutput.repetition_rate, + }); + + const convergenceQualified = previousBrief + ? 
this.evaluateConvergence(previousBrief, brief) + : false; + + return { + brief, + convergenceQualified, + groundingFailures, + }; + } + + private evaluateConvergence(previousBrief: CouncilBrief, currentBrief: CouncilBrief): boolean { + if (currentBrief.round < 2) { + return false; + } + + const stableShortlist = previousBrief.shortlist.join('|') === currentBrief.shortlist.join('|'); + const noveltyDelta = Math.abs(previousBrief.novelty_score - currentBrief.novelty_score); + const lowNoveltyDelta = noveltyDelta <= this._config.defaults.novelty_delta_threshold; + const highRepetition = currentBrief.repetition_rate >= this._config.defaults.repetition_threshold; + const deterministicSignal = stableShortlist || lowNoveltyDelta || highRepetition; + + return currentBrief.convergence_signal && deterministicSignal; + } + + private enforceBridgeCaps(packet: BridgePacket): void { + const defaults = this._config.defaults; + if (packet.top_ideas.length > defaults.top_ideas_for_bridge) { + throw new Error('cap_exceeded'); + } + + const bulletFields = [packet.assumptions, packet.risks, packet.asks, packet.what_to_steal]; + for (const value of bulletFields) { + if (value.length > defaults.bridge_field_max_bullets) { + throw new Error('cap_exceeded'); + } + if (value.some((v) => v.length > defaults.bridge_entry_max_chars)) { + throw new Error('cap_exceeded'); + } + } + + if (packet.top_ideas.some((idea) => idea.mechanism.length > defaults.bridge_entry_max_chars)) { + throw new Error('cap_exceeded'); + } + + const total = canonicalStringify(packet).length; + if (total > defaults.bridge_packet_max_chars) { + throw new Error('cap_exceeded'); + } + } + + private buildBridgePacket(fromBrief: CouncilBrief, toGroup: CouncilGroup): BridgePacket { + const assessmentMap = new Map(fromBrief.assessments.map((a) => [a.idea_id, a])); + const topIdeas = fromBrief.shortlist + .slice(0, this._config.defaults.top_ideas_for_bridge) + .map((ideaId) => { + const idea = fromBrief.ideas.find((i) => 
i.idea_id === ideaId); + const assessment = assessmentMap.get(ideaId); + if (!idea || !assessment) { + throw new Error(`unknown_id:${ideaId}`); + } + return { + idea_id: idea.idea_id, + mechanism: idea.content.mechanism, + rationale: assessment.notes, + }; + }); + + const packet = bridgePacketSchema.parse({ + schema_version: COUNCIL_SCHEMA_VERSION, + from_group: fromBrief.group, + to_group: toGroup, + round: fromBrief.round, + top_ideas: topIdeas, + assumptions: fromBrief.assumptions, + risks: fromBrief.risks, + asks: fromBrief.asks, + what_to_steal: fromBrief.what_to_steal, + }); + + this.enforceBridgeCaps(packet); + return packet; + } + + private async runMetaMerge(input: CouncilRunInput, briefD: CouncilBrief, briefP: CouncilBrief) { + const metaAgentName = this._config.meta_arbiter_agent; + const payload = { + input, + brief_D: briefD, + brief_P: briefP, + instructions: { + no_novel_mechanisms: true, + require_source_ids: true, + }, + }; + + const metaRaw = await this.callAgent({ + agentName: metaAgentName, + callId: 'meta.merge', + phaseIndex: 999, + promptPayload: payload, + modeDirective: + 'Return JSON only following schema with selected_primary/selected_secondary/merges/rejections/open_questions/next_experiments. 
/**
 * Returns the recorded trace events in a stable, reproducible order.
 *
 * Events are ordered by `phase_index`, then by `call_id`. The `call_id`
 * tie-break compares UTF-16 code units rather than using `localeCompare`,
 * because locale-aware collation depends on the host's default locale and ICU
 * data and could therefore order the same trace differently on different
 * machines.
 *
 * Each optional field is passed through `normalizeOptional` (which maps
 * `null`, empty arrays and blank strings to `undefined`) and every event is
 * re-validated with `councilTraceEventSchema`.
 *
 * @returns A new array; `this._trace` itself is not reordered.
 * @throws Whatever `councilTraceEventSchema.parse` throws when an event does
 *   not satisfy the schema.
 */
private getSortedTrace(): CouncilTraceEvent[] {
  // Copy first: Array.prototype.sort mutates the array it is called on.
  const ordered = [...this._trace].sort((a, b) => {
    if (a.phase_index !== b.phase_index) {
      return a.phase_index - b.phase_index;
    }
    if (a.call_id === b.call_id) {
      return 0;
    }
    // Locale-independent comparison keeps the order identical on every host.
    return a.call_id < b.call_id ? -1 : 1;
  });

  return ordered.map((event) =>
    councilTraceEventSchema.parse({
      ...event,
      artifact_hash: normalizeOptional(event.artifact_hash),
      group: normalizeOptional(event.group),
      round: normalizeOptional(event.round),
      token_usage: normalizeOptional(event.token_usage),
      dropped_reason: normalizeOptional(event.dropped_reason),
      validation_failure: normalizeOptional(event.validation_failure),
    }),
  );
}
/** One scoring dimension: an integer in the inclusive range 0–100. */
const scoreDimensionSchema = z.number().int().min(0).max(100);

/**
 * The four scores attached to an idea assessment. Each dimension is an
 * integer from 0 to 100, and keys other than the four listed are rejected.
 */
export const scoreSetSchema = z
  .object({
    novelty: scoreDimensionSchema,
    feasibility: scoreDimensionSchema,
    impact: scoreDimensionSchema,
    testability: scoreDimensionSchema,
  })
  .strict();
/** List of non-empty strings, used for every bullet-style field of a brief. */
const briefBulletListSchema = z.array(z.string().min(1));

/** Integer in the inclusive range 0–100, used for the brief-level metrics. */
const briefPercentSchema = z.number().int().min(0).max(100);

/**
 * A council group's output for one round: its idea cards, their assessments,
 * the shortlisted idea ids, the bullet lists exchanged over the bridge, and
 * the convergence metrics reported for the round. Unknown keys are rejected.
 */
export const councilBriefSchema = z
  .object({
    schema_version: schemaVersionField,
    group: councilGroupSchema,
    round: z.number().int().min(1),
    ideas: z.array(ideaCardSchema),
    assessments: z.array(ideaAssessmentSchema),
    shortlist: briefBulletListSchema,
    assumptions: briefBulletListSchema,
    risks: briefBulletListSchema,
    asks: briefBulletListSchema,
    what_to_steal: briefBulletListSchema,
    convergence_signal: z.boolean(),
    novelty_score: briefPercentSchema,
    repetition_rate: briefPercentSchema,
  })
  .strict();
/**
 * Digest string of exactly 64 characters.
 * NOTE(review): 64 is the length of a hex-encoded SHA-256 digest — confirm
 * that producers always hash with SHA-256.
 */
const traceDigestSchema = z.string().length(64);

/** Non-negative integer token counts for a single agent call. */
const traceTokenUsageSchema = z
  .object({
    inputTokens: z.number().int().min(0),
    outputTokens: z.number().int().min(0),
  })
  .strict();

/**
 * One entry of the run trace. `phase_index` and `call_id` identify the call;
 * the group, round, artifact hash, token usage and the two failure/drop
 * reasons are optional. Unknown keys are rejected.
 */
export const councilTraceEventSchema = z
  .object({
    schema_version: schemaVersionField,
    event_id: z.string().min(1),
    phase_index: z.number().int().min(1),
    call_id: z.string().min(1),
    group: councilGroupSchema.optional(),
    round: z.number().int().min(1).optional(),
    prompt_payload_hash: traceDigestSchema,
    artifact_hash: traceDigestSchema.optional(),
    token_usage: traceTokenUsageSchema.optional(),
    dropped_reason: droppedReasonSchema.optional(),
    validation_failure: validationFailureReasonSchema.optional(),
  })
  .strict();
/** List of non-empty strings, used for the bullet fields of an assessment reply. */
const assessmentBulletListSchema = z.array(z.string().min(1));

/** Integer in the inclusive range 0–100, used for the reply-level metrics. */
const assessmentPercentSchema = z.number().int().min(0).max(100);

/** A single idea's verdict as it appears in an assessment reply (no schema_version field). */
const assessmentEntrySchema = z
  .object({
    idea_id: z.string().min(1),
    scores: scoreSetSchema,
    decision: z.enum(['shortlist', 'hold', 'reject']),
    notes: z.string().min(1),
  })
  .strict();

/**
 * Shape of an assessment reply: per-idea verdicts, the four bullet lists, and
 * the convergence metrics. Unknown keys are rejected at both levels.
 */
export const assessmentOutputSchema = z
  .object({
    assessments: z.array(assessmentEntrySchema),
    assumptions: assessmentBulletListSchema,
    risks: assessmentBulletListSchema,
    asks: assessmentBulletListSchema,
    what_to_steal: assessmentBulletListSchema,
    convergence_signal: z.boolean(),
    novelty_score: assessmentPercentSchema,
    repetition_rate: assessmentPercentSchema,
  })
  .strict();
/**
 * Test double for the agent registry. It knows the five council agents used
 * by these tests, each with model tier 'default', and exposes only `get` and
 * `list`; the result is cast to `AgentConfigRegistry`.
 */
function createRegistry(): AgentConfigRegistry {
  // Agent name -> system prompt, in registration order.
  const promptsByAgent: Array<[name: string, systemPrompt: string]> = [
    ['council_d_arbiter', 'D Arbiter'],
    ['council_d_freethinker', 'D FT'],
    ['council_p_arbiter', 'P Arbiter'],
    ['council_p_freethinker', 'P FT'],
    ['council_meta_arbiter', 'Meta'],
  ];

  const registered = new Map<string, { name: string; modelTier: string; systemPrompt: string }>();
  for (const [name, systemPrompt] of promptsByAgent) {
    registered.set(name, { name, modelTier: 'default', systemPrompt });
  }

  const stub = {
    get(name: string) {
      return registered.get(name);
    },
    list() {
      return Array.from(registered.values());
    },
  };

  // Only the two members above are implemented, hence the double cast.
  return stub as unknown as AgentConfigRegistry;
}
selected_primary: [payload.brief_D.shortlist[0]], + selected_secondary: [payload.brief_P.shortlist[0]], + merges: [], + rejections: [], + open_questions: ['q1'], + next_experiments: ['e1'], + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + if (payload.shortlisted_ideas) { + return { + content: JSON.stringify({ grounded: payload.shortlisted_ideas.map((idea: any) => ({ idea_id: idea.idea_id, mve: 'm', constraints: ['c'], falsifiability_checks: ['f'] })) }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + if (payload.ideas) { + return { + content: JSON.stringify({ + assessments: payload.ideas.map((idea: any, idx: number) => ({ + idea_id: idea.idea_id, + scores: { novelty: 50, feasibility: 50, impact: 50, testability: 50 }, + decision: idx === 0 ? 'shortlist' : 'hold', + notes: 'note', + })), + assumptions: ['a'], + risks: ['r'], + asks: ['k'], + what_to_steal: ['w'], + convergence_signal: false, + novelty_score: 60, + repetition_rate: 10, + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + } + return { + content: JSON.stringify({ + ideas: [ + { title: 't1', hypothesis: 'h1', mechanism: 'm1', expected_outcome: 'o1' }, + { title: 't2', hypothesis: 'h2', mechanism: 'm2', expected_outcome: 'o2' }, + ], + }), + usage: { inputTokens: 10, outputTokens: 5 }, + tier: 'default' as const, + }; + }); + + const tool = createCouncilRunTool({ + registry: createRegistry(), + orchestrator: { delegate }, + config: config as any, + }); + + const result = await tool.execute({ task: 'plan migration' }); + expect(result.success).toBe(true); + expect(result.output).toContain('Council pipeline v1.0.0'); + expect(result.output).toContain('Meta selection'); + }); + + it('returns error on invalid input', async () => { + const tool = createCouncilRunTool({ + registry: createRegistry(), + orchestrator: { delegate: vi.fn() as any }, + config: config as any, + }); + const result = 
/**
 * Creates the `council.run` tool.
 *
 * On execution the raw arguments are validated with `councilRunInputSchema`,
 * a fresh `CouncilsOrchestrator` is built from `deps`, and the pipeline is
 * run. The tool output is a short human-readable summary followed by a blank
 * line and the full result serialized as JSON.
 *
 * @param deps Registry, delegate runner and councils configuration handed to the orchestrator.
 * @returns The tool definition. `execute` never rejects: any thrown value
 *   (invalid input included) is reported as `{ success: false, output: '', error }`.
 */
export function createCouncilRunTool(deps: CouncilRunDeps): Tool {
  // Comma-separated ids, or the literal 'none' for an empty list.
  const listOrNone = (ids: readonly string[]): string => ids.join(', ') || 'none';

  const execute = async (rawArgs: unknown): Promise<ToolResult> => {
    try {
      const input = councilRunInputSchema.parse(rawArgs);
      const { registry, orchestrator, config } = deps;
      const council = new CouncilsOrchestrator({ registry, orchestrator, config });
      const result = await council.run(input);
      const { stop_snapshot: snapshot, meta } = result;

      const summary = [
        `[Council pipeline v${result.pipeline_version}]`,
        `Stop reason: ${snapshot.stop_reason} (round ${snapshot.round_reached})`,
        `D shortlist: ${listOrNone(snapshot.final_shortlist_D)}`,
        `P shortlist: ${listOrNone(snapshot.final_shortlist_P)}`,
        `Bridge validated: ${snapshot.bridge_validated ? 'yes' : 'no'}`,
        `Grounding failures: ${snapshot.grounding_failures_count}`,
        '',
        'Meta selection:',
        `- Primary: ${listOrNone(meta.selected_primary)}`,
        `- Secondary: ${listOrNone(meta.selected_secondary)}`,
        `- Open questions: ${meta.open_questions.length}`,
        `- Next experiments: ${meta.next_experiments.length}`,
      ].join('\n');

      return {
        success: true,
        output: `${summary}\n\n${JSON.stringify(result)}`,
      };
    } catch (error: unknown) {
      // Validation and pipeline failures are reported through the result, not thrown.
      const message = error instanceof Error ? error.message : String(error);
      return { success: false, output: '', error: message };
    }
  };

  return {
    name: 'council.run',
    description:
      'Run the deterministic dual-council pipeline (D/P groups with bridge-only exchange and meta merge).',
    inputSchema: {
      type: 'object',
      properties: {
        task: { type: 'string', description: 'Primary task or question to explore' },
        constraints: { type: 'object', description: 'Optional constraints object (or pass string)' },
        success_definition: { type: 'string', description: 'What success looks like for this run' },
        budget: { type: 'object', description: 'Optional budget limits/time/cost constraints' },
        timebox: { type: 'string', description: 'Optional timebox (e.g. 30m)' },
        output_format: { type: 'string', description: 'Desired output format' },
        max_rounds: { type: 'number', description: 'Override configured max rounds (1-6)' },
        session_id: { type: 'string', description: 'Optional external session/run id' },
      },
      required: ['task'],
    },
    execute,
  };
}
'./builtin/index.js'; export type { AgentDelegateDeps } from './builtin/index.js'; +export type { CouncilRunDeps } from './builtin/index.js'; export type { WebSearchConfig } from './builtin/web-search.js'; export type { ProcessManagerConfig } from './builtin/process/index.js'; export type { BrowserManagerConfig } from './builtin/browser/index.js'; diff --git a/src/tools/policy.ts b/src/tools/policy.ts index 7119704..c4ae444 100644 --- a/src/tools/policy.ts +++ b/src/tools/policy.ts @@ -47,6 +47,7 @@ const PROFILE_TOOLS: Record> = { 'k8s.logs', 'agent.delegate', 'agents.list', + 'council.run', ]), coding: new Set([ 'file.read', @@ -101,6 +102,7 @@ const PROFILE_TOOLS: Record> = { 'browser.evaluate', 'agent.delegate', 'agents.list', + 'council.run', ]), full: new Set(), // Special: matches everything }; @@ -121,7 +123,7 @@ export const TOOL_GROUPS: Record = { 'group:cron': ['cron.list', 'cron.trigger', 'cron.create', 'cron.delete'], 'group:minio': ['minio.share', 'minio.ingest', 'minio.sync'], 'group:k8s': ['k8s.pods', 'k8s.deployments', 'k8s.logs'], - 'group:agents': ['agent.delegate', 'agents.list'], + 'group:agents': ['agent.delegate', 'agents.list', 'council.run'], }; /** Expand group references in a list of tool names/patterns. */