feat(councils): add preflight, schema-driven outputs, and artifact reporting
This commit is contained in:
@@ -152,6 +152,7 @@ describe('CouncilsOrchestrator', () => {
|
||||
expect(result.pipeline_version).toBe('1.0.0');
|
||||
expect(result.brief_D_v1.ideas[0].idea_id).toBe('D.r1.01');
|
||||
expect(result.brief_P_v1.ideas[0].idea_id).toBe('P.r1.01');
|
||||
expect(typeof result.trace[0]?.latency_ms).toBe('number');
|
||||
expect(result.stop_snapshot.stop_reason).toBe('convergence');
|
||||
const callIds = result.trace.map((e) => e.call_id);
|
||||
expect(callIds).toEqual([...callIds].sort((a, b) => {
|
||||
@@ -244,6 +245,102 @@ describe('CouncilsOrchestrator', () => {
|
||||
expect(JSON.stringify(pRound2Payload.peer_bridge)).toContain('D-MARKER');
|
||||
});
|
||||
|
||||
it('requests json_schema response format for council structured phases', async () => {
|
||||
const delegate = vi.fn(async ({ message }: { message: string }) => {
|
||||
const payload = JSON.parse(message);
|
||||
|
||||
if (payload.brief_D && payload.brief_P) {
|
||||
return {
|
||||
content: JSON.stringify({
|
||||
schema_version: '1.0.0',
|
||||
selected_primary: [payload.brief_D.shortlist[0]],
|
||||
selected_secondary: [payload.brief_P.shortlist[0]],
|
||||
merges: [],
|
||||
rejections: [],
|
||||
open_questions: ['q1'],
|
||||
next_experiments: ['e1'],
|
||||
}),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
|
||||
if (payload.shortlisted_ideas) {
|
||||
return {
|
||||
content: JSON.stringify({
|
||||
grounded: payload.shortlisted_ideas.map((idea: { idea_id: string }) => ({
|
||||
idea_id: idea.idea_id,
|
||||
mve: `mve-${idea.idea_id}`,
|
||||
constraints: ['c1'],
|
||||
falsifiability_checks: ['f1'],
|
||||
})),
|
||||
}),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
|
||||
if (payload.ideas) {
|
||||
return {
|
||||
content: JSON.stringify({
|
||||
assessments: payload.ideas.map((idea: { idea_id: string }, idx: number) => ({
|
||||
idea_id: idea.idea_id,
|
||||
scores: { novelty: 60, feasibility: 70, impact: 80, testability: 90 },
|
||||
decision: idx === 0 ? 'shortlist' : 'hold',
|
||||
notes: 'ok',
|
||||
})),
|
||||
assumptions: ['a1'],
|
||||
risks: ['r1'],
|
||||
asks: ['q1'],
|
||||
what_to_steal: ['w1'],
|
||||
convergence_signal: false,
|
||||
novelty_score: 60,
|
||||
repetition_rate: 10,
|
||||
}),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
content: JSON.stringify({
|
||||
ideas: [
|
||||
{ title: 't1', hypothesis: 'h1', mechanism: 'm1', expected_outcome: 'o1' },
|
||||
{ title: 't2', hypothesis: 'h2', mechanism: 'm2', expected_outcome: 'o2' },
|
||||
],
|
||||
}),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
});
|
||||
|
||||
const orchestrator = new CouncilsOrchestrator({
|
||||
registry: createRegistry(),
|
||||
orchestrator: { delegate },
|
||||
config: createConfig({ defaults: { ...createConfig().defaults, max_rounds: 1 } }),
|
||||
});
|
||||
|
||||
await orchestrator.run({ task: 'json schema check' });
|
||||
|
||||
const schemaNames = delegate.mock.calls
|
||||
.map((call) => (call[0] as { responseFormat?: { type: string; name?: string } }).responseFormat)
|
||||
.filter((rf): rf is { type: string; name?: string } => Boolean(rf))
|
||||
.filter((rf) => rf.type === 'json_schema')
|
||||
.map((rf) => rf.name);
|
||||
|
||||
expect(schemaNames).toContain('council_ideation');
|
||||
expect(schemaNames).toContain('council_assessment');
|
||||
expect(schemaNames).toContain('council_grounding');
|
||||
expect(schemaNames).toContain('council_meta');
|
||||
|
||||
const metaResponseFormat = delegate.mock.calls
|
||||
.map((call) => (call[0] as { responseFormat?: Record<string, unknown> }).responseFormat)
|
||||
.find((rf) => rf && rf.type === 'json_schema' && rf.name === 'council_meta') as
|
||||
| { schema?: { required?: string[] } }
|
||||
| undefined;
|
||||
expect(metaResponseFormat?.schema?.required).toContain('merges');
|
||||
});
|
||||
|
||||
it('fails closed on bridge cap overflow before phase 2 executes', async () => {
|
||||
const delegate = vi.fn(async ({ message }: { message: string }) => {
|
||||
const payload = JSON.parse(message);
|
||||
@@ -314,4 +411,265 @@ describe('CouncilsOrchestrator', () => {
|
||||
expect(result.stop_snapshot.stop_reason).toBe('bridge_validation_failed');
|
||||
expect(result.stop_snapshot.round_reached).toBe(1);
|
||||
});
|
||||
|
||||
it('recovers from wrapped or mildly malformed model JSON responses', async () => {
|
||||
const delegate = vi.fn(async ({ message }: { message: string }) => {
|
||||
const payload = JSON.parse(message);
|
||||
if (payload.brief_D && payload.brief_P) {
|
||||
return {
|
||||
content: [
|
||||
'```json',
|
||||
'{result: {schema_version: \'1.0.0\', selected_primary: [\'D.r1.01\'], selected_secondary: [\'P.r1.01\'], merges: [], rejections: [], open_questions: [\'q1\'], next_experiments: [\'e1\']}}',
|
||||
'```',
|
||||
].join('\n'),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
if (payload.shortlisted_ideas) {
|
||||
return {
|
||||
content: '{"output":{"grounded":[{"idea_id":"D.r1.01","mve":"m","constraints":["c"],"falsifiability_checks":["f"]}]}}',
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
if (payload.ideas) {
|
||||
return {
|
||||
content: [
|
||||
'Assessment follows.',
|
||||
'```json',
|
||||
'{data: {assessments: [{idea_id: "D.r1.01", scores: {novelty: 60, feasibility: 70, impact: 80, testability: 90}, decision: "shortlist", notes: "ok"}], assumptions: ["a"], risks: ["r"], asks: ["k"], what_to_steal: ["w"], convergence_signal: false, novelty_score: 60, repetition_rate: 20}}',
|
||||
'```',
|
||||
].join('\n'),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
if (payload.group && payload.round) {
|
||||
return {
|
||||
content: [
|
||||
'Here is the draft:',
|
||||
'```json',
|
||||
'{output: {ideas: [{title: \'t1\', hypothesis: \'h1\', mechanism: \'m1\', expected_outcome: \'o1\'}]}}',
|
||||
'```',
|
||||
].join('\n'),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
throw new Error('Unexpected payload');
|
||||
});
|
||||
|
||||
const orchestrator = new CouncilsOrchestrator({
|
||||
registry: createRegistry(),
|
||||
orchestrator: { delegate },
|
||||
config: createConfig(),
|
||||
});
|
||||
|
||||
const result = await orchestrator.run({ task: 'x', max_rounds: 1 });
|
||||
expect(result.stop_snapshot.round_reached).toBe(1);
|
||||
expect(result.meta.selected_primary[0]).toBe('D.r1.01');
|
||||
expect(result.meta.selected_secondary[0]).toBe('P.r1.01');
|
||||
});
|
||||
|
||||
it('retries once with agent-assisted JSON repair when initial output is invalid', async () => {
|
||||
const repairPayloads: Array<Record<string, unknown>> = [];
|
||||
const seenIdeation = new Set<string>();
|
||||
|
||||
const delegate = vi.fn(async ({ message }: { message: string }) => {
|
||||
const payload = JSON.parse(message) as Record<string, any>;
|
||||
|
||||
if (payload.task === 'normalize_json_output') {
|
||||
repairPayloads.push(payload);
|
||||
const schema = String(payload.required_schema ?? '');
|
||||
if (schema.includes('"ideas"')) {
|
||||
return {
|
||||
content: JSON.stringify({
|
||||
ideas: [
|
||||
{
|
||||
title: 'repaired idea',
|
||||
hypothesis: 'h',
|
||||
mechanism: 'm',
|
||||
expected_outcome: 'o',
|
||||
},
|
||||
],
|
||||
}),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
throw new Error('Unexpected repair schema');
|
||||
}
|
||||
|
||||
if (payload.brief_D && payload.brief_P) {
|
||||
return {
|
||||
content: JSON.stringify({
|
||||
schema_version: '1.0.0',
|
||||
selected_primary: [payload.brief_D.shortlist[0]],
|
||||
selected_secondary: [payload.brief_P.shortlist[0]],
|
||||
merges: [],
|
||||
rejections: [],
|
||||
open_questions: ['q1'],
|
||||
next_experiments: ['e1'],
|
||||
}),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
if (payload.shortlisted_ideas) {
|
||||
return {
|
||||
content: JSON.stringify({
|
||||
grounded: payload.shortlisted_ideas.map((idea: any) => ({
|
||||
idea_id: idea.idea_id,
|
||||
mve: 'm',
|
||||
constraints: ['c'],
|
||||
falsifiability_checks: ['f'],
|
||||
})),
|
||||
}),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
if (payload.ideas) {
|
||||
return {
|
||||
content: JSON.stringify({
|
||||
assessments: payload.ideas.map((idea: any, idx: number) => ({
|
||||
idea_id: idea.idea_id,
|
||||
scores: { novelty: 50, feasibility: 50, impact: 50, testability: 50 },
|
||||
decision: idx === 0 ? 'shortlist' : 'hold',
|
||||
notes: 'ok',
|
||||
})),
|
||||
assumptions: [],
|
||||
risks: [],
|
||||
asks: [],
|
||||
what_to_steal: [],
|
||||
convergence_signal: false,
|
||||
novelty_score: 60,
|
||||
repetition_rate: 20,
|
||||
}),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
if (payload.group && payload.round) {
|
||||
const key = `${payload.group}:${payload.round}`;
|
||||
if (!seenIdeation.has(key)) {
|
||||
seenIdeation.add(key);
|
||||
return {
|
||||
content: 'this is not JSON',
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
return {
|
||||
content: JSON.stringify({
|
||||
ideas: [
|
||||
{
|
||||
title: `${payload.group} idea`,
|
||||
hypothesis: 'h',
|
||||
mechanism: 'm',
|
||||
expected_outcome: 'o',
|
||||
},
|
||||
],
|
||||
}),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
|
||||
throw new Error('Unexpected payload');
|
||||
});
|
||||
|
||||
const orchestrator = new CouncilsOrchestrator({
|
||||
registry: createRegistry(),
|
||||
orchestrator: { delegate },
|
||||
config: createConfig({ defaults: { ...createConfig().defaults, max_rounds: 1 } }),
|
||||
});
|
||||
|
||||
const result = await orchestrator.run({ task: 'x', max_rounds: 1 });
|
||||
expect(result.stop_snapshot.round_reached).toBe(1);
|
||||
expect(repairPayloads.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('falls back instead of throwing when repair output is still invalid', async () => {
|
||||
const delegate = vi.fn(async ({ message }: { message: string }) => {
|
||||
const payload = JSON.parse(message) as Record<string, any>;
|
||||
|
||||
if (payload.task === 'normalize_json_output') {
|
||||
return {
|
||||
content: 'still invalid json',
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
|
||||
if (payload.brief_D && payload.brief_P) {
|
||||
return {
|
||||
content: JSON.stringify({
|
||||
schema_version: '1.0.0',
|
||||
selected_primary: [payload.brief_D.shortlist[0]],
|
||||
selected_secondary: [payload.brief_P.shortlist[0]],
|
||||
merges: [],
|
||||
rejections: [],
|
||||
open_questions: ['q1'],
|
||||
next_experiments: ['e1'],
|
||||
}),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
|
||||
if (payload.shortlisted_ideas) {
|
||||
return {
|
||||
content: JSON.stringify({ grounded: [] }),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
|
||||
if (payload.ideas) {
|
||||
return {
|
||||
content: JSON.stringify({
|
||||
assessments: payload.ideas.map((idea: any, idx: number) => ({
|
||||
idea_id: idea.idea_id,
|
||||
scores: { novelty: 50, feasibility: 50, impact: 50, testability: 50 },
|
||||
decision: idx === 0 ? 'shortlist' : 'hold',
|
||||
notes: 'ok',
|
||||
})),
|
||||
assumptions: [],
|
||||
risks: [],
|
||||
asks: [],
|
||||
what_to_steal: [],
|
||||
convergence_signal: false,
|
||||
novelty_score: 60,
|
||||
repetition_rate: 20,
|
||||
}),
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
|
||||
if (payload.group && payload.round) {
|
||||
return {
|
||||
content: 'not json at all',
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
tier: 'default' as const,
|
||||
};
|
||||
}
|
||||
|
||||
throw new Error('Unexpected payload');
|
||||
});
|
||||
|
||||
const orchestrator = new CouncilsOrchestrator({
|
||||
registry: createRegistry(),
|
||||
orchestrator: { delegate },
|
||||
config: createConfig({ defaults: { ...createConfig().defaults, max_rounds: 1 } }),
|
||||
});
|
||||
|
||||
const result = await orchestrator.run({ task: 'x', max_rounds: 1 });
|
||||
expect(result.stop_snapshot.round_reached).toBe(1);
|
||||
expect(result.brief_D_v1.ideas.length).toBeGreaterThan(0);
|
||||
expect(result.brief_P_v1.ideas.length).toBeGreaterThan(0);
|
||||
expect(result.trace.some((event) => event.validation_failure === 'repair_failed')).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
+540
-10
@@ -1,7 +1,7 @@
|
||||
import type { AgentConfigRegistry } from '../agents/registry.js';
|
||||
import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
|
||||
import type { ModelTier } from '../models/router.js';
|
||||
import type { TokenUsage } from '../models/types.js';
|
||||
import type { ChatResponseFormat, TokenUsage } from '../models/types.js';
|
||||
import type { CouncilScaffold } from './scaffold.js';
|
||||
import {
|
||||
COUNCIL_PIPELINE_VERSION,
|
||||
@@ -36,6 +36,7 @@ interface DelegateRunner {
|
||||
systemPrompt: string;
|
||||
message: string;
|
||||
maxTokens?: number;
|
||||
responseFormat?: ChatResponseFormat;
|
||||
}): Promise<{
|
||||
content: string;
|
||||
usage: TokenUsage;
|
||||
@@ -90,24 +91,171 @@ interface AgentCallResult {
|
||||
usage: TokenUsage;
|
||||
}
|
||||
|
||||
interface JsonRecoveryOptions<T> {
|
||||
rawContent: string;
|
||||
parser: (value: unknown) => T;
|
||||
agentName: string;
|
||||
callId: string;
|
||||
phaseIndex: number;
|
||||
promptPayload: unknown;
|
||||
modeDirective: string;
|
||||
schemaHint: string;
|
||||
group?: CouncilGroup;
|
||||
round?: number;
|
||||
scaffoldPrompt?: string;
|
||||
tierOverride?: ModelTier;
|
||||
maxTokens?: number;
|
||||
responseFormat?: ChatResponseFormat;
|
||||
fallback?: () => T;
|
||||
}
|
||||
|
||||
function deterministicJsonRepair(raw: string): string | null {
|
||||
const trimmed = raw.trim();
|
||||
const noFence = trimmed
|
||||
.replace(/^```json\s*/i, '')
|
||||
.replace(/^```\s*/i, '')
|
||||
.replace(/```$/, '')
|
||||
.trim();
|
||||
.trim()
|
||||
.replace(/[\u201C\u201D]/g, '"')
|
||||
.replace(/[\u2018\u2019]/g, '\'');
|
||||
|
||||
const extracted = extractFirstJsonContainer(noFence);
|
||||
if (!extracted) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return extracted
|
||||
const withoutComments = stripJsonLikeComments(extracted);
|
||||
const withQuotedKeys = withoutComments.replace(
|
||||
/([{,]\s*)([A-Za-z_][A-Za-z0-9_-]*)(\s*:)/g,
|
||||
'$1"$2"$3',
|
||||
);
|
||||
const withDoubleQuotedStrings = convertSingleQuotedStrings(withQuotedKeys);
|
||||
|
||||
return withDoubleQuotedStrings
|
||||
.replace(/,\s*([}\]])/g, '$1')
|
||||
.trim();
|
||||
}
|
||||
|
||||
function stripJsonLikeComments(input: string): string {
|
||||
let output = '';
|
||||
let i = 0;
|
||||
let inString = false;
|
||||
let stringDelimiter: '"' | '\'' | null = null;
|
||||
let escaped = false;
|
||||
|
||||
while (i < input.length) {
|
||||
const ch = input[i];
|
||||
const next = i + 1 < input.length ? input[i + 1] : '';
|
||||
|
||||
if (inString) {
|
||||
output += ch;
|
||||
if (escaped) {
|
||||
escaped = false;
|
||||
} else if (ch === '\\') {
|
||||
escaped = true;
|
||||
} else if (ch === stringDelimiter) {
|
||||
inString = false;
|
||||
stringDelimiter = null;
|
||||
}
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === '"' || ch === '\'') {
|
||||
inString = true;
|
||||
stringDelimiter = ch;
|
||||
output += ch;
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === '/' && next === '/') {
|
||||
i += 2;
|
||||
while (i < input.length && input[i] !== '\n') {
|
||||
i += 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === '/' && next === '*') {
|
||||
i += 2;
|
||||
while (i + 1 < input.length && !(input[i] === '*' && input[i + 1] === '/')) {
|
||||
i += 1;
|
||||
}
|
||||
i = Math.min(i + 2, input.length);
|
||||
continue;
|
||||
}
|
||||
|
||||
output += ch;
|
||||
i += 1;
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
function convertSingleQuotedStrings(input: string): string {
|
||||
let output = '';
|
||||
let inSingle = false;
|
||||
let inDouble = false;
|
||||
let escaped = false;
|
||||
|
||||
for (let i = 0; i < input.length; i++) {
|
||||
const ch = input[i];
|
||||
|
||||
if (inSingle) {
|
||||
if (escaped) {
|
||||
output += ch;
|
||||
escaped = false;
|
||||
continue;
|
||||
}
|
||||
if (ch === '\\') {
|
||||
output += ch;
|
||||
escaped = true;
|
||||
continue;
|
||||
}
|
||||
if (ch === '\'') {
|
||||
inSingle = false;
|
||||
output += '"';
|
||||
continue;
|
||||
}
|
||||
if (ch === '"') {
|
||||
output += '\\"';
|
||||
continue;
|
||||
}
|
||||
output += ch;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inDouble) {
|
||||
output += ch;
|
||||
if (escaped) {
|
||||
escaped = false;
|
||||
} else if (ch === '\\') {
|
||||
escaped = true;
|
||||
} else if (ch === '"') {
|
||||
inDouble = false;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === '\'') {
|
||||
inSingle = true;
|
||||
output += '"';
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === '"') {
|
||||
inDouble = true;
|
||||
output += ch;
|
||||
continue;
|
||||
}
|
||||
|
||||
output += ch;
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
function extractFirstJsonContainer(input: string): string | null {
|
||||
const start = input.search(/[\[{]/);
|
||||
if (start < 0) {
|
||||
@@ -152,16 +300,82 @@ function extractFirstJsonContainer(input: string): string | null {
|
||||
return null;
|
||||
}
|
||||
|
||||
function parseWithFallbackCandidates<T>(value: unknown, parser: (value: unknown) => T): T {
|
||||
const queue: unknown[] = [value];
|
||||
const seenObjects = new Set<object>();
|
||||
const wrapperKeys = ['output', 'result', 'data', 'response', 'json', 'payload', 'content'];
|
||||
let firstError: Error | null = null;
|
||||
|
||||
while (queue.length > 0) {
|
||||
const current = queue.shift();
|
||||
if (current === undefined) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (current && typeof current === 'object') {
|
||||
const objectRef = current as object;
|
||||
if (seenObjects.has(objectRef)) {
|
||||
continue;
|
||||
}
|
||||
seenObjects.add(objectRef);
|
||||
}
|
||||
|
||||
try {
|
||||
return parser(current);
|
||||
} catch (error) {
|
||||
if (!firstError) {
|
||||
firstError = error instanceof Error ? error : new Error(String(error));
|
||||
}
|
||||
}
|
||||
|
||||
if (typeof current === 'string') {
|
||||
try {
|
||||
queue.push(JSON.parse(current));
|
||||
continue;
|
||||
} catch {
|
||||
const repaired = deterministicJsonRepair(current);
|
||||
if (repaired) {
|
||||
try {
|
||||
queue.push(JSON.parse(repaired));
|
||||
continue;
|
||||
} catch {
|
||||
// Continue searching additional candidates below.
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (Array.isArray(current)) {
|
||||
for (const item of current) {
|
||||
queue.push(item);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (current && typeof current === 'object') {
|
||||
const record = current as Record<string, unknown>;
|
||||
for (const key of wrapperKeys) {
|
||||
if (record[key] !== undefined) {
|
||||
queue.push(record[key]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw firstError ?? new Error('parse_failed');
|
||||
}
|
||||
|
||||
function parseJsonWithRepair<T>(raw: string, parser: (value: unknown) => T): T {
|
||||
try {
|
||||
return parser(JSON.parse(raw));
|
||||
return parseWithFallbackCandidates(JSON.parse(raw), parser);
|
||||
} catch {
|
||||
const repaired = deterministicJsonRepair(raw);
|
||||
if (!repaired) {
|
||||
throw new Error('parse_failed');
|
||||
}
|
||||
try {
|
||||
return parser(JSON.parse(repaired));
|
||||
return parseWithFallbackCandidates(JSON.parse(repaired), parser);
|
||||
} catch {
|
||||
throw new Error('repair_failed');
|
||||
}
|
||||
@@ -172,6 +386,149 @@ function uniq(values: string[]): string[] {
|
||||
return Array.from(new Set(values));
|
||||
}
|
||||
|
||||
function jsonSchemaFormat(name: string, schema: Record<string, unknown>): ChatResponseFormat {
|
||||
return { type: 'json_schema', name, schema, strict: true };
|
||||
}
|
||||
|
||||
const councilIdeaContentJsonSchema: Record<string, unknown> = {
|
||||
type: 'object',
|
||||
additionalProperties: false,
|
||||
required: ['title', 'hypothesis', 'mechanism', 'expected_outcome'],
|
||||
properties: {
|
||||
title: { type: 'string', minLength: 1 },
|
||||
hypothesis: { type: 'string', minLength: 1 },
|
||||
mechanism: { type: 'string', minLength: 1 },
|
||||
expected_outcome: { type: 'string', minLength: 1 },
|
||||
},
|
||||
};
|
||||
|
||||
const councilIdeationJsonSchema: Record<string, unknown> = {
|
||||
type: 'object',
|
||||
additionalProperties: false,
|
||||
required: ['ideas'],
|
||||
properties: {
|
||||
ideas: {
|
||||
type: 'array',
|
||||
minItems: 1,
|
||||
items: councilIdeaContentJsonSchema,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const councilAssessmentJsonSchema: Record<string, unknown> = {
|
||||
type: 'object',
|
||||
additionalProperties: false,
|
||||
required: ['assessments', 'assumptions', 'risks', 'asks', 'what_to_steal', 'convergence_signal', 'novelty_score', 'repetition_rate'],
|
||||
properties: {
|
||||
assessments: {
|
||||
type: 'array',
|
||||
items: {
|
||||
type: 'object',
|
||||
additionalProperties: false,
|
||||
required: ['idea_id', 'scores', 'decision', 'notes'],
|
||||
properties: {
|
||||
idea_id: { type: 'string', minLength: 1 },
|
||||
scores: {
|
||||
type: 'object',
|
||||
additionalProperties: false,
|
||||
required: ['novelty', 'feasibility', 'impact', 'testability'],
|
||||
properties: {
|
||||
novelty: { type: 'integer', minimum: 0, maximum: 100 },
|
||||
feasibility: { type: 'integer', minimum: 0, maximum: 100 },
|
||||
impact: { type: 'integer', minimum: 0, maximum: 100 },
|
||||
testability: { type: 'integer', minimum: 0, maximum: 100 },
|
||||
},
|
||||
},
|
||||
decision: { type: 'string', enum: ['shortlist', 'hold', 'reject'] },
|
||||
notes: { type: 'string', minLength: 1 },
|
||||
},
|
||||
},
|
||||
},
|
||||
assumptions: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||
risks: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||
asks: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||
what_to_steal: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||
convergence_signal: { type: 'boolean' },
|
||||
novelty_score: { type: 'integer', minimum: 0, maximum: 100 },
|
||||
repetition_rate: { type: 'integer', minimum: 0, maximum: 100 },
|
||||
},
|
||||
};
|
||||
|
||||
const councilGroundingJsonSchema: Record<string, unknown> = {
|
||||
type: 'object',
|
||||
additionalProperties: false,
|
||||
required: ['grounded'],
|
||||
properties: {
|
||||
grounded: {
|
||||
type: 'array',
|
||||
items: {
|
||||
type: 'object',
|
||||
additionalProperties: false,
|
||||
required: ['idea_id', 'mve', 'constraints', 'falsifiability_checks'],
|
||||
properties: {
|
||||
idea_id: { type: 'string', minLength: 1 },
|
||||
mve: { type: 'string', minLength: 1 },
|
||||
constraints: {
|
||||
type: 'array',
|
||||
minItems: 1,
|
||||
items: { type: 'string', minLength: 1 },
|
||||
},
|
||||
falsifiability_checks: {
|
||||
type: 'array',
|
||||
minItems: 1,
|
||||
items: { type: 'string', minLength: 1 },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const councilMetaJsonSchema: Record<string, unknown> = {
|
||||
type: 'object',
|
||||
additionalProperties: false,
|
||||
required: ['schema_version', 'selected_primary', 'selected_secondary', 'merges', 'rejections', 'open_questions', 'next_experiments'],
|
||||
properties: {
|
||||
schema_version: { type: 'string', const: COUNCIL_SCHEMA_VERSION },
|
||||
selected_primary: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||
selected_secondary: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||
merges: {
|
||||
type: 'array',
|
||||
items: {
|
||||
type: 'object',
|
||||
additionalProperties: false,
|
||||
required: ['sources', 'result_title', 'rationale'],
|
||||
properties: {
|
||||
sources: {
|
||||
type: 'array',
|
||||
minItems: 2,
|
||||
items: { type: 'string', minLength: 1 },
|
||||
},
|
||||
result_title: { type: 'string', minLength: 1 },
|
||||
rationale: { type: 'string', minLength: 1 },
|
||||
},
|
||||
},
|
||||
},
|
||||
rejections: {
|
||||
type: 'array',
|
||||
items: {
|
||||
type: 'object',
|
||||
additionalProperties: false,
|
||||
required: ['idea_id', 'reason_code'],
|
||||
properties: {
|
||||
idea_id: { type: 'string', minLength: 1 },
|
||||
reason_code: {
|
||||
type: 'string',
|
||||
enum: ['low_score', 'high_risk', 'insufficient_grounding', 'duplicate', 'out_of_scope', 'unknown_id', 'other'],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
open_questions: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||
next_experiments: { type: 'array', items: { type: 'string', minLength: 1 } },
|
||||
},
|
||||
};
|
||||
|
||||
function computeTotalScore(assessment: IdeaAssessment): number {
|
||||
const s = assessment.scores;
|
||||
return s.feasibility + s.impact + s.novelty + s.testability;
|
||||
@@ -403,19 +760,23 @@ export class CouncilsOrchestrator {
|
||||
scaffoldPrompt?: string;
|
||||
tierOverride?: ModelTier;
|
||||
maxTokens?: number;
|
||||
responseFormat?: ChatResponseFormat;
|
||||
}): Promise<AgentCallResult> {
|
||||
const agent = this.getAgent(opts.agentName);
|
||||
const message = canonicalStringify(opts.promptPayload);
|
||||
const promptHash = hashCanonical(opts.promptPayload);
|
||||
const scaffoldPrompt = opts.scaffoldPrompt ? `${opts.scaffoldPrompt}\n\n` : '';
|
||||
const systemPrompt = `${scaffoldPrompt}${agent.systemPrompt}\n\n${opts.modeDirective}`;
|
||||
const startedAt = Date.now();
|
||||
|
||||
const result = await this._delegateRunner.delegate({
|
||||
tier: opts.tierOverride ?? agent.tier,
|
||||
systemPrompt,
|
||||
message,
|
||||
maxTokens: opts.maxTokens ?? 4096,
|
||||
responseFormat: opts.responseFormat,
|
||||
});
|
||||
const latencyMs = Math.max(0, Date.now() - startedAt);
|
||||
|
||||
this._trace.push(councilTraceEventSchema.parse({
|
||||
schema_version: COUNCIL_SCHEMA_VERSION,
|
||||
@@ -427,6 +788,7 @@ export class CouncilsOrchestrator {
|
||||
prompt_payload_hash: promptHash,
|
||||
artifact_hash: hashCanonical(result.content),
|
||||
token_usage: result.usage,
|
||||
latency_ms: latencyMs,
|
||||
}));
|
||||
this._conversations.push({
|
||||
schema_version: COUNCIL_SCHEMA_VERSION,
|
||||
@@ -444,6 +806,63 @@ export class CouncilsOrchestrator {
|
||||
return result;
|
||||
}
|
||||
|
||||
private async parseWithAgentRecovery<T>(opts: JsonRecoveryOptions<T>): Promise<T> {
|
||||
try {
|
||||
return parseJsonWithRepair(opts.rawContent, opts.parser);
|
||||
} catch {
|
||||
const recoveryPayload = {
|
||||
task: 'normalize_json_output',
|
||||
required_schema: opts.schemaHint,
|
||||
original_prompt_payload: opts.promptPayload,
|
||||
invalid_output: opts.rawContent,
|
||||
};
|
||||
|
||||
const recoveryRaw = await this.callAgent({
|
||||
agentName: opts.agentName,
|
||||
callId: `${opts.callId}.repair`,
|
||||
phaseIndex: opts.phaseIndex,
|
||||
group: opts.group,
|
||||
round: opts.round,
|
||||
promptPayload: recoveryPayload,
|
||||
modeDirective: [
|
||||
'Your previous response did not validate.',
|
||||
'Return ONLY valid JSON matching the required schema.',
|
||||
'Do not add prose, markdown fences, or commentary.',
|
||||
].join(' '),
|
||||
scaffoldPrompt: opts.scaffoldPrompt,
|
||||
tierOverride: opts.tierOverride,
|
||||
maxTokens: opts.maxTokens,
|
||||
responseFormat: opts.responseFormat,
|
||||
});
|
||||
|
||||
try {
|
||||
return parseJsonWithRepair(recoveryRaw.content, opts.parser);
|
||||
} catch {
|
||||
if (opts.fallback) {
|
||||
this.recordValidationFailure(opts, 'repair_failed');
|
||||
return opts.fallback();
|
||||
}
|
||||
throw new Error('repair_failed');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private recordValidationFailure(
|
||||
opts: Pick<JsonRecoveryOptions<unknown>, 'callId' | 'phaseIndex' | 'group' | 'round' | 'promptPayload'>,
|
||||
reason: 'repair_failed' | 'parse_failed',
|
||||
): void {
|
||||
this._trace.push(councilTraceEventSchema.parse({
|
||||
schema_version: COUNCIL_SCHEMA_VERSION,
|
||||
event_id: `${opts.phaseIndex}:${opts.callId}.validation`,
|
||||
phase_index: opts.phaseIndex,
|
||||
call_id: `${opts.callId}.validation`,
|
||||
group: opts.group,
|
||||
round: opts.round,
|
||||
prompt_payload_hash: hashCanonical(opts.promptPayload),
|
||||
validation_failure: reason,
|
||||
}));
|
||||
}
|
||||
|
||||
private allocateIdeaId(group: CouncilGroup, round: number, index: number): string {
|
||||
return `${group}.r${round}.${String(index + 1).padStart(2, '0')}`;
|
||||
}
|
||||
@@ -481,11 +900,37 @@ export class CouncilsOrchestrator {
|
||||
round,
|
||||
promptPayload: ideationPayload,
|
||||
modeDirective: 'Return JSON only: {"ideas":[IdeaContent,...]}. Do not include IDs. No prose.',
|
||||
responseFormat: jsonSchemaFormat('council_ideation', councilIdeationJsonSchema),
|
||||
scaffoldPrompt: this._scaffold?.prompts[group].free_thinker,
|
||||
tierOverride: groupTier,
|
||||
});
|
||||
|
||||
const ideaOutput = parseJsonWithRepair(ideation.content, (value) => ideationOutputSchema.parse(value));
|
||||
const ideaOutput = await this.parseWithAgentRecovery({
|
||||
rawContent: ideation.content,
|
||||
parser: (value) => ideationOutputSchema.parse(value),
|
||||
agentName: groupConfig.freethinker_agent,
|
||||
callId: `${group}.r${round}.ft.ideation`,
|
||||
phaseIndex: phaseBase,
|
||||
group,
|
||||
round,
|
||||
promptPayload: ideationPayload,
|
||||
modeDirective: 'Return JSON only: {"ideas":[IdeaContent,...]}. Do not include IDs. No prose.',
|
||||
schemaHint:
|
||||
'{"ideas":[{"title":"string","hypothesis":"string","mechanism":"string","expected_outcome":"string"}]}',
|
||||
responseFormat: jsonSchemaFormat('council_ideation', councilIdeationJsonSchema),
|
||||
scaffoldPrompt: this._scaffold?.prompts[group].free_thinker,
|
||||
tierOverride: groupTier,
|
||||
fallback: () => ({
|
||||
ideas: [
|
||||
{
|
||||
title: `${group} fallback idea`,
|
||||
hypothesis: `Fallback hypothesis for: ${input.task}`,
|
||||
mechanism: 'Run a minimum-viable experiment and measure signal within one iteration.',
|
||||
expected_outcome: 'A concrete go/no-go signal to reduce uncertainty.',
|
||||
},
|
||||
],
|
||||
}),
|
||||
});
|
||||
const ideaCards: IdeaCard[] = ideaOutput.ideas
|
||||
.slice(0, this._config.defaults.ideas_per_round)
|
||||
.map((idea, index) => ({
|
||||
@@ -514,11 +959,48 @@ export class CouncilsOrchestrator {
|
||||
promptPayload: assessmentPayload,
|
||||
modeDirective:
|
||||
'Return JSON only. Assess provided idea IDs only. No new IDs. Include convergence_signal/novelty_score/repetition_rate.',
|
||||
responseFormat: jsonSchemaFormat('council_assessment', councilAssessmentJsonSchema),
|
||||
scaffoldPrompt: this._scaffold?.prompts[group].arbiter,
|
||||
tierOverride: groupTier,
|
||||
});
|
||||
|
||||
const assessmentOutput = parseJsonWithRepair(assessmentRaw.content, (value) => assessmentOutputSchema.parse(value));
|
||||
const assessmentOutput = await this.parseWithAgentRecovery({
|
||||
rawContent: assessmentRaw.content,
|
||||
parser: (value) => assessmentOutputSchema.parse(value),
|
||||
agentName: groupConfig.arbiter_agent,
|
||||
callId: `${group}.r${round}.arb.assess`,
|
||||
phaseIndex: phaseBase + 1,
|
||||
group,
|
||||
round,
|
||||
promptPayload: assessmentPayload,
|
||||
modeDirective:
|
||||
'Return JSON only. Assess provided idea IDs only. No new IDs. Include convergence_signal/novelty_score/repetition_rate.',
|
||||
schemaHint:
|
||||
'{"assessments":[{"idea_id":"string","scores":{"novelty":0,"feasibility":0,"impact":0,"testability":0},"decision":"shortlist|hold|reject","notes":"string"}],"assumptions":["string"],"risks":["string"],"asks":["string"],"what_to_steal":["string"],"convergence_signal":false,"novelty_score":0,"repetition_rate":0}',
|
||||
responseFormat: jsonSchemaFormat('council_assessment', councilAssessmentJsonSchema),
|
||||
scaffoldPrompt: this._scaffold?.prompts[group].arbiter,
|
||||
tierOverride: groupTier,
|
||||
fallback: () => ({
|
||||
assessments: ideaCards.map((idea, index) => ({
|
||||
idea_id: idea.idea_id,
|
||||
scores: {
|
||||
novelty: 40,
|
||||
feasibility: 40,
|
||||
impact: 40,
|
||||
testability: 40,
|
||||
},
|
||||
decision: index === 0 ? 'shortlist' as const : 'hold' as const,
|
||||
notes: 'Fallback assessment used because model output could not be repaired into valid JSON.',
|
||||
})),
|
||||
assumptions: ['Model output formatting instability; assumptions are provisional.'],
|
||||
risks: ['Assessment quality degraded due to invalid structured output.'],
|
||||
asks: ['Re-run with a different provider or tier to confirm shortlist quality.'],
|
||||
what_to_steal: [],
|
||||
convergence_signal: false,
|
||||
novelty_score: 0,
|
||||
repetition_rate: 100,
|
||||
}),
|
||||
});
|
||||
const validIdeaIds = new Set(ideaCards.map((i) => i.idea_id));
|
||||
const assessments: IdeaAssessment[] = uniq(assessmentOutput.assessments.map((a) => a.idea_id))
|
||||
.map((ideaId) => assessmentOutput.assessments.find((a) => a.idea_id === ideaId)!)
|
||||
@@ -571,10 +1053,27 @@ export class CouncilsOrchestrator {
|
||||
promptPayload: groundingPayload,
|
||||
modeDirective:
|
||||
'Grounder mode. Return JSON only: {"grounded":[{"idea_id", "mve", "constraints", "falsifiability_checks"}]}. No prose.',
|
||||
responseFormat: jsonSchemaFormat('council_grounding', councilGroundingJsonSchema),
|
||||
scaffoldPrompt: this._scaffold?.prompts[group].grounder ?? this._scaffold?.prompts[group].free_thinker,
|
||||
tierOverride: groupTier,
|
||||
});
|
||||
grounding = await this.parseWithAgentRecovery({
|
||||
rawContent: groundingRaw.content,
|
||||
parser: (value) => groundingOutputSchema.parse(value),
|
||||
agentName: groundingAgent,
|
||||
callId: `${group}.r${round}.ft.ground`,
|
||||
phaseIndex: phaseBase + 2,
|
||||
group,
|
||||
round,
|
||||
promptPayload: groundingPayload,
|
||||
modeDirective:
|
||||
'Grounder mode. Return JSON only: {"grounded":[{"idea_id", "mve", "constraints", "falsifiability_checks"}]}. No prose.',
|
||||
schemaHint:
|
||||
'{"grounded":[{"idea_id":"string","mve":"string","constraints":["string"],"falsifiability_checks":["string"]}]}',
|
||||
responseFormat: jsonSchemaFormat('council_grounding', councilGroundingJsonSchema),
|
||||
scaffoldPrompt: this._scaffold?.prompts[group].grounder ?? this._scaffold?.prompts[group].free_thinker,
|
||||
tierOverride: groupTier,
|
||||
});
|
||||
grounding = parseJsonWithRepair(groundingRaw.content, (value) => groundingOutputSchema.parse(value));
|
||||
} catch {
|
||||
groundingFailures = shortlist.length;
|
||||
if (this._config.strict_grounding) {
|
||||
@@ -704,7 +1203,11 @@ export class CouncilsOrchestrator {
|
||||
return packet;
|
||||
}
|
||||
|
||||
private async runMetaMerge(input: CouncilRunInput, briefD: CouncilBrief, briefP: CouncilBrief) {
|
||||
private async runMetaMerge(
|
||||
input: CouncilRunInput,
|
||||
briefD: CouncilBrief,
|
||||
briefP: CouncilBrief,
|
||||
): Promise<ReturnType<typeof metaSelectionSchema.parse>> {
|
||||
const metaAgentName = this._config.meta_arbiter_agent;
|
||||
const payload = {
|
||||
input,
|
||||
@@ -723,11 +1226,37 @@ export class CouncilsOrchestrator {
|
||||
promptPayload: payload,
|
||||
modeDirective:
|
||||
'Return JSON only following schema with selected_primary/selected_secondary/merges/rejections/open_questions/next_experiments. Use only known idea IDs.',
|
||||
responseFormat: jsonSchemaFormat('council_meta', councilMetaJsonSchema),
|
||||
scaffoldPrompt: this._scaffold?.prompts.meta_arbiter,
|
||||
tierOverride: this._config.meta_model_tier,
|
||||
});
|
||||
|
||||
return parseJsonWithRepair(metaRaw.content, (value) => metaSelectionSchema.parse(value));
|
||||
return this.parseWithAgentRecovery<ReturnType<typeof metaSelectionSchema.parse>>({
|
||||
rawContent: metaRaw.content,
|
||||
parser: (value) => metaSelectionSchema.parse(value),
|
||||
agentName: metaAgentName,
|
||||
callId: 'meta.merge',
|
||||
phaseIndex: 999,
|
||||
promptPayload: payload,
|
||||
modeDirective:
|
||||
'Return JSON only following schema with selected_primary/selected_secondary/merges/rejections/open_questions/next_experiments. Use only known idea IDs.',
|
||||
schemaHint:
|
||||
'{"schema_version":"1.0.0","selected_primary":["string"],"selected_secondary":["string"],"merges":[{"sources":["string","string"],"result_title":"string","rationale":"string"}],"rejections":[{"idea_id":"string","reason_code":"low_score|high_risk|insufficient_grounding|duplicate|out_of_scope|unknown_id|other"}],"open_questions":["string"],"next_experiments":["string"]}',
|
||||
responseFormat: jsonSchemaFormat('council_meta', councilMetaJsonSchema),
|
||||
scaffoldPrompt: this._scaffold?.prompts.meta_arbiter,
|
||||
tierOverride: this._config.meta_model_tier,
|
||||
fallback: () => metaSelectionSchema.parse({
|
||||
schema_version: COUNCIL_SCHEMA_VERSION,
|
||||
selected_primary: briefD.shortlist.slice(0, 1),
|
||||
selected_secondary: briefP.shortlist.slice(0, 1),
|
||||
merges: [],
|
||||
rejections: [],
|
||||
open_questions: ['Meta merge used fallback due to invalid structured output.'],
|
||||
next_experiments: [
|
||||
'Re-run council with a lower-latency/stronger JSON model and compare selected ideas.',
|
||||
],
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
||||
private validateMetaSelection(meta: ReturnType<typeof metaSelectionSchema.parse>, knownIds: Set<string>): boolean {
|
||||
@@ -763,6 +1292,7 @@ export class CouncilsOrchestrator {
|
||||
group: normalizeOptional(event.group),
|
||||
round: normalizeOptional(event.round),
|
||||
token_usage: normalizeOptional(event.token_usage),
|
||||
latency_ms: normalizeOptional(event.latency_ms),
|
||||
dropped_reason: normalizeOptional(event.dropped_reason),
|
||||
validation_failure: normalizeOptional(event.validation_failure),
|
||||
}));
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { AgentConfigRegistry } from '../agents/registry.js';
|
||||
import { configSchema } from '../config/schema.js';
|
||||
import { buildCouncilPreflightReport } from './preflight.js';
|
||||
|
||||
function makeConfig() {
|
||||
const config = configSchema.parse({
|
||||
models: {
|
||||
default: {
|
||||
provider: 'zhipuai',
|
||||
model: 'glm-4.7',
|
||||
use_oauth: true,
|
||||
},
|
||||
},
|
||||
});
|
||||
config.councils.enabled = true;
|
||||
config.councils.groups.D.arbiter_agent = 'comms';
|
||||
config.councils.groups.D.freethinker_agent = 'research';
|
||||
config.councils.groups.D.model_tier = 'default';
|
||||
config.councils.groups.P.arbiter_agent = 'comms';
|
||||
config.councils.groups.P.freethinker_agent = 'research';
|
||||
config.councils.groups.P.model_tier = 'default';
|
||||
config.councils.meta_arbiter_agent = 'code';
|
||||
config.councils.meta_model_tier = 'default';
|
||||
return config;
|
||||
}
|
||||
|
||||
function makeRegistry(): AgentConfigRegistry {
|
||||
const registry = new AgentConfigRegistry();
|
||||
registry.register({ name: 'comms', modelTier: 'fast' });
|
||||
registry.register({ name: 'research', modelTier: 'default' });
|
||||
registry.register({ name: 'code', modelTier: 'complex' });
|
||||
return registry;
|
||||
}
|
||||
|
||||
describe('buildCouncilPreflightReport', () => {
|
||||
it('reports zhipu endpoint/auth and stale oauth flag note', async () => {
|
||||
const config = makeConfig();
|
||||
const report = await buildCouncilPreflightReport({
|
||||
config,
|
||||
registry: makeRegistry(),
|
||||
delegateRunner: {
|
||||
delegate: vi.fn(async () => ({
|
||||
content: '{"ok":true}',
|
||||
usage: { inputTokens: 1, outputTokens: 1 },
|
||||
tier: 'default' as const,
|
||||
})),
|
||||
},
|
||||
activeTier: 'default',
|
||||
includeLiveProbe: false,
|
||||
credentialState: {
|
||||
openaiOAuth: false,
|
||||
openaiApiKeyStored: false,
|
||||
anthropicApiKeyStored: false,
|
||||
anthropicAuthTokenStored: false,
|
||||
zaiStored: true,
|
||||
},
|
||||
});
|
||||
|
||||
expect(report).toContain('D.arbiter: agent=comms');
|
||||
expect(report).toContain('effective_tier=default model=zhipuai/glm-4.7');
|
||||
expect(report).toContain('endpoint=https://api.z.ai/api/paas/v4');
|
||||
expect(report).toContain('auth_mode=bearer_credential');
|
||||
expect(report).toContain('auth_source=auth.json:zai');
|
||||
expect(report).toContain('note=use_oauth/auth_mode=oauth is ignored for zhipuai');
|
||||
});
|
||||
|
||||
it('includes live probe failures by tier', async () => {
|
||||
const config = makeConfig();
|
||||
const delegate = vi.fn(async () => {
|
||||
throw new Error('Connection error');
|
||||
});
|
||||
const report = await buildCouncilPreflightReport({
|
||||
config,
|
||||
registry: makeRegistry(),
|
||||
delegateRunner: { delegate },
|
||||
activeTier: 'default',
|
||||
includeLiveProbe: true,
|
||||
credentialState: {
|
||||
openaiOAuth: false,
|
||||
openaiApiKeyStored: false,
|
||||
anthropicApiKeyStored: false,
|
||||
anthropicAuthTokenStored: false,
|
||||
zaiStored: false,
|
||||
},
|
||||
});
|
||||
|
||||
expect(report).toContain('Live tier probes:');
|
||||
expect(report).toContain('default: failed');
|
||||
expect(report).toContain('Connection error');
|
||||
expect(delegate).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,347 @@
|
||||
import type { AgentConfigRegistry } from '../agents/registry.js';
|
||||
import {
|
||||
loadStoredAnthropicAuth,
|
||||
loadStoredAnthropicAuthToken,
|
||||
loadStoredOpenAIApiKey,
|
||||
loadStoredOpenAIAuth,
|
||||
loadStoredZaiAuth,
|
||||
} from '../auth/index.js';
|
||||
import type { Config, ModelConfig, ModelProvider } from '../config/index.js';
|
||||
import type { ModelTier } from '../models/router.js';
|
||||
import type { ChatResponseFormat } from '../models/types.js';
|
||||
|
||||
interface DelegateRunner {
|
||||
delegate(request: {
|
||||
tier: ModelTier;
|
||||
systemPrompt: string;
|
||||
message: string;
|
||||
maxTokens?: number;
|
||||
responseFormat?: ChatResponseFormat;
|
||||
}): Promise<{
|
||||
content: string;
|
||||
usage: { inputTokens: number; outputTokens: number };
|
||||
tier: ModelTier;
|
||||
}>;
|
||||
}
|
||||
|
||||
interface CredentialState {
|
||||
openaiOAuth: boolean;
|
||||
openaiApiKeyStored: boolean;
|
||||
anthropicApiKeyStored: boolean;
|
||||
anthropicAuthTokenStored: boolean;
|
||||
zaiStored: boolean;
|
||||
}
|
||||
|
||||
export interface CouncilPreflightOptions {
|
||||
config: Config;
|
||||
registry: AgentConfigRegistry;
|
||||
delegateRunner: DelegateRunner;
|
||||
activeTier?: ModelTier;
|
||||
includeLiveProbe?: boolean;
|
||||
credentialState?: CredentialState;
|
||||
}
|
||||
|
||||
interface TierConfigResolution {
|
||||
modelConfig: ModelConfig;
|
||||
fellBackToDefault: boolean;
|
||||
}
|
||||
|
||||
interface AuthResolution {
|
||||
mode: string;
|
||||
source: string;
|
||||
note?: string;
|
||||
}
|
||||
|
||||
const DEFAULT_ENDPOINTS: Partial<Record<ModelProvider, string>> = {
|
||||
openai: 'https://api.openai.com/v1',
|
||||
openrouter: 'https://openrouter.ai/api/v1',
|
||||
vercel: 'https://ai-gateway.vercel.sh/v1',
|
||||
zhipuai: 'https://api.z.ai/api/paas/v4',
|
||||
xai: 'https://api.x.ai/v1',
|
||||
minimax: 'https://api.minimax.io/v1',
|
||||
moonshot: 'https://api.moonshot.cn/v1',
|
||||
ollama: 'http://localhost:11434',
|
||||
llamacpp: 'http://localhost:8080',
|
||||
};
|
||||
|
||||
function getCredentialStateFromSystem(): CredentialState {
|
||||
return {
|
||||
openaiOAuth: Boolean(loadStoredOpenAIAuth()),
|
||||
openaiApiKeyStored: Boolean(loadStoredOpenAIApiKey()),
|
||||
anthropicApiKeyStored: Boolean(loadStoredAnthropicAuth()),
|
||||
anthropicAuthTokenStored: Boolean(loadStoredAnthropicAuthToken()),
|
||||
zaiStored: Boolean(loadStoredZaiAuth()),
|
||||
};
|
||||
}
|
||||
|
||||
function getEffectiveAuthMode(cfg: ModelConfig): 'auto' | 'api_key' | 'oauth' {
|
||||
if (cfg.auth_mode) {
|
||||
return cfg.auth_mode;
|
||||
}
|
||||
if (cfg.use_oauth) {
|
||||
return 'oauth';
|
||||
}
|
||||
return 'auto';
|
||||
}
|
||||
|
||||
function resolveTierConfig(config: Config, tier: ModelTier): TierConfigResolution {
|
||||
if (tier === 'default') {
|
||||
return { modelConfig: config.models.default, fellBackToDefault: false };
|
||||
}
|
||||
const direct = config.models[tier];
|
||||
if (direct) {
|
||||
return { modelConfig: direct, fellBackToDefault: false };
|
||||
}
|
||||
return { modelConfig: config.models.default, fellBackToDefault: true };
|
||||
}
|
||||
|
||||
function firstTruthySource(sources: Array<[boolean, string]>): string {
|
||||
for (const [present, label] of sources) {
|
||||
if (present) {
|
||||
return label;
|
||||
}
|
||||
}
|
||||
return 'missing';
|
||||
}
|
||||
|
||||
function resolveAuth(cfg: ModelConfig, credentialState: CredentialState): AuthResolution {
|
||||
if (cfg.provider === 'zhipuai') {
|
||||
const source = firstTruthySource([
|
||||
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
|
||||
[Boolean(cfg.auth_token?.trim()), 'config.auth_token'],
|
||||
[Boolean(process.env.ZAI_API_KEY?.trim()), 'env:ZAI_API_KEY'],
|
||||
[Boolean(process.env.ZHIPUAI_API_KEY?.trim()), 'env:ZHIPUAI_API_KEY'],
|
||||
[Boolean(process.env.ZHIPUAI_AUTH_TOKEN?.trim()), 'env:ZHIPUAI_AUTH_TOKEN'],
|
||||
[credentialState.zaiStored, 'auth.json:zai'],
|
||||
]);
|
||||
const note = cfg.use_oauth || cfg.auth_mode === 'oauth'
|
||||
? 'use_oauth/auth_mode=oauth is ignored for zhipuai'
|
||||
: undefined;
|
||||
return {
|
||||
mode: 'bearer_credential',
|
||||
source,
|
||||
note,
|
||||
};
|
||||
}
|
||||
|
||||
if (cfg.provider === 'openai') {
|
||||
const authMode = getEffectiveAuthMode(cfg);
|
||||
const apiSource = firstTruthySource([
|
||||
[Boolean(cfg.api_keys && cfg.api_keys.length > 0), 'config.api_keys'],
|
||||
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
|
||||
[Boolean(process.env.OPENAI_API_KEY?.trim()), 'env:OPENAI_API_KEY'],
|
||||
[credentialState.openaiApiKeyStored, 'auth.json:openai.api_key'],
|
||||
]);
|
||||
const oauthSource = credentialState.openaiOAuth ? 'auth.json:openai.oauth' : 'missing';
|
||||
|
||||
if (authMode === 'oauth') {
|
||||
return { mode: 'oauth', source: oauthSource };
|
||||
}
|
||||
if (authMode === 'api_key') {
|
||||
return { mode: 'api_key', source: apiSource };
|
||||
}
|
||||
if (apiSource !== 'missing') {
|
||||
return { mode: 'auto->api_key', source: apiSource };
|
||||
}
|
||||
return { mode: 'auto->oauth', source: oauthSource };
|
||||
}
|
||||
|
||||
if (cfg.provider === 'anthropic') {
|
||||
const authMode = getEffectiveAuthMode(cfg);
|
||||
const apiSource = firstTruthySource([
|
||||
[Boolean(cfg.api_keys && cfg.api_keys.length > 0), 'config.api_keys'],
|
||||
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
|
||||
[Boolean(process.env.ANTHROPIC_API_KEY?.trim()), 'env:ANTHROPIC_API_KEY'],
|
||||
[credentialState.anthropicApiKeyStored, 'auth.json:anthropic.api_key'],
|
||||
]);
|
||||
const oauthSource = firstTruthySource([
|
||||
[Boolean(cfg.auth_token?.trim()), 'config.auth_token'],
|
||||
[Boolean(process.env.ANTHROPIC_AUTH_TOKEN?.trim()), 'env:ANTHROPIC_AUTH_TOKEN'],
|
||||
[credentialState.anthropicAuthTokenStored, 'auth.json:anthropic.auth_token'],
|
||||
]);
|
||||
|
||||
if (authMode === 'oauth') {
|
||||
return { mode: 'oauth', source: oauthSource };
|
||||
}
|
||||
if (authMode === 'api_key') {
|
||||
return { mode: 'api_key', source: apiSource };
|
||||
}
|
||||
if (apiSource !== 'missing') {
|
||||
return { mode: 'auto->api_key', source: apiSource };
|
||||
}
|
||||
return { mode: 'auto->oauth', source: oauthSource };
|
||||
}
|
||||
|
||||
if (cfg.provider === 'gemini') {
|
||||
const source = firstTruthySource([
|
||||
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
|
||||
[Boolean(process.env.GEMINI_API_KEY?.trim()), 'env:GEMINI_API_KEY'],
|
||||
[Boolean(process.env.GOOGLE_API_KEY?.trim()), 'env:GOOGLE_API_KEY'],
|
||||
]);
|
||||
return { mode: 'api_key', source };
|
||||
}
|
||||
|
||||
if (cfg.provider === 'bedrock') {
|
||||
const source = firstTruthySource([
|
||||
[Boolean(cfg.api_key?.trim() && cfg.auth_token?.trim()), 'config.api_key+auth_token'],
|
||||
[Boolean(process.env.AWS_ACCESS_KEY_ID?.trim() && process.env.AWS_SECRET_ACCESS_KEY?.trim()), 'env:AWS_ACCESS_KEY_ID+AWS_SECRET_ACCESS_KEY'],
|
||||
]);
|
||||
return { mode: 'aws_credentials', source };
|
||||
}
|
||||
|
||||
if (cfg.provider === 'ollama' || cfg.provider === 'llamacpp' || cfg.provider === 'synthetic') {
|
||||
return { mode: 'local', source: 'none' };
|
||||
}
|
||||
|
||||
const source = firstTruthySource([
|
||||
[Boolean(cfg.api_keys && cfg.api_keys.length > 0), 'config.api_keys'],
|
||||
[Boolean(cfg.api_key?.trim()), 'config.api_key'],
|
||||
]);
|
||||
return { mode: 'api_key', source };
|
||||
}
|
||||
|
||||
function resolveEndpoint(cfg: ModelConfig): string {
|
||||
if (cfg.endpoint?.trim()) {
|
||||
return cfg.endpoint.trim();
|
||||
}
|
||||
return DEFAULT_ENDPOINTS[cfg.provider] ?? '(provider default)';
|
||||
}
|
||||
|
||||
function normalizeProbeError(error: unknown): string {
|
||||
if (error instanceof Error) {
|
||||
return error.message;
|
||||
}
|
||||
return String(error);
|
||||
}
|
||||
|
||||
function isPreflightRequest(task: string): boolean {
|
||||
return task.trim().toLowerCase() === 'preflight';
|
||||
}
|
||||
|
||||
export function shouldRunCouncilPreflight(task: string): boolean {
|
||||
return isPreflightRequest(task);
|
||||
}
|
||||
|
||||
export async function buildCouncilPreflightReport(options: CouncilPreflightOptions): Promise<string> {
|
||||
const { config, registry, delegateRunner } = options;
|
||||
const councils = config.councils;
|
||||
const credentialState = options.credentialState ?? getCredentialStateFromSystem();
|
||||
|
||||
const roles = [
|
||||
{ role: 'D.arbiter', agent: councils.groups.D.arbiter_agent, tierOverride: councils.groups.D.model_tier },
|
||||
{ role: 'D.freethinker', agent: councils.groups.D.freethinker_agent, tierOverride: councils.groups.D.model_tier },
|
||||
...(councils.groups.D.grounder_agent
|
||||
? [{ role: 'D.grounder', agent: councils.groups.D.grounder_agent, tierOverride: councils.groups.D.model_tier }]
|
||||
: []),
|
||||
...(councils.groups.D.writer_agent
|
||||
? [{ role: 'D.writer', agent: councils.groups.D.writer_agent, tierOverride: councils.groups.D.model_tier }]
|
||||
: []),
|
||||
{ role: 'P.arbiter', agent: councils.groups.P.arbiter_agent, tierOverride: councils.groups.P.model_tier },
|
||||
{ role: 'P.freethinker', agent: councils.groups.P.freethinker_agent, tierOverride: councils.groups.P.model_tier },
|
||||
...(councils.groups.P.grounder_agent
|
||||
? [{ role: 'P.grounder', agent: councils.groups.P.grounder_agent, tierOverride: councils.groups.P.model_tier }]
|
||||
: []),
|
||||
...(councils.groups.P.writer_agent
|
||||
? [{ role: 'P.writer', agent: councils.groups.P.writer_agent, tierOverride: councils.groups.P.model_tier }]
|
||||
: []),
|
||||
{ role: 'Meta.arbiter', agent: councils.meta_arbiter_agent, tierOverride: councils.meta_model_tier },
|
||||
...(councils.meta_writer_agent
|
||||
? [{ role: 'Meta.writer', agent: councils.meta_writer_agent, tierOverride: councils.meta_model_tier }]
|
||||
: []),
|
||||
];
|
||||
|
||||
const roleLines: string[] = [];
|
||||
const tiersToProbe = new Set<ModelTier>();
|
||||
|
||||
for (const role of roles) {
|
||||
const agentConfig = registry.get(role.agent);
|
||||
const agentTier = agentConfig?.modelTier ?? 'default';
|
||||
const effectiveTier = role.tierOverride ?? agentTier;
|
||||
const { modelConfig, fellBackToDefault } = resolveTierConfig(config, effectiveTier);
|
||||
const modelLabel = `${modelConfig.provider}/${modelConfig.model}`;
|
||||
const missingSuffix = agentConfig ? '' : ' [agent_missing]';
|
||||
const fallbackSuffix = fellBackToDefault ? ' [tier_unconfigured->default]' : '';
|
||||
roleLines.push(
|
||||
`- ${role.role}: agent=${role.agent}${missingSuffix} ` +
|
||||
`agent_tier=${agentTier} override_tier=${role.tierOverride ?? 'none'} ` +
|
||||
`effective_tier=${effectiveTier} model=${modelLabel}${fallbackSuffix}`,
|
||||
);
|
||||
tiersToProbe.add(effectiveTier);
|
||||
}
|
||||
|
||||
const tierLines: string[] = [];
|
||||
for (const tier of [...tiersToProbe].sort()) {
|
||||
const { modelConfig, fellBackToDefault } = resolveTierConfig(config, tier);
|
||||
const auth = resolveAuth(modelConfig, credentialState);
|
||||
const endpoint = resolveEndpoint(modelConfig);
|
||||
tierLines.push(
|
||||
`- ${tier}: provider=${modelConfig.provider} model=${modelConfig.model} ` +
|
||||
`endpoint=${endpoint} auth_mode=${auth.mode} auth_source=${auth.source}` +
|
||||
`${auth.note ? ` note=${auth.note}` : ''}` +
|
||||
`${fellBackToDefault ? ' fallback=default' : ''}`,
|
||||
);
|
||||
}
|
||||
|
||||
const probeLines: string[] = [];
|
||||
if (options.includeLiveProbe ?? true) {
|
||||
const probeResults = await Promise.all(
|
||||
[...tiersToProbe].sort().map(async (tier) => {
|
||||
const startedAt = Date.now();
|
||||
try {
|
||||
await delegateRunner.delegate({
|
||||
tier,
|
||||
systemPrompt: 'Return exactly {"ok":true}.',
|
||||
message: 'Return exactly {"ok":true}.',
|
||||
maxTokens: 64,
|
||||
});
|
||||
return {
|
||||
tier,
|
||||
ok: true,
|
||||
latencyMs: Math.max(0, Date.now() - startedAt),
|
||||
error: '',
|
||||
};
|
||||
} catch (error) {
|
||||
return {
|
||||
tier,
|
||||
ok: false,
|
||||
latencyMs: Math.max(0, Date.now() - startedAt),
|
||||
error: normalizeProbeError(error),
|
||||
};
|
||||
}
|
||||
}),
|
||||
);
|
||||
|
||||
for (const result of probeResults) {
|
||||
if (result.ok) {
|
||||
probeLines.push(`- ${result.tier}: ok (${result.latencyMs}ms)`);
|
||||
} else {
|
||||
probeLines.push(`- ${result.tier}: failed (${result.latencyMs}ms) ${result.error}`);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
probeLines.push('- skipped');
|
||||
}
|
||||
|
||||
const activeTier = options.activeTier ?? 'default';
|
||||
const activeModel = resolveTierConfig(config, activeTier).modelConfig;
|
||||
|
||||
return [
|
||||
'[Council preflight]',
|
||||
`Councils enabled: ${councils.enabled ? 'yes' : 'no'}`,
|
||||
`Active interactive tier: ${activeTier} (${activeModel.provider}/${activeModel.model})`,
|
||||
`Scaffold path: ${councils.scaffold_path?.trim() ? councils.scaffold_path : '(none)'}`,
|
||||
'',
|
||||
'Role routing:',
|
||||
...roleLines,
|
||||
'',
|
||||
'Tier provider/auth:',
|
||||
...tierLines,
|
||||
'',
|
||||
'Live tier probes:',
|
||||
...probeLines,
|
||||
'',
|
||||
'Path comparison:',
|
||||
'- Interactive TUI messages use the active tier above.',
|
||||
'- Council calls use per-role effective tiers (group override, then agent tier).',
|
||||
].join('\n');
|
||||
}
|
||||
@@ -166,6 +166,7 @@ export const councilTraceEventSchema = z.object({
|
||||
inputTokens: z.number().int().min(0),
|
||||
outputTokens: z.number().int().min(0),
|
||||
}).strict().optional(),
|
||||
latency_ms: z.number().int().min(0).optional(),
|
||||
dropped_reason: droppedReasonSchema.optional(),
|
||||
validation_failure: validationFailureReasonSchema.optional(),
|
||||
}).strict();
|
||||
|
||||
Reference in New Issue
Block a user