fix(core): harden env loading, OpenAI compatibility, and runtime recovery

This commit is contained in:
William Valentin
2026-02-22 15:56:21 -08:00
parent 387906ce4d
commit dafe9b4d3d
11 changed files with 450 additions and 21 deletions
+57
View File
@@ -376,6 +376,63 @@ describe('NativeAgent tool loop', () => {
expect(mockClient.chat).toHaveBeenCalledTimes(2);
});
it('recovers malformed textual shell tool_use and executes shell.exec', async () => {
let callCount = 0;
const seenCommands: string[] = [];
const mockClient: ModelClient = {
chat: vi.fn().mockImplementation(() => {
callCount++;
if (callCount === 1) {
return {
content:
'{"type":"tool_use","id":"call_1","name":"shell_exec","input":{"command":" "grep -r "createCouncilRunTool" /home/will/lab/flynn/src --include="*.ts" | head -20"}}',
stopReason: 'end_turn',
usage: { inputTokens: 10, outputTokens: 5 },
};
}
return {
content: 'done',
stopReason: 'end_turn',
usage: { inputTokens: 10, outputTokens: 5 },
};
}),
};
const shellTool: Tool = {
name: 'shell.exec',
description: 'Execute shell command',
inputSchema: {
type: 'object',
properties: { command: { type: 'string' } },
required: ['command'],
},
execute: async (args) => {
const command = (args as { command: string }).command;
seenCommands.push(command);
return { success: true, output: command };
},
};
const registry = new ToolRegistry();
registry.register(shellTool);
const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
hooks.setInteractiveConfirmer(async () => ({ approved: true }));
const executor = new ToolExecutor(registry, hooks, { sensitiveMode: 'confirm_without_elevation' });
const agent = new NativeAgent({
modelClient: mockClient,
systemPrompt: 'You are helpful.',
toolRegistry: registry,
toolExecutor: executor,
});
const response = await agent.process('find council tool wiring');
expect(response).toBe('done');
expect(mockClient.chat).toHaveBeenCalledTimes(2);
expect(seenCommands).toHaveLength(1);
expect(seenCommands[0]).toBe('grep -r "createCouncilRunTool" /home/will/lab/flynn/src --include="*.ts" | head -20');
});
it('works without tools (backward compatible)', async () => {
const mockClient: ModelClient = {
chat: vi.fn().mockResolvedValue({
+95
View File
@@ -260,6 +260,13 @@ export class NativeAgent {
}
}
}
if (toolCalls.length === 0) {
const recovered = this.extractMalformedShellToolCall(response.content);
if (recovered && toolRegistry.getByApiName(recovered.toolCall.name)) {
toolCalls = [recovered.toolCall];
assistantTextContent = recovered.remainingText;
}
}
const wantsToolUse = toolCalls.length > 0;
if (!wantsToolUse) {
@@ -705,6 +712,94 @@ export class NativeAgent {
};
}
private extractMalformedShellToolCall(content: string): { toolCall: ModelToolCall; remainingText: string } | null {
if (!content || !/"type"\s*:\s*"tool_use"/.test(content)) {
return null;
}
const nameMatch = content.match(/"name"\s*:\s*"([^"]+)"/);
if (!nameMatch?.[1]) {
return null;
}
const name = nameMatch[1];
const normalized = name.replace(/_/g, '.').toLowerCase();
if (normalized !== 'shell.exec') {
return null;
}
// Recover malformed shell command payloads where inner quotes are not escaped.
const commandMatch = content.match(/"command"\s*:\s*"([\s\S]*)"\s*}\s*}/);
if (!commandMatch?.[1]) {
return null;
}
const command = this.sanitizeRecoveredShellCommand(commandMatch[1]);
if (!command) {
return null;
}
const idMatch = content.match(/"id"\s*:\s*"([^"]+)"/);
const id = idMatch?.[1]?.trim().length ? idMatch[1] : 'text_tool_call_recovered_shell';
return {
toolCall: {
id,
name,
args: { command },
},
remainingText: this.stripFirstToolUseObject(content),
};
}
private sanitizeRecoveredShellCommand(raw: string): string {
let command = raw.trim();
if (command.length === 0) {
return '';
}
// Common malformed pattern: opening quote duplicated into value.
if ((command.startsWith('"') && !command.endsWith('"')) || (command.startsWith('\'') && !command.endsWith('\''))) {
command = command.slice(1).trimStart();
}
return command;
}
private stripFirstToolUseObject(content: string): string {
const typeMatch = /"type"\s*:\s*"tool_use"/.exec(content);
if (!typeMatch || typeMatch.index < 0) {
return content.trim();
}
const objectStart = content.lastIndexOf('{', typeMatch.index);
if (objectStart < 0) {
return content.trim();
}
const objectEnd = this.findObjectEndByBraceDepth(content, objectStart);
if (objectEnd < 0) {
return content.trim();
}
return `${content.slice(0, objectStart)}${content.slice(objectEnd + 1)}`.trim();
}
private findObjectEndByBraceDepth(content: string, start: number): number {
let depth = 0;
for (let i = start; i < content.length; i++) {
const ch = content[i];
if (ch === '{') {
depth++;
} else if (ch === '}') {
depth--;
if (depth === 0) {
return i;
}
}
}
return -1;
}
private findJsonObjectEnd(content: string, start: number): number {
let depth = 0;
let inString = false;
+46
View File
@@ -484,6 +484,52 @@ models:
}
});
it('loads env vars from FLYNN_ENV_FILE before env-var checks', async () => {
const originalEnvFile = process.env.FLYNN_ENV_FILE;
const originalOpenAIKey = process.env.OPENAI_API_KEY;
delete process.env.OPENAI_API_KEY;
try {
mkdirSync(testDir, { recursive: true });
const envPath = join(testDir, 'cloud.env');
writeFileSync(envPath, 'OPENAI_API_KEY=sk-test-from-env-file\n');
process.env.FLYNN_ENV_FILE = envPath;
const configPath = join(testDir, 'openai-env-file.yaml');
writeFileSync(configPath, `
telegram:
bot_token: "test-token"
allowed_chat_ids: [123]
models:
default:
provider: openai
model: gpt-5.2
auth_mode: api_key
api_key: \${OPENAI_API_KEY}
`);
const ctx: DoctorContext = { configPath, dataDir: testDir };
const results = await runChecks(ctx);
const envCheck = results.find((r) => r.label.includes('Env vars resolved')) as CheckResult | undefined;
const modelCheck = results.find((r) => r.label.includes('Model connectivity')) as CheckResult | undefined;
expect(envCheck?.status).toBe('pass');
expect(modelCheck?.status).toBe('pass');
expect(modelCheck?.detail).toContain('api_key=config+env');
} finally {
if (originalEnvFile !== undefined) {
process.env.FLYNN_ENV_FILE = originalEnvFile;
} else {
delete process.env.FLYNN_ENV_FILE;
}
if (originalOpenAIKey !== undefined) {
process.env.OPENAI_API_KEY = originalOpenAIKey;
} else {
delete process.env.OPENAI_API_KEY;
}
}
});
it('reports WARN when Vercel AI Gateway has no available API key sources', async () => {
const originalKey = process.env.AI_GATEWAY_API_KEY;
delete process.env.AI_GATEWAY_API_KEY;
+3 -1
View File
@@ -1,6 +1,6 @@
import type { Command } from 'commander';
import type { Config } from '../config/index.js';
import { getConfigPath, getDataDir, formatStatus, resolveOverlayPath } from './shared.js';
import { getConfigPath, getDataDir, formatStatus, resolveOverlayPath, loadEnvFileIfPresent } from './shared.js';
import { existsSync, readFileSync, writeFileSync, unlinkSync } from 'fs';
import { homedir } from 'os';
import { resolve, join } from 'path';
@@ -643,6 +643,8 @@ const allChecks: Check[] = [
/** Run all doctor checks in order. Exported for testing. */
export async function runChecks(ctx: DoctorContext): Promise<CheckResult[]> {
// Keep doctor behavior aligned with other CLI commands that load cloud.env.
loadEnvFileIfPresent();
const results: CheckResult[] = [];
for (const check of allChecks) {
const result = await check(ctx);
+1 -1
View File
@@ -4,7 +4,7 @@ import { resolve, dirname, join } from 'path';
import { homedir } from 'os';
import { existsSync, readFileSync } from 'fs';
function loadEnvFileIfPresent(): void {
export function loadEnvFileIfPresent(): void {
const envFile = process.env.FLYNN_ENV_FILE ?? resolve(homedir(), '.config/flynn/cloud.env');
if (!existsSync(envFile)) {
return;
+76
View File
@@ -49,6 +49,7 @@ function minimalTuiPrivates(value: MinimalTui): {
};
activePromptCancel: (() => void) | null;
activeOperationCancel: (() => void) | null;
commandInFlight: boolean;
running: boolean;
} {
return value as unknown as {
@@ -70,6 +71,7 @@ function minimalTuiPrivates(value: MinimalTui): {
};
activePromptCancel: (() => void) | null;
activeOperationCancel: (() => void) | null;
commandInFlight: boolean;
running: boolean;
};
}
@@ -469,6 +471,38 @@ describe('MinimalTui prompt cancellation', () => {
expect(minimalTuiPrivates(tui).activePromptCancel).toBeNull();
});
it('returns empty string when readline is already closed during question', async () => {
const mockSession = {
id: 'test',
getHistory: () => [],
addMessage: vi.fn(),
clear: vi.fn(),
replaceHistory: vi.fn(),
};
const tui = new MinimalTui({
session: asSession(mockSession),
modelClient: asRouter({}),
systemPrompt: 'test',
});
const questionError = new Error('readline was closed');
(questionError as Error & { code?: string }).code = 'ERR_USE_AFTER_CLOSE';
minimalTuiPrivates(tui).rl = {
once: vi.fn(),
removeListener: vi.fn(),
question: vi.fn(() => {
throw questionError;
}),
write: vi.fn(),
prompt: vi.fn(),
};
await expect(minimalTuiPrivates(tui).prompt('Confirm? ')).resolves.toBe('');
expect(minimalTuiPrivates(tui).activePromptCancel).toBeNull();
});
it('uses Esc to cancel active running operation', () => {
const mockSession = {
id: 'test',
@@ -530,4 +564,46 @@ describe('MinimalTui prompt cancellation', () => {
logSpy.mockRestore();
}
});
it('exits immediately on Ctrl+C when a command is in flight', () => {
const mockSession = {
id: 'test',
getHistory: () => [],
addMessage: vi.fn(),
clear: vi.fn(),
replaceHistory: vi.fn(),
};
const write = vi.fn();
const prompt = vi.fn();
const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
try {
const tui = new MinimalTui({
session: asSession(mockSession),
modelClient: asRouter({}),
systemPrompt: 'test',
});
minimalTuiPrivates(tui).rl = {
once: vi.fn(),
removeListener: vi.fn(),
question: vi.fn(),
write,
prompt,
};
minimalTuiPrivates(tui).running = true;
minimalTuiPrivates(tui).commandInFlight = true;
const cancel = vi.fn();
minimalTuiPrivates(tui).activeOperationCancel = cancel;
const shouldExit = minimalTuiPrivates(tui).handleCtrlCPress(1000);
expect(shouldExit).toBe(true);
expect(cancel).toHaveBeenCalledOnce();
expect(write).not.toHaveBeenCalled();
expect(prompt).not.toHaveBeenCalled();
expect(logSpy).not.toHaveBeenCalledWith(expect.stringContaining('Press Ctrl+C again to quit'));
} finally {
logSpy.mockRestore();
}
});
});
+23 -5
View File
@@ -95,6 +95,7 @@ export class MinimalTui {
private busyActive = false;
private verbose = false;
private lastCtrlCAtMs = 0;
private commandInFlight = false;
constructor(private config: MinimalTuiConfig) {}
@@ -325,6 +326,13 @@ export class MinimalTui {
return true;
}
// If a command is currently running (e.g. /council), exit immediately.
// Some command paths are not cancellable mid-flight, so double-press UX can trap users.
if (this.commandInFlight) {
this.activeOperationCancel?.();
return true;
}
const shouldExit = this.lastCtrlCAtMs > 0
&& (nowMs - this.lastCtrlCAtMs) <= MinimalTui.CTRL_C_EXIT_WINDOW_MS;
this.lastCtrlCAtMs = nowMs;
@@ -358,7 +366,12 @@ export class MinimalTui {
continue;
}
await this.handleCommand(command);
this.commandInFlight = true;
try {
await this.handleCommand(command);
} finally {
this.commandInFlight = false;
}
}
}
@@ -421,9 +434,14 @@ export class MinimalTui {
}
};
this.rl.question(promptText, (answer) => {
finish(answer);
});
try {
this.rl.question(promptText, (answer) => {
finish(answer);
});
} catch {
// readline can throw synchronously if it was closed between our guards.
finish('');
}
});
}
@@ -559,7 +577,7 @@ export class MinimalTui {
private async handleCouncilCommand(task: string): Promise<void> {
if (!task.trim()) {
console.log(`${colors.gray}Usage: /council <question or task>${colors.reset}\n`);
console.log(`${colors.gray}Usage: /council <question or task> | /council preflight${colors.reset}\n`);
return;
}
if (!this.config.onCouncil) {
+18 -1
View File
@@ -192,6 +192,23 @@ function valuesMatch(expected, actual) {
return expected === actual;
}
function valuesMatchForPath(path, expected, actual) {
if (valuesMatch(expected, actual)) {
return true;
}
// Some optional string paths are normalized by config handlers:
// writing "" is persisted as "unset" (undefined).
if (typeof expected === 'string' && expected.trim().length === 0 && (actual === undefined || actual === null)) {
const optionalStringSuffixes = ['scaffold_path'];
if (optionalStringSuffixes.some((suffix) => path.endsWith(suffix))) {
return true;
}
}
return false;
}
function setAssistantSaveState(message, tone = 'neutral') {
_assistantSaveState = {
message,
@@ -714,7 +731,7 @@ async function applyAssistantPatch(patches, statusEl) {
const mismatches = [];
for (const [key, value] of Object.entries(patches)) {
const actual = getByPath(fresh, key);
if (!valuesMatch(value, actual)) {
if (!valuesMatchForPath(key, value, actual)) {
mismatches.push(`${key} expected=${JSON.stringify(value)} actual=${JSON.stringify(actual)}`);
}
}
+63
View File
@@ -137,6 +137,37 @@ describe('OpenAIClient tool use', () => {
expect(response.stopReason).toBe('max_tokens');
});
it('retries with max_completion_tokens when provider rejects max_tokens', async () => {
const initialCallCount = mockCreate.mock.calls.length;
mockCreate
.mockRejectedValueOnce(new Error(
"400 Unsupported parameter: 'max_tokens' is not supported with this model. Use 'max_completion_tokens' instead.",
))
.mockResolvedValueOnce({
choices: [{ message: { content: 'Hello from GPT-5.2!' }, finish_reason: 'stop' }],
usage: { prompt_tokens: 11, completion_tokens: 6 },
});
const client = new OpenAIClient({
apiKey: 'test-key',
model: 'gpt-5.2',
});
const response = await client.chat({
messages: [{ role: 'user', content: 'Hello' }],
});
expect(response.content).toBe('Hello from GPT-5.2!');
expect(mockCreate.mock.calls.length - initialCallCount).toBe(2);
const firstArgs = mockCreate.mock.calls[initialCallCount]?.[0] as Record<string, unknown>;
expect(firstArgs.max_tokens).toBeDefined();
const secondArgs = mockCreate.mock.calls[initialCallCount + 1]?.[0] as Record<string, unknown>;
expect(secondArgs.max_tokens).toBeUndefined();
expect(secondArgs.max_completion_tokens).toBeDefined();
});
it('rewrites Z.AI 401 errors with actionable auth guidance', async () => {
mockCreate.mockRejectedValueOnce({
status: 401,
@@ -169,4 +200,36 @@ describe('OpenAIClient tool use', () => {
messages: [{ role: 'user', content: 'hello' }],
})).rejects.toThrow(/The key lacks `model\.request` scope/);
});
it('passes OpenAI response_format json_schema when requested', async () => {
const client = new OpenAIClient({
apiKey: 'test-key',
model: 'gpt-5.2',
});
await client.chat({
messages: [{ role: 'user', content: 'emit json' }],
responseFormat: {
type: 'json_schema',
name: 'council_ideation',
schema: {
type: 'object',
additionalProperties: false,
required: ['ideas'],
properties: {
ideas: { type: 'array', items: { type: 'object' } },
},
},
strict: true,
},
});
const args = mockCreate.mock.calls.at(-1)?.[0] as Record<string, unknown>;
const responseFormat = args.response_format as Record<string, unknown>;
expect(responseFormat.type).toBe('json_schema');
const jsonSchema = responseFormat.json_schema as Record<string, unknown>;
expect(jsonSchema.name).toBe('council_ideation');
expect(jsonSchema.strict).toBe(true);
});
});
+57 -13
View File
@@ -254,12 +254,39 @@ export class OpenAIClient implements ModelClient {
}
// Build params, conditionally including tools
const maxTokens = request.maxTokens ?? this.defaultMaxTokens;
const params: OpenAI.ChatCompletionCreateParamsNonStreaming = {
model: this.model,
max_tokens: request.maxTokens ?? this.defaultMaxTokens,
max_tokens: maxTokens,
messages,
};
if (request.responseFormat) {
if (request.responseFormat.type === 'json_object') {
(params as OpenAI.ChatCompletionCreateParamsNonStreaming & {
response_format?: { type: 'json_object' };
}).response_format = { type: 'json_object' };
} else {
(params as OpenAI.ChatCompletionCreateParamsNonStreaming & {
response_format?: {
type: 'json_schema';
json_schema: {
name: string;
schema: Record<string, unknown>;
strict: boolean;
};
};
}).response_format = {
type: 'json_schema',
json_schema: {
name: request.responseFormat.name,
schema: request.responseFormat.schema,
strict: request.responseFormat.strict ?? true,
},
};
}
}
if (request.tools && request.tools.length > 0) {
params.tools = request.tools.map(t => ({
type: 'function' as const,
@@ -287,22 +314,39 @@ export class OpenAIClient implements ModelClient {
? (error as { status: number }).status
: undefined;
const message = error instanceof Error ? error.message : String(error);
const unsupportedMaxTokens = (
status === 400
|| message.includes('400 Unsupported parameter')
) && message.includes("Unsupported parameter: 'max_tokens'");
const isZai = (this.baseURL ?? '').includes('api.z.ai');
const isUnauthorized401 = status === 401 || /\b401\b/.test(message);
const missingModelRequestScope = message.includes('Missing scopes: model.request');
if (unsupportedMaxTokens) {
const fallbackParams = {
...params,
max_completion_tokens: maxTokens,
} as OpenAI.ChatCompletionCreateParamsNonStreaming & { max_completion_tokens: number };
delete (fallbackParams as { max_tokens?: number }).max_tokens;
if (isZai && isUnauthorized401) {
const hint = missingModelRequestScope
? 'The key lacks `model.request` scope.'
: 'The API key is invalid, expired, or not allowed for this model/endpoint.';
throw new Error(
`Z.AI authentication failed (401). ${hint} ` +
'Run `flynn zai-auth` to update credentials, or set ZAI_API_KEY / ZHIPUAI_API_KEY / ZHIPUAI_AUTH_TOKEN.',
response = await this.client.chat.completions.create(
fallbackParams,
request.signal ? { signal: request.signal } : undefined,
);
}
} else {
const isZai = (this.baseURL ?? '').includes('api.z.ai');
const isUnauthorized401 = status === 401 || /\b401\b/.test(message);
const missingModelRequestScope = message.includes('Missing scopes: model.request');
throw error;
if (isZai && isUnauthorized401) {
const hint = missingModelRequestScope
? 'The key lacks `model.request` scope.'
: 'The API key is invalid, expired, or not allowed for this model/endpoint.';
throw new Error(
`Z.AI authentication failed (401). ${hint} ` +
'Run `flynn zai-auth` to update credentials, or set ZAI_API_KEY / ZHIPUAI_API_KEY / ZHIPUAI_AUTH_TOKEN.',
);
}
throw error;
}
}
const choice = response.choices[0];
+11
View File
@@ -73,11 +73,22 @@ export interface ToolMessage {
// Union type for all messages in a conversation
export type ConversationMessage = Message | ToolMessage;
export type ChatResponseFormat =
| { type: 'json_object' }
| {
type: 'json_schema';
name: string;
schema: Record<string, unknown>;
strict?: boolean;
};
export interface ChatRequest {
messages: Message[];
system?: string;
maxTokens?: number;
tools?: ToolDefinition[];
/** Optional provider-level response format request (e.g., structured JSON output). */
responseFormat?: ChatResponseFormat;
/** Enable extended thinking/reasoning mode for this request. */
thinking?: boolean;
/** Optional abort signal for cancelling in-flight provider requests. */