diff --git a/src/backends/native/agent.test.ts b/src/backends/native/agent.test.ts index a48d967..64ed4c3 100644 --- a/src/backends/native/agent.test.ts +++ b/src/backends/native/agent.test.ts @@ -376,6 +376,63 @@ describe('NativeAgent tool loop', () => { expect(mockClient.chat).toHaveBeenCalledTimes(2); }); + it('recovers malformed textual shell tool_use and executes shell.exec', async () => { + let callCount = 0; + const seenCommands: string[] = []; + const mockClient: ModelClient = { + chat: vi.fn().mockImplementation(() => { + callCount++; + if (callCount === 1) { + return { + content: + '{"type":"tool_use","id":"call_1","name":"shell_exec","input":{"command":" "grep -r "createCouncilRunTool" /home/will/lab/flynn/src --include="*.ts" | head -20"}}', + stopReason: 'end_turn', + usage: { inputTokens: 10, outputTokens: 5 }, + }; + } + return { + content: 'done', + stopReason: 'end_turn', + usage: { inputTokens: 10, outputTokens: 5 }, + }; + }), + }; + + const shellTool: Tool = { + name: 'shell.exec', + description: 'Execute shell command', + inputSchema: { + type: 'object', + properties: { command: { type: 'string' } }, + required: ['command'], + }, + execute: async (args) => { + const command = (args as { command: string }).command; + seenCommands.push(command); + return { success: true, output: command }; + }, + }; + + const registry = new ToolRegistry(); + registry.register(shellTool); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + hooks.setInteractiveConfirmer(async () => ({ approved: true })); + const executor = new ToolExecutor(registry, hooks, { sensitiveMode: 'confirm_without_elevation' }); + + const agent = new NativeAgent({ + modelClient: mockClient, + systemPrompt: 'You are helpful.', + toolRegistry: registry, + toolExecutor: executor, + }); + + const response = await agent.process('find council tool wiring'); + expect(response).toBe('done'); + expect(mockClient.chat).toHaveBeenCalledTimes(2); + expect(seenCommands).toHaveLength(1); + expect(seenCommands[0]).toBe('grep -r "createCouncilRunTool" /home/will/lab/flynn/src --include="*.ts" | head -20'); + }); + it('works without tools (backward compatible)', async () => { const mockClient: ModelClient = { chat: vi.fn().mockResolvedValue({ diff --git a/src/backends/native/agent.ts b/src/backends/native/agent.ts index 4de6262..4d258a1 100644 --- a/src/backends/native/agent.ts +++ b/src/backends/native/agent.ts @@ -260,6 +260,13 @@ export class NativeAgent { } } } + if (toolCalls.length === 0) { + const recovered = this.extractMalformedShellToolCall(response.content); + if (recovered && toolRegistry.getByApiName(recovered.toolCall.name)) { + toolCalls = [recovered.toolCall]; + assistantTextContent = recovered.remainingText; + } + } const wantsToolUse = toolCalls.length > 0; if (!wantsToolUse) { @@ -705,6 +712,94 @@ export class NativeAgent { }; } + private extractMalformedShellToolCall(content: string): { toolCall: ModelToolCall; remainingText: string } | null { + if (!content || !/"type"\s*:\s*"tool_use"/.test(content)) { + return null; + } + + const nameMatch = content.match(/"name"\s*:\s*"([^"]+)"/); + if (!nameMatch?.[1]) { + return null; + } + const name = nameMatch[1]; + const normalized = name.replace(/_/g, '.').toLowerCase(); + if (normalized !== 'shell.exec') { + return null; + } + + // Recover malformed shell command payloads where inner quotes are not escaped. + const commandMatch = content.match(/"command"\s*:\s*"([\s\S]*)"\s*}\s*}/); + if (!commandMatch?.[1]) { + return null; + } + + const command = this.sanitizeRecoveredShellCommand(commandMatch[1]); + if (!command) { + return null; + } + + const idMatch = content.match(/"id"\s*:\s*"([^"]+)"/); + const id = idMatch?.[1]?.trim().length ? idMatch[1] : 'text_tool_call_recovered_shell'; + + return { + toolCall: { + id, + name, + args: { command }, + }, + remainingText: this.stripFirstToolUseObject(content), + }; + } + + private sanitizeRecoveredShellCommand(raw: string): string { + let command = raw.trim(); + if (command.length === 0) { + return ''; + } + + // Common malformed pattern: opening quote duplicated into value. + if ((command.startsWith('"') && !command.endsWith('"')) || (command.startsWith('\'') && !command.endsWith('\''))) { + command = command.slice(1).trimStart(); + } + + return command; + } + + private stripFirstToolUseObject(content: string): string { + const typeMatch = /"type"\s*:\s*"tool_use"/.exec(content); + if (!typeMatch || typeMatch.index < 0) { + return content.trim(); + } + + const objectStart = content.lastIndexOf('{', typeMatch.index); + if (objectStart < 0) { + return content.trim(); + } + + const objectEnd = this.findObjectEndByBraceDepth(content, objectStart); + if (objectEnd < 0) { + return content.trim(); + } + + return `${content.slice(0, objectStart)}${content.slice(objectEnd + 1)}`.trim(); + } + + private findObjectEndByBraceDepth(content: string, start: number): number { + let depth = 0; + for (let i = start; i < content.length; i++) { + const ch = content[i]; + if (ch === '{') { + depth++; + } else if (ch === '}') { + depth--; + if (depth === 0) { + return i; + } + } + } + return -1; + } + private findJsonObjectEnd(content: string, start: number): number { let depth = 0; let inString = false; diff --git a/src/cli/doctor.test.ts b/src/cli/doctor.test.ts index 75167b8..aa73622 100644 --- a/src/cli/doctor.test.ts +++ b/src/cli/doctor.test.ts @@ -484,6 +484,52 @@ models: } }); + it('loads env vars from FLYNN_ENV_FILE before env-var checks', async () => { + const originalEnvFile = process.env.FLYNN_ENV_FILE; + const originalOpenAIKey = process.env.OPENAI_API_KEY; + delete process.env.OPENAI_API_KEY; + + try { + mkdirSync(testDir, { recursive: true }); + const envPath = join(testDir, 'cloud.env'); + writeFileSync(envPath, 'OPENAI_API_KEY=sk-test-from-env-file\n'); + process.env.FLYNN_ENV_FILE = envPath; + + const configPath = join(testDir, 'openai-env-file.yaml'); + writeFileSync(configPath, ` +telegram: + bot_token: "test-token" + allowed_chat_ids: [123] +models: + default: + provider: openai + model: gpt-5.2 + auth_mode: api_key + api_key: \${OPENAI_API_KEY} +`); + + const ctx: DoctorContext = { configPath, dataDir: testDir }; + const results = await runChecks(ctx); + const envCheck = results.find((r) => r.label.includes('Env vars resolved')) as CheckResult | undefined; + const modelCheck = results.find((r) => r.label.includes('Model connectivity')) as CheckResult | undefined; + + expect(envCheck?.status).toBe('pass'); + expect(modelCheck?.status).toBe('pass'); + expect(modelCheck?.detail).toContain('api_key=config+env'); + } finally { + if (originalEnvFile !== undefined) { + process.env.FLYNN_ENV_FILE = originalEnvFile; + } else { + delete process.env.FLYNN_ENV_FILE; + } + if (originalOpenAIKey !== undefined) { + process.env.OPENAI_API_KEY = originalOpenAIKey; + } else { + delete process.env.OPENAI_API_KEY; + } + } + }); + it('reports WARN when Vercel AI Gateway has no available API key sources', async () => { const originalKey = process.env.AI_GATEWAY_API_KEY; delete process.env.AI_GATEWAY_API_KEY; diff --git a/src/cli/doctor.ts b/src/cli/doctor.ts index 928673f..1e13d48 100644 --- a/src/cli/doctor.ts +++ b/src/cli/doctor.ts @@ -1,6 +1,6 @@ import type { Command } from 'commander'; import type { Config } from '../config/index.js'; -import { getConfigPath, getDataDir, formatStatus, resolveOverlayPath } from './shared.js'; +import { getConfigPath, getDataDir, formatStatus, resolveOverlayPath, loadEnvFileIfPresent } from './shared.js'; import { existsSync, readFileSync, writeFileSync, unlinkSync } from 'fs'; import { homedir } from 'os'; import { resolve, join } from 'path'; @@ -643,6 +643,8 @@ const allChecks: Check[] = [ /** Run all doctor checks in order. Exported for testing. */ export async function runChecks(ctx: DoctorContext): Promise { + // Keep doctor behavior aligned with other CLI commands that load cloud.env. + loadEnvFileIfPresent(); const results: CheckResult[] = []; for (const check of allChecks) { const result = await check(ctx); diff --git a/src/cli/shared.ts b/src/cli/shared.ts index b0dc384..54efdf8 100644 --- a/src/cli/shared.ts +++ b/src/cli/shared.ts @@ -4,7 +4,7 @@ import { resolve, dirname, join } from 'path'; import { homedir } from 'os'; import { existsSync, readFileSync } from 'fs'; -function loadEnvFileIfPresent(): void { +export function loadEnvFileIfPresent(): void { const envFile = process.env.FLYNN_ENV_FILE ?? resolve(homedir(), '.config/flynn/cloud.env'); if (!existsSync(envFile)) { return; diff --git a/src/frontends/tui/minimal.test.ts b/src/frontends/tui/minimal.test.ts index e829328..4a9a30b 100644 --- a/src/frontends/tui/minimal.test.ts +++ b/src/frontends/tui/minimal.test.ts @@ -49,6 +49,7 @@ function minimalTuiPrivates(value: MinimalTui): { }; activePromptCancel: (() => void) | null; activeOperationCancel: (() => void) | null; + commandInFlight: boolean; running: boolean; } { return value as unknown as { @@ -70,6 +71,7 @@ function minimalTuiPrivates(value: MinimalTui): { }; activePromptCancel: (() => void) | null; activeOperationCancel: (() => void) | null; + commandInFlight: boolean; running: boolean; }; } @@ -469,6 +471,38 @@ describe('MinimalTui prompt cancellation', () => { expect(minimalTuiPrivates(tui).activePromptCancel).toBeNull(); }); + it('returns empty string when readline is already closed during question', async () => { + const mockSession = { + id: 'test', + getHistory: () => [], + addMessage: vi.fn(), + clear: vi.fn(), + replaceHistory: vi.fn(), + }; + + const tui = new MinimalTui({ + session: asSession(mockSession), + modelClient: asRouter({}), + systemPrompt: 'test', + }); + + const questionError = new Error('readline was closed'); + (questionError as Error & { code?: string }).code = 'ERR_USE_AFTER_CLOSE'; + + minimalTuiPrivates(tui).rl = { + once: vi.fn(), + removeListener: vi.fn(), + question: vi.fn(() => { + throw questionError; + }), + write: vi.fn(), + prompt: vi.fn(), + }; + + await expect(minimalTuiPrivates(tui).prompt('Confirm? ')).resolves.toBe(''); + expect(minimalTuiPrivates(tui).activePromptCancel).toBeNull(); + }); + it('uses Esc to cancel active running operation', () => { const mockSession = { id: 'test', @@ -530,4 +564,46 @@ describe('MinimalTui prompt cancellation', () => { logSpy.mockRestore(); } }); + + it('exits immediately on Ctrl+C when a command is in flight', () => { + const mockSession = { + id: 'test', + getHistory: () => [], + addMessage: vi.fn(), + clear: vi.fn(), + replaceHistory: vi.fn(), + }; + + const write = vi.fn(); + const prompt = vi.fn(); + const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + try { + const tui = new MinimalTui({ + session: asSession(mockSession), + modelClient: asRouter({}), + systemPrompt: 'test', + }); + minimalTuiPrivates(tui).rl = { + once: vi.fn(), + removeListener: vi.fn(), + question: vi.fn(), + write, + prompt, + }; + minimalTuiPrivates(tui).running = true; + minimalTuiPrivates(tui).commandInFlight = true; + const cancel = vi.fn(); + minimalTuiPrivates(tui).activeOperationCancel = cancel; + + const shouldExit = minimalTuiPrivates(tui).handleCtrlCPress(1000); + + expect(shouldExit).toBe(true); + expect(cancel).toHaveBeenCalledOnce(); + expect(write).not.toHaveBeenCalled(); + expect(prompt).not.toHaveBeenCalled(); + expect(logSpy).not.toHaveBeenCalledWith(expect.stringContaining('Press Ctrl+C again to quit')); + } finally { + logSpy.mockRestore(); + } + }); }); diff --git a/src/frontends/tui/minimal.ts b/src/frontends/tui/minimal.ts index 81d4f58..52a94fc 100644 --- a/src/frontends/tui/minimal.ts +++ b/src/frontends/tui/minimal.ts @@ -95,6 +95,7 @@ export class MinimalTui { private busyActive = false; private verbose = false; private lastCtrlCAtMs = 0; + private commandInFlight = false; constructor(private config: MinimalTuiConfig) {} @@ -325,6 +326,13 @@ export class MinimalTui { return true; } + // If a command is currently running (e.g. /council), exit immediately. + // Some command paths are not cancellable mid-flight, so double-press UX can trap users. + if (this.commandInFlight) { + this.activeOperationCancel?.(); + return true; + } + const shouldExit = this.lastCtrlCAtMs > 0 && (nowMs - this.lastCtrlCAtMs) <= MinimalTui.CTRL_C_EXIT_WINDOW_MS; this.lastCtrlCAtMs = nowMs; @@ -358,7 +366,12 @@ export class MinimalTui { continue; } - await this.handleCommand(command); + this.commandInFlight = true; + try { + await this.handleCommand(command); + } finally { + this.commandInFlight = false; + } } } @@ -421,9 +434,14 @@ export class MinimalTui { } }; - this.rl.question(promptText, (answer) => { - finish(answer); - }); + try { + this.rl.question(promptText, (answer) => { + finish(answer); + }); + } catch { + // readline can throw synchronously if it was closed between our guards. + finish(''); + } }); } @@ -559,7 +577,7 @@ export class MinimalTui { private async handleCouncilCommand(task: string): Promise { if (!task.trim()) { - console.log(`${colors.gray}Usage: /council ${colors.reset}\n`); + console.log(`${colors.gray}Usage: /council | /council preflight${colors.reset}\n`); return; } if (!this.config.onCouncil) { diff --git a/src/gateway/ui/pages/dashboard.js b/src/gateway/ui/pages/dashboard.js index e0a50cb..575d54c 100644 --- a/src/gateway/ui/pages/dashboard.js +++ b/src/gateway/ui/pages/dashboard.js @@ -192,6 +192,23 @@ function valuesMatch(expected, actual) { return expected === actual; } +function valuesMatchForPath(path, expected, actual) { + if (valuesMatch(expected, actual)) { + return true; + } + + // Some optional string paths are normalized by config handlers: + // writing "" is persisted as "unset" (undefined). + if (typeof expected === 'string' && expected.trim().length === 0 && (actual === undefined || actual === null)) { + const optionalStringSuffixes = ['scaffold_path']; + if (optionalStringSuffixes.some((suffix) => path.endsWith(suffix))) { + return true; + } + } + + return false; +} + function setAssistantSaveState(message, tone = 'neutral') { _assistantSaveState = { message, @@ -714,7 +731,7 @@ async function applyAssistantPatch(patches, statusEl) { const mismatches = []; for (const [key, value] of Object.entries(patches)) { const actual = getByPath(fresh, key); - if (!valuesMatch(value, actual)) { + if (!valuesMatchForPath(key, value, actual)) { mismatches.push(`${key} expected=${JSON.stringify(value)} actual=${JSON.stringify(actual)}`); } } diff --git a/src/models/openai.test.ts b/src/models/openai.test.ts index b4398ef..358f159 100644 --- a/src/models/openai.test.ts +++ b/src/models/openai.test.ts @@ -137,6 +137,37 @@ describe('OpenAIClient tool use', () => { expect(response.stopReason).toBe('max_tokens'); }); + it('retries with max_completion_tokens when provider rejects max_tokens', async () => { + const initialCallCount = mockCreate.mock.calls.length; + mockCreate + .mockRejectedValueOnce(new Error( + "400 Unsupported parameter: 'max_tokens' is not supported with this model. Use 'max_completion_tokens' instead.", + )) + .mockResolvedValueOnce({ + choices: [{ message: { content: 'Hello from GPT-5.2!' }, finish_reason: 'stop' }], + usage: { prompt_tokens: 11, completion_tokens: 6 }, + }); + + const client = new OpenAIClient({ + apiKey: 'test-key', + model: 'gpt-5.2', + }); + + const response = await client.chat({ + messages: [{ role: 'user', content: 'Hello' }], + }); + + expect(response.content).toBe('Hello from GPT-5.2!'); + expect(mockCreate.mock.calls.length - initialCallCount).toBe(2); + + const firstArgs = mockCreate.mock.calls[initialCallCount]?.[0] as Record; + expect(firstArgs.max_tokens).toBeDefined(); + + const secondArgs = mockCreate.mock.calls[initialCallCount + 1]?.[0] as Record; + expect(secondArgs.max_tokens).toBeUndefined(); + expect(secondArgs.max_completion_tokens).toBeDefined(); + }); + it('rewrites Z.AI 401 errors with actionable auth guidance', async () => { mockCreate.mockRejectedValueOnce({ status: 401, @@ -169,4 +200,36 @@ describe('OpenAIClient tool use', () => { messages: [{ role: 'user', content: 'hello' }], })).rejects.toThrow(/The key lacks `model\.request` scope/); }); + + it('passes OpenAI response_format json_schema when requested', async () => { + const client = new OpenAIClient({ + apiKey: 'test-key', + model: 'gpt-5.2', + }); + + await client.chat({ + messages: [{ role: 'user', content: 'emit json' }], + responseFormat: { + type: 'json_schema', + name: 'council_ideation', + schema: { + type: 'object', + additionalProperties: false, + required: ['ideas'], + properties: { + ideas: { type: 'array', items: { type: 'object' } }, + }, + }, + strict: true, + }, + }); + + const args = mockCreate.mock.calls.at(-1)?.[0] as Record; + const responseFormat = args.response_format as Record; + expect(responseFormat.type).toBe('json_schema'); + + const jsonSchema = responseFormat.json_schema as Record; + expect(jsonSchema.name).toBe('council_ideation'); + expect(jsonSchema.strict).toBe(true); + }); }); diff --git a/src/models/openai.ts b/src/models/openai.ts index 88f7a78..bbf566a 100644 --- a/src/models/openai.ts +++ b/src/models/openai.ts @@ -254,12 +254,39 @@ export class OpenAIClient implements ModelClient { } // Build params, conditionally including tools + const maxTokens = request.maxTokens ?? this.defaultMaxTokens; const params: OpenAI.ChatCompletionCreateParamsNonStreaming = { model: this.model, - max_tokens: request.maxTokens ?? this.defaultMaxTokens, + max_tokens: maxTokens, messages, }; + if (request.responseFormat) { + if (request.responseFormat.type === 'json_object') { + (params as OpenAI.ChatCompletionCreateParamsNonStreaming & { + response_format?: { type: 'json_object' }; + }).response_format = { type: 'json_object' }; + } else { + (params as OpenAI.ChatCompletionCreateParamsNonStreaming & { + response_format?: { + type: 'json_schema'; + json_schema: { + name: string; + schema: Record; + strict: boolean; + }; + }; + }).response_format = { + type: 'json_schema', + json_schema: { + name: request.responseFormat.name, + schema: request.responseFormat.schema, + strict: request.responseFormat.strict ?? true, + }, + }; + } + } + if (request.tools && request.tools.length > 0) { params.tools = request.tools.map(t => ({ type: 'function' as const, @@ -287,22 +314,39 @@ export class OpenAIClient implements ModelClient { ? (error as { status: number }).status : undefined; const message = error instanceof Error ? error.message : String(error); + const unsupportedMaxTokens = ( + status === 400 + || message.includes('400 Unsupported parameter') + ) && message.includes("Unsupported parameter: 'max_tokens'"); - const isZai = (this.baseURL ?? '').includes('api.z.ai'); - const isUnauthorized401 = status === 401 || /\b401\b/.test(message); - const missingModelRequestScope = message.includes('Missing scopes: model.request'); + if (unsupportedMaxTokens) { + const fallbackParams = { + ...params, + max_completion_tokens: maxTokens, + } as OpenAI.ChatCompletionCreateParamsNonStreaming & { max_completion_tokens: number }; + delete (fallbackParams as { max_tokens?: number }).max_tokens; - if (isZai && isUnauthorized401) { - const hint = missingModelRequestScope - ? 'The key lacks `model.request` scope.' - : 'The API key is invalid, expired, or not allowed for this model/endpoint.'; - throw new Error( - `Z.AI authentication failed (401). ${hint} ` + - 'Run `flynn zai-auth` to update credentials, or set ZAI_API_KEY / ZHIPUAI_API_KEY / ZHIPUAI_AUTH_TOKEN.', + response = await this.client.chat.completions.create( + fallbackParams, + request.signal ? { signal: request.signal } : undefined, ); - } + } else { + const isZai = (this.baseURL ?? '').includes('api.z.ai'); + const isUnauthorized401 = status === 401 || /\b401\b/.test(message); + const missingModelRequestScope = message.includes('Missing scopes: model.request'); - throw error; + if (isZai && isUnauthorized401) { + const hint = missingModelRequestScope + ? 'The key lacks `model.request` scope.' + : 'The API key is invalid, expired, or not allowed for this model/endpoint.'; + throw new Error( + `Z.AI authentication failed (401). ${hint} ` + + 'Run `flynn zai-auth` to update credentials, or set ZAI_API_KEY / ZHIPUAI_API_KEY / ZHIPUAI_AUTH_TOKEN.', + ); + } + + throw error; + } } const choice = response.choices[0]; diff --git a/src/models/types.ts b/src/models/types.ts index e3c54b6..cc3dfb5 100644 --- a/src/models/types.ts +++ b/src/models/types.ts @@ -73,11 +73,22 @@ export interface ToolMessage { // Union type for all messages in a conversation export type ConversationMessage = Message | ToolMessage; +export type ChatResponseFormat = + | { type: 'json_object' } + | { + type: 'json_schema'; + name: string; + schema: Record; + strict?: boolean; + }; + export interface ChatRequest { messages: Message[]; system?: string; maxTokens?: number; tools?: ToolDefinition[]; + /** Optional provider-level response format request (e.g., structured JSON output). */ + responseFormat?: ChatResponseFormat; /** Enable extended thinking/reasoning mode for this request. */ thinking?: boolean; /** Optional abort signal for cancelling in-flight provider requests. */