From 5451f8a1dedadaaa7a465a036b94f1aba136ebc3 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Tue, 17 Feb 2026 16:34:54 -0800 Subject: [PATCH] fix(tooling): surface non-executable tool-use warnings --- README.md | 6 +++++ docs/plans/openai-oauth-summary.md | 4 +++ src/backends/native/agent.test.ts | 30 +++++++++++++++++++++ src/backends/native/agent.ts | 42 +++++++++++++++++++++++++++--- src/models/openai.oauth.test.ts | 36 +++++++++++++++++++++++++ src/models/openai.ts | 12 ++++++++- 6 files changed, 126 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fb59ebc..70cb01b 100644 --- a/README.md +++ b/README.md @@ -316,6 +316,12 @@ models: Each tier can optionally specify `auth_mode` (`auto` | `api_key` | `oauth`) to control whether Flynn uses API keys vs OAuth/token auth for that provider. `use_oauth: true` remains supported as a compatibility alias for `auth_mode: oauth`. +Note: with `provider: openai` + `auth_mode: oauth` (Codex backend), Flynn currently does not send tool definitions to the provider. Tool execution is therefore unavailable in that mode, and any textual `tool_use` output should be treated as non-executable model text. + +Note: with `provider: ollama`, tool execution depends on model capabilities. If Ollama reports that the selected model does not support tools, Flynn omits tool definitions for that request. + +Note: with `provider: llamacpp`, tool execution depends on the served model/template correctly emitting OpenAI-style `tool_calls`. Models/templates that do not preserve tool-call structure may fall back to plain text behavior. + ### Agent Backends Flynn can run with the built-in native backend or delegate message processing to external CLI backends. diff --git a/docs/plans/openai-oauth-summary.md b/docs/plans/openai-oauth-summary.md index 97d00dc..600f3df 100644 --- a/docs/plans/openai-oauth-summary.md +++ b/docs/plans/openai-oauth-summary.md @@ -158,6 +158,10 @@ models: oauth_enabled: true ``` +### Current Limitation +- In Flynn, OpenAI OAuth (Codex backend) currently does not send tool definitions to the provider. +- Tool execution is unavailable in this mode; any textual `tool_use` content is non-executable model output. + --- ## Testing Strategy diff --git a/src/backends/native/agent.test.ts b/src/backends/native/agent.test.ts index 84f99d1..077defb 100644 --- a/src/backends/native/agent.test.ts +++ b/src/backends/native/agent.test.ts @@ -270,6 +270,36 @@ describe('NativeAgent tool loop', () => { expect(mockClient.chat).toHaveBeenCalledTimes(3); }); + it('surfaces warning when model emits textual tool_use block without structured tool calls', async () => { + const mockClient: ModelClient = { + chat: vi.fn().mockResolvedValue({ + content: 'Let me read the full email to evaluate legitimacy:{"type":"tool_use","id":"call_123","name":"gmail_read","input":{"id":"abc"}}', + stopReason: 'end_turn', + usage: { inputTokens: 10, outputTokens: 5 }, + }), + }; + + const registry = new ToolRegistry(); + registry.register(echoTool); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks); + + const agent = new NativeAgent({ + modelClient: mockClient, + systemPrompt: 'You are helpful.', + toolRegistry: registry, + toolExecutor: executor, + }); + + const response = await agent.process('read latest email'); + expect(response).toContain('Tool call was emitted as plain text and was not executed.'); + expect(response).toContain('Tool: gmail_read (id: call_123)'); + expect(response).toContain('"type":"tool_use"'); + + const history = agent.getHistory(); + expect(history[history.length - 1]).toEqual({ role: 'assistant', content: response }); + }); + it('works without tools (backward compatible)', async () => { const mockClient: ModelClient = { chat: vi.fn().mockResolvedValue({ diff --git a/src/backends/native/agent.ts b/src/backends/native/agent.ts index 04b5102..bcf1766 100644 --- a/src/backends/native/agent.ts +++ b/src/backends/native/agent.ts @@ -49,6 +49,11 @@ interface LoopMessage { content: string | unknown[]; } +interface PseudoToolUse { + name?: string; + id?: string; +} + export class NativeAgent { private modelClient: ModelClient | ModelRouter; private systemPrompt: string; @@ -224,11 +229,14 @@ export class NativeAgent { const wantsToolUse = (response.stopReason === 'tool_use' || response.stopReason === 'tool_calls') && response.toolCalls && response.toolCalls.length > 0; if (!wantsToolUse) { - let finalContent = response.content; + const pseudoToolUse = this.extractPseudoToolUse(response.content); + let finalContent = pseudoToolUse + ? this.buildPseudoToolUseWarning(response.content, pseudoToolUse) + : response.content; if (response.thinkingContent) { - finalContent = `\n${response.thinkingContent}\n\n\n${response.content}`; + finalContent = `\n${response.thinkingContent}\n\n\n${finalContent}`; } - const assistantMsg: Message = { role: 'assistant', content: response.content }; + const assistantMsg: Message = { role: 'assistant', content: finalContent }; this.addToHistory(assistantMsg); return finalContent; } @@ -524,4 +532,32 @@ export class NativeAgent { private isAbortError(error: unknown): boolean { return error instanceof Error && error.name === 'AbortError'; } + + private extractPseudoToolUse(content: string): PseudoToolUse | null { + if (!content) { + return null; + } + if (!/"type"\s*:\s*"tool_use"/.test(content)) { + return null; + } + + const nameMatch = content.match(/"name"\s*:\s*"([^"]+)"/); + const idMatch = content.match(/"id"\s*:\s*"([^"]+)"/); + return { + name: nameMatch?.[1], + id: idMatch?.[1], + }; + } + + private buildPseudoToolUseWarning(rawContent: string, pseudo: PseudoToolUse): string { + const toolName = pseudo.name ?? 'unknown'; + const toolId = pseudo.id ?? 'unknown'; + return [ + 'Tool call was emitted as plain text and was not executed.', + `Tool: ${toolName} (id: ${toolId})`, + 'This usually means the current model/backend did not return structured tool metadata.', + 'Original assistant output:', + rawContent, + ].join('\n'); + } } diff --git a/src/models/openai.oauth.test.ts b/src/models/openai.oauth.test.ts index cba050d..ed6f227 100644 --- a/src/models/openai.oauth.test.ts +++ b/src/models/openai.oauth.test.ts @@ -69,4 +69,40 @@ describe('OpenAIClient OAuth (Codex)', () => { expect(resp.content).toBe('hello'); expect(resp.usage).toEqual({ inputTokens: 2, outputTokens: 2 }); }); + + it('adds provider warning when tools are requested in OAuth mode', async () => { + const sse = makeSse([ + { event: 'response.output_text.delta', data: { type: 'response.output_text.delta', delta: 'result body' } }, + { event: 'response.completed', data: { type: 'response.completed', response: { usage: { input_tokens: 1, output_tokens: 1 } } } }, + ]); + + globalThis.fetch = vi.fn(async () => { + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode(sse)); + controller.close(); + }, + }); + return new Response(stream, { status: 200 }); + }) as typeof fetch; + + const client = new OpenAIClient({ model: 'gpt-5.3-codex', useOAuth: true }); + const resp = await client.chat({ + system: 'You are helpful.', + messages: [{ role: 'user', content: 'use tools' }], + tools: [{ + name: 'gmail_read', + description: 'Read Gmail message', + input_schema: { + type: 'object', + properties: { id: { type: 'string' } }, + required: ['id'], + }, + }], + }); + + expect(resp.content).toContain('[provider-warning] OpenAI OAuth (Codex backend) does not support tool execution in Flynn yet.'); + expect(resp.content).toContain('Requested tools were not sent to the provider'); + expect(resp.content).toContain('result body'); + }); }); diff --git a/src/models/openai.ts b/src/models/openai.ts index 29ed0c1..8bcc88d 100644 --- a/src/models/openai.ts +++ b/src/models/openai.ts @@ -213,8 +213,18 @@ export class OpenAIClient implements ModelClient { } } + const toolsRequested = Boolean(request.tools && request.tools.length > 0); + const content = toolsRequested + ? [ + '[provider-warning] OpenAI OAuth (Codex backend) does not support tool execution in Flynn yet.', + 'Requested tools were not sent to the provider, so any textual tool_use output is not executable.', + '', + outputText, + ].join('\n') + : outputText; + return { - content: outputText, + content, stopReason: 'end_turn', usage: usage ?? { inputTokens: 0, outputTokens: 0 }, };