From 5451f8a1dedadaaa7a465a036b94f1aba136ebc3 Mon Sep 17 00:00:00 2001
From: William Valentin <william.valentin.info@gmail.com>
Date: Tue, 17 Feb 2026 16:34:54 -0800
Subject: [PATCH] fix(tooling): surface non-executable tool-use warnings

---
 README.md                          |  6 +++++
 docs/plans/openai-oauth-summary.md |  4 +++
 src/backends/native/agent.test.ts  | 30 +++++++++++++++++++++
 src/backends/native/agent.ts       | 42 +++++++++++++++++++++++++++---
 src/models/openai.oauth.test.ts    | 36 +++++++++++++++++++++++++
 src/models/openai.ts               | 12 ++++++++-
 6 files changed, 126 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index fb59ebc..70cb01b 100644
--- a/README.md
+++ b/README.md
@@ -316,6 +316,12 @@ models:
 
 Each tier can optionally specify `auth_mode` (`auto` | `api_key` | `oauth`) to control whether Flynn uses API keys vs OAuth/token auth for that provider. `use_oauth: true` remains supported as a compatibility alias for `auth_mode: oauth`.
 
+Note: with `provider: openai` + `auth_mode: oauth` (Codex backend), Flynn currently does not send tool definitions to the provider. Tool execution is therefore unavailable in that mode, and any textual `tool_use` output should be treated as non-executable model text.
+
+Note: with `provider: ollama`, tool execution depends on model capabilities. If Ollama reports that the selected model does not support tools, Flynn omits tool definitions for that request.
+
+Note: with `provider: llamacpp`, tool execution depends on the served model/template correctly emitting OpenAI-style `tool_calls`. Models/templates that do not preserve tool-call structure may return the call as plain text instead, in which case no tool is executed.
+
 ### Agent Backends
 
 Flynn can run with the built-in native backend or delegate message processing to external CLI backends.
diff --git a/docs/plans/openai-oauth-summary.md b/docs/plans/openai-oauth-summary.md
index 97d00dc..600f3df 100644
--- a/docs/plans/openai-oauth-summary.md
+++ b/docs/plans/openai-oauth-summary.md
@@ -158,6 +158,10 @@ models:
     oauth_enabled: true
 ```
 
+### Current Limitation
+- In Flynn, OpenAI OAuth (Codex backend) currently does not send tool definitions to the provider.
+- Tool execution is unavailable in this mode; any textual `tool_use` content is non-executable model output.
+
 ---
 
 ## Testing Strategy
diff --git a/src/backends/native/agent.test.ts b/src/backends/native/agent.test.ts
index 84f99d1..077defb 100644
--- a/src/backends/native/agent.test.ts
+++ b/src/backends/native/agent.test.ts
@@ -270,6 +270,36 @@ describe('NativeAgent tool loop', () => {
     expect(mockClient.chat).toHaveBeenCalledTimes(3);
   });
 
+  it('surfaces warning when model emits textual tool_use block without structured tool calls', async () => {
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockResolvedValue({
+        content: 'Let me read the full email to evaluate legitimacy:{"type":"tool_use","id":"call_123","name":"gmail_read","input":{"id":"abc"}}',
+        stopReason: 'end_turn',
+        usage: { inputTokens: 10, outputTokens: 5 },
+      }),
+    };
+    // The mocked reply is plain text with stopReason 'end_turn' and no toolCalls, so nothing can be dispatched.
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+    // The returned text must carry the warning, the scraped tool name/id, and the untouched raw block.
+    const response = await agent.process('read latest email');
+    expect(response).toContain('Tool call was emitted as plain text and was not executed.');
+    expect(response).toContain('Tool: gmail_read (id: call_123)');
+    expect(response).toContain('"type":"tool_use"');
+    // The stored assistant turn must equal the returned text (this reply has no thinking content).
+    const history = agent.getHistory();
+    expect(history[history.length - 1]).toEqual({ role: 'assistant', content: response });
+  });
+
   it('works without tools (backward compatible)', async () => {
     const mockClient: ModelClient = {
       chat: vi.fn().mockResolvedValue({
diff --git a/src/backends/native/agent.ts b/src/backends/native/agent.ts
index 04b5102..bcf1766 100644
--- a/src/backends/native/agent.ts
+++ b/src/backends/native/agent.ts
@@ -49,6 +49,11 @@ interface LoopMessage {
   content: string | unknown[];
 }
 
+interface PseudoToolUse {
+  name?: string; // tool name captured from the textual tool_use block, if one was found
+  id?: string; // call id captured from the textual tool_use block, if one was found
+}
+
 export class NativeAgent {
   private modelClient: ModelClient | ModelRouter;
   private systemPrompt: string;
@@ -224,11 +229,14 @@ export class NativeAgent {
         const wantsToolUse = (response.stopReason === 'tool_use' || response.stopReason === 'tool_calls')
           && response.toolCalls && response.toolCalls.length > 0;
         if (!wantsToolUse) {
-          let finalContent = response.content;
-          if (response.thinkingContent) {
-            finalContent = `<thinking>\n${response.thinkingContent}\n</thinking>\n\n${response.content}`;
-          }
-          const assistantMsg: Message = { role: 'assistant', content: response.content };
+          const pseudoToolUse = this.extractPseudoToolUse(response.content);
+          const visibleContent = pseudoToolUse
+            ? this.buildPseudoToolUseWarning(response.content, pseudoToolUse)
+            : response.content;
+          // History stores the user-visible text only; <thinking> is added to the returned string, not persisted.
+          const assistantMsg: Message = { role: 'assistant', content: visibleContent };
           this.addToHistory(assistantMsg);
-          return finalContent;
+          return response.thinkingContent
+            ? `<thinking>\n${response.thinkingContent}\n</thinking>\n\n${visibleContent}`
+            : visibleContent;
         }
@@ -524,4 +532,32 @@ export class NativeAgent {
   private isAbortError(error: unknown): boolean {
     return error instanceof Error && error.name === 'AbortError';
   }
+
+  private extractPseudoToolUse(content: string): PseudoToolUse | null {
+    // Detect a tool_use block written as text; returns null when none is present (including empty content).
+    const marker = /"type"\s*:\s*"tool_use"/.exec(content);
+    if (!marker) {
+      return null;
+    }
+    // Read name/id from the block's own header only (its "{" up to "input"), not from prose or tool arguments.
+    const start = Math.max(content.lastIndexOf('{', marker.index), 0);
+    const inputAt = content.slice(marker.index).search(/"input"\s*:/);
+    const header = content.slice(start, inputAt === -1 ? undefined : marker.index + inputAt);
+    return {
+      name: header.match(/"name"\s*:\s*"([^"]+)"/)?.[1],
+      id: header.match(/"id"\s*:\s*"([^"]+)"/)?.[1],
+    };
+  }
+
+  private buildPseudoToolUseWarning(rawContent: string, pseudo: PseudoToolUse): string {
+    // Fixed notice first, then the model's raw text verbatim so nothing it said is lost.
+    // A name or id that was not found is shown as "unknown".
+    const label = `Tool: ${pseudo.name ?? 'unknown'} (id: ${pseudo.id ?? 'unknown'})`;
+    const notice =
+      'Tool call was emitted as plain text and was not executed.\n' +
+      `${label}\n` +
+      'This usually means the current model/backend did not return structured tool metadata.\n' +
+      'Original assistant output:\n';
+    return notice + rawContent;
+  }
 }
diff --git a/src/models/openai.oauth.test.ts b/src/models/openai.oauth.test.ts
index cba050d..ed6f227 100644
--- a/src/models/openai.oauth.test.ts
+++ b/src/models/openai.oauth.test.ts
@@ -69,4 +69,40 @@ describe('OpenAIClient OAuth (Codex)', () => {
     expect(resp.content).toBe('hello');
     expect(resp.usage).toEqual({ inputTokens: 2, outputTokens: 2 });
   });
+
+  it('adds provider warning when tools are requested in OAuth mode', async () => {
+    const sse = makeSse([
+      { event: 'response.output_text.delta', data: { type: 'response.output_text.delta', delta: 'result body' } },
+      { event: 'response.completed', data: { type: 'response.completed', response: { usage: { input_tokens: 1, output_tokens: 1 } } } },
+    ]);
+    // Stub global fetch with a 200 response whose body streams the SSE text above in one chunk.
+    globalThis.fetch = vi.fn(async () => {
+      const stream = new ReadableStream({
+        start(controller) {
+          controller.enqueue(new TextEncoder().encode(sse));
+          controller.close();
+        },
+      });
+      return new Response(stream, { status: 200 });
+    }) as typeof fetch;
+    // Passing a non-empty tools array with useOAuth: true is the condition under test.
+    const client = new OpenAIClient({ model: 'gpt-5.3-codex', useOAuth: true });
+    const resp = await client.chat({
+      system: 'You are helpful.',
+      messages: [{ role: 'user', content: 'use tools' }],
+      tools: [{
+        name: 'gmail_read',
+        description: 'Read Gmail message',
+        input_schema: {
+          type: 'object',
+          properties: { id: { type: 'string' } },
+          required: ['id'],
+        },
+      }],
+    });
+    // Both warning lines and the streamed text must be present in the returned content.
+    expect(resp.content).toContain('[provider-warning] OpenAI OAuth (Codex backend) does not support tool execution in Flynn yet.');
+    expect(resp.content).toContain('Requested tools were not sent to the provider');
+    expect(resp.content).toContain('result body');
+  });
 });
diff --git a/src/models/openai.ts b/src/models/openai.ts
index 29ed0c1..8bcc88d 100644
--- a/src/models/openai.ts
+++ b/src/models/openai.ts
@@ -213,8 +213,18 @@ export class OpenAIClient implements ModelClient {
       }
     }
 
+    // When the caller supplied tools, prefix the model text with a two-line notice and a blank line.
+    // NOTE(review): the notice states tools were not sent to the provider; the request code is not shown here.
+    const providerWarning = [
+      '[provider-warning] OpenAI OAuth (Codex backend) does not support tool execution in Flynn yet.',
+      'Requested tools were not sent to the provider, so any textual tool_use output is not executable.',
+      '',
+    ];
+    const toolsRequested = (request.tools?.length ?? 0) > 0;
+    const content = toolsRequested ? [...providerWarning, outputText].join('\n') : outputText;
+
     return {
-      content: outputText,
+      content,
       stopReason: 'end_turn',
       usage: usage ?? { inputTokens: 0, outputTokens: 0 },
     };