fix(tooling): surface non-executable tool-use warnings

2026-02-17 16:34:54 -08:00
parent 061b96fd68
commit 5451f8a1de
6 changed files with 126 additions and 4 deletions
@@ -270,6 +270,36 @@ describe('NativeAgent tool loop', () => {
    expect(mockClient.chat).toHaveBeenCalledTimes(3);
  });

+  it('surfaces warning when model emits textual tool_use block without structured tool calls', async () => {
+    // The mocked reply embeds a tool_use JSON object directly in its text, finishes
+    // with 'end_turn', and carries no structured toolCalls field.
+    const chat = vi.fn().mockResolvedValue({
+      content: 'Let me read the full email to evaluate legitimacy:{"type":"tool_use","id":"call_123","name":"gmail_read","input":{"id":"abc"}}',
+      stopReason: 'end_turn',
+      usage: { inputTokens: 10, outputTokens: 5 },
+    });
+    const mockClient: ModelClient = { chat };
+
+    const toolRegistry = new ToolRegistry();
+    toolRegistry.register(echoTool);
+    const hookEngine = new HookEngine({ confirm: [], log: [], silent: [] });
+    const toolExecutor = new ToolExecutor(toolRegistry, hookEngine);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry,
+      toolExecutor,
+    });
+
+    const reply = await agent.process('read latest email');
+
+    // The reply must contain the warning banner, the tool name/id taken from the
+    // text, and the untouched original output.
+    expect(reply).toContain('Tool call was emitted as plain text and was not executed.');
+    expect(reply).toContain('Tool: gmail_read (id: call_123)');
+    expect(reply).toContain('"type":"tool_use"');
+
+    // The last history entry is the assistant message holding the same text that
+    // was returned to the caller.
+    const recorded = agent.getHistory();
+    expect(recorded[recorded.length - 1]).toEqual({ role: 'assistant', content: reply });
+  });
+
  it('works without tools (backward compatible)', async () => {
    const mockClient: ModelClient = {
      chat: vi.fn().mockResolvedValue({
@@ -49,6 +49,11 @@ interface LoopMessage {
  content: string | unknown[];
 }

+/**
+ * Fields recovered from a tool_use block that appeared inside assistant text.
+ * Both are optional; `buildPseudoToolUseWarning` prints 'unknown' for a missing one.
+ */
+interface PseudoToolUse {
+  /** Value captured for the `"name"` key, if one was found. */
+  name?: string;
+  /** Value captured for the `"id"` key, if one was found. */
+  id?: string;
+}
+
 export class NativeAgent {
  private modelClient: ModelClient | ModelRouter;
  private systemPrompt: string;
@@ -224,11 +229,14 @@ export class NativeAgent {
        const wantsToolUse = (response.stopReason === 'tool_use' || response.stopReason === 'tool_calls')
          && response.toolCalls && response.toolCalls.length > 0;
        if (!wantsToolUse) {
-          let finalContent = response.content;
+          const pseudoToolUse = this.extractPseudoToolUse(response.content);
+          let finalContent = pseudoToolUse
+            ? this.buildPseudoToolUseWarning(response.content, pseudoToolUse)
+            : response.content;
          if (response.thinkingContent) {
-            finalContent = `<thinking>\n${response.thinkingContent}\n</thinking>\n\n${response.content}`;
+            finalContent = `<thinking>\n${response.thinkingContent}\n</thinking>\n\n${finalContent}`;
          }
-          const assistantMsg: Message = { role: 'assistant', content: response.content };
+          const assistantMsg: Message = { role: 'assistant', content: finalContent };
          this.addToHistory(assistantMsg);
          return finalContent;
        }
@@ -524,4 +532,32 @@ export class NativeAgent {
  /** Returns true when `error` is an `Error` instance whose `name` is `'AbortError'`. */
  private isAbortError(error: unknown): boolean {
    return error instanceof Error && error.name === 'AbortError';
  }
+
+  /**
+   * Detects a tool call that the model wrote into its text reply instead of
+   * returning it as a structured tool call.
+   *
+   * Detection is a textual match for `"type": "tool_use"`. When the JSON object
+   * containing that marker can be parsed, `name` and `id` are read from that
+   * object's own top-level keys, so unrelated `"name"` / `"id"` keys earlier in
+   * the text, or nested inside the call's `input`, are not picked up. When the
+   * object is malformed or truncated, a regex search that starts at the object's
+   * opening brace is used instead.
+   *
+   * @param content Assistant text to inspect.
+   * @returns The recovered fields (either may be undefined), or `null` when the
+   *   text is empty or contains no tool_use marker.
+   */
+  private extractPseudoToolUse(content: string): PseudoToolUse | null {
+    if (!content) {
+      return null;
+    }
+    const marker = /"type"\s*:\s*"tool_use"/.exec(content);
+    if (!marker) {
+      return null;
+    }
+
+    // Index of the '}' closing the '{' at `openIndex`, skipping braces inside
+    // double-quoted strings; -1 when the text ends before the object closes.
+    const closingBraceOf = (openIndex: number): number => {
+      let depth = 0;
+      let inString = false;
+      let escaped = false;
+      for (let i = openIndex; i < content.length; i += 1) {
+        const ch = content[i];
+        if (inString) {
+          if (escaped) {
+            escaped = false;
+          } else if (ch === '\\') {
+            escaped = true;
+          } else if (ch === '"') {
+            inString = false;
+          }
+          continue;
+        }
+        if (ch === '"') {
+          inString = true;
+        } else if (ch === '{') {
+          depth += 1;
+        } else if (ch === '}') {
+          depth -= 1;
+          if (depth === 0) {
+            return i;
+          }
+        }
+      }
+      return -1;
+    };
+
+    // Walk backwards over the '{' characters before the marker. Objects that close
+    // before the marker are earlier siblings (for example an `input` value listed
+    // ahead of `type`); the first one that does not is the enclosing object.
+    let objectStart = content.lastIndexOf('{', marker.index);
+    let objectEnd = -1;
+    while (objectStart !== -1) {
+      objectEnd = closingBraceOf(objectStart);
+      if (objectEnd === -1 || objectEnd > marker.index) {
+        break;
+      }
+      // Guard against lastIndexOf clamping a negative fromIndex back to 0.
+      objectStart = objectStart > 0 ? content.lastIndexOf('{', objectStart - 1) : -1;
+    }
+
+    if (objectStart !== -1 && objectEnd !== -1) {
+      try {
+        const parsed: unknown = JSON.parse(content.slice(objectStart, objectEnd + 1));
+        if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
+          // Safe: `parsed` was just narrowed to a non-null, non-array object.
+          const fields = parsed as Record<string, unknown>;
+          if (fields.type === 'tool_use') {
+            return {
+              // Empty strings are treated as missing, matching the regex path's `[^"]+`.
+              name: typeof fields.name === 'string' && fields.name ? fields.name : undefined,
+              id: typeof fields.id === 'string' && fields.id ? fields.id : undefined,
+            };
+          }
+        }
+      } catch {
+        // Not valid JSON; fall through to the regex search below.
+      }
+    }
+
+    // Best-effort fallback for malformed or truncated objects: search from the
+    // enclosing object's opening brace so earlier, unrelated keys are ignored.
+    const searchArea = objectStart === -1 ? content : content.slice(objectStart);
+    const nameMatch = searchArea.match(/"name"\s*:\s*"([^"]+)"/);
+    const idMatch = searchArea.match(/"id"\s*:\s*"([^"]+)"/);
+    return {
+      name: nameMatch?.[1],
+      id: idMatch?.[1],
+    };
+  }
+
+  /**
+   * Builds the user-facing notice for a tool call that arrived as plain text.
+   *
+   * The notice is five newline-separated parts: a banner, the tool name and id
+   * ('unknown' for any that is missing), a hint about the likely cause, a label,
+   * and finally the assistant's original output unchanged.
+   *
+   * @param rawContent The assistant text exactly as received.
+   * @param pseudo The name/id recovered from that text.
+   * @returns The assembled warning text.
+   */
+  private buildPseudoToolUseWarning(rawContent: string, pseudo: PseudoToolUse): string {
+    const banner = 'Tool call was emitted as plain text and was not executed.';
+    const toolLine = `Tool: ${pseudo.name ?? 'unknown'} (id: ${pseudo.id ?? 'unknown'})`;
+    const likelyCause =
+      'This usually means the current model/backend did not return structured tool metadata.';
+    const originalLabel = 'Original assistant output:';
+
+    const parts = [banner, toolLine, likelyCause, originalLabel, rawContent];
+    return parts.join('\n');
+  }
 }