fix(core): harden env loading, OpenAI compatibility, and runtime recovery

2026-02-22 15:56:21 -08:00
parent 387906ce4d
commit dafe9b4d3d
11 changed files with 450 additions and 21 deletions
@@ -376,6 +376,63 @@ describe('NativeAgent tool loop', () => {
    expect(mockClient.chat).toHaveBeenCalledTimes(2);
  });

+  it('recovers malformed textual shell tool_use and executes shell.exec', async () => {
+    // First model turn: a tool_use object emitted as plain text. The inner quotes of the
+    // command are not escaped and a stray opening quote is duplicated into the value,
+    // so the payload is not valid JSON.
+    const malformedToolUseText =
+      '{"type":"tool_use","id":"call_1","name":"shell_exec","input":{"command":" "grep -r "createCouncilRunTool" /home/will/lab/flynn/src --include="*.ts" | head -20"}}';
+
+    let turn = 0;
+    const executedCommands: string[] = [];
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockImplementation(() => {
+        turn += 1;
+        // Turn 1 yields the malformed tool call; every later turn ends the conversation.
+        return {
+          content: turn === 1 ? malformedToolUseText : 'done',
+          stopReason: 'end_turn',
+          usage: { inputTokens: 10, outputTokens: 5 },
+        };
+      }),
+    };
+
+    // Records every command it is asked to run so the test can inspect the recovered value.
+    const shellTool: Tool = {
+      name: 'shell.exec',
+      description: 'Execute shell command',
+      inputSchema: {
+        type: 'object',
+        properties: { command: { type: 'string' } },
+        required: ['command'],
+      },
+      execute: async (args) => {
+        const { command } = args as { command: string };
+        executedCommands.push(command);
+        return { success: true, output: command };
+      },
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(shellTool);
+
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    hooks.setInteractiveConfirmer(async () => ({ approved: true }));
+
+    const executor = new ToolExecutor(registry, hooks, { sensitiveMode: 'confirm_without_elevation' });
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+
+    const reply = await agent.process('find council tool wiring');
+
+    expect(reply).toBe('done');
+    expect(mockClient.chat).toHaveBeenCalledTimes(2);
+    // Exactly one shell invocation, with the stray leading quote removed.
+    expect(executedCommands).toHaveLength(1);
+    expect(executedCommands[0]).toBe('grep -r "createCouncilRunTool" /home/will/lab/flynn/src --include="*.ts" | head -20');
+  });
+
  it('works without tools (backward compatible)', async () => {
    const mockClient: ModelClient = {
      chat: vi.fn().mockResolvedValue({
@@ -260,6 +260,13 @@ export class NativeAgent {
            }
          }
        }
+        if (toolCalls.length === 0) {
+          const recovered = this.extractMalformedShellToolCall(response.content);
+          if (recovered && toolRegistry.getByApiName(recovered.toolCall.name)) {
+            toolCalls = [recovered.toolCall];
+            assistantTextContent = recovered.remainingText;
+          }
+        }

        const wantsToolUse = toolCalls.length > 0;
        if (!wantsToolUse) {
@@ -705,6 +712,94 @@ export class NativeAgent {
    };
  }

+  /**
+   * Best-effort recovery of a shell tool call that arrived as plain text instead of a
+   * structured tool call.
+   *
+   * Works with regular expressions rather than JSON parsing so that payloads whose
+   * command contains unescaped inner quotes can still be recovered.
+   *
+   * @param content - Raw text content of the model response.
+   * @returns The recovered tool call plus the content with the first tool_use object
+   *   stripped, or `null` when the content has no tool_use marker, the tool name does
+   *   not normalize to `shell.exec`, or no non-empty command can be recovered.
+   */
+  private extractMalformedShellToolCall(content: string): { toolCall: ModelToolCall; remainingText: string } | null {
+    const looksLikeToolUse = Boolean(content) && /"type"\s*:\s*"tool_use"/.test(content);
+    if (!looksLikeToolUse) {
+      return null;
+    }
+
+    // Only the shell tool is recovered; underscores and dots in its name are interchangeable.
+    const name = /"name"\s*:\s*"([^"]+)"/.exec(content)?.[1];
+    if (!name || name.replace(/_/g, '.').toLowerCase() !== 'shell.exec') {
+      return null;
+    }
+
+    // Greedy capture up to the last `"}}` so unescaped inner quotes stay part of the command.
+    const rawCommand = /"command"\s*:\s*"([\s\S]*)"\s*}\s*}/.exec(content)?.[1];
+    const command = rawCommand ? this.sanitizeRecoveredShellCommand(rawCommand) : '';
+    if (!command) {
+      return null;
+    }
+
+    // A missing or whitespace-only id is replaced by a fixed placeholder.
+    const declaredId = /"id"\s*:\s*"([^"]+)"/.exec(content)?.[1];
+    const hasUsableId = declaredId !== undefined && declaredId.trim().length > 0;
+    const toolCall: ModelToolCall = {
+      id: hasUsableId ? declaredId : 'text_tool_call_recovered_shell',
+      name,
+      args: { command },
+    };
+
+    return { toolCall, remainingText: this.stripFirstToolUseObject(content) };
+  }
+
+  /**
+   * Cleans up a command string recovered from a malformed textual tool call.
+   *
+   * Surrounding whitespace is trimmed and a stray leading quote character is dropped;
+   * this targets the common malformed pattern where the opening quote is duplicated
+   * into the value. The leading quote is kept when it is a genuine shell quote, i.e.
+   * the command is quote-balanced as written and removing the quote would unbalance
+   * it (for example `"/opt/my app/tool" --version`).
+   *
+   * @param raw - Raw text captured for the command value.
+   * @returns The sanitized command, or an empty string when `raw` is blank.
+   */
+  private sanitizeRecoveredShellCommand(raw: string): string {
+    const command = raw.trim();
+    if (command.length === 0) {
+      return '';
+    }
+
+    const first = command.charAt(0);
+    const hasDanglingLeadingQuote = (first === '"' || first === '\'') && !command.endsWith(first);
+    if (!hasDanglingLeadingQuote) {
+      return command;
+    }
+
+    const stripped = command.slice(1).trimStart();
+    // Stripping is only wrong when the leading quote pairs up with a later one: the
+    // command parses cleanly as-is and would be left with an unterminated quote without it.
+    if (this.hasBalancedShellQuotes(command) && !this.hasBalancedShellQuotes(stripped)) {
+      return command;
+    }
+
+    return stripped;
+  }
+
+  /**
+   * Reports whether every single and double quote in `text` is closed, following
+   * POSIX shell quoting: nothing is special inside single quotes, and a backslash
+   * escapes the next character elsewhere.
+   */
+  private hasBalancedShellQuotes(text: string): boolean {
+    let open: string | null = null;
+    for (let i = 0; i < text.length; i++) {
+      const ch = text.charAt(i);
+      if (open === '\'') {
+        if (ch === '\'') {
+          open = null;
+        }
+      } else if (ch === '\\') {
+        // Skip the escaped character so an escaped quote does not toggle the state.
+        i++;
+      } else if (open === '"') {
+        if (ch === '"') {
+          open = null;
+        }
+      } else if (ch === '"' || ch === '\'') {
+        open = ch;
+      }
+    }
+    return open === null;
+  }
+
+  /**
+   * Removes the first object that contains a `"type": "tool_use"` marker from `content`.
+   *
+   * The object is taken to start at the nearest `{` at or before the marker and to end
+   * where brace depth returns to zero. If any of these positions cannot be found, the
+   * content is returned unchanged apart from trimming.
+   *
+   * @param content - Text that may embed a textual tool_use object.
+   * @returns The trimmed text with that object cut out, or the trimmed original.
+   */
+  private stripFirstToolUseObject(content: string): string {
+    // Each lookup depends on the previous one; -1 propagates as "not found".
+    const markerIndex = content.search(/"type"\s*:\s*"tool_use"/);
+    const openIndex = markerIndex < 0 ? -1 : content.lastIndexOf('{', markerIndex);
+    const closeIndex = openIndex < 0 ? -1 : this.findObjectEndByBraceDepth(content, openIndex);
+
+    if (closeIndex < 0) {
+      return content.trim();
+    }
+
+    const before = content.slice(0, openIndex);
+    const after = content.slice(closeIndex + 1);
+    return (before + after).trim();
+  }
+
+  /**
+   * Scans forward from `start` counting `{` and `}` and returns the index of the `}`
+   * that brings the depth back to zero.
+   *
+   * Every brace is counted, including braces that appear inside string values; no
+   * string or escape tracking is performed.
+   *
+   * @param content - Text to scan.
+   * @param start - Index at which scanning begins.
+   * @returns Index of the closing brace, or -1 when the depth never returns to zero.
+   */
+  private findObjectEndByBraceDepth(content: string, start: number): number {
+    let openBraces = 0;
+    for (let pos = start; pos < content.length; pos += 1) {
+      switch (content.charAt(pos)) {
+        case '{':
+          openBraces += 1;
+          break;
+        case '}':
+          openBraces -= 1;
+          if (openBraces === 0) {
+            return pos;
+          }
+          break;
+        default:
+          break;
+      }
+    }
+    return -1;
+  }
+
  private findJsonObjectEnd(content: string, start: number): number {
    let depth = 0;
    let inString = false;