fix(core): harden env loading, OpenAI compatibility, and runtime recovery

2026-02-22 15:56:21 -08:00
parent 387906ce4d
commit dafe9b4d3d
11 changed files with 450 additions and 21 deletions
@@ -376,6 +376,63 @@ describe('NativeAgent tool loop', () => {
    expect(mockClient.chat).toHaveBeenCalledTimes(2);
  });

+  it('recovers malformed textual shell tool_use and executes shell.exec', async () => {
+    let callCount = 0;
+    const seenCommands: string[] = [];
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockImplementation(() => {
+        callCount++;
+        if (callCount === 1) {
+          return {
+            content:
+              '{"type":"tool_use","id":"call_1","name":"shell_exec","input":{"command":" "grep -r "createCouncilRunTool" /home/will/lab/flynn/src --include="*.ts" | head -20"}}',
+            stopReason: 'end_turn',
+            usage: { inputTokens: 10, outputTokens: 5 },
+          };
+        }
+        return {
+          content: 'done',
+          stopReason: 'end_turn',
+          usage: { inputTokens: 10, outputTokens: 5 },
+        };
+      }),
+    };
+
+    const shellTool: Tool = {
+      name: 'shell.exec',
+      description: 'Execute shell command',
+      inputSchema: {
+        type: 'object',
+        properties: { command: { type: 'string' } },
+        required: ['command'],
+      },
+      execute: async (args) => {
+        const command = (args as { command: string }).command;
+        seenCommands.push(command);
+        return { success: true, output: command };
+      },
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(shellTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    hooks.setInteractiveConfirmer(async () => ({ approved: true }));
+    const executor = new ToolExecutor(registry, hooks, { sensitiveMode: 'confirm_without_elevation' });
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+
+    const response = await agent.process('find council tool wiring');
+    expect(response).toBe('done');
+    expect(mockClient.chat).toHaveBeenCalledTimes(2);
+    expect(seenCommands).toHaveLength(1);
+    expect(seenCommands[0]).toBe('grep -r "createCouncilRunTool" /home/will/lab/flynn/src --include="*.ts" | head -20');
+  });
+
  it('works without tools (backward compatible)', async () => {
    const mockClient: ModelClient = {
      chat: vi.fn().mockResolvedValue({
@@ -260,6 +260,13 @@ export class NativeAgent {
            }
          }
        }
+        if (toolCalls.length === 0) {
+          const recovered = this.extractMalformedShellToolCall(response.content);
+          if (recovered && toolRegistry.getByApiName(recovered.toolCall.name)) {
+            toolCalls = [recovered.toolCall];
+            assistantTextContent = recovered.remainingText;
+          }
+        }

        const wantsToolUse = toolCalls.length > 0;
        if (!wantsToolUse) {
@@ -705,6 +712,94 @@ export class NativeAgent {
    };
  }

+  private extractMalformedShellToolCall(content: string): { toolCall: ModelToolCall; remainingText: string } | null {
+    if (!content || !/"type"\s*:\s*"tool_use"/.test(content)) {
+      return null;
+    }
+
+    const nameMatch = content.match(/"name"\s*:\s*"([^"]+)"/);
+    if (!nameMatch?.[1]) {
+      return null;
+    }
+    const name = nameMatch[1];
+    const normalized = name.replace(/_/g, '.').toLowerCase();
+    if (normalized !== 'shell.exec') {
+      return null;
+    }
+
+    // Recover malformed shell command payloads where inner quotes are not escaped.
+    const commandMatch = content.match(/"command"\s*:\s*"([\s\S]*)"\s*}\s*}/);
+    if (!commandMatch?.[1]) {
+      return null;
+    }
+
+    const command = this.sanitizeRecoveredShellCommand(commandMatch[1]);
+    if (!command) {
+      return null;
+    }
+
+    const idMatch = content.match(/"id"\s*:\s*"([^"]+)"/);
+    const id = idMatch?.[1]?.trim().length ? idMatch[1] : 'text_tool_call_recovered_shell';
+
+    return {
+      toolCall: {
+        id,
+        name,
+        args: { command },
+      },
+      remainingText: this.stripFirstToolUseObject(content),
+    };
+  }
+
+  private sanitizeRecoveredShellCommand(raw: string): string {
+    let command = raw.trim();
+    if (command.length === 0) {
+      return '';
+    }
+
+    // Common malformed pattern: opening quote duplicated into value.
+    if ((command.startsWith('"') && !command.endsWith('"')) || (command.startsWith('\'') && !command.endsWith('\''))) {
+      command = command.slice(1).trimStart();
+    }
+
+    return command;
+  }
+
+  private stripFirstToolUseObject(content: string): string {
+    const typeMatch = /"type"\s*:\s*"tool_use"/.exec(content);
+    if (!typeMatch || typeMatch.index < 0) {
+      return content.trim();
+    }
+
+    const objectStart = content.lastIndexOf('{', typeMatch.index);
+    if (objectStart < 0) {
+      return content.trim();
+    }
+
+    const objectEnd = this.findObjectEndByBraceDepth(content, objectStart);
+    if (objectEnd < 0) {
+      return content.trim();
+    }
+
+    return `${content.slice(0, objectStart)}${content.slice(objectEnd + 1)}`.trim();
+  }
+
+  private findObjectEndByBraceDepth(content: string, start: number): number {
+    let depth = 0;
+    for (let i = start; i < content.length; i++) {
+      const ch = content[i];
+      if (ch === '{') {
+        depth++;
+      } else if (ch === '}') {
+        depth--;
+        if (depth === 0) {
+          return i;
+        }
+      }
+    }
+    return -1;
+  }
+
  private findJsonObjectEnd(content: string, start: number): number {
    let depth = 0;
    let inString = false;
@@ -484,6 +484,52 @@ models:
    }
  });

+  it('loads env vars from FLYNN_ENV_FILE before env-var checks', async () => {
+    const originalEnvFile = process.env.FLYNN_ENV_FILE;
+    const originalOpenAIKey = process.env.OPENAI_API_KEY;
+    delete process.env.OPENAI_API_KEY;
+
+    try {
+      mkdirSync(testDir, { recursive: true });
+      const envPath = join(testDir, 'cloud.env');
+      writeFileSync(envPath, 'OPENAI_API_KEY=sk-test-from-env-file\n');
+      process.env.FLYNN_ENV_FILE = envPath;
+
+      const configPath = join(testDir, 'openai-env-file.yaml');
+      writeFileSync(configPath, `
+telegram:
+  bot_token: "test-token"
+  allowed_chat_ids: [123]
+models:
+  default:
+    provider: openai
+    model: gpt-5.2
+    auth_mode: api_key
+    api_key: \${OPENAI_API_KEY}
+`);
+
+      const ctx: DoctorContext = { configPath, dataDir: testDir };
+      const results = await runChecks(ctx);
+      const envCheck = results.find((r) => r.label.includes('Env vars resolved')) as CheckResult | undefined;
+      const modelCheck = results.find((r) => r.label.includes('Model connectivity')) as CheckResult | undefined;
+
+      expect(envCheck?.status).toBe('pass');
+      expect(modelCheck?.status).toBe('pass');
+      expect(modelCheck?.detail).toContain('api_key=config+env');
+    } finally {
+      if (originalEnvFile !== undefined) {
+        process.env.FLYNN_ENV_FILE = originalEnvFile;
+      } else {
+        delete process.env.FLYNN_ENV_FILE;
+      }
+      if (originalOpenAIKey !== undefined) {
+        process.env.OPENAI_API_KEY = originalOpenAIKey;
+      } else {
+        delete process.env.OPENAI_API_KEY;
+      }
+    }
+  });
+
  it('reports WARN when Vercel AI Gateway has no available API key sources', async () => {
    const originalKey = process.env.AI_GATEWAY_API_KEY;
    delete process.env.AI_GATEWAY_API_KEY;
@@ -1,6 +1,6 @@
 import type { Command } from 'commander';
 import type { Config } from '../config/index.js';
-import { getConfigPath, getDataDir, formatStatus, resolveOverlayPath } from './shared.js';
+import { getConfigPath, getDataDir, formatStatus, resolveOverlayPath, loadEnvFileIfPresent } from './shared.js';
 import { existsSync, readFileSync, writeFileSync, unlinkSync } from 'fs';
 import { homedir } from 'os';
 import { resolve, join } from 'path';
@@ -643,6 +643,8 @@ const allChecks: Check[] = [

 /** Run all doctor checks in order. Exported for testing. */
 export async function runChecks(ctx: DoctorContext): Promise<CheckResult[]> {
+  // Keep doctor behavior aligned with other CLI commands that load cloud.env.
+  loadEnvFileIfPresent();
  const results: CheckResult[] = [];
  for (const check of allChecks) {
    const result = await check(ctx);
@@ -4,7 +4,7 @@ import { resolve, dirname, join } from 'path';
 import { homedir } from 'os';
 import { existsSync, readFileSync } from 'fs';

-function loadEnvFileIfPresent(): void {
+export function loadEnvFileIfPresent(): void {
  const envFile = process.env.FLYNN_ENV_FILE ?? resolve(homedir(), '.config/flynn/cloud.env');
  if (!existsSync(envFile)) {
    return;
@@ -49,6 +49,7 @@ function minimalTuiPrivates(value: MinimalTui): {
  };
  activePromptCancel: (() => void) | null;
  activeOperationCancel: (() => void) | null;
+  commandInFlight: boolean;
  running: boolean;
 } {
  return value as unknown as {
@@ -70,6 +71,7 @@ function minimalTuiPrivates(value: MinimalTui): {
    };
    activePromptCancel: (() => void) | null;
    activeOperationCancel: (() => void) | null;
+    commandInFlight: boolean;
    running: boolean;
  };
 }
@@ -469,6 +471,38 @@ describe('MinimalTui prompt cancellation', () => {
    expect(minimalTuiPrivates(tui).activePromptCancel).toBeNull();
  });

+  it('returns empty string when readline is already closed during question', async () => {
+    const mockSession = {
+      id: 'test',
+      getHistory: () => [],
+      addMessage: vi.fn(),
+      clear: vi.fn(),
+      replaceHistory: vi.fn(),
+    };
+
+    const tui = new MinimalTui({
+      session: asSession(mockSession),
+      modelClient: asRouter({}),
+      systemPrompt: 'test',
+    });
+
+    const questionError = new Error('readline was closed');
+    (questionError as Error & { code?: string }).code = 'ERR_USE_AFTER_CLOSE';
+
+    minimalTuiPrivates(tui).rl = {
+      once: vi.fn(),
+      removeListener: vi.fn(),
+      question: vi.fn(() => {
+        throw questionError;
+      }),
+      write: vi.fn(),
+      prompt: vi.fn(),
+    };
+
+    await expect(minimalTuiPrivates(tui).prompt('Confirm? ')).resolves.toBe('');
+    expect(minimalTuiPrivates(tui).activePromptCancel).toBeNull();
+  });
+
  it('uses Esc to cancel active running operation', () => {
    const mockSession = {
      id: 'test',
@@ -530,4 +564,46 @@ describe('MinimalTui prompt cancellation', () => {
      logSpy.mockRestore();
    }
  });
+
+  it('exits immediately on Ctrl+C when a command is in flight', () => {
+    const mockSession = {
+      id: 'test',
+      getHistory: () => [],
+      addMessage: vi.fn(),
+      clear: vi.fn(),
+      replaceHistory: vi.fn(),
+    };
+
+    const write = vi.fn();
+    const prompt = vi.fn();
+    const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
+    try {
+      const tui = new MinimalTui({
+        session: asSession(mockSession),
+        modelClient: asRouter({}),
+        systemPrompt: 'test',
+      });
+      minimalTuiPrivates(tui).rl = {
+        once: vi.fn(),
+        removeListener: vi.fn(),
+        question: vi.fn(),
+        write,
+        prompt,
+      };
+      minimalTuiPrivates(tui).running = true;
+      minimalTuiPrivates(tui).commandInFlight = true;
+      const cancel = vi.fn();
+      minimalTuiPrivates(tui).activeOperationCancel = cancel;
+
+      const shouldExit = minimalTuiPrivates(tui).handleCtrlCPress(1000);
+
+      expect(shouldExit).toBe(true);
+      expect(cancel).toHaveBeenCalledOnce();
+      expect(write).not.toHaveBeenCalled();
+      expect(prompt).not.toHaveBeenCalled();
+      expect(logSpy).not.toHaveBeenCalledWith(expect.stringContaining('Press Ctrl+C again to quit'));
+    } finally {
+      logSpy.mockRestore();
+    }
+  });
 });
@@ -95,6 +95,7 @@ export class MinimalTui {
  private busyActive = false;
  private verbose = false;
  private lastCtrlCAtMs = 0;
+  private commandInFlight = false;

  constructor(private config: MinimalTuiConfig) {}

@@ -325,6 +326,13 @@ export class MinimalTui {
      return true;
    }

+    // If a command is currently running (e.g. /council), exit immediately.
+    // Some command paths are not cancellable mid-flight, so double-press UX can trap users.
+    if (this.commandInFlight) {
+      this.activeOperationCancel?.();
+      return true;
+    }
+
    const shouldExit = this.lastCtrlCAtMs > 0
      && (nowMs - this.lastCtrlCAtMs) <= MinimalTui.CTRL_C_EXIT_WINDOW_MS;
    this.lastCtrlCAtMs = nowMs;
@@ -358,7 +366,12 @@ export class MinimalTui {
        continue;
      }

-      await this.handleCommand(command);
+      this.commandInFlight = true;
+      try {
+        await this.handleCommand(command);
+      } finally {
+        this.commandInFlight = false;
+      }
    }
  }

@@ -421,9 +434,14 @@ export class MinimalTui {
        }
      };

-      this.rl.question(promptText, (answer) => {
-        finish(answer);
-      });
+      try {
+        this.rl.question(promptText, (answer) => {
+          finish(answer);
+        });
+      } catch {
+        // readline can throw synchronously if it was closed between our guards.
+        finish('');
+      }
    });
  }

@@ -559,7 +577,7 @@ export class MinimalTui {

  private async handleCouncilCommand(task: string): Promise<void> {
    if (!task.trim()) {
-      console.log(`${colors.gray}Usage: /council <question or task>${colors.reset}\n`);
+      console.log(`${colors.gray}Usage: /council <question or task> | /council preflight${colors.reset}\n`);
      return;
    }
    if (!this.config.onCouncil) {
@@ -192,6 +192,23 @@ function valuesMatch(expected, actual) {
  return expected === actual;
 }

+function valuesMatchForPath(path, expected, actual) {
+  if (valuesMatch(expected, actual)) {
+    return true;
+  }
+
+  // Some optional string paths are normalized by config handlers:
+  // writing "" is persisted as "unset" (undefined).
+  if (typeof expected === 'string' && expected.trim().length === 0 && (actual === undefined || actual === null)) {
+    const optionalStringSuffixes = ['scaffold_path'];
+    if (optionalStringSuffixes.some((suffix) => path.endsWith(suffix))) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
 function setAssistantSaveState(message, tone = 'neutral') {
  _assistantSaveState = {
    message,
@@ -714,7 +731,7 @@ async function applyAssistantPatch(patches, statusEl) {
          const mismatches = [];
          for (const [key, value] of Object.entries(patches)) {
            const actual = getByPath(fresh, key);
-            if (!valuesMatch(value, actual)) {
+            if (!valuesMatchForPath(key, value, actual)) {
              mismatches.push(`${key} expected=${JSON.stringify(value)} actual=${JSON.stringify(actual)}`);
            }
          }
@@ -137,6 +137,37 @@ describe('OpenAIClient tool use', () => {
    expect(response.stopReason).toBe('max_tokens');
  });

+  it('retries with max_completion_tokens when provider rejects max_tokens', async () => {
+    const initialCallCount = mockCreate.mock.calls.length;
+    mockCreate
+      .mockRejectedValueOnce(new Error(
+        "400 Unsupported parameter: 'max_tokens' is not supported with this model. Use 'max_completion_tokens' instead.",
+      ))
+      .mockResolvedValueOnce({
+        choices: [{ message: { content: 'Hello from GPT-5.2!' }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 11, completion_tokens: 6 },
+      });
+
+    const client = new OpenAIClient({
+      apiKey: 'test-key',
+      model: 'gpt-5.2',
+    });
+
+    const response = await client.chat({
+      messages: [{ role: 'user', content: 'Hello' }],
+    });
+
+    expect(response.content).toBe('Hello from GPT-5.2!');
+    expect(mockCreate.mock.calls.length - initialCallCount).toBe(2);
+
+    const firstArgs = mockCreate.mock.calls[initialCallCount]?.[0] as Record<string, unknown>;
+    expect(firstArgs.max_tokens).toBeDefined();
+
+    const secondArgs = mockCreate.mock.calls[initialCallCount + 1]?.[0] as Record<string, unknown>;
+    expect(secondArgs.max_tokens).toBeUndefined();
+    expect(secondArgs.max_completion_tokens).toBeDefined();
+  });
+
  it('rewrites Z.AI 401 errors with actionable auth guidance', async () => {
    mockCreate.mockRejectedValueOnce({
      status: 401,
@@ -169,4 +200,36 @@ describe('OpenAIClient tool use', () => {
      messages: [{ role: 'user', content: 'hello' }],
    })).rejects.toThrow(/The key lacks `model\.request` scope/);
  });
+
+  it('passes OpenAI response_format json_schema when requested', async () => {
+    const client = new OpenAIClient({
+      apiKey: 'test-key',
+      model: 'gpt-5.2',
+    });
+
+    await client.chat({
+      messages: [{ role: 'user', content: 'emit json' }],
+      responseFormat: {
+        type: 'json_schema',
+        name: 'council_ideation',
+        schema: {
+          type: 'object',
+          additionalProperties: false,
+          required: ['ideas'],
+          properties: {
+            ideas: { type: 'array', items: { type: 'object' } },
+          },
+        },
+        strict: true,
+      },
+    });
+
+    const args = mockCreate.mock.calls.at(-1)?.[0] as Record<string, unknown>;
+    const responseFormat = args.response_format as Record<string, unknown>;
+    expect(responseFormat.type).toBe('json_schema');
+
+    const jsonSchema = responseFormat.json_schema as Record<string, unknown>;
+    expect(jsonSchema.name).toBe('council_ideation');
+    expect(jsonSchema.strict).toBe(true);
+  });
 });
@@ -254,12 +254,39 @@ export class OpenAIClient implements ModelClient {
    }

    // Build params, conditionally including tools
+    const maxTokens = request.maxTokens ?? this.defaultMaxTokens;
    const params: OpenAI.ChatCompletionCreateParamsNonStreaming = {
      model: this.model,
-      max_tokens: request.maxTokens ?? this.defaultMaxTokens,
+      max_tokens: maxTokens,
      messages,
    };

+    if (request.responseFormat) {
+      if (request.responseFormat.type === 'json_object') {
+        (params as OpenAI.ChatCompletionCreateParamsNonStreaming & {
+          response_format?: { type: 'json_object' };
+        }).response_format = { type: 'json_object' };
+      } else {
+        (params as OpenAI.ChatCompletionCreateParamsNonStreaming & {
+          response_format?: {
+            type: 'json_schema';
+            json_schema: {
+              name: string;
+              schema: Record<string, unknown>;
+              strict: boolean;
+            };
+          };
+        }).response_format = {
+          type: 'json_schema',
+          json_schema: {
+            name: request.responseFormat.name,
+            schema: request.responseFormat.schema,
+            strict: request.responseFormat.strict ?? true,
+          },
+        };
+      }
+    }
+
    if (request.tools && request.tools.length > 0) {
      params.tools = request.tools.map(t => ({
        type: 'function' as const,
@@ -287,22 +314,39 @@ export class OpenAIClient implements ModelClient {
        ? (error as { status: number }).status
        : undefined;
      const message = error instanceof Error ? error.message : String(error);
+      const unsupportedMaxTokens = (
+        status === 400
+        || message.includes('400 Unsupported parameter')
+      ) && message.includes("Unsupported parameter: 'max_tokens'");

-      const isZai = (this.baseURL ?? '').includes('api.z.ai');
-      const isUnauthorized401 = status === 401 || /\b401\b/.test(message);
-      const missingModelRequestScope = message.includes('Missing scopes: model.request');
+      if (unsupportedMaxTokens) {
+        const fallbackParams = {
+          ...params,
+          max_completion_tokens: maxTokens,
+        } as OpenAI.ChatCompletionCreateParamsNonStreaming & { max_completion_tokens: number };
+        delete (fallbackParams as { max_tokens?: number }).max_tokens;

-      if (isZai && isUnauthorized401) {
-        const hint = missingModelRequestScope
-          ? 'The key lacks `model.request` scope.'
-          : 'The API key is invalid, expired, or not allowed for this model/endpoint.';
-        throw new Error(
-          `Z.AI authentication failed (401). ${hint} ` +
-          'Run `flynn zai-auth` to update credentials, or set ZAI_API_KEY / ZHIPUAI_API_KEY / ZHIPUAI_AUTH_TOKEN.',
+        response = await this.client.chat.completions.create(
+          fallbackParams,
+          request.signal ? { signal: request.signal } : undefined,
        );
-      }
+      } else {
+        const isZai = (this.baseURL ?? '').includes('api.z.ai');
+        const isUnauthorized401 = status === 401 || /\b401\b/.test(message);
+        const missingModelRequestScope = message.includes('Missing scopes: model.request');

-      throw error;
+        if (isZai && isUnauthorized401) {
+          const hint = missingModelRequestScope
+            ? 'The key lacks `model.request` scope.'
+            : 'The API key is invalid, expired, or not allowed for this model/endpoint.';
+          throw new Error(
+            `Z.AI authentication failed (401). ${hint} ` +
+            'Run `flynn zai-auth` to update credentials, or set ZAI_API_KEY / ZHIPUAI_API_KEY / ZHIPUAI_AUTH_TOKEN.',
+          );
+        }
+
+        throw error;
+      }
    }

    const choice = response.choices[0];
@@ -73,11 +73,22 @@ export interface ToolMessage {
 // Union type for all messages in a conversation
 export type ConversationMessage = Message | ToolMessage;

+export type ChatResponseFormat =
+  | { type: 'json_object' }
+  | {
+    type: 'json_schema';
+    name: string;
+    schema: Record<string, unknown>;
+    strict?: boolean;
+  };
+
 export interface ChatRequest {
  messages: Message[];
  system?: string;
  maxTokens?: number;
  tools?: ToolDefinition[];
+  /** Optional provider-level response format request (e.g., structured JSON output). */
+  responseFormat?: ChatResponseFormat;
  /** Enable extended thinking/reasoning mode for this request. */
  thinking?: boolean;
  /** Optional abort signal for cancelling in-flight provider requests. */