fix(core): harden env loading, OpenAI compatibility, and runtime recovery

2026-02-22 15:56:21 -08:00
parent 387906ce4d
commit dafe9b4d3d
11 changed files with 450 additions and 21 deletions
@@ -137,6 +137,37 @@ describe('OpenAIClient tool use', () => {
    expect(response.stopReason).toBe('max_tokens');
  });

+  it('retries with max_completion_tokens when provider rejects max_tokens', async () => { // first create() call rejects, the retry resolves
+    const initialCallCount = mockCreate.mock.calls.length; // baseline so calls recorded before this test are not counted
+    mockCreate
+      .mockRejectedValueOnce(new Error( // message text matches the "unsupported max_tokens" wording the client looks for
+        "400 Unsupported parameter: 'max_tokens' is not supported with this model. Use 'max_completion_tokens' instead.",
+      ))
+      .mockResolvedValueOnce({
+        choices: [{ message: { content: 'Hello from GPT-5.2!' }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 11, completion_tokens: 6 },
+      });
+
+    const client = new OpenAIClient({
+      apiKey: 'test-key',
+      model: 'gpt-5.2',
+    });
+
+    const response = await client.chat({
+      messages: [{ role: 'user', content: 'Hello' }],
+    });
+
+    expect(response.content).toBe('Hello from GPT-5.2!'); // content of the second (resolved) mock response
+    expect(mockCreate.mock.calls.length - initialCallCount).toBe(2); // rejected attempt + exactly one retry
+
+    const firstArgs = mockCreate.mock.calls[initialCallCount]?.[0] as Record<string, unknown>; // params of the rejected attempt
+    expect(firstArgs.max_tokens).toBeDefined();
+
+    const secondArgs = mockCreate.mock.calls[initialCallCount + 1]?.[0] as Record<string, unknown>; // params of the retry
+    expect(secondArgs.max_tokens).toBeUndefined(); // the rejected parameter must not be resent
+    expect(secondArgs.max_completion_tokens).toBeDefined();
+  });
+
  it('rewrites Z.AI 401 errors with actionable auth guidance', async () => {
    mockCreate.mockRejectedValueOnce({
      status: 401,
@@ -169,4 +200,36 @@ describe('OpenAIClient tool use', () => {
      messages: [{ role: 'user', content: 'hello' }],
    })).rejects.toThrow(/The key lacks `model\.request` scope/);
  });
+
+  it('passes OpenAI response_format json_schema when requested', async () => { // checks how ChatRequest.responseFormat is mapped onto the create() params
+    const client = new OpenAIClient({
+      apiKey: 'test-key',
+      model: 'gpt-5.2',
+    });
+
+    await client.chat({ // NOTE(review): no mock response is configured in this test; presumably mockCreate has a default set up elsewhere — confirm
+      messages: [{ role: 'user', content: 'emit json' }],
+      responseFormat: {
+        type: 'json_schema',
+        name: 'council_ideation',
+        schema: {
+          type: 'object',
+          additionalProperties: false,
+          required: ['ideas'],
+          properties: {
+            ideas: { type: 'array', items: { type: 'object' } },
+          },
+        },
+        strict: true,
+      },
+    });
+
+    const args = mockCreate.mock.calls.at(-1)?.[0] as Record<string, unknown>; // first argument of the most recent create() call
+    const responseFormat = args.response_format as Record<string, unknown>;
+    expect(responseFormat.type).toBe('json_schema');
+
+    const jsonSchema = responseFormat.json_schema as Record<string, unknown>; // name and strict are expected nested under json_schema, not beside type
+    expect(jsonSchema.name).toBe('council_ideation');
+    expect(jsonSchema.strict).toBe(true);
+  });
 });
@@ -254,12 +254,39 @@ export class OpenAIClient implements ModelClient {
    }

    // Build params, conditionally including tools
+    const maxTokens = request.maxTokens ?? this.defaultMaxTokens;
    const params: OpenAI.ChatCompletionCreateParamsNonStreaming = {
      model: this.model,
-      max_tokens: request.maxTokens ?? this.defaultMaxTokens,
+      max_tokens: maxTokens,
      messages,
    };

+    if (request.responseFormat) { // response_format is only set when the caller asked for one
+      if (request.responseFormat.type === 'json_object') {
+        (params as OpenAI.ChatCompletionCreateParamsNonStreaming & { // cast widens params so response_format can be assigned; NOTE(review): presumably the SDK param type in use lacks this shape — confirm
+          response_format?: { type: 'json_object' };
+        }).response_format = { type: 'json_object' };
+      } else { // every responseFormat that is not 'json_object' is sent as 'json_schema'
+        (params as OpenAI.ChatCompletionCreateParamsNonStreaming & {
+          response_format?: {
+            type: 'json_schema';
+            json_schema: {
+              name: string;
+              schema: Record<string, unknown>;
+              strict: boolean;
+            };
+          };
+        }).response_format = {
+          type: 'json_schema',
+          json_schema: { // the flat name/schema/strict fields of the request are nested under json_schema here
+            name: request.responseFormat.name,
+            schema: request.responseFormat.schema,
+            strict: request.responseFormat.strict ?? true, // strict is sent as true when the caller omits it
+          },
+        };
+      }
+    }
+
    if (request.tools && request.tools.length > 0) {
      params.tools = request.tools.map(t => ({
        type: 'function' as const,
@@ -287,22 +314,39 @@ export class OpenAIClient implements ModelClient {
        ? (error as { status: number }).status
        : undefined;
      const message = error instanceof Error ? error.message : String(error);
+      const unsupportedMaxTokens = ( // a 400 (by status field or by message text) whose message names 'max_tokens' as unsupported
+        status === 400
+        || message.includes('400 Unsupported parameter')
+      ) && message.includes("Unsupported parameter: 'max_tokens'");

-      const isZai = (this.baseURL ?? '').includes('api.z.ai');
-      const isUnauthorized401 = status === 401 || /\b401\b/.test(message);
-      const missingModelRequestScope = message.includes('Missing scopes: model.request');
+      if (unsupportedMaxTokens) { // retry the same request with max_completion_tokens in place of max_tokens
+        const fallbackParams = { // spread copy, so the original params object keeps its max_tokens
+          ...params,
+          max_completion_tokens: maxTokens, // same token budget as the rejected attempt
+        } as OpenAI.ChatCompletionCreateParamsNonStreaming & { max_completion_tokens: number };
+        delete (fallbackParams as { max_tokens?: number }).max_tokens; // drop the rejected parameter from the copy

-      if (isZai && isUnauthorized401) {
-        const hint = missingModelRequestScope
-          ? 'The key lacks `model.request` scope.'
-          : 'The API key is invalid, expired, or not allowed for this model/endpoint.';
-        throw new Error(
-          `Z.AI authentication failed (401). ${hint} ` +
-          'Run `flynn zai-auth` to update credentials, or set ZAI_API_KEY / ZHIPUAI_API_KEY / ZHIPUAI_AUTH_TOKEN.',
+        response = await this.client.chat.completions.create( // NOTE(review): a failure of this retry is not passed through the Z.AI 401 rewrite in the else branch
+          fallbackParams,
+          request.signal ? { signal: request.signal } : undefined, // forward the caller's abort signal only when one was given
         );
-      }
+      } else { // any other failure: rewrite Z.AI 401s into an actionable message, otherwise rethrow unchanged
+        const isZai = (this.baseURL ?? '').includes('api.z.ai');
+        const isUnauthorized401 = status === 401 || /\b401\b/.test(message); // 401 from the status field or as a standalone number in the message
+        const missingModelRequestScope = message.includes('Missing scopes: model.request');

-      throw error;
+        if (isZai && isUnauthorized401) {
+          const hint = missingModelRequestScope
+            ? 'The key lacks `model.request` scope.'
+            : 'The API key is invalid, expired, or not allowed for this model/endpoint.';
+          throw new Error(
+            `Z.AI authentication failed (401). ${hint} ` +
+            'Run `flynn zai-auth` to update credentials, or set ZAI_API_KEY / ZHIPUAI_API_KEY / ZHIPUAI_AUTH_TOKEN.',
+          );
+        }
+
+        throw error;
+      }
    }

    const choice = response.choices[0];
@@ -73,11 +73,22 @@ export interface ToolMessage {
 // Union type for all messages in a conversation
 export type ConversationMessage = Message | ToolMessage;

+export type ChatResponseFormat = // response format a caller may request through ChatRequest.responseFormat
+  | { type: 'json_object' } // JSON output with no schema attached
+  | {
+    type: 'json_schema'; // JSON output described by the schema below
+    name: string; // OpenAIClient forwards this as json_schema.name
+    schema: Record<string, unknown>; // OpenAIClient forwards this unchanged as json_schema.schema
+    strict?: boolean; // OpenAIClient sends true when this is omitted
+  };
+
 export interface ChatRequest {
  messages: Message[];
  system?: string;
  maxTokens?: number;
  tools?: ToolDefinition[];
+  /** Optional provider-level response format request (e.g., structured JSON output). */
+  responseFormat?: ChatResponseFormat;
  /** Enable extended thinking/reasoning mode for this request. */
  thinking?: boolean;
  /** Optional abort signal for cancelling in-flight provider requests. */