fix(core): harden env loading, OpenAI compatibility, and runtime recovery

This commit is contained in:
William Valentin
2026-02-22 15:56:21 -08:00
parent 387906ce4d
commit dafe9b4d3d
11 changed files with 450 additions and 21 deletions
+63
View File
@@ -137,6 +137,37 @@ describe('OpenAIClient tool use', () => {
expect(response.stopReason).toBe('max_tokens');
});
// Verifies the fallback path: when the first request is rejected because the
// model does not accept `max_tokens`, the client is expected to issue a second
// request that carries `max_completion_tokens` instead.
it('retries with max_completion_tokens when provider rejects max_tokens', async () => {
  // Remember how many calls the mock had already recorded so that only the
  // calls made by this test are inspected below.
  const callsBefore = mockCreate.mock.calls.length;

  const unsupportedParamError = new Error(
    "400 Unsupported parameter: 'max_tokens' is not supported with this model. Use 'max_completion_tokens' instead.",
  );
  // First call rejects, second call succeeds.
  mockCreate.mockRejectedValueOnce(unsupportedParamError);
  mockCreate.mockResolvedValueOnce({
    choices: [{ message: { content: 'Hello from GPT-5.2!' }, finish_reason: 'stop' }],
    usage: { prompt_tokens: 11, completion_tokens: 6 },
  });

  const client = new OpenAIClient({ apiKey: 'test-key', model: 'gpt-5.2' });
  const response = await client.chat({ messages: [{ role: 'user', content: 'Hello' }] });

  expect(response.content).toBe('Hello from GPT-5.2!');

  // Exactly two provider calls: the rejected attempt and the retry.
  const callsFromThisTest = mockCreate.mock.calls.slice(callsBefore);
  expect(callsFromThisTest.length).toBe(2);

  // The original attempt still used `max_tokens` ...
  const originalAttempt = callsFromThisTest[0]?.[0] as Record<string, unknown>;
  expect(originalAttempt.max_tokens).toBeDefined();

  // ... while the retry swapped it for `max_completion_tokens`.
  const retryAttempt = callsFromThisTest[1]?.[0] as Record<string, unknown>;
  expect(retryAttempt.max_tokens).toBeUndefined();
  expect(retryAttempt.max_completion_tokens).toBeDefined();
});
it('rewrites Z.AI 401 errors with actionable auth guidance', async () => {
mockCreate.mockRejectedValueOnce({
status: 401,
@@ -169,4 +200,36 @@ describe('OpenAIClient tool use', () => {
messages: [{ role: 'user', content: 'hello' }],
})).rejects.toThrow(/The key lacks `model\.request` scope/);
});
// Verifies that a `json_schema` response format on the chat request is
// forwarded to the provider call as `response_format.json_schema`.
it('passes OpenAI response_format json_schema when requested', async () => {
  // Schema payload handed to the client; only its pass-through is under test.
  const ideationSchema = {
    type: 'object',
    additionalProperties: false,
    required: ['ideas'],
    properties: {
      ideas: { type: 'array', items: { type: 'object' } },
    },
  };

  const client = new OpenAIClient({ apiKey: 'test-key', model: 'gpt-5.2' });

  await client.chat({
    messages: [{ role: 'user', content: 'emit json' }],
    responseFormat: {
      type: 'json_schema',
      name: 'council_ideation',
      schema: ideationSchema,
      strict: true,
    },
  });

  // Inspect the parameters of the most recent provider call.
  const lastCallParams = mockCreate.mock.calls.at(-1)?.[0] as Record<string, unknown>;
  const sentFormat = lastCallParams.response_format as Record<string, unknown>;
  expect(sentFormat.type).toBe('json_schema');

  const sentSchema = sentFormat.json_schema as Record<string, unknown>;
  expect(sentSchema.name).toBe('council_ideation');
  expect(sentSchema.strict).toBe(true);
});
});
+57 -13
View File
@@ -254,12 +254,39 @@ export class OpenAIClient implements ModelClient {
}
// Build params, conditionally including tools
const maxTokens = request.maxTokens ?? this.defaultMaxTokens;
const params: OpenAI.ChatCompletionCreateParamsNonStreaming = {
model: this.model,
max_tokens: request.maxTokens ?? this.defaultMaxTokens,
max_tokens: maxTokens,
messages,
};
if (request.responseFormat) {
if (request.responseFormat.type === 'json_object') {
(params as OpenAI.ChatCompletionCreateParamsNonStreaming & {
response_format?: { type: 'json_object' };
}).response_format = { type: 'json_object' };
} else {
(params as OpenAI.ChatCompletionCreateParamsNonStreaming & {
response_format?: {
type: 'json_schema';
json_schema: {
name: string;
schema: Record<string, unknown>;
strict: boolean;
};
};
}).response_format = {
type: 'json_schema',
json_schema: {
name: request.responseFormat.name,
schema: request.responseFormat.schema,
strict: request.responseFormat.strict ?? true,
},
};
}
}
if (request.tools && request.tools.length > 0) {
params.tools = request.tools.map(t => ({
type: 'function' as const,
@@ -287,22 +314,39 @@ export class OpenAIClient implements ModelClient {
? (error as { status: number }).status
: undefined;
const message = error instanceof Error ? error.message : String(error);
const unsupportedMaxTokens = (
status === 400
|| message.includes('400 Unsupported parameter')
) && message.includes("Unsupported parameter: 'max_tokens'");
const isZai = (this.baseURL ?? '').includes('api.z.ai');
const isUnauthorized401 = status === 401 || /\b401\b/.test(message);
const missingModelRequestScope = message.includes('Missing scopes: model.request');
if (unsupportedMaxTokens) {
const fallbackParams = {
...params,
max_completion_tokens: maxTokens,
} as OpenAI.ChatCompletionCreateParamsNonStreaming & { max_completion_tokens: number };
delete (fallbackParams as { max_tokens?: number }).max_tokens;
if (isZai && isUnauthorized401) {
const hint = missingModelRequestScope
? 'The key lacks `model.request` scope.'
: 'The API key is invalid, expired, or not allowed for this model/endpoint.';
throw new Error(
`Z.AI authentication failed (401). ${hint} ` +
'Run `flynn zai-auth` to update credentials, or set ZAI_API_KEY / ZHIPUAI_API_KEY / ZHIPUAI_AUTH_TOKEN.',
response = await this.client.chat.completions.create(
fallbackParams,
request.signal ? { signal: request.signal } : undefined,
);
}
} else {
const isZai = (this.baseURL ?? '').includes('api.z.ai');
const isUnauthorized401 = status === 401 || /\b401\b/.test(message);
const missingModelRequestScope = message.includes('Missing scopes: model.request');
throw error;
if (isZai && isUnauthorized401) {
const hint = missingModelRequestScope
? 'The key lacks `model.request` scope.'
: 'The API key is invalid, expired, or not allowed for this model/endpoint.';
throw new Error(
`Z.AI authentication failed (401). ${hint} ` +
'Run `flynn zai-auth` to update credentials, or set ZAI_API_KEY / ZHIPUAI_API_KEY / ZHIPUAI_AUTH_TOKEN.',
);
}
throw error;
}
}
const choice = response.choices[0];
+11
View File
@@ -73,11 +73,22 @@ export interface ToolMessage {
// Union type for all messages in a conversation
export type ConversationMessage = Message | ToolMessage;
/** Response format that requests JSON output without supplying a schema. */
type JsonObjectResponseFormat = { type: 'json_object' };

/** Response format that requests JSON output described by a named schema. */
type JsonSchemaResponseFormat = {
  type: 'json_schema';
  /** Name identifying the schema. */
  name: string;
  /** The schema itself, as a plain object. */
  schema: Record<string, unknown>;
  /** Optional strictness flag; handling of an omitted value is left to the consuming client. */
  strict?: boolean;
};

/**
 * Response format a chat request may ask for.
 * Discriminated on `type`: either `'json_object'` or `'json_schema'`.
 */
export type ChatResponseFormat = JsonObjectResponseFormat | JsonSchemaResponseFormat;
export interface ChatRequest {
messages: Message[];
system?: string;
maxTokens?: number;
tools?: ToolDefinition[];
/** Optional provider-level response format request (e.g., structured JSON output). */
responseFormat?: ChatResponseFormat;
/** Enable extended thinking/reasoning mode for this request. */
thinking?: boolean;
/** Optional abort signal for cancelling in-flight provider requests. */