fix(core): harden env loading, OpenAI compatibility, and runtime recovery
This commit is contained in:
@@ -137,6 +137,37 @@ describe('OpenAIClient tool use', () => {
|
||||
expect(response.stopReason).toBe('max_tokens');
|
||||
});
|
||||
|
||||
// Verifies the fallback path: when the provider rejects `max_tokens` with an
// "Unsupported parameter" 400, the client retries once, sending the same limit
// as `max_completion_tokens` instead.
it('retries with max_completion_tokens when provider rejects max_tokens', async () => {
  // mockCreate may already hold calls recorded by earlier tests, so every
  // index below is measured relative to this baseline.
  const initialCallCount = mockCreate.mock.calls.length;

  // First call fails with the provider's rejection text, second call succeeds.
  mockCreate
    .mockRejectedValueOnce(new Error(
      "400 Unsupported parameter: 'max_tokens' is not supported with this model. Use 'max_completion_tokens' instead.",
    ))
    .mockResolvedValueOnce({
      choices: [{ message: { content: 'Hello from GPT-5.2!' }, finish_reason: 'stop' }],
      usage: { prompt_tokens: 11, completion_tokens: 6 },
    });

  const client = new OpenAIClient({
    apiKey: 'test-key',
    model: 'gpt-5.2',
  });

  const response = await client.chat({
    messages: [{ role: 'user', content: 'Hello' }],
  });

  // The caller sees only the successful retry; exactly two requests were made.
  expect(response.content).toBe('Hello from GPT-5.2!');
  expect(mockCreate.mock.calls.length - initialCallCount).toBe(2);

  // The initial attempt uses the legacy parameter name.
  const firstArgs = mockCreate.mock.calls[initialCallCount]?.[0] as Record<string, unknown>;
  expect(firstArgs.max_tokens).toBeDefined();

  // The retry must drop `max_tokens` entirely, not send both parameters.
  const secondArgs = mockCreate.mock.calls[initialCallCount + 1]?.[0] as Record<string, unknown>;
  expect(secondArgs.max_tokens).toBeUndefined();
  expect(secondArgs.max_completion_tokens).toBeDefined();
});
|
||||
|
||||
it('rewrites Z.AI 401 errors with actionable auth guidance', async () => {
|
||||
mockCreate.mockRejectedValueOnce({
|
||||
status: 401,
|
||||
@@ -169,4 +200,36 @@ describe('OpenAIClient tool use', () => {
|
||||
messages: [{ role: 'user', content: 'hello' }],
|
||||
})).rejects.toThrow(/The key lacks `model\.request` scope/);
|
||||
});
|
||||
|
||||
// Verifies that a `json_schema` responseFormat on the request is translated
// into the provider's nested `response_format.json_schema` payload.
it('passes OpenAI response_format json_schema when requested', async () => {
  // Kept in a constant so the forwarded payload can be compared against it.
  const schema = {
    type: 'object',
    additionalProperties: false,
    required: ['ideas'],
    properties: {
      ideas: { type: 'array', items: { type: 'object' } },
    },
  };

  const client = new OpenAIClient({
    apiKey: 'test-key',
    model: 'gpt-5.2',
  });

  await client.chat({
    messages: [{ role: 'user', content: 'emit json' }],
    responseFormat: {
      type: 'json_schema',
      name: 'council_ideation',
      schema,
      strict: true,
    },
  });

  // Inspect the most recent provider call; earlier tests may have recorded others.
  const args = mockCreate.mock.calls.at(-1)?.[0] as Record<string, unknown>;
  const responseFormat = args.response_format as Record<string, unknown>;
  expect(responseFormat.type).toBe('json_schema');

  const jsonSchema = responseFormat.json_schema as Record<string, unknown>;
  expect(jsonSchema.name).toBe('council_ideation');
  expect(jsonSchema.strict).toBe(true);
  // The schema itself must reach the provider unchanged.
  expect(jsonSchema.schema).toEqual(schema);
});
|
||||
});
|
||||
|
||||
+57
-13
@@ -254,12 +254,39 @@ export class OpenAIClient implements ModelClient {
|
||||
}
|
||||
|
||||
// Build params, conditionally including tools
|
||||
const maxTokens = request.maxTokens ?? this.defaultMaxTokens;
|
||||
const params: OpenAI.ChatCompletionCreateParamsNonStreaming = {
|
||||
model: this.model,
|
||||
max_tokens: request.maxTokens ?? this.defaultMaxTokens,
|
||||
max_tokens: maxTokens,
|
||||
messages,
|
||||
};
|
||||
|
||||
if (request.responseFormat) {
|
||||
if (request.responseFormat.type === 'json_object') {
|
||||
(params as OpenAI.ChatCompletionCreateParamsNonStreaming & {
|
||||
response_format?: { type: 'json_object' };
|
||||
}).response_format = { type: 'json_object' };
|
||||
} else {
|
||||
(params as OpenAI.ChatCompletionCreateParamsNonStreaming & {
|
||||
response_format?: {
|
||||
type: 'json_schema';
|
||||
json_schema: {
|
||||
name: string;
|
||||
schema: Record<string, unknown>;
|
||||
strict: boolean;
|
||||
};
|
||||
};
|
||||
}).response_format = {
|
||||
type: 'json_schema',
|
||||
json_schema: {
|
||||
name: request.responseFormat.name,
|
||||
schema: request.responseFormat.schema,
|
||||
strict: request.responseFormat.strict ?? true,
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if (request.tools && request.tools.length > 0) {
|
||||
params.tools = request.tools.map(t => ({
|
||||
type: 'function' as const,
|
||||
@@ -287,22 +314,39 @@ export class OpenAIClient implements ModelClient {
|
||||
? (error as { status: number }).status
|
||||
: undefined;
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
const unsupportedMaxTokens = (
|
||||
status === 400
|
||||
|| message.includes('400 Unsupported parameter')
|
||||
) && message.includes("Unsupported parameter: 'max_tokens'");
|
||||
|
||||
const isZai = (this.baseURL ?? '').includes('api.z.ai');
|
||||
const isUnauthorized401 = status === 401 || /\b401\b/.test(message);
|
||||
const missingModelRequestScope = message.includes('Missing scopes: model.request');
|
||||
if (unsupportedMaxTokens) {
|
||||
const fallbackParams = {
|
||||
...params,
|
||||
max_completion_tokens: maxTokens,
|
||||
} as OpenAI.ChatCompletionCreateParamsNonStreaming & { max_completion_tokens: number };
|
||||
delete (fallbackParams as { max_tokens?: number }).max_tokens;
|
||||
|
||||
if (isZai && isUnauthorized401) {
|
||||
const hint = missingModelRequestScope
|
||||
? 'The key lacks `model.request` scope.'
|
||||
: 'The API key is invalid, expired, or not allowed for this model/endpoint.';
|
||||
throw new Error(
|
||||
`Z.AI authentication failed (401). ${hint} ` +
|
||||
'Run `flynn zai-auth` to update credentials, or set ZAI_API_KEY / ZHIPUAI_API_KEY / ZHIPUAI_AUTH_TOKEN.',
|
||||
response = await this.client.chat.completions.create(
|
||||
fallbackParams,
|
||||
request.signal ? { signal: request.signal } : undefined,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
const isZai = (this.baseURL ?? '').includes('api.z.ai');
|
||||
const isUnauthorized401 = status === 401 || /\b401\b/.test(message);
|
||||
const missingModelRequestScope = message.includes('Missing scopes: model.request');
|
||||
|
||||
throw error;
|
||||
if (isZai && isUnauthorized401) {
|
||||
const hint = missingModelRequestScope
|
||||
? 'The key lacks `model.request` scope.'
|
||||
: 'The API key is invalid, expired, or not allowed for this model/endpoint.';
|
||||
throw new Error(
|
||||
`Z.AI authentication failed (401). ${hint} ` +
|
||||
'Run `flynn zai-auth` to update credentials, or set ZAI_API_KEY / ZHIPUAI_API_KEY / ZHIPUAI_AUTH_TOKEN.',
|
||||
);
|
||||
}
|
||||
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
const choice = response.choices[0];
|
||||
|
||||
@@ -73,11 +73,22 @@ export interface ToolMessage {
|
||||
// Union type for all messages in a conversation
|
||||
export type ConversationMessage = Message | ToolMessage;
|
||||
|
||||
/**
 * Provider-level response format that a chat request may ask for.
 *
 * Discriminated on `type`:
 * - `json_object` carries no further fields.
 * - `json_schema` carries a named schema and an optional `strict` flag.
 */
export type ChatResponseFormat =
  | { type: 'json_object' }
  | {
      type: 'json_schema';
      /** Identifier sent to the provider alongside the schema. */
      name: string;
      /** Schema object describing the expected output. */
      schema: Record<string, unknown>;
      /** Optional strictness flag; when omitted, the consuming client chooses the default. */
      strict?: boolean;
    };
|
||||
|
||||
export interface ChatRequest {
|
||||
messages: Message[];
|
||||
system?: string;
|
||||
maxTokens?: number;
|
||||
tools?: ToolDefinition[];
|
||||
/** Optional provider-level response format request (e.g., structured JSON output). */
|
||||
responseFormat?: ChatResponseFormat;
|
||||
/** Enable extended thinking/reasoning mode for this request. */
|
||||
thinking?: boolean;
|
||||
/** Optional abort signal for cancelling in-flight provider requests. */
|
||||
|
||||
Reference in New Issue
Block a user