fix: normalize OpenAI/GitHub finish_reason to Flynn stopReason conventions

OpenAI-compatible providers return 'stop' and 'tool_calls' as finish_reason
values, but Flynn's agent loop expects Anthropic-style 'end_turn' and
'tool_use'. This mismatch caused the agent to exit the tool loop
prematurely after falling back to GitHub Copilot (a fallback triggered by
Anthropic API quota exhaustion).

- openai.ts: Report 'tool_use' whenever tool calls were parsed; otherwise
  map 'stop' → 'end_turn', 'length' → 'max_tokens', and 'tool_calls' with
  no parsed tools → 'end_turn' (any other value passes through unchanged)
- github.ts: Map finish_reason 'tool_calls' to 'end_turn' when no tool
  calls were parsed
- agent.ts: Accept both 'tool_use' and 'tool_calls' as valid stop reasons
  (belt-and-suspenders) while still requiring a non-empty toolCalls array;
  extract toolCalls to a non-null-asserted local variable so later uses
  type-check
- openai.test.ts: Update expectations to match new normalized values
This commit is contained in:
William Valentin
2026-02-11 09:49:36 -08:00
parent 1aab006a7f
commit 01c3175fdb
4 changed files with 37 additions and 9 deletions
+13 -6
View File
@@ -174,8 +174,12 @@ export class NativeAgent {
this._totalUsage.outputTokens += response.usage.outputTokens; this._totalUsage.outputTokens += response.usage.outputTokens;
this._callCount++; this._callCount++;
// If the model didn't request tool use, we're done // If the model didn't request tool use, we're done.
if (response.stopReason !== 'tool_use' || !response.toolCalls?.length) { // Check both 'tool_use' (Anthropic) and 'tool_calls' (OpenAI-compatible) stop reasons,
// but always require actual toolCalls to be present.
const wantsToolUse = (response.stopReason === 'tool_use' || response.stopReason === 'tool_calls')
&& response.toolCalls && response.toolCalls.length > 0;
if (!wantsToolUse) {
let finalContent = response.content; let finalContent = response.content;
if (response.thinkingContent) { if (response.thinkingContent) {
finalContent = `<thinking>\n${response.thinkingContent}\n</thinking>\n\n${response.content}`; finalContent = `<thinking>\n${response.thinkingContent}\n</thinking>\n\n${response.content}`;
@@ -185,8 +189,11 @@ export class NativeAgent {
return finalContent; return finalContent;
} }
// Safe to assert non-null — wantsToolUse guarantees toolCalls exists and is non-empty
const toolCalls = response.toolCalls!;
// Check for repeated tool calls — build a fingerprint from tool names + args // Check for repeated tool calls — build a fingerprint from tool names + args
const fingerprint = response.toolCalls const fingerprint = toolCalls
.map(tc => `${tc.name}:${JSON.stringify(tc.args)}`) .map(tc => `${tc.name}:${JSON.stringify(tc.args)}`)
.sort() .sort()
.join('|'); .join('|');
@@ -199,7 +206,7 @@ export class NativeAgent {
} }
// Track consecutive calls to the same tool (by name, ignoring args) // Track consecutive calls to the same tool (by name, ignoring args)
const toolNames = response.toolCalls.map(tc => tc.name).sort().join(','); const toolNames = toolCalls.map(tc => tc.name).sort().join(',');
if (toolNames === lastToolName) { if (toolNames === lastToolName) {
sameToolStreak++; sameToolStreak++;
} else { } else {
@@ -213,7 +220,7 @@ export class NativeAgent {
if (response.content) { if (response.content) {
assistantContent.push({ type: 'text', text: response.content }); assistantContent.push({ type: 'text', text: response.content });
} }
for (const tc of response.toolCalls) { for (const tc of toolCalls) {
assistantContent.push({ assistantContent.push({
type: 'tool_use', type: 'tool_use',
id: tc.id, id: tc.id,
@@ -226,7 +233,7 @@ export class NativeAgent {
// Execute each tool call and collect results // Execute each tool call and collect results
const toolResultBlocks: unknown[] = []; const toolResultBlocks: unknown[] = [];
lastToolResults = []; lastToolResults = [];
for (const tc of response.toolCalls) { for (const tc of toolCalls) {
const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name; const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name;
this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args }); this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args });
+3
View File
@@ -161,6 +161,9 @@ export class GitHubModelsClient implements ModelClient {
stopReason = 'end_turn'; stopReason = 'end_turn';
} else if (reason === 'length') { } else if (reason === 'length') {
stopReason = 'max_tokens'; stopReason = 'max_tokens';
} else if (reason === 'tool_calls') {
// Edge case: finish_reason says tool_calls but none were parsed
stopReason = 'end_turn';
} else { } else {
stopReason = reason ?? 'end_turn'; stopReason = reason ?? 'end_turn';
} }
+2 -2
View File
@@ -29,7 +29,7 @@ describe('OpenAIClient', () => {
}); });
expect(response.content).toBe('Hello from GPT!'); expect(response.content).toBe('Hello from GPT!');
expect(response.stopReason).toBe('stop'); expect(response.stopReason).toBe('end_turn');
expect(response.usage.inputTokens).toBe(10); expect(response.usage.inputTokens).toBe(10);
expect(response.usage.outputTokens).toBe(5); expect(response.usage.outputTokens).toBe(5);
}); });
@@ -66,7 +66,7 @@ describe('OpenAIClient tool use', () => {
}], }],
}); });
expect(response.stopReason).toBe('tool_calls'); expect(response.stopReason).toBe('tool_use');
expect(response.toolCalls).toHaveLength(1); expect(response.toolCalls).toHaveLength(1);
expect(response.toolCalls![0]).toEqual({ expect(response.toolCalls![0]).toEqual({
id: 'call_1', id: 'call_1',
+19 -1
View File
@@ -96,9 +96,27 @@ export class OpenAIClient implements ModelClient {
args: JSON.parse(tc.function.arguments), args: JSON.parse(tc.function.arguments),
})) ?? []; })) ?? [];
// Map OpenAI finish reasons to Flynn's stop reasons
let stopReason: string;
if (toolCalls.length > 0) {
stopReason = 'tool_use';
} else {
const reason = choice?.finish_reason;
if (reason === 'stop') {
stopReason = 'end_turn';
} else if (reason === 'length') {
stopReason = 'max_tokens';
} else if (reason === 'tool_calls') {
// Edge case: finish_reason says tool_calls but none were parsed
stopReason = 'end_turn';
} else {
stopReason = reason ?? 'end_turn';
}
}
return { return {
content, content,
stopReason: choice?.finish_reason ?? 'stop', stopReason,
usage: { usage: {
inputTokens: response.usage?.prompt_tokens ?? 0, inputTokens: response.usage?.prompt_tokens ?? 0,
outputTokens: response.usage?.completion_tokens ?? 0, outputTokens: response.usage?.completion_tokens ?? 0,