diff --git a/src/backends/native/agent.ts b/src/backends/native/agent.ts index 96aca9b..26c334d 100644 --- a/src/backends/native/agent.ts +++ b/src/backends/native/agent.ts @@ -174,8 +174,12 @@ export class NativeAgent { this._totalUsage.outputTokens += response.usage.outputTokens; this._callCount++; - // If the model didn't request tool use, we're done - if (response.stopReason !== 'tool_use' || !response.toolCalls?.length) { + // If the model didn't request tool use, we're done. + // Check both 'tool_use' (Anthropic) and 'tool_calls' (OpenAI-compatible) stop reasons, + // but always require actual toolCalls to be present. + const wantsToolUse = (response.stopReason === 'tool_use' || response.stopReason === 'tool_calls') + && response.toolCalls && response.toolCalls.length > 0; + if (!wantsToolUse) { let finalContent = response.content; if (response.thinkingContent) { finalContent = `\n${response.thinkingContent}\n\n\n${response.content}`; @@ -185,8 +189,11 @@ export class NativeAgent { return finalContent; } + // Safe to assert non-null — wantsToolUse guarantees toolCalls exists and is non-empty + const toolCalls = response.toolCalls!; + // Check for repeated tool calls — build a fingerprint from tool names + args - const fingerprint = response.toolCalls + const fingerprint = toolCalls .map(tc => `${tc.name}:${JSON.stringify(tc.args)}`) .sort() .join('|'); @@ -199,7 +206,7 @@ export class NativeAgent { } // Track consecutive calls to the same tool (by name, ignoring args) - const toolNames = response.toolCalls.map(tc => tc.name).sort().join(','); + const toolNames = toolCalls.map(tc => tc.name).sort().join(','); if (toolNames === lastToolName) { sameToolStreak++; } else { @@ -213,7 +220,7 @@ export class NativeAgent { if (response.content) { assistantContent.push({ type: 'text', text: response.content }); } - for (const tc of response.toolCalls) { + for (const tc of toolCalls) { assistantContent.push({ type: 'tool_use', id: tc.id, @@ -226,7 +233,7 @@ export class NativeAgent { // Execute each tool call and collect results const toolResultBlocks: unknown[] = []; lastToolResults = []; - for (const tc of response.toolCalls) { + for (const tc of toolCalls) { const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name; this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args }); diff --git a/src/models/github.ts b/src/models/github.ts index 8e9ff65..d656665 100644 --- a/src/models/github.ts +++ b/src/models/github.ts @@ -161,6 +161,9 @@ export class GitHubModelsClient implements ModelClient { stopReason = 'end_turn'; } else if (reason === 'length') { stopReason = 'max_tokens'; + } else if (reason === 'tool_calls') { + // Edge case: finish_reason says tool_calls but none were parsed + stopReason = 'end_turn'; } else { stopReason = reason ?? 'end_turn'; } diff --git a/src/models/openai.test.ts b/src/models/openai.test.ts index 823f0cd..bd33647 100644 --- a/src/models/openai.test.ts +++ b/src/models/openai.test.ts @@ -29,7 +29,7 @@ describe('OpenAIClient', () => { }); expect(response.content).toBe('Hello from GPT!'); - expect(response.stopReason).toBe('stop'); + expect(response.stopReason).toBe('end_turn'); expect(response.usage.inputTokens).toBe(10); expect(response.usage.outputTokens).toBe(5); }); @@ -66,7 +66,7 @@ describe('OpenAIClient tool use', () => { }], }); - expect(response.stopReason).toBe('tool_calls'); + expect(response.stopReason).toBe('tool_use'); expect(response.toolCalls).toHaveLength(1); expect(response.toolCalls![0]).toEqual({ id: 'call_1', diff --git a/src/models/openai.ts b/src/models/openai.ts index e812ca8..c5f527e 100644 --- a/src/models/openai.ts +++ b/src/models/openai.ts @@ -96,9 +96,27 @@ export class OpenAIClient implements ModelClient { args: JSON.parse(tc.function.arguments), })) ?? []; + // Map OpenAI finish reasons to Flynn's stop reasons + let stopReason: string; + if (toolCalls.length > 0) { + stopReason = 'tool_use'; + } else { + const reason = choice?.finish_reason; + if (reason === 'stop') { + stopReason = 'end_turn'; + } else if (reason === 'length') { + stopReason = 'max_tokens'; + } else if (reason === 'tool_calls') { + // Edge case: finish_reason says tool_calls but none were parsed + stopReason = 'end_turn'; + } else { + stopReason = reason ?? 'end_turn'; + } + } + return { content, - stopReason: choice?.finish_reason ?? 'stop', + stopReason, usage: { inputTokens: response.usage?.prompt_tokens ?? 0, outputTokens: response.usage?.completion_tokens ?? 0,