fix(tooling): surface non-executable tool-use warnings
This commit is contained in:
@@ -316,6 +316,12 @@ models:
|
|||||||
|
|
||||||
Each tier can optionally specify `auth_mode` (`auto` | `api_key` | `oauth`) to control whether Flynn uses API keys vs OAuth/token auth for that provider. `use_oauth: true` remains supported as a compatibility alias for `auth_mode: oauth`.
|
Each tier can optionally specify `auth_mode` (`auto` | `api_key` | `oauth`) to control whether Flynn uses API keys vs OAuth/token auth for that provider. `use_oauth: true` remains supported as a compatibility alias for `auth_mode: oauth`.
|
||||||
|
|
||||||
|
Note: with `provider: openai` + `auth_mode: oauth` (Codex backend), Flynn currently does not send tool definitions to the provider. Tool execution is therefore unavailable in that mode, and any textual `tool_use` output should be treated as non-executable model text.
|
||||||
|
|
||||||
|
Note: with `provider: ollama`, tool execution depends on model capabilities. If Ollama reports that the selected model does not support tools, Flynn omits tool definitions for that request.
|
||||||
|
|
||||||
|
Note: with `provider: llamacpp`, tool execution depends on the served model/template correctly emitting OpenAI-style `tool_calls`. Models/templates that do not preserve tool-call structure may fall back to plain-text behavior.
|
||||||
|
|
||||||
### Agent Backends
|
### Agent Backends
|
||||||
|
|
||||||
Flynn can run with the built-in native backend or delegate message processing to external CLI backends.
|
Flynn can run with the built-in native backend or delegate message processing to external CLI backends.
|
||||||
|
|||||||
@@ -158,6 +158,10 @@ models:
|
|||||||
oauth_enabled: true
|
oauth_enabled: true
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Current Limitation
|
||||||
|
- In Flynn, OpenAI OAuth (Codex backend) currently does not send tool definitions to the provider.
|
||||||
|
- Tool execution is unavailable in this mode; any textual `tool_use` content is non-executable model output.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Testing Strategy
|
## Testing Strategy
|
||||||
|
|||||||
@@ -270,6 +270,36 @@ describe('NativeAgent tool loop', () => {
|
|||||||
expect(mockClient.chat).toHaveBeenCalledTimes(3);
|
expect(mockClient.chat).toHaveBeenCalledTimes(3);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('surfaces warning when model emits textual tool_use block without structured tool calls', async () => {
  // The model answers with a tool_use JSON blob embedded in plain text and a
  // normal end_turn stop reason, i.e. no structured tool calls are returned.
  const textualToolCall =
    'Let me read the full email to evaluate legitimacy:{"type":"tool_use","id":"call_123","name":"gmail_read","input":{"id":"abc"}}';
  const stubClient: ModelClient = {
    chat: vi.fn().mockResolvedValue({
      content: textualToolCall,
      stopReason: 'end_turn',
      usage: { inputTokens: 10, outputTokens: 5 },
    }),
  };

  const tools = new ToolRegistry();
  tools.register(echoTool);
  const toolRunner = new ToolExecutor(tools, new HookEngine({ confirm: [], log: [], silent: [] }));

  const agent = new NativeAgent({
    modelClient: stubClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: tools,
    toolExecutor: toolRunner,
  });

  const response = await agent.process('read latest email');

  // The reply must carry the warning banner, the detected tool identity, and
  // the untouched original model output.
  const expectedFragments = [
    'Tool call was emitted as plain text and was not executed.',
    'Tool: gmail_read (id: call_123)',
    '"type":"tool_use"',
  ];
  for (const fragment of expectedFragments) {
    expect(response).toContain(fragment);
  }

  // The same text that was returned to the caller is what gets stored as the
  // last history entry.
  const history = agent.getHistory();
  const lastEntry = history[history.length - 1];
  expect(lastEntry).toEqual({ role: 'assistant', content: response });
});
|
||||||
|
|
||||||
it('works without tools (backward compatible)', async () => {
|
it('works without tools (backward compatible)', async () => {
|
||||||
const mockClient: ModelClient = {
|
const mockClient: ModelClient = {
|
||||||
chat: vi.fn().mockResolvedValue({
|
chat: vi.fn().mockResolvedValue({
|
||||||
|
|||||||
@@ -49,6 +49,11 @@ interface LoopMessage {
|
|||||||
content: string | unknown[];
|
content: string | unknown[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Identity of a tool call that the model wrote out as plain text instead of
 * returning it as a structured tool call.
 *
 * Both fields are optional because the textual block may be partial; a missing
 * field simply means it could not be recovered from the text. The fields are
 * read-only: the value is built once and only read afterwards.
 */
interface PseudoToolUse {
  /** Tool name found in the textual block, if any. */
  readonly name?: string;
  /** Tool-call id found in the textual block, if any. */
  readonly id?: string;
}
|
||||||
|
|
||||||
export class NativeAgent {
|
export class NativeAgent {
|
||||||
private modelClient: ModelClient | ModelRouter;
|
private modelClient: ModelClient | ModelRouter;
|
||||||
private systemPrompt: string;
|
private systemPrompt: string;
|
||||||
@@ -224,11 +229,14 @@ export class NativeAgent {
|
|||||||
const wantsToolUse = (response.stopReason === 'tool_use' || response.stopReason === 'tool_calls')
|
const wantsToolUse = (response.stopReason === 'tool_use' || response.stopReason === 'tool_calls')
|
||||||
&& response.toolCalls && response.toolCalls.length > 0;
|
&& response.toolCalls && response.toolCalls.length > 0;
|
||||||
if (!wantsToolUse) {
|
if (!wantsToolUse) {
|
||||||
let finalContent = response.content;
|
const pseudoToolUse = this.extractPseudoToolUse(response.content);
|
||||||
|
let finalContent = pseudoToolUse
|
||||||
|
? this.buildPseudoToolUseWarning(response.content, pseudoToolUse)
|
||||||
|
: response.content;
|
||||||
if (response.thinkingContent) {
|
if (response.thinkingContent) {
|
||||||
finalContent = `<thinking>\n${response.thinkingContent}\n</thinking>\n\n${response.content}`;
|
finalContent = `<thinking>\n${response.thinkingContent}\n</thinking>\n\n${finalContent}`;
|
||||||
}
|
}
|
||||||
const assistantMsg: Message = { role: 'assistant', content: response.content };
|
const assistantMsg: Message = { role: 'assistant', content: finalContent };
|
||||||
this.addToHistory(assistantMsg);
|
this.addToHistory(assistantMsg);
|
||||||
return finalContent;
|
return finalContent;
|
||||||
}
|
}
|
||||||
@@ -524,4 +532,32 @@ export class NativeAgent {
|
|||||||
private isAbortError(error: unknown): boolean {
|
private isAbortError(error: unknown): boolean {
|
||||||
return error instanceof Error && error.name === 'AbortError';
|
return error instanceof Error && error.name === 'AbortError';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Detects a `tool_use` block that the model emitted as plain text.
 *
 * Returns `null` when `content` is empty or contains no `"type":"tool_use"`
 * marker. Otherwise returns the tool name and id that could be recovered;
 * either field is `undefined` when it cannot be found.
 *
 * The name/id are read from the top level of the JSON object that encloses the
 * marker, so look-alike keys elsewhere (for example an `"id"` nested inside
 * the tool's `input`, or a `"name"` in surrounding prose) are not mistaken for
 * the tool identity. If that object cannot be parsed (e.g. truncated output),
 * the method falls back to the first `"name"` / `"id"` string in the content.
 *
 * @param content Raw assistant text to inspect.
 * @returns The recovered tool identity, or `null` if no textual tool call is present.
 */
private extractPseudoToolUse(content: string): PseudoToolUse | null {
  if (typeof content !== 'string' || content.length === 0) {
    return null;
  }

  const marker = /"type"\s*:\s*"tool_use"/.exec(content);
  if (marker === null) {
    return null;
  }

  const parsed = this.parseEnclosingToolUse(content, marker.index);
  if (parsed !== null) {
    return parsed;
  }

  // Best-effort fallback for malformed or truncated JSON: first matching keys
  // anywhere in the text.
  const nameMatch = content.match(/"name"\s*:\s*"([^"]+)"/);
  const idMatch = content.match(/"id"\s*:\s*"([^"]+)"/);
  return {
    name: nameMatch?.[1],
    id: idMatch?.[1],
  };
}

/**
 * Parses the JSON object that encloses the `tool_use` marker at `markerIndex`
 * and reads its top-level `name` / `id`. Returns `null` when no enclosing
 * object parses as JSON with `type === 'tool_use'`.
 */
private parseEnclosingToolUse(content: string, markerIndex: number): PseudoToolUse | null {
  const asText = (value: unknown): string | undefined =>
    typeof value === 'string' && value.length > 0 ? value : undefined;

  // Bound the number of opening braces tried so pathological input cannot
  // cause excessive rescanning.
  const maxCandidates = 32;
  let open = content.lastIndexOf('{', markerIndex);

  for (let attempt = 0; open !== -1 && attempt < maxCandidates; attempt += 1) {
    const close = this.findJsonObjectEnd(content, open);
    // Only objects that actually span the marker can be its enclosing object;
    // earlier siblings that closed before the marker are skipped.
    if (close > markerIndex) {
      try {
        const value: unknown = JSON.parse(content.slice(open, close + 1));
        if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
          const record = value as Record<string, unknown>;
          if (record.type === 'tool_use') {
            return { name: asText(record.name), id: asText(record.id) };
          }
        }
      } catch {
        // Not valid JSON starting at this brace; try the next outer brace.
      }
    }

    if (open === 0) {
      break;
    }
    open = content.lastIndexOf('{', open - 1);
  }

  return null;
}

/**
 * Returns the index of the `}` that closes the `{` at `start`, ignoring braces
 * inside JSON string literals, or `-1` when the object is never closed.
 */
private findJsonObjectEnd(text: string, start: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < text.length; i += 1) {
    const ch = text[i];
    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth += 1;
    } else if (ch === '}') {
      depth -= 1;
      if (depth === 0) {
        return i;
      }
    }
  }

  return -1;
}
|
||||||
|
|
||||||
|
/**
 * Builds the user-facing notice for a tool call that was emitted as plain
 * text and therefore never executed.
 *
 * The notice names the tool and call id (each shown as `unknown` when absent)
 * and ends with the original assistant output, unchanged.
 *
 * @param rawContent Original assistant text, appended verbatim after the notice.
 * @param pseudo Tool identity recovered from the text.
 * @returns The newline-separated warning followed by `rawContent`.
 */
private buildPseudoToolUseWarning(rawContent: string, pseudo: PseudoToolUse): string {
  const label = pseudo.name ?? 'unknown';
  const callId = pseudo.id ?? 'unknown';

  return (
    'Tool call was emitted as plain text and was not executed.\n' +
    `Tool: ${label} (id: ${callId})\n` +
    'This usually means the current model/backend did not return structured tool metadata.\n' +
    'Original assistant output:\n' +
    rawContent
  );
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -69,4 +69,40 @@ describe('OpenAIClient OAuth (Codex)', () => {
|
|||||||
expect(resp.content).toBe('hello');
|
expect(resp.content).toBe('hello');
|
||||||
expect(resp.usage).toEqual({ inputTokens: 2, outputTokens: 2 });
|
expect(resp.usage).toEqual({ inputTokens: 2, outputTokens: 2 });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('adds provider warning when tools are requested in OAuth mode', async () => {
  // Canned SSE payload: one text delta followed by the completion event.
  const ssePayload = makeSse([
    { event: 'response.output_text.delta', data: { type: 'response.output_text.delta', delta: 'result body' } },
    { event: 'response.completed', data: { type: 'response.completed', response: { usage: { input_tokens: 1, output_tokens: 1 } } } },
  ]);

  // Replace the global fetch with a stub that streams the canned payload in a
  // single chunk and then closes the stream.
  globalThis.fetch = vi.fn(async () => {
    const body = new ReadableStream({
      start(controller) {
        const bytes = new TextEncoder().encode(ssePayload);
        controller.enqueue(bytes);
        controller.close();
      },
    });
    return new Response(body, { status: 200 });
  }) as typeof fetch;

  const oauthClient = new OpenAIClient({ model: 'gpt-5.3-codex', useOAuth: true });
  const reply = await oauthClient.chat({
    system: 'You are helpful.',
    messages: [{ role: 'user', content: 'use tools' }],
    tools: [
      {
        name: 'gmail_read',
        description: 'Read Gmail message',
        input_schema: {
          type: 'object',
          properties: { id: { type: 'string' } },
          required: ['id'],
        },
      },
    ],
  });

  // The warning lines and the streamed model text must all be present.
  const expectedFragments = [
    '[provider-warning] OpenAI OAuth (Codex backend) does not support tool execution in Flynn yet.',
    'Requested tools were not sent to the provider',
    'result body',
  ];
  for (const fragment of expectedFragments) {
    expect(reply.content).toContain(fragment);
  }
});
|
||||||
});
|
});
|
||||||
|
|||||||
+11
-1
@@ -213,8 +213,18 @@ export class OpenAIClient implements ModelClient {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const toolsRequested = Boolean(request.tools && request.tools.length > 0);
|
||||||
|
const content = toolsRequested
|
||||||
|
? [
|
||||||
|
'[provider-warning] OpenAI OAuth (Codex backend) does not support tool execution in Flynn yet.',
|
||||||
|
'Requested tools were not sent to the provider, so any textual tool_use output is not executable.',
|
||||||
|
'',
|
||||||
|
outputText,
|
||||||
|
].join('\n')
|
||||||
|
: outputText;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
content: outputText,
|
content,
|
||||||
stopReason: 'end_turn',
|
stopReason: 'end_turn',
|
||||||
usage: usage ?? { inputTokens: 0, outputTokens: 0 },
|
usage: usage ?? { inputTokens: 0, outputTokens: 0 },
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user