90ce622080
Add runtime truthfulness modes and autonomy-level tool gating with audit metadata for overrides/denials. Wire policy through prompt assembly, tool execution context, and daemon/gateway agent paths; update tests and planning state for Phase 3 PR #2 completion.
153 lines
5.3 KiB
TypeScript
153 lines
5.3 KiB
TypeScript
// src/tools/integration.test.ts
|
|
import { describe, it, expect, vi } from 'vitest';
|
|
import { NativeAgent } from '../backends/native/agent.js';
|
|
import { ToolRegistry } from './registry.js';
|
|
import { ToolExecutor } from './executor.js';
|
|
import { HookEngine } from '../hooks/engine.js';
|
|
import { shellExecTool } from './builtin/shell.js';
|
|
import { fileReadTool } from './builtin/file-read.js';
|
|
import { fileWriteTool } from './builtin/file-write.js';
|
|
import type { ModelClient, ChatResponse } from '../models/types.js';
|
|
import { mkdtempSync, rmSync } from 'fs';
|
|
import { join } from 'path';
|
|
import { tmpdir } from 'os';
|
|
|
|
describe('Tool integration (end-to-end)', () => {
|
|
it('agent uses shell tool and returns result', async () => {
  // Scripted model turns: the first reply asks for a shell.exec call, and every
  // reply after that ends the turn with a plain-text answer. Each factory builds
  // a fresh object per call.
  const requestShellCall = () => {
    return {
      content: '',
      stopReason: 'tool_use',
      usage: { inputTokens: 10, outputTokens: 5 },
      toolCalls: [{ id: 'c1', name: 'shell.exec', args: { command: 'echo integration_test' } }],
    } satisfies ChatResponse;
  };
  const finishTurn = () => {
    return {
      content: 'The command output was: integration_test',
      stopReason: 'end_turn',
      usage: { inputTokens: 20, outputTokens: 10 },
    } satisfies ChatResponse;
  };

  const mockClient: ModelClient = {
    chat: vi.fn().mockImplementationOnce(requestShellCall).mockImplementation(finishTurn),
  };

  // Real registry/executor wiring with only the shell tool registered and no hooks configured.
  const tools = new ToolRegistry();
  tools.register(shellExecTool);
  const toolExecutor = new ToolExecutor(tools, new HookEngine({ confirm: [], log: [], silent: [] }));

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You have tools.',
    toolRegistry: tools,
    toolExecutor,
    toolPolicyContext: { autonomyLevel: 'autonomous' },
  });

  const reply = await agent.process('run echo integration_test');

  expect(reply).toContain('integration_test');
});
|
|
|
|
it('agent chains multiple tools across iterations', async () => {
  // Scratch directory holding the file the scripted model writes and then reads back.
  const scratchDir = mkdtempSync(join(tmpdir(), 'flynn-integ-'));
  const targetPath = join(scratchDir, 'test.txt');

  // Scripted model turns: write the file, read it back, then end the turn.
  // Each implementation builds a fresh response object per call.
  const mockClient: ModelClient = {
    chat: vi
      .fn()
      .mockImplementationOnce(() => {
        return {
          content: '',
          stopReason: 'tool_use',
          usage: { inputTokens: 10, outputTokens: 5 },
          toolCalls: [{ id: 'c1', name: 'file.write', args: { path: targetPath, content: 'hello' } }],
        };
      })
      .mockImplementationOnce(() => {
        return {
          content: '',
          stopReason: 'tool_use',
          usage: { inputTokens: 15, outputTokens: 8 },
          toolCalls: [{ id: 'c2', name: 'file.read', args: { path: targetPath } }],
        };
      })
      .mockImplementation(() => {
        return {
          content: 'I wrote and read the file. It contains: hello',
          stopReason: 'end_turn',
          usage: { inputTokens: 20, outputTokens: 10 },
        };
      }),
  };

  // Real registry/executor wiring with both file tools registered and no hooks configured.
  const tools = new ToolRegistry();
  tools.register(fileWriteTool);
  tools.register(fileReadTool);
  const toolExecutor = new ToolExecutor(tools, new HookEngine({ confirm: [], log: [], silent: [] }));

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You have tools.',
    toolRegistry: tools,
    toolExecutor,
    toolPolicyContext: { autonomyLevel: 'autonomous' },
  });

  try {
    const reply = await agent.process('write hello to test.txt then read it');

    expect(reply).toContain('hello');
    // One model call per tool request plus the final answer.
    expect(mockClient.chat).toHaveBeenCalledTimes(3);
  } finally {
    // Remove the scratch directory whether or not the assertions passed.
    rmSync(scratchDir, { recursive: true });
  }
});
|
|
|
|
it('verifies tool results are passed back to model correctly', async () => {
  // Last message of the follow-up request, captured by the mock and inspected
  // after the run. Asserting inside the mock would throw from within the agent's
  // model call, where the failure can surface far from its actual cause.
  let followUpLastMessage: unknown;
  let callCount = 0;

  const mockClient: ModelClient = {
    chat: vi.fn().mockImplementation((request: { messages: unknown[] }) => {
      callCount++;
      if (callCount === 1) {
        // First turn: ask the agent to run a shell command.
        return {
          content: '',
          stopReason: 'tool_use',
          usage: { inputTokens: 10, outputTokens: 5 },
          toolCalls: [{ id: 'c1', name: 'shell.exec', args: { command: 'echo verify_pass' } }],
        };
      }

      // Follow-up turn: remember the last message the agent sent, then finish.
      followUpLastMessage = request.messages[request.messages.length - 1];

      return {
        content: 'Verified tool result',
        stopReason: 'end_turn',
        usage: { inputTokens: 20, outputTokens: 10 },
      };
    }),
  };

  const registry = new ToolRegistry();
  registry.register(shellExecTool);
  const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
  const executor = new ToolExecutor(registry, hooks);

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You have tools.',
    toolRegistry: registry,
    toolExecutor: executor,
    toolPolicyContext: { autonomyLevel: 'autonomous' },
  });

  const result = await agent.process('verify tool results');

  // Exactly one tool round-trip: the tool request plus the follow-up call.
  // Checked first so a missing follow-up is reported directly rather than as a
  // failure while reading the captured message below.
  expect(mockClient.chat).toHaveBeenCalledTimes(2);
  expect(followUpLastMessage).toBeDefined();

  // The follow-up request must end with a tool_result block that answers call
  // 'c1' and carries the command output.
  const lastMsg = followUpLastMessage as { content: unknown[] };
  const resultBlock = lastMsg.content[0] as { type: string; tool_use_id: string; content: string };
  expect(resultBlock.type).toBe('tool_result');
  expect(resultBlock.tool_use_id).toBe('c1');
  expect(resultBlock.content).toContain('verify_pass');

  expect(result).toBe('Verified tool result');
});
|
|
});
|