feat(runtime): add talk mode and capture tools
This commit is contained in:
@@ -0,0 +1,68 @@
|
||||
import { describe, it, expect, vi, beforeAll, beforeEach } from 'vitest';
|
||||
import { execFile } from 'child_process';
|
||||
import { readFile, unlink } from 'fs/promises';
|
||||
import type { ChildProcess } from 'child_process';
|
||||
|
||||
// Swap the process-spawning module for a bare spy so no real command is run.
vi.mock('child_process', () => {
  return { execFile: vi.fn() };
});

// Swap the filesystem helpers used by the capture module for bare spies.
vi.mock('fs/promises', () => {
  return {
    readFile: vi.fn(),
    unlink: vi.fn(),
  };
});

// Typed handles onto the mocked functions, used to program and inspect them.
const mockExecFile = vi.mocked(execFile);
const mockReadFile = vi.mocked(readFile);
const mockUnlink = vi.mocked(unlink);

/** Callback type taken from the fourth positional parameter of `execFile`. */
type ExecFileCallback = NonNullable<Parameters<typeof execFile>[3]>;
|
||||
|
||||
/**
 * Build a placeholder `ChildProcess` for mocked `execFile` calls to return.
 *
 * @returns A fresh empty object typed as `ChildProcess`; it has none of the
 *   real members, so it is only suitable where the return value is ignored.
 */
function mockChildProcess(): ChildProcess {
  const stub: Partial<ChildProcess> = {};
  return stub as ChildProcess;
}
|
||||
|
||||
/**
 * Program the next `execFile` call on the mock.
 *
 * @param impl Receives the callback that the code under test passed to
 *   `execFile` and decides how to complete it (success or error). It is not
 *   invoked when the fourth argument is not a function.
 */
function mockExecFileOnce(impl: (callback: ExecFileCallback) => void): void {
  mockExecFile.mockImplementationOnce((_file, _argv, _options, done) => {
    const child = mockChildProcess();
    if (typeof done !== 'function') {
      return child;
    }
    impl(done as ExecFileCallback);
    return child;
  });
}
|
||||
|
||||
describe('capture tools', () => {
  let screenCaptureTool: typeof import('./capture.js').screenCaptureTool;
  let cameraCaptureTool: typeof import('./capture.js').cameraCaptureTool;

  // The module under test is loaded dynamically once for the whole suite
  // (presumably so the module mocks are registered before it is evaluated).
  beforeAll(async () => {
    const mod = await import('./capture.js');
    screenCaptureTool = mod.screenCaptureTool;
    cameraCaptureTool = mod.cameraCaptureTool;
  });

  // Reset call history and give the filesystem mocks a happy-path default.
  beforeEach(() => {
    vi.clearAllMocks();
    mockReadFile.mockResolvedValue(Buffer.from('image-bytes'));
    mockUnlink.mockResolvedValue(undefined);
  });

  it('screen.capture returns base64 payload when command succeeds', async () => {
    const platformSpy = vi.spyOn(process, 'platform', 'get').mockReturnValue('linux');
    try {
      mockExecFileOnce((callback) => callback(null, '', ''));

      const result = await screenCaptureTool.execute({ format: 'png' });
      expect(result.success).toBe(true);
      expect(result.output).toContain('"mimeType":"image/png"');
      // The payload must be the base64 form of what the mocked readFile returned.
      expect(result.output).toContain(Buffer.from('image-bytes').toString('base64'));
      expect(mockExecFile).toHaveBeenCalled();
    } finally {
      // Restore even when an assertion throws, so the patched platform
      // cannot leak into later tests.
      platformSpy.mockRestore();
    }
  });

  it('camera.capture returns error on unsupported platform', async () => {
    const platformSpy = vi.spyOn(process, 'platform', 'get').mockReturnValue('win32');
    try {
      const result = await cameraCaptureTool.execute({});
      expect(result.success).toBe(false);
      expect(result.error).toContain('only supported');
    } finally {
      platformSpy.mockRestore();
    }
  });
});
|
||||
@@ -0,0 +1,124 @@
|
||||
import { execFile } from 'child_process';
|
||||
import { randomUUID } from 'crypto';
|
||||
import { readFile, unlink } from 'fs/promises';
|
||||
import { join } from 'path';
|
||||
import { tmpdir } from 'os';
|
||||
|
||||
import type { Tool, ToolResult } from '../types.js';
|
||||
|
||||
/** Arguments shared by the capture tools. */
interface CaptureArgs {
  /** Requested image encoding; optional. */
  format?: 'jpg' | 'png';
}
|
||||
|
||||
/** Arguments for the camera capture tool: the shared capture options plus a device. */
interface CameraCaptureArgs extends CaptureArgs {
  /** Optional, platform-specific camera device identifier (e.g. a path such as /dev/video0 on Linux). */
  device?: string;
}
|
||||
|
||||
/** Default time budget, in milliseconds, for an external capture command. */
const DEFAULT_TIMEOUT_MS = 15_000;
|
||||
|
||||
/**
 * Build a unique file path inside the OS temporary directory.
 *
 * @param format File extension to append (without the leading dot).
 * @returns `<tmpdir>/flynn-capture-<uuid>.<format>`; the file itself is not created.
 */
function tempPath(format: string): string {
  const fileName = ['flynn-capture-', randomUUID(), '.', format].join('');
  return join(tmpdir(), fileName);
}
|
||||
|
||||
/**
 * Run an external command through `execFile` and settle once it finishes.
 *
 * @param command Executable to run.
 * @param args Arguments passed to the executable.
 * @param timeoutMs Value forwarded as `execFile`'s `timeout` option.
 * @returns A promise that resolves with no value on success and rejects with
 *   an `Error` whose message names the command line and carries stderr, or
 *   the underlying error message when stderr is empty.
 */
function exec(command: string, args: string[], timeoutMs = DEFAULT_TIMEOUT_MS): Promise<void> {
  return new Promise<void>((done, fail) => {
    execFile(command, args, { timeout: timeoutMs }, (failure, _out, errText) => {
      if (!failure) {
        done();
        return;
      }
      const invocation = `${command} ${args.join(' ')}`;
      const reason = errText || failure.message;
      fail(new Error(`${invocation} failed: ${reason}`));
    });
  });
}
|
||||
|
||||
/**
 * Read a file, return its contents base64-encoded, and delete the file.
 *
 * The delete is attempted whether or not the read succeeds, and a failed
 * delete is ignored so it never masks the read result or the read error.
 *
 * @param path File to read and then remove.
 * @returns The file contents as a base64 string.
 * @throws Whatever `readFile` rejects with.
 */
async function readBase64AndCleanup(path: string): Promise<string> {
  try {
    const bytes = await readFile(path);
    const encoded = bytes.toString('base64');
    return encoded;
  } finally {
    try {
      await unlink(path);
    } catch {
      // Best-effort cleanup: a leftover temp file is not worth failing for.
    }
  }
}
|
||||
|
||||
/**
 * Tool that takes a screenshot of the host display.
 *
 * - macOS: runs `screencapture -x -t <format>`.
 * - Linux: tries `grim` first and falls back to ImageMagick's `import` when
 *   grim fails for any reason.
 * - Any other platform: returns a failure result without running anything.
 *
 * The image is written to a temp file, returned as
 * `{"mimeType": ..., "data": <base64>}` in `output`, and the temp file is
 * removed. `execute` never throws; every error is reported via `error`.
 */
export const screenCaptureTool: Tool = {
  name: 'screen.capture',
  description: 'Capture the current screen and return a base64-encoded image (png/jpg).',
  inputSchema: {
    type: 'object',
    properties: {
      format: { type: 'string', description: 'Image format: png (default) or jpg' },
    },
  },
  execute: async (rawArgs: unknown): Promise<ToolResult> => {
    // Tracked outside the try so the error path can remove a partial file.
    let out: string | undefined;
    try {
      const args = (rawArgs as CaptureArgs | undefined) ?? {};
      // Anything other than an explicit 'jpg' falls back to png.
      const format = args.format === 'jpg' ? 'jpg' : 'png';

      if (process.platform !== 'darwin' && process.platform !== 'linux') {
        return { success: false, output: '', error: 'screen.capture is only supported on macOS/Linux hosts' };
      }

      out = tempPath(format);
      if (process.platform === 'darwin') {
        await exec('screencapture', ['-x', '-t', format, out]);
      } else {
        try {
          // grim picks its encoder from -t, not from the file extension; without
          // it a .jpg request would produce PNG bytes labelled as image/jpeg.
          await exec('grim', ['-t', format === 'jpg' ? 'jpeg' : 'png', out]);
        } catch {
          await exec('import', ['-window', 'root', out]);
        }
      }

      const data = await readBase64AndCleanup(out);
      return {
        success: true,
        output: JSON.stringify({ mimeType: format === 'jpg' ? 'image/jpeg' : 'image/png', data }),
      };
    } catch (error) {
      // A failed or timed-out capture command may have left a partial file behind.
      if (out !== undefined) {
        await unlink(out).catch(() => {});
      }
      return {
        success: false,
        output: '',
        error: error instanceof Error ? error.message : String(error),
      };
    }
  },
};
|
||||
|
||||
/**
 * Tool that grabs a single frame from a camera.
 *
 * - macOS: runs `imagesnap`, selecting the camera with `-d` when `device` is given.
 * - Linux: runs `ffmpeg` against a V4L2 device (`device`, default `/dev/video0`).
 * - Any other platform: returns a failure result without running anything.
 *
 * The frame is written to a temp file, returned as
 * `{"mimeType": ..., "data": <base64>}` in `output`, and the temp file is
 * removed. `execute` never throws; every error is reported via `error`.
 */
export const cameraCaptureTool: Tool = {
  name: 'camera.capture',
  description: 'Capture a single frame from the default camera and return a base64-encoded image.',
  inputSchema: {
    type: 'object',
    properties: {
      format: { type: 'string', description: 'Image format: png (default) or jpg' },
      device: { type: 'string', description: 'Optional camera device identifier (platform-specific)' },
    },
  },
  execute: async (rawArgs: unknown): Promise<ToolResult> => {
    // Tracked outside the try so the error path can remove a partial file.
    let out: string | undefined;
    try {
      const args = (rawArgs as CameraCaptureArgs | undefined) ?? {};
      // Anything other than an explicit 'jpg' falls back to png.
      const format = args.format === 'jpg' ? 'jpg' : 'png';
      // rawArgs is unvalidated: treat an empty or non-string device as "not given".
      const device = typeof args.device === 'string' && args.device.length > 0 ? args.device : undefined;

      if (process.platform !== 'darwin' && process.platform !== 'linux') {
        return { success: false, output: '', error: 'camera.capture is only supported on macOS/Linux hosts' };
      }

      out = tempPath(format);
      if (process.platform === 'darwin') {
        // Honour the requested device instead of silently using the default camera.
        const deviceArgs = device === undefined ? [] : ['-d', device];
        await exec('imagesnap', ['-q', '-w', '1', ...deviceArgs, out]);
      } else {
        await exec('ffmpeg', ['-y', '-f', 'video4linux2', '-i', device ?? '/dev/video0', '-frames:v', '1', out]);
      }

      const data = await readBase64AndCleanup(out);
      return {
        success: true,
        output: JSON.stringify({ mimeType: format === 'jpg' ? 'image/jpeg' : 'image/png', data }),
      };
    } catch (error) {
      // A failed or timed-out capture command may have left a partial file behind.
      if (out !== undefined) {
        await unlink(out).catch(() => {});
      }
      return {
        success: false,
        output: '',
        error: error instanceof Error ? error.message : String(error),
      };
    }
  },
};
|
||||
@@ -27,6 +27,7 @@ export { createGcalTools } from './gcal.js';
|
||||
export { createGdocsTools } from './gdocs.js';
|
||||
export { createGdriveTools } from './gdrive.js';
|
||||
export { createGtasksTools } from './gtasks.js';
|
||||
export { screenCaptureTool, cameraCaptureTool } from './capture.js';
|
||||
|
||||
import type { Tool } from '../types.js';
|
||||
import type { MemoryStore } from '../../memory/store.js';
|
||||
@@ -40,6 +41,7 @@ import { filePatchTool } from './file-patch.js';
|
||||
import { fileListTool } from './file-list.js';
|
||||
import { systemInfoTool } from './system-info.js';
|
||||
import { webFetchTool } from './web-fetch.js';
|
||||
import { screenCaptureTool, cameraCaptureTool } from './capture.js';
|
||||
import { createMemoryReadTool } from './memory-read.js';
|
||||
import { createMemoryWriteTool } from './memory-write.js';
|
||||
import { createMemorySearchTool } from './memory-search.js';
|
||||
@@ -55,6 +57,8 @@ export const allBuiltinTools: Tool[] = [
|
||||
fileListTool,
|
||||
systemInfoTool,
|
||||
webFetchTool,
|
||||
screenCaptureTool,
|
||||
cameraCaptureTool,
|
||||
];
|
||||
|
||||
/** Create memory tools that require a MemoryStore instance. */
|
||||
|
||||
@@ -16,3 +16,4 @@ export { fileEditTool } from './builtin/file-edit.js';
|
||||
export { fileListTool } from './builtin/file-list.js';
|
||||
export { systemInfoTool } from './builtin/system-info.js';
|
||||
export { webFetchTool } from './builtin/web-fetch.js';
|
||||
export { screenCaptureTool, cameraCaptureTool } from './builtin/capture.js';
|
||||
|
||||
+3
-1
@@ -76,6 +76,8 @@ const PROFILE_TOOLS: Record<ToolProfile, Set<string>> = {
|
||||
'process.output',
|
||||
'process.kill',
|
||||
'process.list',
|
||||
'screen.capture',
|
||||
'camera.capture',
|
||||
'browser.navigate',
|
||||
'browser.screenshot',
|
||||
'browser.click',
|
||||
@@ -91,7 +93,7 @@ const PROFILE_TOOLS: Record<ToolProfile, Set<string>> = {
|
||||
/** Named groups for use in allow/deny lists (e.g. 'group:fs'). */
|
||||
export const TOOL_GROUPS: Record<string, string[]> = {
|
||||
'group:fs': ['file.read', 'file.write', 'file.edit', 'file.patch', 'file.list'],
|
||||
'group:runtime': ['shell.exec', 'process.start', 'process.output', 'process.status', 'process.kill', 'process.list'],
|
||||
'group:runtime': ['shell.exec', 'process.start', 'process.output', 'process.status', 'process.kill', 'process.list', 'screen.capture', 'camera.capture'],
|
||||
'group:web': ['web.fetch', 'web.search', 'browser.navigate', 'browser.screenshot', 'browser.click', 'browser.type', 'browser.content', 'browser.eval'],
|
||||
'group:memory': ['memory.read', 'memory.write', 'memory.search'],
|
||||
'group:gmail': ['gmail.list', 'gmail.search', 'gmail.read'],
|
||||
|
||||
Reference in New Issue
Block a user