feat(runtime): add talk mode and capture tools

This commit is contained in:
William Valentin
2026-02-16 10:17:24 -08:00
parent a9b38150c0
commit 83b8e38b11
12 changed files with 391 additions and 4 deletions
+124
View File
@@ -0,0 +1,124 @@
import { execFile } from 'child_process';
import { randomUUID } from 'crypto';
import { readFile, unlink } from 'fs/promises';
import { join } from 'path';
import { tmpdir } from 'os';
import type { Tool, ToolResult } from '../types.js';
/** Arguments shared by the capture tools in this module. */
interface CaptureArgs {
/** Requested image format; the tools fall back to 'png' for any value other than 'jpg'. */
format?: 'png' | 'jpg';
}
/** Arguments accepted by the camera capture tool: the shared capture options plus a device selector. */
interface CameraCaptureArgs extends CaptureArgs {
/** Optional camera device identifier; its meaning is platform-specific. */
device?: string;
}
/** Default time limit, in milliseconds, granted to a spawned capture command. */
const DEFAULT_TIMEOUT_MS = 15_000;
/**
 * Builds a unique file path inside the OS temporary directory for a capture.
 *
 * @param format File extension (without the dot) to append to the generated name.
 * @returns Absolute path of the form `<tmpdir>/flynn-capture-<uuid>.<format>`.
 */
function tempPath(format: string): string {
  const fileName = ['flynn-capture-', randomUUID(), '.', format].join('');
  return join(tmpdir(), fileName);
}
/**
 * Runs an external program without a shell and waits for it to finish.
 *
 * @param command Executable to launch.
 * @param args Arguments passed to the executable as-is.
 * @param timeoutMs Time limit in milliseconds handed to `execFile`.
 * @returns A promise that resolves once the command exits successfully.
 * @throws Rejects with an `Error` naming the command line and carrying the
 *   command's stderr (or the underlying error message when stderr is empty).
 */
function exec(command: string, args: string[], timeoutMs = DEFAULT_TIMEOUT_MS): Promise<void> {
  const failure = (detail: string): Error =>
    new Error(`${command} ${args.join(' ')} failed: ${detail}`);

  return new Promise<void>((resolve, reject) => {
    execFile(command, args, { timeout: timeoutMs }, (error, _stdout, stderr) => {
      if (!error) {
        resolve();
        return;
      }
      // Prefer the tool's own diagnostics; fall back to Node's message.
      reject(failure(stderr || error.message));
    });
  });
}
/**
 * Reads a file as base64 text and then removes it, whether or not the read
 * succeeded.
 *
 * @param path Location of the file to read and delete.
 * @returns The file contents encoded as a base64 string.
 * @throws Propagates any read failure; deletion failures are ignored.
 */
async function readBase64AndCleanup(path: string): Promise<string> {
  try {
    return await readFile(path, { encoding: 'base64' });
  } finally {
    // Best-effort removal: a failed delete must not mask the read outcome.
    await unlink(path).catch(() => undefined);
  }
}
/**
 * Tool that takes a screenshot of the host display and returns it as a JSON
 * string of the form `{ mimeType, data }`, where `data` is base64-encoded.
 *
 * Platform behaviour:
 * - macOS: runs `screencapture` silently (`-x`) with the requested format.
 * - Linux: tries `grim` first and falls back to ImageMagick's `import`.
 * - Anything else: returns an unsuccessful result without running a command.
 *
 * All failures are reported through `ToolResult.error`; `execute` does not throw.
 */
export const screenCaptureTool: Tool = {
  name: 'screen.capture',
  description: 'Capture the current screen and return a base64-encoded image (png/jpg).',
  inputSchema: {
    type: 'object',
    properties: {
      format: { type: 'string', description: 'Image format: png (default) or jpg' },
    },
  },
  execute: async (rawArgs: unknown): Promise<ToolResult> => {
    // Remembered so the temp file can be removed if a capture command fails midway.
    let out: string | undefined;
    try {
      const args = (rawArgs as CaptureArgs | undefined) ?? {};
      // Anything other than an explicit 'jpg' request is treated as png.
      const format = args.format === 'jpg' ? 'jpg' : 'png';

      if (process.platform !== 'darwin' && process.platform !== 'linux') {
        return { success: false, output: '', error: 'screen.capture is only supported on macOS/Linux hosts' };
      }

      out = tempPath(format);
      if (process.platform === 'darwin') {
        await exec('screencapture', ['-x', '-t', format, out]);
      } else {
        try {
          // grim picks its encoder from -t, not from the file extension, so the
          // type must be passed explicitly or a jpg request would yield PNG bytes.
          await exec('grim', ['-t', format === 'jpg' ? 'jpeg' : 'png', out]);
        } catch {
          // Fallback: ImageMagick infers the format from the extension.
          await exec('import', ['-window', 'root', out]);
        }
      }

      const data = await readBase64AndCleanup(out);
      return {
        success: true,
        output: JSON.stringify({ mimeType: format === 'jpg' ? 'image/jpeg' : 'image/png', data }),
      };
    } catch (error) {
      if (out !== undefined) {
        // A failed or timed-out command may leave a partial file behind; remove it best-effort.
        await unlink(out).catch(() => {});
      }
      return {
        success: false,
        output: '',
        error: error instanceof Error ? error.message : String(error),
      };
    }
  },
};
/**
 * Tool that grabs one frame from a camera and returns it as a JSON string of
 * the form `{ mimeType, data }`, where `data` is base64-encoded.
 *
 * Platform behaviour:
 * - macOS: runs `imagesnap` quietly with a one-second warm-up; an explicit
 *   `device` is forwarded via `-d`.
 * - Linux: runs `ffmpeg` against a V4L2 device (`/dev/video0` unless `device`
 *   is given) and keeps a single video frame.
 * - Anything else: returns an unsuccessful result without running a command.
 *
 * All failures are reported through `ToolResult.error`; `execute` does not throw.
 */
export const cameraCaptureTool: Tool = {
  name: 'camera.capture',
  description: 'Capture a single frame from the default camera and return a base64-encoded image.',
  inputSchema: {
    type: 'object',
    properties: {
      format: { type: 'string', description: 'Image format: png (default) or jpg' },
      device: { type: 'string', description: 'Optional camera device identifier (platform-specific)' },
    },
  },
  execute: async (rawArgs: unknown): Promise<ToolResult> => {
    // Remembered so the temp file can be removed if the capture command fails midway.
    let out: string | undefined;
    try {
      const args = (rawArgs as CameraCaptureArgs | undefined) ?? {};
      // Anything other than an explicit 'jpg' request is treated as png.
      const format = args.format === 'jpg' ? 'jpg' : 'png';
      // Input is unvalidated at this point: ignore empty or non-string device values.
      const device = typeof args.device === 'string' && args.device.length > 0 ? args.device : undefined;

      if (process.platform !== 'darwin' && process.platform !== 'linux') {
        return { success: false, output: '', error: 'camera.capture is only supported on macOS/Linux hosts' };
      }

      out = tempPath(format);
      if (process.platform === 'darwin') {
        // Honour the caller's device choice instead of always using the default camera.
        const deviceArgs = device === undefined ? [] : ['-d', device];
        await exec('imagesnap', ['-q', '-w', '1', ...deviceArgs, out]);
      } else {
        await exec('ffmpeg', ['-y', '-f', 'video4linux2', '-i', device ?? '/dev/video0', '-frames:v', '1', out]);
      }

      const data = await readBase64AndCleanup(out);
      return {
        success: true,
        output: JSON.stringify({ mimeType: format === 'jpg' ? 'image/jpeg' : 'image/png', data }),
      };
    } catch (error) {
      if (out !== undefined) {
        // A failed or timed-out command may leave a partial file behind; remove it best-effort.
        await unlink(out).catch(() => {});
      }
      return {
        success: false,
        output: '',
        error: error instanceof Error ? error.message : String(error),
      };
    }
  },
};