feat(runtime): add talk mode and capture tools
This commit is contained in:
@@ -16,6 +16,8 @@ Self-hosted personal AI assistant with Telegram and Terminal interfaces.
|
|||||||
- **Docker Sandboxing**: Per-session container isolation for tool execution
|
- **Docker Sandboxing**: Per-session container isolation for tool execution
|
||||||
- **Multi-Agent Routing**: Config-driven agent selection per sender/channel with tool profiles
|
- **Multi-Agent Routing**: Config-driven agent selection per sender/channel with tool profiles
|
||||||
- **Media Pipeline**: Image analysis, outbound attachments, audio transcription and native audio passthrough across all channels
|
- **Media Pipeline**: Image analysis, outbound attachments, audio transcription and native audio passthrough across all channels
|
||||||
|
- **Talk Mode (Wake Phrase)**: Optional wake-phrase gating (`audio.talk_mode`) with timed conversation windows
|
||||||
|
- **Capture Tools**: `screen.capture` and `camera.capture` tools for host capture workflows
|
||||||
- **Session Transfer**: Move conversations between frontends
|
- **Session Transfer**: Move conversations between frontends
|
||||||
- **CLI**: Full command-line interface (`flynn start`, `send`, `doctor`, `completion`, etc.)
|
- **CLI**: Full command-line interface (`flynn start`, `send`, `doctor`, `completion`, etc.)
|
||||||
- **Shell Completion**: Auto-generated completions for bash, zsh, and fish with `--install` flag
|
- **Shell Completion**: Auto-generated completions for bash, zsh, and fish with `--install` flag
|
||||||
@@ -294,6 +296,10 @@ audio:
|
|||||||
| `provider.endpoint` | yes | Whisper-compatible API endpoint |
|
| `provider.endpoint` | yes | Whisper-compatible API endpoint |
|
||||||
| `provider.api_key` | no | Bearer token for authentication |
|
| `provider.api_key` | no | Bearer token for authentication |
|
||||||
| `provider.model` | no | Model name sent in request (default: `whisper-1`) |
|
| `provider.model` | no | Model name sent in request (default: `whisper-1`) |
|
||||||
|
| `talk_mode.enabled` | no | Enable wake-phrase talk mode gating (default: `false`) |
|
||||||
|
| `talk_mode.wake_phrase` | no | Phrase that activates talk mode (default: `hey flynn`) |
|
||||||
|
| `talk_mode.timeout_ms` | no | Active listen window after wake (default: `120000`) |
|
||||||
|
| `talk_mode.allow_manual_toggle` | no | Enable `/talk on\|off\|status` controls (default: `true`) |
|
||||||
|
|
||||||
Without an `audio` config, voice messages from non-audio-capable models will display an error message to the user. For local transcription, you can run a whisper.cpp server:
|
Without an `audio` config, voice messages from non-audio-capable models will display an error message to the user. For local transcription, you can run a whisper.cpp server:
|
||||||
|
|
||||||
@@ -314,6 +320,17 @@ docker run -d \
|
|||||||
# docker compose up -d
|
# docker compose up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Capture Tools
|
||||||
|
|
||||||
|
Flynn includes host capture tools:
|
||||||
|
- `screen.capture` -> captures current screen and returns base64 image payload
|
||||||
|
- `camera.capture` -> captures one camera frame and returns base64 image payload
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- These are host-command wrappers and require platform binaries:
|
||||||
|
- macOS: `screencapture` (screen), `imagesnap` (camera)
|
||||||
|
- Linux: `grim` or ImageMagick `import` (screen), `ffmpeg` (camera)
|
||||||
|
|
||||||
## Telegram Commands
|
## Telegram Commands
|
||||||
|
|
||||||
| Command | Description |
|
| Command | Description |
|
||||||
|
|||||||
@@ -248,3 +248,8 @@ hooks:
|
|||||||
# endpoint: "http://localhost:18801/v1/audio/transcriptions"
|
# endpoint: "http://localhost:18801/v1/audio/transcriptions"
|
||||||
# api_key: "${WHISPER_API_KEY}" # Optional Bearer token
|
# api_key: "${WHISPER_API_KEY}" # Optional Bearer token
|
||||||
# model: "whisper-1" # Model name (default: whisper-1)
|
# model: "whisper-1" # Model name (default: whisper-1)
|
||||||
|
# talk_mode:
|
||||||
|
# enabled: false
|
||||||
|
# wake_phrase: "hey flynn"
|
||||||
|
# timeout_ms: 120000
|
||||||
|
# allow_manual_toggle: true
|
||||||
|
|||||||
@@ -350,6 +350,39 @@ describe('configSchema — signal', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('configSchema — audio talk mode', () => {
  // Base config without any `audio` section, so talk_mode must come from defaults.
  const minimalConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults talk_mode fields', () => {
    const { talk_mode: talkMode } = configSchema.parse(minimalConfig).audio;

    expect(talkMode.enabled).toBe(false);
    expect(talkMode.wake_phrase).toBe('hey flynn');
    expect(talkMode.timeout_ms).toBe(120000);
    expect(talkMode.allow_manual_toggle).toBe(true);
  });

  it('accepts custom talk_mode settings', () => {
    // Every field overridden with a non-default value.
    const customTalkMode = {
      enabled: true,
      wake_phrase: 'ok flynn',
      timeout_ms: 300000,
      allow_manual_toggle: false,
    };

    const parsed = configSchema.parse({
      ...minimalConfig,
      audio: { talk_mode: customTalkMode },
    });
    const talkMode = parsed.audio.talk_mode;

    expect(talkMode.enabled).toBe(true);
    expect(talkMode.wake_phrase).toBe('ok flynn');
    expect(talkMode.timeout_ms).toBe(300000);
    expect(talkMode.allow_manual_toggle).toBe(false);
  });
});
|
||||||
|
|
||||||
describe('configSchema — teams', () => {
|
describe('configSchema — teams', () => {
|
||||||
const minimalConfig = {
|
const minimalConfig = {
|
||||||
telegram: { bot_token: 'test', allowed_chat_ids: [1] },
|
telegram: { bot_token: 'test', allowed_chat_ids: [1] },
|
||||||
|
|||||||
@@ -472,9 +472,17 @@ const audioProviderSchema = z.object({
|
|||||||
model: z.string().optional(),
|
model: z.string().optional(),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/** Inclusive bounds accepted for `audio.talk_mode.timeout_ms` (1 second to 1 hour). */
const TALK_MODE_MIN_TIMEOUT_MS = 1000;
const TALK_MODE_MAX_TIMEOUT_MS = 60 * 60 * 1000;

/**
 * Schema for the `audio.talk_mode` config section.
 *
 * Every field has a default, and the object itself defaults to `{}`, so the
 * section may be omitted entirely.
 */
const talkModeSchema = z
  .object({
    enabled: z.boolean().default(false),
    wake_phrase: z.string().default('hey flynn'),
    timeout_ms: z
      .number()
      .min(TALK_MODE_MIN_TIMEOUT_MS)
      .max(TALK_MODE_MAX_TIMEOUT_MS)
      .default(120000),
    allow_manual_toggle: z.boolean().default(true),
  })
  .default({});
|
||||||
|
|
||||||
const audioSchema = z.object({
|
const audioSchema = z.object({
|
||||||
enabled: z.boolean().default(false),
|
enabled: z.boolean().default(false),
|
||||||
provider: audioProviderSchema.optional(),
|
provider: audioProviderSchema.optional(),
|
||||||
|
talk_mode: talkModeSchema,
|
||||||
}).default({});
|
}).default({});
|
||||||
|
|
||||||
// ── Tool policy schemas ──────────────────────────────────────────────
|
// ── Tool policy schemas ──────────────────────────────────────────────
|
||||||
|
|||||||
@@ -544,3 +544,67 @@ describe('daemon audio routing integration', () => {
|
|||||||
expect(atts?.some(a => a.mimeType === 'image/jpeg')).toBe(true);
|
expect(atts?.some(a => a.mimeType === 'image/jpeg')).toBe(true);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('daemon talk mode (voice wake) integration', () => {
  afterEach(() => {
    vi.restoreAllMocks();
  });

  it('ignores messages until wake phrase is used', async () => {
    // Stub the orchestrator so the test only observes whether the router forwards text to it.
    const processSpy = vi.spyOn(AgentOrchestrator.prototype, 'process').mockResolvedValue('ok');

    const session = {
      id: 'telegram:user-talk-1',
      addMessage: vi.fn(),
      getHistory: vi.fn(() => []),
      clear: vi.fn(),
      replaceHistory: vi.fn(),
      getConfig: vi.fn(() => undefined),
      setConfig: vi.fn(),
      deleteConfig: vi.fn(),
    };

    const sessionManager = {
      getSession: vi.fn(() => session),
    } as unknown as MessageRouterDeps['sessionManager'];

    const modelRouter = {
      getAvailableTiers: () => ['fast', 'default', 'complex', 'local'],
      getAllLabels: () => ({ fast: 'fast', default: 'default', complex: 'complex', local: 'local' }),
      getLabel: (tier: string) => tier,
    } as unknown as MessageRouterDeps['modelRouter'];

    const toolRegistry = {
      clone() { return this; },
      register: vi.fn(),
    } as unknown as MessageRouterDeps['toolRegistry'];

    // Talk mode enabled with the default wake phrase and a two-minute window.
    const config = {
      agents: {
        primary_tier: 'default',
        delegation: { compaction: 'fast', memory_extraction: 'fast', classification: 'fast', tool_summarisation: 'fast', complex_reasoning: 'complex' },
        max_delegation_depth: 3,
        max_iterations: 10,
      },
      compaction: { enabled: false },
      models: { default: { provider: 'anthropic', model: 'claude' } },
      audio: { talk_mode: { enabled: true, wake_phrase: 'hey flynn', timeout_ms: 120000, allow_manual_toggle: true } },
    } as unknown as MessageRouterDeps['config'];

    const router = createMessageRouter({
      sessionManager,
      modelRouter,
      systemPrompt: 'test prompt',
      toolRegistry,
      toolExecutor: {} as unknown as MessageRouterDeps['toolExecutor'],
      config,
    });

    const reply = vi.fn(async (_message: OutboundMessage) => {});

    // Sends one text message from the same Telegram sender through the router.
    const send = (id: string, text: string): Promise<void> =>
      router.handler({
        id,
        channel: 'telegram',
        senderId: 'user-talk-1',
        text,
        timestamp: Date.now(),
      } as MessageRouterInput, reply);

    // No wake phrase yet: the message must not reach the orchestrator.
    await send('m-talk-1', 'hello there');
    expect(processSpy).not.toHaveBeenCalled();

    // Wake phrase present: it is stripped and the remainder is processed.
    await send('m-talk-2', 'hey flynn what time is it?');
    expect(processSpy).toHaveBeenCalledOnce();
    expect(processSpy).toHaveBeenCalledWith('what time is it?', undefined);
  });
});
|
||||||
|
|||||||
+58
-3
@@ -70,6 +70,7 @@ export function createMessageRouter(deps: {
|
|||||||
} {
|
} {
|
||||||
// Cache agents by session ID + agent config name to avoid recreating on every message
|
// Cache agents by session ID + agent config name to avoid recreating on every message
|
||||||
const agents = new Map<string, { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector }>();
|
const agents = new Map<string, { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector }>();
|
||||||
|
const talkModeUntil = new Map<string, number>();
|
||||||
|
|
||||||
function getOrCreateAgent(channel: string, senderId: string, metadata?: Record<string, unknown>, agentOverride?: string): { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector } {
|
function getOrCreateAgent(channel: string, senderId: string, metadata?: Record<string, unknown>, agentOverride?: string): { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector } {
|
||||||
// Resolve agent config name via routing (sender → channel → default fallback)
|
// Resolve agent config name via routing (sender → channel → default fallback)
|
||||||
@@ -246,10 +247,60 @@ export function createMessageRouter(deps: {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const handler = async (msg: InboundMessage, reply: (response: OutboundMessage) => Promise<void>): Promise<void> => {
|
const handler = async (msg: InboundMessage, reply: (response: OutboundMessage) => Promise<void>): Promise<void> => {
|
||||||
|
let incomingText = msg.text;
|
||||||
|
const talkMode = deps.config.audio?.talk_mode;
|
||||||
|
if (talkMode?.enabled && incomingText.trim().length > 0) {
|
||||||
|
const key = `${msg.channel}:${msg.senderId}`;
|
||||||
|
const now = Date.now();
|
||||||
|
const timeoutMs = talkMode.timeout_ms;
|
||||||
|
const currentUntil = talkModeUntil.get(key) ?? 0;
|
||||||
|
const lower = incomingText.trim().toLowerCase();
|
||||||
|
|
||||||
|
if (talkMode.allow_manual_toggle) {
|
||||||
|
if (lower === '/talk on') {
|
||||||
|
talkModeUntil.set(key, now + timeoutMs);
|
||||||
|
await reply({ text: `Talk mode enabled for ${Math.ceil(timeoutMs / 1000)}s.`, replyTo: msg.id });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (lower === '/talk off') {
|
||||||
|
talkModeUntil.delete(key);
|
||||||
|
await reply({ text: 'Talk mode disabled.', replyTo: msg.id });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (lower === '/talk status') {
|
||||||
|
if (currentUntil <= now) {
|
||||||
|
await reply({ text: 'Talk mode is idle (wake phrase required).', replyTo: msg.id });
|
||||||
|
} else {
|
||||||
|
await reply({ text: `Talk mode active for ${Math.ceil((currentUntil - now) / 1000)}s.`, replyTo: msg.id });
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const phrase = talkMode.wake_phrase.trim();
|
||||||
|
const wakeRegex = phrase
|
||||||
|
? new RegExp(`^\\s*${escapeRegex(phrase)}(?:[\\s,:!.-]+)?`, 'i')
|
||||||
|
: null;
|
||||||
|
const wakeMatched = Boolean(wakeRegex && wakeRegex.test(incomingText));
|
||||||
|
|
||||||
|
if (wakeMatched && wakeRegex) {
|
||||||
|
talkModeUntil.set(key, now + timeoutMs);
|
||||||
|
incomingText = incomingText.replace(wakeRegex, '').trim();
|
||||||
|
if (!incomingText) {
|
||||||
|
await reply({ text: `Listening. Talk mode active for ${Math.ceil(timeoutMs / 1000)}s.`, replyTo: msg.id });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else if (currentUntil > now) {
|
||||||
|
talkModeUntil.set(key, now + timeoutMs);
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let intentAgentOverride: string | undefined;
|
let intentAgentOverride: string | undefined;
|
||||||
let intentSkillOverride: string | undefined;
|
let intentSkillOverride: string | undefined;
|
||||||
if (deps.config.intents?.enabled && deps.intentRegistry) {
|
if (deps.config.intents?.enabled && deps.intentRegistry) {
|
||||||
const intentMatch = deps.intentRegistry.match(msg.text);
|
const intentMatch = deps.intentRegistry.match(incomingText);
|
||||||
|
|
||||||
if (intentMatch?.rule.target.type === 'agent') {
|
if (intentMatch?.rule.target.type === 'agent') {
|
||||||
let confidence = intentMatch.score;
|
let confidence = intentMatch.score;
|
||||||
@@ -298,7 +349,7 @@ export function createMessageRouter(deps: {
|
|||||||
|
|
||||||
const commandInput = msg.metadata?.isCommand && typeof msg.metadata.command === 'string'
|
const commandInput = msg.metadata?.isCommand && typeof msg.metadata.command === 'string'
|
||||||
? `/${msg.metadata.command}${msg.metadata.commandArgs ? ` ${msg.metadata.commandArgs}` : ''}`
|
? `/${msg.metadata.command}${msg.metadata.commandArgs ? ` ${msg.metadata.commandArgs}` : ''}`
|
||||||
: msg.text;
|
: incomingText;
|
||||||
|
|
||||||
if (deps.commandRegistry && deps.commandRegistry.isCommand(commandInput)) {
|
if (deps.commandRegistry && deps.commandRegistry.isCommand(commandInput)) {
|
||||||
const session = deps.sessionManager.getSession(msg.channel, msg.senderId);
|
const session = deps.sessionManager.getSession(msg.channel, msg.senderId);
|
||||||
@@ -604,7 +655,7 @@ export function createMessageRouter(deps: {
|
|||||||
const supportsAudioOverride = (tierConfig as Record<string, unknown> | undefined)?.supports_audio as boolean | undefined;
|
const supportsAudioOverride = (tierConfig as Record<string, unknown> | undefined)?.supports_audio as boolean | undefined;
|
||||||
const nativeAudioSupported = supportsAudioInput(modelProvider, modelName, supportsAudioOverride);
|
const nativeAudioSupported = supportsAudioInput(modelProvider, modelName, supportsAudioOverride);
|
||||||
|
|
||||||
let messageText = msg.text;
|
let messageText = incomingText;
|
||||||
let attachments = msg.attachments;
|
let attachments = msg.attachments;
|
||||||
const audioAttachments = (msg.attachments ?? []).filter((a: Attachment) => isSupportedAudio(a));
|
const audioAttachments = (msg.attachments ?? []).filter((a: Attachment) => isSupportedAudio(a));
|
||||||
|
|
||||||
@@ -666,3 +717,7 @@ export function createMessageRouter(deps: {
|
|||||||
|
|
||||||
return { handler, agents };
|
return { handler, agents };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Backslash-escapes every regular-expression metacharacter in `value` so the
 * result can be embedded in a `RegExp` source and match the input literally.
 *
 * @param value - Raw text (e.g. a user-configured wake phrase).
 * @returns The text with each metacharacter prefixed by a backslash.
 */
function escapeRegex(value: string): string {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaChars, (ch) => `\\${ch}`);
}
|
||||||
|
|||||||
@@ -110,6 +110,12 @@ describe('discoverServices', () => {
|
|||||||
endpoint: 'http://localhost:18801/v1/audio/transcriptions',
|
endpoint: 'http://localhost:18801/v1/audio/transcriptions',
|
||||||
model: 'whisper-1',
|
model: 'whisper-1',
|
||||||
},
|
},
|
||||||
|
talk_mode: {
|
||||||
|
enabled: false,
|
||||||
|
wake_phrase: 'hey flynn',
|
||||||
|
timeout_ms: 120000,
|
||||||
|
allow_manual_toggle: true,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
const reg = new ChannelRegistry();
|
const reg = new ChannelRegistry();
|
||||||
|
|||||||
@@ -0,0 +1,68 @@
|
|||||||
|
import { describe, it, expect, vi, beforeAll, beforeEach } from 'vitest';
|
||||||
|
import { execFile } from 'child_process';
|
||||||
|
import { readFile, unlink } from 'fs/promises';
|
||||||
|
import type { ChildProcess } from 'child_process';
|
||||||
|
|
||||||
|
vi.mock('child_process', () => ({
|
||||||
|
execFile: vi.fn(),
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock('fs/promises', () => ({
|
||||||
|
readFile: vi.fn(),
|
||||||
|
unlink: vi.fn(),
|
||||||
|
}));
|
||||||
|
|
||||||
|
const mockExecFile = vi.mocked(execFile);
|
||||||
|
const mockReadFile = vi.mocked(readFile);
|
||||||
|
const mockUnlink = vi.mocked(unlink);
|
||||||
|
type ExecFileCallback = NonNullable<Parameters<typeof execFile>[3]>;
|
||||||
|
|
||||||
|
/** Returns a fresh empty object typed as `ChildProcess`, used as the mocked `execFile` return value. */
function mockChildProcess(): ChildProcess {
  const stub = {};
  return stub as ChildProcess;
}
|
||||||
|
|
||||||
|
/**
 * Queues a one-shot implementation for the mocked `execFile`.
 *
 * When the mocked call receives a function as its fourth argument, `impl` is
 * invoked with it so the test can decide how the command "finishes".
 *
 * @param impl - Receives the `execFile` completion callback.
 */
function mockExecFileOnce(impl: (callback: ExecFileCallback) => void): void {
  mockExecFile.mockImplementationOnce((_file, _argv, _options, done) => {
    const child = mockChildProcess();
    if (typeof done !== 'function') {
      return child;
    }
    impl(done as ExecFileCallback);
    return child;
  });
}
|
||||||
|
|
||||||
|
describe('capture tools', () => {
  let screenCaptureTool: typeof import('./capture.js').screenCaptureTool;
  let cameraCaptureTool: typeof import('./capture.js').cameraCaptureTool;

  // Import after the vi.mock() calls so the module under test binds to the mocks.
  beforeAll(async () => {
    const mod = await import('./capture.js');
    screenCaptureTool = mod.screenCaptureTool;
    cameraCaptureTool = mod.cameraCaptureTool;
  });

  beforeEach(() => {
    vi.clearAllMocks();
    mockReadFile.mockResolvedValue(Buffer.from('image-bytes'));
    mockUnlink.mockResolvedValue(undefined);
  });

  it('screen.capture returns base64 payload when command succeeds', async () => {
    const platformSpy = vi.spyOn(process, 'platform', 'get').mockReturnValue('linux');
    try {
      mockExecFileOnce((callback) => callback(null, '', ''));

      const result = await screenCaptureTool.execute({ format: 'png' });
      expect(result.success).toBe(true);
      expect(result.output).toContain('"mimeType":"image/png"');
      expect(mockExecFile).toHaveBeenCalled();
    } finally {
      // Restore even when an assertion throws, so the fake platform cannot
      // leak into later tests.
      platformSpy.mockRestore();
    }
  });

  it('camera.capture returns error on unsupported platform', async () => {
    const platformSpy = vi.spyOn(process, 'platform', 'get').mockReturnValue('win32');
    try {
      const result = await cameraCaptureTool.execute({});
      expect(result.success).toBe(false);
      expect(result.error).toContain('only supported');
    } finally {
      platformSpy.mockRestore();
    }
  });
});
|
||||||
@@ -0,0 +1,124 @@
|
|||||||
|
import { execFile } from 'child_process';
|
||||||
|
import { randomUUID } from 'crypto';
|
||||||
|
import { readFile, unlink } from 'fs/promises';
|
||||||
|
import { join } from 'path';
|
||||||
|
import { tmpdir } from 'os';
|
||||||
|
|
||||||
|
import type { Tool, ToolResult } from '../types.js';
|
||||||
|
|
||||||
|
interface CaptureArgs {
|
||||||
|
format?: 'png' | 'jpg';
|
||||||
|
}
|
||||||
|
|
||||||
|
interface CameraCaptureArgs extends CaptureArgs {
|
||||||
|
device?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
const DEFAULT_TIMEOUT_MS = 15000;
|
||||||
|
|
||||||
|
/**
 * Builds a unique file path inside the OS temp directory for one capture.
 *
 * @param format - File extension to use, without the leading dot.
 * @returns `<tmpdir>/flynn-capture-<uuid>.<format>`.
 */
function tempPath(format: string): string {
  const fileName = ['flynn-capture-', randomUUID(), '.', format].join('');
  return join(tmpdir(), fileName);
}
|
||||||
|
|
||||||
|
/**
 * Promise wrapper around `execFile` that only reports success or failure.
 *
 * @param command - Executable to run.
 * @param args - Arguments passed to the executable (no shell is involved).
 * @param timeoutMs - Passed to `execFile` as its `timeout` option.
 * @returns Resolves with no value when the command completes without error.
 * @throws Rejects with an `Error` naming the command line and carrying stderr,
 *   or the underlying error message when stderr is empty.
 */
function exec(command: string, args: string[], timeoutMs = DEFAULT_TIMEOUT_MS): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    execFile(command, args, { timeout: timeoutMs }, (failure, _out, errOutput) => {
      if (!failure) {
        resolve();
        return;
      }
      const detail = errOutput || failure.message;
      reject(new Error(`${command} ${args.join(' ')} failed: ${detail}`));
    });
  });
}
|
||||||
|
|
||||||
|
/**
 * Reads a file as base64 and then deletes it.
 *
 * The delete is attempted whether or not the read succeeds, and a failed
 * delete is ignored so it never masks the read result or error.
 *
 * @param path - File to read and remove.
 * @returns The file contents encoded as base64.
 */
async function readBase64AndCleanup(path: string): Promise<string> {
  let encoded: string;
  try {
    encoded = (await readFile(path)).toString('base64');
  } finally {
    // Best-effort removal of the temp file.
    await unlink(path).catch(() => {});
  }
  return encoded;
}
|
||||||
|
|
||||||
|
/**
 * `screen.capture` tool: grabs the host screen via a platform command and
 * returns it as a JSON string `{ mimeType, data }` with base64 image data.
 *
 * - macOS: `screencapture -x -t <format>`.
 * - Linux: `grim`, falling back to ImageMagick `import -window root` when
 *   `grim` fails.
 * - Other platforms: returns a failed result without running anything.
 *
 * `execute` never throws; every failure becomes `{ success: false, error }`.
 */
export const screenCaptureTool: Tool = {
  name: 'screen.capture',
  description: 'Capture the current screen and return a base64-encoded image (png/jpg).',
  inputSchema: {
    type: 'object',
    properties: {
      format: { type: 'string', description: 'Image format: png (default) or jpg' },
    },
  },
  execute: async (rawArgs: unknown): Promise<ToolResult> => {
    // Kept outside the try so the failure path can remove a partial file.
    let out: string | undefined;
    try {
      const args = (rawArgs as CaptureArgs | undefined) ?? {};
      // Anything other than an explicit 'jpg' falls back to png.
      const format = args.format === 'jpg' ? 'jpg' : 'png';
      const platform = process.platform;

      if (platform !== 'darwin' && platform !== 'linux') {
        return { success: false, output: '', error: 'screen.capture is only supported on macOS/Linux hosts' };
      }

      out = tempPath(format);

      if (platform === 'darwin') {
        await exec('screencapture', ['-x', '-t', format, out]);
      } else {
        // grim picks its encoder from -t, not from the file extension, so a
        // jpg request must ask for it explicitly; otherwise PNG bytes would
        // be labelled image/jpeg. The png invocation is left unchanged.
        const grimArgs = format === 'jpg' ? ['-t', 'jpeg', out] : [out];
        try {
          await exec('grim', grimArgs);
        } catch {
          // grim missing or failed: fall back to ImageMagick, which infers
          // the format from the output extension.
          await exec('import', ['-window', 'root', out]);
        }
      }

      const data = await readBase64AndCleanup(out);
      return {
        success: true,
        output: JSON.stringify({ mimeType: format === 'jpg' ? 'image/jpeg' : 'image/png', data }),
      };
    } catch (error) {
      if (out !== undefined) {
        // A failed command may still have written a partial file.
        await unlink(out).catch(() => {});
      }
      return {
        success: false,
        output: '',
        error: error instanceof Error ? error.message : String(error),
      };
    }
  },
};
|
||||||
|
|
||||||
|
/**
 * `camera.capture` tool: grabs a single camera frame via a platform command
 * and returns it as a JSON string `{ mimeType, data }` with base64 image data.
 *
 * - macOS: `imagesnap -q -w 1 [-d <device>]`.
 * - Linux: `ffmpeg -f video4linux2 -i <device>` (default `/dev/video0`).
 * - Other platforms: returns a failed result without running anything.
 *
 * `execute` never throws; every failure becomes `{ success: false, error }`.
 */
export const cameraCaptureTool: Tool = {
  name: 'camera.capture',
  description: 'Capture a single frame from the default camera and return a base64-encoded image.',
  inputSchema: {
    type: 'object',
    properties: {
      format: { type: 'string', description: 'Image format: png (default) or jpg' },
      device: { type: 'string', description: 'Optional camera device identifier (platform-specific)' },
    },
  },
  execute: async (rawArgs: unknown): Promise<ToolResult> => {
    // Kept outside the try so the failure path can remove a partial file.
    let out: string | undefined;
    try {
      const args = (rawArgs as CameraCaptureArgs | undefined) ?? {};
      // Anything other than an explicit 'jpg' falls back to png.
      const format = args.format === 'jpg' ? 'jpg' : 'png';
      // Tool arguments are unvalidated: treat blank or non-string values as
      // "not provided" so the platform default is used.
      const device = typeof args.device === 'string' && args.device.trim() !== '' ? args.device : undefined;
      const platform = process.platform;

      if (platform !== 'darwin' && platform !== 'linux') {
        return { success: false, output: '', error: 'camera.capture is only supported on macOS/Linux hosts' };
      }

      out = tempPath(format);

      if (platform === 'darwin') {
        // Forward the requested device; previously it was ignored on macOS.
        const deviceArgs = device === undefined ? [] : ['-d', device];
        await exec('imagesnap', ['-q', '-w', '1', ...deviceArgs, out]);
      } else {
        await exec('ffmpeg', ['-y', '-f', 'video4linux2', '-i', device ?? '/dev/video0', '-frames:v', '1', out]);
      }

      const data = await readBase64AndCleanup(out);
      return {
        success: true,
        output: JSON.stringify({ mimeType: format === 'jpg' ? 'image/jpeg' : 'image/png', data }),
      };
    } catch (error) {
      if (out !== undefined) {
        // A failed command may still have written a partial file.
        await unlink(out).catch(() => {});
      }
      return {
        success: false,
        output: '',
        error: error instanceof Error ? error.message : String(error),
      };
    }
  },
};
|
||||||
@@ -27,6 +27,7 @@ export { createGcalTools } from './gcal.js';
|
|||||||
export { createGdocsTools } from './gdocs.js';
|
export { createGdocsTools } from './gdocs.js';
|
||||||
export { createGdriveTools } from './gdrive.js';
|
export { createGdriveTools } from './gdrive.js';
|
||||||
export { createGtasksTools } from './gtasks.js';
|
export { createGtasksTools } from './gtasks.js';
|
||||||
|
export { screenCaptureTool, cameraCaptureTool } from './capture.js';
|
||||||
|
|
||||||
import type { Tool } from '../types.js';
|
import type { Tool } from '../types.js';
|
||||||
import type { MemoryStore } from '../../memory/store.js';
|
import type { MemoryStore } from '../../memory/store.js';
|
||||||
@@ -40,6 +41,7 @@ import { filePatchTool } from './file-patch.js';
|
|||||||
import { fileListTool } from './file-list.js';
|
import { fileListTool } from './file-list.js';
|
||||||
import { systemInfoTool } from './system-info.js';
|
import { systemInfoTool } from './system-info.js';
|
||||||
import { webFetchTool } from './web-fetch.js';
|
import { webFetchTool } from './web-fetch.js';
|
||||||
|
import { screenCaptureTool, cameraCaptureTool } from './capture.js';
|
||||||
import { createMemoryReadTool } from './memory-read.js';
|
import { createMemoryReadTool } from './memory-read.js';
|
||||||
import { createMemoryWriteTool } from './memory-write.js';
|
import { createMemoryWriteTool } from './memory-write.js';
|
||||||
import { createMemorySearchTool } from './memory-search.js';
|
import { createMemorySearchTool } from './memory-search.js';
|
||||||
@@ -55,6 +57,8 @@ export const allBuiltinTools: Tool[] = [
|
|||||||
fileListTool,
|
fileListTool,
|
||||||
systemInfoTool,
|
systemInfoTool,
|
||||||
webFetchTool,
|
webFetchTool,
|
||||||
|
screenCaptureTool,
|
||||||
|
cameraCaptureTool,
|
||||||
];
|
];
|
||||||
|
|
||||||
/** Create memory tools that require a MemoryStore instance. */
|
/** Create memory tools that require a MemoryStore instance. */
|
||||||
|
|||||||
@@ -16,3 +16,4 @@ export { fileEditTool } from './builtin/file-edit.js';
|
|||||||
export { fileListTool } from './builtin/file-list.js';
|
export { fileListTool } from './builtin/file-list.js';
|
||||||
export { systemInfoTool } from './builtin/system-info.js';
|
export { systemInfoTool } from './builtin/system-info.js';
|
||||||
export { webFetchTool } from './builtin/web-fetch.js';
|
export { webFetchTool } from './builtin/web-fetch.js';
|
||||||
|
export { screenCaptureTool, cameraCaptureTool } from './builtin/capture.js';
|
||||||
|
|||||||
+3
-1
@@ -76,6 +76,8 @@ const PROFILE_TOOLS: Record<ToolProfile, Set<string>> = {
|
|||||||
'process.output',
|
'process.output',
|
||||||
'process.kill',
|
'process.kill',
|
||||||
'process.list',
|
'process.list',
|
||||||
|
'screen.capture',
|
||||||
|
'camera.capture',
|
||||||
'browser.navigate',
|
'browser.navigate',
|
||||||
'browser.screenshot',
|
'browser.screenshot',
|
||||||
'browser.click',
|
'browser.click',
|
||||||
@@ -91,7 +93,7 @@ const PROFILE_TOOLS: Record<ToolProfile, Set<string>> = {
|
|||||||
/** Named groups for use in allow/deny lists (e.g. 'group:fs'). */
|
/** Named groups for use in allow/deny lists (e.g. 'group:fs'). */
|
||||||
export const TOOL_GROUPS: Record<string, string[]> = {
|
export const TOOL_GROUPS: Record<string, string[]> = {
|
||||||
'group:fs': ['file.read', 'file.write', 'file.edit', 'file.patch', 'file.list'],
|
'group:fs': ['file.read', 'file.write', 'file.edit', 'file.patch', 'file.list'],
|
||||||
'group:runtime': ['shell.exec', 'process.start', 'process.output', 'process.status', 'process.kill', 'process.list'],
|
'group:runtime': ['shell.exec', 'process.start', 'process.output', 'process.status', 'process.kill', 'process.list', 'screen.capture', 'camera.capture'],
|
||||||
'group:web': ['web.fetch', 'web.search', 'browser.navigate', 'browser.screenshot', 'browser.click', 'browser.type', 'browser.content', 'browser.eval'],
|
'group:web': ['web.fetch', 'web.search', 'browser.navigate', 'browser.screenshot', 'browser.click', 'browser.type', 'browser.content', 'browser.eval'],
|
||||||
'group:memory': ['memory.read', 'memory.write', 'memory.search'],
|
'group:memory': ['memory.read', 'memory.write', 'memory.search'],
|
||||||
'group:gmail': ['gmail.list', 'gmail.search', 'gmail.read'],
|
'group:gmail': ['gmail.list', 'gmail.search', 'gmail.read'],
|
||||||
|
|||||||
Reference in New Issue
Block a user