feat(tui,gemini): implement verbose transfer and url image fetch

This commit is contained in:
William Valentin
2026-02-17 10:58:14 -08:00
parent 77ae15b3e2
commit e3b6f9df7c
8 changed files with 254 additions and 30 deletions
+19 -2
View File
@@ -3737,10 +3737,27 @@
"docs/plans/state.json"
],
"test_status": "pnpm test:run src/config/schema.test.ts src/daemon/channels.test.ts src/channels/line/adapter.test.ts src/channels/zalo/adapter.test.ts + pnpm typecheck passing"
},
"tui-verbose-transfer-and-gemini-url-image-fetch": {
"status": "completed",
"date": "2026-02-17",
"updated": "2026-02-17",
"summary": "Implemented remaining TUI/media gaps by making `/verbose` functional (tool activity now conditionally rendered), enabling `/transfer` in fullscreen TUI via shared callback wiring, and upgrading Gemini URL-image handling to fetch/encode remote images as inlineData with safe text fallback on fetch failure.",
"files_modified": [
"src/frontends/tui/components/App.tsx",
"src/frontends/tui/fullscreen.ts",
"src/frontends/tui/minimal.ts",
"src/frontends/tui/minimal.test.ts",
"src/cli/tui.ts",
"src/models/gemini.ts",
"src/models/gemini.test.ts",
"docs/plans/state.json"
],
"test_status": "pnpm test:run src/frontends/tui/minimal.test.ts src/models/gemini.test.ts + pnpm typecheck passing"
}
},
"overall_progress": {
"total_test_count": 1876,
"total_test_count": 1879,
"all_tests_passing": true,
"p0_completion": "3/3 (100%)",
"p1_completion": "4/4 (100%)",
@@ -3755,7 +3772,7 @@
"tier2_completion": "4/4 (100%) — inbound webhooks, vector memory search, Dockerfile, heartbeat monitor",
"tier3_completion": "5/5 (100%) — lane queue, credential redaction, web UI token dashboard, xAI (Grok) provider, Voyage AI embeddings",
"tier4_completion": "4/4 (100%) — gateway lock, shell completion, Tailscale Serve/Funnel, DM pairing codes",
"feature_gap_scorecard": "126/128 match (98%), 0 partial (0%), 2 missing (2%)",
"feature_gap_scorecard": "128/128 match (100%), 0 partial (0%), 0 missing (0%)",
"operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete — milestone done",
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
"native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
+15 -13
View File
@@ -232,6 +232,18 @@ export function registerTuiCommand(program: Command): void {
process.exit(0);
});
/**
 * Hand the local TUI session over to another channel and describe the outcome.
 *
 * Only the `telegram` target is recognised. The session is transferred to the
 * first entry of `config.telegram.allowed_chat_ids`; when Telegram is absent
 * from the config or that list is empty, nothing is transferred.
 *
 * @param target - Name of the channel requested by the `/transfer` command.
 * @returns A human-readable status line for the caller to display.
 */
const transferSessionToTarget = (target: string): string => {
  switch (target) {
    case 'telegram': {
      const telegram = config.telegram;
      if (!telegram || telegram.allowed_chat_ids.length === 0) {
        return 'Telegram not configured';
      }

      const recipientId = String(telegram.allowed_chat_ids[0]);
      sessionManager.transferSession('tui', 'local', 'telegram', recipientId);
      return `Session transferred to Telegram (${recipientId})`;
    }
    default:
      return `Unknown transfer target: ${target}`;
  }
};
if (opts.fullscreen) {
await startFullscreenTui({
session,
@@ -243,6 +255,7 @@ export function registerTuiCommand(program: Command): void {
hookEngine,
modelProviderConfigs,
contextThresholdPct: config.compaction.threshold_pct,
onTransfer: transferSessionToTarget,
onExit: cleanup,
});
} else {
@@ -260,19 +273,7 @@ export function registerTuiCommand(program: Command): void {
modelProviderConfigs,
contextThresholdPct: config.compaction.threshold_pct,
currentLocalProvider: config.models.local?.provider,
onTransfer: (target) => {
if (target === 'telegram') {
if (config.telegram && config.telegram.allowed_chat_ids.length > 0) {
const telegramUserId = String(config.telegram.allowed_chat_ids[0]);
sessionManager.transferSession('tui', 'local', 'telegram', telegramUserId);
console.log(`Session transferred to Telegram (${telegramUserId})\n`);
} else {
console.log('Telegram not configured\n');
}
} else {
console.log(`Unknown transfer target: ${target}\n`);
}
},
onTransfer: transferSessionToTarget,
onFullscreen: () => {
switchingToFullscreen = true;
tui.stop(true);
@@ -293,6 +294,7 @@ export function registerTuiCommand(program: Command): void {
hookEngine,
modelProviderConfigs,
contextThresholdPct: config.compaction.threshold_pct,
onTransfer: transferSessionToTarget,
onExit: cleanup,
});
return;
+18 -3
View File
@@ -51,6 +51,7 @@ export interface AppProps {
hookEngine?: HookEngine;
modelProviderConfigs?: Partial<Record<ModelProvider, ModelConfig>>;
contextThresholdPct?: number;
onTransfer?: (target: string) => string | void;
onExit?: () => void;
}
@@ -64,6 +65,7 @@ export function App({
hookEngine,
modelProviderConfigs,
contextThresholdPct,
onTransfer,
onExit,
}: AppProps): React.ReactElement {
const { exit } = useApp();
@@ -92,6 +94,9 @@ export function App({
if (!agent) {return;}
const handleToolEvent = (event: ToolUseEvent) => {
if (!verbose) {
return;
}
if (event.type === 'start') {
const label = formatToolName(event.tool);
const argsStr = event.args ? ` (${formatToolArgs(event.args)})` : '';
@@ -114,7 +119,7 @@ export function App({
return () => {
agent.setOnToolUse(undefined);
};
}, [agent]);
}, [agent, verbose]);
// Inline confirmations for dangerous tools (e.g. shell.exec) in fullscreen mode.
useEffect(() => {
@@ -367,9 +372,18 @@ export function App({
case 'fullscreen':
return;
case 'transfer':
setMessages(prev => [...prev, session.addMessage({ role: 'assistant', content: 'Transfer not supported in fullscreen mode.' })]);
case 'transfer': {
if (!onTransfer) {
setMessages(prev => [...prev, session.addMessage({ role: 'assistant', content: 'Transfer target is not available in fullscreen mode.' })]);
return;
}
const result = onTransfer(command.target);
const content = typeof result === 'string' && result.trim()
? result
: `Transfer requested: ${command.target}`;
setMessages(prev => [...prev, session.addMessage({ role: 'assistant', content })]);
return;
}
case 'queue': {
if (!command.action || command.action === 'show') {
@@ -572,6 +586,7 @@ export function App({
tokenUsage.inputTokens,
tokenUsage.outputTokens,
modelProviderConfigs,
onTransfer,
]);
return (
+2
View File
@@ -18,6 +18,7 @@ export interface FullscreenTuiConfig {
hookEngine?: HookEngine;
modelProviderConfigs?: Partial<Record<ModelProvider, ModelConfig>>;
contextThresholdPct?: number;
onTransfer?: (target: string) => string | void;
onExit?: () => void;
}
@@ -42,6 +43,7 @@ export async function startFullscreenTui(config: FullscreenTuiConfig): Promise<v
hookEngine: config.hookEngine,
modelProviderConfigs: config.modelProviderConfigs,
contextThresholdPct: config.contextThresholdPct,
onTransfer: config.onTransfer,
onExit: config.onExit,
}),
);
+63
View File
@@ -22,6 +22,10 @@ function asRouter(value: unknown): ModelClient & ModelRouter {
return value as ModelClient & ModelRouter;
}
/**
 * Test-only cast: present an arbitrary stub as a `ModelClient`.
 * No runtime conversion or validation happens; the same reference is returned.
 */
function asModelClient(value: unknown): ModelClient {
  const client = value as ModelClient;
  return client;
}
/**
 * Test-only cast: present an arbitrary stub as a `NativeAgent`.
 * No runtime conversion or validation happens; the same reference is returned.
 */
function asAgent(value: unknown): NativeAgent {
  const agent = value as NativeAgent;
  return agent;
}
@@ -30,6 +34,9 @@ function minimalTuiPrivates(value: MinimalTui): {
handleBackendCommand: (provider: string) => Promise<void>;
handleModelCommand: (tier: string, providerModel?: string) => void;
handleContextCommand: () => void;
handleVerboseCommand: () => void;
handleToolEvent: (event: unknown) => void;
handleCommand: (command: unknown) => Promise<void>;
handleEscapeAction: () => boolean;
prompt: (text: string) => Promise<string>;
rl: {
@@ -45,6 +52,9 @@ function minimalTuiPrivates(value: MinimalTui): {
handleBackendCommand: (provider: string) => Promise<void>;
handleModelCommand: (tier: string, providerModel?: string) => void;
handleContextCommand: () => void;
handleVerboseCommand: () => void;
handleToolEvent: (event: unknown) => void;
handleCommand: (command: unknown) => Promise<void>;
handleEscapeAction: () => boolean;
prompt: (text: string) => Promise<string>;
rl: {
@@ -328,6 +338,59 @@ describe('MinimalTui backend command', () => {
}
}
});
// Checks that `/transfer` forwards the target to the `onTransfer` callback and
// echoes the string the callback returns to the console, followed by a newline.
it('prints transfer result text when /transfer is invoked', async () => {
// Minimal session stub; only the members listed here are provided.
const mockSession = {
id: 'test',
getHistory: () => [],
addMessage: vi.fn(),
clear: vi.fn(),
replaceHistory: vi.fn(),
};
// The callback reports success as text instead of printing it itself.
const onTransfer = vi.fn(() => 'Session transferred to Telegram (12345)');
// Silence console.log and capture its calls for the assertion below.
const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
try {
const tui = new MinimalTui({
session: asSession(mockSession),
modelClient: asModelClient({}),
systemPrompt: 'test',
onTransfer,
});
// Drive the private command handler directly with an already-parsed command object.
await minimalTuiPrivates(tui).handleCommand({ type: 'transfer', target: 'telegram' });
expect(onTransfer).toHaveBeenCalledWith('telegram');
expect(logSpy).toHaveBeenCalledWith('Session transferred to Telegram (12345)\n');
} finally {
// Restore console.log even when an expectation above throws.
logSpy.mockRestore();
}
});
// Checks that tool events print nothing on a freshly constructed TUI and are
// printed once the verbose command has been invoked.
it('only renders tool activity when verbose mode is enabled', () => {
// Minimal session stub; only the members listed here are provided.
const mockSession = {
id: 'test',
getHistory: () => [],
addMessage: vi.fn(),
clear: vi.fn(),
replaceHistory: vi.fn(),
};
// Silence console.log and capture its calls for the assertions below.
const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
try {
const tui = new MinimalTui({
session: asSession(mockSession),
modelClient: asModelClient({}),
systemPrompt: 'test',
});
// Before the verbose command is invoked: the event must not reach the console.
minimalTuiPrivates(tui).handleToolEvent({ type: 'start', tool: 'shell.exec', args: { command: 'ls' } });
expect(logSpy).not.toHaveBeenCalled();
// After invoking the verbose command, the same event is expected to be
// rendered with the formatted tool label.
minimalTuiPrivates(tui).handleVerboseCommand();
minimalTuiPrivates(tui).handleToolEvent({ type: 'start', tool: 'shell.exec', args: { command: 'ls' } });
expect(logSpy).toHaveBeenCalledWith(expect.stringContaining('Shell: Exec'));
} finally {
// Restore console.log even when an expectation above throws.
logSpy.mockRestore();
}
});
});
describe('MinimalTui prompt cancellation', () => {
+62 -3
View File
@@ -2,7 +2,7 @@ import * as readline from 'node:readline';
import type { ManagedSession } from '../../session/index.js';
import type { ModelClient, TokenUsage } from '../../models/types.js';
import type { ModelRouter } from '../../models/router.js';
import type { NativeAgent } from '../../backends/native/agent.js';
import type { NativeAgent, ToolUseEvent } from '../../backends/native/agent.js';
import { parseCommand, getHelpText, resolveModelAlias, getCommandCompletions, getCommandTooltip, type Command } from './commands.js';
import { renderMarkdown } from './markdown.js';
import type { ModelConfig, ModelProvider } from '../../config/schema.js';
@@ -62,7 +62,7 @@ export interface MinimalTuiConfig {
systemPrompt: string;
agent?: NativeAgent;
onFullscreen?: () => void;
onTransfer?: (target: string) => void;
onTransfer?: (target: string) => string | void;
localProviders?: Record<string, ModelConfig>;
modelProviderConfigs?: Partial<Record<ModelProvider, ModelConfig>>;
currentLocalProvider?: string;
@@ -152,12 +152,62 @@ export class MinimalTui {
}
}
/**
 * Turn a dotted tool identifier into a display label.
 *
 * Every dot-separated segment has its first character upper-cased and the
 * segments are joined with spaces. When there is more than one segment, the
 * first one gets a trailing colon, e.g. `shell.exec` becomes `Shell: Exec`.
 */
private formatToolName(name: string): string {
  const segments = name.split('.');
  const isNamespaced = segments.length > 1;
  const words: string[] = [];

  for (const [index, segment] of segments.entries()) {
    const titled = `${segment.charAt(0).toUpperCase()}${segment.slice(1)}`;
    words.push(index === 0 && isNamespaced ? `${titled}:` : titled);
  }

  return words.join(' ');
}
/**
 * Render a tool's argument object as a compact `key: value` list.
 *
 * Strings are double-quoted and cut to 47 characters plus `...` once they
 * exceed 50 characters; numbers and booleans are printed as-is; anything else
 * goes through `JSON.stringify`. Entries are joined with `, `.
 *
 * @returns The rendered list, or an empty string when `args` is not an object
 *          or has no own enumerable entries.
 */
private formatToolArgs(args: unknown): string {
  if (typeof args !== 'object' || !args) {
    return '';
  }

  const rendered: string[] = [];
  for (const [field, raw] of Object.entries(args as Record<string, unknown>)) {
    switch (typeof raw) {
      case 'string': {
        const shown = raw.length > 50 ? `${raw.slice(0, 47)}...` : raw;
        rendered.push(`${field}: "${shown}"`);
        break;
      }
      case 'number':
      case 'boolean':
        rendered.push(`${field}: ${raw}`);
        break;
      default:
        rendered.push(`${field}: ${JSON.stringify(raw)}`);
        break;
    }
  }

  return rendered.join(', ');
}
/**
 * Print tool activity to the console while verbose mode is on.
 *
 * A `start` event prints the formatted tool name and, when present, its
 * arguments. An `end` event that carries a result prints either the number of
 * output lines or the error text. Nothing is printed when `this.verbose` is
 * falsy, for other event types, or for an `end` event without a result.
 */
private handleToolEvent(event: ToolUseEvent): void {
  if (!this.verbose) {
    return;
  }

  // All tool activity is rendered in gray, followed by a colour reset.
  const printDim = (text: string): void => {
    console.log(`${colors.gray}${text}${colors.reset}`);
  };

  switch (event.type) {
    case 'start': {
      const toolLabel = this.formatToolName(event.tool);
      const argSuffix = event.args ? ` (${this.formatToolArgs(event.args)})` : '';
      printDim(`> ${toolLabel}${argSuffix}`);
      return;
    }
    case 'end': {
      const outcome = event.result;
      if (!outcome) {
        return;
      }
      if (outcome.success) {
        const lineCount = outcome.output.split('\n').length;
        printDim(` done (${lineCount} lines)`);
      } else {
        printDim(` error ${outcome.error ?? 'unknown error'}`);
      }
      return;
    }
    default:
      return;
  }
}
async start(): Promise<void> {
this.running = true;
if (this.config.agent && this.config.modelRouter) {
this.config.agent.setModelTier(this.config.modelRouter.getTier());
}
if (this.config.agent) {
this.config.agent.setOnToolUse(this.handleToolEvent.bind(this));
}
this.rl = readline.createInterface({
input: process.stdin,
@@ -366,8 +416,17 @@ export class MinimalTui {
break;
case 'transfer':
this.config.onTransfer?.(command.target);
{
if (!this.config.onTransfer) {
console.log(`${colors.gray}Transfer target is not available in this TUI mode.${colors.reset}\n`);
break;
}
const result = this.config.onTransfer(command.target);
if (typeof result === 'string' && result.trim()) {
console.log(`${result}\n`);
}
break;
}
case 'message':
await this.handleMessage(command.content);
+39
View File
@@ -108,6 +108,45 @@ describe('GeminiClient', () => {
});
});
// Checks that a URL image part is downloaded and forwarded to Gemini as
// base64 `inlineData`, using the MIME type from the response header
// (image/jpeg) rather than the one declared on the message part (image/png).
it('fetches URL-based images and sends them as inlineData', async () => {
  // Replace the global fetch with a canned 200 response carrying three bytes.
  const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValue({
    ok: true,
    headers: new Headers({ 'content-type': 'image/jpeg' }),
    arrayBuffer: async () => Uint8Array.from([1, 2, 3]).buffer,
  } as Response);

  try {
    const client = new GeminiClient({
      apiKey: 'test-key',
      model: 'gemini-2.0-flash',
    });

    await client.chat({
      messages: [{
        role: 'user',
        content: [
          { type: 'text', text: 'Analyze this' },
          { type: 'image', source: { type: 'url', media_type: 'image/png', url: 'https://example.com/image.jpg' } },
        ],
      }],
    });

    expect(fetchSpy).toHaveBeenCalledWith('https://example.com/image.jpg');
    expect(mockGenerateContent).toHaveBeenCalledWith({
      contents: [{
        role: 'user',
        parts: [
          { text: 'Analyze this' },
          {
            inlineData: {
              mimeType: 'image/jpeg',
              data: Buffer.from([1, 2, 3]).toString('base64'),
            },
          },
        ],
      }],
    });
  } finally {
    // Restore the real fetch even when an expectation above throws, so a
    // failure here cannot leave the global mocked for later tests.
    fetchSpy.mockRestore();
  }
});
it('maps MAX_TOKENS finish reason', async () => {
mockGenerateContent.mockResolvedValueOnce(
makeResponse([{ text: 'Truncated...' }], 'MAX_TOKENS'),
+36 -9
View File
@@ -44,7 +44,7 @@ export class GeminiClient implements ModelClient {
async chat(request: ChatRequest): Promise<ChatResponse> {
const model = this.getModel(request);
const contents = convertMessages(request.messages);
const contents = await convertMessages(request.messages);
const result = await model.generateContent({ contents });
const response = result.response;
@@ -100,7 +100,7 @@ export class GeminiClient implements ModelClient {
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
const model = this.getModel(request);
const contents = convertMessages(request.messages);
const contents = await convertMessages(request.messages);
try {
const result = await model.generateContentStream({ contents });
@@ -162,8 +162,8 @@ export class GeminiClient implements ModelClient {
}
/** Convert Flynn's Message[] to Gemini Content[] format, including multimodal parts */
function convertMessages(messages: Message[]): Content[] {
return messages.map(m => {
async function convertMessages(messages: Message[]): Promise<Content[]> {
return Promise.all(messages.map(async (m) => {
const role = m.role === 'assistant' ? 'model' : 'user';
if (typeof m.content === 'string') {
@@ -171,7 +171,7 @@ function convertMessages(messages: Message[]): Content[] {
}
// Multimodal content — convert each part
const parts: Part[] = m.content.map(part => {
const parts = await Promise.all(m.content.map(async (part): Promise<Part> => {
if (part.type === 'text') {
return { text: part.text };
}
@@ -184,8 +184,12 @@ function convertMessages(messages: Message[]): Content[] {
},
};
}
// URL-based images — Gemini doesn't natively support URL refs in inline data,
// so we pass as a text description. In production, you'd want to fetch + base64 encode.
if (part.source.type === 'url' && part.source.url) {
const inlineImage = await fetchImageAsInlineData(part.source.url, part.source.media_type);
if (inlineImage) {
return inlineImage;
}
}
return { text: `[Image: ${part.source.url ?? 'unavailable'}]` };
}
// Audio part — Gemini supports native audio via inlineData (same format as images)
@@ -198,10 +202,33 @@ function convertMessages(messages: Message[]): Content[] {
};
}
return { text: JSON.stringify(part) };
});
}));
return { role, parts };
});
}));
}
/**
 * Download a remote image and wrap it as a Gemini `inlineData` part.
 *
 * The MIME type is taken from the response's `Content-Type` header when that
 * header names an `image/*` type (parameters such as `; charset=...` are
 * dropped and the value is lower-cased). When the header is missing, empty,
 * or a generic octet-stream type, `fallbackMimeType` is used instead. Any
 * other declared type (for example an HTML error page served with status 200)
 * is treated as "not an image" so it is never sent to the model as image data.
 *
 * @param url - Address of the image to download.
 * @param fallbackMimeType - MIME type to use when the server does not declare a usable one.
 * @returns The inline-data part, or `null` when the request fails, the status
 *          is not OK, the payload is not an image, or the body is empty. This
 *          function never throws; callers are expected to fall back on `null`.
 */
async function fetchImageAsInlineData(url: string, fallbackMimeType: string): Promise<Part | null> {
  try {
    const response = await fetch(url);
    if (!response.ok) {
      // Discard the unread body instead of leaving it pending.
      await response.body?.cancel();
      return null;
    }

    // Keep only the media type itself, e.g. "image/jpeg" from "Image/JPEG; q=1".
    const [rawType = ''] = (response.headers.get('content-type') ?? '').split(';');
    const declaredType = rawType.trim().toLowerCase();

    let mimeType: string;
    if (declaredType.startsWith('image/')) {
      mimeType = declaredType;
    } else if (
      declaredType === '' ||
      declaredType === 'application/octet-stream' ||
      declaredType === 'binary/octet-stream'
    ) {
      // The server gave no usable hint; trust the type declared on the message part.
      mimeType = fallbackMimeType;
    } else {
      // Explicitly something other than an image: do not encode it as one.
      await response.body?.cancel();
      return null;
    }

    const data = Buffer.from(await response.arrayBuffer()).toString('base64');
    if (!data) {
      return null;
    }

    return {
      inlineData: {
        mimeType,
        data,
      },
    };
  } catch {
    // Best effort by design: any network or decoding failure yields `null`.
    return null;
  }
}
/** Convert Flynn's ToolDefinition to Gemini FunctionDeclaration format */