From 0be93c20b5ea7804e051615c5ebdee871c54db2f Mon Sep 17 00:00:00 2001 From: William Valentin Date: Mon, 16 Feb 2026 14:17:52 -0800 Subject: [PATCH] feat(tools): add minio ingestion into memory namespaces --- README.md | 8 +- docs/api/TOOLS.md | 51 ++++++- docs/plans/state.json | 19 +++ src/daemon/index.ts | 5 +- src/tools/builtin/index.ts | 1 + src/tools/builtin/minio-ingest.test.ts | 128 +++++++++++++++++ src/tools/builtin/minio-ingest.ts | 182 +++++++++++++++++++++++++ src/tools/index.ts | 2 +- src/tools/policy.test.ts | 15 ++ src/tools/policy.ts | 4 +- 10 files changed, 407 insertions(+), 8 deletions(-) create mode 100644 src/tools/builtin/minio-ingest.test.ts create mode 100644 src/tools/builtin/minio-ingest.ts diff --git a/README.md b/README.md index c01acfc..14d752b 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ Self-hosted personal AI assistant with Telegram and Terminal interfaces. - **Heartbeat Monitor**: Periodic health checks (gateway, model, channels, memory, disk) with failure notifications - **Scheduled Backups**: Interval- or cron-based snapshot backups with optional startup run - **MinIO File Sharing Tool**: Upload a local file and return a temporary MinIO share link via `minio.share` +- **MinIO Knowledge Ingestion Tool**: Pull text-like objects from MinIO into memory namespaces via `minio.ingest` - **Gmail Pub/Sub Watcher**: Monitor Gmail inbox via Google Cloud Pub/Sub push notifications with polling fallback - **Vector Memory Search**: Hybrid keyword + semantic search with embeddings (OpenAI, Gemini, Ollama, llama.cpp, Voyage AI) - **Docker Deployment**: Multi-stage Dockerfile and docker-compose.yml for production containers @@ -606,11 +607,10 @@ backup: ## MinIO Share Tool -When `backup.minio.enabled` is configured, Flynn also exposes a `minio.share` tool: +When `backup.minio.enabled` is configured, Flynn also exposes MinIO tools: -- Uploads a local file to the configured MinIO bucket -- Returns a temporary download URL (`mc share 
download`) -- Useful for sharing CSVs, logs, images, and generated artifacts without dumping file contents in chat +- `minio.share`: upload a local file to the configured MinIO bucket and return a temporary download URL (`mc share download`) +- `minio.ingest`: read a text-like object from MinIO and append/replace a memory namespace (useful for syncing notes/runbooks into long-term memory) ## Inbound Webhooks diff --git a/docs/api/TOOLS.md b/docs/api/TOOLS.md index 60597c0..d52cc08 100644 --- a/docs/api/TOOLS.md +++ b/docs/api/TOOLS.md @@ -27,6 +27,7 @@ Tools are executable capabilities that the AI agent can call to perform actions - **Web**: `web.fetch`, `web.search` - **Browser**: `browser.navigate`, `browser.screenshot` - **Memory**: `memory.read`, `memory.write`, `memory.search` +- **MinIO**: `minio.share`, `minio.ingest` - **Media**: `media.send`, `image.analyze`, `audio.transcribe` - **System**: `system.info` - **Session**: `sessions.list`, `sessions.delete` @@ -466,7 +467,7 @@ Tools are organized into groups: - `group:web`: Web and browser tools - `group:memory`: Memory and search tools -There are additional groups for specific integrations (gmail/gcal/gdocs/gdrive/gtasks/cron). See `TOOL_GROUPS` in `src/tools/policy.ts`. +There are additional groups for specific integrations (gmail/gcal/gdocs/gdrive/gtasks/cron/minio). See `TOOL_GROUPS` in `src/tools/policy.ts`. ### Policy Resolution @@ -958,6 +959,54 @@ Search memory using hybrid (keyword + vector) search. } ``` +### MinIO Tools + +#### `minio.share` + +Upload a local file to MinIO and return a temporary presigned download URL. + +#### `minio.ingest` + +Read a text-like object from MinIO and write it into a memory namespace. 
+ +```json +{ + "name": "minio.ingest", + "description": "Read a text-like object from MinIO and ingest it into memory namespace for later retrieval/search.", + "inputSchema": { + "type": "object", + "properties": { + "object_key": { + "type": "string", + "description": "Object key in MinIO bucket" + }, + "bucket": { + "type": "string", + "description": "Optional bucket override" + }, + "namespace": { + "type": "string", + "description": "Memory namespace (default: global/knowledge)" + }, + "mode": { + "type": "string", + "enum": ["append", "replace"], + "description": "Write mode" + }, + "max_chars": { + "type": "number", + "description": "Maximum characters to ingest" + }, + "force": { + "type": "boolean", + "description": "Override non-text extension/content safety checks" + } + }, + "required": ["object_key"] + } +} +``` + ### Media Tools #### `media.send` diff --git a/docs/plans/state.json b/docs/plans/state.json index beecb3e..c62bb9c 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -61,6 +61,25 @@ ], "test_status": "pnpm eslint src/gateway/ui/pages/dashboard.js + pnpm typecheck passing; full pnpm lint currently fails due pre-existing unrelated repo lint errors" }, + "minio-knowledge-ingestion-tool": { + "status": "completed", + "date": "2026-02-16", + "updated": "2026-02-16", + "summary": "Added `minio.ingest` tool to import text-like objects from MinIO into memory namespaces (`append`/`replace`) with extension/content safety checks, truncation controls, and force override. 
Wired tool registration when MinIO + memory are enabled, updated tool policy/groups/docs, and added focused tests.", + "files_modified": [ + "src/tools/builtin/minio-ingest.ts", + "src/tools/builtin/minio-ingest.test.ts", + "src/tools/builtin/index.ts", + "src/tools/index.ts", + "src/daemon/index.ts", + "src/tools/policy.ts", + "src/tools/policy.test.ts", + "README.md", + "docs/api/TOOLS.md", + "docs/plans/state.json" + ], + "test_status": "pnpm test:run src/tools/builtin/minio-ingest.test.ts src/tools/policy.test.ts + pnpm typecheck passing" + }, "backup-session-summary-audit-trail": { "status": "completed", "date": "2026-02-16", diff --git a/src/daemon/index.ts b/src/daemon/index.ts index 2f9c48a..6f0f0de 100644 --- a/src/daemon/index.ts +++ b/src/daemon/index.ts @@ -27,7 +27,7 @@ import { RoutingPolicy } from '../routing/index.js'; import type { ModelRouter } from '../models/index.js'; import { SessionStore, SessionManager, parseDuration } from '../session/index.js'; import { HookEngine } from '../hooks/index.js'; -import { createSessionTools, createAgentsListTool, createMessageSendTool, createCronTools, createGmailTools, createGcalTools, createGdocsTools, createGdriveTools, createGtasksTools, createMinioShareTool } from '../tools/index.js'; +import { createSessionTools, createAgentsListTool, createMessageSendTool, createCronTools, createGmailTools, createGcalTools, createGdocsTools, createGdriveTools, createGtasksTools, createMinioShareTool, createMinioIngestTool } from '../tools/index.js'; import { ChannelRegistry } from '../channels/index.js'; import type { McpManager } from '../mcp/index.js'; import type { SkillRegistry, SkillInstaller } from '../skills/index.js'; @@ -193,6 +193,9 @@ export async function startDaemon(config: Config, options?: StartDaemonOptions): } if (config.backup.minio.enabled) { toolRegistry.register(createMinioShareTool(config.backup)); + if (memoryStore) { + toolRegistry.register(createMinioIngestTool(config.backup, memoryStore)); + } 
} // ── Lifecycle ── diff --git a/src/tools/builtin/index.ts b/src/tools/builtin/index.ts index 4dae64e..1731cdf 100644 --- a/src/tools/builtin/index.ts +++ b/src/tools/builtin/index.ts @@ -28,6 +28,7 @@ export { createGdocsTools } from './gdocs.js'; export { createGdriveTools } from './gdrive.js'; export { createGtasksTools } from './gtasks.js'; export { createMinioShareTool } from './minio-share.js'; +export { createMinioIngestTool } from './minio-ingest.js'; export { screenCaptureTool, cameraCaptureTool } from './capture.js'; import type { Tool } from '../types.js'; diff --git a/src/tools/builtin/minio-ingest.test.ts b/src/tools/builtin/minio-ingest.test.ts new file mode 100644 index 0000000..840a7d2 --- /dev/null +++ b/src/tools/builtin/minio-ingest.test.ts @@ -0,0 +1,128 @@ +import { describe, expect, it, vi } from 'vitest'; +import { createMinioIngestTool, minioIngestInternals } from './minio-ingest.js'; +import type { BackupConfig } from '../../config/schema.js'; +import type { MemoryStore } from '../../memory/store.js'; + +function makeBackupConfig(overrides?: Partial<BackupConfig>): BackupConfig { + return { + enabled: true, + schedule: undefined, + interval: '24h', + run_on_start: false, + notify: undefined, + failure_threshold: 1, + notify_recovery: true, + local_dir: '~/.local/share/flynn/backups', + include_vectors: true, + minio: { + enabled: true, + endpoint: 'localhost:9000', + access_key: 'minio-admin', + secret_key: 'minio-secret', + bucket: 'flynn-knowledge', + prefix: 'flynn', + secure: false, + }, + ...overrides, + }; +} + +describe('minio ingest internals', () => { + it('accepts known text-like extensions', () => { + expect(minioIngestInternals.isLikelyTextObject('notes/today.md')).toBe(true); + expect(minioIngestInternals.isLikelyTextObject('logs/daemon.log')).toBe(true); + }); + + it('rejects likely binary extensions', () => { + expect(minioIngestInternals.isLikelyTextObject('manual.pdf')).toBe(false); + }); +}); + +describe('createMinioIngestTool', () => 
{ + it('ingests object and writes to memory', async () => { + const write = vi.fn(); + const store = { write } as unknown as MemoryStore; + const execRunner = vi.fn(async () => ({ + stdout: '# Runbook\n\nRestart service before deploy.\n', + stderr: '', + })); + + const tool = createMinioIngestTool(makeBackupConfig(), store, { + execRunner, + now: () => new Date('2026-02-16T15:00:00.000Z'), + }); + + const result = await tool.execute({ + object_key: 'knowledge/runbook.md', + namespace: 'global/runbooks', + mode: 'append', + }); + + expect(result.success).toBe(true); + expect(result.output).toContain('Ingested MinIO object'); + expect(write).toHaveBeenCalledWith( + 'global/runbooks', + expect.stringContaining('source: minio://flynn-knowledge/knowledge/runbook.md'), + 'append', + ); + expect(execRunner).toHaveBeenCalledWith( + 'mc', + ['cat', 'flynningest/flynn-knowledge/knowledge/runbook.md'], + expect.objectContaining({ env: expect.any(Object) }), + ); + }); + + it('rejects likely binary object unless force=true', async () => { + const write = vi.fn(); + const store = { write } as unknown as MemoryStore; + const execRunner = vi.fn(); + const tool = createMinioIngestTool(makeBackupConfig(), store, { execRunner }); + + const result = await tool.execute({ object_key: 'knowledge/diagram.pdf' }); + expect(result.success).toBe(false); + expect(result.error).toContain('Unsupported object type'); + expect(execRunner).not.toHaveBeenCalled(); + }); + + it('allows non-text extension when force=true', async () => { + const write = vi.fn(); + const store = { write } as unknown as MemoryStore; + const execRunner = vi.fn(async () => ({ + stdout: 'PDF text extracted upstream', + stderr: '', + })); + const tool = createMinioIngestTool(makeBackupConfig(), store, { execRunner }); + + const result = await tool.execute({ + object_key: 'knowledge/diagram.pdf', + force: true, + mode: 'replace', + }); + expect(result.success).toBe(true); + expect(write).toHaveBeenCalledWith( + 
'global/knowledge', + expect.stringContaining('PDF text extracted upstream'), + 'replace', + ); + }); + + it('returns an error when minio is disabled', async () => { + const write = vi.fn(); + const store = { write } as unknown as MemoryStore; + const tool = createMinioIngestTool(makeBackupConfig({ + minio: { + enabled: false, + endpoint: undefined, + access_key: undefined, + secret_key: undefined, + bucket: undefined, + prefix: 'flynn', + secure: true, + }, + }), store); + + const result = await tool.execute({ object_key: 'notes/today.md' }); + expect(result.success).toBe(false); + expect(result.error).toContain('backup.minio.enabled=true'); + }); +}); diff --git a/src/tools/builtin/minio-ingest.ts b/src/tools/builtin/minio-ingest.ts new file mode 100644 index 0000000..19ec287 --- /dev/null +++ b/src/tools/builtin/minio-ingest.ts @@ -0,0 +1,182 @@ +import { promisify } from 'node:util'; +import { execFile } from 'node:child_process'; +import { extname } from 'node:path'; +import type { BackupConfig } from '../../config/schema.js'; +import type { MemoryStore } from '../../memory/store.js'; +import type { Tool, ToolResult } from '../types.js'; +import { backupInternals } from '../../backup/index.js'; + +const execFileAsync = promisify(execFile); + +type ExecRunner = ( + file: string, + args: string[], + options?: { env?: NodeJS.ProcessEnv; maxBuffer?: number }, +) => Promise<{ stdout: string; stderr: string }>; + +const TEXT_EXTENSIONS = new Set([ + '.txt', + '.md', + '.markdown', + '.csv', + '.tsv', + '.json', + '.jsonl', + '.yaml', + '.yml', + '.log', + '.xml', + '.html', + '.htm', +]); + +export interface MinioIngestDeps { + execRunner?: ExecRunner; + now?: () => Date; +} + +function isLikelyText(content: string): boolean { + return !content.includes('\u0000'); +} + +function isLikelyTextObject(objectKey: string): boolean { + const ext = extname(objectKey).toLowerCase(); + if (!ext) {return true;} + return TEXT_EXTENSIONS.has(ext); +} + +export const 
minioIngestInternals = { + isLikelyText, + isLikelyTextObject, +}; + +interface MinioIngestArgs { + object_key: string; + bucket?: string; + namespace?: string; + mode?: 'append' | 'replace'; + max_chars?: number; + force?: boolean; +} + +export function createMinioIngestTool(config: BackupConfig, store: MemoryStore, deps?: MinioIngestDeps): Tool { + return { + name: 'minio.ingest', + description: 'Read a text-like object from MinIO and ingest it into memory namespace for later retrieval/search.', + inputSchema: { + type: 'object', + properties: { + object_key: { + type: 'string', + description: 'Object key in MinIO bucket (for example: "knowledge/runbook.md")', + }, + bucket: { + type: 'string', + description: 'Optional bucket override. Defaults to backup.minio.bucket.', + }, + namespace: { + type: 'string', + description: 'Memory namespace to write to. Default: "global/knowledge".', + }, + mode: { + type: 'string', + enum: ['append', 'replace'], + description: 'Write mode for memory namespace. Default: "append".', + }, + max_chars: { + type: 'number', + description: 'Maximum characters to ingest. Default: 20000.', + }, + force: { + type: 'boolean', + description: 'Ingest even if file extension/content look non-text.', + }, + }, + required: ['object_key'], + }, + execute: async (rawArgs: unknown): Promise<ToolResult> => { + const args = rawArgs as MinioIngestArgs; + const minio = config.minio; + const objectKey = args.object_key?.trim(); + const namespace = args.namespace ?? 'global/knowledge'; + const mode = args.mode ?? 'append'; + const maxChars = Math.max(1, Math.floor(args.max_chars ?? 20_000)); + const force = args.force ?? false; + const bucket = args.bucket ?? 
minio.bucket; + + if (!objectKey) { + return { success: false, output: '', error: 'object_key is required' }; + } + if (!minio.enabled) { + return { success: false, output: '', error: 'MinIO ingestion requires backup.minio.enabled=true' }; + } + if (!minio.endpoint || !minio.access_key || !minio.secret_key || !bucket) { + return { + success: false, + output: '', + error: 'Missing MinIO credentials in backup.minio (endpoint/access_key/secret_key/bucket)', + }; + } + if (!force && !isLikelyTextObject(objectKey)) { + return { + success: false, + output: '', + error: `Unsupported object type for ingestion: ${objectKey}. Use force=true if you know it is text.`, + }; + } + + const alias = 'flynningest'; + const host = backupInternals.buildMinioHost({ + endpoint: minio.endpoint, + accessKey: minio.access_key, + secretKey: minio.secret_key, + secure: minio.secure, + }); + const env = { ...process.env, [`MC_HOST_${alias}`]: host }; + const runner = deps?.execRunner ?? (async (file: string, cmdArgs: string[], options?: { env?: NodeJS.ProcessEnv; maxBuffer?: number }) => { + return execFileAsync(file, cmdArgs, options); + }); + const remotePath = `${alias}/${bucket}/${objectKey}`; + + try { + const { stdout } = await runner('mc', ['cat', remotePath], { env, maxBuffer: 20 * 1024 * 1024 }); + const text = typeof stdout === 'string' ? stdout : stdout.toString('utf-8'); + + if (!force && !isLikelyText(text)) { + return { + success: false, + output: '', + error: `Object appears binary and cannot be ingested safely: ${objectKey}. Use force=true to override.`, + }; + } + + const trimmed = text.trim(); + if (!trimmed) { + return { + success: false, + output: '', + error: `Object is empty: minio://${bucket}/${objectKey}`, + }; + } + + const clipped = trimmed.length > maxChars + ? `${trimmed.slice(0, maxChars)}\n\n[truncated to ${maxChars} chars]` + : trimmed; + const importedAt = (deps?.now ? 
deps.now() : new Date()).toISOString(); + const payload = `## MinIO Import\nsource: minio://${bucket}/${objectKey}\nimported_at: ${importedAt}\n\n${clipped}`; + store.write(namespace, payload, mode); + + return { + success: true, + output: `Ingested MinIO object into memory.\nSource: minio://${bucket}/${objectKey}\nNamespace: ${namespace}\nMode: ${mode}`, + }; + } catch (error) { + return { + success: false, + output: '', + error: error instanceof Error ? error.message : String(error), + }; + } + }, + }; +} diff --git a/src/tools/index.ts b/src/tools/index.ts index d5b8fed..1d57515 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -5,7 +5,7 @@ export { ToolExecutor } from './executor.js'; export type { ToolExecutorConfig } from './executor.js'; export { ToolPolicy } from './policy.js'; export type { ToolPolicyContext } from './policy.js'; -export { allBuiltinTools, createWebSearchTools, createProcessTools, ProcessManager, BrowserManager, createBrowserTools, createMediaSendTool, createAudioTranscribeTool, createSessionTools, createAgentsListTool, createMessageSendTool, createCronTools, createGmailTools, createGcalTools, createGdocsTools, createGdriveTools, createGtasksTools, createMinioShareTool } from './builtin/index.js'; +export { allBuiltinTools, createWebSearchTools, createProcessTools, ProcessManager, BrowserManager, createBrowserTools, createMediaSendTool, createAudioTranscribeTool, createSessionTools, createAgentsListTool, createMessageSendTool, createCronTools, createGmailTools, createGcalTools, createGdocsTools, createGdriveTools, createGtasksTools, createMinioShareTool, createMinioIngestTool } from './builtin/index.js'; export type { WebSearchConfig } from './builtin/web-search.js'; export type { ProcessManagerConfig } from './builtin/process/index.js'; export type { BrowserManagerConfig } from './builtin/browser/index.js'; diff --git a/src/tools/policy.test.ts b/src/tools/policy.test.ts index 5528baa..288c52d 100644 --- 
a/src/tools/policy.test.ts +++ b/src/tools/policy.test.ts @@ -17,6 +17,8 @@ const ALL_TOOL_NAMES = [ 'memory.read', 'memory.write', 'memory.search', + 'minio.share', + 'minio.ingest', 'process.start', 'process.status', 'process.output', @@ -480,6 +482,19 @@ describe('ToolPolicy', () => { expect(names).toContain('file.read'); // from minimal }); + it('expands group:minio', () => { + const policy = new ToolPolicy(defaultConfig({ + profile: 'minimal', + allow: ['group:minio'], + })); + const result = policy.filterTools(ALL_TOOLS); + const names = result.map(t => t.name); + expect(names).toContain('minio.share'); + expect(names).toContain('minio.ingest'); + expect(names).toContain('file.read'); + expect(names).not.toContain('shell.exec'); + }); + it('unknown group name passes through as literal', () => { const policy = new ToolPolicy(defaultConfig({ profile: 'minimal', diff --git a/src/tools/policy.ts b/src/tools/policy.ts index 5d24562..210cdf6 100644 --- a/src/tools/policy.ts +++ b/src/tools/policy.ts @@ -40,6 +40,7 @@ const PROFILE_TOOLS: Record> = { 'cron.create', 'cron.delete', 'minio.share', + 'minio.ingest', ]), coding: new Set([ 'file.read', @@ -69,6 +70,7 @@ const PROFILE_TOOLS: Record> = { 'cron.create', 'cron.delete', 'minio.share', + 'minio.ingest', 'file.write', 'file.edit', 'file.patch', @@ -104,7 +106,7 @@ export const TOOL_GROUPS: Record = { 'group:gdrive': ['drive.list', 'drive.search', 'drive.read'], 'group:gtasks': ['tasks.lists', 'tasks.list'], 'group:cron': ['cron.list', 'cron.trigger', 'cron.create', 'cron.delete'], - 'group:minio': ['minio.share'], + 'group:minio': ['minio.share', 'minio.ingest'], }; /** Expand group references in a list of tool names/patterns. */