diff --git a/README.md b/README.md index 14d752b..cfcb256 100644 --- a/README.md +++ b/README.md @@ -611,6 +611,7 @@ When `backup.minio.enabled` is configured, Flynn also exposes MinIO tools: - `minio.share`: upload a local file to the configured MinIO bucket and return a temporary download URL (`mc share download`) - `minio.ingest`: read a text-like object from MinIO and append/replace a memory namespace (useful for syncing notes/runbooks into long-term memory) +- `minio.sync`: recursively ingest a MinIO prefix into nested memory namespaces with object and size limits ## Inbound Webhooks diff --git a/docs/api/TOOLS.md b/docs/api/TOOLS.md index d52cc08..8a8a013 100644 --- a/docs/api/TOOLS.md +++ b/docs/api/TOOLS.md @@ -27,7 +27,7 @@ Tools are executable capabilities that the AI agent can call to perform actions - **Web**: `web.fetch`, `web.search` - **Browser**: `browser.navigate`, `browser.screenshot` - **Memory**: `memory.read`, `memory.write`, `memory.search` -- **MinIO**: `minio.share`, `minio.ingest` +- **MinIO**: `minio.share`, `minio.ingest`, `minio.sync` - **Media**: `media.send`, `image.analyze`, `audio.transcribe` - **System**: `system.info` - **Session**: `sessions.list`, `sessions.delete` @@ -1007,6 +1007,52 @@ Read a text-like object from MinIO and write it into a memory namespace. } ``` +#### `minio.sync` + +Sync text-like objects from a MinIO prefix into nested memory namespaces. + +```json +{ + "name": "minio.sync", + "description": "Sync text-like objects from a MinIO prefix into memory namespaces.", + "inputSchema": { + "type": "object", + "properties": { + "prefix": { + "type": "string", + "description": "MinIO object prefix to sync recursively" + }, + "bucket": { + "type": "string", + "description": "Optional bucket override" + }, + "namespace_base": { + "type": "string", + "description": "Base memory namespace" + }, + "mode": { + "type": "string", + "enum": ["append", "replace"], + "description": "Write mode per object namespace" + }, + "max_objects": { + "type": "number", + "description": "Maximum objects to ingest in one run" + }, + "max_chars_per_object": { + "type": "number", + "description": "Maximum characters per object" + }, + "force": { + "type": "boolean", + "description": "Override non-text extension/content safety checks" + } + }, + "required": ["prefix"] + } +} +``` + ### Media Tools #### `media.send` diff --git a/docs/plans/state.json b/docs/plans/state.json index c62bb9c..8c37843 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -80,6 +80,25 @@ ], "test_status": "pnpm test:run src/tools/builtin/minio-ingest.test.ts src/tools/policy.test.ts + pnpm typecheck passing" }, + "minio-prefix-sync-tool": { + "status": "completed", + "date": "2026-02-16", + "updated": "2026-02-16", + "summary": "Added `minio.sync` tool to recursively ingest MinIO prefixes into nested memory namespaces with per-run object limits, per-object content limits, extension/content safety checks, and summary output. Registered tool with MinIO+memory runtime, expanded `group:minio` policy coverage, and updated docs/tests.", + "files_modified": [ + "src/tools/builtin/minio-sync.ts", + "src/tools/builtin/minio-sync.test.ts", + "src/tools/builtin/index.ts", + "src/tools/index.ts", + "src/daemon/index.ts", + "src/tools/policy.ts", + "src/tools/policy.test.ts", + "README.md", + "docs/api/TOOLS.md", + "docs/plans/state.json" + ], + "test_status": "pnpm test:run src/tools/builtin/minio-sync.test.ts src/tools/policy.test.ts + pnpm typecheck passing" + }, "backup-session-summary-audit-trail": { "status": "completed", "date": "2026-02-16", diff --git a/src/daemon/index.ts b/src/daemon/index.ts index 6f0f0de..76e9bee 100644 --- a/src/daemon/index.ts +++ b/src/daemon/index.ts @@ -27,7 +27,7 @@ import { RoutingPolicy } from '../routing/index.js'; import type { ModelRouter } from '../models/index.js'; import { SessionStore, SessionManager, parseDuration } from '../session/index.js'; import { HookEngine } from '../hooks/index.js'; -import { createSessionTools, createAgentsListTool, createMessageSendTool, createCronTools, createGmailTools, createGcalTools, createGdocsTools, createGdriveTools, createGtasksTools, createMinioShareTool, createMinioIngestTool } from '../tools/index.js'; +import { createSessionTools, createAgentsListTool, createMessageSendTool, createCronTools, createGmailTools, createGcalTools, createGdocsTools, createGdriveTools, createGtasksTools, createMinioShareTool, createMinioIngestTool, createMinioSyncTool } from '../tools/index.js'; import { ChannelRegistry } from '../channels/index.js'; import type { McpManager } from '../mcp/index.js'; import type { SkillRegistry, SkillInstaller } from '../skills/index.js'; @@ -195,6 +195,7 @@ export async function startDaemon(config: Config, options?: StartDaemonOptions): toolRegistry.register(createMinioShareTool(config.backup)); if (memoryStore) { toolRegistry.register(createMinioIngestTool(config.backup, memoryStore)); + toolRegistry.register(createMinioSyncTool(config.backup, memoryStore)); } } diff --git a/src/tools/builtin/index.ts b/src/tools/builtin/index.ts index 1731cdf..2b1399a 100644 --- a/src/tools/builtin/index.ts +++ b/src/tools/builtin/index.ts @@ -29,6 +29,7 @@ export { createGdriveTools } from './gdrive.js'; export { createGtasksTools } from './gtasks.js'; export { createMinioShareTool } from './minio-share.js'; export { createMinioIngestTool } from './minio-ingest.js'; +export { createMinioSyncTool } from './minio-sync.js'; export { screenCaptureTool, cameraCaptureTool } from './capture.js'; import type { Tool } from '../types.js'; diff --git a/src/tools/builtin/minio-sync.test.ts b/src/tools/builtin/minio-sync.test.ts new file mode 100644 index 0000000..ae109dc --- /dev/null +++ b/src/tools/builtin/minio-sync.test.ts @@ -0,0 +1,139 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { BackupConfig } from '../../config/schema.js'; +import type { MemoryStore } from '../../memory/store.js'; +import { createMinioSyncTool, minioSyncInternals } from './minio-sync.js'; + +function makeBackupConfig(overrides?: Partial): BackupConfig { + return { + enabled: true, + schedule: undefined, + interval: '24h', + run_on_start: false, + notify: undefined, + failure_threshold: 1, + notify_recovery: true, + local_dir: '~/.local/share/flynn/backups', + include_vectors: true, + minio: { + enabled: true, + endpoint: 'localhost:9000', + access_key: 'minio-admin', + secret_key: 'minio-secret', + bucket: 'flynn-knowledge', + prefix: 'flynn', + secure: false, + }, + ...overrides, + }; +} + +describe('minio sync internals', () => { + it('parses file object keys from mc ls --json output', () => { + const stdout = [ + '{"status":"success","type":"folder","key":"knowledge/"}', + '{"status":"success","type":"file","key":"knowledge/runbook.md"}', + '{"status":"success","type":"file","name":"knowledge/notes.txt"}', + ].join('\n'); + + expect(minioSyncInternals.parseListedObjectKeys(stdout)).toEqual([ + 'knowledge/runbook.md', + 'knowledge/notes.txt', + ]); + }); + + it('normalizes object paths into namespace-safe segments', () => { + expect(minioSyncInternals.normalizeNamespaceSegment('knowledge/team runbook.v2.md')).toBe('knowledge/team_runbook_v2'); + }); +}); + +describe('createMinioSyncTool', () => { + it('syncs text-like objects into nested memory namespaces', async () => { + const write = vi.fn(); + const store = { write } as unknown as MemoryStore; + + const execRunner = vi.fn(async (_file: string, args: string[]) => { + if (args[0] === 'ls') { + return { + stdout: [ + '{"status":"success","type":"file","key":"knowledge/runbook.md"}', + '{"status":"success","type":"file","key":"knowledge/ops/alerts.txt"}', + ].join('\n'), + stderr: '', + }; + } + if (args[0] === 'cat' && args[1]?.endsWith('knowledge/runbook.md')) { + return { stdout: '# Runbook\nDeploy carefully.\n', stderr: '' }; + } + if (args[0] === 'cat' && args[1]?.endsWith('knowledge/ops/alerts.txt')) { + return { stdout: 'Alert routing notes.', stderr: '' }; + } + return { stdout: '', stderr: '' }; + }); + + const tool = createMinioSyncTool(makeBackupConfig(), store, { + execRunner, + now: () => new Date('2026-02-16T16:00:00.000Z'), + }); + + const result = await tool.execute({ + prefix: 'knowledge/', + namespace_base: 'global/knowledge/minio', + mode: 'replace', + max_objects: 10, + }); + + expect(result.success).toBe(true); + expect(result.output).toContain('Imported: 2'); + expect(write).toHaveBeenCalledTimes(2); + expect(write).toHaveBeenNthCalledWith( + 1, + 'global/knowledge/minio/knowledge/runbook', + expect.stringContaining('source: minio://flynn-knowledge/knowledge/runbook.md'), + 'replace', + ); + expect(write).toHaveBeenNthCalledWith( + 2, + 'global/knowledge/minio/knowledge/ops/alerts', + expect.stringContaining('Alert routing notes.'), + 'replace', + ); + }); + + it('skips binary-like extensions by default', async () => { + const write = vi.fn(); + const store = { write } as unknown as MemoryStore; + const execRunner = vi.fn(async (_file: string, args: string[]) => { + if (args[0] === 'ls') { + return { stdout: '{"status":"success","type":"file","key":"knowledge/diagram.pdf"}', stderr: '' }; + } + return { stdout: 'fake text', stderr: '' }; + }); + + const tool = createMinioSyncTool(makeBackupConfig(), store, { execRunner }); + const result = await tool.execute({ prefix: 'knowledge/' }); + expect(result.success).toBe(true); + expect(result.output).toContain('Imported: 0'); + expect(result.output).toContain('Skipped: 1'); + expect(write).not.toHaveBeenCalled(); + }); + + it('returns an error when minio is disabled', async () => { + const write = vi.fn(); + const store = { write } as unknown as MemoryStore; + const tool = createMinioSyncTool(makeBackupConfig({ + minio: { + enabled: false, + endpoint: undefined, + access_key: undefined, + secret_key: undefined, + bucket: undefined, + prefix: 'flynn', + secure: true, + }, + }), store); + + const result = await tool.execute({ prefix: 'knowledge/' }); + expect(result.success).toBe(false); + expect(result.error).toContain('backup.minio.enabled=true'); + }); +}); diff --git a/src/tools/builtin/minio-sync.ts b/src/tools/builtin/minio-sync.ts new file mode 100644 index 0000000..5033a91 --- /dev/null +++ b/src/tools/builtin/minio-sync.ts @@ -0,0 +1,219 @@ +import { promisify } from 'node:util'; +import { execFile } from 'node:child_process'; +import type { BackupConfig } from '../../config/schema.js'; +import type { MemoryStore } from '../../memory/store.js'; +import type { Tool, ToolResult } from '../types.js'; +import { backupInternals } from '../../backup/index.js'; +import { minioIngestInternals } from './minio-ingest.js'; + +const execFileAsync = promisify(execFile); + +type ExecRunner = ( + file: string, + args: string[], + options?: { env?: NodeJS.ProcessEnv; maxBuffer?: number }, +) => Promise<{ stdout: string; stderr: string }>; + +interface MinioSyncArgs { + prefix: string; + bucket?: string; + namespace_base?: string; + mode?: 'append' | 'replace'; + max_objects?: number; + max_chars_per_object?: number; + force?: boolean; +} + +export interface MinioSyncDeps { + execRunner?: ExecRunner; + now?: () => Date; +} + +function parseListedObjectKeys(stdout: string): string[] { + const keys: string[] = []; + const lines = stdout.split('\n').map((line) => line.trim()).filter(Boolean); + for (const line of lines) { + try { + const parsed = JSON.parse(line) as Record; + const key = typeof parsed.key === 'string' + ? parsed.key + : typeof parsed.name === 'string' + ? parsed.name + : null; + const type = typeof parsed.type === 'string' ? parsed.type : null; + if (!key) {continue;} + if (type && type !== 'file') {continue;} + if (key.endsWith('/')) {continue;} + keys.push(key); + } catch { + continue; + } + } + return keys; +} + +function normalizeNamespaceSegment(value: string): string { + return value + .replace(/\.[^.]+$/, '') + .replace(/[^a-zA-Z0-9/_-]/g, '_') + .replace(/\/+/g, '/') + .replace(/^\/+|\/+$/g, ''); +} + +export const minioSyncInternals = { + parseListedObjectKeys, + normalizeNamespaceSegment, +}; + +export function createMinioSyncTool(config: BackupConfig, store: MemoryStore, deps?: MinioSyncDeps): Tool { + return { + name: 'minio.sync', + description: 'Sync text-like objects from a MinIO prefix into memory namespaces.', + inputSchema: { + type: 'object', + properties: { + prefix: { + type: 'string', + description: 'MinIO object prefix to sync recursively (for example: "knowledge/")', + }, + bucket: { + type: 'string', + description: 'Optional bucket override. Defaults to backup.minio.bucket.', + }, + namespace_base: { + type: 'string', + description: 'Base memory namespace. Per-object namespaces are nested under this path.', + }, + mode: { + type: 'string', + enum: ['append', 'replace'], + description: 'Write mode per object namespace. Default: append.', + }, + max_objects: { + type: 'number', + description: 'Maximum number of objects to ingest per run. Default: 20.', + }, + max_chars_per_object: { + type: 'number', + description: 'Maximum characters ingested per object. Default: 8000.', + }, + force: { + type: 'boolean', + description: 'Allow non-text-like files/extensions.', + }, + }, + required: ['prefix'], + }, + execute: async (rawArgs: unknown): Promise => { + const args = rawArgs as MinioSyncArgs; + const minio = config.minio; + const prefix = args.prefix?.trim(); + const bucket = args.bucket ?? minio.bucket; + const namespaceBase = args.namespace_base ?? 'global/knowledge/minio'; + const mode = args.mode ?? 'append'; + const maxObjects = Math.max(1, Math.floor(args.max_objects ?? 20)); + const maxChars = Math.max(1, Math.floor(args.max_chars_per_object ?? 8_000)); + const force = args.force ?? false; + + if (!prefix) { + return { success: false, output: '', error: 'prefix is required' }; + } + if (!minio.enabled) { + return { success: false, output: '', error: 'MinIO sync requires backup.minio.enabled=true' }; + } + if (!minio.endpoint || !minio.access_key || !minio.secret_key || !bucket) { + return { + success: false, + output: '', + error: 'Missing MinIO credentials in backup.minio (endpoint/access_key/secret_key/bucket)', + }; + } + + const alias = 'flynnsync'; + const host = backupInternals.buildMinioHost({ + endpoint: minio.endpoint, + accessKey: minio.access_key, + secretKey: minio.secret_key, + secure: minio.secure, + }); + const env = { ...process.env, [`MC_HOST_${alias}`]: host }; + const runner = deps?.execRunner ?? (async (file: string, cmdArgs: string[], options?: { env?: NodeJS.ProcessEnv; maxBuffer?: number }) => { + return execFileAsync(file, cmdArgs, options); + }); + + try { + const basePath = `${alias}/${bucket}/${prefix}`; + const { stdout: listed } = await runner('mc', ['ls', '--json', '--recursive', basePath], { + env, + maxBuffer: 20 * 1024 * 1024, + }); + + const keys = parseListedObjectKeys(typeof listed === 'string' ? listed : listed.toString('utf-8')); + if (keys.length === 0) { + return { + success: true, + output: `No objects found under prefix minio://${bucket}/${prefix}`, + }; + } + + const selected = keys.slice(0, maxObjects); + let imported = 0; + let skipped = 0; + const importedNamespaces: string[] = []; + + for (const key of selected) { + if (!force && !minioIngestInternals.isLikelyTextObject(key)) { + skipped++; + continue; + } + + const remotePath = `${alias}/${bucket}/${key}`; + const { stdout: objectStdout } = await runner('mc', ['cat', remotePath], { + env, + maxBuffer: 20 * 1024 * 1024, + }); + const text = typeof objectStdout === 'string' ? objectStdout : objectStdout.toString('utf-8'); + + if (!force && !minioIngestInternals.isLikelyText(text)) { + skipped++; + continue; + } + const trimmed = text.trim(); + if (!trimmed) { + skipped++; + continue; + } + + const clipped = trimmed.length > maxChars + ? `${trimmed.slice(0, maxChars)}\n\n[truncated to ${maxChars} chars]` + : trimmed; + const importedAt = (deps?.now ? deps.now() : new Date()).toISOString(); + const objectNamespace = normalizeNamespaceSegment(key); + const targetNamespace = `${namespaceBase}/${objectNamespace}`; + const payload = `## MinIO Sync Import\nsource: minio://${bucket}/${key}\nimported_at: ${importedAt}\n\n${clipped}`; + store.write(targetNamespace, payload, mode); + imported++; + importedNamespaces.push(targetNamespace); + } + + return { + success: true, + output: [ + `MinIO sync completed.`, + `Prefix: minio://${bucket}/${prefix}`, + `Scanned: ${selected.length} object(s)`, + `Imported: ${imported}`, + `Skipped: ${skipped}`, + importedNamespaces.length > 0 ? `Namespaces:\n- ${importedNamespaces.join('\n- ')}` : 'Namespaces:\n- (none)', + ].join('\n'), + }; + } catch (error) { + return { + success: false, + output: '', + error: error instanceof Error ? error.message : String(error), + }; + } + }, + }; +} diff --git a/src/tools/index.ts b/src/tools/index.ts index 1d57515..e688c4a 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -5,7 +5,7 @@ export { ToolExecutor } from './executor.js'; export type { ToolExecutorConfig } from './executor.js'; export { ToolPolicy } from './policy.js'; export type { ToolPolicyContext } from './policy.js'; -export { allBuiltinTools, createWebSearchTools, createProcessTools, ProcessManager, BrowserManager, createBrowserTools, createMediaSendTool, createAudioTranscribeTool, createSessionTools, createAgentsListTool, createMessageSendTool, createCronTools, createGmailTools, createGcalTools, createGdocsTools, createGdriveTools, createGtasksTools, createMinioShareTool, createMinioIngestTool } from './builtin/index.js'; +export { allBuiltinTools, createWebSearchTools, createProcessTools, ProcessManager, BrowserManager, createBrowserTools, createMediaSendTool, createAudioTranscribeTool, createSessionTools, createAgentsListTool, createMessageSendTool, createCronTools, createGmailTools, createGcalTools, createGdocsTools, createGdriveTools, createGtasksTools, createMinioShareTool, createMinioIngestTool, createMinioSyncTool } from './builtin/index.js'; export type { WebSearchConfig } from './builtin/web-search.js'; export type { ProcessManagerConfig } from './builtin/process/index.js'; export type { BrowserManagerConfig } from './builtin/browser/index.js'; diff --git a/src/tools/policy.test.ts b/src/tools/policy.test.ts index 288c52d..100debf 100644 --- a/src/tools/policy.test.ts +++ b/src/tools/policy.test.ts @@ -19,6 +19,7 @@ const ALL_TOOL_NAMES = [ 'memory.search', 'minio.share', 'minio.ingest', + 'minio.sync', 'process.start', 'process.status', 'process.output', @@ -491,6 +492,7 @@ describe('ToolPolicy', () => { const names = result.map(t => t.name); expect(names).toContain('minio.share'); expect(names).toContain('minio.ingest'); + expect(names).toContain('minio.sync'); expect(names).toContain('file.read'); expect(names).not.toContain('shell.exec'); }); diff --git a/src/tools/policy.ts b/src/tools/policy.ts index 210cdf6..c55cecc 100644 --- a/src/tools/policy.ts +++ b/src/tools/policy.ts @@ -41,6 +41,7 @@ const PROFILE_TOOLS: Record> = { 'cron.delete', 'minio.share', 'minio.ingest', + 'minio.sync', ]), coding: new Set([ 'file.read', @@ -71,6 +72,7 @@ const PROFILE_TOOLS: Record> = { 'cron.delete', 'minio.share', 'minio.ingest', + 'minio.sync', 'file.write', 'file.edit', 'file.patch', @@ -106,7 +108,7 @@ export const TOOL_GROUPS: Record = { 'group:gdrive': ['drive.list', 'drive.search', 'drive.read'], 'group:gtasks': ['tasks.lists', 'tasks.list'], 'group:cron': ['cron.list', 'cron.trigger', 'cron.create', 'cron.delete'], - 'group:minio': ['minio.share', 'minio.ingest'], + 'group:minio': ['minio.share', 'minio.ingest', 'minio.sync'], }; /** Expand group references in a list of tool names/patterns. */