import { promisify } from 'node:util';
import { execFile } from 'node:child_process';
import { extname, join } from 'node:path';
import { mkdtempSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import type { BackupConfig } from '../../config/schema.js';
import type { MemoryStore } from '../../memory/store.js';
import type { Tool, ToolResult } from '../types.js';
import { backupInternals } from '../../backup/index.js';

const execFileAsync = promisify(execFile);

/** `maxBuffer` handed to every child process spawned by this module (20 MiB). */
const EXEC_MAX_BUFFER = 20 * 1024 * 1024;

/** Memory namespace used when the caller does not supply one. */
const DEFAULT_NAMESPACE = 'global/knowledge';

/** Character budget used when `max_chars` is absent or not a usable number. */
const DEFAULT_MAX_CHARS = 20_000;

/** Alias under which the MinIO host is exposed to `mc` via the `MC_HOST_<alias>` variable. */
const MC_ALIAS = 'flynningest';

/** Write modes accepted by the tool (mirrors the `mode` enum in the input schema). */
const WRITE_MODES = ['append', 'replace'] as const;

type WriteMode = (typeof WRITE_MODES)[number];

/**
 * Minimal process-execution contract used by this module.
 * Injectable through {@link MinioIngestDeps} so callers can substitute a fake.
 */
type ExecRunner = (
  file: string,
  args: string[],
  options?: { env?: NodeJS.ProcessEnv; maxBuffer?: number },
) => Promise<{ stdout: string | Buffer; stderr: string | Buffer }>;

/** Extensions that are read verbatim with `mc cat`. */
const TEXT_EXTENSIONS = new Set([
  '.txt',
  '.md',
  '.markdown',
  '.csv',
  '.tsv',
  '.json',
  '.jsonl',
  '.yaml',
  '.yml',
  '.log',
  '.xml',
  '.html',
  '.htm',
]);

/** Binary extensions whose text is obtained through an external extractor. */
const EXTRACTABLE_BINARY_EXTENSIONS = new Set([
  '.pdf',
]);

/** Optional collaborators that can be overridden by the caller. */
export interface MinioIngestDeps {
  /** Replacement for the default `execFile`-based runner. */
  execRunner?: ExecRunner;
  /** Clock used for the `imported_at` stamp; defaults to `new Date()`. */
  now?: () => Date;
}

/** Returns `true` when `content` contains no NUL character. */
function isLikelyText(content: string): boolean {
  return !content.includes('\u0000');
}

/**
 * Decides from the object key alone whether the object is worth ingesting.
 * Keys without an extension are accepted; otherwise the extension must be in
 * one of the two allow-lists above.
 */
function isLikelyTextObject(objectKey: string): boolean {
  const ext = extname(objectKey).toLowerCase();
  if (!ext) {
    return true;
  }
  return TEXT_EXTENSIONS.has(ext) || EXTRACTABLE_BINARY_EXTENSIONS.has(ext);
}

/** Returns `true` when the key's extension requires external text extraction. */
function isExtractableBinaryObject(objectKey: string): boolean {
  return EXTRACTABLE_BINARY_EXTENSIONS.has(extname(objectKey).toLowerCase());
}

/** Normalises process output (string or Buffer) to a UTF-8 string. */
function toText(value: string | Buffer): string {
  return typeof value === 'string' ? value : value.toString('utf-8');
}

/**
 * Fetches an object through the `mc` CLI and returns its textual content.
 *
 * Plain objects are streamed with `mc cat`. Extractable binaries are first
 * copied into a private temporary directory and converted with `pdftotext`;
 * the directory is removed afterwards whether or not the conversion succeeded.
 *
 * @param runner - Process runner used for both `mc` and `pdftotext`.
 * @param remotePath - `<alias>/<bucket>/<key>` path understood by `mc`.
 * @param objectKey - Object key, used only to pick the read strategy.
 * @param env - Environment passed to the `mc` invocations.
 * @returns The object's content as text.
 * @throws Whatever the runner rejects with (e.g. a non-zero exit of a child process).
 */
async function readObjectText(
  runner: ExecRunner,
  remotePath: string,
  objectKey: string,
  env: NodeJS.ProcessEnv,
): Promise<string> {
  if (!isExtractableBinaryObject(objectKey)) {
    const { stdout } = await runner('mc', ['cat', remotePath], { env, maxBuffer: EXEC_MAX_BUFFER });
    return toText(stdout);
  }

  const tempDir = mkdtempSync(join(tmpdir(), 'flynn-minio-ingest-'));
  const localPath = join(tempDir, 'object.bin');
  try {
    await runner('mc', ['cp', remotePath, localPath], { env, maxBuffer: EXEC_MAX_BUFFER });
    // "-" as the output argument makes pdftotext write the extracted text to stdout.
    const { stdout } = await runner('pdftotext', ['-q', localPath, '-'], {
      maxBuffer: EXEC_MAX_BUFFER,
    });
    return toText(stdout);
  } finally {
    rmSync(tempDir, { recursive: true, force: true });
  }
}

/** Internal helpers re-exported as a single object. */
export const minioIngestInternals = {
  isLikelyText,
  isLikelyTextObject,
  isExtractableBinaryObject,
  readObjectText,
};

/** Arguments accepted by the `minio.ingest` tool. */
interface MinioIngestArgs {
  object_key: string;
  bucket?: string;
  namespace?: string;
  mode?: WriteMode;
  max_chars?: number;
  force?: boolean;
}

/** Type guard for the supported write modes. */
function isWriteMode(value: unknown): value is WriteMode {
  return WRITE_MODES.some((candidate) => candidate === value);
}

/** Builds the failed-result shape returned by every error path of the tool. */
function failure(error: string): ToolResult {
  return { success: false, output: '', error };
}

/**
 * Creates the `minio.ingest` tool, which reads a text-like object from MinIO
 * and writes it into the memory store.
 *
 * @param config - Backup configuration; only `config.minio` is read.
 * @param store - Memory store that receives the imported payload via `write`.
 * @param deps - Optional overrides for process execution and the clock.
 * @returns The tool definition. Its `execute` never rejects for invalid
 *   arguments or failed child processes; those are reported as
 *   `{ success: false, error }`.
 */
export function createMinioIngestTool(config: BackupConfig, store: MemoryStore, deps?: MinioIngestDeps): Tool {
  return {
    name: 'minio.ingest',
    description: 'Read a text-like object from MinIO and ingest it into memory namespace for later retrieval/search.',
    inputSchema: {
      type: 'object',
      properties: {
        object_key: {
          type: 'string',
          description: 'Object key in MinIO bucket (for example: "knowledge/runbook.md")',
        },
        bucket: {
          type: 'string',
          description: 'Optional bucket override. Defaults to backup.minio.bucket.',
        },
        namespace: {
          type: 'string',
          description: 'Memory namespace to write to. Default: "global/knowledge".',
        },
        mode: {
          type: 'string',
          enum: [...WRITE_MODES],
          description: 'Write mode for memory namespace. Default: "append".',
        },
        max_chars: {
          type: 'number',
          description: 'Maximum characters to ingest. Default: 20000.',
        },
        force: {
          type: 'boolean',
          description: 'Ingest even if file extension/content look non-text.',
        },
      },
      required: ['object_key'],
    },
    execute: async (rawArgs: unknown): Promise<ToolResult> => {
      // Arguments arrive untyped; tolerate null/primitive input instead of throwing.
      const args: Partial<MinioIngestArgs> =
        typeof rawArgs === 'object' && rawArgs !== null ? (rawArgs as Partial<MinioIngestArgs>) : {};
      const minio = config.minio;
      const objectKey = typeof args.object_key === 'string' ? args.object_key.trim() : '';
      const namespace = args.namespace ?? DEFAULT_NAMESPACE;
      const mode: unknown = args.mode ?? 'append';
      // A value that coerces to NaN would make the length comparison below always
      // false and silently disable truncation, so fall back to the default instead.
      const requestedMax = Math.floor(Number(args.max_chars ?? DEFAULT_MAX_CHARS));
      const maxChars = Number.isNaN(requestedMax) ? DEFAULT_MAX_CHARS : Math.max(1, requestedMax);
      const force = args.force ?? false;
      const bucket = args.bucket ?? minio.bucket;

      if (!objectKey) {
        return failure('object_key is required');
      }
      if (!isWriteMode(mode)) {
        return failure(`Invalid mode: ${String(mode)}. Expected "append" or "replace".`);
      }
      if (!minio.enabled) {
        return failure('MinIO ingestion requires backup.minio.enabled=true');
      }
      if (!minio.endpoint || !minio.access_key || !minio.secret_key || !bucket) {
        return failure('Missing MinIO credentials in backup.minio (endpoint/access_key/secret_key/bucket)');
      }
      if (!force && !isLikelyTextObject(objectKey)) {
        return failure(
          `Unsupported object type for ingestion: ${objectKey}. Use force=true if you know it is text.`,
        );
      }

      // NOTE(review): buildMinioHost presumably returns the host string `mc` expects in
      // MC_HOST_<alias> — confirm against backupInternals.
      const host = backupInternals.buildMinioHost({
        endpoint: minio.endpoint,
        accessKey: minio.access_key,
        secretKey: minio.secret_key,
        secure: minio.secure,
      });
      const env = { ...process.env, [`MC_HOST_${MC_ALIAS}`]: host };
      const runner =
        deps?.execRunner ??
        (async (file: string, cmdArgs: string[], options?: { env?: NodeJS.ProcessEnv; maxBuffer?: number }) => {
          return execFileAsync(file, cmdArgs, options);
        });
      const remotePath = `${MC_ALIAS}/${bucket}/${objectKey}`;

      try {
        const text = await readObjectText(runner, remotePath, objectKey, env);

        // Extracted binaries went through pdftotext, so the NUL check applies only to raw reads.
        if (!force && !isExtractableBinaryObject(objectKey) && !isLikelyText(text)) {
          return failure(
            `Object appears binary and cannot be ingested safely: ${objectKey}. Use force=true to override.`,
          );
        }

        const trimmed = text.trim();
        if (!trimmed) {
          return failure(`Object is empty: minio://${bucket}/${objectKey}`);
        }

        const clipped =
          trimmed.length > maxChars
            ? `${trimmed.slice(0, maxChars)}\n\n[truncated to ${maxChars} chars]`
            : trimmed;
        const importedAt = (deps?.now ? deps.now() : new Date()).toISOString();
        const payload = `## MinIO Import\nsource: minio://${bucket}/${objectKey}\nimported_at: ${importedAt}\n\n${clipped}`;

        store.write(namespace, payload, mode);

        return {
          success: true,
          output: `Ingested MinIO object into memory.\nSource: minio://${bucket}/${objectKey}\nNamespace: ${namespace}\nMode: ${mode}`,
        };
      } catch (error: unknown) {
        return failure(error instanceof Error ? error.message : String(error));
      }
    },
  };
}