feat(cli): enforce minio ingestion extractor checks in setup and doctor

This commit is contained in:
William Valentin
2026-02-16 14:43:20 -08:00
parent 0548ab3833
commit 289fc44380
8 changed files with 275 additions and 4 deletions
+7
View File
@@ -630,6 +630,13 @@ When `backup.minio.enabled` is configured, Flynn also exposes MinIO tools:
- `minio.ingest`: read a text-like object (plus PDF/DOCX via extractor tools when available) from MinIO and append/replace a memory namespace
- `minio.sync`: recursively ingest a MinIO prefix into nested memory namespaces with object and size limits (including PDF/DOCX extraction when available)
PDF/DOCX ingestion runtime requirements:
- PDF extraction requires `pdftotext`.
- DOCX extraction requires `pandoc` or `docx2txt`.
- `flynn setup` now checks these dependencies after saving the config when `backup.minio.enabled: true`.
- `flynn doctor` reports `MinIO ingest extractors` status so missing binaries are visible in health checks.
## Kubernetes Tools
Optional Kubernetes tools are available when `k8s.enabled: true`:
+5
View File
@@ -962,6 +962,11 @@ Search memory using hybrid (keyword + vector) search.
### MinIO Tools
Runtime extractor requirements for binary document ingestion:
- PDF (`.pdf`): requires `pdftotext`
- DOCX (`.docx`): requires either `pandoc` or `docx2txt`
#### `minio.share`
Upload a local file to MinIO and return a temporary presigned download URL.
+18 -1
View File
@@ -172,6 +172,23 @@
],
"test_status": "pnpm test:run src/tools/builtin/minio-ingest.test.ts src/tools/builtin/minio-sync.test.ts + pnpm typecheck passing"
},
"minio-ingestion-extractor-requirements-setup-and-doctor": {
"status": "completed",
"date": "2026-02-16",
"updated": "2026-02-16",
"summary": "Documented MinIO ingestion runtime extractor requirements (PDF: `pdftotext`, DOCX: `pandoc` or `docx2txt`) and added shared dependency checks in setup + doctor flows. `flynn setup` now reports extractor readiness after config save when MinIO is enabled, and `flynn doctor` now includes a `MinIO ingest extractors` check.",
"files_modified": [
"src/cli/minioExtractors.ts",
"src/cli/minioExtractors.test.ts",
"src/cli/setup.ts",
"src/cli/doctor.ts",
"src/cli/doctor.test.ts",
"README.md",
"docs/api/TOOLS.md",
"docs/plans/state.json"
],
"test_status": "pnpm test:run src/cli/minioExtractors.test.ts src/cli/doctor.test.ts + pnpm typecheck passing"
},
"backup-session-summary-audit-trail": {
"status": "completed",
"date": "2026-02-16",
@@ -3456,7 +3473,7 @@
}
},
"overall_progress": {
"total_test_count": 1852,
"total_test_count": 1857,
"all_tests_passing": true,
"p0_completion": "3/3 (100%)",
"p1_completion": "4/4 (100%)",
+49
View File
@@ -335,6 +335,55 @@ models:
expect(registryCheck?.detail).toContain('unconfigured');
});
// Doctor should not probe for extractor binaries when the MinIO backup section is turned off:
// the 'MinIO ingest extractors' entry is expected to come back with status 'skip'.
it('reports SKIP for MinIO ingest extractors when MinIO is disabled', async () => {
mkdirSync(testDir, { recursive: true });
const configPath = join(testDir, 'minio-disabled.yaml');
// Config fixture whose backup/minio section carries `enabled: false`.
writeFileSync(configPath, `
telegram:
bot_token: "test-token"
allowed_chat_ids: [123]
models:
default:
provider: anthropic
model: claude-sonnet
backup:
minio:
enabled: false
`);
const ctx: DoctorContext = { configPath, dataDir: testDir };
const results = await runChecks(ctx);
// Look the check up by label so the assertion does not depend on its position in the results.
const minioCheck = results.find(r => r.label.includes('MinIO ingest extractors'));
expect(minioCheck?.status).toBe('skip');
});
// With the MinIO backup section enabled, doctor must emit a 'MinIO ingest extractors' entry whose
// detail carries both a `pdf:` and a `docx:` segment.
it('reports MinIO ingest extractor status when MinIO is enabled', async () => {
mkdirSync(testDir, { recursive: true });
const configPath = join(testDir, 'minio-enabled.yaml');
// Config fixture whose backup/minio section carries `enabled: true`.
writeFileSync(configPath, `
telegram:
bot_token: "test-token"
allowed_chat_ids: [123]
models:
default:
provider: anthropic
model: claude-sonnet
backup:
minio:
enabled: true
`);
const ctx: DoctorContext = { configPath, dataDir: testDir };
const results = await runChecks(ctx);
const minioCheck = results.find(r => r.label.includes('MinIO ingest extractors'));
expect(minioCheck).toBeDefined();
// Either outcome is accepted — presumably because the result depends on which extractor
// binaries happen to be installed on the machine running the tests.
expect(['pass', 'warn']).toContain(minioCheck?.status);
expect(minioCheck?.detail).toContain('pdf:');
expect(minioCheck?.detail).toContain('docx:');
});
it('reports PASS for skills registry when source is parsable', async () => {
mkdirSync(testDir, { recursive: true });
const registryPath = join(testDir, 'registry.json');
+24
View File
@@ -6,6 +6,7 @@ import { homedir } from 'os';
import { resolve, join } from 'path';
import { parse } from 'yaml';
import { configSchema } from '../config/schema.js';
import { checkMinioExtractorStatus, summarizeMinioExtractorStatus } from './minioExtractors.js';
export interface CheckResult {
status: 'pass' | 'fail' | 'warn' | 'skip';
@@ -574,6 +575,28 @@ const checkGmail: Check = async (ctx) => {
return { status: warnings.length > 0 ? 'warn' : 'pass', label: 'Gmail configured', detail: withWarnings };
};
/**
 * Doctor check for the external tools used by MinIO PDF/DOCX ingestion.
 *
 * - `skip` when no parsed config is available or `backup.minio` is not enabled.
 * - `warn` when at least one extractor requirement is missing.
 * - `pass` when every requirement was detected.
 *
 * The detail string is the compact summary from `summarizeMinioExtractorStatus`,
 * with an install hint appended in the `warn` case.
 */
const checkMinioExtractors: Check = async (ctx) => {
  const label = 'MinIO ingest extractors';

  if (!ctx.config) {
    return { status: 'skip', label, detail: '(config invalid)' };
  }

  const extractorStatus = await checkMinioExtractorStatus(ctx.config as unknown as Record<string, unknown>);
  if (!extractorStatus.minioEnabled) {
    return { status: 'skip', label, detail: '(backup.minio not enabled)' };
  }

  const summary = summarizeMinioExtractorStatus(extractorStatus);
  const somethingMissing = extractorStatus.missingRequirements.length > 0;

  return somethingMissing
    ? { status: 'warn', label, detail: `${summary} — install missing extractors for PDF/DOCX ingestion` }
    : { status: 'pass', label, detail: summary };
};
const allChecks: Check[] = [
checkConfigExists,
checkOverlayExists,
@@ -586,6 +609,7 @@ const allChecks: Check[] = [
checkModelConnectivity,
checkTelegram,
checkGmail,
checkMinioExtractors,
checkMcpServers,
checkSkills,
checkSkillsRegistry,
+41
View File
@@ -0,0 +1,41 @@
import { describe, it, expect } from 'vitest';
import {
checkMinioExtractorStatus,
renderMinioExtractorSetupLines,
summarizeMinioExtractorStatus,
} from './minioExtractors.js';
// Unit tests for the MinIO extractor dependency helpers. Command lookups are stubbed through the
// injectable `exists` argument so the results do not depend on the host's installed binaries.
describe('minio extractor requirements', () => {
  const minioEnabledConfig = { backup: { minio: { enabled: true } } };

  it('skips checks when backup.minio is not enabled', async () => {
    const result = await checkMinioExtractorStatus({});

    expect(result.minioEnabled).toBe(false);
    expect(result.missingRequirements).toEqual([]);
    expect(renderMinioExtractorSetupLines(result)).toEqual([]);
  });

  it('reports missing pdf/docx extractors', async () => {
    // Every lookup fails: nothing is installed.
    const nothingInstalled = async () => false;
    const result = await checkMinioExtractorStatus(minioEnabledConfig, nothingInstalled);

    expect(result.minioEnabled).toBe(true);
    expect(result.pdfSupported).toBe(false);
    expect(result.docxSupported).toBe(false);
    expect(result.missingRequirements).toEqual(['pdftotext', 'pandoc or docx2txt']);
    expect(summarizeMinioExtractorStatus(result)).toBe('pdf:missing(pdftotext), docx:missing(pandoc|docx2txt)');
  });

  it('accepts pandoc as docx extractor', async () => {
    // Only pdftotext and pandoc are "installed"; docx2txt is absent.
    const installed = ['pdftotext', 'pandoc'];
    const result = await checkMinioExtractorStatus(
      minioEnabledConfig,
      async (command) => installed.includes(command),
    );

    expect(result.pdfSupported).toBe(true);
    expect(result.docxSupported).toBe(true);
    expect(result.availableDocxExtractors).toEqual(['pandoc']);
    expect(result.missingRequirements).toEqual([]);
  });
});
+105
View File
@@ -0,0 +1,105 @@
import { execFile } from 'child_process';
import { promisify } from 'util';
// Promise-returning form of child_process.execFile, so it can be awaited.
const execFileAsync = promisify(execFile);
// Loosely typed object used while walking config values whose shape is not assumed.
type UnknownRecord = Record<string, unknown>;
// Async predicate answering "is this command available?"; taken as a parameter by
// checkMinioExtractorStatus so callers can substitute their own lookup.
type CommandExistsFn = (command: string) => Promise<boolean>;
/**
 * Views an arbitrary value as a string-keyed record when it is a non-null object.
 *
 * @param value - Anything, typically a freshly read config node.
 * @returns The same reference typed as a record, or `undefined` for falsy values and
 *          non-objects. Arrays are objects and are returned as-is.
 */
const asRecord = (value: unknown): UnknownRecord | undefined => {
  if (!value || typeof value !== 'object') {
    return undefined;
  }
  return value as UnknownRecord;
};
/**
 * Reports whether `command` can be resolved by `command -v` in a login shell.
 *
 * The name is passed to the shell as a positional parameter (`$1`) rather than being
 * spliced into the script text. Shell metacharacters in the name are therefore treated
 * as part of the name instead of being executed, so a value such as `foo; true` can
 * neither run extra commands nor produce a false positive.
 *
 * @param command - Name of the executable to look up.
 * @returns `true` when the lookup exits successfully; `false` on any failure, including
 *          a non-zero exit or the shell itself failing to start.
 */
async function commandExists(command: string): Promise<boolean> {
  try {
    // The argument after the script text becomes $0; `command` then arrives as "$1".
    await execFileAsync('sh', ['-lc', 'command -v "$1" >/dev/null 2>&1', 'sh', command]);
    return true;
  } catch {
    return false;
  }
}
/**
 * Result of probing for the external tools used for MinIO PDF/DOCX ingestion.
 * When `minioEnabled` is false no probing is performed and every other field holds
 * its empty/false value.
 */
export interface MinioExtractorStatus {
/** True only when `backup.minio.enabled` is strictly `true` in the supplied config. */
minioEnabled: boolean;
/** True when `pdftotext` was found. */
pdfSupported: boolean;
/** True when at least one DOCX extractor (`pandoc` or `docx2txt`) was found. */
docxSupported: boolean;
/** Names of the DOCX extractors that were found, listed as `pandoc` then `docx2txt`. */
availableDocxExtractors: string[];
/** Human-readable labels for unmet requirements (`pdftotext`, `pandoc or docx2txt`). */
missingRequirements: string[];
}
/**
 * Determines which MinIO ingestion extractors are available.
 *
 * Nothing is probed unless `config.backup.minio.enabled` is strictly `true`; in that case
 * an all-false/empty status is returned. Otherwise `pdftotext`, `pandoc` and `docx2txt`
 * are looked up concurrently through `exists`.
 *
 * @param config - Parsed configuration object; only `backup.minio.enabled` is read.
 * @param exists - Lookup used to test for each command. Defaults to the shell-based probe.
 * @returns Extractor availability plus a list of unmet requirements.
 */
export async function checkMinioExtractorStatus(
  config: Record<string, unknown>,
  exists: CommandExistsFn = commandExists,
): Promise<MinioExtractorStatus> {
  const minioSection = asRecord(asRecord(config.backup)?.minio);
  if (minioSection?.enabled !== true) {
    return {
      minioEnabled: false,
      pdfSupported: false,
      docxSupported: false,
      availableDocxExtractors: [],
      missingRequirements: [],
    };
  }

  // Start all three lookups before awaiting so they run concurrently.
  const pdfProbe = exists('pdftotext');
  const pandocProbe = exists('pandoc');
  const docx2txtProbe = exists('docx2txt');
  const [pdfSupported, pandocFound, docx2txtFound] = await Promise.all([
    pdfProbe,
    pandocProbe,
    docx2txtProbe,
  ]);

  const availableDocxExtractors: string[] = [];
  if (pandocFound) {
    availableDocxExtractors.push('pandoc');
  }
  if (docx2txtFound) {
    availableDocxExtractors.push('docx2txt');
  }
  const docxSupported = availableDocxExtractors.length > 0;

  const missingRequirements: string[] = [
    ...(pdfSupported ? [] : ['pdftotext']),
    ...(docxSupported ? [] : ['pandoc or docx2txt']),
  ];

  return {
    minioEnabled: true,
    pdfSupported,
    docxSupported,
    availableDocxExtractors,
    missingRequirements,
  };
}
/**
 * Builds the compact one-line summary of extractor availability used in doctor output,
 * e.g. `pdf:ok(pdftotext), docx:missing(pandoc|docx2txt)`.
 *
 * @param status - Result of the extractor probe.
 * @returns `pdf:<state>, docx:<state>`; detected DOCX extractors are joined with `|`.
 */
export function summarizeMinioExtractorStatus(status: MinioExtractorStatus): string {
  const segments: string[] = [];

  if (status.pdfSupported) {
    segments.push('pdf:ok(pdftotext)');
  } else {
    segments.push('pdf:missing(pdftotext)');
  }

  if (status.docxSupported) {
    segments.push(`docx:ok(${status.availableDocxExtractors.join('|')})`);
  } else {
    segments.push('docx:missing(pandoc|docx2txt)');
  }

  return segments.join(', ');
}
/**
 * Produces the multi-line extractor report printed by `flynn setup`.
 *
 * @param status - Result of the extractor probe.
 * @returns An empty array when MinIO is not enabled. Otherwise a heading, one line each
 *          for PDF and DOCX, and a footer: an install hint if anything is missing, or a
 *          confirmation that everything was detected.
 */
export function renderMinioExtractorSetupLines(status: MinioExtractorStatus): string[] {
  if (!status.minioEnabled) {
    return [];
  }

  const pdfState = status.pdfSupported ? 'detected' : 'missing';
  const docxState = status.docxSupported
    ? `detected (${status.availableDocxExtractors.join(', ')})`
    : 'missing';

  const footer = status.missingRequirements.length > 0
    ? [
      ' Missing extractors will limit PDF/DOCX ingestion for minio.ingest and minio.sync.',
      ' Install missing tools, then run `flynn doctor` to verify.',
    ]
    : [' All extractor dependencies detected.'];

  return [
    'MinIO ingestion extractor requirements:',
    ` PDF (.pdf): pdftotext ${pdfState}`,
    ` DOCX (.docx): pandoc or docx2txt ${docxState}`,
    ...footer,
  ];
}
+26 -3
View File
@@ -8,6 +8,7 @@ import { createPrompter } from './setup/prompts.js';
import { ConfigBuilder } from './setup/config.js';
import { runFirstRunWizard, runMenu } from './setup/orchestrator.js';
import { runGoogleAuth } from './setup/automation.js';
import { checkMinioExtractorStatus, renderMinioExtractorSetupLines } from './minioExtractors.js';
export async function runSetup(configPath: string): Promise<void> {
const rl = createInterface({ input: process.stdin, output: process.stdout });
@@ -21,12 +22,16 @@ export async function runSetup(configPath: string): Promise<void> {
const builder = ConfigBuilder.fromObject(parsed);
await runMenu(p, builder);
saveConfig(configPath, builder, p);
await runGoogleAuth(p, builder.build());
const config = builder.build();
await printMinioExtractorSetupStatus(p, config as Record<string, unknown>);
await runGoogleAuth(p, config);
} else {
// No config → first-run wizard
const builder = await runFirstRunWizard(p);
saveConfig(configPath, builder, p);
await runGoogleAuth(p, builder.build());
const config = builder.build();
await printMinioExtractorSetupStatus(p, config as Record<string, unknown>);
await runGoogleAuth(p, config);
const shouldStart = await p.confirm('Start Flynn now?', true);
if (shouldStart) {
@@ -46,7 +51,9 @@ export async function runSetup(configPath: string): Promise<void> {
const menuBuilder = ConfigBuilder.fromObject(parsed);
await runMenu(p, menuBuilder);
saveConfig(configPath, menuBuilder, p);
await runGoogleAuth(p, menuBuilder.build());
const config = menuBuilder.build();
await printMinioExtractorSetupStatus(p, config as Record<string, unknown>);
await runGoogleAuth(p, config);
}
}
} finally {
@@ -62,6 +69,22 @@ function saveConfig(configPath: string, builder: ConfigBuilder, p: { println(msg
p.println(`✓ Config saved to ${configPath}`);
}
/**
 * Probes for MinIO ingestion extractors and prints the resulting report through the prompter.
 *
 * Prints nothing when the report is empty (MinIO not enabled); otherwise emits a blank
 * separator line followed by each report line.
 *
 * @param p - Output sink exposing `println`.
 * @param config - Built configuration object handed to the extractor probe.
 */
async function printMinioExtractorSetupStatus(
  p: { println(msg?: string): void },
  config: Record<string, unknown>,
): Promise<void> {
  const report = renderMinioExtractorSetupLines(await checkMinioExtractorStatus(config));
  if (report.length > 0) {
    p.println();
    report.forEach((text) => {
      p.println(text);
    });
  }
}
export function registerSetupCommand(program: Command): void {
program
.command('setup')