feat(cli): enforce minio ingestion extractor checks in setup and doctor
This commit is contained in:
@@ -630,6 +630,13 @@ When `backup.minio.enabled` is configured, Flynn also exposes MinIO tools:
|
||||
- `minio.ingest`: read a text-like object (plus PDF/DOCX via extractor tools when available) from MinIO and append/replace a memory namespace
|
||||
- `minio.sync`: recursively ingest a MinIO prefix into nested memory namespaces with object and size limits (including PDF/DOCX extraction when available)
|
||||
|
||||
PDF/DOCX ingestion runtime requirements:
|
||||
|
||||
- PDF extraction requires `pdftotext`.
|
||||
- DOCX extraction requires `pandoc` or `docx2txt`.
|
||||
- `flynn setup` now checks these dependencies after config save when `backup.minio.enabled: true`.
|
||||
- `flynn doctor` reports `MinIO ingest extractors` status so missing binaries are visible in health checks.
|
||||
|
||||
## Kubernetes Tools
|
||||
|
||||
Optional Kubernetes tools are available when `k8s.enabled: true`:
|
||||
|
||||
@@ -962,6 +962,11 @@ Search memory using hybrid (keyword + vector) search.
|
||||
|
||||
### MinIO Tools
|
||||
|
||||
Runtime extractor requirements for binary document ingestion:
|
||||
|
||||
- PDF (`.pdf`): requires `pdftotext`
|
||||
- DOCX (`.docx`): requires either `pandoc` or `docx2txt`
|
||||
|
||||
#### `minio.share`
|
||||
|
||||
Upload a local file to MinIO and return a temporary presigned download URL.
|
||||
|
||||
+18
-1
@@ -172,6 +172,23 @@
|
||||
],
|
||||
"test_status": "pnpm test:run src/tools/builtin/minio-ingest.test.ts src/tools/builtin/minio-sync.test.ts + pnpm typecheck passing"
|
||||
},
|
||||
"minio-ingestion-extractor-requirements-setup-and-doctor": {
|
||||
"status": "completed",
|
||||
"date": "2026-02-16",
|
||||
"updated": "2026-02-16",
|
||||
"summary": "Documented MinIO ingestion runtime extractor requirements (PDF: `pdftotext`, DOCX: `pandoc` or `docx2txt`) and added shared dependency checks in setup + doctor flows. `flynn setup` now reports extractor readiness after config save when MinIO is enabled, and `flynn doctor` now includes a `MinIO ingest extractors` check.",
|
||||
"files_modified": [
|
||||
"src/cli/minioExtractors.ts",
|
||||
"src/cli/minioExtractors.test.ts",
|
||||
"src/cli/setup.ts",
|
||||
"src/cli/doctor.ts",
|
||||
"src/cli/doctor.test.ts",
|
||||
"README.md",
|
||||
"docs/api/TOOLS.md",
|
||||
"docs/plans/state.json"
|
||||
],
|
||||
"test_status": "pnpm test:run src/cli/minioExtractors.test.ts src/cli/doctor.test.ts + pnpm typecheck passing"
|
||||
},
|
||||
"backup-session-summary-audit-trail": {
|
||||
"status": "completed",
|
||||
"date": "2026-02-16",
|
||||
@@ -3456,7 +3473,7 @@
|
||||
}
|
||||
},
|
||||
"overall_progress": {
|
||||
"total_test_count": 1852,
|
||||
"total_test_count": 1857,
|
||||
"all_tests_passing": true,
|
||||
"p0_completion": "3/3 (100%)",
|
||||
"p1_completion": "4/4 (100%)",
|
||||
|
||||
@@ -335,6 +335,55 @@ models:
|
||||
expect(registryCheck?.detail).toContain('unconfigured');
|
||||
});
|
||||
|
||||
// Writes a config whose backup.minio.enabled is false, runs the doctor checks against it,
// and expects the result labelled 'MinIO ingest extractors' to carry status 'skip'.
it('reports SKIP for MinIO ingest extractors when MinIO is disabled', async () => {
|
||||
mkdirSync(testDir, { recursive: true });
|
||||
const configPath = join(testDir, 'minio-disabled.yaml');
|
||||
writeFileSync(configPath, `
|
||||
telegram:
|
||||
bot_token: "test-token"
|
||||
allowed_chat_ids: [123]
|
||||
models:
|
||||
default:
|
||||
provider: anthropic
|
||||
model: claude-sonnet
|
||||
backup:
|
||||
minio:
|
||||
enabled: false
|
||||
`);
|
||||
|
||||
const ctx: DoctorContext = { configPath, dataDir: testDir };
|
||||
const results = await runChecks(ctx);
|
||||
|
||||
const minioCheck = results.find(r => r.label.includes('MinIO ingest extractors'));
|
||||
expect(minioCheck?.status).toBe('skip');
|
||||
});
|
||||
|
||||
// Writes a config whose backup.minio.enabled is true and runs the doctor checks against it.
// Either 'pass' or 'warn' is accepted — presumably because the outcome depends on which
// extractor binaries are installed on the machine running the test (TODO confirm).
// The detail text must mention both the 'pdf:' and 'docx:' segments in either case.
it('reports MinIO ingest extractor status when MinIO is enabled', async () => {
|
||||
mkdirSync(testDir, { recursive: true });
|
||||
const configPath = join(testDir, 'minio-enabled.yaml');
|
||||
writeFileSync(configPath, `
|
||||
telegram:
|
||||
bot_token: "test-token"
|
||||
allowed_chat_ids: [123]
|
||||
models:
|
||||
default:
|
||||
provider: anthropic
|
||||
model: claude-sonnet
|
||||
backup:
|
||||
minio:
|
||||
enabled: true
|
||||
`);
|
||||
|
||||
const ctx: DoctorContext = { configPath, dataDir: testDir };
|
||||
const results = await runChecks(ctx);
|
||||
|
||||
const minioCheck = results.find(r => r.label.includes('MinIO ingest extractors'));
|
||||
expect(minioCheck).toBeDefined();
|
||||
expect(['pass', 'warn']).toContain(minioCheck?.status);
|
||||
expect(minioCheck?.detail).toContain('pdf:');
|
||||
expect(minioCheck?.detail).toContain('docx:');
|
||||
});
|
||||
|
||||
it('reports PASS for skills registry when source is parsable', async () => {
|
||||
mkdirSync(testDir, { recursive: true });
|
||||
const registryPath = join(testDir, 'registry.json');
|
||||
|
||||
@@ -6,6 +6,7 @@ import { homedir } from 'os';
|
||||
import { resolve, join } from 'path';
|
||||
import { parse } from 'yaml';
|
||||
import { configSchema } from '../config/schema.js';
|
||||
import { checkMinioExtractorStatus, summarizeMinioExtractorStatus } from './minioExtractors.js';
|
||||
|
||||
export interface CheckResult {
|
||||
status: 'pass' | 'fail' | 'warn' | 'skip';
|
||||
@@ -574,6 +575,28 @@ const checkGmail: Check = async (ctx) => {
|
||||
return { status: warnings.length > 0 ? 'warn' : 'pass', label: 'Gmail configured', detail: withWarnings };
|
||||
};
|
||||
|
||||
/**
 * Doctor check for the external tools used by MinIO PDF/DOCX ingestion.
 *
 * Outcomes:
 * - `skip` when the context carries no parsed config, or when the extractor
 *   status reports that MinIO is not enabled;
 * - `warn` when at least one extractor requirement is missing;
 * - `pass` otherwise, with the extractor summary as the detail text.
 */
const checkMinioExtractors: Check = async (ctx) => {
  const label = 'MinIO ingest extractors';

  if (!ctx.config) {
    return { status: 'skip', label, detail: '(config invalid)' };
  }

  // The status helper accepts a plain record, so the typed config is widened for the call.
  const extractors = await checkMinioExtractorStatus(ctx.config as unknown as Record<string, unknown>);
  if (!extractors.minioEnabled) {
    return { status: 'skip', label, detail: '(backup.minio not enabled)' };
  }

  const summary = summarizeMinioExtractorStatus(extractors);
  if (extractors.missingRequirements.length === 0) {
    return { status: 'pass', label, detail: summary };
  }

  return {
    status: 'warn',
    label,
    detail: `${summary} — install missing extractors for PDF/DOCX ingestion`,
  };
};
|
||||
|
||||
const allChecks: Check[] = [
|
||||
checkConfigExists,
|
||||
checkOverlayExists,
|
||||
@@ -586,6 +609,7 @@ const allChecks: Check[] = [
|
||||
checkModelConnectivity,
|
||||
checkTelegram,
|
||||
checkGmail,
|
||||
checkMinioExtractors,
|
||||
checkMcpServers,
|
||||
checkSkills,
|
||||
checkSkillsRegistry,
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import {
|
||||
checkMinioExtractorStatus,
|
||||
renderMinioExtractorSetupLines,
|
||||
summarizeMinioExtractorStatus,
|
||||
} from './minioExtractors.js';
|
||||
|
||||
describe('minio extractor requirements', () => {
  // Smallest config shape that switches the extractor probes on.
  const enabledConfig = { backup: { minio: { enabled: true } } };

  it('skips checks when backup.minio is not enabled', async () => {
    const status = await checkMinioExtractorStatus({});

    expect(status.minioEnabled).toBe(false);
    expect(status.missingRequirements).toEqual([]);
    expect(renderMinioExtractorSetupLines(status)).toEqual([]);
  });

  it('reports missing pdf/docx extractors', async () => {
    // Probe stub: no command is ever found.
    const nothingInstalled = async () => false;

    const status = await checkMinioExtractorStatus(enabledConfig, nothingInstalled);

    expect(status.minioEnabled).toBe(true);
    expect(status.pdfSupported).toBe(false);
    expect(status.docxSupported).toBe(false);
    expect(status.missingRequirements).toEqual(['pdftotext', 'pandoc or docx2txt']);
    expect(summarizeMinioExtractorStatus(status)).toBe('pdf:missing(pdftotext), docx:missing(pandoc|docx2txt)');
  });

  it('accepts pandoc as docx extractor', async () => {
    // Probe stub: only pdftotext and pandoc are found; docx2txt is absent.
    const installed = ['pdftotext', 'pandoc'];
    const probe = async (command: string) => installed.includes(command);

    const status = await checkMinioExtractorStatus(enabledConfig, probe);

    expect(status.pdfSupported).toBe(true);
    expect(status.docxSupported).toBe(true);
    expect(status.availableDocxExtractors).toEqual(['pandoc']);
    expect(status.missingRequirements).toEqual([]);
  });
});
|
||||
@@ -0,0 +1,105 @@
|
||||
import { execFile } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
type UnknownRecord = Record<string, unknown>;
|
||||
type CommandExistsFn = (command: string) => Promise<boolean>;
|
||||
|
||||
/**
 * Views an arbitrary value as a string-keyed record.
 *
 * @param value - Any value, e.g. a fragment of a parsed config object.
 * @returns The same reference typed as a record when `value` is a non-null
 *   object (arrays included); `undefined` for everything else.
 */
const asRecord = (value: unknown): UnknownRecord | undefined => {
  if (value === null || typeof value !== 'object') {
    return undefined;
  }
  return value as UnknownRecord;
};
|
||||
|
||||
/**
 * Reports whether `command` can be resolved by `command -v` when run through `sh -lc`.
 *
 * @param command - Name of the executable to look up (e.g. `pdftotext`).
 * @returns `true` when the lookup exits successfully; `false` when it exits
 *   non-zero or when `sh` itself cannot be run.
 */
async function commandExists(command: string): Promise<boolean> {
  try {
    // The name is handed to the shell as positional parameter `$1` (the preceding
    // 'sh' argument fills `$0`) rather than being spliced into the script text.
    // Whitespace or shell metacharacters in `command` are therefore treated as part
    // of the name being looked up and are never executed as shell code.
    await execFileAsync('sh', ['-lc', 'command -v "$1" >/dev/null 2>&1', 'sh', command]);
    return true;
  } catch {
    // Deliberate best effort: any failure is reported as "not available".
    return false;
  }
}
|
||||
|
||||
/**
 * Snapshot of which external extractor tools are available for MinIO
 * PDF/DOCX ingestion, as produced by `checkMinioExtractorStatus`.
 */
export interface MinioExtractorStatus {
  /** `true` only when the config has `backup.minio.enabled` set to exactly `true`. */
  minioEnabled: boolean;

  /** Whether `pdftotext` was found. Always `false` when MinIO is not enabled. */
  pdfSupported: boolean;

  /** Whether at least one DOCX extractor was found. Always `false` when MinIO is not enabled. */
  docxSupported: boolean;

  /** The DOCX extractors that were found, drawn from `pandoc` and `docx2txt` in that order. */
  availableDocxExtractors: string[];

  /** Human-readable names of unmet requirements; empty when nothing is missing or MinIO is not enabled. */
  missingRequirements: string[];
}
|
||||
|
||||
/**
 * Determines which PDF/DOCX extractor tools are available for MinIO ingestion.
 *
 * No probing is performed unless `config.backup.minio.enabled` is exactly `true`;
 * in that case an all-false/empty status is returned immediately.
 *
 * @param config - Parsed configuration, treated as an untyped record.
 * @param exists - Probe that resolves to whether a command is available;
 *   defaults to the module's shell-based lookup and can be replaced in tests.
 * @returns Availability flags, the DOCX extractors that were found, and the
 *   list of unmet requirements (`pdftotext`, `pandoc or docx2txt`).
 */
export async function checkMinioExtractorStatus(
  config: Record<string, unknown>,
  exists: CommandExistsFn = commandExists,
): Promise<MinioExtractorStatus> {
  const minioSection = asRecord(asRecord(config.backup)?.minio);

  if (minioSection?.enabled !== true) {
    return {
      minioEnabled: false,
      pdfSupported: false,
      docxSupported: false,
      availableDocxExtractors: [],
      missingRequirements: [],
    };
  }

  // All three probes are started together; the order of the calls is fixed.
  const [pdfSupported, pandocFound, docx2txtFound] = await Promise.all([
    exists('pdftotext'),
    exists('pandoc'),
    exists('docx2txt'),
  ]);

  const availableDocxExtractors: string[] = [];
  if (pandocFound) {
    availableDocxExtractors.push('pandoc');
  }
  if (docx2txtFound) {
    availableDocxExtractors.push('docx2txt');
  }

  // DOCX needs only one of the two extractors.
  const docxSupported = availableDocxExtractors.length > 0;

  const missingRequirements: string[] = [
    ...(pdfSupported ? [] : ['pdftotext']),
    ...(docxSupported ? [] : ['pandoc or docx2txt']),
  ];

  return {
    minioEnabled: true,
    pdfSupported,
    docxSupported,
    availableDocxExtractors,
    missingRequirements,
  };
}
|
||||
|
||||
/**
 * Condenses an extractor status into a one-line `pdf:…, docx:…` summary.
 *
 * @param status - Extractor availability to describe.
 * @returns For example `pdf:ok(pdftotext), docx:missing(pandoc|docx2txt)`.
 *   Available DOCX extractors are listed separated by `|`.
 */
export function summarizeMinioExtractorStatus(status: MinioExtractorStatus): string {
  const segments: string[] = [];

  if (status.pdfSupported) {
    segments.push('pdf:ok(pdftotext)');
  } else {
    segments.push('pdf:missing(pdftotext)');
  }

  if (status.docxSupported) {
    const found = status.availableDocxExtractors.join('|');
    segments.push(`docx:ok(${found})`);
  } else {
    segments.push('docx:missing(pandoc|docx2txt)');
  }

  return segments.join(', ');
}
|
||||
|
||||
/**
 * Builds the lines shown by setup to describe extractor readiness.
 *
 * @param status - Extractor availability to render.
 * @returns An empty array when MinIO is not enabled. Otherwise a heading, one
 *   line each for PDF and DOCX, and either a two-line hint about missing
 *   tools or a single "all detected" line.
 */
export function renderMinioExtractorSetupLines(status: MinioExtractorStatus): string[] {
  if (!status.minioEnabled) {
    return [];
  }

  const pdfState = status.pdfSupported ? 'detected' : 'missing';
  const docxState = status.docxSupported
    ? `detected (${status.availableDocxExtractors.join(', ')})`
    : 'missing';

  const closing = status.missingRequirements.length > 0
    ? [
      ' Missing extractors will limit PDF/DOCX ingestion for minio.ingest and minio.sync.',
      ' Install missing tools, then run `flynn doctor` to verify.',
    ]
    : [' All extractor dependencies detected.'];

  return [
    'MinIO ingestion extractor requirements:',
    ` PDF (.pdf): pdftotext ${pdfState}`,
    ` DOCX (.docx): pandoc or docx2txt ${docxState}`,
    ...closing,
  ];
}
|
||||
+26
-3
@@ -8,6 +8,7 @@ import { createPrompter } from './setup/prompts.js';
|
||||
import { ConfigBuilder } from './setup/config.js';
|
||||
import { runFirstRunWizard, runMenu } from './setup/orchestrator.js';
|
||||
import { runGoogleAuth } from './setup/automation.js';
|
||||
import { checkMinioExtractorStatus, renderMinioExtractorSetupLines } from './minioExtractors.js';
|
||||
|
||||
export async function runSetup(configPath: string): Promise<void> {
|
||||
const rl = createInterface({ input: process.stdin, output: process.stdout });
|
||||
@@ -21,12 +22,16 @@ export async function runSetup(configPath: string): Promise<void> {
|
||||
const builder = ConfigBuilder.fromObject(parsed);
|
||||
await runMenu(p, builder);
|
||||
saveConfig(configPath, builder, p);
|
||||
await runGoogleAuth(p, builder.build());
|
||||
const config = builder.build();
|
||||
await printMinioExtractorSetupStatus(p, config as Record<string, unknown>);
|
||||
await runGoogleAuth(p, config);
|
||||
} else {
|
||||
// No config → first-run wizard
|
||||
const builder = await runFirstRunWizard(p);
|
||||
saveConfig(configPath, builder, p);
|
||||
await runGoogleAuth(p, builder.build());
|
||||
const config = builder.build();
|
||||
await printMinioExtractorSetupStatus(p, config as Record<string, unknown>);
|
||||
await runGoogleAuth(p, config);
|
||||
|
||||
const shouldStart = await p.confirm('Start Flynn now?', true);
|
||||
if (shouldStart) {
|
||||
@@ -46,7 +51,9 @@ export async function runSetup(configPath: string): Promise<void> {
|
||||
const menuBuilder = ConfigBuilder.fromObject(parsed);
|
||||
await runMenu(p, menuBuilder);
|
||||
saveConfig(configPath, menuBuilder, p);
|
||||
await runGoogleAuth(p, menuBuilder.build());
|
||||
const config = menuBuilder.build();
|
||||
await printMinioExtractorSetupStatus(p, config as Record<string, unknown>);
|
||||
await runGoogleAuth(p, config);
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
@@ -62,6 +69,22 @@ function saveConfig(configPath: string, builder: ConfigBuilder, p: { println(msg
|
||||
p.println(`✓ Config saved to ${configPath}`);
|
||||
}
|
||||
|
||||
/**
 * Prints the MinIO extractor readiness report for a config.
 *
 * Nothing is printed when there are no lines to show. Otherwise a blank line
 * is emitted first, followed by each report line in order.
 *
 * @param p - Output sink; only `println` is used.
 * @param config - Built configuration, treated as an untyped record.
 */
async function printMinioExtractorSetupStatus(
  p: { println(msg?: string): void },
  config: Record<string, unknown>,
): Promise<void> {
  const report = renderMinioExtractorSetupLines(await checkMinioExtractorStatus(config));

  if (report.length > 0) {
    // Blank separator between earlier output and the report.
    p.println();
    report.forEach((line) => {
      p.println(line);
    });
  }
}
|
||||
|
||||
export function registerSetupCommand(program: Command): void {
|
||||
program
|
||||
.command('setup')
|
||||
|
||||
Reference in New Issue
Block a user