feat(cli): enforce minio ingestion extractor checks in setup and doctor
This commit is contained in:
@@ -630,6 +630,13 @@ When `backup.minio.enabled` is configured, Flynn also exposes MinIO tools:
|
|||||||
- `minio.ingest`: read a text-like object (plus PDF/DOCX via extractor tools when available) from MinIO and append/replace a memory namespace
|
- `minio.ingest`: read a text-like object (plus PDF/DOCX via extractor tools when available) from MinIO and append/replace a memory namespace
|
||||||
- `minio.sync`: recursively ingest a MinIO prefix into nested memory namespaces with object and size limits (including PDF/DOCX extraction when available)
|
- `minio.sync`: recursively ingest a MinIO prefix into nested memory namespaces with object and size limits (including PDF/DOCX extraction when available)
|
||||||
|
|
||||||
|
PDF/DOCX ingestion runtime requirements:
|
||||||
|
|
||||||
|
- PDF extraction requires `pdftotext`.
|
||||||
|
- DOCX extraction requires `pandoc` or `docx2txt`.
|
||||||
|
- `flynn setup` now checks these dependencies after config save when `backup.minio.enabled: true`.
|
||||||
|
- `flynn doctor` reports `MinIO ingest extractors` status so missing binaries are visible in health checks.
|
||||||
|
|
||||||
## Kubernetes Tools
|
## Kubernetes Tools
|
||||||
|
|
||||||
Optional Kubernetes tools are available when `k8s.enabled: true`:
|
Optional Kubernetes tools are available when `k8s.enabled: true`:
|
||||||
|
|||||||
@@ -962,6 +962,11 @@ Search memory using hybrid (keyword + vector) search.
|
|||||||
|
|
||||||
### MinIO Tools
|
### MinIO Tools
|
||||||
|
|
||||||
|
Runtime extractor requirements for binary document ingestion:
|
||||||
|
|
||||||
|
- PDF (`.pdf`): requires `pdftotext`
|
||||||
|
- DOCX (`.docx`): requires either `pandoc` or `docx2txt`
|
||||||
|
|
||||||
#### `minio.share`
|
#### `minio.share`
|
||||||
|
|
||||||
Upload a local file to MinIO and return a temporary presigned download URL.
|
Upload a local file to MinIO and return a temporary presigned download URL.
|
||||||
|
|||||||
+18
-1
@@ -172,6 +172,23 @@
|
|||||||
],
|
],
|
||||||
"test_status": "pnpm test:run src/tools/builtin/minio-ingest.test.ts src/tools/builtin/minio-sync.test.ts + pnpm typecheck passing"
|
"test_status": "pnpm test:run src/tools/builtin/minio-ingest.test.ts src/tools/builtin/minio-sync.test.ts + pnpm typecheck passing"
|
||||||
},
|
},
|
||||||
|
"minio-ingestion-extractor-requirements-setup-and-doctor": {
|
||||||
|
"status": "completed",
|
||||||
|
"date": "2026-02-16",
|
||||||
|
"updated": "2026-02-16",
|
||||||
|
"summary": "Documented MinIO ingestion runtime extractor requirements (PDF: `pdftotext`, DOCX: `pandoc` or `docx2txt`) and added shared dependency checks in setup + doctor flows. `flynn setup` now reports extractor readiness after config save when MinIO is enabled, and `flynn doctor` now includes a `MinIO ingest extractors` check.",
|
||||||
|
"files_modified": [
|
||||||
|
"src/cli/minioExtractors.ts",
|
||||||
|
"src/cli/minioExtractors.test.ts",
|
||||||
|
"src/cli/setup.ts",
|
||||||
|
"src/cli/doctor.ts",
|
||||||
|
"src/cli/doctor.test.ts",
|
||||||
|
"README.md",
|
||||||
|
"docs/api/TOOLS.md",
|
||||||
|
"docs/plans/state.json"
|
||||||
|
],
|
||||||
|
"test_status": "pnpm test:run src/cli/minioExtractors.test.ts src/cli/doctor.test.ts + pnpm typecheck passing"
|
||||||
|
},
|
||||||
"backup-session-summary-audit-trail": {
|
"backup-session-summary-audit-trail": {
|
||||||
"status": "completed",
|
"status": "completed",
|
||||||
"date": "2026-02-16",
|
"date": "2026-02-16",
|
||||||
@@ -3456,7 +3473,7 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"overall_progress": {
|
"overall_progress": {
|
||||||
"total_test_count": 1852,
|
"total_test_count": 1857,
|
||||||
"all_tests_passing": true,
|
"all_tests_passing": true,
|
||||||
"p0_completion": "3/3 (100%)",
|
"p0_completion": "3/3 (100%)",
|
||||||
"p1_completion": "4/4 (100%)",
|
"p1_completion": "4/4 (100%)",
|
||||||
|
|||||||
@@ -335,6 +335,55 @@ models:
|
|||||||
expect(registryCheck?.detail).toContain('unconfigured');
|
expect(registryCheck?.detail).toContain('unconfigured');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('reports SKIP for MinIO ingest extractors when MinIO is disabled', async () => {
|
||||||
|
mkdirSync(testDir, { recursive: true });
|
||||||
|
const configPath = join(testDir, 'minio-disabled.yaml');
|
||||||
|
writeFileSync(configPath, `
|
||||||
|
telegram:
|
||||||
|
bot_token: "test-token"
|
||||||
|
allowed_chat_ids: [123]
|
||||||
|
models:
|
||||||
|
default:
|
||||||
|
provider: anthropic
|
||||||
|
model: claude-sonnet
|
||||||
|
backup:
|
||||||
|
minio:
|
||||||
|
enabled: false
|
||||||
|
`);
|
||||||
|
|
||||||
|
const ctx: DoctorContext = { configPath, dataDir: testDir };
|
||||||
|
const results = await runChecks(ctx);
|
||||||
|
|
||||||
|
const minioCheck = results.find(r => r.label.includes('MinIO ingest extractors'));
|
||||||
|
expect(minioCheck?.status).toBe('skip');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('reports MinIO ingest extractor status when MinIO is enabled', async () => {
|
||||||
|
mkdirSync(testDir, { recursive: true });
|
||||||
|
const configPath = join(testDir, 'minio-enabled.yaml');
|
||||||
|
writeFileSync(configPath, `
|
||||||
|
telegram:
|
||||||
|
bot_token: "test-token"
|
||||||
|
allowed_chat_ids: [123]
|
||||||
|
models:
|
||||||
|
default:
|
||||||
|
provider: anthropic
|
||||||
|
model: claude-sonnet
|
||||||
|
backup:
|
||||||
|
minio:
|
||||||
|
enabled: true
|
||||||
|
`);
|
||||||
|
|
||||||
|
const ctx: DoctorContext = { configPath, dataDir: testDir };
|
||||||
|
const results = await runChecks(ctx);
|
||||||
|
|
||||||
|
const minioCheck = results.find(r => r.label.includes('MinIO ingest extractors'));
|
||||||
|
expect(minioCheck).toBeDefined();
|
||||||
|
expect(['pass', 'warn']).toContain(minioCheck?.status);
|
||||||
|
expect(minioCheck?.detail).toContain('pdf:');
|
||||||
|
expect(minioCheck?.detail).toContain('docx:');
|
||||||
|
});
|
||||||
|
|
||||||
it('reports PASS for skills registry when source is parsable', async () => {
|
it('reports PASS for skills registry when source is parsable', async () => {
|
||||||
mkdirSync(testDir, { recursive: true });
|
mkdirSync(testDir, { recursive: true });
|
||||||
const registryPath = join(testDir, 'registry.json');
|
const registryPath = join(testDir, 'registry.json');
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import { homedir } from 'os';
|
|||||||
import { resolve, join } from 'path';
|
import { resolve, join } from 'path';
|
||||||
import { parse } from 'yaml';
|
import { parse } from 'yaml';
|
||||||
import { configSchema } from '../config/schema.js';
|
import { configSchema } from '../config/schema.js';
|
||||||
|
import { checkMinioExtractorStatus, summarizeMinioExtractorStatus } from './minioExtractors.js';
|
||||||
|
|
||||||
export interface CheckResult {
|
export interface CheckResult {
|
||||||
status: 'pass' | 'fail' | 'warn' | 'skip';
|
status: 'pass' | 'fail' | 'warn' | 'skip';
|
||||||
@@ -574,6 +575,28 @@ const checkGmail: Check = async (ctx) => {
|
|||||||
return { status: warnings.length > 0 ? 'warn' : 'pass', label: 'Gmail configured', detail: withWarnings };
|
return { status: warnings.length > 0 ? 'warn' : 'pass', label: 'Gmail configured', detail: withWarnings };
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Doctor check: reports whether the external PDF/DOCX extractor binaries
 * used by MinIO ingestion are available.
 *
 * - `skip` when the config could not be loaded or `backup.minio` is not enabled
 * - `warn` when at least one extractor requirement is missing
 * - `pass` when nothing is missing
 */
const checkMinioExtractors: Check = async (ctx) => {
  const label = 'MinIO ingest extractors';

  if (!ctx.config) {
    return { status: 'skip', label, detail: '(config invalid)' };
  }

  // The probe helper only needs a plain record view of the parsed config.
  const extractorStatus = await checkMinioExtractorStatus(ctx.config as unknown as Record<string, unknown>);
  if (!extractorStatus.minioEnabled) {
    return { status: 'skip', label, detail: '(backup.minio not enabled)' };
  }

  const summary = summarizeMinioExtractorStatus(extractorStatus);
  if (extractorStatus.missingRequirements.length === 0) {
    return { status: 'pass', label, detail: summary };
  }

  return {
    status: 'warn',
    label,
    detail: `${summary} — install missing extractors for PDF/DOCX ingestion`,
  };
};
|
||||||
|
|
||||||
const allChecks: Check[] = [
|
const allChecks: Check[] = [
|
||||||
checkConfigExists,
|
checkConfigExists,
|
||||||
checkOverlayExists,
|
checkOverlayExists,
|
||||||
@@ -586,6 +609,7 @@ const allChecks: Check[] = [
|
|||||||
checkModelConnectivity,
|
checkModelConnectivity,
|
||||||
checkTelegram,
|
checkTelegram,
|
||||||
checkGmail,
|
checkGmail,
|
||||||
|
checkMinioExtractors,
|
||||||
checkMcpServers,
|
checkMcpServers,
|
||||||
checkSkills,
|
checkSkills,
|
||||||
checkSkillsRegistry,
|
checkSkillsRegistry,
|
||||||
|
|||||||
@@ -0,0 +1,41 @@
|
|||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import {
|
||||||
|
checkMinioExtractorStatus,
|
||||||
|
renderMinioExtractorSetupLines,
|
||||||
|
summarizeMinioExtractorStatus,
|
||||||
|
} from './minioExtractors.js';
|
||||||
|
|
||||||
|
describe('minio extractor requirements', () => {
  // Minimal config shape that switches the extractor probing on.
  const enabledConfig = { backup: { minio: { enabled: true } } };

  it('skips checks when backup.minio is not enabled', async () => {
    const status = await checkMinioExtractorStatus({});

    expect(status.minioEnabled).toBe(false);
    expect(status.missingRequirements).toEqual([]);
    expect(renderMinioExtractorSetupLines(status)).toEqual([]);
  });

  it('reports missing pdf/docx extractors', async () => {
    // Stub probe: no command is ever found.
    const nothingInstalled = async (): Promise<boolean> => false;

    const status = await checkMinioExtractorStatus(enabledConfig, nothingInstalled);

    expect(status.minioEnabled).toBe(true);
    expect(status.pdfSupported).toBe(false);
    expect(status.docxSupported).toBe(false);
    expect(status.missingRequirements).toEqual(['pdftotext', 'pandoc or docx2txt']);
    expect(summarizeMinioExtractorStatus(status)).toBe('pdf:missing(pdftotext), docx:missing(pandoc|docx2txt)');
  });

  it('accepts pandoc as docx extractor', async () => {
    // Stub probe: only pdftotext and pandoc are reported as present.
    const installed = new Set(['pdftotext', 'pandoc']);
    const probe = async (command: string): Promise<boolean> => installed.has(command);

    const status = await checkMinioExtractorStatus(enabledConfig, probe);

    expect(status.pdfSupported).toBe(true);
    expect(status.docxSupported).toBe(true);
    expect(status.availableDocxExtractors).toEqual(['pandoc']);
    expect(status.missingRequirements).toEqual([]);
  });
});
|
||||||
@@ -0,0 +1,105 @@
|
|||||||
|
import { execFile } from 'child_process';
|
||||||
|
import { promisify } from 'util';
|
||||||
|
|
||||||
|
const execFileAsync = promisify(execFile);
|
||||||
|
|
||||||
|
/** Loosely typed object used while walking config of unknown shape. */
type UnknownRecord = Record<string, unknown>;

/** Async predicate that is given a command name and answers with a boolean. */
type CommandExistsFn = (command: string) => Promise<boolean>;

/**
 * Narrow an arbitrary value to an indexable record.
 *
 * @returns The value itself for any non-null object (arrays included),
 *          `undefined` for primitives, `null` and `undefined`.
 */
const asRecord = (value: unknown): UnknownRecord | undefined => {
  if (typeof value !== 'object' || value === null) {
    return undefined;
  }
  return value as UnknownRecord;
};
|
||||||
|
|
||||||
|
/**
 * Check whether `command` can be resolved by `command -v` inside `sh`.
 *
 * The name is handed to the shell as a positional parameter (`$1`) instead of
 * being spliced into the script text, so characters such as `;`, `|`, `$` or
 * spaces in `command` are treated as part of the name and can neither run
 * extra shell code nor alter the exit status of the lookup.
 *
 * @param command - Command name to look up (for example `pdftotext`).
 * @returns `true` when the lookup exits with status 0; `false` on any failure,
 *          including a non-zero exit or `sh` itself failing to start.
 */
async function commandExists(command: string): Promise<boolean> {
  try {
    // Argument layout for `sh -c`: <script> <$0> <$1>. The literal 'sh' fills $0
    // and the command name arrives as $1. The `-l` flag is kept from the
    // original invocation (presumably so profile-provided PATH entries are
    // honored; confirm before removing it).
    await execFileAsync('sh', ['-lc', 'command -v "$1" >/dev/null 2>&1', 'sh', command]);
    return true;
  } catch {
    // Best-effort probe: every failure mode is reported as "not available".
    return false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Snapshot of which document extractors are usable for MinIO ingestion.
 */
export interface MinioExtractorStatus {
  /** `true` only when `backup.minio.enabled` is exactly `true` in the config. */
  minioEnabled: boolean;

  /** Whether the `pdftotext` probe succeeded. */
  pdfSupported: boolean;

  /** Whether at least one DOCX extractor was found. */
  docxSupported: boolean;

  /** The DOCX extractors that were found, out of `pandoc` and `docx2txt`. */
  availableDocxExtractors: string[];

  /** Human-readable names of the requirements that are not satisfied. */
  missingRequirements: string[];
}
|
||||||
|
|
||||||
|
/**
 * Work out which PDF/DOCX extractor commands are available for MinIO ingestion.
 *
 * Nothing is probed unless `config.backup.minio.enabled` is exactly `true`;
 * in that case an all-false / empty status is returned immediately.
 *
 * @param config - Parsed configuration object; only `backup.minio.enabled` is read.
 * @param exists - Probe used to test for a command by name. Defaults to the
 *                 shell-based lookup; tests can inject a stub.
 * @returns Availability flags plus the list of unmet requirements.
 */
export async function checkMinioExtractorStatus(
  config: Record<string, unknown>,
  exists: CommandExistsFn = commandExists,
): Promise<MinioExtractorStatus> {
  const minioSection = asRecord(asRecord(config.backup)?.minio);

  if (minioSection?.enabled !== true) {
    return {
      minioEnabled: false,
      pdfSupported: false,
      docxSupported: false,
      availableDocxExtractors: [],
      missingRequirements: [],
    };
  }

  // All three probes are started together and awaited as a group.
  const [pdfSupported, pandocFound, docx2txtFound] = await Promise.all([
    exists('pdftotext'),
    exists('pandoc'),
    exists('docx2txt'),
  ]);

  const availableDocxExtractors: string[] = [];
  if (pandocFound) {
    availableDocxExtractors.push('pandoc');
  }
  if (docx2txtFound) {
    availableDocxExtractors.push('docx2txt');
  }

  // Either DOCX tool is sufficient on its own.
  const docxSupported = availableDocxExtractors.length > 0;

  const missingRequirements: string[] = [
    ...(pdfSupported ? [] : ['pdftotext']),
    ...(docxSupported ? [] : ['pandoc or docx2txt']),
  ];

  return {
    minioEnabled: true,
    pdfSupported,
    docxSupported,
    availableDocxExtractors,
    missingRequirements,
  };
}
|
||||||
|
|
||||||
|
/**
 * Condense an extractor status into a single line, e.g.
 * `pdf:ok(pdftotext), docx:missing(pandoc|docx2txt)`.
 *
 * @param status - Result of the extractor availability check.
 * @returns The PDF part and the DOCX part joined by `, `. When DOCX is
 *          supported, the detected extractors are listed separated by `|`.
 */
export function summarizeMinioExtractorStatus(status: MinioExtractorStatus): string {
  const parts: string[] = [];

  parts.push(status.pdfSupported ? 'pdf:ok(pdftotext)' : 'pdf:missing(pdftotext)');

  if (status.docxSupported) {
    parts.push(`docx:ok(${status.availableDocxExtractors.join('|')})`);
  } else {
    parts.push('docx:missing(pandoc|docx2txt)');
  }

  return parts.join(', ');
}
|
||||||
|
|
||||||
|
/**
 * Build the human-readable extractor report shown by the setup flow.
 *
 * @param status - Result of the extractor availability check.
 * @returns An empty array when MinIO is not enabled. Otherwise a heading, one
 *          line each for PDF and DOCX, and either install guidance (when
 *          something is missing) or an all-clear line.
 */
export function renderMinioExtractorSetupLines(status: MinioExtractorStatus): string[] {
  if (!status.minioEnabled) {
    return [];
  }

  const pdfState = status.pdfSupported ? 'detected' : 'missing';
  const docxState = status.docxSupported
    ? `detected (${status.availableDocxExtractors.join(', ')})`
    : 'missing';

  const closing = status.missingRequirements.length > 0
    ? [
      ' Missing extractors will limit PDF/DOCX ingestion for minio.ingest and minio.sync.',
      ' Install missing tools, then run `flynn doctor` to verify.',
    ]
    : [' All extractor dependencies detected.'];

  return [
    'MinIO ingestion extractor requirements:',
    ` PDF (.pdf): pdftotext ${pdfState}`,
    ` DOCX (.docx): pandoc or docx2txt ${docxState}`,
    ...closing,
  ];
}
|
||||||
+26
-3
@@ -8,6 +8,7 @@ import { createPrompter } from './setup/prompts.js';
|
|||||||
import { ConfigBuilder } from './setup/config.js';
|
import { ConfigBuilder } from './setup/config.js';
|
||||||
import { runFirstRunWizard, runMenu } from './setup/orchestrator.js';
|
import { runFirstRunWizard, runMenu } from './setup/orchestrator.js';
|
||||||
import { runGoogleAuth } from './setup/automation.js';
|
import { runGoogleAuth } from './setup/automation.js';
|
||||||
|
import { checkMinioExtractorStatus, renderMinioExtractorSetupLines } from './minioExtractors.js';
|
||||||
|
|
||||||
export async function runSetup(configPath: string): Promise<void> {
|
export async function runSetup(configPath: string): Promise<void> {
|
||||||
const rl = createInterface({ input: process.stdin, output: process.stdout });
|
const rl = createInterface({ input: process.stdin, output: process.stdout });
|
||||||
@@ -21,12 +22,16 @@ export async function runSetup(configPath: string): Promise<void> {
|
|||||||
const builder = ConfigBuilder.fromObject(parsed);
|
const builder = ConfigBuilder.fromObject(parsed);
|
||||||
await runMenu(p, builder);
|
await runMenu(p, builder);
|
||||||
saveConfig(configPath, builder, p);
|
saveConfig(configPath, builder, p);
|
||||||
await runGoogleAuth(p, builder.build());
|
const config = builder.build();
|
||||||
|
await printMinioExtractorSetupStatus(p, config as Record<string, unknown>);
|
||||||
|
await runGoogleAuth(p, config);
|
||||||
} else {
|
} else {
|
||||||
// No config → first-run wizard
|
// No config → first-run wizard
|
||||||
const builder = await runFirstRunWizard(p);
|
const builder = await runFirstRunWizard(p);
|
||||||
saveConfig(configPath, builder, p);
|
saveConfig(configPath, builder, p);
|
||||||
await runGoogleAuth(p, builder.build());
|
const config = builder.build();
|
||||||
|
await printMinioExtractorSetupStatus(p, config as Record<string, unknown>);
|
||||||
|
await runGoogleAuth(p, config);
|
||||||
|
|
||||||
const shouldStart = await p.confirm('Start Flynn now?', true);
|
const shouldStart = await p.confirm('Start Flynn now?', true);
|
||||||
if (shouldStart) {
|
if (shouldStart) {
|
||||||
@@ -46,7 +51,9 @@ export async function runSetup(configPath: string): Promise<void> {
|
|||||||
const menuBuilder = ConfigBuilder.fromObject(parsed);
|
const menuBuilder = ConfigBuilder.fromObject(parsed);
|
||||||
await runMenu(p, menuBuilder);
|
await runMenu(p, menuBuilder);
|
||||||
saveConfig(configPath, menuBuilder, p);
|
saveConfig(configPath, menuBuilder, p);
|
||||||
await runGoogleAuth(p, menuBuilder.build());
|
const config = menuBuilder.build();
|
||||||
|
await printMinioExtractorSetupStatus(p, config as Record<string, unknown>);
|
||||||
|
await runGoogleAuth(p, config);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
@@ -62,6 +69,22 @@ function saveConfig(configPath: string, builder: ConfigBuilder, p: { println(msg
|
|||||||
p.println(`✓ Config saved to ${configPath}`);
|
p.println(`✓ Config saved to ${configPath}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Probe the MinIO extractor dependencies for `config` and print the report.
 *
 * Prints nothing when the rendered report is empty; otherwise prints a blank
 * separator line followed by each report line.
 *
 * @param p - Output sink; only `println` is used.
 * @param config - Built configuration object passed to the extractor check.
 */
async function printMinioExtractorSetupStatus(
  p: { println(msg?: string): void },
  config: Record<string, unknown>,
): Promise<void> {
  const report = renderMinioExtractorSetupLines(await checkMinioExtractorStatus(config));

  if (report.length > 0) {
    p.println();
    report.forEach((line) => {
      p.println(line);
    });
  }
}
|
||||||
|
|
||||||
export function registerSetupCommand(program: Command): void {
|
export function registerSetupCommand(program: Command): void {
|
||||||
program
|
program
|
||||||
.command('setup')
|
.command('setup')
|
||||||
|
|||||||
Reference in New Issue
Block a user