feat(cli): add os-aware install hints for minio extractor dependencies

This commit is contained in:
William Valentin
2026-02-16 14:45:25 -08:00
parent 289fc44380
commit 030fb13a26
6 changed files with 117 additions and 7 deletions
+2 -2
View File
@@ -634,8 +634,8 @@ PDF/DOCX ingestion runtime requirements:
- PDF extraction requires `pdftotext`.
- DOCX extraction requires `pandoc` or `docx2txt`.
- `flynn setup` now checks these dependencies after config save when `backup.minio.enabled: true`.
- `flynn doctor` reports `MinIO ingest extractors` status so missing binaries are visible in health checks.
- `flynn setup` now checks these dependencies after config save when `backup.minio.enabled: true`, and prints OS-aware install hints when any are missing.
- `flynn doctor` reports `MinIO ingest extractors` status (with install hints) so missing binaries are visible in health checks.
## Kubernetes Tools
+2 -2
View File
@@ -176,7 +176,7 @@
"status": "completed",
"date": "2026-02-16",
"updated": "2026-02-16",
"summary": "Documented MinIO ingestion runtime extractor requirements (PDF: `pdftotext`, DOCX: `pandoc` or `docx2txt`) and added shared dependency checks in setup + doctor flows. `flynn setup` now reports extractor readiness after config save when MinIO is enabled, and `flynn doctor` now includes a `MinIO ingest extractors` check.",
"summary": "Documented MinIO ingestion runtime extractor requirements (PDF: `pdftotext`, DOCX: `pandoc` or `docx2txt`) and added shared dependency checks in setup + doctor flows. `flynn setup` now reports extractor readiness after config save when MinIO is enabled and includes OS-aware package install hints when missing; `flynn doctor` now includes a `MinIO ingest extractors` check with matching install guidance.",
"files_modified": [
"src/cli/minioExtractors.ts",
"src/cli/minioExtractors.test.ts",
@@ -3473,7 +3473,7 @@
}
},
"overall_progress": {
"total_test_count": 1857,
"total_test_count": 1859,
"all_tests_passing": true,
"p0_completion": "3/3 (100%)",
"p1_completion": "4/4 (100%)",
+8 -2
View File
@@ -6,7 +6,11 @@ import { homedir } from 'os';
import { resolve, join } from 'path';
import { parse } from 'yaml';
import { configSchema } from '../config/schema.js';
import { checkMinioExtractorStatus, summarizeMinioExtractorStatus } from './minioExtractors.js';
import {
checkMinioExtractorStatus,
getMinioExtractorInstallHints,
summarizeMinioExtractorStatus,
} from './minioExtractors.js';
export interface CheckResult {
status: 'pass' | 'fail' | 'warn' | 'skip';
@@ -587,10 +591,12 @@ const checkMinioExtractors: Check = async (ctx) => {
const summary = summarizeMinioExtractorStatus(status);
if (status.missingRequirements.length > 0) {
const installHints = await getMinioExtractorInstallHints(status);
const hint = installHints.length > 0 ? `; hint: ${installHints[0]}` : '';
return {
status: 'warn',
label: 'MinIO ingest extractors',
detail: `${summary} — install missing extractors for PDF/DOCX ingestion`,
detail: `${summary} — install missing extractors for PDF/DOCX ingestion${hint}`,
};
}
+22
View File
@@ -1,6 +1,7 @@
import { describe, it, expect } from 'vitest';
import {
checkMinioExtractorStatus,
getMinioExtractorInstallHints,
renderMinioExtractorSetupLines,
summarizeMinioExtractorStatus,
} from './minioExtractors.js';
@@ -38,4 +39,25 @@ describe('minio extractor requirements', () => {
expect(status.availableDocxExtractors).toEqual(['pandoc']);
expect(status.missingRequirements).toEqual([]);
});
// Every command probe reports "not found", so both the PDF and DOCX extractors
// count as missing. The darwin path never probes for a package manager, which
// is why no `exists` override is passed to the hint builder.
it('returns Homebrew install hint on macOS when extractors are missing', async () => {
const status = await checkMinioExtractorStatus(
{ backup: { minio: { enabled: true } } },
async () => false,
);
const hints = await getMinioExtractorInstallHints(status, { platform: 'darwin' });
expect(hints).toEqual(['brew install poppler pandoc']);
});
// Extractors are reported missing (status probe always returns false), while
// the hint builder's own probe claims only `apt-get` exists, so the Linux path
// must select the apt-get command and its package names.
it('returns apt-get install hint on Linux when apt-get is available', async () => {
const status = await checkMinioExtractorStatus(
{ backup: { minio: { enabled: true } } },
async () => false,
);
const hints = await getMinioExtractorInstallHints(status, {
platform: 'linux',
exists: async (command) => command === 'apt-get',
});
expect(hints).toEqual(['sudo apt-get install -y poppler-utils pandoc']);
});
});
+74
View File
@@ -27,6 +27,11 @@ export interface MinioExtractorStatus {
missingRequirements: string[];
}
/**
 * Optional overrides accepted by `getMinioExtractorInstallHints`.
 * Both fields exist so callers (notably tests) can pin the environment instead
 * of depending on the machine the code runs on.
 */
export interface MinioExtractorHintOptions {
/** Platform to build hints for; `process.platform` is used when omitted. */
platform?: NodeJS.Platform;
/** Probe used to detect Linux package managers; `commandExists` is used when omitted. */
exists?: CommandExistsFn;
}
export async function checkMinioExtractorStatus(
config: Record<string, unknown>,
exists: CommandExistsFn = commandExists,
@@ -103,3 +108,72 @@ export function renderMinioExtractorSetupLines(status: MinioExtractorStatus): st
return lines;
}
/**
 * Tells whether the given extractor status reports PDF extraction as
 * unavailable.
 *
 * @param status - Extractor status to inspect.
 * @returns `true` when `status.pdfSupported` is falsy.
 */
function hasMissingPdf(status: MinioExtractorStatus): boolean {
  const { pdfSupported } = status;
  return !pdfSupported;
}
/**
 * Tells whether the given extractor status reports DOCX extraction as
 * unavailable.
 *
 * @param status - Extractor status to inspect.
 * @returns `true` when `status.docxSupported` is falsy.
 */
function hasMissingDocx(status: MinioExtractorStatus): boolean {
  const { docxSupported } = status;
  return !docxSupported;
}
/**
 * Lists the package names to request from `manager` so that the extractors
 * reported missing in `status` become available.
 *
 * @param manager - Package-manager command name (e.g. `apt-get`, `pacman`).
 * @param status - Extractor status describing which capabilities are missing.
 * @returns Package names in install order: the PDF package first (only when
 *   PDF support is missing and `manager` is a known manager), then `pandoc`
 *   when DOCX support is missing. May be empty.
 */
function packageListFor(manager: string, status: MinioExtractorStatus): string[] {
  // A Map (rather than an object literal) keeps the lookup to the listed keys
  // only: inherited members such as `toString` or `constructor` must never be
  // mistaken for a package name.
  const pdfPackageByManager = new Map<string, string>([
    ['apt-get', 'poppler-utils'],
    ['dnf', 'poppler-utils'],
    ['pacman', 'poppler'],
    ['zypper', 'poppler-tools'],
    ['apk', 'poppler-utils'],
  ]);

  const packages: string[] = [];

  if (hasMissingPdf(status)) {
    const pdfPackage = pdfPackageByManager.get(manager);
    if (pdfPackage !== undefined) {
      packages.push(pdfPackage);
    }
  }

  // The DOCX package is named `pandoc` for every manager handled here.
  if (hasMissingDocx(status)) {
    packages.push('pandoc');
  }

  return packages;
}
/**
 * Builds install hints for the MinIO ingest extractors that `status` reports
 * as missing.
 *
 * - macOS: a single `brew install …` command.
 * - Linux: an install command for the first detected package manager, probed
 *   in the order apt-get, dnf, pacman, zypper, apk.
 * - Anything else (or when no command can be built): a generic instruction.
 *
 * @param status - Extractor status produced by `checkMinioExtractorStatus`.
 * @param options - Optional platform / command-probe overrides.
 * @returns An empty array when MinIO is disabled or nothing is missing;
 *   otherwise an array containing exactly one hint.
 */
export async function getMinioExtractorInstallHints(
  status: MinioExtractorStatus,
  options: MinioExtractorHintOptions = {},
): Promise<string[]> {
  if (!status.minioEnabled || status.missingRequirements.length === 0) {
    return [];
  }

  const platform = options.platform ?? process.platform;

  if (platform === 'darwin') {
    const packages: string[] = [];
    if (hasMissingPdf(status)) {
      packages.push('poppler');
    }
    if (hasMissingDocx(status)) {
      packages.push('pandoc');
    }
    // Never emit a bare `brew install ` with no package names; fall through
    // to the generic hint instead.
    if (packages.length > 0) {
      return [`brew install ${packages.join(' ')}`];
    }
  }

  if (platform === 'linux') {
    const exists = options.exists ?? commandExists;

    // Each manager has its own install syntax: pacman uses `-S`, and apk uses
    // `add` (it has no `install` subcommand).
    const installCommands: ReadonlyArray<readonly [manager: string, command: string]> = [
      ['apt-get', 'sudo apt-get install -y'],
      ['dnf', 'sudo dnf install -y'],
      ['pacman', 'sudo pacman -S --needed'],
      ['zypper', 'sudo zypper install -y'],
      ['apk', 'sudo apk add'],
    ];

    // Probe sequentially: the first manager found wins, so later probes are
    // skipped entirely.
    for (const [manager, installCommand] of installCommands) {
      if (!(await exists(manager))) {
        continue;
      }
      const packages = packageListFor(manager, status);
      if (packages.length > 0) {
        return [`${installCommand} ${packages.join(' ')}`];
      }
    }
  }

  return ['Install pdftotext (poppler) and pandoc/docx2txt using your system package manager'];
}
+9 -1
View File
@@ -8,7 +8,11 @@ import { createPrompter } from './setup/prompts.js';
import { ConfigBuilder } from './setup/config.js';
import { runFirstRunWizard, runMenu } from './setup/orchestrator.js';
import { runGoogleAuth } from './setup/automation.js';
import { checkMinioExtractorStatus, renderMinioExtractorSetupLines } from './minioExtractors.js';
import {
checkMinioExtractorStatus,
getMinioExtractorInstallHints,
renderMinioExtractorSetupLines,
} from './minioExtractors.js';
export async function runSetup(configPath: string): Promise<void> {
const rl = createInterface({ input: process.stdin, output: process.stdout });
@@ -78,11 +82,15 @@ async function printMinioExtractorSetupStatus(
if (lines.length === 0) {
return;
}
const installHints = await getMinioExtractorInstallHints(status);
p.println();
for (const line of lines) {
p.println(line);
}
for (const hint of installHints) {
p.println(` Install hint: ${hint}`);
}
}
export function registerSetupCommand(program: Command): void {