feat(tools): add pdf extraction for minio ingestion
This commit is contained in:
@@ -12,7 +12,7 @@ type ExecRunner = (
|
||||
file: string,
|
||||
args: string[],
|
||||
options?: { env?: NodeJS.ProcessEnv; maxBuffer?: number },
|
||||
) => Promise<{ stdout: string; stderr: string }>;
|
||||
) => Promise<{ stdout: string | Buffer; stderr: string | Buffer }>;
|
||||
|
||||
interface MinioSyncArgs {
|
||||
prefix: string;
|
||||
@@ -168,13 +168,9 @@ export function createMinioSyncTool(config: BackupConfig, store: MemoryStore, de
|
||||
}
|
||||
|
||||
const remotePath = `${alias}/${bucket}/${key}`;
|
||||
const { stdout: objectStdout } = await runner('mc', ['cat', remotePath], {
|
||||
env,
|
||||
maxBuffer: 20 * 1024 * 1024,
|
||||
});
|
||||
const text = typeof objectStdout === 'string' ? objectStdout : objectStdout.toString('utf-8');
|
||||
const text = await minioIngestInternals.readObjectText(runner, remotePath, key, env);
|
||||
|
||||
if (!force && !minioIngestInternals.isLikelyText(text)) {
|
||||
if (!force && !minioIngestInternals.isExtractableBinaryObject(key) && !minioIngestInternals.isLikelyText(text)) {
|
||||
skipped++;
|
||||
continue;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user