Initial commit

OpenCode Test
2025-12-24 10:50:10 -08:00
commit e1a64aa092
70 changed files with 5827 additions and 0 deletions

apps/worker/Dockerfile Normal file

@@ -0,0 +1,35 @@
# syntax=docker/dockerfile:1
FROM oven/bun:1.3.3 AS deps
WORKDIR /app
# Workspace manifests (copy all workspace package.json files so Bun
# can resolve workspace:* deps without mutating the lockfile).
COPY package.json bun.lock tsconfig.base.json ./
COPY apps/web/package.json ./apps/web/package.json
COPY apps/worker/package.json ./apps/worker/package.json
COPY packages/config/package.json ./packages/config/package.json
COPY packages/db/package.json ./packages/db/package.json
COPY packages/minio/package.json ./packages/minio/package.json
COPY packages/queue/package.json ./packages/queue/package.json
RUN bun install --frozen-lockfile --production --ignore-scripts
FROM oven/bun:1.3.3 AS runner
WORKDIR /app
# Media tooling for worker pipeline
RUN apt-get update \
&& apt-get install -y --no-install-recommends ffmpeg libimage-exiftool-perl ca-certificates \
&& rm -rf /var/lib/apt/lists/*
ENV NODE_ENV=production
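# Reuse the resolved node_modules and manifests from the deps stage; app source is copied from the build context.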
COPY --from=deps /app/node_modules ./node_modules
COPY --from=deps /app/package.json ./package.json
COPY --from=deps /app/bun.lock ./bun.lock
COPY apps/worker ./apps/worker
COPY packages ./packages
CMD ["bun", "--cwd", "apps/worker", "run", "start"]

apps/worker/package.json Normal file

@@ -0,0 +1,19 @@
{
"name": "@tline/worker",
"version": "0.0.0",
"private": true,
"type": "module",
"dependencies": {
"@tline/config": "workspace:*",
"@tline/db": "workspace:*",
"@tline/minio": "workspace:*",
"@tline/queue": "workspace:*",
"@aws-sdk/client-s3": "^3.899.0",
"bullmq": "^5.61.0",
"sharp": "^0.33.5"
},
"scripts": {
"dev": "bun run src/index.ts",
"start": "bun run src/index.ts"
}
}

apps/worker/src/index.ts Normal file

@@ -0,0 +1,58 @@
import { getAppName } from "@tline/config";
import { Worker, type Job } from "bullmq";
import { closeQueue, getQueueEnv, getQueueName, getRedis } from "@tline/queue";
import { closeDb } from "@tline/db";
import {
handleCopyToCanonical,
handleProcessAsset,
handleScanMinioPrefix
} from "./jobs";
console.log(`[${getAppName()}] worker boot`);
const env = getQueueEnv();
const queueName = getQueueName();
const connection = getRedis();
try {
await connection.connect();
} catch (err) {
console.error(`[${getAppName()}] redis connect failed`, { err, redisUrl: env.REDIS_URL });
process.exit(1);
}
const worker = new Worker(
queueName,
async (job: Job) => {
if (job.name === "scan_minio_prefix") return handleScanMinioPrefix(job.data);
if (job.name === "process_asset") return handleProcessAsset(job.data);
if (job.name === "copy_to_canonical") return handleCopyToCanonical(job.data);
throw new Error(`Unknown job: ${job.name}`);
},
{
connection,
concurrency: 1
}
);
worker.on("failed", (job: Job | undefined, err: Error) => {
console.error(`[${getAppName()}] job failed`, { jobId: job?.id, name: job?.name, err });
});
worker.on("completed", (job: Job) => {
console.log(`[${getAppName()}] job completed`, { jobId: job.id, name: job.name });
});
async function shutdown(signal: string) {
console.log(`[${getAppName()}] shutting down`, { signal });
await Promise.allSettled([worker.close(), closeDb()]);
await Promise.allSettled([closeQueue()]);
process.exit(0);
}
process.on("SIGINT", () => void shutdown("SIGINT"));
process.on("SIGTERM", () => void shutdown("SIGTERM"));

apps/worker/src/jobs.ts Normal file

@@ -0,0 +1,616 @@
import { spawn } from "child_process";
import { mkdtemp, rm } from "fs/promises";
import { tmpdir } from "os";
import { join } from "path";
import { createWriteStream, createReadStream } from "fs";
import { Readable } from "stream";
import sharp from "sharp";
import {
CopyObjectCommand,
GetObjectCommand,
HeadObjectCommand,
ListObjectsV2Command,
PutObjectCommand
} from "@aws-sdk/client-s3";
import { getDb } from "@tline/db";
import { getMinioInternalClient } from "@tline/minio";
import {
copyToCanonicalPayloadSchema,
enqueueCopyToCanonical,
enqueueProcessAsset,
processAssetPayloadSchema,
scanMinioPrefixPayloadSchema,
} from "@tline/queue";
const allowedScanPrefixes = ["originals/"] as const;
function assertAllowedScanPrefix(prefix: string) {
if (allowedScanPrefixes.some((allowed) => prefix.startsWith(allowed))) return;
throw new Error(`scan prefix not allowed: ${prefix}`);
}
function getExtensionLower(key: string) {
const dot = key.lastIndexOf(".");
if (dot === -1) return "";
return key.slice(dot + 1).toLowerCase();
}
function inferMedia(
key: string,
): { mediaType: "image" | "video"; mimeType: string } | null {
const ext = getExtensionLower(key);
if (["jpg", "jpeg"].includes(ext))
return { mediaType: "image", mimeType: "image/jpeg" };
if (ext === "png") return { mediaType: "image", mimeType: "image/png" };
if (ext === "gif") return { mediaType: "image", mimeType: "image/gif" };
if (ext === "webp") return { mediaType: "image", mimeType: "image/webp" };
if (ext === "heic") return { mediaType: "image", mimeType: "image/heic" };
if (ext === "heif") return { mediaType: "image", mimeType: "image/heif" };
if (ext === "mov") return { mediaType: "video", mimeType: "video/quicktime" };
if (ext === "mp4") return { mediaType: "video", mimeType: "video/mp4" };
if (ext === "m4v") return { mediaType: "video", mimeType: "video/x-m4v" };
if (ext === "mkv")
return { mediaType: "video", mimeType: "video/x-matroska" };
return null;
}
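// e.g. inferMedia("originals/2021/IMG_0001.HEIC") -> { mediaType: "image", mimeType: "image/heic" };
// unknown extensions (sidecars such as .xmp or .json) return null and are counted as skipped by the scan.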
async function listAllObjectKeys(input: { bucket: string; prefix: string }) {
const s3 = getMinioInternalClient();
const keys: string[] = [];
let continuationToken: string | undefined;
do {
const res = await s3.send(
new ListObjectsV2Command({
Bucket: input.bucket,
Prefix: input.prefix,
ContinuationToken: continuationToken,
}),
);
for (const obj of res.Contents ?? []) {
if (!obj.Key) continue;
keys.push(obj.Key);
}
continuationToken = res.IsTruncated ? res.NextContinuationToken : undefined;
} while (continuationToken);
return keys;
}
export async function handleScanMinioPrefix(raw: unknown) {
const payload = scanMinioPrefixPayloadSchema.parse(raw);
assertAllowedScanPrefix(payload.prefix);
const keys = await listAllObjectKeys({
bucket: payload.bucket,
prefix: payload.prefix,
});
const db = getDb();
let processed = 0;
let skipped = 0;
let enqueued = 0;
for (const key of keys) {
if (key.endsWith("/")) {
skipped++;
continue;
}
const inferred = inferMedia(key);
if (!inferred) {
skipped++;
continue;
}
const rows = await db<
{
id: string;
status: "new" | "processing" | "ready" | "failed";
}[]
>`
insert into assets (bucket, media_type, mime_type, source_key, active_key)
values (${payload.bucket}, ${inferred.mediaType}, ${inferred.mimeType}, ${key}, ${key})
on conflict (bucket, source_key)
do update
set media_type = excluded.media_type,
mime_type = excluded.mime_type,
active_key = excluded.active_key
returning id, status
`;
processed++;
const [asset] = rows;
if (!asset) continue;
if (asset.status === "new" || asset.status === "failed") {
await enqueueProcessAsset({ assetId: asset.id });
enqueued++;
}
}
return {
ok: true,
importId: payload.importId,
bucket: payload.bucket,
scannedPrefix: payload.prefix,
found: keys.length,
processed,
skipped,
enqueued,
};
}
function streamToFile(stream: Readable, filePath: string): Promise<void> {
  return new Promise((resolve, reject) => {
    const writeStream = createWriteStream(filePath);
    // Errors on the source stream do not propagate through pipe(); reject on both sides.
    stream.on("error", reject);
    stream.pipe(writeStream);
    writeStream.on("finish", resolve);
    writeStream.on("error", reject);
  });
}
async function runCommand(cmd: string, args: string[]): Promise<string> {
return new Promise((resolve, reject) => {
const proc = spawn(cmd, args);
let stdout = "";
let stderr = "";
proc.stdout.on("data", (data) => {
stdout += data.toString();
});
proc.stderr.on("data", (data) => {
stderr += data.toString();
});
proc.on("close", (code) => {
if (code === 0) {
resolve(stdout);
} else {
reject(new Error(`${cmd} failed with code ${code}: ${stderr}`));
}
});
proc.on("error", reject);
});
}
async function uploadObject(input: {
bucket: string;
key: string;
filePath: string;
contentType?: string;
}): Promise<void> {
const s3 = getMinioInternalClient();
await s3.send(
new PutObjectCommand({
Bucket: input.bucket,
Key: input.key,
Body: createReadStream(input.filePath),
ContentType: input.contentType,
}),
);
}
async function getObjectLastModified(input: { bucket: string; key: string }): Promise<Date | null> {
const s3 = getMinioInternalClient();
const res = await s3.send(new HeadObjectCommand({ Bucket: input.bucket, Key: input.key }));
return res.LastModified ?? null;
}
function parseExifDate(dateStr: string | undefined): Date | null {
if (!dateStr) return null;
const s = dateStr.trim();
// ExifTool commonly emits: "YYYY:MM:DD HH:MM:SS", sometimes with fractional seconds and/or tz.
const m = s.match(
/^(\d{4}):(\d{2}):(\d{2})[ T](\d{2}):(\d{2}):(\d{2})(\.\d+)?(?:\s*(Z|[+-]\d{2}:\d{2}))?$/,
);
if (m) {
const [, y, mo, d, hh, mm, ss, frac, tz] = m;
// If tz missing, prefer deterministic UTC over server-local interpretation.
const iso = `${y}-${mo}-${d}T${hh}:${mm}:${ss}${frac ?? ""}${tz ?? "Z"}`;
const date = new Date(iso);
return isNaN(date.getTime()) ? null : date;
}
const date = new Date(s);
return isNaN(date.getTime()) ? null : date;
}
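// Examples:
//   "2021:07:04 12:30:01"           -> 2021-07-04T12:30:01Z (no tz: treated as UTC)
//   "2021:07:04 12:30:01.250-08:00" -> 2021-07-04T20:30:01.250Z
//   "0000:00:00 00:00:00"           -> null (matches the regex but yields an invalid Date)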
function isPlausibleCaptureTs(date: Date) {
const ts = date.getTime();
if (!Number.isFinite(ts)) return false;
const year = date.getUTCFullYear();
// Guard against bogus container/default dates; allow up to 24h in future.
return year >= 1971 && ts <= Date.now() + 24 * 60 * 60 * 1000;
}
function inferExtFromKey(key: string): string {
const ext = getExtensionLower(key);
return ext || "bin";
}
function pad2(n: number) {
return String(n).padStart(2, "0");
}
function utcDateParts(date: Date) {
const y = date.getUTCFullYear();
const m = date.getUTCMonth() + 1;
const d = date.getUTCDate();
return { y, m, d };
}
export async function handleProcessAsset(raw: unknown) {
const payload = processAssetPayloadSchema.parse(raw);
const db = getDb();
const s3 = getMinioInternalClient();
await db`
update assets
set status = 'processing', error_message = null
where id = ${payload.assetId}
and status in ('new', 'failed')
`;
try {
const [asset] = await db<
{
id: string;
bucket: string;
active_key: string;
media_type: "image" | "video";
mime_type: string;
created_at: Date;
}[]
>`
select id, bucket, active_key, media_type, mime_type, created_at
from assets
where id = ${payload.assetId}
`;
if (!asset) {
throw new Error(`Asset not found: ${payload.assetId}`);
}
const tempDir = await mkdtemp(join(tmpdir(), "tline-process-"));
try {
const containerExt = asset.mime_type.split("/")[1] ?? "bin";
const inputPath = join(tempDir, `input.${containerExt}`);
const getRes = await s3.send(
new GetObjectCommand({
Bucket: asset.bucket,
Key: asset.active_key,
}),
);
if (!getRes.Body) throw new Error("Empty response body from S3");
await streamToFile(getRes.Body as Readable, inputPath);
const updates: Record<string, unknown> = {
capture_ts_utc: null,
date_confidence: null,
width: null,
height: null,
rotation: null,
duration_seconds: null,
thumb_small_key: null,
thumb_med_key: null,
poster_key: null,
raw_tags_json: null
};
let rawTags: Record<string, unknown> = {};
let captureTs: Date | null = null;
let dateConfidence:
| "camera"
| "container"
| "object_mtime"
| "import_time"
| null = null;
async function tryReadExifTags(): Promise<Record<string, unknown>> {
try {
const exifOutput = await runCommand("exiftool", ["-j", inputPath]);
const exifData = JSON.parse(exifOutput);
if (Array.isArray(exifData) && exifData.length > 0) {
const first = exifData[0];
if (first && typeof first === "object") {
return first as Record<string, unknown>;
}
}
return {};
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
return { exiftool_error: message };
}
}
function maybeSetCaptureDateFromTags(tags: Record<string, unknown>) {
if (captureTs) return;
// ExifTool uses different fields across image/video vendors.
const dateFields = [
"DateTimeOriginal",
"CreateDate",
"ModifyDate",
"MediaCreateDate",
"TrackCreateDate",
"CreationDate",
"GPSDateTime",
] as const;
for (const field of dateFields) {
const val = tags[field] as string | undefined;
if (!val) continue;
const parsed = parseExifDate(val);
if (parsed && isPlausibleCaptureTs(parsed)) {
captureTs = parsed;
dateConfidence = "camera";
return;
}
}
}
async function applyObjectMtimeFallback() {
if (captureTs) return;
try {
const mtime = await getObjectLastModified({
bucket: asset.bucket,
key: asset.active_key,
});
if (!mtime) return;
if (!isPlausibleCaptureTs(mtime)) return;
captureTs = mtime;
dateConfidence = "object_mtime";
rawTags = { ...rawTags, object_last_modified: mtime.toISOString() };
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
rawTags = { ...rawTags, object_last_modified_error: message };
}
}
if (asset.media_type === "image") {
rawTags = await tryReadExifTags();
maybeSetCaptureDateFromTags(rawTags);
await applyObjectMtimeFallback();
if (rawTags.ImageWidth !== undefined) updates.width = Number(rawTags.ImageWidth);
if (rawTags.ImageHeight !== undefined) updates.height = Number(rawTags.ImageHeight);
if (rawTags.Rotation !== undefined) updates.rotation = Number(rawTags.Rotation);
const imgMeta = await sharp(inputPath).metadata();
if (updates.width === null && imgMeta.width) updates.width = imgMeta.width;
if (updates.height === null && imgMeta.height) updates.height = imgMeta.height;
const thumb256Path = join(tempDir, "thumb_256.jpg");
const thumb768Path = join(tempDir, "thumb_768.jpg");
await sharp(inputPath)
.rotate()
.resize(256, 256, { fit: "inside", withoutEnlargement: true })
.jpeg({ quality: 80 })
.toFile(thumb256Path);
await sharp(inputPath)
.rotate()
.resize(768, 768, { fit: "inside", withoutEnlargement: true })
.jpeg({ quality: 80 })
.toFile(thumb768Path);
const thumb256Key = `thumbs/${asset.id}/image_256.jpg`;
const thumb768Key = `thumbs/${asset.id}/image_768.jpg`;
await uploadObject({
bucket: asset.bucket,
key: thumb256Key,
filePath: thumb256Path,
contentType: "image/jpeg",
});
await uploadObject({
bucket: asset.bucket,
key: thumb768Key,
filePath: thumb768Path,
contentType: "image/jpeg",
});
updates.thumb_small_key = thumb256Key;
updates.thumb_med_key = thumb768Key;
} else if (asset.media_type === "video") {
rawTags = await tryReadExifTags();
maybeSetCaptureDateFromTags(rawTags);
const ffprobeOutput = await runCommand("ffprobe", [
"-v",
"error",
"-select_streams",
"v:0",
"-show_entries",
"stream=width,height,duration",
"-show_entries",
"format_tags=creation_time",
"-of",
"json",
inputPath
]);
const ffprobeData = JSON.parse(ffprobeOutput);
if (!captureTs && ffprobeData.format?.tags?.creation_time) {
const ts = new Date(ffprobeData.format.tags.creation_time);
if (!isNaN(ts.getTime()) && isPlausibleCaptureTs(ts)) {
captureTs = ts;
dateConfidence = "container";
}
}
await applyObjectMtimeFallback();
if (ffprobeData.streams?.[0]) {
const stream = ffprobeData.streams[0];
if (stream.width) updates.width = Number(stream.width);
if (stream.height) updates.height = Number(stream.height);
if (stream.duration)
updates.duration_seconds = Math.round(Number(stream.duration));
}
rawTags = { ...rawTags, ffprobe: ffprobeData };
const posterPath = join(tempDir, "poster_256.jpg");
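        // scale=...:force_original_aspect_ratio=decrease fits the frame inside a 256x256 box
        // without distortion; -vframes 1 grabs a single poster frame.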
await runCommand("ffmpeg", [
"-i",
inputPath,
"-vf",
"scale=256:256:force_original_aspect_ratio=decrease",
"-vframes",
"1",
"-q:v",
"2",
"-y",
posterPath
]);
const posterKey = `thumbs/${asset.id}/poster_256.jpg`;
await uploadObject({
bucket: asset.bucket,
key: posterKey,
filePath: posterPath,
contentType: "image/jpeg",
});
updates.poster_key = posterKey;
}
if (asset.media_type === "video" && typeof updates.poster_key !== "string") {
throw new Error("poster generation did not produce output");
}
if (
asset.media_type === "image" &&
(typeof updates.thumb_small_key !== "string" || typeof updates.thumb_med_key !== "string")
) {
throw new Error("thumb generation did not produce output");
}
if (!captureTs) {
captureTs = new Date(asset.created_at);
dateConfidence = "import_time";
rawTags = {
...rawTags,
capture_date_fallback: "import_time",
};
}
updates.capture_ts_utc = captureTs;
updates.date_confidence = dateConfidence;
updates.raw_tags_json = rawTags;
await db`
update assets
set ${db(
updates,
"capture_ts_utc",
"date_confidence",
"width",
"height",
"rotation",
"duration_seconds",
"thumb_small_key",
"thumb_med_key",
"poster_key",
"raw_tags_json"
)}, status = 'ready', error_message = null
where id = ${asset.id}
`;
// Only uploads (staging/*) are copied into canonical by default.
if (asset.active_key.startsWith("staging/")) {
await enqueueCopyToCanonical({ assetId: asset.id });
}
return { ok: true };
} finally {
await rm(tempDir, { recursive: true, force: true });
}
} catch (err) {
const message = err instanceof Error ? err.message : "unknown_error";
await db`
update assets
set status = 'failed', error_message = ${message}
where id = ${payload.assetId}
`;
throw err;
}
}
export async function handleCopyToCanonical(raw: unknown) {
const payload = copyToCanonicalPayloadSchema.parse(raw);
const db = getDb();
const s3 = getMinioInternalClient();
const [asset] = await db<
{
id: string;
bucket: string;
source_key: string;
active_key: string;
canonical_key: string | null;
capture_ts_utc: Date | null;
}[]
>`
select id, bucket, source_key, active_key, canonical_key, capture_ts_utc
from assets
where id = ${payload.assetId}
limit 1
`;
if (!asset) throw new Error(`Asset not found: ${payload.assetId}`);
// Canonical layout is date-based; if we don't have a date yet, do nothing.
// This job can be retried later after metadata extraction improves.
if (!asset.capture_ts_utc) {
return { ok: true, assetId: asset.id, skipped: "missing_capture_ts" };
}
// Never copy external archive originals by default.
if (asset.source_key.startsWith("originals/")) {
return { ok: true, assetId: asset.id, skipped: "external_archive" };
}
const ext = inferExtFromKey(asset.source_key);
const { y, m, d } = utcDateParts(new Date(asset.capture_ts_utc));
const canonicalKey = `canonical/originals/${y}/${pad2(m)}/${pad2(d)}/${asset.id}.${ext}`;
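  // e.g. canonical/originals/2021/07/04/<asset-id>.jpg for a photo captured on 2021-07-04 UTC.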
// Idempotency: if already canonicalized, don't redo work.
if (asset.canonical_key === canonicalKey && asset.active_key === canonicalKey) {
return { ok: true, assetId: asset.id, canonicalKey, already: true };
}
await s3.send(
  new CopyObjectCommand({
    Bucket: asset.bucket,
    Key: canonicalKey,
    // CopySource must be URL-encoded; object keys may contain spaces or non-ASCII characters.
    CopySource: `${asset.bucket}/${encodeURIComponent(asset.active_key)}`,
    MetadataDirective: "COPY",
  }),
);
await db`
update assets
set canonical_key = ${canonicalKey}, active_key = ${canonicalKey}
where id = ${asset.id}
`;
return { ok: true, assetId: asset.id, canonicalKey };
}
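
For local debugging, the handlers can also be invoked directly, without Redis in the loop; a minimal sketch, assuming Postgres and MinIO are reachable and that the payload fields shown satisfy scanMinioPrefixPayloadSchema (importId and bucket are hypothetical values):

import { handleScanMinioPrefix } from "./jobs";

// Illustrative only: run a single scan pass and inspect its summary.
const summary = await handleScanMinioPrefix({
  importId: "manual-001", // hypothetical
  bucket: "media",        // hypothetical bucket name
  prefix: "originals/",
});
// summary: { ok, importId, bucket, scannedPrefix, found, processed, skipped, enqueued }
console.log(summary);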

apps/worker/tsconfig.json Normal file

@@ -0,0 +1,7 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"types": ["bun-types"]
},
"include": ["src/**/*.ts", "../../packages/*/src/**/*.ts"]
}