chore(compose): remediate vulnerable service images

This commit is contained in:
William Valentin
2026-05-21 23:46:09 -07:00
parent d744e5c036
commit 7b9fafcff5
+8 -7
View File
@@ -45,7 +45,7 @@ services:
command:
- |
set -e
for m in medium small; do
for m in medium small base; do
if [ -f /app/models/ggml-$$m.bin ]; then
echo "Model ggml-$$m.bin already present, skipping download."
else
@@ -55,9 +55,10 @@ services:
done
# Primary whisper.cpp server: NVIDIA RTX 5070 Ti via CUDA (Blackwell sm_120).
# Uses ggml-small.bin (~850 MiB VRAM) — fits alongside gemma 3 12b which runs
# with `--parallel 1` (frees ~900 MiB of VRAM). Benchmarked at ~150 ms per
# short clip, ~93x faster than the CPU server below with identical WER.
# Uses ggml-base.bin to keep the service alive while llama-server owns most of
# the laptop GPU VRAM. The previous ggml-small.bin profile needed ~465 MiB of
# contiguous CUDA memory, and the server restarted when only ~560 MiB of
# fragmented VRAM was free. The CPU whisper-server below remains the
# higher-accuracy fallback.
#
# The official `ghcr.io/ggml-org/whisper.cpp:main-cuda` ships kernels only
# for sm_75/80/86/90 and fails to init CUDA on Blackwell. We build a custom
@@ -79,7 +80,7 @@ services:
entrypoint: ["whisper-server"]
command:
- --model
- /app/models/ggml-small.bin
- /app/models/ggml-base.bin
- --host
- 0.0.0.0
- --port
@@ -224,7 +225,7 @@ services:
# Optional local dependency: liteLLM proxy for unified LLM API.
# Start with: docker compose --profile api up -d litellm
litellm:
image: litellm/litellm:v1.82.3-stable.patch.2
image: litellm/litellm:v1.83.7-stable
container_name: litellm
restart: unless-stopped
profiles: ["api"]
@@ -309,7 +310,7 @@ services:
# Dedicated local n8n instance for agent-oriented workflows.
# Start with: docker compose --profile automation up -d n8n-agent
n8n-agent:
image: docker.n8n.io/n8nio/n8n:2.11.3
image: docker.n8n.io/n8nio/n8n:2.22.1
container_name: n8n-agent
restart: unless-stopped
profiles: ["automation"]