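chore(compose): remediate vulnerable service images

Also switches the GPU whisper-server from ggml-small.bin to ggml-base.bin
and adds `base` to the model download loop so the file is present.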

2026-05-21 23:46:09 -07:00
parent d744e5c036
commit 7b9fafcff5
1 changed file with 8 additions and 7 deletions
@@ -45,7 +45,7 @@ services:
    command:
      - |
        set -e
-        for m in medium small; do
+        for m in medium small base; do
          if [ -f /app/models/ggml-$$m.bin ]; then
            echo "Model ggml-$$m.bin already present, skipping download."
          else
@@ -55,9 +55,10 @@ services:
        done
  # Primary whisper.cpp server: NVIDIA RTX 5070 Ti via CUDA (Blackwell sm_120).
-  # Uses ggml-small.bin (~850 MiB VRAM) — fits alongside gemma 3 12b which runs
+  # Uses ggml-base.bin to keep the service alive while llama-server owns most of
-  # with `--parallel 1` (frees ~900 MiB of VRAM). Benchmarked at ~150 ms per
+  # the laptop GPU VRAM. The previous ggml-small.bin profile needed ~465 MiB
-  # short clip, ~93x faster than the CPU server below with identical WER.
+  # contiguous CUDA memory and restarted when only ~560 MiB fragmented VRAM was
+  # free. CPU whisper-server below remains the higher-accuracy fallback.
  #
  # The official `ghcr.io/ggml-org/whisper.cpp:main-cuda` ships kernels only
  # for sm_75/80/86/90 and fails to init CUDA on Blackwell. We build a custom
@@ -79,7 +80,7 @@ services:
    entrypoint: ["whisper-server"]
    command:
      - --model
-      - /app/models/ggml-small.bin
+      - /app/models/ggml-base.bin
      - --host
      - 0.0.0.0
      - --port
@@ -224,7 +225,7 @@ services:
  # Optional local dependency: liteLLM proxy for unified LLM API.
  # Start with: docker compose --profile api up -d litellm
  litellm:
-    image: litellm/litellm:v1.82.3-stable.patch.2
+    image: litellm/litellm:v1.83.7-stable
    container_name: litellm
    restart: unless-stopped
    profiles: ["api"]
@@ -309,7 +310,7 @@ services:
  # Dedicated local n8n instance for agent-oriented workflows.
  # Start with: docker compose --profile automation up -d n8n-agent
  n8n-agent:
-    image: docker.n8n.io/n8nio/n8n:2.11.3
+    image: docker.n8n.io/n8nio/n8n:2.22.1
    container_name: n8n-agent
    restart: unless-stopped
    profiles: ["automation"]