chore(compose): remediate vulnerable service images
This commit is contained in:
+8
-7
@@ -45,7 +45,7 @@ services:
|
|||||||
command:
|
command:
|
||||||
- |
|
- |
|
||||||
set -e
|
set -e
|
||||||
for m in medium small; do
|
for m in medium small base; do
|
||||||
if [ -f /app/models/ggml-$$m.bin ]; then
|
if [ -f /app/models/ggml-$$m.bin ]; then
|
||||||
echo "Model ggml-$$m.bin already present, skipping download."
|
echo "Model ggml-$$m.bin already present, skipping download."
|
||||||
else
|
else
|
||||||
@@ -55,9 +55,10 @@ services:
|
|||||||
done
|
done
|
||||||
|
|
||||||
# Primary whisper.cpp server: NVIDIA RTX 5070 Ti via CUDA (Blackwell sm_120).
|
# Primary whisper.cpp server: NVIDIA RTX 5070 Ti via CUDA (Blackwell sm_120).
|
||||||
# Uses ggml-small.bin (~850 MiB VRAM) — fits alongside gemma 3 12b which runs
|
# Uses ggml-base.bin to keep the service alive while llama-server owns most of
|
||||||
# with `--parallel 1` (frees ~900 MiB of VRAM). Benchmarked at ~150 ms per
|
# the laptop GPU VRAM. The previous ggml-small.bin profile needed ~465 MiB
|
||||||
# short clip, ~93x faster than the CPU server below with identical WER.
|
# contiguous CUDA memory and restarted when only ~560 MiB fragmented VRAM was
|
||||||
|
# free. CPU whisper-server below remains the higher-accuracy fallback.
|
||||||
#
|
#
|
||||||
# The official `ghcr.io/ggml-org/whisper.cpp:main-cuda` ships kernels only
|
# The official `ghcr.io/ggml-org/whisper.cpp:main-cuda` ships kernels only
|
||||||
# for sm_75/80/86/90 and fails to init CUDA on Blackwell. We build a custom
|
# for sm_75/80/86/90 and fails to init CUDA on Blackwell. We build a custom
|
||||||
@@ -79,7 +80,7 @@ services:
|
|||||||
entrypoint: ["whisper-server"]
|
entrypoint: ["whisper-server"]
|
||||||
command:
|
command:
|
||||||
- --model
|
- --model
|
||||||
- /app/models/ggml-small.bin
|
- /app/models/ggml-base.bin
|
||||||
- --host
|
- --host
|
||||||
- 0.0.0.0
|
- 0.0.0.0
|
||||||
- --port
|
- --port
|
||||||
@@ -224,7 +225,7 @@ services:
|
|||||||
# Optional local dependency: liteLLM proxy for unified LLM API.
|
# Optional local dependency: liteLLM proxy for unified LLM API.
|
||||||
# Start with: docker compose --profile api up -d litellm
|
# Start with: docker compose --profile api up -d litellm
|
||||||
litellm:
|
litellm:
|
||||||
image: litellm/litellm:v1.82.3-stable.patch.2
|
image: litellm/litellm:v1.83.7-stable
|
||||||
container_name: litellm
|
container_name: litellm
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
profiles: ["api"]
|
profiles: ["api"]
|
||||||
@@ -309,7 +310,7 @@ services:
|
|||||||
# Dedicated local n8n instance for agent-oriented workflows.
|
# Dedicated local n8n instance for agent-oriented workflows.
|
||||||
# Start with: docker compose --profile automation up -d n8n-agent
|
# Start with: docker compose --profile automation up -d n8n-agent
|
||||||
n8n-agent:
|
n8n-agent:
|
||||||
image: docker.n8n.io/n8nio/n8n:2.11.3
|
image: docker.n8n.io/n8nio/n8n:2.22.1
|
||||||
container_name: n8n-agent
|
container_name: n8n-agent
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
profiles: ["automation"]
|
profiles: ["automation"]
|
||||||
|
|||||||
Reference in New Issue
Block a user