feat(deploy): add whisper.cpp Kubernetes deployment

Add Dockerfile and K8s manifests for whisper.cpp transcription service.
Deploys to ai-stack namespace with worker node affinity for GPU access.
This commit is contained in:
William Valentin
2026-02-12 00:14:41 -08:00
parent e0ce07ac43
commit 636f4b3311
2 changed files with 144 additions and 0 deletions
+40
View File
@@ -0,0 +1,40 @@
# ---- build stage: compile whisper.cpp and download the ggml model ----
FROM ubuntu:22.04 AS build
WORKDIR /app

# Toolchain for the CMake build, plus git/wget to fetch the sources and model.
# ca-certificates is listed explicitly: with --no-install-recommends it is no
# longer pulled in implicitly, and the HTTPS clone/download below need it.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Upstream tag to build; override with --build-arg WHISPER_CPP_VERSION=<tag>.
ARG WHISPER_CPP_VERSION=v1.8.3
RUN git clone --depth 1 --branch "${WHISPER_CPP_VERSION}" \
        https://github.com/ggml-org/whisper.cpp.git .

# Compile before fetching the model so that changing WHISPER_MODEL does not
# invalidate the (slow) build layer.
# --config is only honoured by multi-config generators, so the build type is
# also set through CMAKE_BUILD_TYPE for the default Makefile generator.
# NOTE(review): ggml may tune for the build host's CPU by default; confirm the
# resulting binary runs on every node type this image is scheduled on.
RUN cmake -B build -DCMAKE_BUILD_TYPE=Release && \
    cmake --build build --config Release -j"$(nproc)"

# Model name understood by models/download-ggml-model.sh; the runtime stage
# expects the result at /app/models/ggml-<WHISPER_MODEL>.bin.
ARG WHISPER_MODEL=base.en
RUN bash ./models/download-ggml-model.sh "${WHISPER_MODEL}"
# ---- runtime stage: whisper-server plus its shared libraries and model ----
FROM ubuntu:22.04
WORKDIR /app

# ffmpeg backs the server's --convert flag; curl is kept for in-container checks.
# libgomp1 is listed explicitly so the OpenMP runtime does not depend on being
# pulled in transitively (NOTE(review): assumes ggml was built with OpenMP).
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

COPY --from=build /app/build/bin/whisper-server /usr/local/bin/whisper-server
COPY --from=build /app/build/src/libwhisper.so* /usr/local/lib/
COPY --from=build /app/build/ggml/src/libggml*.so* /usr/local/lib/
COPY --from=build /app/models /app/models

# Must match the value used in the build stage (a single --build-arg covers both).
ARG WHISPER_MODEL=base.en

# Refresh the linker cache for the libraries copied into /usr/local/lib, fail
# the build early if the selected model is missing, and expose it under a
# stable name so the default CMD does not hard-code one particular model.
RUN ldconfig && \
    test -f "/app/models/ggml-${WHISPER_MODEL}.bin" && \
    ln -s "ggml-${WHISPER_MODEL}.bin" /app/models/model.bin

# Run unprivileged with a fixed numeric UID/GID. /app is handed to that user so
# the working directory stays writable.
# NOTE(review): --convert presumably writes temporary audio files; confirm they
# land in the working directory or /tmp.
RUN groupadd --gid 10001 whisper && \
    useradd --uid 10001 --gid whisper --no-create-home --home-dir /app \
        --shell /usr/sbin/nologin whisper && \
    chown whisper:whisper /app

ENV WHISPER_MODEL_PATH=/app/models/ggml-${WHISPER_MODEL}.bin
EXPOSE 8080
USER 10001:10001

ENTRYPOINT ["whisper-server"]
# Default arguments; exec form performs no variable expansion, hence the
# model.bin symlink created above. /app/models/ggml-<WHISPER_MODEL>.bin is
# still present for callers that pass an explicit --model path.
CMD ["--model", "/app/models/model.bin", \
     "--host", "0.0.0.0", \
     "--port", "8080", \
     "--convert", \
     "--language", "en", \
     "--inference-path", "/v1/audio/transcriptions"]
+104
View File
@@ -0,0 +1,104 @@
# Single-replica whisper-server Deployment in the ai-stack namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: whisper
  namespace: ai-stack
  labels:
    app: whisper
spec:
  replicas: 1
  # Recreate: the old pod is terminated before the replacement is started.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: whisper
  template:
    metadata:
      labels:
        app: whisper
    spec:
      # Soft preference only: the scheduler favours nodes labelled
      # node-role.kubernetes.io/worker=true but may still place the pod elsewhere.
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: node-role.kubernetes.io/worker
                    operator: In
                    values: ["true"]
      imagePullSecrets:
        - name: gitea-regcred
      containers:
        - name: whisper
          image: gitea-http.taildb3494.ts.net/will/whisper-cpp:latest
          imagePullPolicy: Always
          # Replaces the image's default CMD; the image ENTRYPOINT still applies.
          args:
            - "--model"
            - "/app/models/ggml-base.en.bin"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--convert"
            - "--language"
            - "en"
            - "--inference-path"
            - "/v1/audio/transcriptions"
          ports:
            - name: http
              containerPort: 8080
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: "2"
              memory: 1Gi
          # Liveness only checks that the HTTP server answers at all.
          livenessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
          # Readiness uses the server's status endpoint rather than the index
          # page, so traffic is only routed once the server reports itself ready.
          # NOTE(review): confirm /health is served by the whisper.cpp version
          # baked into the image before rolling this out.
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
---
# Service giving the whisper pods a stable in-cluster address on port 8080.
apiVersion: v1
kind: Service
metadata:
  name: whisper
  namespace: ai-stack
  labels:
    app: whisper
spec:
  # Selects pods carrying the label app=whisper.
  selector:
    app: whisper
  ports:
    # targetPort refers to the container port by its name ("http").
    - name: http
      port: 8080
      targetPort: http
---
# Exposes the whisper Service through the "nginx" ingress class.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: whisper
  namespace: ai-stack
  annotations:
    # Audio uploads routinely exceed the controller's default request-body
    # limit, and transcription can outlast the default proxy timeouts.
    # NOTE(review): these keys are the ingress-nginx (kubernetes/ingress-nginx)
    # annotations; confirm that is the controller behind ingressClassName
    # "nginx", and tune the limits to the expected audio size and duration.
    nginx.ingress.kubernetes.io/proxy-body-size: "100m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "600"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "600"
spec:
  ingressClassName: nginx
  rules:
    - host: whisper.ai-stack.192.168.153.240.nip.io
      http:
        paths:
          # Route every path on this host to the whisper Service's "http" port.
          - path: /
            pathType: Prefix
            backend:
              service:
                name: whisper
                port:
                  name: http