feat(deploy): add whisper.cpp Kubernetes deployment

Add a Dockerfile and Kubernetes manifests for the whisper.cpp transcription service. Deploys to the ai-stack namespace with worker-node affinity for GPU access.
2026-02-12 00:14:41 -08:00
parent e0ce07ac43
commit 636f4b3311
2 changed files with 144 additions and 0 deletions
@@ -0,0 +1,40 @@
+# Build stage: compiles whisper.cpp from a pinned ref and downloads the ggml
+# model. Nothing from this stage ships except what the runtime stage copies.
+FROM ubuntu:22.04 AS build
+WORKDIR /app
+
+# DEBIAN_FRONTEND=noninteractive keeps apt from blocking on debconf prompts
+# (for example tzdata) during an unattended image build. It is set inline so
+# it does not leak into later layers.
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive \
+    apt-get install -y build-essential wget cmake git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Tag or branch of whisper.cpp to build; override with --build-arg.
+ARG WHISPER_CPP_VERSION=v1.8.3
+RUN git clone --depth 1 --branch "${WHISPER_CPP_VERSION}" \
+    https://github.com/ggml-org/whisper.cpp.git .
+
+# Compile BEFORE declaring WHISPER_MODEL: a changed build arg invalidates the
+# cache of every RUN that follows its ARG line, so this order lets a model
+# switch reuse the (slow) compile layer.
+RUN cmake -B build && cmake --build build --config Release -j"$(nproc)"
+
+# Model name passed to the upstream download script. The runtime stage expects
+# the result at /app/models/ggml-<WHISPER_MODEL>.bin.
+ARG WHISPER_MODEL=base.en
+RUN bash ./models/download-ggml-model.sh "${WHISPER_MODEL}"
+
+# Runtime stage: ships only the whisper-server binary, its shared libraries,
+# the models directory and the tools the server needs at run time.
+FROM ubuntu:22.04
+WORKDIR /app
+
+# --no-install-recommends keeps ffmpeg's optional packages out of the image.
+# ca-certificates is listed explicitly so curl can still do HTTPS.
+# libgomp1 is listed explicitly in case the ggml libraries were linked against
+# OpenMP (NOTE(review): previously it could only arrive transitively).
+# ffmpeg is presumably what the server's --convert option shells out to;
+# confirm against the whisper.cpp server documentation.
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive \
+    apt-get install -y --no-install-recommends \
+        ca-certificates curl ffmpeg libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=build /app/build/bin/whisper-server /usr/local/bin/whisper-server
+COPY --from=build /app/build/src/libwhisper.so* /usr/local/lib/
+COPY --from=build /app/build/ggml/src/libggml*.so* /usr/local/lib/
+COPY --from=build /app/models /app/models
+
+# Must match the WHISPER_MODEL used in the build stage (same default).
+ARG WHISPER_MODEL=base.en
+ENV WHISPER_MODEL_PATH=/app/models/ggml-${WHISPER_MODEL}.bin
+
+# ldconfig registers the libraries copied into /usr/local/lib.
+# The symlink gives the default CMD a model path that stays valid for any
+# WHISPER_MODEL build arg; exec-form CMD cannot expand ENV variables, and the
+# previous hard-coded ggml-base.en.bin path broke for every other model.
+# The original ggml-<model>.bin file is left in place for explicit callers.
+RUN ldconfig && \
+    ln -sf "${WHISPER_MODEL_PATH}" /app/models/ggml-model.bin
+
+EXPOSE 8080
+
+# Arguments supplied at `docker run` (or Kubernetes `args`) replace CMD and
+# are appended to the whisper-server entrypoint.
+ENTRYPOINT ["whisper-server"]
+CMD ["--model", "/app/models/ggml-model.bin", \
+     "--host", "0.0.0.0", \
+     "--port", "8080", \
+     "--convert", \
+     "--language", "en", \
+     "--inference-path", "/v1/audio/transcriptions"]
@@ -0,0 +1,104 @@
+# Single-replica Deployment of the whisper.cpp HTTP server in ai-stack.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: whisper
+  namespace: ai-stack
+  labels:
+    app: whisper
+spec:
+  replicas: 1
+  # Recreate stops the old pod before starting its replacement, so two
+  # copies never run side by side during a rollout.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: whisper
+  template:
+    metadata:
+      labels:
+        app: whisper
+    spec:
+      # Nothing in this pod spec uses the Kubernetes API, so do not mount a
+      # service-account token into a container that parses uploaded files.
+      automountServiceAccountToken: false
+      affinity:
+        nodeAffinity:
+          # Soft preference: the pod still schedules elsewhere when no
+          # worker-labelled node is available.
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 100
+              preference:
+                matchExpressions:
+                  # Match on label presence rather than a specific value:
+                  # node-role labels are commonly set with an empty value,
+                  # which `In ["true"]` would never match. Nodes labelled
+                  # "true" still match.
+                  - key: node-role.kubernetes.io/worker
+                    operator: Exists
+      imagePullSecrets:
+        - name: gitea-regcred
+      containers:
+        - name: whisper
+          # NOTE(review): `latest` is a mutable tag; with pullPolicy Always
+          # every pod restart may pick up a different image. Consider
+          # pinning a version tag or digest.
+          image: gitea-http.taildb3494.ts.net/will/whisper-cpp:latest
+          imagePullPolicy: Always
+          # These args replace the image's default CMD and are passed to the
+          # whisper-server entrypoint.
+          args:
+            - "--model"
+            - "/app/models/ggml-base.en.bin"
+            - "--host"
+            - "0.0.0.0"
+            - "--port"
+            - "8080"
+            - "--convert"
+            - "--language"
+            - "en"
+            - "--inference-path"
+            - "/v1/audio/transcriptions"
+          ports:
+            - name: http
+              containerPort: 8080
+          # Hardening for a service that handles uploaded audio: block
+          # privilege escalation, drop all Linux capabilities (port 8080 is
+          # unprivileged, none are needed) and apply the runtime's default
+          # seccomp filter. The container user and filesystem are left
+          # unchanged.
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop: ["ALL"]
+            seccompProfile:
+              type: RuntimeDefault
+          resources:
+            requests:
+              cpu: 100m
+              memory: 256Mi
+            limits:
+              cpu: "2"
+              memory: 1Gi
+          # Both probes issue GET / against the named `http` port.
+          livenessProbe:
+            httpGet:
+              path: /
+              port: http
+            initialDelaySeconds: 10
+            periodSeconds: 30
+            timeoutSeconds: 5
+          readinessProbe:
+            httpGet:
+              path: /
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 10
+            timeoutSeconds: 5
+---
+# Cluster-internal Service that forwards port 8080 to the `http` port of
+# pods labelled app=whisper.
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app: whisper
+  name: whisper
+  namespace: ai-stack
+spec:
+  # ClusterIP and TCP are the Kubernetes defaults, spelled out for clarity.
+  type: ClusterIP
+  selector:
+    app: whisper
+  ports:
+    - name: http
+      protocol: TCP
+      port: 8080
+      # Resolved by name against the container's `http` port.
+      targetPort: http
+---
+# Exposes the whisper Service through the `nginx` ingress class on a nip.io
+# hostname.
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: whisper
+  namespace: ai-stack
+  labels:
+    app: whisper
+  # The annotations below assume the community ingress-nginx controller;
+  # other controllers ignore them. NOTE(review): tune the values to the
+  # largest and longest audio you expect.
+  annotations:
+    # ingress-nginx caps request bodies at 1m by default, which rejects
+    # most audio uploads with HTTP 413.
+    nginx.ingress.kubernetes.io/proxy-body-size: "64m"
+    # Transcription can outlast the default 60s proxy timeouts; allow up to
+    # ten minutes before the proxy gives up on the backend.
+    nginx.ingress.kubernetes.io/proxy-read-timeout: "600"
+    nginx.ingress.kubernetes.io/proxy-send-timeout: "600"
+spec:
+  ingressClassName: nginx
+  rules:
+    - host: whisper.ai-stack.192.168.153.240.nip.io
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: whisper
+                port:
+                  name: http