feat(deploy): add whisper.cpp Kubernetes deployment
Add a Dockerfile and Kubernetes manifests for the whisper.cpp transcription service. Deploys to the ai-stack namespace with worker-node affinity for GPU access.
This commit is contained in:
@@ -0,0 +1,40 @@
|
||||
# Multi-stage image for the whisper.cpp HTTP transcription server.
#
# Build arguments:
#   WHISPER_CPP_VERSION  git tag or branch of whisper.cpp to build
#   WHISPER_MODEL        model name handed to models/download-ggml-model.sh;
#                        the file lands at /app/models/ggml-<model>.bin

# ---- Build stage: compile whisper.cpp and download the model ----
FROM ubuntu:22.04 AS build
WORKDIR /app

# ca-certificates is listed explicitly: --no-install-recommends would
# otherwise drop it, and both the git clone and the model download use HTTPS.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential ca-certificates cmake git wget \
    && rm -rf /var/lib/apt/lists/*

ARG WHISPER_CPP_VERSION=v1.8.3
RUN git clone --depth 1 --branch "${WHISPER_CPP_VERSION}" \
    https://github.com/ggml-org/whisper.cpp.git .

ARG WHISPER_MODEL=base.en
RUN bash ./models/download-ggml-model.sh "${WHISPER_MODEL}"

# NOTE(review): no GPU backend flag is passed to cmake here, so this looks
# like a CPU-only build -- confirm that is intended.
RUN cmake -B build && cmake --build build --config Release -j"$(nproc)"

# ---- Runtime stage: server binary, shared libraries and one model ----
FROM ubuntu:22.04
WORKDIR /app

# ffmpeg is needed by the server's --convert option; curl is kept for
# in-container debugging and health checks.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates curl ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# ARG values do not carry across stages; redeclare before the first use below.
ARG WHISPER_MODEL=base.en

COPY --from=build /app/build/bin/whisper-server /usr/local/bin/whisper-server
COPY --from=build /app/build/src/libwhisper.so* /usr/local/lib/
COPY --from=build /app/build/ggml/src/libggml*.so* /usr/local/lib/
# Copy only the selected model rather than the whole models/ source directory.
COPY --from=build /app/models/ggml-${WHISPER_MODEL}.bin /app/models/

# Refresh the linker cache for the copied libraries, and publish the model
# under a fixed name so the default CMD works for any WHISPER_MODEL value
# (exec-form CMD cannot expand build args or environment variables).
RUN ldconfig && \
    ln -s "ggml-${WHISPER_MODEL}.bin" /app/models/ggml-model.bin

ENV WHISPER_MODEL_PATH=/app/models/ggml-${WHISPER_MODEL}.bin

EXPOSE 8080

ENTRYPOINT ["whisper-server"]
# Default arguments; a Kubernetes `args:` list replaces these entirely.
CMD ["--model", "/app/models/ggml-model.bin", \
     "--host", "0.0.0.0", \
     "--port", "8080", \
     "--convert", \
     "--language", "en", \
     "--inference-path", "/v1/audio/transcriptions"]
|
||||
@@ -0,0 +1,104 @@
|
||||
# Deployment for the whisper.cpp HTTP transcription server (single replica).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: whisper
  namespace: ai-stack
  labels:
    app: whisper
spec:
  replicas: 1
  # Recreate terminates the old pod before the new one starts, so every
  # rollout has a short window with no running pod.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: whisper
  template:
    metadata:
      labels:
        app: whisper
    spec:
      affinity:
        nodeAffinity:
          # Soft preference: the scheduler favours worker nodes but may still
          # place the pod elsewhere.
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  # Exists matches the role label whatever its value; role
                  # labels are often set with an empty value, which
                  # `In ["true"]` would never match.
                  - key: node-role.kubernetes.io/worker
                    operator: Exists
      imagePullSecrets:
        - name: gitea-regcred
      containers:
        - name: whisper
          image: gitea-http.taildb3494.ts.net/will/whisper-cpp:latest
          imagePullPolicy: Always
          # These replace the image's default CMD; the model path must exist
          # inside the image.
          args:
            - "--model"
            - "/app/models/ggml-base.en.bin"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--convert"
            - "--language"
            - "en"
            - "--inference-path"
            - "/v1/audio/transcriptions"
          ports:
            - name: http
              containerPort: 8080
          # NOTE(review): only CPU and memory are requested; no GPU resource
          # is declared -- confirm whether GPU access is actually required.
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: "2"
              memory: 1Gi
          # The server only listens on an unprivileged port, so it needs no
          # extra capabilities and no privilege escalation.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
          livenessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
|
||||
---
|
||||
# Cluster-internal Service in front of the whisper pods.
apiVersion: v1
kind: Service
metadata:
  namespace: ai-stack
  name: whisper
  labels:
    app: whisper
spec:
  # ClusterIP and TCP are the API defaults, spelled out here for readability.
  type: ClusterIP
  ports:
    - name: http
      protocol: TCP
      port: 8080
      # Refers to the container port named "http" on the selected pods.
      targetPort: http
  selector:
    app: whisper
|
||||
---
|
||||
# Ingress exposing the whisper Service through the "nginx" ingress class.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: whisper
  namespace: ai-stack
  labels:
    app: whisper
  # NOTE(review): these annotations are read by the community ingress-nginx
  # controller -- confirm that is what serves the "nginx" class here.
  annotations:
    # Audio uploads easily exceed the controller's default 1m body limit,
    # which would be rejected with HTTP 413.
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
    # Transcribing a long file can take longer than the default 60s timeouts.
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "300"
spec:
  ingressClassName: nginx
  rules:
    - host: whisper.ai-stack.192.168.153.240.nip.io
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: whisper
                port:
                  name: http
|
||||
Reference in New Issue
Block a user