Files
William Valentin 636f4b3311 feat(deploy): add whisper.cpp Kubernetes deployment
Add Dockerfile and K8s manifests for whisper.cpp transcription service.
Deploys to ai-stack namespace with worker node affinity for GPU access.
2026-02-12 00:14:41 -08:00

105 lines
2.3 KiB
YAML

# Deployment for the whisper.cpp transcription server (single replica) in the
# ai-stack namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: whisper
  namespace: ai-stack
  labels:
    app: whisper
spec:
  replicas: 1
  # Recreate terminates the existing pod before starting its replacement, so
  # two copies never run side by side; rollouts incur a short outage.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: whisper
  template:
    metadata:
      labels:
        app: whisper
    spec:
      affinity:
        nodeAffinity:
          # Soft preference: the scheduler favours nodes labelled
          # node-role.kubernetes.io/worker=true but may still place the pod
          # elsewhere if none fit.
          # NOTE(review): no GPU resource is requested below; if GPU access is
          # required, confirm whether this should be a hard requirement plus a
          # device resource limit.
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: node-role.kubernetes.io/worker
                    operator: In
                    values: ["true"]
      # Credentials for pulling from the private registry referenced by `image`.
      imagePullSecrets:
        - name: gitea-regcred
      containers:
        - name: whisper
          # NOTE(review): mutable `latest` tag combined with `Always` means
          # every pod restart may pick up a different build; consider pinning
          # a version tag or digest.
          image: gitea-http.taildb3494.ts.net/will/whisper-cpp:latest
          imagePullPolicy: Always
          # Server flags: model file, listen address/port (must match
          # containerPort below), and the HTTP path that accepts
          # transcription requests.
          args:
            - "--model"
            - "/app/models/ggml-base.en.bin"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--convert"
            - "--language"
            - "en"
            - "--inference-path"
            - "/v1/audio/transcriptions"
          ports:
            # Named port; the probes and the Service target it by name.
            - name: http
              containerPort: 8080
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: "2"
              memory: 1Gi
          # Restart the container if GET / on the http port stops answering.
          livenessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
          # Withhold Service traffic until GET / on the http port answers.
          readinessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
---
# Service exposing the whisper pods inside the cluster on port 8080.
# No `type` is set, so the Kubernetes default (ClusterIP) applies.
apiVersion: v1
kind: Service
metadata:
  name: whisper
  namespace: ai-stack
  labels:
    app: whisper
spec:
  # Routes to every pod carrying the app=whisper label.
  selector:
    app: whisper
  ports:
    - name: http
      port: 8080
      # Resolved by name against the container port called `http`.
      targetPort: http
---
# Ingress publishing the whisper Service through the `nginx` ingress class
# under a nip.io hostname.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: whisper
  namespace: ai-stack
  labels:
    app: whisper
  annotations:
    # ingress-nginx rejects request bodies larger than 1m by default
    # (HTTP 413), which is smaller than most audio uploads.
    # NOTE(review): assumes the community ingress-nginx controller backs the
    # `nginx` class; 50m is a chosen ceiling - tune to the expected file size.
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
spec:
  ingressClassName: nginx
  rules:
    - host: whisper.ai-stack.192.168.153.240.nip.io
      http:
        paths:
          # Prefix match on "/" forwards every path to the backend.
          - path: /
            pathType: Prefix
            backend:
              service:
                name: whisper
                port:
                  # Refers to the Service port named `http`.
                  name: http