# 636f4b3311
# Add Dockerfile and K8s manifests for whisper.cpp transcription service.
# Deploys to ai-stack namespace with worker node affinity for GPU access.
# 105 lines, 2.3 KiB, YAML
# Deployment: a single-replica whisper pod in the ai-stack namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: whisper
  namespace: ai-stack
  labels:
    app: whisper
spec:
  replicas: 1
  # Recreate stops the running pod before its replacement is started.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: whisper
  template:
    metadata:
      labels:
        app: whisper
    spec:
      # Credentials used to pull the image from the private registry.
      imagePullSecrets:
        - name: gitea-regcred
      # Soft preference (not a hard requirement) for nodes whose
      # node-role.kubernetes.io/worker label has the value "true".
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: node-role.kubernetes.io/worker
                    operator: In
                    values:
                      - 'true'
      containers:
        - name: whisper
          image: gitea-http.taildb3494.ts.net/will/whisper-cpp:latest
          imagePullPolicy: Always
          # Arguments handed to the image entrypoint; order is significant
          # because each flag is followed by its value.
          args:
            - '--model'
            - '/app/models/ggml-base.en.bin'
            - '--host'
            - '0.0.0.0'
            - '--port'
            - '8080'
            - '--convert'
            - '--language'
            - 'en'
            - '--inference-path'
            - '/v1/audio/transcriptions'
          ports:
            # Named port; the probes below refer to it by name.
            - name: http
              containerPort: 8080
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: '2'
              memory: 1Gi
          # Readiness: GET / on the http port, every 10s after a 5s delay.
          readinessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
          # Liveness: GET / on the http port, every 30s after a 10s delay.
          livenessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
---
# Service: selects pods labelled app=whisper and exposes port 8080,
# forwarding to the container port named "http".
apiVersion: v1
kind: Service
metadata:
  name: whisper
  namespace: ai-stack
  labels:
    app: whisper
spec:
  selector:
    app: whisper
  ports:
    - name: http
      targetPort: http
      port: 8080
---
# Ingress: routes every path on the whisper nip.io host to the "http" port
# of the whisper Service, via the "nginx" ingress class.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: whisper
  namespace: ai-stack
  # Same label as the Deployment and Service so all three objects can be
  # selected together.
  labels:
    app: whisper
  annotations:
    # NOTE(review): these annotations assume the community ingress-nginx
    # controller backs the "nginx" class; confirm before relying on them.
    #
    # ingress-nginx limits request bodies to 1m by default, which rejects
    # most audio uploads with HTTP 413. Tune to the largest clip expected.
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
    # The default 60s upstream timeouts can cut off a long transcription
    # before the server has produced its response.
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "300"
spec:
  ingressClassName: nginx
  rules:
    - host: whisper.ai-stack.192.168.153.240.nip.io
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: whisper
                port:
                  name: http