diff --git a/deploy/whisper/Dockerfile b/deploy/whisper/Dockerfile
new file mode 100644
--- /dev/null
+++ b/deploy/whisper/Dockerfile
@@ -0,0 +1,74 @@
+# Build stage: clone and compile whisper.cpp, then download the ggml model.
+FROM ubuntu:22.04 AS build
+WORKDIR /app
+
+# --no-install-recommends keeps the layer small; ca-certificates is therefore
+# listed explicitly so the HTTPS clone and model download can verify TLS.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        cmake \
+        git \
+        wget \
+    && rm -rf /var/lib/apt/lists/*
+
+ARG WHISPER_CPP_VERSION=v1.8.3
+RUN git clone --depth 1 --branch ${WHISPER_CPP_VERSION} \
+    https://github.com/ggml-org/whisper.cpp.git .
+
+# Compile before WHISPER_MODEL is declared so that switching models reuses the
+# cached build layer. CMAKE_BUILD_TYPE selects Release for single-config
+# generators; --config only takes effect with multi-config ones.
+RUN cmake -B build -DCMAKE_BUILD_TYPE=Release && \
+    cmake --build build --config Release -j"$(nproc)"
+
+ARG WHISPER_MODEL=base.en
+RUN bash ./models/download-ggml-model.sh "${WHISPER_MODEL}"
+
+# Runtime stage: server binary, its shared libraries and the selected model.
+FROM ubuntu:22.04
+WORKDIR /app
+
+# ffmpeg backs the server's --convert flag. libgomp1 is named explicitly in
+# case the build linked against OpenMP (NOTE(review): confirm with ldd).
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        ca-certificates \
+        curl \
+        ffmpeg \
+        libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Redeclared here because ARG values do not carry across build stages.
+ARG WHISPER_MODEL=base.en
+ENV WHISPER_MODEL_PATH=/app/models/ggml-${WHISPER_MODEL}.bin
+
+COPY --from=build /app/build/bin/whisper-server /usr/local/bin/whisper-server
+COPY --from=build /app/build/src/libwhisper.so* /usr/local/lib/
+COPY --from=build /app/build/ggml/src/libggml*.so* /usr/local/lib/
+# Copy only the selected model; the build stage's models/ directory also holds
+# the download script, which the runtime image has no use for.
+COPY --from=build /app/models/ggml-${WHISPER_MODEL}.bin /app/models/
+
+# - ldconfig registers the shared libraries copied into /usr/local/lib.
+# - ggml-model.bin is a model-independent alias, so the default CMD keeps
+#   working when the image is built with a different WHISPER_MODEL.
+# - The server runs as an unprivileged user; /app is handed to that user in
+#   case the server writes scratch files into its working directory.
+RUN ldconfig && \
+    ln -s "ggml-${WHISPER_MODEL}.bin" /app/models/ggml-model.bin && \
+    useradd --no-log-init --no-create-home --uid 10001 \
+        --home-dir /app --shell /usr/sbin/nologin whisper && \
+    chown whisper /app
+USER 10001
+
+EXPOSE 8080
+
+ENTRYPOINT ["whisper-server"]
+CMD ["--model", "/app/models/ggml-model.bin", \
+     "--host", "0.0.0.0", \
+     "--port", "8080", \
+     "--convert", \
+     "--language", "en", \
+     "--inference-path", "/v1/audio/transcriptions"]
diff --git a/deploy/whisper/k8s.yaml b/deploy/whisper/k8s.yaml
new file mode 100644
--- /dev/null
+++ b/deploy/whisper/k8s.yaml
@@ -0,0 +1,112 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: whisper
+  namespace: ai-stack
+  labels:
+    app: whisper
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: whisper
+  template:
+    metadata:
+      labels:
+        app: whisper
+    spec:
+      affinity:
+        nodeAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 100
+              preference:
+                matchExpressions:
+                  - key: node-role.kubernetes.io/worker
+                    operator: In
+                    values: ["true"]
+      imagePullSecrets:
+        - name: gitea-regcred
+      containers:
+        - name: whisper
+          image: gitea-http.taildb3494.ts.net/will/whisper-cpp:latest
+          imagePullPolicy: Always
+          # args replaces the image's CMD wholesale, so the model path here
+          # must match the model baked into the image (base.en by default).
+          args:
+            - "--model"
+            - "/app/models/ggml-base.en.bin"
+            - "--host"
+            - "0.0.0.0"
+            - "--port"
+            - "8080"
+            - "--convert"
+            - "--language"
+            - "en"
+            - "--inference-path"
+            - "/v1/audio/transcriptions"
+          ports:
+            - name: http
+              containerPort: 8080
+          resources:
+            requests:
+              cpu: 100m
+              memory: 256Mi
+            limits:
+              cpu: "2"
+              memory: 1Gi
+          livenessProbe:
+            httpGet:
+              path: /
+              port: http
+            initialDelaySeconds: 10
+            periodSeconds: 30
+            timeoutSeconds: 5
+          readinessProbe:
+            httpGet:
+              path: /
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 10
+            timeoutSeconds: 5
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: whisper
+  namespace: ai-stack
+  labels:
+    app: whisper
+spec:
+  selector:
+    app: whisper
+  ports:
+    - name: http
+      port: 8080
+      targetPort: http
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: whisper
+  namespace: ai-stack
+  annotations:
+    # Assumes the kubernetes/ingress-nginx controller (TODO confirm). Its
+    # default 1m body limit rejects larger audio uploads with HTTP 413, and
+    # its 60s read timeout can cut off long transcriptions.
+    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
+    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
+spec:
+  ingressClassName: nginx
+  rules:
+    - host: whisper.ai-stack.192.168.153.240.nip.io
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: whisper
+                port:
+                  name: http