# whisper.cpp built for NVIDIA Blackwell (compute capability 12.0 / sm_120).
#
# Why this exists: the official `ghcr.io/ggml-org/whisper.cpp:main-cuda` image
# only ships CUDA kernels for sm_75/80/86/90 (Turing -> Hopper) and includes
# no PTX, so it fails to initialize CUDA on RTX 50-series Blackwell GPUs with
# `ggml_cuda_init: failed to initialize CUDA: system has unsupported display
# driver / cuda driver combination`.
#
# Build:
#   docker build -t whisper.cpp:cuda-blackwell ./whisper-cuda-blackwell
#
# Override args if needed:
#   --build-arg CUDA_ARCH=120        # set to your GPU's compute capability
#   --build-arg WHISPER_REF=v1.7.6   # pin to a whisper.cpp branch, tag, or full commit SHA
#
# The build fails if WHISPER_REF cannot be fetched; it never silently falls
# back to the default branch.

ARG CUDA_VERSION=12.9.1
ARG UBUNTU_VERSION=24.04

# ---------- Build stage ----------
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS builder

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /src

# Declared right before first use so changing the ref does not invalidate the
# apt layer above.
ARG WHISPER_REF=master

# Fetch exactly the requested ref as a shallow, detached checkout.
# `git fetch <remote> <ref>` accepts a branch name, a tag name, or a full commit
# SHA (unlike `git clone --branch`, which rejects SHAs), and it exits non-zero
# when the ref does not exist, so a mistyped pin aborts the build instead of
# quietly producing an image from some other revision.
RUN git init -q . \
    && git remote add origin https://github.com/ggml-org/whisper.cpp.git \
    && git fetch --depth 1 origin "${WHISPER_REF}" \
    && git checkout -q FETCH_HEAD

ARG CUDA_ARCH=120

# Build with CUDA enabled and explicit Blackwell architecture.
# Adding "${CUDA_ARCH}-virtual" emits PTX too so JIT can target newer chips
# from the same family if Blackwell ever gets minor variants.
#
# Why the linker dance:
#   `libggml-cuda.so` calls into the CUDA driver API (`cuGetErrorString`,
#   `cuMemMap`, etc.) but doesn't declare a `DT_NEEDED` for libcuda. When the
#   whisper-server executable links against libggml-cuda.so, ld then sees those
#   unresolved symbols and refuses. We fix it two ways at once:
#     1. Symlink the driver stub to /usr/local/cuda/lib64/stubs/libcuda.so.1
#        and register the dir with ldconfig so `-lcuda` resolves.
#     2. Force `-lcuda` onto the link line via CMAKE_*_LINKER_FLAGS, with
#        `--no-as-needed` so ld keeps it even when ordering would normally drop it.
#   The stub only exists in this build stage; the runtime stage does not copy it.
RUN ln -sf /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 \
    && echo "/usr/local/cuda/lib64/stubs" > /etc/ld.so.conf.d/cuda-stubs.conf \
    && ldconfig \
    && cmake -B build \
        -DGGML_CUDA=ON \
        -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH};${CUDA_ARCH}-virtual" \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_EXE_LINKER_FLAGS="-L/usr/local/cuda/lib64/stubs -Wl,--no-as-needed -lcuda -Wl,--as-needed" \
        -DCMAKE_SHARED_LINKER_FLAGS="-L/usr/local/cuda/lib64/stubs -Wl,--no-as-needed -lcuda -Wl,--as-needed" \
    && cmake --build build --config Release -j "$(nproc)" \
        --target whisper-server whisper-cli whisper-bench

# ---------- Runtime stage ----------
# Plain `runtime` variant: the build above does not enable or link cuDNN, so
# the larger `cudnn-runtime` image would only add unused libraries.
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        ffmpeg \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged account with a fixed numeric UID/GID so orchestrators can verify
# the container is non-root. /app and its subdirectories are created up front
# and owned by that account so the download script can write models there.
RUN groupadd --gid 10001 whisper \
    && useradd --uid 10001 --gid whisper --home-dir /app --no-create-home \
        --shell /usr/sbin/nologin whisper \
    && install -d -o whisper -g whisper /app /app/models /app/samples

# Binaries
COPY --from=builder \
    /src/build/bin/whisper-server \
    /src/build/bin/whisper-cli \
    /src/build/bin/whisper-bench \
    /usr/local/bin/

# whisper.cpp builds dynamic libs by default; copy them all.
COPY --from=builder /src/build/src/libwhisper.so* /usr/local/lib/
COPY --from=builder /src/build/ggml/src/libggml.so* /usr/local/lib/
COPY --from=builder /src/build/ggml/src/libggml-base.so* /usr/local/lib/
COPY --from=builder /src/build/ggml/src/libggml-cpu.so* /usr/local/lib/
COPY --from=builder /src/build/ggml/src/ggml-cuda/libggml-cuda.so* /usr/local/lib/

# Helpful extras: the model-download script and the JFK sample.
COPY --from=builder --chown=whisper:whisper /src/models/download-ggml-model.sh /app/models/download-ggml-model.sh
COPY --from=builder --chown=whisper:whisper /src/samples /app/samples

# Refresh the loader cache so the libraries copied into /usr/local/lib resolve.
# Must run before dropping privileges.
RUN ldconfig

WORKDIR /app
USER 10001:10001
EXPOSE 8080
ENTRYPOINT ["whisper-server"]