Files
2026-06-04 13:07:51 -07:00

148 lines
4.3 KiB
Python

import asyncio
import os
import subprocess
import tempfile
import threading
import time
from pathlib import Path
from typing import Optional

import numpy as np
import openvino as ov
import openvino_genai as ov_genai
import soundfile as sf
from fastapi import FastAPI, File, Form, UploadFile
from fastapi.responses import JSONResponse, PlainTextResponse
# Directory containing the exported Whisper model; override with WHISPER_MODEL_DIR.
MODEL_DIR = Path(os.getenv("WHISPER_MODEL_DIR", "/models/whisper-tiny-fp16-ov"))
# Device string handed to the Whisper pipeline; override with WHISPER_DEVICE.
DEVICE = os.getenv("WHISPER_DEVICE", "NPU")
# sysfs file read by busy_us().
# NOTE(review): presumably a cumulative NPU busy-time counter in microseconds
# (going by the file name) -- confirm against the driver documentation.
BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")

app = FastAPI(title="OpenVINO NPU Whisper server", version="0.1.0")

# Held around pipeline construction + inference in the transcription handler.
_lock = threading.Lock()
# Lazily-populated singletons, filled in by get_core() / get_pipe().
_core = None
_pipe = None
def busy_us() -> Optional[int]:
    """Read the integer stored in ``BUSY_PATH``, best effort.

    Returns:
        The parsed integer, or ``None`` when the file cannot be read (missing,
        permission denied, ...) or its contents are not a valid integer.
    """
    try:
        return int(BUSY_PATH.read_text().strip())
    except (OSError, ValueError):
        # OSError: file absent/unreadable; ValueError: undecodable or
        # non-numeric contents. Anything else would be a programming error and
        # should not be silently hidden.
        return None
def get_core():
    """Return the module-wide ``ov.Core``, constructing it on the first call."""
    global _core
    if _core is not None:
        return _core
    _core = ov.Core()
    return _core
def get_pipe():
    """Return the cached ``WhisperPipeline`` for ``MODEL_DIR`` on ``DEVICE``.

    The pipeline is built on the first call and reused afterwards.

    NOTE(review): the transcription handler in this file builds a fresh
    pipeline per request instead of calling this helper.
    """
    global _pipe
    if _pipe is not None:
        return _pipe
    _pipe = ov_genai.WhisperPipeline(str(MODEL_DIR), DEVICE)
    return _pipe
def load_audio(upload_path: Path) -> tuple[np.ndarray, int]:
    """Decode an uploaded audio file to a mono float32 array via ffmpeg + soundfile.

    ffmpeg is asked to write a single-channel, 16 kHz WAV into a scratch file,
    which is then read back with soundfile and removed again.

    Args:
        upload_path: Path of the audio file to decode.

    Returns:
        ``(samples, sample_rate)``: the float32 samples (averaged over axis 1
        if soundfile returns more than one dimension) and the sample rate
        reported by soundfile.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Reserve a scratch file name; ffmpeg overwrites the file by name (-y).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as scratch:
        wav_path = Path(scratch.name)

    try:
        ffmpeg_cmd = [
            "ffmpeg",
            "-nostdin",
            "-hide_banner",
            "-loglevel", "error",
            "-y",
            "-i", str(upload_path),
            "-ac", "1",
            "-ar", "16000",
            "-f", "wav",
            str(wav_path),
        ]
        subprocess.run(ffmpeg_cmd, check=True)

        samples, rate = sf.read(wav_path, dtype="float32")
        if samples.ndim > 1:
            # Defensive down-mix in case more than one channel comes back.
            samples = samples.mean(axis=1)
        return samples, int(rate)
    finally:
        # The scratch file may already be gone; that is fine.
        wav_path.unlink(missing_ok=True)
@app.get("/")
def root():
    """Serve a one-line plain-text banner identifying the service."""
    banner = "OpenVINO NPU Whisper server\n"
    return PlainTextResponse(banner)
@app.get("/health")
def health():
    """Report device availability and model presence.

    Returns:
        A dict describing the configured device, the devices OpenVINO lists,
        the NPU's full device name (``None`` when no NPU is listed), the model
        directory and whether it exists, and the current ``busy_us()`` reading.
        ``ok`` is true exactly when ``"NPU"`` is among the listed devices.
        If anything raises, a 500 ``JSONResponse`` carrying the exception type
        and message is returned instead.
    """
    try:
        core = get_core()
        devices = core.available_devices
        has_npu = "NPU" in devices
        npu_name = None
        if has_npu:
            npu_name = core.get_property("NPU", "FULL_DEVICE_NAME")
        payload = {
            "ok": has_npu,
            "device": DEVICE,
            "devices": devices,
            "npu": npu_name,
            "model_dir": str(MODEL_DIR),
            "model_exists": MODEL_DIR.exists(),
            "npu_busy_time_us": busy_us(),
        }
    except Exception as exc:
        failure = {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
        return JSONResponse(status_code=500, content=failure)
    else:
        return payload
def _transcribe_upload(upload_path: Path) -> tuple[str, int]:
    """Decode *upload_path* and run Whisper on it (blocking; run off the event loop).

    Returns:
        ``(text, sample_rate)``: the stripped transcript and the sample rate
        reported by ``load_audio``.
    """
    audio, sr = load_audio(upload_path)
    # OpenVINO GenAI WhisperPipeline appears stateful for Whisper generation on
    # this stack: reusing one pipeline produced unstable language detection on
    # repeated short clips. Recreate per request for correctness; OpenVINO's
    # compiled-cache path keeps warm init reasonably fast.
    with _lock:
        pipe = ov_genai.WhisperPipeline(str(MODEL_DIR), DEVICE)
        result = pipe.generate(audio)
        text = str(result).strip()
    return text, sr


@app.post("/v1/audio/transcriptions")
async def transcriptions(
    file: UploadFile = File(...),
    model: Optional[str] = Form(default=None),
    language: Optional[str] = Form(default=None),
    response_format: Optional[str] = Form(default="json"),
):
    """Transcribe an uploaded audio file with the Whisper pipeline.

    Args:
        file: Uploaded audio. It is spooled to a temporary file whose suffix
            comes from the client-supplied filename (``.audio`` if none).
        model: Not used for inference; echoed back in the JSON response,
            falling back to the model directory name.
        language: Accepted as a form field but currently not forwarded to the
            pipeline.
        response_format: ``"text"`` returns the bare transcript as plain text;
            any other value returns the JSON payload.

    Returns:
        A ``PlainTextResponse`` with the transcript, or a dict with the
        transcript, wall-clock duration, sample rate, device, model name and
        the change in ``busy_us()`` across the request (``None`` if either
        reading was unavailable).
    """
    suffix = Path(file.filename or "audio").suffix or ".audio"
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    upload_path = Path(tmp.name)
    try:
        # Write inside the try so a failed read/write still removes the
        # delete=False temp file in the finally below.
        with tmp:
            tmp.write(await file.read())

        before = busy_us()
        t0 = time.perf_counter()
        # ffmpeg decoding and inference are blocking calls; running them
        # directly in this coroutine would stall the event loop (and every
        # other request) for the whole transcription, so use a worker thread.
        text, sr = await asyncio.to_thread(_transcribe_upload, upload_path)
        elapsed = time.perf_counter() - t0
        after = busy_us()

        if response_format == "text":
            return PlainTextResponse(text)
        return {
            "text": text,
            "duration_seconds": round(elapsed, 4),
            "sample_rate": sr,
            "device": DEVICE,
            "model": model or MODEL_DIR.name,
            "npu_busy_delta_us": None if before is None or after is None else after - before,
        }
    finally:
        upload_path.unlink(missing_ok=True)