feat(voice): add OpenVINO NPU Whisper service
This commit is contained in:
@@ -0,0 +1,147 @@
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import openvino as ov
|
||||
import openvino_genai as ov_genai
|
||||
import soundfile as sf
|
||||
from fastapi import FastAPI, File, Form, UploadFile
|
||||
from fastapi.responses import JSONResponse, PlainTextResponse
|
||||
|
||||
# Directory of the Whisper model handed to the pipeline; override with
# the WHISPER_MODEL_DIR environment variable.
MODEL_DIR = Path(os.getenv("WHISPER_MODEL_DIR", "/models/whisper-tiny-fp16-ov"))

# Device name handed to the pipeline; override with WHISPER_DEVICE.
DEVICE = os.getenv("WHISPER_DEVICE", "NPU")

# sysfs file read by busy_us(). Presumably a cumulative NPU busy-time counter
# in microseconds, judging by its name -- confirm against the driver docs.
BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")

app = FastAPI(title="OpenVINO NPU Whisper server", version="0.1.0")

# Held while a WhisperPipeline is constructed and run for a request.
_lock = threading.Lock()

# Lazily created singletons, filled in by get_pipe() and get_core().
_pipe = None
_core = None
|
||||
|
||||
|
||||
def busy_us() -> Optional[int]:
    """Return the integer stored in ``BUSY_PATH``, or ``None`` if unavailable.

    This is a best-effort probe: a missing or unreadable file and content that
    does not parse as an integer all yield ``None`` instead of raising, so
    callers can report the value as optional.
    """
    try:
        return int(BUSY_PATH.read_text().strip())
    except (OSError, ValueError):
        # OSError: file absent / permission denied / read failure.
        # ValueError: undecodable bytes or non-integer content.
        return None
|
||||
|
||||
|
||||
def get_core():
    """Return the module-wide ``ov.Core`` instance, creating it on first use."""
    global _core
    if _core is not None:
        return _core
    _core = ov.Core()
    return _core
|
||||
|
||||
|
||||
def get_pipe():
    """Return the module-wide ``WhisperPipeline``, creating it on first use.

    The pipeline is built from ``MODEL_DIR`` for the configured ``DEVICE`` and
    cached in the module-level ``_pipe`` variable.
    """
    global _pipe
    if _pipe is not None:
        return _pipe
    _pipe = ov_genai.WhisperPipeline(str(MODEL_DIR), DEVICE)
    return _pipe
|
||||
|
||||
|
||||
def load_audio(upload_path: Path) -> tuple[np.ndarray, int]:
    """Convert an uploaded audio file to mono 16 kHz float32 samples.

    ffmpeg transcodes ``upload_path`` into a temporary WAV file (one channel,
    16000 Hz), which is then read back with soundfile. The temporary WAV is
    removed before returning, whether or not decoding succeeded.

    Returns:
        A ``(samples, sample_rate)`` pair.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    # Reserve a unique output path; ffmpeg overwrites the empty file ("-y").
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        converted = Path(handle.name)

    command = [
        "ffmpeg",
        "-nostdin",
        "-hide_banner",
        "-loglevel",
        "error",
        "-y",
        "-i",
        str(upload_path),
        "-ac",
        "1",
        "-ar",
        "16000",
        "-f",
        "wav",
        str(converted),
    ]
    try:
        subprocess.run(command, check=True)
        samples, rate = sf.read(converted, dtype="float32")
        if samples.ndim > 1:
            # Collapse any remaining channel axis by averaging the channels.
            samples = samples.mean(axis=1)
        return samples, int(rate)
    finally:
        converted.unlink(missing_ok=True)
|
||||
|
||||
|
||||
@app.get("/")
def root():
    """Serve a one-line plain-text banner identifying the service."""
    banner = "OpenVINO NPU Whisper server\n"
    return PlainTextResponse(banner)
|
||||
|
||||
|
||||
@app.get("/health")
def health():
    """Report device and model availability.

    Returns a JSON object describing the OpenVINO devices that are visible,
    the NPU's full device name (when an NPU is present), the configured model
    directory and whether it exists, and the current busy-time reading. Any
    exception raised while gathering this is turned into a 500 response with
    ``ok`` set to ``False`` and the error text.
    """
    try:
        core = get_core()
        devices = core.available_devices
        if "NPU" in devices:
            npu_name = core.get_property("NPU", "FULL_DEVICE_NAME")
        else:
            npu_name = None
        report = {
            "ok": "NPU" in devices,
            "device": DEVICE,
            "devices": devices,
            "npu": npu_name,
            "model_dir": str(MODEL_DIR),
            "model_exists": MODEL_DIR.exists(),
            "npu_busy_time_us": busy_us(),
        }
    except Exception as e:
        failure = {"ok": False, "error": f"{type(e).__name__}: {e}"}
        return JSONResponse(status_code=500, content=failure)
    return report
|
||||
|
||||
|
||||
def _transcribe_blocking(upload_path: Path) -> tuple[str, int]:
    """Decode ``upload_path`` and transcribe it; return ``(text, sample_rate)``.

    This function blocks (ffmpeg subprocess, lock acquisition, inference), so
    it must be run in a worker thread rather than on the event loop.
    """
    audio, sr = load_audio(upload_path)
    # OpenVINO GenAI WhisperPipeline appears stateful for Whisper generation on
    # this stack: reusing one pipeline produced unstable language detection on
    # repeated short clips. Recreate per request for correctness; OpenVINO's
    # compiled-cache path keeps warm init reasonably fast.
    with _lock:
        pipe = ov_genai.WhisperPipeline(str(MODEL_DIR), DEVICE)
        result = pipe.generate(audio)
    return str(result).strip(), sr


@app.post("/v1/audio/transcriptions")
async def transcriptions(
    file: UploadFile = File(...),
    model: Optional[str] = Form(default=None),
    language: Optional[str] = Form(default=None),
    response_format: Optional[str] = Form(default="json"),
):
    """Transcribe an uploaded audio file.

    Form fields:
        file: the audio upload; any format ffmpeg can decode.
        model: echoed back in the JSON response; defaults to the model
            directory name. It does not select a model.
        language: accepted but currently not forwarded to the pipeline.
        response_format: ``"text"`` returns the transcript as plain text;
            anything else returns a JSON object.

    Returns:
        The transcript (plain text or JSON with timing and device details),
        or a 400 JSON error when ffmpeg cannot decode the upload.
    """
    # Function-scope import keeps this block self-contained.
    import asyncio

    # NOTE(review): `language` is never passed to the pipeline, so the spoken
    # language is always auto-detected. Confirm whether it should be forwarded.

    suffix = Path(file.filename or "audio").suffix or ".audio"
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        upload_path = Path(tmp.name)

    try:
        # Write inside the try so a failed read/write cannot leak the file.
        upload_path.write_bytes(await file.read())

        before = busy_us()
        t0 = time.perf_counter()
        try:
            # Run the blocking decode + inference off the event loop so other
            # requests keep being served while this one is processed.
            text, sr = await asyncio.to_thread(_transcribe_blocking, upload_path)
        except subprocess.CalledProcessError as exc:
            # ffmpeg rejected the upload: a client error, not a server fault.
            return JSONResponse(
                status_code=400,
                content={
                    "error": f"could not decode audio (ffmpeg exit status {exc.returncode})"
                },
            )
        elapsed = time.perf_counter() - t0
        after = busy_us()

        if response_format == "text":
            return PlainTextResponse(text)
        return {
            "text": text,
            "duration_seconds": round(elapsed, 4),
            "sample_rate": sr,
            "device": DEVICE,
            "model": model or MODEL_DIR.name,
            "npu_busy_delta_us": None if before is None or after is None else after - before,
        }
    finally:
        upload_path.unlink(missing_ok=True)
|
||||
Reference in New Issue
Block a user