148 lines
4.3 KiB
Python
148 lines
4.3 KiB
Python
import os
|
|
import subprocess
|
|
import tempfile
|
|
import threading
|
|
import time
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import numpy as np
|
|
import openvino as ov
|
|
import openvino_genai as ov_genai
|
|
import soundfile as sf
|
|
from fastapi import FastAPI, File, Form, UploadFile
|
|
from fastapi.responses import JSONResponse, PlainTextResponse
|
|
|
|
# Directory holding the exported Whisper model; override with WHISPER_MODEL_DIR.
MODEL_DIR = Path(os.getenv("WHISPER_MODEL_DIR", "/models/whisper-tiny-fp16-ov"))
# OpenVINO device name handed to the Whisper pipeline; override with WHISPER_DEVICE.
DEVICE = os.getenv("WHISPER_DEVICE", "NPU")
# sysfs file read by busy_us() to obtain the NPU busy-time counter.
BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
|
|
|
|
# ASGI application object; the route decorators in this module register on it.
app = FastAPI(title="OpenVINO NPU Whisper server", version="0.1.0")
# Process-wide mutex taken around Whisper pipeline construction and inference.
_lock = threading.Lock()
# Lazily created WhisperPipeline cache; populated by get_pipe().
_pipe = None
# Lazily created ov.Core cache; populated by get_core().
_core = None
|
|
|
|
|
|
def busy_us() -> Optional[int]:
    """Return the integer currently stored in ``BUSY_PATH``, or ``None``.

    The read is best-effort: ``None`` is returned when the file is missing or
    unreadable (``OSError``) or when its contents are not a valid integer
    (``ValueError``, which also covers undecodable bytes). Any other exception
    indicates a programming error and is allowed to propagate instead of being
    silently swallowed.
    """
    try:
        return int(BUSY_PATH.read_text().strip())
    except (OSError, ValueError):
        return None
|
|
|
|
|
|
def get_core():
    """Return the shared ``ov.Core`` instance, constructing it on first use.

    The instance is cached in the module-level ``_core`` so later calls reuse
    the same object.
    """
    global _core
    if _core is not None:
        return _core
    _core = ov.Core()
    return _core
|
|
|
|
|
|
def get_pipe():
    """Return the module-wide ``WhisperPipeline``, building it on first call.

    The pipeline is constructed from ``MODEL_DIR`` on ``DEVICE`` and cached in
    the module-level ``_pipe`` so later calls reuse the same object.
    """
    global _pipe
    if _pipe is not None:
        return _pipe
    model_path = str(MODEL_DIR)
    _pipe = ov_genai.WhisperPipeline(model_path, DEVICE)
    return _pipe
|
|
|
|
|
|
def load_audio(upload_path: Path) -> tuple[np.ndarray, int]:
    """Convert an uploaded audio file to mono 16 kHz float32 samples.

    ``ffmpeg`` transcodes ``upload_path`` into a temporary WAV file, which is
    then read back with ``soundfile``. The temporary WAV is removed before
    returning, whether or not the conversion succeeded.

    Returns:
        ``(samples, sample_rate)`` as read from the converted WAV.

    Raises:
        subprocess.CalledProcessError: if ``ffmpeg`` exits with a non-zero
            status.
    """
    # Reserve a unique output path; the handle is closed straight away and
    # ffmpeg overwrites the empty file ("-y").
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        wav_path = Path(handle.name)

    ffmpeg_cmd = [
        "ffmpeg",
        "-nostdin",
        "-hide_banner",
        "-loglevel",
        "error",
        "-y",
        "-i",
        str(upload_path),
        "-ac",
        "1",
        "-ar",
        "16000",
        "-f",
        "wav",
        str(wav_path),
    ]
    try:
        subprocess.run(ffmpeg_cmd, check=True)
        samples, sample_rate = sf.read(wav_path, dtype="float32")
        # Collapse to one channel if the decoded data is still multi-channel.
        mono = samples.mean(axis=1) if samples.ndim > 1 else samples
        return mono, int(sample_rate)
    finally:
        wav_path.unlink(missing_ok=True)
|
|
|
|
|
|
@app.get("/")
def root():
    """Serve a one-line plain-text banner identifying this server."""
    banner = "OpenVINO NPU Whisper server\n"
    return PlainTextResponse(banner)
|
|
|
|
|
|
@app.get("/health")
def health():
    """Report OpenVINO device availability and model/NPU status.

    ``ok`` is true only when ``"NPU"`` appears among the devices reported by
    the OpenVINO core. If anything raises while gathering the report, a 500
    JSON response carrying the exception type and message is returned instead.
    """
    try:
        core = get_core()
        devices = core.available_devices
        has_npu = "NPU" in devices
        # Only query the device name when an NPU is actually listed.
        npu_name = None
        if has_npu:
            npu_name = core.get_property("NPU", "FULL_DEVICE_NAME")
        report = {
            "ok": has_npu,
            "device": DEVICE,
            "devices": devices,
            "npu": npu_name,
            "model_dir": str(MODEL_DIR),
            "model_exists": MODEL_DIR.exists(),
            "npu_busy_time_us": busy_us(),
        }
        return report
    except Exception as e:
        failure = {"ok": False, "error": f"{type(e).__name__}: {e}"}
        return JSONResponse(status_code=500, content=failure)
|
|
|
|
|
|
def _run_whisper(upload_path: Path) -> tuple[str, int]:
    """Decode ``upload_path`` and transcribe it; blocking, so call it off the event loop.

    Returns the stripped transcript text and the sample rate reported by
    ``load_audio``.
    """
    audio, sr = load_audio(upload_path)
    # OpenVINO GenAI WhisperPipeline appears stateful for Whisper generation on
    # this stack: reusing one pipeline produced unstable language detection on
    # repeated short clips. Recreate per request for correctness; OpenVINO's
    # compiled-cache path keeps warm init reasonably fast.
    with _lock:
        pipe = ov_genai.WhisperPipeline(str(MODEL_DIR), DEVICE)
        result = pipe.generate(audio)
    return str(result).strip(), sr


@app.post("/v1/audio/transcriptions")
async def transcriptions(
    file: UploadFile = File(...),
    model: Optional[str] = Form(default=None),
    language: Optional[str] = Form(default=None),
    response_format: Optional[str] = Form(default="json"),
):
    """Transcribe an uploaded audio file with the Whisper pipeline.

    Args:
        file: The uploaded audio; its filename suffix is reused for the
            temporary copy handed to ffmpeg.
        model: Echoed back in the JSON response (falls back to the model
            directory name). It does not select a model.
        language: Accepted as a form field but not forwarded to the pipeline
            by this handler.
        response_format: ``"text"`` returns the transcript as plain text; any
            other value returns the JSON payload.

    Returns:
        A plain-text response, a JSON-serializable dict with the transcript
        and timing/NPU counters, or a 400 JSON error when ffmpeg cannot decode
        the upload.
    """
    # Local import keeps this block self-contained; asyncio.to_thread needs
    # Python 3.9+, which the file already requires for its builtin generics.
    import asyncio

    suffix = Path(file.filename or "audio").suffix or ".audio"
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        upload_path = Path(tmp.name)

    try:
        # Written inside the try so the temp file is removed even when reading
        # the upload or writing it to disk fails.
        upload_path.write_bytes(await file.read())

        before = busy_us()
        t0 = time.perf_counter()
        try:
            # ffmpeg, the lock wait and inference all block; run them in a
            # worker thread so the event loop keeps serving other requests.
            text, sr = await asyncio.to_thread(_run_whisper, upload_path)
        except subprocess.CalledProcessError as e:
            # ffmpeg rejected the upload: report a client error, not a 500.
            return JSONResponse(
                status_code=400,
                content={
                    "error": f"could not decode audio (ffmpeg exit status {e.returncode})"
                },
            )
        elapsed = time.perf_counter() - t0
        after = busy_us()

        if response_format == "text":
            return PlainTextResponse(text)
        return {
            "text": text,
            "duration_seconds": round(elapsed, 4),
            "sample_rate": sr,
            "device": DEVICE,
            "model": model or MODEL_DIR.name,
            "npu_busy_delta_us": None if before is None or after is None else after - before,
        }
    finally:
        upload_path.unlink(missing_ok=True)
|