feat: refresh OpenVINO GenAI NPU worker prototype
This commit is contained in:
@@ -10,31 +10,42 @@ import argparse
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import openvino_genai as ov_genai
|
||||
from typing import Any
|
||||
|
||||
DEFAULT_MODEL = "/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov"
|
||||
DEFAULT_CACHE = "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4"
|
||||
BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
|
||||
|
||||
|
||||
def read_busy() -> int:
|
||||
return int(BUSY_PATH.read_text().strip())
|
||||
def import_openvino_genai() -> Any:
|
||||
import openvino_genai as ov_genai # type: ignore[import-not-found]
|
||||
|
||||
return ov_genai
|
||||
|
||||
|
||||
def read_busy(path: Path = BUSY_PATH) -> int:
|
||||
return int(path.read_text().strip())
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--model", default=DEFAULT_MODEL)
|
||||
parser.add_argument("--cache-dir", default=DEFAULT_CACHE)
|
||||
parser.add_argument("--prompt", default="Write a concise title for: User asked Atlas to summarize NPU worker options.")
|
||||
parser.add_argument("--busy-path", default=str(BUSY_PATH))
|
||||
parser.add_argument("--prompt", default="Write a concise title for: Synthetic NPU worker contract smoke with no routing changes.")
|
||||
parser.add_argument("--max-new-tokens", type=int, default=24)
|
||||
args = parser.parse_args()
|
||||
|
||||
model_path = Path(args.model)
|
||||
cache_dir = Path(args.cache_dir)
|
||||
busy_path = Path(args.busy_path)
|
||||
cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
if not model_path.exists():
|
||||
raise SystemExit(f"model path does not exist: {model_path}")
|
||||
if not busy_path.exists():
|
||||
raise SystemExit(f"NPU busy-time counter does not exist: {busy_path}")
|
||||
if args.max_new_tokens < 1 or args.max_new_tokens > 256:
|
||||
raise SystemExit("max-new-tokens must be between 1 and 256")
|
||||
|
||||
config = {
|
||||
"CACHE_DIR": str(cache_dir),
|
||||
@@ -44,15 +55,16 @@ def main() -> int:
|
||||
"GENERATE_HINT": "FAST_COMPILE",
|
||||
}
|
||||
|
||||
before = read_busy()
|
||||
ov_genai = import_openvino_genai()
|
||||
before = read_busy(busy_path)
|
||||
load_start = time.monotonic()
|
||||
pipe = ov_genai.LLMPipeline(str(model_path), "NPU", config)
|
||||
pipe = ov_genai.LLMPipeline(str(model_path), "NPU", **config)
|
||||
load_ms = round((time.monotonic() - load_start) * 1000, 2)
|
||||
|
||||
gen_start = time.monotonic()
|
||||
output = pipe.generate(args.prompt, max_new_tokens=args.max_new_tokens)
|
||||
gen_ms = round((time.monotonic() - gen_start) * 1000, 2)
|
||||
after = read_busy()
|
||||
after = read_busy(busy_path)
|
||||
result = {
|
||||
"model": str(model_path),
|
||||
"device": "NPU",
|
||||
|
||||
Reference in New Issue
Block a user