feat(rag): add OpenVINO NPU embeddings service

This commit is contained in:
William Valentin
2026-06-03 18:28:16 -07:00
parent 7745648a13
commit fe4dea0f07
7 changed files with 268 additions and 14 deletions
+12 -8
View File
@@ -16,7 +16,8 @@ OPENCLAW_PORT ?= 18789
QEMU_URI ?= qemu:///system
LLAMA_CPP_URL ?= http://127.0.0.1:18806
OLLAMA_URL ?= http://127.0.0.1:18807
OLLAMA_EMBED_MODEL ?= nomic-embed-text
OPENVINO_EMBED_URL ?= http://127.0.0.1:18817
OPENVINO_EMBED_MODEL ?= bge-base-en-v1.5-int8-ov
DC := $(COMPOSE) -f $(COMPOSE_FILE)
COMMON_DC := $(COMPOSE) -f $(COMMON_COMPOSE_FILE)
@@ -28,7 +29,7 @@ REQUIRE_CONFIRM = test "$(CONFIRM)" = "yes" || { echo "This target changes VM/ga
REQUIRE_INSTANCE = test -n "$(OPENCLAW_HOST)" -a -n "$(OPENCLAW_DOMAIN)" || { echo "Unknown OpenClaw HOST=$(HOST) in $(OPENCLAW_REGISTRY)"; exit 2; }
.DEFAULT_GOAL := help
.PHONY: help config ps status local-ai-health ollama-embed-health up down restart pull build logs shell clean \
.PHONY: help config ps status local-ai-health openvino-embed-health up down restart pull build logs shell clean \
api-up api-down api-restart api-init api-init-force api-health api-dedup api-logs \
voice-up voice-gpu voice-cpu voice-down voice-build voice-logs \
search-up search-down automation-up automation-down n8n-logs \
@@ -53,7 +54,7 @@ ps: ## Show root Docker Compose service status.
status: ps local-ai-health ## Show Docker service status plus host-side local AI endpoints.
local-ai-health: ## Check host-side llama.cpp LLM and Ollama embeddings endpoints.
local-ai-health: ## Check host-side llama.cpp LLM, Ollama fallback, and OpenVINO NPU embeddings endpoints.
@printf "\nHost-side local AI endpoints:\n"
@printf "llama.cpp (%s): " "$(LLAMA_CPP_URL)"; \
if curl -fsS --max-time 3 "$(LLAMA_CPP_URL)/v1/models" >/tmp/swarm-llama-models.json 2>/dev/null; then \
@@ -62,14 +63,17 @@ local-ai-health: ## Check host-side llama.cpp LLM and Ollama embeddings endpoint
printf "FAILED\n"; \
fi
@printf "ollama.service: "; systemctl --user is-active ollama.service 2>/dev/null || true
@printf "Ollama API (%s): " "$(OLLAMA_URL)"; \
@printf "Ollama fallback API (%s): " "$(OLLAMA_URL)"; \
curl -fsS --max-time 3 "$(OLLAMA_URL)/api/version" 2>/dev/null | jq -r '"OK version=" + .version' || printf "FAILED\n"
@printf "openvino-embeddings.service: "; systemctl --user is-active openvino-embeddings.service 2>/dev/null || true
@printf "OpenVINO NPU embeddings (%s): " "$(OPENVINO_EMBED_URL)"; \
curl -fsS --max-time 3 "$(OPENVINO_EMBED_URL)/healthz" 2>/dev/null | jq -r '"OK model=" + .model + " device=" + .device' || printf "FAILED\n"
ollama-embed-health: ## Smoke-test Ollama embeddings using OLLAMA_EMBED_MODEL=nomic-embed-text.
@curl -fsS --max-time 20 "$(OLLAMA_URL)/api/embed" \
openvino-embed-health: ## Smoke-test OpenVINO NPU embeddings using OPENVINO_EMBED_MODEL=bge-base-en-v1.5-int8-ov.
@curl -fsS --max-time 20 "$(OPENVINO_EMBED_URL)/v1/embeddings" \
-H 'Content-Type: application/json' \
-d '{"model":"$(OLLAMA_EMBED_MODEL)","input":"socket check"}' \
| jq -r '"embeddings=" + ((.embeddings // []) | length | tostring) + " dim=" + (((.embeddings // [[]])[0] // []) | length | tostring)'
-d '{"model":"$(OPENVINO_EMBED_MODEL)","input":"socket check"}' \
| jq -r '"embeddings=" + ((.data // []) | length | tostring) + " dim=" + (((.data // [{embedding: []}])[0].embedding // []) | length | tostring) + " npu_busy_delta_us=" + ((.npu_busy_delta_us // 0) | tostring)'
up: ## Start root compose services. Use PROFILE=api,voice,search,automation or SERVICE=name.
@if [ -n "$(PROFILE)" ]; then \