feat(rag): add OpenVINO NPU embeddings service

2026-06-03 18:28:16 -07:00
parent 7745648a13
commit fe4dea0f07
7 changed files with 268 additions and 14 deletions
@@ -16,7 +16,8 @@ OPENCLAW_PORT ?= 18789
 QEMU_URI ?= qemu:///system
 LLAMA_CPP_URL ?= http://127.0.0.1:18806
 OLLAMA_URL ?= http://127.0.0.1:18807
-OLLAMA_EMBED_MODEL ?= nomic-embed-text
+OPENVINO_EMBED_URL ?= http://127.0.0.1:18817
+OPENVINO_EMBED_MODEL ?= bge-base-en-v1.5-int8-ov

 DC := $(COMPOSE) -f $(COMPOSE_FILE)
 COMMON_DC := $(COMPOSE) -f $(COMMON_COMPOSE_FILE)
@@ -28,7 +29,7 @@ REQUIRE_CONFIRM = test "$(CONFIRM)" = "yes" || { echo "This target changes VM/ga
 REQUIRE_INSTANCE = test -n "$(OPENCLAW_HOST)" -a -n "$(OPENCLAW_DOMAIN)" || { echo "Unknown OpenClaw HOST=$(HOST) in $(OPENCLAW_REGISTRY)"; exit 2; }

 .DEFAULT_GOAL := help
-.PHONY: help config ps status local-ai-health ollama-embed-health up down restart pull build logs shell clean \
+.PHONY: help config ps status local-ai-health openvino-embed-health up down restart pull build logs shell clean \
 	api-up api-down api-restart api-init api-init-force api-health api-dedup api-logs \
 	voice-up voice-gpu voice-cpu voice-down voice-build voice-logs \
 	search-up search-down automation-up automation-down n8n-logs \
@@ -53,7 +54,7 @@ ps: ## Show root Docker Compose service status.

 status: ps local-ai-health ## Show Docker service status plus host-side local AI endpoints.

-local-ai-health: ## Check host-side llama.cpp LLM and Ollama embeddings endpoints.
+local-ai-health: ## Check host-side llama.cpp LLM, Ollama fallback, and OpenVINO NPU embeddings endpoints.
 	@printf "\nHost-side local AI endpoints:\n"
 	@printf "llama.cpp (%s): " "$(LLAMA_CPP_URL)"; \
 		if curl -fsS --max-time 3 "$(LLAMA_CPP_URL)/v1/models" >/tmp/swarm-llama-models.json 2>/dev/null; then \
@@ -62,14 +63,17 @@ local-ai-health: ## Check host-side llama.cpp LLM and Ollama embeddings endpoint
 			printf "FAILED\n"; \
 		fi
 	@printf "ollama.service: "; systemctl --user is-active ollama.service 2>/dev/null || true
-	@printf "Ollama API (%s): " "$(OLLAMA_URL)"; \
+	@printf "Ollama fallback API (%s): " "$(OLLAMA_URL)"; \
 		curl -fsS --max-time 3 "$(OLLAMA_URL)/api/version" 2>/dev/null | jq -r '"OK version=" + .version' || printf "FAILED\n"
+	@printf "openvino-embeddings.service: "; systemctl --user is-active openvino-embeddings.service 2>/dev/null || true
+	@printf "OpenVINO NPU embeddings (%s): " "$(OPENVINO_EMBED_URL)"; \
+		curl -fsS --max-time 3 "$(OPENVINO_EMBED_URL)/healthz" 2>/dev/null | jq -er '"OK model=" + .model + " device=" + .device' || printf "FAILED\n"

-ollama-embed-health: ## Smoke-test Ollama embeddings using OLLAMA_EMBED_MODEL=nomic-embed-text.
-	@curl -fsS --max-time 20 "$(OLLAMA_URL)/api/embed" \
+openvino-embed-health: ## Smoke-test OpenVINO NPU embeddings using OPENVINO_EMBED_MODEL (default bge-base-en-v1.5-int8-ov); fails on an empty response.
+	@curl -fsS --max-time 20 "$(OPENVINO_EMBED_URL)/v1/embeddings" \
 		-H 'Content-Type: application/json' \
-		-d '{"model":"$(OLLAMA_EMBED_MODEL)","input":"socket check"}' \
-		| jq -r '"embeddings=" + ((.embeddings // []) | length | tostring) + " dim=" + (((.embeddings // [[]])[0] // []) | length | tostring)'
+		-d '{"model":"$(OPENVINO_EMBED_MODEL)","input":"socket check"}' \
+		| jq -er '"embeddings=" + ((.data // []) | length | tostring) + " dim=" + (((.data // [{embedding: []}])[0].embedding // []) | length | tostring) + " npu_busy_delta_us=" + ((.npu_busy_delta_us // 0) | tostring)'

 up: ## Start root compose services. Use PROFILE=api,voice,search,automation or SERVICE=name.
 	@if [ -n "$(PROFILE)" ]; then \