feat(rag): add OpenVINO NPU embeddings service
This commit is contained in:
@@ -16,7 +16,8 @@ OPENCLAW_PORT ?= 18789
|
||||
QEMU_URI ?= qemu:///system
|
||||
# Base URL of the host-side llama.cpp server. `?=` only assigns when the variable is not
# already set, so it can be overridden from the environment or the make command line.
# local-ai-health probes "$(LLAMA_CPP_URL)/v1/models".
LLAMA_CPP_URL ?= http://127.0.0.1:18806
|
||||
# Base URL of the Ollama API (labelled the "fallback" in local-ai-health), which probes
# "$(OLLAMA_URL)/api/version".
OLLAMA_URL ?= http://127.0.0.1:18807
|
||||
OLLAMA_EMBED_MODEL ?= nomic-embed-text
|
||||
# Base URL of the OpenVINO embeddings service. local-ai-health probes
# "$(OPENVINO_EMBED_URL)/healthz"; openvino-embed-health POSTs to
# "$(OPENVINO_EMBED_URL)/v1/embeddings".
OPENVINO_EMBED_URL ?= http://127.0.0.1:18817
|
||||
# Model name sent as the "model" field of the openvino-embed-health request body.
# NOTE(review): the help text of openvino-embed-health hard-codes this default value;
# keep the two in sync if the default changes.
OPENVINO_EMBED_MODEL ?= bge-base-en-v1.5-int8-ov
|
||||
|
||||
DC := $(COMPOSE) -f $(COMPOSE_FILE)
|
||||
COMMON_DC := $(COMPOSE) -f $(COMMON_COMPOSE_FILE)
|
||||
@@ -28,7 +29,7 @@ REQUIRE_CONFIRM = test "$(CONFIRM)" = "yes" || { echo "This target changes VM/ga
|
||||
REQUIRE_INSTANCE = test -n "$(OPENCLAW_HOST)" -a -n "$(OPENCLAW_DOMAIN)" || { echo "Unknown OpenClaw HOST=$(HOST) in $(OPENCLAW_REGISTRY)"; exit 2; }
|
||||
|
||||
.DEFAULT_GOAL := help
|
||||
.PHONY: help config ps status local-ai-health ollama-embed-health up down restart pull build logs shell clean \
|
||||
.PHONY: help config ps status local-ai-health openvino-embed-health up down restart pull build logs shell clean \
|
||||
api-up api-down api-restart api-init api-init-force api-health api-dedup api-logs \
|
||||
voice-up voice-gpu voice-cpu voice-down voice-build voice-logs \
|
||||
search-up search-down automation-up automation-down n8n-logs \
|
||||
@@ -53,7 +54,7 @@ ps: ## Show root Docker Compose service status.
|
||||
|
||||
status: ps local-ai-health ## Show Docker service status plus host-side local AI endpoints.
|
||||
|
||||
local-ai-health: ## Check host-side llama.cpp LLM and Ollama embeddings endpoints.
|
||||
local-ai-health: ## Check host-side llama.cpp LLM, Ollama fallback, and OpenVINO NPU embeddings endpoints.
|
||||
@printf "\nHost-side local AI endpoints:\n"
|
||||
@printf "llama.cpp (%s): " "$(LLAMA_CPP_URL)"; \
|
||||
if curl -fsS --max-time 3 "$(LLAMA_CPP_URL)/v1/models" >/tmp/swarm-llama-models.json 2>/dev/null; then \
|
||||
@@ -62,14 +63,17 @@ local-ai-health: ## Check host-side llama.cpp LLM and Ollama embeddings endpoint
|
||||
printf "FAILED\n"; \
|
||||
fi
|
||||
@printf "ollama.service: "; systemctl --user is-active ollama.service 2>/dev/null || true
|
||||
@printf "Ollama API (%s): " "$(OLLAMA_URL)"; \
|
||||
# Endpoint probes: curl output is piped into jq, so the shell only sees jq's exit status.
# `jq -e` exits non-zero when it receives no input at all, which makes an unreachable
# endpoint (curl prints nothing) actually reach the `|| printf "FAILED\n"` branch.
@printf "Ollama fallback API (%s): " "$(OLLAMA_URL)"; \
curl -fsS --max-time 3 "$(OLLAMA_URL)/api/version" 2>/dev/null | jq -er '"OK version=" + .version' || printf "FAILED\n"
# Unit state only; `|| true` keeps the recipe going when the unit is inactive or missing.
@printf "openvino-embeddings.service: "; systemctl --user is-active openvino-embeddings.service 2>/dev/null || true
@printf "OpenVINO NPU embeddings (%s): " "$(OPENVINO_EMBED_URL)"; \
curl -fsS --max-time 3 "$(OPENVINO_EMBED_URL)/healthz" 2>/dev/null | jq -er '"OK model=" + .model + " device=" + .device' || printf "FAILED\n"
|
||||
|
||||
ollama-embed-health: ## Smoke-test Ollama embeddings using OLLAMA_EMBED_MODEL=nomic-embed-text.
|
||||
@curl -fsS --max-time 20 "$(OLLAMA_URL)/api/embed" \
|
||||
openvino-embed-health: ## Smoke-test OpenVINO NPU embeddings using OPENVINO_EMBED_MODEL=bge-base-en-v1.5-int8-ov.
|
||||
@curl -fsS --max-time 20 "$(OPENVINO_EMBED_URL)/v1/embeddings" \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"model":"$(OLLAMA_EMBED_MODEL)","input":"socket check"}' \
|
||||
| jq -r '"embeddings=" + ((.embeddings // []) | length | tostring) + " dim=" + (((.embeddings // [[]])[0] // []) | length | tostring)'
|
||||
-d '{"model":"$(OPENVINO_EMBED_MODEL)","input":"socket check"}' \
|
||||
| jq -er 'if ((.data // []) | length) == 0 then error("no embeddings returned") else "embeddings=" + ((.data // []) | length | tostring) + " dim=" + (((.data // [{embedding: []}])[0].embedding // []) | length | tostring) + " npu_busy_delta_us=" + ((.npu_busy_delta_us // 0) | tostring) end'
|
||||
|
||||
up: ## Start root compose services. Use PROFILE=api,voice,search,automation or SERVICE=name.
|
||||
@if [ -n "$(PROFILE)" ]; then \
|
||||
|
||||
Reference in New Issue
Block a user