docs: improve local backend reliability and documentation

- Switch local endpoints from localhost to 127.0.0.1 to avoid IPv6 resolution issues
- Add recommended Ollama user systemd service example with keep-alive and CUDA support
This commit is contained in:
William Valentin
2026-02-26 12:19:28 -08:00
parent d07e05d4cc
commit adcef2249b
2 changed files with 40 additions and 9 deletions
+4 -4
View File
@@ -186,11 +186,11 @@ models:
ollama:
provider: ollama
model: glm-4.7-flash
endpoint: http://localhost:11434
endpoint: http://127.0.0.1:11434
llamacpp:
provider: llamacpp
model: gpt-oss-20b
endpoint: http://localhost:8080
endpoint: http://127.0.0.1:8080
#
# Then reference them in fallback_chain:
# fallback_chain: [ollama, llamacpp, local]
@@ -311,7 +311,7 @@ memory:
enabled: true
provider: ollama
model: nomic-embed-text
endpoint: http://localhost:11434
endpoint: http://127.0.0.1:11434
chunk_size: 512
chunk_overlap: 50
top_k: 5
@@ -565,7 +565,7 @@ automation:
# enabled: true
# provider:
# type: custom # openai, groq, ollama, llamacpp, custom
# endpoint: "http://localhost:18801/v1/audio/transcriptions"
# endpoint: "http://127.0.0.1:18801/v1/audio/transcriptions"
# api_key: "${WHISPER_API_KEY}" # Optional Bearer token
# model: "whisper-1" # Model name (default: whisper-1)
# talk_mode: