docs: improve local backend reliability and documentation

- Switch local endpoints from localhost to 127.0.0.1 to avoid IPv6 resolution issues
- Add recommended Ollama user systemd service example with keep-alive and CUDA support
2026-02-26 12:19:28 -08:00
parent d07e05d4cc
commit adcef2249b
2 changed files with 40 additions and 9 deletions
@@ -186,11 +186,11 @@ models:
    ollama:
      provider: ollama
      model: glm-4.7-flash
-      endpoint: http://localhost:11434
+      endpoint: http://127.0.0.1:11434
    llamacpp:
      provider: llamacpp
      model: gpt-oss-20b
-      endpoint: http://localhost:8080
+      endpoint: http://127.0.0.1:8080
  #
  # Then reference them in fallback_chain:
  #   fallback_chain: [ollama, llamacpp, local]
@@ -311,7 +311,7 @@ memory:
    enabled: true
    provider: ollama
    model: nomic-embed-text
-    endpoint: http://localhost:11434
+    endpoint: http://127.0.0.1:11434
    chunk_size: 512
    chunk_overlap: 50
    top_k: 5
@@ -565,7 +565,7 @@ automation:
 #   enabled: true
 #   provider:
 #     type: custom                            # openai, groq, ollama, llamacpp, custom
-#     endpoint: "http://localhost:18801/v1/audio/transcriptions"
+#     endpoint: "http://127.0.0.1:18801/v1/audio/transcriptions"
 #     api_key: "${WHISPER_API_KEY}"            # Optional Bearer token
 #     model: "whisper-1"                       # Model name (default: whisper-1)
 #   talk_mode: