docs: update CHANGELOG, state.json, and default config for local model tool calling

2026-02-07 17:27:27 -08:00
parent fb20acfbcd
commit a0f5584220
3 changed files with 34 additions and 3 deletions
@@ -68,6 +68,12 @@ All notable changes to Flynn are documented in this file.
  directory. Available in all tool profiles.
 - **Runtime Context Injection** -- System prompt now automatically includes current
  date and time via a `# Runtime Context` section in every session
+- **Local Model Tool Calling** -- Ollama and llama.cpp clients now support tool
+  calling. Tools are converted to each backend's native format, tool call responses
+  are parsed with generated IDs, and `stopReason` is set to `tool_use`. Ollama
+  streaming also handles `thinking` fields from reasoning models (deepseek-r1,
+  glm-4.7-flash). llama.cpp accumulates streaming tool call deltas across chunks.
+  16 tests total (8 Ollama + 8 llama.cpp), 12 of them new.

 ### Changed

@@ -37,7 +37,7 @@ models:
    model: claude-sonnet-4-20250514
  local:
    provider: ollama
-    model: llama3.2:1b
+    model: glm-4.7-flash

  # ── Global fallback chain ──────────────────────────────────────────
  # Entries can be tier names (default, fast, complex, local) or keys
@@ -725,6 +725,31 @@
        }
      }
    },
+    "local-model-tool-calling": {
+      "status": "completed",
+      "date": "2026-02-07",
+      "summary": "Add tool calling support to both local model backends (Ollama and llama.cpp), plus thinking/reasoning field handling for Ollama",
+      "phases": {
+        "ollama_tool_calling": {
+          "status": "completed",
+          "description": "Pass tools to Ollama API in correct format, parse tool_calls from responses with generated IDs, set stopReason to 'tool_use'. Handle thinking field from reasoning models (deepseek-r1, glm-4.7-flash) — use as content fallback and expose via thinkingContent. Streaming support for both tool calls and thinking.",
+          "files_modified": [
+            "src/models/local/ollama.ts",
+            "src/models/local/ollama.test.ts"
+          ],
+          "test_status": "8/8 passing (was 1)"
+        },
+        "llamacpp_tool_calling": {
+          "status": "completed",
+          "description": "Pass tools via the OpenAI-compatible /v1/chat/completions endpoint, parse tool_calls from responses, accumulate streaming tool call deltas across incremental chunks. Set stopReason to 'tool_use' when tool calls are present.",
+          "files_modified": [
+            "src/models/local/llamacpp.ts",
+            "src/models/local/llamacpp.test.ts"
+          ],
+          "test_status": "8/8 passing (was 3)"
+        }
+      }
+    },
    "earlier_plans": {
      "plans": [
        { "file": "2026-02-02-flynn-design.md", "status": "completed" },
@@ -748,7 +773,7 @@
  },

  "overall_progress": {
-    "total_test_count": 983,
+    "total_test_count": 995,
    "all_tests_passing": true,
    "p0_completion": "3/3 (100%)",
    "p1_completion": "4/4 (100%)",
@@ -762,6 +787,6 @@
    "tier1_completion": "5/5 (100%) — !!think prefix, /verbose command, typing indicators (Discord/WhatsApp), session pruning (TTL), tool groups",
    "tier2_completion": "4/4 (100%) — inbound webhooks, vector memory search, Dockerfile, heartbeat monitor",
    "feature_gap_scorecard": "88/116 match (76%), 1 partial (1%), 27 missing (23%)",
-    "next_up": "All phases P0-P8 and Tiers 1-2 complete. Tier 3 in progress (file.patch + Gmail done). Remaining gaps: Tier 3 channels (Signal, Matrix, Teams, Google Chat), Tier 4 deferred/niche items"
+    "next_up": "All phases P0-P8 and Tiers 1-2 complete. Tier 3 in progress (file.patch + Gmail done). Local model tool calling added. Remaining gaps: Tier 3 channels (Signal, Matrix, Teams, Google Chat), Tier 4 deferred/niche items"
  }
 }