From a0f558422010496c4d2be576c32786774c8158b7 Mon Sep 17 00:00:00 2001
From: William Valentin <william.valentin.info@gmail.com>
Date: Sat, 7 Feb 2026 17:27:27 -0800
Subject: [PATCH] docs: update CHANGELOG, state.json, and default config for
 local model tool calling

---
 CHANGELOG.md          |  6 ++++++
 config/default.yaml   |  2 +-
 docs/plans/state.json | 29 +++++++++++++++++++++++++++--
 3 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index af88d81..b8adab1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -68,6 +68,12 @@ All notable changes to Flynn are documented in this file.
   directory. Available in all tool profiles.
 - **Runtime Context Injection** -- System prompt now automatically includes current
   date and time via a `# Runtime Context` section in every session
+- **Local Model Tool Calling** -- Ollama and llama.cpp clients now support tool
+  calling. Tools are converted to each backend's native format, tool call responses
+  are parsed with generated IDs, and `stopReason` is set to `tool_use`. Ollama
+  streaming also handles `thinking` fields from reasoning models (deepseek-r1,
+  glm-4.7-flash). llama.cpp accumulates streaming tool call deltas across chunks.
+  16 tests across both clients (8 Ollama + 8 llama.cpp), 12 of them new.
 
 ### Changed
 
diff --git a/config/default.yaml b/config/default.yaml
index 8c92447..c28e9d8 100644
--- a/config/default.yaml
+++ b/config/default.yaml
@@ -37,7 +37,7 @@ models:
     model: claude-sonnet-4-20250514
   local:
     provider: ollama
-    model: llama3.2:1b
+    model: glm-4.7-flash
 
   # ── Global fallback chain ──────────────────────────────────────────
   # Entries can be tier names (default, fast, complex, local) or keys
diff --git a/docs/plans/state.json b/docs/plans/state.json
index 3a156ce..1f665fc 100644
--- a/docs/plans/state.json
+++ b/docs/plans/state.json
@@ -725,6 +725,31 @@
         }
       }
     },
+    "local-model-tool-calling": {
+      "status": "completed",
+      "date": "2026-02-07",
+      "summary": "Add tool calling support to both local model backends (Ollama and llama.cpp), plus thinking/reasoning field handling for Ollama",
+      "phases": {
+        "ollama_tool_calling": {
+          "status": "completed",
+          "description": "Pass tools to Ollama API in correct format, parse tool_calls from responses with generated IDs, set stopReason to 'tool_use'. Handle thinking field from reasoning models (deepseek-r1, glm-4.7-flash) — use as content fallback and expose via thinkingContent. Streaming support for both tool calls and thinking.",
+          "files_modified": [
+            "src/models/local/ollama.ts",
+            "src/models/local/ollama.test.ts"
+          ],
+          "test_status": "8/8 passing (was 1)"
+        },
+        "llamacpp_tool_calling": {
+          "status": "completed",
+          "description": "Pass tools via OpenAI-compatible /v1/chat/completions endpoint, parse tool_calls from responses, accumulate streaming tool call deltas across incremental chunks. Set stopReason to 'tool_use' when tool calls present.",
+          "files_modified": [
+            "src/models/local/llamacpp.ts",
+            "src/models/local/llamacpp.test.ts"
+          ],
+          "test_status": "8/8 passing (was 3)"
+        }
+      }
+    },
     "earlier_plans": {
       "plans": [
         { "file": "2026-02-02-flynn-design.md", "status": "completed" },
@@ -748,7 +773,7 @@
   },
 
   "overall_progress": {
-    "total_test_count": 983,
+    "total_test_count": 995,
     "all_tests_passing": true,
     "p0_completion": "3/3 (100%)",
     "p1_completion": "4/4 (100%)",
@@ -762,6 +787,6 @@
     "tier1_completion": "5/5 (100%) — !!think prefix, /verbose command, typing indicators (Discord/WhatsApp), session pruning (TTL), tool groups",
     "tier2_completion": "4/4 (100%) — inbound webhooks, vector memory search, Dockerfile, heartbeat monitor",
     "feature_gap_scorecard": "88/116 match (76%), 1 partial (1%), 27 missing (23%)",
-    "next_up": "All phases P0-P8 and Tiers 1-2 complete. Tier 3 in progress (file.patch + Gmail done). Remaining gaps: Tier 3 channels (Signal, Matrix, Teams, Google Chat), Tier 4 deferred/niche items"
+    "next_up": "All phases P0-P8 and Tiers 1-2 complete. Tier 3 in progress (file.patch + Gmail done). Local model tool calling added. Remaining gaps: Tier 3 channels (Signal, Matrix, Teams, Google Chat), Tier 4 deferred/niche items"
   }
 }