From a0f558422010496c4d2be576c32786774c8158b7 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Sat, 7 Feb 2026 17:27:27 -0800 Subject: [PATCH] docs: update CHANGELOG, state.json, and default config for local model tool calling --- CHANGELOG.md | 6 ++++++ config/default.yaml | 2 +- docs/plans/state.json | 29 +++++++++++++++++++++++++++-- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index af88d81..b8adab1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,12 @@ All notable changes to Flynn are documented in this file. directory. Available in all tool profiles. - **Runtime Context Injection** -- System prompt now automatically includes current date and time via a `# Runtime Context` section in every session +- **Local Model Tool Calling** -- Ollama and llama.cpp clients now support tool + calling. Tools are converted to each backend's native format, tool call responses + are parsed with generated IDs, and `stopReason` is set to `tool_use`. Ollama + streaming also handles `thinking` fields from reasoning models (deepseek-r1, + glm-4.7-flash). llama.cpp accumulates streaming tool call deltas across chunks. + 16 tests (8 Ollama + 8 llama.cpp). 
### Changed diff --git a/config/default.yaml b/config/default.yaml index 8c92447..c28e9d8 100644 --- a/config/default.yaml +++ b/config/default.yaml @@ -37,7 +37,7 @@ models: model: claude-sonnet-4-20250514 local: provider: ollama - model: llama3.2:1b + model: glm-4.7-flash # ── Global fallback chain ────────────────────────────────────────── # Entries can be tier names (default, fast, complex, local) or keys diff --git a/docs/plans/state.json b/docs/plans/state.json index 3a156ce..1f665fc 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -725,6 +725,31 @@ } } }, + "local-model-tool-calling": { + "status": "completed", + "date": "2026-02-07", + "summary": "Add tool calling support to both local model backends (Ollama and llama.cpp), plus thinking/reasoning field handling for Ollama", + "phases": { + "ollama_tool_calling": { + "status": "completed", + "description": "Pass tools to Ollama API in correct format, parse tool_calls from responses with generated IDs, set stopReason to 'tool_use'. Handle thinking field from reasoning models (deepseek-r1, glm-4.7-flash) — use as content fallback and expose via thinkingContent. Streaming support for both tool calls and thinking.", + "files_modified": [ + "src/models/local/ollama.ts", + "src/models/local/ollama.test.ts" + ], + "test_status": "8/8 passing (was 1)" + }, + "llamacpp_tool_calling": { + "status": "completed", + "description": "Pass tools via OpenAI-compatible /v1/chat/completions endpoint, parse tool_calls from responses, accumulate streaming tool call deltas across incremental chunks. 
Set stopReason to 'tool_use' when tool calls present.", + "files_modified": [ + "src/models/local/llamacpp.ts", + "src/models/local/llamacpp.test.ts" + ], + "test_status": "8/8 passing (was 3)" + } + } + }, "earlier_plans": { "plans": [ { "file": "2026-02-02-flynn-design.md", "status": "completed" }, @@ -748,7 +773,7 @@ }, "overall_progress": { - "total_test_count": 983, + "total_test_count": 995, "all_tests_passing": true, "p0_completion": "3/3 (100%)", "p1_completion": "4/4 (100%)", @@ -762,6 +787,6 @@ "tier1_completion": "5/5 (100%) — !!think prefix, /verbose command, typing indicators (Discord/WhatsApp), session pruning (TTL), tool groups", "tier2_completion": "4/4 (100%) — inbound webhooks, vector memory search, Dockerfile, heartbeat monitor", "feature_gap_scorecard": "88/116 match (76%), 1 partial (1%), 27 missing (23%)", - "next_up": "All phases P0-P8 and Tiers 1-2 complete. Tier 3 in progress (file.patch + Gmail done). Remaining gaps: Tier 3 channels (Signal, Matrix, Teams, Google Chat), Tier 4 deferred/niche items" + "next_up": "All phases P0-P8 and Tiers 1-2 complete. Tier 3 in progress (file.patch + Gmail done). Local model tool calling added. Remaining gaps: Tier 3 channels (Signal, Matrix, Teams, Google Chat), Tier 4 deferred/niche items" } }