docs: update state.json with native audio support feature and test count (1369)

This commit is contained in:
William Valentin
2026-02-11 18:27:50 -08:00
parent 148219153e
commit c62dad2e2e
+52 -1
View File
@@ -1089,7 +1089,7 @@
},
"overall_progress": {
"total_test_count": 1331,
"total_test_count": 1369,
"all_tests_passing": true,
"p0_completion": "3/3 (100%)",
"p1_completion": "4/4 (100%)",
@@ -1107,6 +1107,7 @@
"feature_gap_scorecard": "100/128 match (78%), 0 partial (0%), 28 missing (22%)",
"operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 1/2 plans complete — metrics backend done, dashboard UI next",
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
"native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
"next_up": "End-to-end test that Flynn follows through on tool calls via GitHub Copilot fallback. Remaining gaps: Tier 4 channels (Signal, Matrix, Teams, Google Chat), Tier 5 deferred/niche items"
},
"soul_md_and_cron_create": {
@@ -1137,6 +1138,56 @@
"src/backends/native/agent.test.ts"
]
},
"native-audio-support": {
"status": "completed",
"date": "2026-02-11",
"summary": "Native audio input support — voice messages passed directly to audio-capable models (Gemini, OpenAI, GitHub) instead of always transcribing via Whisper. Smart routing decides per-model whether to pass raw audio or transcribe first.",
"phases": {
"audio_transcribe_tool": {
"status": "completed",
"description": "audio.transcribe tool with Whisper-compatible API support",
"files_created": [
"src/tools/builtin/audio-transcribe.ts"
]
},
"type_system_and_clients": {
"status": "completed",
"description": "AudioSource type, audio content part handling in all model clients (Gemini inlineData, OpenAI input_audio, GitHub input_audio = native; Anthropic, Bedrock = text fallback)",
"files_modified": [
"src/models/types.ts",
"src/models/gemini.ts",
"src/models/openai.ts",
"src/models/github.ts",
"src/models/anthropic.ts",
"src/models/bedrock.ts",
"src/models/media.ts"
]
},
"capabilities_and_routing": {
"status": "completed",
"description": "supportsAudioInput() capability check, smart routing in daemon that transcribes for non-audio models and passes raw audio for capable ones, supports_audio config override",
"files_created": [
"src/models/capabilities.ts",
"src/models/capabilities.test.ts"
],
"files_modified": [
"src/daemon/routing.ts",
"src/config/schema.ts"
],
"test_status": "18/18 passing"
},
"tests_and_token_estimation": {
"status": "completed",
"description": "Audio tests for media helpers, audio token estimation (base64→bytes→duration→tokens at 32 tokens/sec), supports_audio config override wiring",
"files_modified": [
"src/models/media.test.ts",
"src/context/tokens.ts",
"src/context/tokens.test.ts"
],
"test_status": "20/20 tokens tests, 87/87 media tests"
}
}
},
"stopreason-normalization": {
"date": "2026-02-11",
"summary": "Normalize OpenAI/GitHub finish_reason to Flynn stopReason conventions. OpenAI 'stop' → 'end_turn', 'length' → 'max_tokens', 'tool_calls' with tools → 'tool_use', 'tool_calls' without tools → 'end_turn'. Fixes premature agent loop exit when falling back to GitHub Copilot (Anthropic API quota exhausted). Agent loop now accepts both 'tool_use' and 'tool_calls' as belt-and-suspenders.",