docs: update state.json with native audio support feature and test count (1369)
This commit is contained in:
+52
-1
@@ -1089,7 +1089,7 @@
|
||||
},
|
||||
|
||||
"overall_progress": {
|
||||
"total_test_count": 1331,
|
||||
"total_test_count": 1369,
|
||||
"all_tests_passing": true,
|
||||
"p0_completion": "3/3 (100%)",
|
||||
"p1_completion": "4/4 (100%)",
|
||||
@@ -1107,6 +1107,7 @@
|
||||
"feature_gap_scorecard": "100/128 match (78%), 0 partial (0%), 28 missing (22%)",
|
||||
"operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 1/2 plans complete — metrics backend done, dashboard UI next",
|
||||
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
|
||||
"native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
|
||||
"next_up": "End-to-end test that Flynn follows through on tool calls via GitHub Copilot fallback. Remaining gaps: Tier 4 channels (Signal, Matrix, Teams, Google Chat), Tier 5 deferred/niche items"
|
||||
},
|
||||
"soul_md_and_cron_create": {
|
||||
@@ -1137,6 +1138,56 @@
|
||||
"src/backends/native/agent.test.ts"
|
||||
]
|
||||
},
|
||||
"native-audio-support": {
|
||||
"status": "completed",
|
||||
"date": "2026-02-11",
|
||||
"summary": "Native audio input support — voice messages passed directly to audio-capable models (Gemini, OpenAI, GitHub) instead of always transcribing via Whisper. Smart routing decides per-model whether to pass raw audio or transcribe first.",
|
||||
"phases": {
|
||||
"audio_transcribe_tool": {
|
||||
"status": "completed",
|
||||
"description": "audio.transcribe tool with Whisper-compatible API support",
|
||||
"files_created": [
|
||||
"src/tools/builtin/audio-transcribe.ts"
|
||||
]
|
||||
},
|
||||
"type_system_and_clients": {
|
||||
"status": "completed",
|
||||
"description": "AudioSource type, audio content part handling in all model clients (Gemini inlineData, OpenAI input_audio, GitHub input_audio = native; Anthropic, Bedrock = text fallback)",
|
||||
"files_modified": [
|
||||
"src/models/types.ts",
|
||||
"src/models/gemini.ts",
|
||||
"src/models/openai.ts",
|
||||
"src/models/github.ts",
|
||||
"src/models/anthropic.ts",
|
||||
"src/models/bedrock.ts",
|
||||
"src/models/media.ts"
|
||||
]
|
||||
},
|
||||
"capabilities_and_routing": {
|
||||
"status": "completed",
|
||||
"description": "supportsAudioInput() capability check, smart routing in daemon that transcribes for non-audio models and passes raw audio for capable ones, supports_audio config override",
|
||||
"files_created": [
|
||||
"src/models/capabilities.ts",
|
||||
"src/models/capabilities.test.ts"
|
||||
],
|
||||
"files_modified": [
|
||||
"src/daemon/routing.ts",
|
||||
"src/config/schema.ts"
|
||||
],
|
||||
"test_status": "18/18 passing"
|
||||
},
|
||||
"tests_and_token_estimation": {
|
||||
"status": "completed",
|
||||
"description": "Audio tests for media helpers, audio token estimation (base64→bytes→duration→tokens at 32 tokens/sec), supports_audio config override wiring",
|
||||
"files_modified": [
|
||||
"src/models/media.test.ts",
|
||||
"src/context/tokens.ts",
|
||||
"src/context/tokens.test.ts"
|
||||
],
|
||||
"test_status": "20/20 tokens tests, 87/87 media tests"
|
||||
}
|
||||
}
|
||||
},
|
||||
"stopreason-normalization": {
|
||||
"date": "2026-02-11",
|
||||
"summary": "Normalize OpenAI/GitHub finish_reason to Flynn stopReason conventions. OpenAI 'stop' → 'end_turn', 'length' → 'max_tokens', 'tool_calls' with tools → 'tool_use', 'tool_calls' without tools → 'end_turn'. Fixes premature agent loop exit when falling back to GitHub Copilot (Anthropic API quota exhausted). Agent loop now accepts both 'tool_use' and 'tool_calls' as belt-and-suspenders.",
|
||||
|
||||
Reference in New Issue
Block a user