docs: update state.json with native audio support feature and test count (1369)
This commit is contained in:
+52
-1
@@ -1089,7 +1089,7 @@
|
|||||||
},
|
},
|
||||||
|
|
||||||
"overall_progress": {
|
"overall_progress": {
|
||||||
"total_test_count": 1331,
|
"total_test_count": 1369,
|
||||||
"all_tests_passing": true,
|
"all_tests_passing": true,
|
||||||
"p0_completion": "3/3 (100%)",
|
"p0_completion": "3/3 (100%)",
|
||||||
"p1_completion": "4/4 (100%)",
|
"p1_completion": "4/4 (100%)",
|
||||||
@@ -1107,6 +1107,7 @@
|
|||||||
"feature_gap_scorecard": "100/128 match (78%), 0 partial (0%), 28 missing (22%)",
|
"feature_gap_scorecard": "100/128 match (78%), 0 partial (0%), 28 missing (22%)",
|
||||||
"operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 1/2 plans complete — metrics backend done, dashboard UI next",
|
"operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 1/2 plans complete — metrics backend done, dashboard UI next",
|
||||||
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
|
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
|
||||||
|
"native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
|
||||||
"next_up": "End-to-end test that Flynn follows through on tool calls via GitHub Copilot fallback. Remaining gaps: Tier 4 channels (Signal, Matrix, Teams, Google Chat), Tier 5 deferred/niche items"
|
"next_up": "End-to-end test that Flynn follows through on tool calls via GitHub Copilot fallback. Remaining gaps: Tier 4 channels (Signal, Matrix, Teams, Google Chat), Tier 5 deferred/niche items"
|
||||||
},
|
},
|
||||||
"soul_md_and_cron_create": {
|
"soul_md_and_cron_create": {
|
||||||
@@ -1137,6 +1138,56 @@
|
|||||||
"src/backends/native/agent.test.ts"
|
"src/backends/native/agent.test.ts"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"native-audio-support": {
|
||||||
|
"status": "completed",
|
||||||
|
"date": "2026-02-11",
|
||||||
|
"summary": "Native audio input support — voice messages passed directly to audio-capable models (Gemini, OpenAI, GitHub) instead of always transcribing via Whisper. Smart routing decides per-model whether to pass raw audio or transcribe first.",
|
||||||
|
"phases": {
|
||||||
|
"audio_transcribe_tool": {
|
||||||
|
"status": "completed",
|
||||||
|
"description": "audio.transcribe tool with Whisper-compatible API support",
|
||||||
|
"files_created": [
|
||||||
|
"src/tools/builtin/audio-transcribe.ts"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"type_system_and_clients": {
|
||||||
|
"status": "completed",
|
||||||
|
"description": "AudioSource type, audio content part handling in all model clients (Gemini inlineData, OpenAI input_audio, GitHub input_audio = native; Anthropic, Bedrock = text fallback)",
|
||||||
|
"files_modified": [
|
||||||
|
"src/models/types.ts",
|
||||||
|
"src/models/gemini.ts",
|
||||||
|
"src/models/openai.ts",
|
||||||
|
"src/models/github.ts",
|
||||||
|
"src/models/anthropic.ts",
|
||||||
|
"src/models/bedrock.ts",
|
||||||
|
"src/models/media.ts"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"capabilities_and_routing": {
|
||||||
|
"status": "completed",
|
||||||
|
"description": "supportsAudioInput() capability check, smart routing in daemon that transcribes for non-audio models and passes raw audio for capable ones, supports_audio config override",
|
||||||
|
"files_created": [
|
||||||
|
"src/models/capabilities.ts",
|
||||||
|
"src/models/capabilities.test.ts"
|
||||||
|
],
|
||||||
|
"files_modified": [
|
||||||
|
"src/daemon/routing.ts",
|
||||||
|
"src/config/schema.ts"
|
||||||
|
],
|
||||||
|
"test_status": "18/18 passing"
|
||||||
|
},
|
||||||
|
"tests_and_token_estimation": {
|
||||||
|
"status": "completed",
|
||||||
|
"description": "Audio tests for media helpers, audio token estimation (base64→bytes→duration→tokens at 32 tokens/sec), supports_audio config override wiring",
|
||||||
|
"files_modified": [
|
||||||
|
"src/models/media.test.ts",
|
||||||
|
"src/context/tokens.ts",
|
||||||
|
"src/context/tokens.test.ts"
|
||||||
|
],
|
||||||
|
"test_status": "20/20 tokens tests, 87/87 media tests"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"stopreason-normalization": {
|
"stopreason-normalization": {
|
||||||
"date": "2026-02-11",
|
"date": "2026-02-11",
|
||||||
"summary": "Normalize OpenAI/GitHub finish_reason to Flynn stopReason conventions. OpenAI 'stop' → 'end_turn', 'length' → 'max_tokens', 'tool_calls' with tools → 'tool_use', 'tool_calls' without tools → 'end_turn'. Fixes premature agent loop exit when falling back to GitHub Copilot (Anthropic API quota exhausted). Agent loop now accepts both 'tool_use' and 'tool_calls' as belt-and-suspenders.",
|
"summary": "Normalize OpenAI/GitHub finish_reason to Flynn stopReason conventions. OpenAI 'stop' → 'end_turn', 'length' → 'max_tokens', 'tool_calls' with tools → 'tool_use', 'tool_calls' without tools → 'end_turn'. Fixes premature agent loop exit when falling back to GitHub Copilot (Anthropic API quota exhausted). Agent loop now accepts both 'tool_use' and 'tool_calls' as belt-and-suspenders.",
|
||||||
|
|||||||
Reference in New Issue
Block a user