feat(skills): add rollout promotion contract and sync planning state
This commit is contained in:
+33
-33
@@ -9,28 +9,28 @@ Requirements for this milestone. Each maps to roadmap phases.
|
|||||||
|
|
||||||
### Daemon Decomposition
|
### Daemon Decomposition
|
||||||
|
|
||||||
- [ ] **DECO-01**: Model client creation logic extracted from daemon/index.ts into src/daemon/models.ts with the same public interface
|
- [x] **DECO-01**: Model client creation logic extracted from daemon/index.ts into src/daemon/models.ts with the same public interface
|
||||||
- [ ] **DECO-02**: Channel adapter setup logic extracted into src/daemon/channels.ts
|
- [x] **DECO-02**: Channel adapter setup logic extracted into src/daemon/channels.ts
|
||||||
- [ ] **DECO-03**: Agent cache and factory logic extracted into src/daemon/agents.ts
|
- [x] **DECO-03**: Agent cache and factory logic extracted into src/daemon/agents.ts
|
||||||
- [ ] **DECO-04**: Memory store and vector store initialization extracted into src/daemon/memory.ts
|
- [x] **DECO-04**: Memory store and vector store initialization extracted into src/daemon/memory.ts
|
||||||
- [ ] **DECO-05**: Tool registration and policy wiring extracted into src/daemon/tools.ts
|
- [x] **DECO-05**: Tool registration and policy wiring extracted into src/daemon/tools.ts
|
||||||
- [ ] **DECO-06**: Message routing logic extracted into src/daemon/routing.ts (test file already exists)
|
- [x] **DECO-06**: Message routing logic extracted into src/daemon/routing.ts (test file already exists)
|
||||||
- [ ] **DECO-07**: daemon/index.ts reduced to a thin composition root that imports and wires extracted modules
|
- [x] **DECO-07**: daemon/index.ts reduced to a thin composition root that imports and wires extracted modules
|
||||||
- [ ] **DECO-08**: All 1077+ existing tests continue to pass after decomposition
|
- [x] **DECO-08**: All 1077+ existing tests continue to pass after decomposition
|
||||||
|
|
||||||
### Config Overlays
|
### Config Overlays
|
||||||
|
|
||||||
- [ ] **CONF-01**: User can set FLYNN_ENV environment variable to select a config overlay (e.g., docker, production)
|
- [x] **CONF-01**: User can set FLYNN_ENV environment variable to select a config overlay (e.g., docker, production)
|
||||||
- [ ] **CONF-02**: Config loader merges environment-specific overlay file on top of base config with deep merge
|
- [x] **CONF-02**: Config loader merges environment-specific overlay file on top of base config with deep merge
|
||||||
- [ ] **CONF-03**: flynn doctor validates that the selected environment overlay file exists when FLYNN_ENV is set
|
- [x] **CONF-03**: flynn doctor validates that the selected environment overlay file exists when FLYNN_ENV is set
|
||||||
|
|
||||||
### Live Ops Dashboard
|
### Live Ops Dashboard
|
||||||
|
|
||||||
- [ ] **DASH-01**: Dashboard shows core counters: messages processed, active sessions, queue depth, daemon uptime
|
- [x] **DASH-01**: Dashboard shows core counters: messages processed, active sessions, queue depth, daemon uptime
|
||||||
- [ ] **DASH-02**: Dashboard shows model call metrics: per-call latency, tokens/sec throughput, error rates by provider
|
- [x] **DASH-02**: Dashboard shows model call metrics: per-call latency, tokens/sec throughput, error rates by provider
|
||||||
- [ ] **DASH-03**: Dashboard shows live event stream: scrollable log of errors and events with timestamps and context
|
- [x] **DASH-03**: Dashboard shows live event stream: scrollable log of errors and events with timestamps and context
|
||||||
- [ ] **DASH-04**: Dashboard shows active request tracking: in-flight requests, recent tool executions, active agent sessions
|
- [x] **DASH-04**: Dashboard shows active request tracking: in-flight requests, recent tool executions, active agent sessions
|
||||||
- [ ] **DASH-05**: Gateway exposes /health endpoint returning JSON status for liveness/readiness checks
|
- [x] **DASH-05**: Gateway exposes /health endpoint returning JSON status for liveness/readiness checks
|
||||||
|
|
||||||
## v2 Requirements
|
## v2 Requirements
|
||||||
|
|
||||||
@@ -71,22 +71,22 @@ Which phases cover which requirements. Updated during roadmap creation.
|
|||||||
|
|
||||||
| Requirement | Phase | Status |
|
| Requirement | Phase | Status |
|
||||||
|-------------|-------|--------|
|
|-------------|-------|--------|
|
||||||
| DECO-01 | Phase 1 | Pending |
|
| DECO-01 | Phase 1 | Complete |
|
||||||
| DECO-02 | Phase 1 | Pending |
|
| DECO-02 | Phase 1 | Complete |
|
||||||
| DECO-03 | Phase 1 | Pending |
|
| DECO-03 | Phase 1 | Complete |
|
||||||
| DECO-04 | Phase 1 | Pending |
|
| DECO-04 | Phase 1 | Complete |
|
||||||
| DECO-05 | Phase 1 | Pending |
|
| DECO-05 | Phase 1 | Complete |
|
||||||
| DECO-06 | Phase 1 | Pending |
|
| DECO-06 | Phase 1 | Complete |
|
||||||
| DECO-07 | Phase 1 | Pending |
|
| DECO-07 | Phase 1 | Complete |
|
||||||
| DECO-08 | Phase 1 | Pending |
|
| DECO-08 | Phase 1 | Complete |
|
||||||
| CONF-01 | Phase 2 | Pending |
|
| CONF-01 | Phase 2 | Complete |
|
||||||
| CONF-02 | Phase 2 | Pending |
|
| CONF-02 | Phase 2 | Complete |
|
||||||
| CONF-03 | Phase 2 | Pending |
|
| CONF-03 | Phase 2 | Complete |
|
||||||
| DASH-01 | Phase 3 | Pending |
|
| DASH-01 | Phase 3 | Complete |
|
||||||
| DASH-02 | Phase 3 | Pending |
|
| DASH-02 | Phase 3 | Complete |
|
||||||
| DASH-03 | Phase 3 | Pending |
|
| DASH-03 | Phase 3 | Complete |
|
||||||
| DASH-04 | Phase 3 | Pending |
|
| DASH-04 | Phase 3 | Complete |
|
||||||
| DASH-05 | Phase 3 | Pending |
|
| DASH-05 | Phase 3 | Complete |
|
||||||
|
|
||||||
**Coverage:**
|
**Coverage:**
|
||||||
- v1 requirements: 16 total
|
- v1 requirements: 16 total
|
||||||
@@ -95,4 +95,4 @@ Which phases cover which requirements. Updated during roadmap creation.
|
|||||||
|
|
||||||
---
|
---
|
||||||
*Requirements defined: 2026-02-09*
|
*Requirements defined: 2026-02-09*
|
||||||
*Last updated: 2026-02-09 after initial definition*
|
*Last updated: 2026-02-13 after Phase 3 completion*
|
||||||
|
|||||||
@@ -67,8 +67,8 @@ Plans:
|
|||||||
**Plans:** 2 plans in 2 waves
|
**Plans:** 2 plans in 2 waves
|
||||||
|
|
||||||
Plans:
|
Plans:
|
||||||
- [ ] 03-01-PLAN.md — Backend metrics collector, RPC handlers, HTTP /health endpoint
|
- [x] 03-01-PLAN.md — Backend metrics collector, RPC handlers, HTTP /health endpoint
|
||||||
- [ ] 03-02-PLAN.md — Dashboard UI with live counters, model metrics, event stream, active requests
|
- [x] 03-02-PLAN.md — Dashboard UI with live counters, model metrics, event stream, active requests
|
||||||
|
|
||||||
| Plan | Wave | Objective | Tasks |
|
| Plan | Wave | Objective | Tasks |
|
||||||
|------|------|-----------|-------|
|
|------|------|-----------|-------|
|
||||||
@@ -87,10 +87,10 @@ Plans:
|
|||||||
|-------|--------|--------------|
|
|-------|--------|--------------|
|
||||||
| 1 — Daemon Decomposition | **complete** | DECO-01..08 (8) — 3 plans, 2 waves |
|
| 1 — Daemon Decomposition | **complete** | DECO-01..08 (8) — 3 plans, 2 waves |
|
||||||
| 2 — Config Overlays | **complete** | CONF-01..03 (3) — 2 plans, 2 waves |
|
| 2 — Config Overlays | **complete** | CONF-01..03 (3) — 2 plans, 2 waves |
|
||||||
| 3 — Live Ops Dashboard | not_started | DASH-01..05 (5) |
|
| 3 — Live Ops Dashboard | **complete** | DASH-01..05 (5) — 2 plans, 2 waves |
|
||||||
|
|
||||||
**Coverage:** 16/16 v1 requirements mapped ✓
|
**Coverage:** 16/16 v1 requirements mapped ✓
|
||||||
|
|
||||||
---
|
---
|
||||||
*Roadmap created: 2026-02-09*
|
*Roadmap created: 2026-02-09*
|
||||||
*Last updated: 2026-02-10*
|
*Last updated: 2026-02-13*
|
||||||
|
|||||||
+11
-9
@@ -9,9 +9,9 @@
|
|||||||
## Current Position
|
## Current Position
|
||||||
|
|
||||||
**Phase:** 3 — Live Ops Dashboard
|
**Phase:** 3 — Live Ops Dashboard
|
||||||
**Plan:** 1 of 2 complete (03-01 done)
|
**Plan:** 2 of 2 complete (03-01 and 03-02 done)
|
||||||
**Status:** in_progress
|
**Status:** complete
|
||||||
**Progress:** ██████████ 2.5/3 phases (Phase 3: 1/2 plans)
|
**Progress:** ██████████ 3/3 phases (Phase 3: 2/2 plans)
|
||||||
|
|
||||||
## Phase Status
|
## Phase Status
|
||||||
|
|
||||||
@@ -19,13 +19,13 @@
|
|||||||
|-------|--------|-------|
|
|-------|--------|-------|
|
||||||
| 1 — Daemon Decomposition | **complete** | 3/3 plans complete |
|
| 1 — Daemon Decomposition | **complete** | 3/3 plans complete |
|
||||||
| 2 — Config Overlays | **complete** | 2/2 plans complete |
|
| 2 — Config Overlays | **complete** | 2/2 plans complete |
|
||||||
| 3 — Live Ops Dashboard | **in_progress** | 1/2 plans complete |
|
| 3 — Live Ops Dashboard | **complete** | 2/2 plans complete |
|
||||||
|
|
||||||
## Performance Metrics
|
## Performance Metrics
|
||||||
|
|
||||||
| Metric | Value |
|
| Metric | Value |
|
||||||
|--------|-------|
|
|--------|-------|
|
||||||
| Test count | 1107 (verified after 03-01, +20 metrics tests from 1087 baseline) |
|
| Test count | 1597 (verified after runtime-cancellation follow-up) |
|
||||||
| daemon/index.ts lines | 140 (from 1087 baseline, -87%) |
|
| daemon/index.ts lines | 140 (from 1087 baseline, -87%) |
|
||||||
| Total daemon modules | 9 files, 1271 lines |
|
| Total daemon modules | 9 files, 1271 lines |
|
||||||
| Plan 01-01 duration | 9 min |
|
| Plan 01-01 duration | 9 min |
|
||||||
@@ -40,6 +40,8 @@
|
|||||||
| Plan 02-02 tasks | 1/1 |
|
| Plan 02-02 tasks | 1/1 |
|
||||||
| Plan 03-01 duration | ~2 min |
|
| Plan 03-01 duration | ~2 min |
|
||||||
| Plan 03-01 tasks | 2/2 |
|
| Plan 03-01 tasks | 2/2 |
|
||||||
|
| Plan 03-02 status | implemented and verified with typecheck/build/test; summary backfilled |
|
||||||
|
| Plan 03-02 tasks | 2/2 |
|
||||||
|
|
||||||
## Accumulated Context
|
## Accumulated Context
|
||||||
|
|
||||||
@@ -91,10 +93,10 @@ _(none)_
|
|||||||
|
|
||||||
## Session Continuity
|
## Session Continuity
|
||||||
|
|
||||||
**Last session:** Plan 03-01 (metrics collection backend) completed
|
**Last session:** Phase 3 closure and dashboard verification run
|
||||||
**Stopped at:** Completed 03-01-PLAN.md — Phase 3 plan 1 of 2 done
|
**Stopped at:** Completed 03-02 plan summary and roadmap/requirements status sync
|
||||||
**Next action:** Execute 03-02-PLAN.md (Dashboard UI)
|
**Next action:** Start next milestone or pick a new planning phase
|
||||||
|
|
||||||
---
|
---
|
||||||
*State initialized: 2026-02-09*
|
*State initialized: 2026-02-09*
|
||||||
*Last updated: 2026-02-10T05:29Z*
|
*Last updated: 2026-02-13T08:20Z*
|
||||||
|
|||||||
@@ -0,0 +1,96 @@
|
|||||||
|
---
|
||||||
|
phase: 03-live-ops-dashboard
|
||||||
|
plan: 02
|
||||||
|
subsystem: gateway-ui
|
||||||
|
tags: [dashboard, ui, metrics, events, active-requests, monitoring]
|
||||||
|
|
||||||
|
# Dependency graph
|
||||||
|
requires:
|
||||||
|
- phase: 03-live-ops-dashboard
|
||||||
|
provides: "MetricsCollector, system.metrics/system.events/system.activeRequests RPC handlers, and /health endpoint"
|
||||||
|
provides:
|
||||||
|
- "Live Ops dashboard UI sections for counters, model performance, event stream, active requests, and channels"
|
||||||
|
- "Dual refresh cadence: fast (3s) metrics/events/requests and slow (10s) health/channels"
|
||||||
|
- "Event stream styling and model metrics summary styling"
|
||||||
|
affects: [operator-observability, milestone-closure]
|
||||||
|
|
||||||
|
# Tech tracking
|
||||||
|
tech-stack:
|
||||||
|
added: []
|
||||||
|
patterns:
|
||||||
|
- "Targeted section updates via stable DOM IDs"
|
||||||
|
- "Split polling cadence for high-churn vs low-churn data"
|
||||||
|
|
||||||
|
key-files:
|
||||||
|
created: []
|
||||||
|
modified:
|
||||||
|
- src/gateway/ui/pages/dashboard.js
|
||||||
|
- src/gateway/ui/style.css
|
||||||
|
|
||||||
|
key-decisions:
|
||||||
|
- "Keep vanilla JS page module pattern (render/teardown), no framework migration"
|
||||||
|
- "Use two polling timers (3s and 10s) to reduce unnecessary RPC load"
|
||||||
|
- "Render newest events at the bottom with auto-scroll for log readability"
|
||||||
|
|
||||||
|
patterns-established:
|
||||||
|
- "Dashboard section IDs as update boundaries: ops-counters, ops-model-table, ops-events, ops-requests, ops-channels"
|
||||||
|
|
||||||
|
# Metrics
|
||||||
|
duration: unknown (implementation commit predates this summary backfill)
|
||||||
|
completed: 2026-02-13
|
||||||
|
---
|
||||||
|
|
||||||
|
# Phase 3 Plan 2: Live Ops Dashboard UI Summary
|
||||||
|
|
||||||
|
**Extended the existing dashboard with live counters, model performance telemetry, event stream, and active request visibility, backed by Phase 3 Plan 1 RPC endpoints.**
|
||||||
|
|
||||||
|
## Performance
|
||||||
|
|
||||||
|
- **Summary date:** 2026-02-13
|
||||||
|
- **Tasks:** 2/2 (implementation + closure/verification)
|
||||||
|
- **Files modified:** 2
|
||||||
|
|
||||||
|
## Accomplishments
|
||||||
|
- Implemented dashboard sections for core counters, model metrics table, event stream, active requests, and channels
|
||||||
|
- Wired RPC calls to `system.metrics`, `system.events`, `system.activeRequests`, `system.health`, and `system.channels`
|
||||||
|
- Added 3-second fast refresh for dynamic ops data and 10-second slow refresh for health/channel state
|
||||||
|
- Added event stream and model summary styling in shared gateway UI stylesheet
|
||||||
|
|
||||||
|
## Task Commits
|
||||||
|
|
||||||
|
The implementation had already landed in earlier commits; this summary formally closes it out with planning artifacts. The relevant commits are:
|
||||||
|
|
||||||
|
1. **Task 1: Extend dashboard page with live ops sections** - `c3ca3f3` (feat)
|
||||||
|
2. **Follow-up style cleanup** - `6090508` (style)
|
||||||
|
|
||||||
|
## Files Created/Modified
|
||||||
|
- `src/gateway/ui/pages/dashboard.js` - Live ops dashboard structure, polling, and targeted section updates
|
||||||
|
- `src/gateway/ui/style.css` - Event stream and model metrics summary styles
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
Automated checks run during this closure:
|
||||||
|
|
||||||
|
- `pnpm typecheck` ✅
|
||||||
|
- `pnpm build` ✅
|
||||||
|
- `pnpm test:run` ✅ (1590/1590 passed)
|
||||||
|
|
||||||
|
Manual browser verification (visual sanity check) remains recommended as a final operator check.
|
||||||
|
|
||||||
|
## Deviations from Plan
|
||||||
|
|
||||||
|
No functional deviations. This summary was backfilled after implementation had already landed.
|
||||||
|
|
||||||
|
## Issues Encountered
|
||||||
|
None
|
||||||
|
|
||||||
|
## User Setup Required
|
||||||
|
None
|
||||||
|
|
||||||
|
## Next Phase Readiness
|
||||||
|
- Phase 3 is complete from an implementation and automated validation perspective
|
||||||
|
- Milestone artifacts are now synchronized (`STATE.md`, `ROADMAP.md`, `REQUIREMENTS.md`)
|
||||||
|
|
||||||
|
---
|
||||||
|
*Phase: 03-live-ops-dashboard*
|
||||||
|
*Completed: 2026-02-13*
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
Created: 2026-02-12
|
Created: 2026-02-12
|
||||||
Owner: Flynn core
|
Owner: Flynn core
|
||||||
Status: ready to implement
|
Status: completed
|
||||||
|
|
||||||
## Goal
|
## Goal
|
||||||
|
|
||||||
@@ -86,3 +86,12 @@ pnpm build
|
|||||||
## Commit Message
|
## Commit Message
|
||||||
|
|
||||||
`feat(session): add history indexing and topic search metadata`
|
`feat(session): add history indexing and topic search metadata`
|
||||||
|
|
||||||
|
## Completion Notes (2026-02-13)
|
||||||
|
|
||||||
|
- Implemented `history_index` config with defaults and bounds.
|
||||||
|
- Added migration-safe message metadata persistence in SQLite.
|
||||||
|
- Implemented indexing/tokenization and ranked history search with recency weighting.
|
||||||
|
- Wired indexing/search lifecycle in `SessionManager` and routing boost hook in daemon routing.
|
||||||
|
- Added gateway handlers for `history.search` and `history.reindex`.
|
||||||
|
- Verified with full suite: `pnpm test:run` (`1593/1593`), plus `pnpm typecheck` and `pnpm build`.
|
||||||
|
|||||||
+60
-7
@@ -1146,6 +1146,35 @@
|
|||||||
],
|
],
|
||||||
"test_status": "typecheck + targeted policy/intents/routing tests + full test suite + build passing; lint currently fails due to pre-existing unrelated repo issues"
|
"test_status": "typecheck + targeted policy/intents/routing tests + full test suite + build passing; lint currently fails due to pre-existing unrelated repo issues"
|
||||||
},
|
},
|
||||||
|
"remaining-phases-phase2-pr3-history-index": {
|
||||||
|
"file": "phase2-pr3-history-index-checklist.md",
|
||||||
|
"status": "completed",
|
||||||
|
"date": "2026-02-13",
|
||||||
|
"summary": "Added lightweight session history indexing and topic search with migration-safe metadata persistence, ranked keyword search + recency scoring, gateway search/reindex handlers, and optional routing confidence boost from historical overlap.",
|
||||||
|
"files_created": [
|
||||||
|
"src/session/indexer.ts",
|
||||||
|
"src/session/search.ts",
|
||||||
|
"src/session/indexer.test.ts",
|
||||||
|
"src/session/search.test.ts",
|
||||||
|
"src/gateway/handlers/history.ts"
|
||||||
|
],
|
||||||
|
"files_modified": [
|
||||||
|
"src/config/schema.ts",
|
||||||
|
"src/config/schema.test.ts",
|
||||||
|
"src/session/store.ts",
|
||||||
|
"src/session/store.test.ts",
|
||||||
|
"src/session/manager.ts",
|
||||||
|
"src/session/manager.test.ts",
|
||||||
|
"src/session/index.ts",
|
||||||
|
"src/daemon/index.ts",
|
||||||
|
"src/daemon/routing.ts",
|
||||||
|
"src/daemon/routing.test.ts",
|
||||||
|
"src/gateway/handlers/index.ts",
|
||||||
|
"src/gateway/handlers/handlers.test.ts",
|
||||||
|
"src/gateway/server.ts"
|
||||||
|
],
|
||||||
|
"test_status": "pnpm typecheck + pnpm test:run (1593/1593) + pnpm build passing"
|
||||||
|
},
|
||||||
"remaining-phases-phase3-pr1-adaptive-memory-compaction": {
|
"remaining-phases-phase3-pr1-adaptive-memory-compaction": {
|
||||||
"file": "phase3-pr1-adaptive-memory-compaction-checklist.md",
|
"file": "phase3-pr1-adaptive-memory-compaction-checklist.md",
|
||||||
"status": "completed",
|
"status": "completed",
|
||||||
@@ -1240,9 +1269,24 @@
|
|||||||
],
|
],
|
||||||
"test_status": "pnpm typecheck + pnpm test:run (1586/1586) + pnpm build passing"
|
"test_status": "pnpm typecheck + pnpm test:run (1586/1586) + pnpm build passing"
|
||||||
},
|
},
|
||||||
|
"gateway-agent-cancel-runtime": {
|
||||||
|
"status": "completed",
|
||||||
|
"date": "2026-02-13",
|
||||||
|
"summary": "Implemented real runtime cancellation wiring for `agent.cancel`: active requests are now cancellable at safe points in `NativeAgent`, queued lane work is cleared, and gateway/session bridge cancellation paths return explicit status messages.",
|
||||||
|
"files_modified": [
|
||||||
|
"src/backends/native/agent.ts",
|
||||||
|
"src/backends/native/agent.test.ts",
|
||||||
|
"src/backends/native/orchestrator.ts",
|
||||||
|
"src/gateway/session-bridge.ts",
|
||||||
|
"src/gateway/session-bridge.test.ts",
|
||||||
|
"src/gateway/handlers/agent.ts",
|
||||||
|
"src/gateway/handlers/handlers.test.ts"
|
||||||
|
],
|
||||||
|
"test_status": "pnpm typecheck + pnpm test:run (1597/1597) + pnpm build passing"
|
||||||
|
},
|
||||||
"skills_infrastructure": {
|
"skills_infrastructure": {
|
||||||
"file": "2026-02-11-skills-infrastructure-plan.md",
|
"file": "2026-02-11-skills-infrastructure-plan.md",
|
||||||
"status": "planned",
|
"status": "completed",
|
||||||
"date": "2026-02-11",
|
"date": "2026-02-11",
|
||||||
"summary": "Three-phase plan to improve skills system: Command Dispatch (P0), Skills Watcher (P1), Installer Specs (P1). Infrastructure-first approach before integrating ClawHub skills. Estimated 8-11 hours total. Model strategy: glm-4.7-flash for mechanical tasks, glm-4.7 for complex/orchestration tasks.",
|
"summary": "Three-phase plan to improve skills system: Command Dispatch (P0), Skills Watcher (P1), Installer Specs (P1). Infrastructure-first approach before integrating ClawHub skills. Estimated 8-11 hours total. Model strategy: glm-4.7-flash for mechanical tasks, glm-4.7 for complex/orchestration tasks.",
|
||||||
"phases": {
|
"phases": {
|
||||||
@@ -1314,7 +1358,7 @@
|
|||||||
},
|
},
|
||||||
"phase_2_skills_watcher": {
|
"phase_2_skills_watcher": {
|
||||||
"priority": "P1",
|
"priority": "P1",
|
||||||
"status": "in_progress",
|
"status": "completed",
|
||||||
"description": "Auto-reload skills with chokidar file watcher, configurable debounce",
|
"description": "Auto-reload skills with chokidar file watcher, configurable debounce",
|
||||||
"effort": "3-4 hours",
|
"effort": "3-4 hours",
|
||||||
"sub_slices": {
|
"sub_slices": {
|
||||||
@@ -1378,7 +1422,7 @@
|
|||||||
},
|
},
|
||||||
"phase_3_installer_specs": {
|
"phase_3_installer_specs": {
|
||||||
"priority": "P1",
|
"priority": "P1",
|
||||||
"status": "in_progress",
|
"status": "completed",
|
||||||
"description": "Auto-install dependencies (brew/node/go/download) with package manager detection",
|
"description": "Auto-install dependencies (brew/node/go/download) with package manager detection",
|
||||||
"effort": "3-4 hours",
|
"effort": "3-4 hours",
|
||||||
"sub_slices": {
|
"sub_slices": {
|
||||||
@@ -1670,6 +1714,15 @@
|
|||||||
"src/cli/skills.test.ts"
|
"src/cli/skills.test.ts"
|
||||||
],
|
],
|
||||||
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
|
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
|
||||||
|
},
|
||||||
|
"shell_runner_promotion_contract_output": {
|
||||||
|
"status": "completed",
|
||||||
|
"description": "Added dedicated machine-readable promotion contract output for `skills rollout-status` (`--contract`) with stable schema, CI-friendly gate/exit code semantics, and optional `--out` export support",
|
||||||
|
"files_modified": [
|
||||||
|
"src/cli/skills.ts",
|
||||||
|
"src/cli/skills.test.ts"
|
||||||
|
],
|
||||||
|
"test_status": "pnpm test:run src/cli/skills.test.ts + pnpm typecheck + pnpm build passing"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1698,7 +1751,7 @@
|
|||||||
},
|
},
|
||||||
|
|
||||||
"overall_progress": {
|
"overall_progress": {
|
||||||
"total_test_count": 1586,
|
"total_test_count": 1597,
|
||||||
"all_tests_passing": true,
|
"all_tests_passing": true,
|
||||||
"p0_completion": "3/3 (100%)",
|
"p0_completion": "3/3 (100%)",
|
||||||
"p1_completion": "4/4 (100%)",
|
"p1_completion": "4/4 (100%)",
|
||||||
@@ -1714,11 +1767,11 @@
|
|||||||
"tier3_completion": "5/5 (100%) — lane queue, credential redaction, web UI token dashboard, xAI (Grok) provider, Voyage AI embeddings",
|
"tier3_completion": "5/5 (100%) — lane queue, credential redaction, web UI token dashboard, xAI (Grok) provider, Voyage AI embeddings",
|
||||||
"tier4_completion": "4/4 (100%) — gateway lock, shell completion, Tailscale Serve/Funnel, DM pairing codes",
|
"tier4_completion": "4/4 (100%) — gateway lock, shell completion, Tailscale Serve/Funnel, DM pairing codes",
|
||||||
"feature_gap_scorecard": "100/128 match (78%), 0 partial (0%), 28 missing (22%)",
|
"feature_gap_scorecard": "100/128 match (78%), 0 partial (0%), 28 missing (22%)",
|
||||||
"operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 1/2 plans complete — metrics backend done, dashboard UI next",
|
"operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete — milestone done",
|
||||||
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
|
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
|
||||||
"native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
|
"native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
|
||||||
"remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 2/2 (100%) — component registry, confidence routing. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening",
|
"remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 3/3 (100%) — component registry, confidence routing, history index. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening",
|
||||||
"next_up": "Skills infrastructure follow-up: expose promotion-policy status as a dedicated machine-readable contract for automation consumers (e.g., CI gate or dashboard ingest) before broader shell-runner rollout"
|
"next_up": "Define next milestone and create a new implementation checklist (all remaining-phases PR slices complete)"
|
||||||
},
|
},
|
||||||
"soul_md_and_cron_create": {
|
"soul_md_and_cron_create": {
|
||||||
"date": "2026-02-11",
|
"date": "2026-02-11",
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ import {
|
|||||||
recommendShellRunnerRolloutPhase,
|
recommendShellRunnerRolloutPhase,
|
||||||
sanitizeSkillInstallerAuditReason,
|
sanitizeSkillInstallerAuditReason,
|
||||||
summarizeShellRunnerAuditWindow,
|
summarizeShellRunnerAuditWindow,
|
||||||
|
toShellRunnerPromotionContract,
|
||||||
resolveSkillInstallerCommandRunner,
|
resolveSkillInstallerCommandRunner,
|
||||||
runSkillExecuteAction,
|
runSkillExecuteAction,
|
||||||
runSkillInstallAction,
|
runSkillInstallAction,
|
||||||
@@ -753,6 +754,70 @@ describe('skills CLI helpers', () => {
|
|||||||
expect(policy.blockers).toContain('failures increased by 1 vs previous window');
|
expect(policy.blockers).toContain('failures increased by 1 vs previous window');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('builds machine-readable promotion contract with gate status and blockers', () => {
|
||||||
|
const contract = toShellRunnerPromotionContract({
|
||||||
|
generatedAt: '2026-02-13T00:00:00.000Z',
|
||||||
|
days: 7,
|
||||||
|
recommendation: 'guarded_review',
|
||||||
|
guardrails: { blockers: ['skills.installation_execution must be enabled'] },
|
||||||
|
summary: {
|
||||||
|
command_result_total: 4,
|
||||||
|
command_result_failed: 1,
|
||||||
|
allowlist_blocked: 0,
|
||||||
|
execution_blocked: 0,
|
||||||
|
hashed_command_count: 3,
|
||||||
|
unhashed_command_count: 1,
|
||||||
|
},
|
||||||
|
trend: {
|
||||||
|
current: {
|
||||||
|
command_result_total: 4,
|
||||||
|
command_result_failed: 1,
|
||||||
|
allowlist_blocked: 0,
|
||||||
|
execution_blocked: 0,
|
||||||
|
hashed_command_count: 3,
|
||||||
|
unhashed_command_count: 1,
|
||||||
|
},
|
||||||
|
previous: {
|
||||||
|
command_result_total: 4,
|
||||||
|
command_result_failed: 0,
|
||||||
|
allowlist_blocked: 0,
|
||||||
|
execution_blocked: 0,
|
||||||
|
hashed_command_count: 4,
|
||||||
|
unhashed_command_count: 0,
|
||||||
|
},
|
||||||
|
deltas: {
|
||||||
|
failures: 1,
|
||||||
|
allowlist_blocks: 0,
|
||||||
|
hash_coverage_pct: -25,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
promotionPolicy: {
|
||||||
|
eligible: false,
|
||||||
|
recommendation: 'not_eligible',
|
||||||
|
cadence_days: 7,
|
||||||
|
reviewed_window_days: 7,
|
||||||
|
success_rate: 0.75,
|
||||||
|
minimum_success_rate: 0.9,
|
||||||
|
failures_delta: 1,
|
||||||
|
allowlist_blocks_delta: 0,
|
||||||
|
hash_coverage_delta_pct: -25,
|
||||||
|
blockers: ['success rate 75.00% below minimum 90.00%'],
|
||||||
|
},
|
||||||
|
governance: {
|
||||||
|
owner: 'skills-team',
|
||||||
|
review_cadence_days: 7,
|
||||||
|
promotion_min_success_rate: 0.9,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(contract.schema).toBe('skills.rollout.promotion_contract.v1');
|
||||||
|
expect(contract.gate.status).toBe('fail');
|
||||||
|
expect(contract.gate.exit_code).toBe(1);
|
||||||
|
expect(contract.gate.blockers).toContain('skills.installation_execution must be enabled');
|
||||||
|
expect(contract.gate.blockers).toContain('success rate 75.00% below minimum 90.00%');
|
||||||
|
expect(contract.summary.hash_coverage_pct).toBe(75);
|
||||||
|
});
|
||||||
|
|
||||||
it('marks promotion policy eligible when thresholds and trends are healthy', () => {
|
it('marks promotion policy eligible when thresholds and trends are healthy', () => {
|
||||||
const policy = evaluateShellRunnerPromotionPolicy({
|
const policy = evaluateShellRunnerPromotionPolicy({
|
||||||
trend: {
|
trend: {
|
||||||
@@ -2161,6 +2226,92 @@ describe('skills CLI helpers', () => {
|
|||||||
rmSync(root, { recursive: true, force: true });
|
rmSync(root, { recursive: true, force: true });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('skills rollout-status emits dedicated promotion contract JSON with exit code', async () => {
|
||||||
|
const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
|
||||||
|
const configPath = join(root, 'config.yaml');
|
||||||
|
const managedDir = join(root, 'managed');
|
||||||
|
const bundledDir = join(root, 'bundled');
|
||||||
|
const workspaceDir = join(root, 'workspace');
|
||||||
|
const auditPath = join(root, 'audit.log');
|
||||||
|
mkdirSync(managedDir, { recursive: true });
|
||||||
|
mkdirSync(bundledDir, { recursive: true });
|
||||||
|
mkdirSync(workspaceDir, { recursive: true });
|
||||||
|
writeFileSync(auditPath, '', 'utf-8');
|
||||||
|
writeSkillsCliConfig(configPath, {
|
||||||
|
managedDir,
|
||||||
|
bundledDir,
|
||||||
|
workspaceDir,
|
||||||
|
installationExecution: 'enabled',
|
||||||
|
allowShellRunner: true,
|
||||||
|
shellRunnerAllowlist: ['npm install*'],
|
||||||
|
shellRunnerGovernanceOwner: 'skills-team',
|
||||||
|
auditPath,
|
||||||
|
});
|
||||||
|
|
||||||
|
const program = new Command();
|
||||||
|
registerSkillsCommand(program);
|
||||||
|
|
||||||
|
const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
|
||||||
|
process.exitCode = undefined;
|
||||||
|
|
||||||
|
await program.parseAsync(['skills', 'rollout-status', '--contract', '-c', configPath], { from: 'user' });
|
||||||
|
|
||||||
|
const payload = JSON.parse(String(logSpy.mock.calls[0]?.[0]));
|
||||||
|
expect(payload.schema).toBe('skills.rollout.promotion_contract.v1');
|
||||||
|
expect(payload.gate.status).toBe('fail');
|
||||||
|
expect(payload.gate.exit_code).toBe(1);
|
||||||
|
expect(payload.governance.owner).toBe('skills-team');
|
||||||
|
expect(process.exitCode).toBe(1);
|
||||||
|
|
||||||
|
logSpy.mockRestore();
|
||||||
|
process.exitCode = undefined;
|
||||||
|
rmSync(root, { recursive: true, force: true });
|
||||||
|
});
|
||||||
|
|
||||||
|
// Verifies that `skills rollout-status --contract --out <path>` writes the
// dedicated promotion contract JSON to the requested file.
it('skills rollout-status writes dedicated promotion contract to output file', async () => {
  const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
  // The --contract path also prints the contract to stdout; keep test output quiet.
  const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
  process.exitCode = undefined;

  // Cleanup lives in `finally` so a failing assertion cannot leak the console
  // spy, the exit code set by the command, or the temp directory into the
  // tests that run afterwards.
  try {
    const configPath = join(root, 'config.yaml');
    const managedDir = join(root, 'managed');
    const bundledDir = join(root, 'bundled');
    const workspaceDir = join(root, 'workspace');
    const auditPath = join(root, 'audit.log');
    const outputPath = join(root, 'rollout-contract.json');

    mkdirSync(managedDir, { recursive: true });
    mkdirSync(bundledDir, { recursive: true });
    mkdirSync(workspaceDir, { recursive: true });
    // Start from an empty audit log.
    writeFileSync(auditPath, '', 'utf-8');

    writeSkillsCliConfig(configPath, {
      managedDir,
      bundledDir,
      workspaceDir,
      installationExecution: 'enabled',
      allowShellRunner: true,
      shellRunnerAllowlist: ['npm install*'],
      shellRunnerGovernanceOwner: 'skills-team',
      auditPath,
    });

    const program = new Command();
    registerSkillsCommand(program);

    await program.parseAsync(['skills', 'rollout-status', '--contract', '--out', outputPath, '-c', configPath], {
      from: 'user',
    });

    expect(existsSync(outputPath)).toBe(true);

    const payload = JSON.parse(readFileSync(outputPath, 'utf-8'));
    expect(payload.schema).toBe('skills.rollout.promotion_contract.v1');
    expect(payload.gate).toBeDefined();
    expect(payload.summary).toBeDefined();
  } finally {
    logSpy.mockRestore();
    // The command sets process.exitCode from the contract gate; reset it so the
    // test runner's own exit status is not affected.
    process.exitCode = undefined;
    rmSync(root, { recursive: true, force: true });
  }
});
|
||||||
|
|
||||||
it('skills rollout-status includes trend deltas across adjacent windows', async () => {
|
it('skills rollout-status includes trend deltas across adjacent windows', async () => {
|
||||||
const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
|
const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
|
||||||
const configPath = join(root, 'config.yaml');
|
const configPath = join(root, 'config.yaml');
|
||||||
|
|||||||
+99
-2
@@ -132,6 +132,32 @@ export interface ShellRunnerPromotionPolicyStatus {
|
|||||||
blockers: string[];
|
blockers: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Machine-readable promotion contract for the shell runner rollout.
 *
 * Field names are snake_case because the object is serialized to JSON as-is.
 */
export interface ShellRunnerPromotionContract {
  /** Schema identifier of this payload; consumers can key on it. */
  schema: 'skills.rollout.promotion_contract.v1';

  /** Timestamp string recording when the contract was generated. */
  generated_at: string;

  /** Size, in days, of the audit window the contract was computed over. */
  window_days: number;

  /** Pass/fail promotion verdict. */
  gate: {
    status: 'pass' | 'fail';
    /** Exit code matching the verdict. */
    exit_code: 0 | 1;
    reason: 'promotion_eligible' | 'promotion_not_eligible';
    /** Blocker messages attached to the verdict. */
    blockers: string[];
  };

  /** Recommended rollout phase. */
  recommendation: ShellRunnerRolloutRecommendation;

  /** Governance settings the verdict was evaluated under. */
  governance: {
    /** Governance owner, or `null` when none is configured. */
    owner: string | null;
    review_cadence_days: number;
    promotion_min_success_rate: number;
  };

  /** Headline counters for the audit window. */
  summary: {
    command_result_total: number;
    command_result_failed: number;
    allowlist_blocked: number;
    hash_coverage_pct: number;
  };

  /** Full promotion policy evaluation. */
  promotion_policy: ShellRunnerPromotionPolicyStatus;

  /** Deltas taken from the audit trend snapshot. */
  trend: ShellRunnerAuditTrendSnapshot['deltas'];
}
|
||||||
|
|
||||||
export type ShellRunnerRolloutRecommendation = 'locked' | 'guarded_observe' | 'guarded_review' | 'expand_candidate';
|
export type ShellRunnerRolloutRecommendation = 'locked' | 'guarded_observe' | 'guarded_review' | 'expand_candidate';
|
||||||
|
|
||||||
export function evaluateShellRunnerRolloutGuardrails(
|
export function evaluateShellRunnerRolloutGuardrails(
|
||||||
@@ -321,6 +347,50 @@ export function recommendShellRunnerRolloutPhase(
|
|||||||
return 'expand_candidate';
|
return 'expand_candidate';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Assembles the `skills.rollout.promotion_contract.v1` payload from the
 * already-computed rollout evaluation pieces.
 *
 * The gate passes only when the promotion policy reports `eligible` and the
 * combined blocker list (guardrail blockers followed by policy blockers) is
 * empty; otherwise it fails with exit code 1.
 *
 * @param args.generatedAt - Timestamp string copied to `generated_at`.
 * @param args.days - Audit window size copied to `window_days`.
 * @param args.recommendation - Rollout phase recommendation, passed through.
 * @param args.guardrails - Guardrail status; its blockers feed the gate.
 * @param args.summary - Audit window summary; selected counters are copied and
 *   the hash coverage percentage is derived from it.
 * @param args.trend - Trend snapshot; only its `deltas` are included.
 * @param args.promotionPolicy - Policy status; included whole and used for the gate.
 * @param args.governance - Governance settings copied field by field.
 * @returns The promotion contract object, ready for JSON serialization.
 */
export function toShellRunnerPromotionContract(args: {
  generatedAt: string;
  days: number;
  recommendation: ShellRunnerRolloutRecommendation;
  guardrails: ShellRunnerRolloutGuardrailStatus;
  summary: ShellRunnerAuditWindowSummary;
  trend: ShellRunnerAuditTrendSnapshot;
  promotionPolicy: ShellRunnerPromotionPolicyStatus;
  governance: {
    owner: string | null;
    review_cadence_days: number;
    promotion_min_success_rate: number;
  };
}): ShellRunnerPromotionContract {
  const { guardrails, promotionPolicy, governance, summary } = args;

  // Guardrail blockers come first, then policy blockers.
  const blockers = guardrails.blockers.concat(promotionPolicy.blockers);
  const eligible = promotionPolicy.eligible && blockers.length === 0;

  // Decide the verdict once so status, exit code and reason cannot drift apart.
  const gate: ShellRunnerPromotionContract['gate'] = eligible
    ? { status: 'pass', exit_code: 0, reason: 'promotion_eligible', blockers }
    : { status: 'fail', exit_code: 1, reason: 'promotion_not_eligible', blockers };

  return {
    schema: 'skills.rollout.promotion_contract.v1',
    generated_at: args.generatedAt,
    window_days: args.days,
    gate,
    recommendation: args.recommendation,
    // Copy governance field by field so only the contract's fields are emitted.
    governance: {
      owner: governance.owner,
      review_cadence_days: governance.review_cadence_days,
      promotion_min_success_rate: governance.promotion_min_success_rate,
    },
    summary: {
      command_result_total: summary.command_result_total,
      command_result_failed: summary.command_result_failed,
      allowlist_blocked: summary.allowlist_blocked,
      hash_coverage_pct: calculateShellRunnerHashCoveragePercent(summary),
    },
    promotion_policy: promotionPolicy,
    trend: args.trend.deltas,
  };
}
|
||||||
|
|
||||||
function expandHomePath(pathValue: string): string {
|
function expandHomePath(pathValue: string): string {
|
||||||
if (pathValue.startsWith('~/')) {
|
if (pathValue.startsWith('~/')) {
|
||||||
return resolve(homedir(), pathValue.slice(2));
|
return resolve(homedir(), pathValue.slice(2));
|
||||||
@@ -1337,10 +1407,11 @@ export function registerSkillsCommand(program: Command): void {
|
|||||||
.command('rollout-status')
|
.command('rollout-status')
|
||||||
.description('Show shell runner rollout guardrails and audit review summary')
|
.description('Show shell runner rollout guardrails and audit review summary')
|
||||||
.option('--days <n>', 'Look back N days in audit logs (default: 7)', '7')
|
.option('--days <n>', 'Look back N days in audit logs (default: 7)', '7')
|
||||||
|
.option('--contract', 'Output dedicated machine-readable promotion contract JSON')
|
||||||
.option('--out <path>', 'Write rollout JSON payload to file')
|
.option('--out <path>', 'Write rollout JSON payload to file')
|
||||||
.option('--json', 'Output as JSON')
|
.option('--json', 'Output as JSON')
|
||||||
.option('-c, --config <path>', 'Config file path')
|
.option('-c, --config <path>', 'Config file path')
|
||||||
.action(async (opts: { days?: string; out?: string; json?: boolean; config?: string }) => {
|
.action(async (opts: { days?: string; contract?: boolean; out?: string; json?: boolean; config?: string }) => {
|
||||||
const loaded = loadConfigSafe(opts.config);
|
const loaded = loadConfigSafe(opts.config);
|
||||||
if (loaded.error || !loaded.config) {
|
if (loaded.error || !loaded.config) {
|
||||||
console.error(loaded.error ?? 'Failed to load config');
|
console.error(loaded.error ?? 'Failed to load config');
|
||||||
@@ -1380,7 +1451,9 @@ export function registerSkillsCommand(program: Command): void {
|
|||||||
promotion_min_success_rate: governance.promotion_min_success_rate,
|
promotion_min_success_rate: governance.promotion_min_success_rate,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
const generatedAt = new Date(nowMs).toISOString();
|
||||||
const rolloutPayload = {
|
const rolloutPayload = {
|
||||||
|
generated_at: generatedAt,
|
||||||
days: parsedDays,
|
days: parsedDays,
|
||||||
guardrails,
|
guardrails,
|
||||||
summary: trend.current,
|
summary: trend.current,
|
||||||
@@ -1393,9 +1466,33 @@ export function registerSkillsCommand(program: Command): void {
|
|||||||
promotion_min_success_rate: governance.promotion_min_success_rate,
|
promotion_min_success_rate: governance.promotion_min_success_rate,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
const promotionContract = toShellRunnerPromotionContract({
|
||||||
|
generatedAt,
|
||||||
|
days: parsedDays,
|
||||||
|
recommendation,
|
||||||
|
guardrails,
|
||||||
|
summary: trend.current,
|
||||||
|
trend,
|
||||||
|
promotionPolicy,
|
||||||
|
governance: {
|
||||||
|
owner: governance.owner ?? null,
|
||||||
|
review_cadence_days: governance.review_cadence_days,
|
||||||
|
promotion_min_success_rate: governance.promotion_min_success_rate,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
if (opts.out) {
|
if (opts.out) {
|
||||||
writeFileSync(expandHomePath(opts.out), JSON.stringify(rolloutPayload, null, 2), 'utf-8');
|
writeFileSync(
|
||||||
|
expandHomePath(opts.out),
|
||||||
|
JSON.stringify(opts.contract ? promotionContract : rolloutPayload, null, 2),
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (opts.contract) {
|
||||||
|
console.log(JSON.stringify(promotionContract, null, 2));
|
||||||
|
process.exitCode = promotionContract.gate.exit_code;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opts.json) {
|
if (opts.json) {
|
||||||
|
|||||||
Reference in New Issue
Block a user