From 68cdc2cf8b0531dc609bae693d99432bb7028724 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Fri, 27 Feb 2026 08:47:31 -0800 Subject: [PATCH] feat(audit): add backend-scoped phase0 live baseline capture --- README.md | 18 +- docs/api/PROTOCOL.md | 2 +- docs/architecture/AGENT_DIAGRAM.md | 1 + .../GATEWAY_SESSIONS_AND_QUEUE.md | 1 + ...phase0-instrumentation-ticket-checklist.md | 2 +- .../phase0_baseline_live_2026-02-27.json | 124 +++---- .../phase0_baseline_live_2026-02-27.jsonl | 16 + .../phase0_baseline_live_2026-02-27.md | 26 +- ...seline_live_backend_native_2026-02-27.json | 229 ++++++++++++ ...eline_live_backend_native_2026-02-27.jsonl | 13 + ...baseline_live_backend_native_2026-02-27.md | 55 +++ ...e_live_backend_pi_embedded_2026-02-27.json | 333 ++++++++++++++++++ ..._live_backend_pi_embedded_2026-02-27.jsonl | 56 +++ ...ine_live_backend_pi_embedded_2026-02-27.md | 62 ++++ ...ase0_baseline_live_gateway_2026-02-27.json | 2 +- docs/plans/state.json | 41 ++- package.json | 2 + scripts/capture-phase0-live-baseline.ts | 54 ++- src/audit/phase0LiveBaseline.test.ts | 31 +- src/audit/phase0LiveBaseline.ts | 78 +++- 20 files changed, 1055 insertions(+), 91 deletions(-) create mode 100644 docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.json create mode 100644 docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.jsonl create mode 100644 docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.md create mode 100644 docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.json create mode 100644 docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.jsonl create mode 100644 docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.md diff --git a/README.md b/README.md index 53accbe..28064bd 100644 --- a/README.md +++ b/README.md @@ -1629,6 +1629,12 @@ Live baseline artifacts (sample JSONL + JSON/Markdown summaries) can be captured pnpm audit:phase0-baseline:live ``` +Backend-scoped channel windows: +```bash +pnpm audit:phase0-baseline:live:pi +pnpm audit:phase0-baseline:live:native +``` + One-shot refresh for both channel + gateway live windows: ```bash pnpm audit:phase0-baseline:live:refresh @@ -1645,17 +1651,19 @@ Gateway-origin windows can be captured separately (for example when validating c pnpm audit:phase0-baseline:live:gateway ``` -The gateway command auto-selects the most recent session window containing both `run.cancel` and `run.state=cancelled` (with configurable padding). You can still capture explicit windows by restricting source + time bounds: +The gateway command auto-selects the most recent session window containing both `run.cancel` and `run.state=cancelled` (with configurable padding). You can also capture explicit backend-scoped channel windows by restricting source/backend/time bounds: ```bash node --import tsx/esm scripts/capture-phase0-live-baseline.ts \ --audit ~/.local/share/flynn/audit.log \ - --source gateway \ + --source channel \ + --backend pi_embedded \ --since \ --until \ - --sample-out docs/plans/artifacts/phase0_baseline_live_gateway_.jsonl \ - --summary-json-out docs/plans/artifacts/phase0_baseline_live_gateway_.json \ - --summary-md-out docs/plans/artifacts/phase0_baseline_live_gateway_.md + --sample-out docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_.jsonl \ + --summary-json-out docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_.json \ + --summary-md-out docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_.md ``` +Use `--backend native` for native-only windows, or omit `--backend` for all backends. ## Gateway Lock diff --git a/docs/api/PROTOCOL.md b/docs/api/PROTOCOL.md index 71a70b4..9de9caf 100644 --- a/docs/api/PROTOCOL.md +++ b/docs/api/PROTOCOL.md @@ -23,7 +23,7 @@ The gateway provides: - **HTTP Server**: Serves static dashboard and handles webhook endpoints - **Node Capability Negotiation**: Optional companion-node role/capability registration -Operational note: onboarding (`flynn setup` / `flynn onboard`) now runs post-save live readiness checks (model/channel/memory/automation) and prints a guided first-success task flow. Companion CLI now also supports bootstrap-manifest export (`flynn companion --export-bootstrap `), release-bundle export (`--export-release-bundle ` with optional `--signing-key`/`--signing-key-id` signature output), release-bundle verification (`--verify-release-bundle ` with optional `--verify-signing-key`/`--verify-signing-key-id`/`--require-signature`), platform shell-template export (`--export-shell-template `), plus richer shell bootstrap flags for status/location/push (`--app-version`, `--latitude/--longitude`, `--push-token`, etc.) for desktop/mobile app packaging without changing JSON-RPC method/event shapes. Audit observability now includes live phase-0 baseline capture flows: `pnpm audit:phase0-baseline:live` for channel-origin windows, `pnpm audit:phase0-baseline:live:gateway` (auto-detected cancel window) for gateway-origin windows, and `pnpm audit:phase0-baseline:live:refresh` for one-shot refresh of both windows. These scripts default to current UTC-date tags unless `--tag` is explicitly provided. +Operational note: onboarding (`flynn setup` / `flynn onboard`) now runs post-save live readiness checks (model/channel/memory/automation) and prints a guided first-success task flow. Companion CLI now also supports bootstrap-manifest export (`flynn companion --export-bootstrap `), release-bundle export (`--export-release-bundle ` with optional `--signing-key`/`--signing-key-id` signature output), release-bundle verification (`--verify-release-bundle ` with optional `--verify-signing-key`/`--verify-signing-key-id`/`--require-signature`), platform shell-template export (`--export-shell-template `), plus richer shell bootstrap flags for status/location/push (`--app-version`, `--latitude/--longitude`, `--push-token`, etc.) for desktop/mobile app packaging without changing JSON-RPC method/event shapes. Audit observability now includes live phase-0 baseline capture flows: `pnpm audit:phase0-baseline:live` for channel-origin windows, backend-scoped variants (`pnpm audit:phase0-baseline:live:pi` / `pnpm audit:phase0-baseline:live:native`) via `--backend`, `pnpm audit:phase0-baseline:live:gateway` (auto-detected cancel window) for gateway-origin windows, and `pnpm audit:phase0-baseline:live:refresh` for one-shot refresh of both windows. These scripts default to current UTC-date tags unless `--tag` is explicitly provided. ### Execution Model (Sessions + Per-Session Queue) diff --git a/docs/architecture/AGENT_DIAGRAM.md b/docs/architecture/AGENT_DIAGRAM.md index 401ac2f..c73f8c0 100644 --- a/docs/architecture/AGENT_DIAGRAM.md +++ b/docs/architecture/AGENT_DIAGRAM.md @@ -167,6 +167,7 @@ Gateway streaming UX signals: - `.github/workflows/companion-reference-apps-check.yml` enforces reference-app generator sync in CI. - `flynn companion` can bootstrap status/location/push metadata on connect (`node.status.set` + optional `node.location.set`/`node.push_token.set`) so thin companion shells can register operational context in one launch. - `pnpm audit:phase0-baseline:live` captures anonymized channel-origin live run/reaction baseline artifacts from real audit logs. +- `pnpm audit:phase0-baseline:live:pi` and `pnpm audit:phase0-baseline:live:native` capture backend-scoped channel windows using `backend.route` timelines. - `pnpm audit:phase0-baseline:live:gateway` captures gateway-origin baseline windows by auto-selecting the latest cancel/cancelled session window (or use `scripts/capture-phase0-live-baseline.ts --source gateway --since ... --until ...` for explicit windows). - `pnpm audit:phase0-baseline:live:refresh` runs both channel + gateway capture commands in one step for cadence refreshes. - `audit:phase0-baseline:live*` scripts are cadence-safe by default (UTC-date tags auto-generated unless explicitly overridden). diff --git a/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md b/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md index df3d952..d813d85 100644 --- a/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md +++ b/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md @@ -32,6 +32,7 @@ If you only want the protocol surface, see `docs/api/PROTOCOL.md`. - CI workflow `.github/workflows/companion-release-bundle.yml` mirrors this pipeline for manual artifact generation/upload. - CI workflow `.github/workflows/companion-reference-apps-check.yml` enforces reference-app generator sync on pull requests. - Audit phase-0 live telemetry snapshots can be regenerated with `pnpm audit:phase0-baseline:live` (channel-origin anonymized sample JSONL + summary JSON/markdown artifacts). +- Backend-scoped channel snapshots can be regenerated with `pnpm audit:phase0-baseline:live:pi` / `pnpm audit:phase0-baseline:live:native` (`--backend` filtering via `backend.route` timelines). - Gateway-origin phase-0 windows (including cancel-path samples) can be captured with `pnpm audit:phase0-baseline:live:gateway` (auto-detect latest cancel window) or `scripts/capture-phase0-live-baseline.ts --source gateway --since ... --until ...` for explicit bounds. - `pnpm audit:phase0-baseline:live:refresh` runs both capture paths to refresh channel + gateway artifacts in one command. - `audit:phase0-baseline:live*` package scripts now omit fixed tags so scheduled runs automatically roll to current UTC-date artifact tags. diff --git a/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md b/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md index 349b4ba..570e3af 100644 --- a/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md +++ b/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md @@ -203,7 +203,7 @@ Phase 0 is complete when: 2. A baseline summary artifact is generated and committed under `docs/plans/artifacts/`. 3. No user-visible response behavior changed compared to pre-phase baseline. -Follow-up status (2026-02-27): live channel-session artifacts exist under `docs/plans/artifacts/phase0_baseline_live_2026-02-27.*` via `pnpm audit:phase0-baseline:live` (anonymized IDs), and a second gateway-origin live window (including `run.cancel` + `cancel_requested`/`cancelled`) exists under `docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.*`. Gateway window refreshes can now run via `pnpm audit:phase0-baseline:live:gateway` (auto-selected cancel window), and both windows can be refreshed together with `pnpm audit:phase0-baseline:live:refresh` (scheduling example included in README). +Follow-up status (2026-02-27): live channel-session artifacts exist under `docs/plans/artifacts/phase0_baseline_live_2026-02-27.*` via `pnpm audit:phase0-baseline:live` (anonymized IDs), and a second gateway-origin live window (including `run.cancel` + `cancel_requested`/`cancelled`) exists under `docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.*`. Gateway window refreshes can now run via `pnpm audit:phase0-baseline:live:gateway` (auto-selected cancel window), both windows can be refreshed together with `pnpm audit:phase0-baseline:live:refresh` (scheduling example included in README), and backend-scoped channel windows are now available via `pnpm audit:phase0-baseline:live:pi` / `pnpm audit:phase0-baseline:live:native`. ## Subagent Model Assignment Plan diff --git a/docs/plans/artifacts/phase0_baseline_live_2026-02-27.json b/docs/plans/artifacts/phase0_baseline_live_2026-02-27.json index 92033a5..83c2722 100644 --- a/docs/plans/artifacts/phase0_baseline_live_2026-02-27.json +++ b/docs/plans/artifacts/phase0_baseline_live_2026-02-27.json @@ -1,8 +1,8 @@ { - "generated_at": "2026-02-27T08:43:10.518Z", + "generated_at": "2026-02-27T16:46:42.576Z", "source_audit_path": "~/.local/share/flynn/audit.log", - "source_event_count": 94, - "sampled_event_count": 88, + "source_event_count": 110, + "sampled_event_count": 104, "filters": { "sources": [ "channel" @@ -22,19 +22,19 @@ }, "summary": { "event_counts": { - "run_state": 55, + "run_state": 65, "run_cancel": 0, "reaction_match": 0, - "reaction_skip": 33 + "reaction_skip": 39 }, "run_outcomes": { "overall": { - "total_outcomes": 23, - "complete": 23, + "total_outcomes": 27, + "complete": 27, "cancelled": 0, "error": 0, "cancel_requested": 0, - "start": 32, + "start": 38, "completion_rate_pct": 100, "cancel_rate_pct": 0, "error_rate_pct": 0 @@ -43,12 +43,12 @@ { "key": "gmail", "stats": { - "total_outcomes": 22, - "complete": 22, + "total_outcomes": 25, + "complete": 25, "cancelled": 0, "error": 0, "cancel_requested": 0, - "start": 22, + "start": 25, "completion_rate_pct": 100, "cancel_rate_pct": 0, "error_rate_pct": 0 @@ -57,12 +57,12 @@ { "key": "cron", "stats": { - "total_outcomes": 1, - "complete": 1, + "total_outcomes": 2, + "complete": 2, "cancelled": 0, "error": 0, "cancel_requested": 0, - "start": 10, + "start": 13, "completion_rate_pct": 100, "cancel_rate_pct": 0, "error_rate_pct": 0 @@ -73,12 +73,12 @@ { "key": "session_2f2f1e414e81", "stats": { - "total_outcomes": 4, - "complete": 4, + "total_outcomes": 5, + "complete": 5, "cancelled": 0, "error": 0, "cancel_requested": 0, - "start": 4, + "start": 5, "completion_rate_pct": 100, "cancel_rate_pct": 0, "error_rate_pct": 0 @@ -126,6 +126,20 @@ "error_rate_pct": 0 } }, + { + "key": "session_3ffb2e631ab1", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, { "key": "session_4d9e843358a3", "stats": { @@ -154,6 +168,20 @@ "error_rate_pct": 0 } }, + { + "key": "session_5ae4ad331184", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, { "key": "session_7d3c3ff67d4f", "stats": { @@ -168,6 +196,20 @@ "error_rate_pct": 0 } }, + { + "key": "session_7db5014f64fe", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, { "key": "session_8849a4464275", "stats": { @@ -307,62 +349,20 @@ "cancel_rate_pct": 0, "error_rate_pct": 0 } - }, - { - "key": "session_2b07a8d38406", - "stats": { - "total_outcomes": 0, - "complete": 0, - "cancelled": 0, - "error": 0, - "cancel_requested": 0, - "start": 1, - "completion_rate_pct": null, - "cancel_rate_pct": null, - "error_rate_pct": null - } - }, - { - "key": "session_2d8872945bf8", - "stats": { - "total_outcomes": 0, - "complete": 0, - "cancelled": 0, - "error": 0, - "cancel_requested": 0, - "start": 1, - "completion_rate_pct": null, - "cancel_rate_pct": null, - "error_rate_pct": null - } - }, - { - "key": "session_31b6400467ce", - "stats": { - "total_outcomes": 0, - "complete": 0, - "cancelled": 0, - "error": 0, - "cancel_requested": 0, - "start": 1, - "completion_rate_pct": null, - "cancel_rate_pct": null, - "error_rate_pct": null - } } ] }, "cancel_latency_ms": null, "reactions": { "matched": 0, - "skipped": 33, - "total": 33, + "skipped": 39, + "total": 39, "match_rate_pct": 0, "skip_rate_pct": 100, "skip_reasons": [ { "reason": "no_rules", - "count": 33, + "count": 39, "pct": 100 } ] diff --git a/docs/plans/artifacts/phase0_baseline_live_2026-02-27.jsonl b/docs/plans/artifacts/phase0_baseline_live_2026-02-27.jsonl index 275f9e0..39cacf1 100644 --- a/docs/plans/artifacts/phase0_baseline_live_2026-02-27.jsonl +++ b/docs/plans/artifacts/phase0_baseline_live_2026-02-27.jsonl @@ -86,3 +86,19 @@ {"level":"info","event_type":"run.state","event":{"session_id":"session_683372f346c3","channel":"cron","sender":"sender_485b96f48f25","source":"channel","state":"start","request_id":"request_caa9ec775af8"},"timestamp":1772168400038} {"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_31b6400467ce","channel":"cron","sender":"sender_8255de70c756","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772175600039} {"level":"info","event_type":"run.state","event":{"session_id":"session_31b6400467ce","channel":"cron","sender":"sender_8255de70c756","source":"channel","state":"start","request_id":"request_40d7589e715b"},"timestamp":1772175600040} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_a3f64a8e3c1e","channel":"cron","sender":"sender_a31bd6d4a95a","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772182800033} +{"level":"info","event_type":"run.state","event":{"session_id":"session_a3f64a8e3c1e","channel":"cron","sender":"sender_a31bd6d4a95a","source":"channel","state":"start","request_id":"request_fc572d83d4c6"},"timestamp":1772182800034} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_7db5014f64fe","channel":"gmail","sender":"sender_81e925eaae0e","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772197156530} +{"level":"info","event_type":"run.state","event":{"session_id":"session_7db5014f64fe","channel":"gmail","sender":"sender_81e925eaae0e","source":"channel","state":"start","request_id":"request_7f5d82d1085a"},"timestamp":1772197156530} +{"level":"info","event_type":"run.state","event":{"session_id":"session_7db5014f64fe","channel":"gmail","sender":"sender_81e925eaae0e","source":"channel","state":"complete","request_id":"request_7f5d82d1085a","duration_ms":5231},"timestamp":1772197161761} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_d576f98a5348","channel":"cron","sender":"sender_86045f8e1835","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772197200013} +{"level":"info","event_type":"run.state","event":{"session_id":"session_d576f98a5348","channel":"cron","sender":"sender_86045f8e1835","source":"channel","state":"start","request_id":"request_ce8df697a3cf"},"timestamp":1772197200014} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_3ffb2e631ab1","channel":"gmail","sender":"sender_745442e44534","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772199256802} +{"level":"info","event_type":"run.state","event":{"session_id":"session_3ffb2e631ab1","channel":"gmail","sender":"sender_745442e44534","source":"channel","state":"start","request_id":"request_3d4d23510682"},"timestamp":1772199256803} +{"level":"info","event_type":"run.state","event":{"session_id":"session_3ffb2e631ab1","channel":"gmail","sender":"sender_745442e44534","source":"channel","state":"complete","request_id":"request_3d4d23510682","duration_ms":3836},"timestamp":1772199260639} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772206157229} +{"level":"info","event_type":"run.state","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","state":"start","request_id":"request_ab73d670c119"},"timestamp":1772206157229} +{"level":"info","event_type":"run.state","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","state":"complete","request_id":"request_ab73d670c119","duration_ms":3850},"timestamp":1772206161079} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_5ae4ad331184","channel":"cron","sender":"sender_a912a223d950","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772208000012} +{"level":"info","event_type":"run.state","event":{"session_id":"session_5ae4ad331184","channel":"cron","sender":"sender_a912a223d950","source":"channel","state":"start","request_id":"request_a3bafbb93755"},"timestamp":1772208000013} +{"level":"info","event_type":"run.state","event":{"session_id":"session_5ae4ad331184","channel":"cron","sender":"sender_a912a223d950","source":"channel","state":"complete","request_id":"request_a3bafbb93755","duration_ms":35239},"timestamp":1772208035252} diff --git a/docs/plans/artifacts/phase0_baseline_live_2026-02-27.md b/docs/plans/artifacts/phase0_baseline_live_2026-02-27.md index ca5228b..c160436 100644 --- a/docs/plans/artifacts/phase0_baseline_live_2026-02-27.md +++ b/docs/plans/artifacts/phase0_baseline_live_2026-02-27.md @@ -1,39 +1,42 @@ # Phase 0 Baseline Telemetry Summary -- Run state events: 55 +- Run state events: 65 - Run cancel events: 0 - Reaction matches: 0 -- Reaction skips: 33 +- Reaction skips: 39 - Sources: channel ## Run Outcomes (Overall) -- Total outcomes: 23 -- Complete: 23 (100.00%) +- Total outcomes: 27 +- Complete: 27 (100.00%) - Cancelled: 0 (0.00%) - Errors: 0 (0.00%) - Cancel requested: 0 -- Starts: 32 +- Starts: 38 ## Run Outcomes by Channel | Channel | Outcomes | Complete | Cancelled | Error | Complete % | Cancel % | Error % | Cancel Req | Starts | | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | -| gmail | 22 | 22 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 22 | -| cron | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 10 | +| gmail | 25 | 25 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 25 | +| cron | 2 | 2 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 13 | ## Run Outcomes by Session | Session | Outcomes | Complete | Cancelled | Error | Complete % | Cancel % | Error % | Cancel Req | Starts | | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | -| session_2f2f1e414e81 | 4 | 4 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 4 | +| session_2f2f1e414e81 | 5 | 5 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 5 | | session_f4d8ddc04194 | 3 | 3 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 3 | | session_eabc3c2a91b9 | 2 | 2 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 2 | | session_33469de5a1ee | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| session_3ffb2e631ab1 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | | session_4d9e843358a3 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | | session_58a64b6f2c91 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| session_5ae4ad331184 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | | session_7d3c3ff67d4f | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| session_7db5014f64fe | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | | session_8849a4464275 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | | session_8b51db8cde21 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | | session_9067cf5e3558 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | @@ -44,9 +47,6 @@ | session_ea839415979e | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | | session_f6304f25e43b | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | | session_fd6536fa5ff4 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | -| session_2b07a8d38406 | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 1 | -| session_2d8872945bf8 | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 1 | -| session_31b6400467ce | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 1 | ## Cancel Latency @@ -55,11 +55,11 @@ ## Reaction Decisions - Matched: 0 (0.00%) -- Skipped: 33 (100.00%) +- Skipped: 39 (100.00%) ### Skip Reasons | Reason | Count | Percent | | --- | ---: | ---: | -| no_rules | 33 | 100.00% | +| no_rules | 39 | 100.00% | diff --git a/docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.json b/docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.json new file mode 100644 index 0000000..27dd98d --- /dev/null +++ b/docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.json @@ -0,0 +1,229 @@ +{ + "generated_at": "2026-02-27T16:45:18.490Z", + "source_audit_path": "~/.local/share/flynn/audit.log", + "source_event_count": 110, + "sampled_event_count": 13, + "filters": { + "sources": [ + "channel" + ], + "backend_targets": [ + "native" + ], + "exclude_session_substrings": [ + "probe" + ], + "anonymized_identifiers": true, + "backend_route_event_count": 127 + }, + "options": { + "sources": [ + "channel" + ], + "maxSessions": 20, + "maxChannels": 20, + "maxSkipReasons": 10 + }, + "summary": { + "event_counts": { + "run_state": 13, + "run_cancel": 0, + "reaction_match": 0, + "reaction_skip": 0 + }, + "run_outcomes": { + "overall": { + "total_outcomes": 2, + "complete": 2, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 11, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + }, + "by_channel": [ + { + "key": "cron", + "stats": { + "total_outcomes": 2, + "complete": 2, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 11, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + } + ], + "by_session": [ + { + "key": "session_5ae4ad331184", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_a83fde4c8fdb", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_2d8872945bf8", + "stats": { + "total_outcomes": 0, + "complete": 0, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": null, + "cancel_rate_pct": null, + "error_rate_pct": null + } + }, + { + "key": "session_31b6400467ce", + "stats": { + "total_outcomes": 0, + "complete": 0, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": null, + "cancel_rate_pct": null, + "error_rate_pct": null + } + }, + { + "key": "session_3c43a0cc0a62", + "stats": { + "total_outcomes": 0, + "complete": 0, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": null, + "cancel_rate_pct": null, + "error_rate_pct": null + } + }, + { + "key": "session_494cb3b392af", + "stats": { + "total_outcomes": 0, + "complete": 0, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": null, + "cancel_rate_pct": null, + "error_rate_pct": null + } + }, + { + "key": "session_49b700741e03", + "stats": { + "total_outcomes": 0, + "complete": 0, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": null, + "cancel_rate_pct": null, + "error_rate_pct": null + } + }, + { + "key": "session_4cd8ba5e6df5", + "stats": { + "total_outcomes": 0, + "complete": 0, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": null, + "cancel_rate_pct": null, + "error_rate_pct": null + } + }, + { + "key": "session_683372f346c3", + "stats": { + "total_outcomes": 0, + "complete": 0, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": null, + "cancel_rate_pct": null, + "error_rate_pct": null + } + }, + { + "key": "session_a3f64a8e3c1e", + "stats": { + "total_outcomes": 0, + "complete": 0, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": null, + "cancel_rate_pct": null, + "error_rate_pct": null + } + }, + { + "key": "session_ffcee254d546", + "stats": { + "total_outcomes": 0, + "complete": 0, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": null, + "cancel_rate_pct": null, + "error_rate_pct": null + } + } + ] + }, + "cancel_latency_ms": null, + "reactions": { + "matched": 0, + "skipped": 0, + "total": 0, + "match_rate_pct": null, + "skip_rate_pct": null, + "skip_reasons": [] + } + } +} diff --git a/docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.jsonl b/docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.jsonl new file mode 100644 index 0000000..6ad563e --- /dev/null +++ b/docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.jsonl @@ -0,0 +1,13 @@ +{"level":"info","event_type":"run.state","event":{"session_id":"session_2d8872945bf8","channel":"cron","sender":"sender_4787722f90c7","source":"channel","state":"start","request_id":"request_eabae852ec40"},"timestamp":1772082000082} +{"level":"info","event_type":"run.state","event":{"session_id":"session_ffcee254d546","channel":"cron","sender":"sender_75c445c6fdad","source":"channel","state":"start","request_id":"request_f99421283d74"},"timestamp":1772089200086} +{"level":"info","event_type":"run.state","event":{"session_id":"session_49b700741e03","channel":"cron","sender":"sender_ecc7d1bae06e","source":"channel","state":"start","request_id":"request_33cb5e8b6843"},"timestamp":1772096400008} +{"level":"info","event_type":"run.state","event":{"session_id":"session_4cd8ba5e6df5","channel":"cron","sender":"sender_247a7c21dbdd","source":"channel","state":"start","request_id":"request_60b338f30d46"},"timestamp":1772110800019} +{"level":"info","event_type":"run.state","event":{"session_id":"session_a83fde4c8fdb","channel":"cron","sender":"sender_99a8e5949abe","source":"channel","state":"start","request_id":"request_011dde9a88a3"},"timestamp":1772121600017} +{"level":"info","event_type":"run.state","event":{"session_id":"session_a83fde4c8fdb","channel":"cron","sender":"sender_99a8e5949abe","source":"channel","state":"complete","request_id":"request_011dde9a88a3","duration_ms":41131},"timestamp":1772121641148} +{"level":"info","event_type":"run.state","event":{"session_id":"session_3c43a0cc0a62","channel":"cron","sender":"sender_a2f138926e17","source":"channel","state":"start","request_id":"request_eabfc26524d0"},"timestamp":1772125200026} +{"level":"info","event_type":"run.state","event":{"session_id":"session_494cb3b392af","channel":"cron","sender":"sender_dfc2df9eb18e","source":"channel","state":"start","request_id":"request_ea48f4337dc3"},"timestamp":1772139600017} +{"level":"info","event_type":"run.state","event":{"session_id":"session_683372f346c3","channel":"cron","sender":"sender_485b96f48f25","source":"channel","state":"start","request_id":"request_caa9ec775af8"},"timestamp":1772168400038} +{"level":"info","event_type":"run.state","event":{"session_id":"session_31b6400467ce","channel":"cron","sender":"sender_8255de70c756","source":"channel","state":"start","request_id":"request_40d7589e715b"},"timestamp":1772175600040} +{"level":"info","event_type":"run.state","event":{"session_id":"session_a3f64a8e3c1e","channel":"cron","sender":"sender_a31bd6d4a95a","source":"channel","state":"start","request_id":"request_fc572d83d4c6"},"timestamp":1772182800034} +{"level":"info","event_type":"run.state","event":{"session_id":"session_5ae4ad331184","channel":"cron","sender":"sender_a912a223d950","source":"channel","state":"start","request_id":"request_a3bafbb93755"},"timestamp":1772208000013} +{"level":"info","event_type":"run.state","event":{"session_id":"session_5ae4ad331184","channel":"cron","sender":"sender_a912a223d950","source":"channel","state":"complete","request_id":"request_a3bafbb93755","duration_ms":35239},"timestamp":1772208035252} diff --git a/docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.md b/docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.md new file mode 100644 index 0000000..ab51d57 --- /dev/null +++ b/docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.md @@ -0,0 +1,55 @@ +# Phase 0 Baseline Telemetry Summary + +- Run state events: 13 +- Run cancel events: 0 +- Reaction matches: 0 +- Reaction skips: 0 + +- Sources: channel + +## Run Outcomes (Overall) + +- Total outcomes: 2 +- Complete: 2 (100.00%) +- Cancelled: 0 (0.00%) +- Errors: 0 (0.00%) +- Cancel requested: 0 +- Starts: 11 + +## Run Outcomes by Channel + +| Channel | Outcomes | Complete | Cancelled | Error | Complete % | Cancel % | Error % | Cancel Req | Starts | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| cron | 2 | 2 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 11 | + +## Run Outcomes by Session + +| Session | Outcomes | Complete | Cancelled | Error | Complete % | Cancel % | Error % | Cancel Req | Starts | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| session_5ae4ad331184 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| session_a83fde4c8fdb | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| session_2d8872945bf8 | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 1 | +| session_31b6400467ce | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 1 | +| session_3c43a0cc0a62 | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 1 | +| session_494cb3b392af | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 1 | +| session_49b700741e03 | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 1 | +| session_4cd8ba5e6df5 | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 1 | +| session_683372f346c3 | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 1 | +| session_a3f64a8e3c1e | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 1 | +| session_ffcee254d546 | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 1 | + +## Cancel Latency + +- No cancel latency samples. + +## Reaction Decisions + +- Matched: 0 (n/a) +- Skipped: 0 (n/a) + +### Skip Reasons + +| Reason | Count | Percent | +| --- | ---: | ---: | +| _none_ | 0 | 0.00% | + diff --git a/docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.json b/docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.json new file mode 100644 index 0000000..2e01ba4 --- /dev/null +++ b/docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.json @@ -0,0 +1,333 @@ +{ + "generated_at": "2026-02-27T16:45:18.488Z", + "source_audit_path": "~/.local/share/flynn/audit.log", + "source_event_count": 110, + "sampled_event_count": 56, + "filters": { + "sources": [ + "channel" + ], + "backend_targets": [ + "pi_embedded" + ], + "exclude_session_substrings": [ + "probe" + ], + "anonymized_identifiers": true, + "backend_route_event_count": 127 + }, + "options": { + "sources": [ + "channel" + ], + "maxSessions": 20, + "maxChannels": 20, + "maxSkipReasons": 10 + }, + "summary": { + "event_counts": { + "run_state": 42, + "run_cancel": 0, + "reaction_match": 0, + "reaction_skip": 14 + }, + "run_outcomes": { + "overall": { + "total_outcomes": 25, + "complete": 25, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 17, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + }, + "by_channel": [ + { + "key": "gmail", + "stats": { + "total_outcomes": 25, + "complete": 25, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 17, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + } + ], + "by_session": [ + { + "key": "session_2f2f1e414e81", + "stats": { + "total_outcomes": 5, + "complete": 5, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 5, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_f4d8ddc04194", + "stats": { + "total_outcomes": 3, + "complete": 3, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 3, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_eabc3c2a91b9", + "stats": { + "total_outcomes": 2, + "complete": 2, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_33469de5a1ee", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_3ffb2e631ab1", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 0, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_4d9e843358a3", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 0, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_58a64b6f2c91", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 0, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_7d3c3ff67d4f", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 0, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_7db5014f64fe", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 0, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_8849a4464275", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 0, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_8b51db8cde21", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_9067cf5e3558", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_a4b91821c664", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 0, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_cb9a69d8a362", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_e0a2a17b7329", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_ea839415979e", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_f6304f25e43b", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "session_fd6536fa5ff4", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + } + ] + }, + "cancel_latency_ms": null, + "reactions": { + "matched": 0, + "skipped": 14, + "total": 14, + "match_rate_pct": 0, + "skip_rate_pct": 100, + "skip_reasons": [ + { + "reason": "no_rules", + "count": 14, + "pct": 100 + } + ] + } + } +} diff --git a/docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.jsonl b/docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.jsonl new file mode 100644 index 0000000..717fe6b --- /dev/null +++ b/docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.jsonl @@ -0,0 +1,56 @@ +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772107655159} +{"level":"info","event_type":"run.state","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","state":"start","request_id":"request_7da150aff098"},"timestamp":1772107655160} +{"level":"info","event_type":"run.state","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","state":"complete","request_id":"request_7da150aff098","duration_ms":3324},"timestamp":1772107658484} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772114255688} +{"level":"info","event_type":"run.state","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","state":"start","request_id":"request_55013bd2ec5f"},"timestamp":1772114255688} +{"level":"info","event_type":"run.state","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","state":"complete","request_id":"request_55013bd2ec5f","duration_ms":3006},"timestamp":1772114258694} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772119955933} +{"level":"info","event_type":"run.state","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","state":"start","request_id":"request_a01be9a4284b"},"timestamp":1772119955933} +{"level":"info","event_type":"run.state","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","state":"complete","request_id":"request_a01be9a4284b","duration_ms":2379},"timestamp":1772119958312} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772120856043} +{"level":"info","event_type":"run.state","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","state":"start","request_id":"request_49c2900b17a3"},"timestamp":1772120856043} +{"level":"info","event_type":"run.state","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","state":"complete","request_id":"request_49c2900b17a3","duration_ms":4223},"timestamp":1772120860266} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_cb9a69d8a362","channel":"gmail","sender":"sender_48feae1a0ad8","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772122956201} +{"level":"info","event_type":"run.state","event":{"session_id":"session_cb9a69d8a362","channel":"gmail","sender":"sender_48feae1a0ad8","source":"channel","state":"start","request_id":"request_59fd88029c97"},"timestamp":1772122956201} +{"level":"info","event_type":"run.state","event":{"session_id":"session_cb9a69d8a362","channel":"gmail","sender":"sender_48feae1a0ad8","source":"channel","state":"complete","request_id":"request_59fd88029c97","duration_ms":2854},"timestamp":1772122959055} +{"level":"info","event_type":"run.state","event":{"session_id":"session_a4b91821c664","channel":"gmail","sender":"sender_9eff7c852e06","source":"channel","state":"complete","request_id":"request_7734a5dbc98e","duration_ms":5944},"timestamp":1772129748001} +{"level":"info","event_type":"run.state","event":{"session_id":"session_eabc3c2a91b9","channel":"gmail","sender":"sender_4fe02519d59e","source":"channel","state":"complete","request_id":"request_81d00f26b8cb","duration_ms":8017},"timestamp":1772129750396} +{"level":"info","event_type":"run.state","event":{"session_id":"session_58a64b6f2c91","channel":"gmail","sender":"sender_4222a55cdd53","source":"channel","state":"complete","request_id":"request_e59d82ef75e8","duration_ms":5232},"timestamp":1772131247844} +{"level":"info","event_type":"run.state","event":{"session_id":"session_e0a2a17b7329","channel":"gmail","sender":"sender_5c8cb7bfc88d","source":"channel","state":"start","request_id":"request_ed877aec1e58"},"timestamp":1772131542651} +{"level":"info","event_type":"run.state","event":{"session_id":"session_e0a2a17b7329","channel":"gmail","sender":"sender_5c8cb7bfc88d","source":"channel","state":"complete","request_id":"request_ed877aec1e58","duration_ms":6190},"timestamp":1772131548841} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_f6304f25e43b","channel":"gmail","sender":"sender_311c7608cc58","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772132142832} +{"level":"info","event_type":"run.state","event":{"session_id":"session_f6304f25e43b","channel":"gmail","sender":"sender_311c7608cc58","source":"channel","state":"start","request_id":"request_8fdb3054a74d"},"timestamp":1772132142833} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_eabc3c2a91b9","channel":"gmail","sender":"sender_4fe02519d59e","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772132142976} +{"level":"info","event_type":"run.state","event":{"session_id":"session_eabc3c2a91b9","channel":"gmail","sender":"sender_4fe02519d59e","source":"channel","state":"start","request_id":"request_487012d053f7"},"timestamp":1772132142976} +{"level":"info","event_type":"run.state","event":{"session_id":"session_f6304f25e43b","channel":"gmail","sender":"sender_311c7608cc58","source":"channel","state":"complete","request_id":"request_8fdb3054a74d","duration_ms":3727},"timestamp":1772132146560} +{"level":"info","event_type":"run.state","event":{"session_id":"session_eabc3c2a91b9","channel":"gmail","sender":"sender_4fe02519d59e","source":"channel","state":"complete","request_id":"request_487012d053f7","duration_ms":4848},"timestamp":1772132147824} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_ea839415979e","channel":"gmail","sender":"sender_63a36881e696","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772133342779} +{"level":"info","event_type":"run.state","event":{"session_id":"session_ea839415979e","channel":"gmail","sender":"sender_63a36881e696","source":"channel","state":"start","request_id":"request_46feddcf35ba"},"timestamp":1772133342779} +{"level":"info","event_type":"run.state","event":{"session_id":"session_ea839415979e","channel":"gmail","sender":"sender_63a36881e696","source":"channel","state":"complete","request_id":"request_46feddcf35ba","duration_ms":2091},"timestamp":1772133344870} +{"level":"info","event_type":"run.state","event":{"session_id":"session_4d9e843358a3","channel":"gmail","sender":"sender_597782907690","source":"channel","state":"complete","request_id":"request_a43658b8d10f","duration_ms":7829},"timestamp":1772135687339} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_84ba9e30a4aa","channel":"telegram","sender":"sender_403740748465","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772135865178} +{"level":"info","event_type":"run.state","event":{"session_id":"session_7d3c3ff67d4f","channel":"gmail","sender":"sender_1625f89b7500","source":"channel","state":"complete","request_id":"request_882bdf0a0b51","duration_ms":5046},"timestamp":1772136369558} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_f4d8ddc04194","channel":"gmail","sender":"sender_c8a436a5eb54","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772142493741} +{"level":"info","event_type":"run.state","event":{"session_id":"session_f4d8ddc04194","channel":"gmail","sender":"sender_c8a436a5eb54","source":"channel","state":"start","request_id":"request_b67bb7dcfb3e"},"timestamp":1772142493741} +{"level":"info","event_type":"run.state","event":{"session_id":"session_f4d8ddc04194","channel":"gmail","sender":"sender_c8a436a5eb54","source":"channel","state":"complete","request_id":"request_b67bb7dcfb3e","duration_ms":6271},"timestamp":1772142500012} +{"level":"info","event_type":"run.state","event":{"session_id":"session_8849a4464275","channel":"gmail","sender":"sender_5aa2798b691a","source":"channel","state":"complete","request_id":"request_f0b319de0e0f","duration_ms":3148},"timestamp":1772143096664} +{"level":"info","event_type":"run.state","event":{"session_id":"session_9067cf5e3558","channel":"gmail","sender":"sender_5375920e43c6","source":"channel","state":"start","request_id":"request_802086e4ecb7"},"timestamp":1772144293784} +{"level":"info","event_type":"run.state","event":{"session_id":"session_9067cf5e3558","channel":"gmail","sender":"sender_5375920e43c6","source":"channel","state":"complete","request_id":"request_802086e4ecb7","duration_ms":3535},"timestamp":1772144297319} +{"level":"info","event_type":"run.state","event":{"session_id":"session_33469de5a1ee","channel":"gmail","sender":"sender_23f2c718f92a","source":"channel","state":"start","request_id":"request_e1ee7948be6e"},"timestamp":1772147893923} +{"level":"info","event_type":"run.state","event":{"session_id":"session_33469de5a1ee","channel":"gmail","sender":"sender_23f2c718f92a","source":"channel","state":"complete","request_id":"request_e1ee7948be6e","duration_ms":4090},"timestamp":1772147898013} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_f4d8ddc04194","channel":"gmail","sender":"sender_c8a436a5eb54","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772151794148} +{"level":"info","event_type":"run.state","event":{"session_id":"session_f4d8ddc04194","channel":"gmail","sender":"sender_c8a436a5eb54","source":"channel","state":"start","request_id":"request_dc04b15aeb0d"},"timestamp":1772151794148} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_f4d8ddc04194","channel":"gmail","sender":"sender_c8a436a5eb54","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772151794297} +{"level":"info","event_type":"run.state","event":{"session_id":"session_f4d8ddc04194","channel":"gmail","sender":"sender_c8a436a5eb54","source":"channel","state":"start","request_id":"request_f786a5385b23"},"timestamp":1772151794297} +{"level":"info","event_type":"run.state","event":{"session_id":"session_f4d8ddc04194","channel":"gmail","sender":"sender_c8a436a5eb54","source":"channel","state":"complete","request_id":"request_dc04b15aeb0d","duration_ms":8516},"timestamp":1772151802664} +{"level":"info","event_type":"run.state","event":{"session_id":"session_f4d8ddc04194","channel":"gmail","sender":"sender_c8a436a5eb54","source":"channel","state":"complete","request_id":"request_f786a5385b23","duration_ms":9850},"timestamp":1772151804147} +{"level":"info","event_type":"run.state","event":{"session_id":"session_fd6536fa5ff4","channel":"gmail","sender":"sender_fcf96878ddcb","source":"channel","state":"start","request_id":"request_ff70daf25a96"},"timestamp":1772158394779} +{"level":"info","event_type":"run.state","event":{"session_id":"session_fd6536fa5ff4","channel":"gmail","sender":"sender_fcf96878ddcb","source":"channel","state":"complete","request_id":"request_ff70daf25a96","duration_ms":5690},"timestamp":1772158400469} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_8b51db8cde21","channel":"gmail","sender":"sender_c9788a77f027","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772163195186} +{"level":"info","event_type":"run.state","event":{"session_id":"session_8b51db8cde21","channel":"gmail","sender":"sender_c9788a77f027","source":"channel","state":"start","request_id":"request_9054b78eda1d"},"timestamp":1772163195187} +{"level":"info","event_type":"run.state","event":{"session_id":"session_8b51db8cde21","channel":"gmail","sender":"sender_c9788a77f027","source":"channel","state":"complete","request_id":"request_9054b78eda1d","duration_ms":2356},"timestamp":1772163197543} +{"level":"info","event_type":"run.state","event":{"session_id":"session_7db5014f64fe","channel":"gmail","sender":"sender_81e925eaae0e","source":"channel","state":"complete","request_id":"request_7f5d82d1085a","duration_ms":5231},"timestamp":1772197161761} +{"level":"info","event_type":"run.state","event":{"session_id":"session_3ffb2e631ab1","channel":"gmail","sender":"sender_745442e44534","source":"channel","state":"complete","request_id":"request_3d4d23510682","duration_ms":3836},"timestamp":1772199260639} +{"level":"debug","event_type":"reaction.skip","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","reason":"no_rules","candidate_count":0},"timestamp":1772206157229} +{"level":"info","event_type":"run.state","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","state":"start","request_id":"request_ab73d670c119"},"timestamp":1772206157229} +{"level":"info","event_type":"run.state","event":{"session_id":"session_2f2f1e414e81","channel":"gmail","sender":"sender_323cedc3233a","source":"channel","state":"complete","request_id":"request_ab73d670c119","duration_ms":3850},"timestamp":1772206161079} diff --git a/docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.md b/docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.md new file mode 100644 index 0000000..154047c --- /dev/null +++ b/docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.md @@ -0,0 +1,62 @@ +# Phase 0 Baseline Telemetry Summary + +- Run state events: 42 +- Run cancel events: 0 +- Reaction matches: 0 +- Reaction skips: 14 + +- Sources: channel + +## Run Outcomes (Overall) + +- Total outcomes: 25 +- Complete: 25 (100.00%) +- Cancelled: 0 (0.00%) +- Errors: 0 (0.00%) +- Cancel requested: 0 +- Starts: 17 + +## Run Outcomes by Channel + +| Channel | Outcomes | Complete | Cancelled | Error | Complete % | Cancel % | Error % | Cancel Req | Starts | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| gmail | 25 | 25 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 17 | + +## Run Outcomes by Session + +| Session | Outcomes | Complete | Cancelled | Error | Complete % | Cancel % | Error % | Cancel Req | Starts | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| session_2f2f1e414e81 | 5 | 5 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 5 | +| session_f4d8ddc04194 | 3 | 3 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 3 | +| session_eabc3c2a91b9 | 2 | 2 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| session_33469de5a1ee | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| session_3ffb2e631ab1 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 0 | +| session_4d9e843358a3 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 0 | +| session_58a64b6f2c91 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 0 | +| session_7d3c3ff67d4f | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 0 | +| session_7db5014f64fe | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 0 | +| session_8849a4464275 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 0 | +| session_8b51db8cde21 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| session_9067cf5e3558 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| session_a4b91821c664 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 0 | +| session_cb9a69d8a362 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| session_e0a2a17b7329 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| session_ea839415979e | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| session_f6304f25e43b | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| session_fd6536fa5ff4 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | + +## Cancel Latency + +- No cancel latency samples. + +## Reaction Decisions + +- Matched: 0 (0.00%) +- Skipped: 14 (100.00%) + +### Skip Reasons + +| Reason | Count | Percent | +| --- | ---: | ---: | +| no_rules | 14 | 100.00% | + diff --git a/docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.json b/docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.json index 0e086f8..5711b12 100644 --- a/docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.json +++ b/docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.json @@ -1,5 +1,5 @@ { - "generated_at": "2026-02-27T08:43:10.946Z", + "generated_at": "2026-02-27T16:46:42.880Z", "source_audit_path": "~/.local/share/flynn/audit.log", "source_event_count": 6, "sampled_event_count": 6, diff --git a/docs/plans/state.json b/docs/plans/state.json index 195d9ba..4b520be 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -88,7 +88,7 @@ "status": "completed", "date": "2026-02-27", "updated": "2026-02-27", - "summary": "Added a dedicated live phase-0 baseline capture flow that reads audit logs, filters run/reaction telemetry, excludes probe sessions, anonymizes session/sender/request IDs, and writes sample + summary artifacts for operational refreshes across both channel-origin and gateway-origin windows. Gateway mode now supports auto-detection of the latest cancel/cancelled window.", + "summary": "Added a dedicated live phase-0 baseline capture flow that reads audit logs, filters run/reaction telemetry, excludes probe sessions, anonymizes session/sender/request IDs, and writes sample + summary artifacts for operational refreshes across both channel-origin and gateway-origin windows. Gateway mode now supports auto-detection of the latest cancel/cancelled window, and channel captures can be backend-scoped (`--backend`) using backend-route timelines.", "files_modified": [ "src/audit/phase0LiveBaseline.ts", "src/audit/phase0LiveBaseline.test.ts", @@ -107,9 +107,15 @@ "docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.jsonl", "docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.md", "docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.json", + "docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.jsonl", + "docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.md", + "docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.json", + "docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.jsonl", + "docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.md", + "docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.json", "docs/plans/state.json" ], - "test_status": "pnpm audit:phase0-baseline:live:refresh + pnpm test:run src/audit/phase0GatewayWindow.test.ts src/audit/phase0LiveBaseline.test.ts src/audit/phase0BaselineSummary.test.ts + pnpm typecheck passing" + "test_status": "pnpm audit:phase0-baseline:live:refresh + pnpm audit:phase0-baseline:live:pi + pnpm audit:phase0-baseline:live:native + pnpm test:run src/audit/phase0GatewayWindow.test.ts src/audit/phase0LiveBaseline.test.ts src/audit/phase0BaselineSummary.test.ts + pnpm typecheck passing" }, "phase0-live-baseline-gateway-window": { "status": "completed", @@ -165,6 +171,31 @@ ], "test_status": "documentation/package-script runbook update only; validated via pnpm audit:phase0-baseline:live:refresh + pnpm typecheck" }, + "phase0-live-baseline-backend-scoping": { + "status": "completed", + "date": "2026-02-27", + "updated": "2026-02-27", + "summary": "Added backend-scoped live baseline capture for phase-0 so channel windows can be filtered by routed backend (`--backend native|pi_embedded|...`) using `backend.route` timelines. Added package commands for Pi/native windows and committed live backend-scoped artifacts.", + "files_modified": [ + "src/audit/phase0LiveBaseline.ts", + "src/audit/phase0LiveBaseline.test.ts", + "scripts/capture-phase0-live-baseline.ts", + "package.json", + "README.md", + "docs/api/PROTOCOL.md", + "docs/architecture/AGENT_DIAGRAM.md", + "docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md", + "docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md", + "docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.jsonl", + "docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.md", + "docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.json", + "docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.jsonl", + "docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.md", + "docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.json", + "docs/plans/state.json" + ], + "test_status": "pnpm audit:phase0-baseline:live:pi + pnpm audit:phase0-baseline:live:native + pnpm test:run src/audit/phase0LiveBaseline.test.ts src/audit/phase0BaselineSummary.test.ts + pnpm typecheck passing" + }, "phase0-instrumentation-ticket-checklist": { "status": "completed", "date": "2026-02-25", @@ -7314,7 +7345,7 @@ } }, "overall_progress": { - "total_test_count": 2588, + "total_test_count": 2589, "all_tests_passing": true, "p0_completion": "3/3 (100%)", "p1_completion": "4/4 (100%)", @@ -7350,8 +7381,8 @@ "deeper_surfaces_phase0_ticket_02": "completed — gateway + daemon routing emit run lifecycle/cancel telemetry and reaction match/skip audit events with filter summaries and cancellation latency, plus focused tests", "deeper_surfaces_phase0_ticket_03": "completed — gateway metrics now track run-state outcomes, cancel latency samples, and reaction decision counters with routing/gateway emitters", "deeper_surfaces_phase0_ticket_04": "completed — added phase-0 baseline summary tooling for run outcomes, cancel latency, and reaction decisions with markdown/json CLI output", - "deeper_surfaces_phase0_ticket_05": "completed — documented phase-0 telemetry fields/workflow, refreshed architecture/protocol docs, and generated anonymized live baseline artifacts for both channel-origin and gateway-origin traffic (including cancel-path coverage)", - "next_up": "Apply `pnpm audit:phase0-baseline:live:refresh` to the host scheduler (cron/systemd timer) in each active environment and monitor artifact freshness over at least one full cadence cycle before additional run-control/reaction semantic changes.", + "deeper_surfaces_phase0_ticket_05": "completed — documented phase-0 telemetry fields/workflow, refreshed architecture/protocol docs, and generated anonymized live baseline artifacts for channel, gateway, and backend-scoped (pi/native) traffic windows", + "next_up": "Apply scheduled `pnpm audit:phase0-baseline:live:refresh` in each active environment and monitor backend-scoped (`pi_embedded` vs `native`) artifact freshness/drift over at least one full cadence cycle before additional run-control/reaction semantic changes.", "pi_embedded_canary_spike": "completed — added optional pi_embedded backend adapter, canary-safe no-tools routing guard, backend success/fallback latency audit events, and docs/diagram updates while native remains default", "pi_embedded_evaluation_phase": "completed — final decision rollback (applied in runtime config): Window A failed latency/fallback gates (p50 +259ms, p95 +5695ms, fallback 25%, categories: pi_module_interface/empty_assistant_text); Window B remained sample-insufficient; controlled probes verified guard coverage (pi_no_tools_mode/capability_query/attachments_present each hit once)", "pi_embedded_manual_mode": "completed — added persisted runtime backend controls for manual Pi activation/deactivation (`/runtime` preferred, `/backend` alias; `status`, `activate pi`, `deactivate pi`, `use config`) while keeping config-driven default routing", diff --git a/package.json b/package.json index d00159f..14477ba 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,8 @@ "audit:backend-canary": "node --import tsx/esm scripts/summarize-backend-canary.ts", "audit:phase0-baseline": "node --import tsx/esm scripts/summarize-phase0-baseline.ts", "audit:phase0-baseline:live": "node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source channel --exclude-session-substring probe", + "audit:phase0-baseline:live:pi": "node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source channel --backend pi_embedded --exclude-session-substring probe", + "audit:phase0-baseline:live:native": "node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source channel --backend native --exclude-session-substring probe", "audit:phase0-baseline:live:gateway": "node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source gateway --auto-gateway-cancel-window", "audit:phase0-baseline:live:refresh": "pnpm audit:phase0-baseline:live && pnpm audit:phase0-baseline:live:gateway", "audit:backend-canary:probes": "node --import tsx/esm scripts/run-pi-canary-guard-probes.ts", diff --git a/scripts/capture-phase0-live-baseline.ts b/scripts/capture-phase0-live-baseline.ts index b3ccb9b..ca10e17 100644 --- a/scripts/capture-phase0-live-baseline.ts +++ b/scripts/capture-phase0-live-baseline.ts @@ -4,7 +4,10 @@ import { mkdir, writeFile } from 'node:fs/promises'; import { dirname, resolve } from 'node:path'; import { parseArgs } from 'node:util'; import { queryAuditLogs } from '../src/audit/export.js'; -import { capturePhase0LiveBaselineEvents } from '../src/audit/phase0LiveBaseline.js'; +import { + capturePhase0LiveBaselineEvents, + type Phase0BackendTarget, +} from '../src/audit/phase0LiveBaseline.js'; import { findLatestGatewayCancelWindow } from '../src/audit/phase0GatewayWindow.js'; import { renderPhase0BaselineMarkdown, @@ -14,6 +17,14 @@ import { } from '../src/audit/phase0BaselineSummary.js'; const DEFAULT_EVENT_TYPES = ['run.state', 'run.cancel', 'reaction.match', 'reaction.skip'] as const; +const BACKEND_TARGETS: readonly Phase0BackendTarget[] = [ + 'native', + 'claude_code', + 'opencode', + 'codex', + 'gemini', + 'pi_embedded', +]; function usage(): string { return [ @@ -25,6 +36,7 @@ function usage(): string { ' --until End time filter', ' --channel Restrict sample to channels', ' --source Restrict sample to sources', + ' --backend Restrict sample to selected backends (via backend.route timeline)', ' --exclude-session-substring Exclude sessions containing any substring (default: probe)', ' --auto-gateway-cancel-window Auto-select latest gateway cancel/cancelled session window', ' --window-padding-ms Milliseconds added before/after auto-selected window (default: 250)', @@ -113,6 +125,22 @@ function parseOptionalNumber(raw: string | undefined, flag: string): number | un return parsed; } +function parseBackendTargets(raw: string | undefined): Phase0BackendTarget[] | undefined { + const values = parseCsv(raw); + if (!values) { + return undefined; + } + const parsed: Phase0BackendTarget[] = []; + for (const value of values) { + if (BACKEND_TARGETS.includes(value as Phase0BackendTarget)) { + parsed.push(value as Phase0BackendTarget); + continue; + } + throw new Error(`Invalid backend "${value}".`); + } + return parsed; +} + function isoDateTagNow(): string { return new Date().toISOString().slice(0, 10); } @@ -130,6 +158,7 @@ async function main(): Promise { until: { type: 'string' }, channel: { type: 'string' }, source: { type: 'string' }, + backend: { type: 'string' }, 'exclude-session-substring': { type: 'string' }, 'auto-gateway-cancel-window': { type: 'boolean' }, 'window-padding-ms': { type: 'string' }, @@ -156,6 +185,7 @@ async function main(): Promise { const tag = values.tag ?? isoDateTagNow(); const channels = parseCsv(values.channel); let sources = parseSources(values.source); + const backendTargets = parseBackendTargets(values.backend); const excludeSessionSubstrings = parseCsv(values['exclude-session-substring']) ?? ['probe']; const autoGatewayCancelWindow = Boolean(values['auto-gateway-cancel-window']); const windowPaddingMs = parseOptionalNumber(values['window-padding-ms'], '--window-padding-ms'); @@ -190,8 +220,15 @@ async function main(): Promise { } const isGatewayOnly = sources?.length === 1 && sources[0] === 'gateway'; + const backendSuffix = backendTargets && backendTargets.length > 0 + ? backendTargets.length === 1 + ? `backend_${backendTargets[0]}` + : 'backend_scoped' + : undefined; const defaultBaseName = isGatewayOnly ? `docs/plans/artifacts/phase0_baseline_live_gateway_${tag}` + : backendSuffix + ? `docs/plans/artifacts/phase0_baseline_live_${backendSuffix}_${tag}` : `docs/plans/artifacts/phase0_baseline_live_${tag}`; const sampleOut = values['sample-out'] ?? `${defaultBaseName}.jsonl`; const summaryJsonOut = values['summary-json-out'] ?? `${defaultBaseName}.json`; @@ -205,6 +242,14 @@ async function main(): Promise { maxSkipReasons: parseOptionalNumber(values['max-skip-reasons'], '--max-skip-reasons') ?? 10, }; + const backendRouteEvents = backendTargets && backendTargets.length > 0 + ? await queryAuditLogs(auditPath, { + start_time: startTime, + end_time: endTime, + event_types: ['backend.route'], + }) + : []; + const sourceEvents = await queryAuditLogs(auditPath, { start_time: startTime, end_time: endTime, @@ -214,6 +259,8 @@ async function main(): Promise { const sampledEvents = capturePhase0LiveBaselineEvents(sourceEvents, { channels, sources, + backendTargets, + backendRouteEvents, excludeSessionSubstrings, anonymizeIdentifiers: !values['raw-identifiers'], }); @@ -231,6 +278,7 @@ async function main(): Promise { until_ms: endTime, channels, sources, + backend_targets: backendTargets, exclude_session_substrings: excludeSessionSubstrings, anonymized_identifiers: !values['raw-identifiers'], auto_gateway_cancel_window: autoWindow @@ -239,6 +287,7 @@ async function main(): Promise { padding_ms: windowPaddingMs ?? 250, } : undefined, + backend_route_event_count: backendRouteEvents.length > 0 ? backendRouteEvents.length : undefined, }, options: summaryOptions, summary, @@ -252,6 +301,9 @@ async function main(): Promise { if (autoWindow) { process.stdout.write(`- auto gateway window: session=${autoWindow.session_id} start=${autoWindow.start_time_ms} end=${autoWindow.end_time_ms}\n`); } + if (backendTargets && backendTargets.length > 0) { + process.stdout.write(`- backend targets: ${backendTargets.join(', ')} (route events: ${backendRouteEvents.length})\n`); + } process.stdout.write(`- sample: ${sampleOut}\n`); process.stdout.write(`- summary json: ${summaryJsonOut}\n`); process.stdout.write(`- summary md: ${summaryMdOut}\n`); diff --git a/src/audit/phase0LiveBaseline.test.ts b/src/audit/phase0LiveBaseline.test.ts index 6dc5dc3..e4bba13 100644 --- a/src/audit/phase0LiveBaseline.test.ts +++ b/src/audit/phase0LiveBaseline.test.ts @@ -75,5 +75,34 @@ describe('capturePhase0LiveBaselineEvents', () => { expect(first.request_id).not.toBe(second.request_id); expect(first.lane_id).not.toBe(second.lane_id); }); -}); + it('filters phase-0 events by backend route timelines when backend targets are provided', () => { + const events: AuditEvent[] = [ + event(15, 'run.state', { session_id: 's1', channel: 'gmail', sender: 'u1', source: 'channel', state: 'start' }), + event(20, 'reaction.skip', { session_id: 's1', channel: 'gmail', sender: 'u1', source: 'channel', reason: 'no_rules', candidate_count: 0 }), + event(35, 'run.state', { session_id: 's1', channel: 'gmail', sender: 'u1', source: 'channel', state: 'complete' }), + event(45, 'run.state', { session_id: 's2', channel: 'gmail', sender: 'u2', source: 'channel', state: 'complete' }), + event(55, 'run.state', { session_id: 's3', channel: 'gmail', sender: 'u3', source: 'channel', state: 'start' }), + ]; + + const backendRouteEvents: AuditEvent[] = [ + event(10, 'backend.route', { session_id: 's1', selected_backend: 'pi_embedded' }), + event(30, 'backend.route', { session_id: 's1', selected_backend: 'native' }), + event(40, 'backend.route', { session_id: 's2', selected_backend: 'pi_embedded' }), + ]; + + const piOnly = capturePhase0LiveBaselineEvents(events, { + backendTargets: ['pi_embedded'], + backendRouteEvents, + anonymizeIdentifiers: false, + }); + expect(piOnly.map((entry) => entry.timestamp)).toEqual([15, 20, 45]); + + const nativeOnly = capturePhase0LiveBaselineEvents(events, { + backendTargets: ['native'], + backendRouteEvents, + anonymizeIdentifiers: false, + }); + expect(nativeOnly.map((entry) => entry.timestamp)).toEqual([35]); + }); +}); diff --git a/src/audit/phase0LiveBaseline.ts b/src/audit/phase0LiveBaseline.ts index e49b80d..588cc23 100644 --- a/src/audit/phase0LiveBaseline.ts +++ b/src/audit/phase0LiveBaseline.ts @@ -2,6 +2,8 @@ import { createHash } from 'node:crypto'; import type { AuditEvent, AuditEventType } from './types.js'; import type { AuditSource } from './phase0BaselineSummary.js'; +export type Phase0BackendTarget = 'native' | 'claude_code' | 'opencode' | 'codex' | 'gemini' | 'pi_embedded'; + const PHASE0_BASELINE_EVENT_TYPES: readonly AuditEventType[] = [ 'run.state', 'run.cancel', @@ -9,9 +11,20 @@ const PHASE0_BASELINE_EVENT_TYPES: readonly AuditEventType[] = [ 'reaction.skip', ]; +const BACKEND_TARGETS: readonly Phase0BackendTarget[] = [ + 'native', + 'claude_code', + 'opencode', + 'codex', + 'gemini', + 'pi_embedded', +]; + export interface CapturePhase0LiveBaselineOptions { channels?: string[]; sources?: AuditSource[]; + backendTargets?: Phase0BackendTarget[]; + backendRouteEvents?: AuditEvent[]; excludeSessionSubstrings?: string[]; anonymizeIdentifiers?: boolean; } @@ -27,6 +40,57 @@ function toPayload(value: unknown): Record { : {}; } +function isBackendTarget(value: string): value is Phase0BackendTarget { + return BACKEND_TARGETS.includes(value as Phase0BackendTarget); +} + +function buildBackendRouteTimeline( + events: AuditEvent[], +): Map> { + const bySession = new Map>(); + + for (const event of events) { + if (event.event_type !== 'backend.route') { + continue; + } + const payload = toPayload(event.event); + const sessionId = readStringField(payload, 'session_id'); + const selectedBackend = readStringField(payload, 'selected_backend'); + if (!sessionId || !selectedBackend || !isBackendTarget(selectedBackend)) { + continue; + } + const rows = bySession.get(sessionId) ?? []; + rows.push({ at: event.timestamp, backend: selectedBackend }); + bySession.set(sessionId, rows); + } + + for (const rows of bySession.values()) { + rows.sort((a, b) => a.at - b.at); + } + + return bySession; +} + +function resolveBackendForEvent( + timelineBySession: Map>, + sessionId: string, + timestamp: number, +): Phase0BackendTarget | undefined { + const timeline = timelineBySession.get(sessionId); + if (!timeline || timeline.length === 0) { + return undefined; + } + + let selected: Phase0BackendTarget | undefined; + for (const row of timeline) { + if (row.at > timestamp) { + break; + } + selected = row.backend; + } + return selected; +} + function hashIdentifier(prefix: string, value: string): string { const digest = createHash('sha256').update(value).digest('hex').slice(0, 12); return `${prefix}_${digest}`; @@ -61,6 +125,10 @@ export function capturePhase0LiveBaselineEvents( ): AuditEvent[] { const channelFilter = new Set((options.channels ?? []).filter((value) => value.length > 0)); const sourceFilter = new Set(options.sources ?? []); + const backendFilter = new Set((options.backendTargets ?? []).filter((value) => value.length > 0)); + const backendTimelineBySession = backendFilter.size > 0 + ? buildBackendRouteTimeline(options.backendRouteEvents ?? []) + : new Map>(); const excludeSessionSubstrings = (options.excludeSessionSubstrings ?? []) .map((value) => value.trim().toLowerCase()) .filter((value) => value.length > 0); @@ -90,6 +158,15 @@ export function capturePhase0LiveBaselineEvents( ) { continue; } + if (backendFilter.size > 0) { + if (!sessionId) { + continue; + } + const selectedBackend = resolveBackendForEvent(backendTimelineBySession, sessionId, event.timestamp); + if (!selectedBackend || !backendFilter.has(selectedBackend)) { + continue; + } + } const nextPayload = anonymizeIdentifiers ? anonymizePayloadIdentifiers(payload) @@ -103,4 +180,3 @@ export function capturePhase0LiveBaselineEvents( return filtered.sort((a, b) => a.timestamp - b.timestamp); } -