From ae21681958535534cf1deaed64e7ed87b3901267 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Wed, 25 Feb 2026 09:42:48 -0800 Subject: [PATCH] docs(observability): seed phase-0 baseline probe artifacts --- .../artifacts/phase0_baseline_2026-02-25.json | 156 +++++++++++++++--- .../artifacts/phase0_baseline_2026-02-25.md | 42 +++-- .../phase0_baseline_probe_2026-02-25.jsonl | 11 ++ docs/plans/state.json | 9 +- 4 files changed, 171 insertions(+), 47 deletions(-) create mode 100644 docs/plans/artifacts/phase0_baseline_probe_2026-02-25.jsonl diff --git a/docs/plans/artifacts/phase0_baseline_2026-02-25.json b/docs/plans/artifacts/phase0_baseline_2026-02-25.json index 11a5728..ac8f3d2 100644 --- a/docs/plans/artifacts/phase0_baseline_2026-02-25.json +++ b/docs/plans/artifacts/phase0_baseline_2026-02-25.json @@ -1,9 +1,7 @@ { - "generated_at": "2026-02-25T17:20:35.391Z", - "event_count": 0, - "filters": { - "since_ms": 1771977600000 - }, + "generated_at": "2026-02-25T17:41:51.949Z", + "event_count": 11, + "filters": {}, "options": { "maxSessions": 20, "maxChannels": 20, @@ -11,34 +9,138 @@ }, "summary": { "event_counts": { - "run_state": 0, - "run_cancel": 0, - "reaction_match": 0, - "reaction_skip": 0 + "run_state": 7, + "run_cancel": 1, + "reaction_match": 1, + "reaction_skip": 2 }, "run_outcomes": { "overall": { - "total_outcomes": 0, - "complete": 0, - "cancelled": 0, - "error": 0, - "cancel_requested": 0, - "start": 0, - "completion_rate_pct": null, - "cancel_rate_pct": null, - "error_rate_pct": null + "total_outcomes": 3, + "complete": 1, + "cancelled": 1, + "error": 1, + "cancel_requested": 1, + "start": 3, + "completion_rate_pct": 33.33, + "cancel_rate_pct": 33.33, + "error_rate_pct": 33.33 }, - "by_channel": [], - "by_session": [] + "by_channel": [ + { + "key": "discord", + "stats": { + "total_outcomes": 1, + "complete": 0, + "cancelled": 0, + "error": 1, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 0, + "cancel_rate_pct": 0, + "error_rate_pct": 100 + } + }, + { + "key": "telegram", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "ws", + "stats": { + "total_outcomes": 1, + "complete": 0, + "cancelled": 1, + "error": 0, + "cancel_requested": 1, + "start": 1, + "completion_rate_pct": 0, + "cancel_rate_pct": 100, + "error_rate_pct": 0 + } + } + ], + "by_session": [ + { + "key": "discord:probe-2", + "stats": { + "total_outcomes": 1, + "complete": 0, + "cancelled": 0, + "error": 1, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 0, + "cancel_rate_pct": 0, + "error_rate_pct": 100 + } + }, + { + "key": "telegram:probe-1", + "stats": { + "total_outcomes": 1, + "complete": 1, + "cancelled": 0, + "error": 0, + "cancel_requested": 0, + "start": 1, + "completion_rate_pct": 100, + "cancel_rate_pct": 0, + "error_rate_pct": 0 + } + }, + { + "key": "ws:probe-1", + "stats": { + "total_outcomes": 1, + "complete": 0, + "cancelled": 1, + "error": 0, + "cancel_requested": 1, + "start": 1, + "completion_rate_pct": 0, + "cancel_rate_pct": 100, + "error_rate_pct": 0 + } + } + ] + }, + "cancel_latency_ms": { + "count": 1, + "avg_ms": 140, + "p50_ms": 140, + "p95_ms": 140, + "min_ms": 140, + "max_ms": 140 }, - "cancel_latency_ms": null, "reactions": { - "matched": 0, - "skipped": 0, - "total": 0, - "match_rate_pct": null, - "skip_rate_pct": null, - "skip_reasons": [] + "matched": 1, + "skipped": 2, + "total": 3, + "match_rate_pct": 33.33, + "skip_rate_pct": 66.67, + "skip_reasons": [ + { + "reason": "no_match", + "count": 1, + "pct": 50 + }, + { + "reason": "no_rules", + "count": 1, + "pct": 50 + } + ] } } } diff --git a/docs/plans/artifacts/phase0_baseline_2026-02-25.md b/docs/plans/artifacts/phase0_baseline_2026-02-25.md index d4f7fc0..ad37e8f 100644 --- a/docs/plans/artifacts/phase0_baseline_2026-02-25.md +++ b/docs/plans/artifacts/phase0_baseline_2026-02-25.md @@ -1,43 +1,53 @@ # Phase 0 Baseline Telemetry Summary -- Run state events: 0 -- Run cancel events: 0 -- Reaction matches: 0 -- Reaction skips: 0 +- Run state events: 7 +- Run cancel events: 1 +- Reaction matches: 1 +- Reaction skips: 2 ## Run Outcomes (Overall) -- Total outcomes: 0 -- Complete: 0 (n/a) -- Cancelled: 0 (n/a) -- Errors: 0 (n/a) -- Cancel requested: 0 -- Starts: 0 +- Total outcomes: 3 +- Complete: 1 (33.33%) +- Cancelled: 1 (33.33%) +- Errors: 1 (33.33%) +- Cancel requested: 1 +- Starts: 3 ## Run Outcomes by Channel | Channel | Outcomes | Complete | Cancelled | Error | Complete % | Cancel % | Error % | Cancel Req | Starts | | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | -| _none_ | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 0 | +| discord | 1 | 0 | 0 | 1 | 0.00% | 0.00% | 100.00% | 0 | 1 | +| telegram | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| ws | 1 | 0 | 1 | 0 | 0.00% | 100.00% | 0.00% | 1 | 1 | ## Run Outcomes by Session | Session | Outcomes | Complete | Cancelled | Error | Complete % | Cancel % | Error % | Cancel Req | Starts | | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | -| _none_ | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 0 | +| discord:probe-2 | 1 | 0 | 0 | 1 | 0.00% | 0.00% | 100.00% | 0 | 1 | +| telegram:probe-1 | 1 | 1 | 0 | 0 | 100.00% | 0.00% | 0.00% | 0 | 1 | +| ws:probe-1 | 1 | 0 | 1 | 0 | 0.00% | 100.00% | 0.00% | 1 | 1 | ## Cancel Latency -- No cancel latency samples. +- Count: 1 +- Avg: 140ms +- P50: 140ms +- P95: 140ms +- Min: 140ms +- Max: 140ms ## Reaction Decisions -- Matched: 0 (n/a) -- Skipped: 0 (n/a) +- Matched: 1 (33.33%) +- Skipped: 2 (66.67%) ### Skip Reasons | Reason | Count | Percent | | --- | ---: | ---: | -| _none_ | 0 | 0.00% | +| no_match | 1 | 50.00% | +| no_rules | 1 | 50.00% | diff --git a/docs/plans/artifacts/phase0_baseline_probe_2026-02-25.jsonl b/docs/plans/artifacts/phase0_baseline_probe_2026-02-25.jsonl new file mode 100644 index 0000000..25ed232 --- /dev/null +++ b/docs/plans/artifacts/phase0_baseline_probe_2026-02-25.jsonl @@ -0,0 +1,11 @@ +{"timestamp": 1761264000000, "level": "info", "event_type": "run.state", "event": {"session_id": "telegram:probe-1", "channel": "telegram", "sender": "probe-user", "source": "channel", "state": "start", "request_id": "m1"}} +{"timestamp": 1761264000500, "level": "info", "event_type": "reaction.match", "event": {"session_id": "telegram:probe-1", "channel": "telegram", "sender": "probe-user", "source": "channel", "rule_name": "boss-email", "candidate_count": 2}} +{"timestamp": 1761264000600, "level": "debug", "event_type": "reaction.skip", "event": {"session_id": "telegram:probe-1", "channel": "telegram", "sender": "probe-user", "source": "channel", "reason": "no_match", "candidate_count": 2}} +{"timestamp": 1761264000900, "level": "info", "event_type": "run.state", "event": {"session_id": "telegram:probe-1", "channel": "telegram", "sender": "probe-user", "source": "channel", "state": "complete", "request_id": "m1", "duration_ms": 900}} +{"timestamp": 1761264001000, "level": "info", "event_type": "run.state", "event": {"session_id": "ws:probe-1", "channel": "ws", "sender": "conn-1", "source": "gateway", "state": "start", "request_id": "r1"}} +{"timestamp": 1761264001200, "level": "info", "event_type": "run.cancel", "event": {"session_id": "ws:probe-1", "channel": "ws", "sender": "conn-1", "source": "gateway", "requested": true, "acknowledged": true, "request_id": "r1", "latency_ms": 140}} +{"timestamp": 1761264001250, "level": "info", "event_type": "run.state", "event": {"session_id": "ws:probe-1", "channel": "ws", "sender": "conn-1", "source": "gateway", "state": "cancel_requested", "request_id": "r1", "duration_ms": 140}} +{"timestamp": 1761264001600, "level": "info", "event_type": "run.state", "event": {"session_id": "ws:probe-1", "channel": "ws", "sender": "conn-1", "source": "gateway", "state": "cancelled", "request_id": "r1", "duration_ms": 600}} +{"timestamp": 1761264002000, "level": "info", "event_type": "run.state", "event": {"session_id": "discord:probe-2", "channel": "discord", "sender": "probe-user-2", "source": "channel", "state": "start", "request_id": "m2"}} +{"timestamp": 1761264002300, "level": "error", "event_type": "run.state", "event": {"session_id": "discord:probe-2", "channel": "discord", "sender": "probe-user-2", "source": "channel", "state": "error", "request_id": "m2", "error": "probe failure"}} +{"timestamp": 1761264002400, "level": "debug", "event_type": "reaction.skip", "event": {"session_id": "discord:probe-2", "channel": "discord", "sender": "probe-user-2", "source": "channel", "reason": "no_rules", "candidate_count": 0}} diff --git a/docs/plans/state.json b/docs/plans/state.json index d63fd8d..e197a73 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -67,18 +67,19 @@ "status": "completed", "date": "2026-02-25", "updated": "2026-02-25", - "summary": "Updated protocol/docs/diagrams for phase-0 telemetry fields, documented baseline workflow, and generated initial phase-0 baseline artifacts (empty sample window).", + "summary": "Updated protocol/docs/diagrams for phase-0 telemetry fields, documented baseline workflow, and generated phase-0 baseline artifacts using a probe log with representative channel + gateway run/reaction events.", "files_modified": [ "README.md", "docs/api/PROTOCOL.md", "docs/architecture/AGENT_DIAGRAM.md", "docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md", "docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md", + "docs/plans/artifacts/phase0_baseline_probe_2026-02-25.jsonl", "docs/plans/artifacts/phase0_baseline_2026-02-25.md", "docs/plans/artifacts/phase0_baseline_2026-02-25.json", "docs/plans/state.json" ], - "test_status": "pnpm audit:phase0-baseline --audit ~/.local/share/flynn/audit.log --since 2026-02-25T00:00:00Z --format markdown/json (0 events in window)" + "test_status": "pnpm audit:phase0-baseline --audit docs/plans/artifacts/phase0_baseline_probe_2026-02-25.jsonl --format markdown/json (probe log with representative events)" }, "phase0-instrumentation-ticket-checklist": { "status": "completed", @@ -6694,8 +6695,8 @@ "deeper_surfaces_phase0_ticket_02": "completed — gateway + daemon routing emit run lifecycle/cancel telemetry and reaction match/skip audit events with filter summaries and cancellation latency, plus focused tests", "deeper_surfaces_phase0_ticket_03": "completed — gateway metrics now track run-state outcomes, cancel latency samples, and reaction decision counters with routing/gateway emitters", "deeper_surfaces_phase0_ticket_04": "completed — added phase-0 baseline summary tooling for run outcomes, cancel latency, and reaction decisions with markdown/json CLI output", - "deeper_surfaces_phase0_ticket_05": "completed — documented phase-0 telemetry fields/workflow, refreshed architecture/protocol docs, and generated baseline artifacts", - "next_up": "Exercise gateway + channel sessions to emit run/reaction events, then regenerate phase-0 baseline artifacts with real samples", + "deeper_surfaces_phase0_ticket_05": "completed — documented phase-0 telemetry fields/workflow, refreshed architecture/protocol docs, and generated baseline artifacts from a probe log with representative channel + gateway events", + "next_up": "Replace probe baseline artifacts with live audit samples once gateway/channel sessions emit real run/reaction events", "pi_embedded_canary_spike": "completed — added optional pi_embedded backend adapter, canary-safe no-tools routing guard, backend success/fallback latency audit events, and docs/diagram updates while native remains default", "pi_embedded_evaluation_phase": "completed — final decision rollback (applied in runtime config): Window A failed latency/fallback gates (p50 +259ms, p95 +5695ms, fallback 25%, categories: pi_module_interface/empty_assistant_text); Window B remained sample-insufficient; controlled probes verified guard coverage (pi_no_tools_mode/capability_query/attachments_present each hit once)", "pi_embedded_manual_mode": "completed — added persisted runtime backend controls for manual Pi activation/deactivation (`/runtime` preferred, `/backend` alias; `status`, `activate pi`, `deactivate pi`, `use config`) while keeping config-driven default routing",