From 23b813a92fc5a93fa1d4ec9444cb3c427463cdf8 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Wed, 25 Feb 2026 00:12:31 -0800 Subject: [PATCH] feat(audit): add phase0 run/reaction baseline audit events --- ...aces-and-integrated-behavior-stack-plan.md | 259 ++++++++++++++++++ ...phase0-instrumentation-ticket-checklist.md | 205 ++++++++++++++ docs/plans/state.json | 44 ++- src/audit/logger.test.ts | 110 +++++++- src/audit/logger.ts | 26 ++ src/audit/types.ts | 43 +++ 6 files changed, 684 insertions(+), 3 deletions(-) create mode 100644 docs/plans/2026-02-25-deeper-end-user-surfaces-and-integrated-behavior-stack-plan.md create mode 100644 docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md diff --git a/docs/plans/2026-02-25-deeper-end-user-surfaces-and-integrated-behavior-stack-plan.md b/docs/plans/2026-02-25-deeper-end-user-surfaces-and-integrated-behavior-stack-plan.md new file mode 100644 index 0000000..fc659b4 --- /dev/null +++ b/docs/plans/2026-02-25-deeper-end-user-surfaces-and-integrated-behavior-stack-plan.md @@ -0,0 +1,259 @@ +# Flynn Deeper End-User Surfaces + Integrated Behavior Stack Plan + +Date: 2026-02-25 +Status: proposed roadmap +Scope: deepen assistant "product feel" (behavior semantics + user-facing surfaces) without rewriting Flynn core architecture + +## Summary + +This plan adopts a balanced hybrid strategy: + +1. Improve behavior semantics first where correctness risk is highest (interrupt/cancel/run control). +2. In parallel, ship selective deeper user surfaces (companion, canvas persistence, voice continuity). +3. Land each slice with explicit observability gates so rollout decisions are data-driven. 
+ +## Why This Plan + +Flynn already has strong foundations: + +- Queue + session orchestration: `src/gateway/lane-queue.ts`, `src/gateway/session-bridge.ts` +- Multi-path routing and backend fallback: `src/daemon/routing.ts` +- Companion RPC foundation: `src/companion/runtimeClient.ts`, `src/gateway/protocol.ts` +- Canvas API baseline: `src/gateway/handlers/canvas.ts`, `src/gateway/canvas-store.ts` +- Voice in/out primitives: `src/models/media.ts`, `src/models/tts.ts` +- Reactions baseline: `src/automation/reactions.ts` + +Largest remaining gap vs OpenClaw-like "assistant feel" is integration behavior across those systems, not missing foundational architecture. + +## Goals and Success Criteria + +1. Deterministic active-run control under bursty traffic +2. Rich, safe proactive behavior stack +3. Durable end-user surfaces for companion/canvas/voice +4. Measurable reliability improvements across canary phases + +Quantitative success gates: + +1. Cancel-to-ack p95 <= 500ms on gateway sessions. +2. Duplicate assistant responses caused by run preemption: 0 in integration tests. +3. Reaction false-positive rate <= 3% in canary logs. +4. Companion reconnect success >= 99% in soak tests. +5. Canvas artifact persistence survives daemon restart in integration tests. +6. Voice failures degrade to text-only replies with no dropped responses. + +## Out of Scope + +1. Full native macOS/iOS/Android app suite in this phase set. +2. Broad protocol redesign or protocol-version breaking changes. +3. Pi backend expansion (kept separate from this roadmap until re-approval). 
+ +## Workstreams and Complexity + +| Workstream | Complexity | Main Risk | +| --- | --- | --- | +| Run-control semantics unification | High | race conditions and cancellation ordering | +| Reactions + proactive behavior v2 | Medium-High | noisy or looping automation | +| Companion + canvas + voice deepening | High | cross-surface consistency and restart behavior | +| Rollout hardening + observability | Medium | incomplete canary signals | + +## Phase 0 - Baseline Instrumentation and Guardrails + +Duration: 3-5 days + +Execution checklist: +- `docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md` + +### Deliverables + +1. Add baseline event and metric instrumentation for: + - run-state transitions, + - cancellation path timings, + - reaction match/skip reasons, + - surface delivery outcomes. +2. Define canary gate calculator inputs for each later phase. + +### Files + +1. `src/audit/types.ts` +2. `src/audit/logger.ts` +3. `src/gateway/metrics.ts` +4. `docs/api/PROTOCOL.md` (event semantics if needed) +5. `docs/plans/state.json` + +### Acceptance + +1. Baseline report generated before behavior changes. +2. No runtime behavior changes in this phase, only observability. + +## Phase 1 - Run-Control Semantics + Gateway UX Signals + +Duration: 2-3 weeks + +### Objectives + +1. Enforce "latest wins" semantics when queue policy is `interrupt`. +2. Align cancellation behavior between gateway and channel router paths. +3. Expose user-visible run lifecycle state in gateway events/UI. + +### Implementation + +1. Queue semantics hardening: + - `src/gateway/lane-queue.ts` + - ensure queued + active behavior rules are explicit and testable. +2. Active run cancellation wiring: + - `src/gateway/handlers/agent.ts` + - `src/gateway/session-bridge.ts` + - `src/daemon/routing.ts` (`activeRuns` parity behavior). +3. Event surface: + - add additive `run_state` event in `src/gateway/protocol.ts` + - consume/render in `src/gateway/ui/pages/chat.js`. + +### Test Plan + +1. 
`src/gateway/lane-queue.test.ts`: preemption ordering, overflow with interrupt, debounce edge cases. +2. `src/gateway/handlers/agent.test.ts`: interrupt + active cancel + queued supersede flows. +3. `src/daemon/routing.test.ts`: channel-path cancellation parity. +4. `src/gateway/ui/pages/chat.test.ts`: `run_state` rendering and transitions. + +### Acceptance + +1. Cancel-to-ack p95 <= 500ms. +2. Zero duplicate final responses in integration suite. +3. Backward compatibility for clients ignoring `run_state`. + +## Phase 2 - Reactions and Proactive Behavior Stack V2 + +Duration: 2 weeks + +### Objectives + +1. Replace first-match reaction behavior with deterministic priority + cooldown semantics. +2. Keep announce delivery safe and auditable. +3. Prevent recursion/looping behavior. + +### Config/API Additions (backward-compatible) + +Extend `automation.reactions[]` in `src/config/schema.ts` with: + +1. `priority` (number, default `100`) +2. `cooldown_ms` (number, default `0`) +3. `stop_on_match` (boolean, default `true`) + +Existing fields remain valid and unchanged. + +### Implementation + +1. Reaction engine expansion: + - `src/automation/reactions.ts` (or split `reactionEngine.ts` if needed). +2. Routing integration: + - `src/daemon/routing.ts` deterministic reaction resolution. +3. Delivery consistency: + - `src/automation/cron.ts` + - `src/automation/webhooks.ts` + - preserve `delivery_mode` semantics and audit metadata. + +### Test Plan + +1. `src/automation/reactions.test.ts`: + - priority conflict resolution, + - cooldown suppression, + - metadata and template rendering. +2. `src/daemon/routing.test.ts`: + - reaction trigger integration and command-path exclusion. +3. `src/automation/cron.test.ts` / `src/automation/webhooks.test.ts`: + - announce/isolation metadata correctness. + +### Acceptance + +1. False-positive match rate <= 3% in canary. +2. No reaction recursion loops. +3. Deterministic rule selection under overlap. 
+ +## Phase 3 - Deeper Surfaces: Companion, Canvas Durability, Voice Continuity + +Duration: 3-4 weeks + +### Objectives + +1. Upgrade companion from heartbeat-only utility to reliable daily-use surface. +2. Make canvas artifacts durable across restart. +3. Improve voice continuity behavior around cancellation and fallbacks. + +### Implementation + +1. Companion hardening: + - `src/cli/companion.ts` + - `src/companion/runtimeClient.ts` + - `src/gateway/handlers/node.ts` + - focus on reconnect and subscription resilience. +2. Canvas persistence: + - `src/gateway/canvas-store.ts` (durable backing instead of in-memory only) + - `src/gateway/handlers/canvas.ts` + - UI rendering/inspection in `src/gateway/ui/pages/chat.js` (or dedicated canvas page). +3. Voice continuity: + - `src/daemon/routing.ts` (talk-mode + cancellation + output behavior) + - `src/models/tts.ts` + - channel adapter output checks where required. + +### Test Plan + +1. Companion integration tests for reconnect and event continuity. +2. Canvas store tests for restart durability and eviction policy. +3. Voice tests for TTS errors, fallback to text, and interrupted runs. + +### Acceptance + +1. Companion reconnect success >= 99% in soak. +2. Canvas survives daemon restart in integration suite. +3. Voice path never drops assistant reply when TTS fails. + +## Phase 4 - Rollout, Hardening, and Operator Readiness + +Duration: 1 week + +### Deliverables + +1. Canary rollout plan by feature flag/surface. +2. Explicit rollback playbook. +3. Operator docs and architecture/protocol docs synchronized. + +### Documentation Updates (required in same PRs) + +1. `README.md` +2. `docs/api/PROTOCOL.md` +3. `docs/architecture/AGENT_DIAGRAM.md` +4. `docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md` +5. `docs/plans/state.json` + +## Execution and Commit Structure + +1. Branch: `feature/deeper-surfaces-integrated-behavior-stack` +2. Atomic commits per slice: + - implementation + tests + docs + `state.json` together +3. 
Rebase onto `main` before merge. +4. Fast-forward merge only. + +## Model-Tier Delegation Plan for Implementation Work + +1. `claude-haiku-4.5`: + - mechanical schema/test/doc updates. +2. `claude-sonnet-4.6`: + - default implementation tasks across queue/routing/companion/canvas. +3. `claude-opus-4.6`: + - concurrency semantics review, failure-mode design, and rollout gate design. + +## Risks and Mitigations + +1. Risk: preemption races create duplicate or orphaned replies. + Mitigation: run-state event model + deterministic cancellation tests. +2. Risk: proactive rules become noisy. + Mitigation: priority/cooldown/stop semantics + canary thresholds. +3. Risk: deeper surfaces drift from core behavior semantics. + Mitigation: shared gateway protocol contracts and integration tests across surfaces. + +## Default Decisions Locked + +1. Keep gateway protocol backward-compatible (additive only). +2. Prioritize behavior reliability before broadening platform count. +3. Use companion CLI/runtime path as first deep-surface target. +4. Keep Pi expansion out of this roadmap until separate canary re-approval. diff --git a/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md b/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md new file mode 100644 index 0000000..fb5e5c0 --- /dev/null +++ b/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md @@ -0,0 +1,205 @@ +# Phase 0 Ticket Checklist: Baseline Instrumentation and Guardrails + +Created: 2026-02-25 +Owner: Flynn core +Status: ready to implement +Parent: `docs/plans/2026-02-25-deeper-end-user-surfaces-and-integrated-behavior-stack-plan.md` + +## Goal + +Establish a measurement baseline for deeper-surface and behavior-stack work before changing runtime semantics. + +This checklist intentionally avoids behavior changes. It adds instrumentation and reporting only. + +## PR Boundary + +In scope: + +1. Audit event coverage for run lifecycle, cancel intent/result, and reaction decision paths. 
+2. Gateway metrics support for cancel latency and run transition counters. +3. Baseline summary tooling for repeatable canary comparisons. +4. Documentation updates for new telemetry fields and collection workflow. + +Out of scope: + +1. Queue semantic changes (`interrupt`, `steer`, active preemption behavior). +2. New gateway event surfaces to clients (`run_state` comes in Phase 1). +3. Reactions behavior changes (priority/cooldown/stop semantics come in Phase 2). +4. Companion/canvas persistence behavior changes (Phase 3). + +## Ticket 0.1 — Audit Schema Extension + +Status: completed (2026-02-25) + +### Scope + +Add additive audit event types and payload contracts: + +1. `run.state` (start, complete, cancel_requested, cancelled, error) +2. `run.cancel` (source, requested, acknowledged, latency_ms) +3. `reaction.match` (rule_name, channel, sender, filter_summary) +4. `reaction.skip` (reason category, candidate_count) + +### Files + +1. `src/audit/types.ts` +2. `src/audit/logger.ts` +3. `src/audit/logger.test.ts` + +### Acceptance + +1. All new events are additive and backward-compatible. +2. Audit logger exposes typed methods for each new event. +3. Unit tests cover serialization of the new events, and the existing path-expansion test still passes. + +### Suggested commit message + +`feat(audit): add run lifecycle and reaction decision event types` + +## Ticket 0.2 — Gateway/Router Emitters for Baseline Events + +### Scope + +Emit new audit events without changing request handling behavior: + +1. Gateway `agent.send` path: + - emit run start/complete/error states, + - emit cancel request/ack timing when `/stop` or cancellation path is used. +2. Daemon routing path: + - emit matching run lifecycle events for channel-origin sessions, + - emit reaction match/skip events around `matchReactionPrompt(...)`. + +### Files + +1. `src/gateway/handlers/agent.ts` +2. `src/daemon/routing.ts` +3. `src/gateway/handlers/agent.test.ts` +4. `src/daemon/routing.test.ts` + +### Acceptance + +1.
Event emission is side-effect free and does not alter reply behavior. +2. Run IDs/session IDs are consistent with existing audit conventions. +3. Existing tests continue passing; new assertions verify emitted metadata. + +### Suggested commit message + +`feat(observability): emit run and reaction baseline audit events` + +## Ticket 0.3 — Metrics Collector Baseline Counters + +### Scope + +Extend in-memory gateway metrics with baseline counters: + +1. Run-state counters by outcome (`completed`, `cancelled`, `errored`). +2. Cancel-latency histogram buckets (or ring-buffer sample list). +3. Reaction decision counters (`matched`, `skipped`, per-reason). + +### Files + +1. `src/gateway/metrics.ts` +2. `src/gateway/metrics.test.ts` +3. `src/gateway/handlers/observability.ts` (if exposing additional metrics fields) +4. `src/gateway/handlers/observability.test.ts` (if needed) + +### Acceptance + +1. New metrics appear in snapshots with stable defaults. +2. No regression to existing dashboard/observability consumers. +3. Tests validate accumulation, reset behavior (if any), and shape compatibility. + +### Suggested commit message + +`feat(metrics): add baseline run and reaction counters for phase-0` + +## Ticket 0.4 — Baseline Summary Tooling + +### Scope + +Add or extend report tooling to summarize phase-0 telemetry slices: + +1. Run completion/error/cancel rates by channel/session. +2. Cancel latency p50/p95. +3. Reaction match/skip rates and top skip reasons. +4. Optional markdown + json output for plan artifacts. + +### Files + +1. `src/audit/backendCanarySummary.ts` (extend or split shared summarizer helpers) +2. `src/audit/backendCanarySummary.test.ts` +3. `scripts/summarize-backend-canary.ts` (or new script if cleaner) +4. `package.json` (script entry if needed) + +### Acceptance + +1. Tool works on current audit logs without requiring new event types. +2. New event types are incorporated when present. +3. Output is deterministic and safe on missing fields. 
+ +### Suggested commit message + +`feat(audit): add phase-0 baseline summary metrics for runs and reactions` + +## Ticket 0.5 — Docs + Diagram + State Sync + +### Scope + +Document new observability fields and baseline workflow: + +1. Protocol/observability notes (if gateway payloads changed). +2. Architecture docs review for run lifecycle observability path. +3. Plan/state updates with executed commands and artifacts. + +### Files + +1. `README.md` +2. `docs/api/PROTOCOL.md` +3. `docs/architecture/AGENT_DIAGRAM.md` +4. `docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md` +5. `docs/plans/state.json` +6. `docs/plans/artifacts/*` (baseline outputs) + +### Acceptance + +1. Diagram review is explicitly documented. +2. `state.json` includes ticket completion/test status. +3. Artifact links are present and reproducible. + +### Suggested commit message + +`docs(observability): document phase-0 telemetry and baseline workflow` + +## Validation Commands (per completed ticket set) + +Run focused suites first, then full validation when phase is complete: + +```bash +pnpm typecheck +pnpm test:run src/audit/logger.test.ts +pnpm test:run src/gateway/metrics.test.ts +pnpm test:run src/gateway/handlers/agent.test.ts +pnpm test:run src/daemon/routing.test.ts +pnpm test:run src/audit/backendCanarySummary.test.ts +pnpm test:run src/gateway/handlers/observability.test.ts +pnpm test:run +pnpm lint +pnpm build +``` + +## Rollout and Exit Criteria + +Phase 0 is complete when: + +1. Baseline metrics are emitted for at least one representative channel session and one gateway session. +2. A baseline summary artifact is generated and committed under `docs/plans/artifacts/`. +3. No user-visible response behavior changed compared to pre-phase baseline. + +## Subagent Model Assignment Plan + +1. `claude-haiku-4.5`: + - schema additions, mechanical logger wiring, docs consistency edits. +2. `claude-sonnet-4.6`: + - gateway/router instrumentation and metrics implementation. +3. 
`claude-opus-4.6`: + - event taxonomy review, baseline gate design, failure-mode coverage review. diff --git a/docs/plans/state.json b/docs/plans/state.json index 51bef95..29843ce 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -1,8 +1,45 @@ { "version": "1.0", - "updated_at": "2026-02-24", + "updated_at": "2026-02-25", "description": "Tracks the status of all Flynn plans and implementation phases", "plans": { + "phase0-ticket-0.1-audit-schema-extension": { + "status": "completed", + "date": "2026-02-25", + "updated": "2026-02-25", + "summary": "Implemented Phase 0 Ticket 0.1 by extending audit event taxonomy with run lifecycle and reaction decision events (`run.state`, `run.cancel`, `reaction.match`, `reaction.skip`), adding typed logger methods/level routing, and adding regression coverage for serialization and level expectations.", + "files_modified": [ + "src/audit/types.ts", + "src/audit/logger.ts", + "src/audit/logger.test.ts", + "docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md", + "docs/plans/state.json" + ], + "test_status": "pnpm test:run src/audit/logger.test.ts + pnpm typecheck passing" + }, + "phase0-instrumentation-ticket-checklist": { + "status": "completed", + "date": "2026-02-25", + "updated": "2026-02-25", + "summary": "Added a decision-complete Phase 0 implementation ticket checklist for deeper surfaces/behavior-stack baseline instrumentation, including atomic ticket boundaries, file-level scope, acceptance criteria, validation commands, and model-tier subagent assignments.", + "files_modified": [ + "docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md", + "docs/plans/2026-02-25-deeper-end-user-surfaces-and-integrated-behavior-stack-plan.md", + "docs/plans/state.json" + ], + "test_status": "planning/docs update only; no runtime code changes" + }, + "deeper-end-user-surfaces-and-integrated-behavior-stack-plan": { + "status": "completed", + "date": "2026-02-25", + "updated": "2026-02-25", + "summary": 
"Added a decision-complete roadmap for deepening Flynn's end-user surfaces and integrated behavior stack (balanced hybrid: run-control semantics, reactions/proactive behavior v2, companion/canvas/voice surface depth, and canary rollout gates) with explicit success criteria and file-level implementation scope.", + "files_modified": [ + "docs/plans/2026-02-25-deeper-end-user-surfaces-and-integrated-behavior-stack-plan.md", + "docs/plans/state.json" + ], + "test_status": "planning/docs update only; no runtime code changes" + }, "pi-embedded-manual-runtime-mode-control": { "status": "completed", "date": "2026-02-24", @@ -6588,7 +6625,10 @@ "model_router_correctness": "completed — fallback paths now avoid duplicate clients, apply retry policy consistently, and reject unsupported OpenAI OAuth tool requests early", "native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback, plus 2026-02-23 arg hydration hardening, tool.args_rewritten audit metric, transient fetch retry/timeout hardening, localhost->127.0.0.1 fallback for transcription endpoint connectivity, and whisper docker-compose entrypoint arg fix for port 18801", "remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 3/3 (100%) — component registry, confidence routing, history index. 
Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening", - "next_up": "Track OpenClaw evolution regularly for inspiration and feature ideas", + "deeper_surfaces_behavior_stack_plan": "completed — documented a decision-complete balanced-hybrid roadmap for OpenClaw-like end-user surface depth plus integrated behavior semantics with phased scope, acceptance gates, and rollout constraints", + "deeper_surfaces_phase0_ticket_pack": "completed — produced an atomic implementation checklist for Phase 0 baseline observability work (audit events, router/gateway emitters, metrics counters, baseline summary tooling, docs sync)", + "deeper_surfaces_phase0_ticket_01": "completed — audit schema/logger now capture run lifecycle and reaction decision baseline events (`run.state`, `run.cancel`, `reaction.match`, `reaction.skip`) with regression test coverage", + "next_up": "Implement Ticket 0.2 from docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md", "pi_embedded_canary_spike": "completed — added optional pi_embedded backend adapter, canary-safe no-tools routing guard, backend success/fallback latency audit events, and docs/diagram updates while native remains default", "pi_embedded_evaluation_phase": "completed — final decision rollback (applied in runtime config): Window A failed latency/fallback gates (p50 +259ms, p95 +5695ms, fallback 25%, categories: pi_module_interface/empty_assistant_text); Window B remained sample-insufficient; controlled probes verified guard coverage (pi_no_tools_mode/capability_query/attachments_present each hit once)", "pi_embedded_manual_mode": "completed — added persisted runtime backend controls for manual Pi activation/deactivation (`/runtime` preferred, `/backend` alias; `status`, `activate pi`, `deactivate pi`, `use config`) while keeping config-driven default routing", diff --git a/src/audit/logger.test.ts b/src/audit/logger.test.ts index 3c22e74..ce98383 100644 --- a/src/audit/logger.test.ts +++ 
b/src/audit/logger.test.ts @@ -4,6 +4,20 @@ import { join, resolve } from 'path'; import { describe, expect, it } from 'vitest'; import { AuditLogger } from './logger.js'; +function waitForFlush(): Promise<void> { + return new Promise((resolvePromise) => setTimeout(resolvePromise, 25)); +} + +function readAuditEvents(filePath: string): Array<{ event_type: string; level: string; event: Record<string, unknown> }> { + const content = readFileSync(filePath, 'utf-8').trim(); + if (!content) { + return []; + } + return content + .split('\n') + .map((line) => JSON.parse(line) as { event_type: string; level: string; event: Record<string, unknown> }); +} + describe('AuditLogger', () => { it('expands ~ in audit path before writing logs', async () => { const previousHome = process.env.HOME; @@ -28,7 +42,7 @@ describe('AuditLogger', () => { logger.systemStart('test-component'); await logger.close(); - await new Promise((resolvePromise) => setTimeout(resolvePromise, 25)); + await waitForFlush(); const loggerPath = (logger as unknown as { config: { path: string } }).config.path; const expectedPath = resolve(tempHome, '.local/share/flynn/audit.log'); @@ -46,4 +60,98 @@ describe('AuditLogger', () => { rmSync(tempHome, { recursive: true, force: true }); } }); + + it('writes run and reaction baseline events with expected levels', async () => { + const previousHome = process.env.HOME; + const tempHome = mkdtempSync(join(tmpdir(), 'flynn-audit-home-')); + process.env.HOME = tempHome; + + try { + const logger = new AuditLogger({ + enabled: true, + path: '~/.local/share/flynn/audit.log', + max_size_mb: 10, + keep_days: 30, + levels: { + tools: 'debug', + sessions: 'debug', + automation: 'debug', + }, + }); + + logger.runState({ + session_id: 'telegram:123', + channel: 'telegram', + sender: '123', + source: 'channel', + state: 'start', + request_id: 'req-1', + }); + logger.runState({ + session_id: 'telegram:123', + channel: 'telegram', + sender: '123', + source: 'channel', + state: 'error', + request_id: 'req-1', + error:
'model timeout', + }); + logger.runCancel({ + session_id: 'telegram:123', + channel: 'telegram', + sender: '123', + source: 'channel', + requested: true, + acknowledged: true, + request_id: 'req-1', + latency_ms: 120, + }); + logger.reactionMatch({ + session_id: 'telegram:123', + channel: 'telegram', + sender: '123', + source: 'channel', + rule_name: 'daily-briefing-hint', + candidate_count: 4, + filter_summary: 'contains:briefing', + }); + logger.reactionSkip({ + session_id: 'telegram:123', + channel: 'telegram', + sender: '123', + source: 'channel', + reason: 'no_match', + candidate_count: 4, + }); + + await logger.close(); + await waitForFlush(); + + const expectedPath = resolve(tempHome, '.local/share/flynn/audit.log'); + const events = readAuditEvents(expectedPath); + const eventTypes = events.map((event) => event.event_type); + + expect(eventTypes).toContain('run.state'); + expect(eventTypes).toContain('run.cancel'); + expect(eventTypes).toContain('reaction.match'); + expect(eventTypes).toContain('reaction.skip'); + + const runError = events.find((event) => ( + event.event_type === 'run.state' + && event.event.state === 'error' + )); + expect(runError?.level).toBe('error'); + + const reactionSkip = events.find((event) => event.event_type === 'reaction.skip'); + expect(reactionSkip?.level).toBe('debug'); + expect(reactionSkip?.event.reason).toBe('no_match'); + } finally { + if (previousHome === undefined) { + delete process.env.HOME; + } else { + process.env.HOME = previousHome; + } + rmSync(tempHome, { recursive: true, force: true }); + } + }); }); diff --git a/src/audit/logger.ts b/src/audit/logger.ts index 442cbb9..0ae5606 100644 --- a/src/audit/logger.ts +++ b/src/audit/logger.ts @@ -22,6 +22,10 @@ import type { SessionAutoCompactEvent, UserActionEvent, QueuePreemptEvent, + RunStateEvent, + RunCancelEvent, + ReactionMatchEvent, + ReactionSkipEvent, BackendRouteEvent, BackendSuccessEvent, BackendFallbackEvent, @@ -211,6 +215,28 @@ export class AuditLogger { 
this.write({ level: 'info', event_type: 'queue.preempt', event: event as unknown as Record<string, unknown> }); } + runState(event: RunStateEvent): void { + const level = event.state === 'error' ? 'error' : 'info'; + if (!this.shouldLog('sessions', level)) {return;} + this.write({ level, event_type: 'run.state', event: event as unknown as Record<string, unknown> }); + } + + runCancel(event: RunCancelEvent): void { + const level = event.acknowledged ? 'info' : 'warn'; + if (!this.shouldLog('sessions', level)) {return;} + this.write({ level, event_type: 'run.cancel', event: event as unknown as Record<string, unknown> }); + } + + reactionMatch(event: ReactionMatchEvent): void { + if (!this.shouldLog('sessions', 'info')) {return;} + this.write({ level: 'info', event_type: 'reaction.match', event: event as unknown as Record<string, unknown> }); + } + + reactionSkip(event: ReactionSkipEvent): void { + if (!this.shouldLog('sessions', 'debug')) {return;} + this.write({ level: 'debug', event_type: 'reaction.skip', event: event as unknown as Record<string, unknown> }); + } + backendRoute(event: BackendRouteEvent): void { if (!this.shouldLog('sessions', 'info')) {return;} this.write({ level: 'info', event_type: 'backend.route', event: event as unknown as Record<string, unknown> }); diff --git a/src/audit/types.ts b/src/audit/types.ts index fab5008..cbb5333 100644 --- a/src/audit/types.ts +++ b/src/audit/types.ts @@ -12,6 +12,8 @@ export type AuditEventType = // Session lifecycle | 'session.create' | 'session.message' | 'session.delete' | 'session.transfer' | 'session.compact' | 'session.checkpoint' | 'session.auto_compact' | 'user.action' | 'queue.preempt' + | 'run.state' | 'run.cancel' + | 'reaction.match' | 'reaction.skip' | 'backend.route' | 'backend.success' | 'backend.fallback' // Automation - Cron | 'cron.trigger' | 'cron.sent' | 'cron.add' | 'cron.remove' @@ -232,6 +234,47 @@ export interface QueuePreemptEvent { cancelled_active_run: boolean; } +export interface RunStateEvent { + session_id: string; + channel: string; + sender: string; + source: 'gateway' | 'channel';
+ state: 'start' | 'complete' | 'cancel_requested' | 'cancelled' | 'error'; + request_id?: string; + duration_ms?: number; + error?: string; +} + +export interface RunCancelEvent { + session_id: string; + channel: string; + sender: string; + source: 'gateway' | 'channel'; + requested: boolean; + acknowledged: boolean; + request_id?: string; + latency_ms?: number; +} + +export interface ReactionMatchEvent { + session_id?: string; + channel: string; + sender: string; + source: 'gateway' | 'channel'; + rule_name: string; + candidate_count?: number; + filter_summary?: string; +} + +export interface ReactionSkipEvent { + session_id?: string; + channel: string; + sender: string; + source: 'gateway' | 'channel'; + reason: 'no_rules' | 'no_match' | 'disabled' | 'channel_mismatch' | 'filter_miss'; + candidate_count: number; +} + export interface BackendRouteEvent { session_id: string; channel: string;