From b6792616835c9242616010fb7d5f1ac50febcba2 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Thu, 26 Feb 2026 13:12:53 -0800 Subject: [PATCH] feat(subagents): add idle ttl cleanup and summary tool --- README.md | 3 + config/default.yaml | 1 + docs/api/PROTOCOL.md | 2 +- docs/architecture/AGENT_DIAGRAM.md | 2 +- .../GATEWAY_SESSIONS_AND_QUEUE.md | 2 +- ...-personal-assistant-productization-plan.md | 2 +- .../2026-02-26-subagents-support-plan.md | 10 +-- docs/plans/state.json | 10 +-- src/backends/native/subagents.test.ts | 38 +++++++++++ src/backends/native/subagents.ts | 65 +++++++++++++++++++ src/config/schema.test.ts | 14 ++++ src/config/schema.ts | 1 + src/daemon/routing.ts | 2 + src/tools/builtin/subagents.test.ts | 23 +++++++ src/tools/builtin/subagents.ts | 62 +++++++++++++++++- src/tools/policy.test.ts | 1 + src/tools/policy.ts | 3 + 17 files changed, 226 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 0d4e42a..cf83652 100644 --- a/README.md +++ b/README.md @@ -804,12 +804,14 @@ Available tools: - `subagent.list` — list active child sessions - `subagent.cancel` — request cancellation for a running child turn - `subagent.delete` — remove a child session and clear its history +- `subagent.summary` — inspect transcript summary for a child session Example flow: ```json {"name":"subagent.spawn","args":{"agent":"research","subagent_id":"plan-research","task":"Survey backup strategies for a 3-node homelab."}} {"name":"subagent.send","args":{"subagent_id":"plan-research","message":"Now compare operational risk and recovery-time tradeoffs."}} +{"name":"subagent.summary","args":{"subagent_id":"plan-research","limit":20}} {"name":"subagent.list","args":{}} ``` @@ -820,6 +822,7 @@ agents: subagents: enabled: true max_active_sessions: 6 + idle_ttl_ms: 3600000 ``` ## Running as Service diff --git a/config/default.yaml b/config/default.yaml index 5a29575..7694a50 100644 --- a/config/default.yaml +++ b/config/default.yaml @@ -301,6 +301,7 @@ agents: subagents: enabled: true max_active_sessions: 6 + idle_ttl_ms: 3600000 # ── Memory / Embeddings ────────────────────────────────────────────── # Enable hybrid keyword + vector search using local Ollama embeddings. diff --git a/docs/api/PROTOCOL.md b/docs/api/PROTOCOL.md index faaba60..a4a9158 100644 --- a/docs/api/PROTOCOL.md +++ b/docs/api/PROTOCOL.md @@ -41,7 +41,7 @@ The gateway serialises agent work **per session**, not per WebSocket connection: - The gateway `agent.send` command path and channel-router path use the same runtime backend-mode command service; `flynn tui` forwards `/runtime ...` through this gateway path for parity. - Backend routing and fallback outcomes are emitted to audit logs (`backend.route`, `backend.success`, `backend.fallback`) for rollout evaluation; this telemetry is outside JSON-RPC response payloads. - Session-start memory injection (`user/profile` + `user/working`) is server-side and controlled by `memory.user_namespace`; it does not affect protocol payloads. -- Multi-turn child agents are exposed through tool calls (`subagent.spawn/send/list/cancel/delete`) inside the agent loop; they do not add new JSON-RPC methods. +- Multi-turn child agents are exposed through tool calls (`subagent.spawn/send/list/cancel/delete/summary`) inside the agent loop; they do not add new JSON-RPC methods. This is implemented via a per-lane queue (`LaneQueue`) in the gateway server, and used by `agent.send` and `agent.cancel`. diff --git a/docs/architecture/AGENT_DIAGRAM.md b/docs/architecture/AGENT_DIAGRAM.md index 0b9c3ea..3ef91d4 100644 --- a/docs/architecture/AGENT_DIAGRAM.md +++ b/docs/architecture/AGENT_DIAGRAM.md @@ -137,7 +137,7 @@ Tool Calls (inside NativeAgent loop) +---------------------------> AuditLogger (redacted) Subagent sessions (multi-turn child agents) - parent AgentOrchestrator -> subagent.* tools -> SubagentManager + parent AgentOrchestrator -> subagent.* tools -> SubagentManager (TTL cleanup) SubagentManager -> child AgentOrchestrator (session namespace: subagent::) child AgentOrchestrator -> NativeAgent/tool loop (same policy engine, recursion tools removed) diff --git a/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md b/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md index 3e23a42..173bd48 100644 --- a/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md +++ b/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md @@ -17,7 +17,7 @@ If you only want the protocol surface, see `docs/api/PROTOCOL.md`. - Backend routing outcomes are auditable via `backend.route` / `backend.success` / `backend.fallback`, which enables offline canary evaluation without changing gateway protocol methods. - Run lifecycle/cancel intent and reaction decisions are emitted to audit logs, and aggregated into `system.metrics` counters (runStates, cancelLatencyMs, reactions) for dashboards. - Reaction matching is deterministic (priority + cooldown + recursion guard) before intent/agent routing. -- `subagent.*` tools create child orchestrators scoped to the parent conversation (`subagent::`); this is tool-loop behavior, not a separate gateway RPC session lane. +- `subagent.*` tools create child orchestrators scoped to the parent conversation (`subagent::`) with idle TTL cleanup; this is tool-loop behavior, not a separate gateway RPC session lane. - Companion `node.*` registration is per WebSocket connection; reconnects must re-register capabilities before invoking node RPC methods. - Canvas artifacts are persisted per session under the gateway data directory for UI recovery across restarts. - TTS output is best-effort; synthesis failures fall back to text-only responses. diff --git a/docs/plans/2026-02-26-personal-assistant-productization-plan.md b/docs/plans/2026-02-26-personal-assistant-productization-plan.md index 87c0cd2..f1cb1b1 100644 --- a/docs/plans/2026-02-26-personal-assistant-productization-plan.md +++ b/docs/plans/2026-02-26-personal-assistant-productization-plan.md @@ -20,7 +20,7 @@ The following were previously treated as gaps but are already implemented in Fly 2. Voice UX is functional but not yet a polished, end-to-end daily-driver experience across surfaces. 3. Browser tools exist but lack task-level reliability primitives (checkpoints/retries/guardrails) for autonomous workflows. 4. Onboarding lacks a "first success" guided path that validates real integrations live during setup. -5. Subagent sessions are now available (`subagent.*`) but need lifecycle hardening (TTL/budgeting/UI visibility) for larger autonomous workflows. +5. Subagent sessions are now available (`subagent.*`) with idle TTL cleanup and transcript summary support, but still need budgeting/UI visibility for larger autonomous workflows. ## Product Goal diff --git a/docs/plans/2026-02-26-subagents-support-plan.md b/docs/plans/2026-02-26-subagents-support-plan.md index bfad5e8..145211b 100644 --- a/docs/plans/2026-02-26-subagents-support-plan.md +++ b/docs/plans/2026-02-26-subagents-support-plan.md @@ -1,7 +1,7 @@ # Subagents Support Plan (Flynn) Date: 2026-02-26 -Status: phase 1 implemented +Status: phase 1 implemented, phase 2 partially implemented Scope: add OpenClaw-style multi-turn subagent session support in Flynn without changing channel surface scope (Telegram-first) ## Constraints @@ -32,10 +32,10 @@ Scope: add OpenClaw-style multi-turn subagent session support in Flynn without c ## Phase 2 (Next) -1. Add per-subagent TTL/idle eviction and auto-cleanup metrics. -2. Add optional transcript export/summarization (`subagent.summary`). -3. Add per-subagent tool-profile override (read-only by default for risky workloads). -4. Add parent-child trace IDs in audit events for easier debugging. +1. Add per-subagent TTL/idle eviction and auto-cleanup metrics. (implemented: TTL eviction) +2. Add optional transcript export/summarization (`subagent.summary`). (implemented) +3. Add per-subagent tool-profile override (read-only by default for risky workloads). (pending) +4. Add parent-child trace IDs in audit events for easier debugging. (pending) ## Phase 3 (Stretch) diff --git a/docs/plans/state.json b/docs/plans/state.json index f65b6ea..d27bc2b 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -6800,7 +6800,7 @@ "status": "completed", "date": "2026-02-26", "updated": "2026-02-26", - "summary": "Implemented Phase 1 subagent support: added a SubagentManager with multi-turn child sessions, new `subagent.*` tools (spawn/send/list/cancel/delete), routing wiring, config guardrails, policy/profile integration, docs/diagram updates, and focused test coverage.", + "summary": "Implemented Phase 1 and partial Phase 2 subagent support: added a SubagentManager with multi-turn child sessions, idle TTL cleanup, new `subagent.*` tools (spawn/send/list/cancel/delete/summary), routing wiring, config guardrails, policy/profile integration, docs/diagram updates, and focused test coverage.", "files_modified": [ "src/backends/native/subagents.ts", "src/backends/native/subagents.test.ts", @@ -6824,11 +6824,11 @@ "docs/plans/2026-02-26-personal-assistant-productization-plan.md", "docs/plans/state.json" ], - "test_status": "pnpm test:run src/backends/native/subagents.test.ts src/tools/builtin/subagents.test.ts src/tools/policy.test.ts src/config/schema.test.ts passing" + "test_status": "pnpm test:run src/backends/native/subagents.test.ts src/tools/builtin/subagents.test.ts src/tools/policy.test.ts src/config/schema.test.ts src/daemon/routing.test.ts passing + pnpm typecheck" } }, "overall_progress": { - "total_test_count": 2531, + "total_test_count": 2533, "all_tests_passing": true, "p0_completion": "3/3 (100%)", "p1_completion": "4/4 (100%)", @@ -6843,7 +6843,7 @@ "tier2_completion": "4/4 (100%) \u2014 inbound webhooks, vector memory search, Dockerfile, heartbeat monitor", "tier3_completion": "5/5 (100%) \u2014 lane queue, credential redaction, web UI token dashboard, xAI (Grok) provider, Voyage AI embeddings", "tier4_completion": "4/4 (100%) \u2014 gateway lock, shell completion, Tailscale Serve/Funnel, DM pairing codes", - "feature_gap_scorecard": "rebaselined 2026-02-26 — channel breadth, setup wizard, baseline browser automation, and phase-1 multi-turn subagent sessions (`subagent.*`) are implemented; remaining high-impact personal-assistant gaps center on shipped companion apps (desktop/mobile), voice UX polish, browser workflow reliability primitives, and first-success onboarding funnel optimization.", + "feature_gap_scorecard": "rebaselined 2026-02-26 — channel breadth, setup wizard, baseline browser automation, and partial phase-2 subagent support (`subagent.*` + idle TTL cleanup + transcript summary) are implemented; remaining high-impact personal-assistant gaps center on shipped companion apps (desktop/mobile), voice UX polish, browser workflow reliability primitives, and first-success onboarding funnel optimization.", "operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete \u2014 milestone done", "dashboard_observability": "completed \u2014 service health graphs + core service log viewer added to web UI via observability RPCs and bounded backend sampling", "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram", @@ -6877,7 +6877,7 @@ "deeper_surfaces_phase4_rollout": "completed \u2014 phase 4 rollout and operator readiness plan documented: canary rollout plan by feature flag/surface, explicit rollback playbook, operator docs and architecture/protocol docs synchronized", "post_phase_test_fixes": "completed \u2014 fixed 4 test failures introduced by phases 1-3: iOS/Android push listNodes (missing publishHeartbeat before platform-filtered query), server.test agent.send (run_state events now precede done; added sendAndWaitForDone helper), httpBody 413 (req.destroy() closed socket before response could be sent; replaced with Connection: close header on 413 responses)", "personal_assistant_productization_plan": "proposed \u2014 8-10 week phased roadmap defined (companion MVP surfaces, voice reliability hardening, browser workflow reliability layer, onboarding 2.0 first-success funnel) with measurable exit gates.", - "subagents_support": "completed \u2014 phase-1 subagent runtime support added with `subagent.spawn/send/list/cancel/delete`, per-parent child-session orchestration, config guardrails (`agents.subagents.*`), and focused regression tests." + "subagents_support": "completed \u2014 phase-1 plus partial phase-2 subagent runtime support added with `subagent.spawn/send/list/cancel/delete/summary`, per-parent child-session orchestration, idle TTL cleanup (`agents.subagents.idle_ttl_ms`), config guardrails, and focused regression tests." }, "soul_md_and_cron_create": { "date": "2026-02-11", diff --git a/src/backends/native/subagents.test.ts b/src/backends/native/subagents.test.ts index bdfc757..6ec4ebc 100644 --- a/src/backends/native/subagents.test.ts +++ b/src/backends/native/subagents.test.ts @@ -136,6 +136,7 @@ describe('SubagentManager', () => { 'subagent.list', 'subagent.cancel', 'subagent.delete', + 'subagent.summary', ]) { tools.register({ name, @@ -163,6 +164,7 @@ describe('SubagentManager', () => { defaultPrimaryTier: 'default', maxIterations: 12, maxActiveSessions: 2, + idleTtlMs: 60000, }); const spawned = manager.spawn({ agent: 'research', subagentId: 'planner' }); @@ -177,6 +179,7 @@ describe('SubagentManager', () => { expect(childToolNames).not.toContain('agent.delegate'); expect(childToolNames).not.toContain('council.run'); expect(childToolNames).not.toContain('subagent.spawn'); + expect(childToolNames).not.toContain('subagent.summary'); const firstSend = await manager.send('planner', 'Draft a rollout plan'); expect(firstSend.content).toBe('subagent:Draft a rollout plan'); @@ -186,6 +189,10 @@ describe('SubagentManager', () => { expect(listed).toHaveLength(1); expect(listed[0].id).toBe('planner'); expect(listed[0].messageCount).toBe(2); + const transcript = manager.getTranscript('planner'); + expect(transcript.messages).toHaveLength(2); + expect(transcript.messages[0].role).toBe('user'); + expect(transcript.messages[1].role).toBe('assistant'); expect(manager.cancel('planner')).toBe(true); expect(mocks.cancelCalls).toBe(1); @@ -214,6 +221,7 @@ describe('SubagentManager', () => { maxDelegationDepth: 3, defaultPrimaryTier: 'default', maxActiveSessions: 1, + idleTtlMs: 60000, }); manager.spawn({ agent: 'helper', subagentId: 'one' }); @@ -242,8 +250,38 @@ describe('SubagentManager', () => { maxDelegationDepth: 3, defaultPrimaryTier: 'default', maxActiveSessions: 3, + idleTtlMs: 60000, }); expect(() => manager.spawn({ agent: 'unknown' })).toThrow('not found'); }); + + it('evicts idle subagent sessions based on ttl', async () => { + const sessionManager = createSessionManagerMock(); + const manager = new SubagentManager({ + parentSessionId: 'telegram:dave', + modelRouter: {} as never, + sessionManager: sessionManager.api as never, + toolRegistry: new ToolRegistry(), + toolExecutor: {} as never, + agentConfigRegistry: createAgentRegistryMock() as never, + delegation: { + compaction: 'fast', + memory_extraction: 'fast', + classification: 'fast', + tool_summarisation: 'fast', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 3, + defaultPrimaryTier: 'default', + maxActiveSessions: 3, + idleTtlMs: 1000, + }); + + manager.spawn({ agent: 'helper', subagentId: 'ttl-one' }); + await manager.send('ttl-one', 'hello'); + const removed = manager.cleanupExpired(Date.now() + 2000); + expect(removed).toEqual(['ttl-one']); + expect(manager.list()).toEqual([]); + }); }); diff --git a/src/backends/native/subagents.ts b/src/backends/native/subagents.ts index 5ff483e..108d98d 100644 --- a/src/backends/native/subagents.ts +++ b/src/backends/native/subagents.ts @@ -1,5 +1,6 @@ import { randomUUID } from 'node:crypto'; import type { AgentConfigRegistry } from '../../agents/registry.js'; +import type { Message } from '../../models/types.js'; import type { ToolPolicyContext } from '../../tools/policy.js'; import type { ModelRouter, ModelTier } from '../../models/router.js'; import type { SessionManager } from '../../session/manager.js'; @@ -17,6 +18,7 @@ const BLOCKED_SUBAGENT_TOOL_NAMES = [ 'subagent.list', 'subagent.cancel', 'subagent.delete', + 'subagent.summary', ]; export interface SubagentManagerConfig { @@ -31,6 +33,7 @@ export interface SubagentManagerConfig { defaultPrimaryTier: ModelTier; maxIterations?: number; maxActiveSessions: number; + idleTtlMs: number; toolPolicyContext?: ToolPolicyContext; } @@ -67,6 +70,17 @@ export interface SubagentSendResult { session: SubagentSessionSummary; } +export interface SubagentTranscriptEntry { + role: string; + content: string; + timestamp?: number; +} + +export interface SubagentTranscriptResult { + session: SubagentSessionSummary; + messages: SubagentTranscriptEntry[]; +} + /** * Manages multi-turn child subagent sessions scoped to a parent session. */ @@ -76,6 +90,8 @@ export class SubagentManager { constructor(private readonly config: SubagentManagerConfig) {} spawn(request: SpawnSubagentRequest): SubagentSessionSummary { + this.cleanupExpired(); + const agentName = request.agent.trim(); if (!agentName) { throw new Error('agent is required'); @@ -149,6 +165,8 @@ export class SubagentManager { } async send(subagentId: string, message: string): Promise { + this.cleanupExpired(); + const subagent = this.requireSubagent(subagentId); const trimmed = message.trim(); if (!trimmed) { @@ -170,6 +188,8 @@ export class SubagentManager { } cancel(subagentId: string): boolean { + this.cleanupExpired(); + const subagent = this.requireSubagent(subagentId); if (!subagent.orchestrator.isCancellable()) { return false; @@ -197,11 +217,48 @@ export class SubagentManager { } list(): SubagentSessionSummary[] { + this.cleanupExpired(); + return [...this.sessions.values()] .map((entry) => this.getSummary(entry)) .sort((a, b) => a.id.localeCompare(b.id)); } + getTranscript(subagentId: string, limit?: number): SubagentTranscriptResult { + this.cleanupExpired(); + + const subagent = this.requireSubagent(subagentId); + const session = this.config.sessionManager.getSession(SUBAGENT_FRONTEND, subagent.sessionUserId); + const history = session.getHistory(); + const max = typeof limit === 'number' && Number.isFinite(limit) && limit > 0 + ? Math.floor(limit) + : history.length; + const tail = history.slice(Math.max(0, history.length - max)); + const messages = tail.map((entry) => this.toTranscriptEntry(entry)); + return { + session: this.getSummary(subagent), + messages, + }; + } + + cleanupExpired(nowMs: number = Date.now()): string[] { + if (this.config.idleTtlMs <= 0) { + return []; + } + const removed: string[] = []; + for (const [id, session] of this.sessions.entries()) { + if (session.busy) { + continue; + } + if ((nowMs - session.updatedAt) <= this.config.idleTtlMs) { + continue; + } + this.delete(id); + removed.push(id); + } + return removed; + } + private resolveSubagentId(rawId: string | undefined): string { const explicit = rawId?.trim(); if (explicit) { @@ -236,4 +293,12 @@ export class SubagentManager { busy: subagent.busy, }; } + + private toTranscriptEntry(entry: Message): SubagentTranscriptEntry { + return { + role: entry.role, + content: typeof entry.content === 'string' ? entry.content : '[multipart]', + timestamp: entry.timestamp, + }; + } } diff --git a/src/config/schema.test.ts b/src/config/schema.test.ts index f37ec7f..dfe6714 100644 --- a/src/config/schema.test.ts +++ b/src/config/schema.test.ts @@ -1700,6 +1700,7 @@ describe('configSchema — agents truthfulness/autonomy', () => { expect(result.agents.sensitive_mode).toBe('confirm_without_elevation'); expect(result.agents.subagents.enabled).toBe(true); expect(result.agents.subagents.max_active_sessions).toBe(6); + expect(result.agents.subagents.idle_ttl_ms).toBe(3600000); expect(result.agents.immutable_denylist).toEqual( expect.arrayContaining([ expect.objectContaining({ tool: 'shell.exec', args_pattern: 'git push origin main' }), @@ -1718,6 +1719,7 @@ describe('configSchema — agents truthfulness/autonomy', () => { subagents: { enabled: false, max_active_sessions: 3, + idle_ttl_ms: 120000, }, immutable_denylist: [ { tool: 'shell.exec', args_pattern: 'rm -rf /', reason: 'too destructive' }, @@ -1730,6 +1732,7 @@ describe('configSchema — agents truthfulness/autonomy', () => { expect(result.agents.sensitive_mode).toBe('confirm_without_elevation'); expect(result.agents.subagents.enabled).toBe(false); expect(result.agents.subagents.max_active_sessions).toBe(3); + expect(result.agents.subagents.idle_ttl_ms).toBe(120000); expect(result.agents.immutable_denylist).toEqual([ { tool: 'shell.exec', args_pattern: 'rm -rf /', reason: 'too destructive' }, ]); @@ -1746,6 +1749,17 @@ describe('configSchema — agents truthfulness/autonomy', () => { })).toThrow(); }); + it('rejects invalid subagent idle ttl', () => { + expect(() => configSchema.parse({ + ...minimalConfig, + agents: { + subagents: { + idle_ttl_ms: 5000, + }, + }, + })).toThrow(); + }); + it('rejects invalid truthfulness_mode', () => { expect(() => configSchema.parse({ ...minimalConfig, diff --git a/src/config/schema.ts b/src/config/schema.ts index 15d6701..99f8eff 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -538,6 +538,7 @@ const agentsSchema = z.object({ subagents: z.object({ enabled: z.boolean().default(true), max_active_sessions: z.number().min(1).max(32).default(6), + idle_ttl_ms: z.number().min(60_000).max(86_400_000).default(3_600_000), }).default({}), auto_escalate: z.boolean().default(false), max_delegation_depth: z.number().min(1).max(10).default(3), diff --git a/src/daemon/routing.ts b/src/daemon/routing.ts index 98a3a50..1c47cb4 100644 --- a/src/daemon/routing.ts +++ b/src/daemon/routing.ts @@ -697,6 +697,7 @@ export function createMessageRouter(deps: { defaultPrimaryTier: effectiveTier, maxIterations: deps.config.agents.max_iterations, maxActiveSessions: maxSubagentSessions, + idleTtlMs: deps.config.agents.subagents?.idle_ttl_ms ?? 3_600_000, }); for (const tool of createSubagentTools(subagentManager)) { effectiveToolRegistry.register(tool); @@ -1049,6 +1050,7 @@ export function createMessageRouter(deps: { names.add('subagent.list'); names.add('subagent.cancel'); names.add('subagent.delete'); + names.add('subagent.summary'); } const sorted = [...names].sort(); return [ diff --git a/src/tools/builtin/subagents.test.ts b/src/tools/builtin/subagents.test.ts index 55f3e54..65a949d 100644 --- a/src/tools/builtin/subagents.test.ts +++ b/src/tools/builtin/subagents.test.ts @@ -5,6 +5,7 @@ const mockController = { spawn: vi.fn(), send: vi.fn(), list: vi.fn(), + getTranscript: vi.fn(), cancel: vi.fn(), delete: vi.fn(), }; @@ -14,6 +15,7 @@ describe('subagent tools', () => { mockController.spawn.mockReset(); mockController.send.mockReset(); mockController.list.mockReset(); + mockController.getTranscript.mockReset(); mockController.cancel.mockReset(); mockController.delete.mockReset(); }); @@ -89,12 +91,28 @@ describe('subagent tools', () => { ]); mockController.cancel.mockReturnValue(true); mockController.delete.mockReturnValue(true); + mockController.getTranscript.mockReturnValue({ + session: { + id: 'planner', + agent: 'research', + tier: 'complex', + messageCount: 4, + createdAt: 1, + updatedAt: 3, + busy: false, + }, + messages: [ + { role: 'user', content: 'Refine the plan' }, + { role: 'assistant', content: 'Follow-up answer' }, + ], + }); const tools = createSubagentTools(mockController); const send = tools.find((tool) => tool.name === 'subagent.send'); const list = tools.find((tool) => tool.name === 'subagent.list'); const cancel = tools.find((tool) => tool.name === 'subagent.cancel'); + const summary = tools.find((tool) => tool.name === 'subagent.summary'); const del = tools.find((tool) => tool.name === 'subagent.delete'); const sendResult = await send!.execute({ subagent_id: 'planner', message: 'Refine the plan' }); @@ -109,6 +127,11 @@ describe('subagent tools', () => { expect(cancelResult.success).toBe(true); expect(cancelResult.output).toContain('Cancellation requested'); + const summaryResult = await summary!.execute({ subagent_id: 'planner' }); + expect(summaryResult.success).toBe(true); + expect(summaryResult.output).toContain('Subagent summary'); + expect(summaryResult.output).toContain('transcript_messages=2'); + const deleteResult = await del!.execute({ subagent_id: 'planner' }); expect(deleteResult.success).toBe(true); expect(deleteResult.output).toContain('Deleted subagent session'); diff --git a/src/tools/builtin/subagents.ts b/src/tools/builtin/subagents.ts index 494488e..c655eb7 100644 --- a/src/tools/builtin/subagents.ts +++ b/src/tools/builtin/subagents.ts @@ -23,6 +23,14 @@ interface SubagentController { session: SubagentSessionSummary; }>; list(): SubagentSessionSummary[]; + getTranscript(subagentId: string, limit?: number): { + session: SubagentSessionSummary; + messages: Array<{ + role: string; + content: string; + timestamp?: number; + }>; + }; cancel(subagentId: string): boolean; delete(subagentId: string): boolean; } @@ -44,6 +52,11 @@ interface SessionArgs { subagent_id: string; } +interface SummaryArgs { + subagent_id: string; + limit?: number; +} + function formatSummary(summary: SubagentSessionSummary): string { return [ `id=${summary.id}`, @@ -204,6 +217,53 @@ export function createSubagentTools(controller: SubagentController): Tool[] { }, }; + const summaryTool: Tool = { + name: 'subagent.summary', + description: 'Get a compact transcript summary for a subagent session.', + inputSchema: { + type: 'object', + properties: { + subagent_id: { type: 'string', description: 'Subagent session ID to summarize.' }, + limit: { type: 'number', description: 'Maximum number of trailing messages to include (default 20).' }, + }, + required: ['subagent_id'], + }, + execute: async (rawArgs: unknown): Promise => { + try { + const args = rawArgs as SummaryArgs; + const transcript = controller.getTranscript(args.subagent_id, args.limit ?? 20); + const userCount = transcript.messages.filter((entry) => entry.role === 'user').length; + const assistantCount = transcript.messages.filter((entry) => entry.role === 'assistant').length; + const lastUser = [...transcript.messages].reverse().find((entry) => entry.role === 'user'); + const lastAssistant = [...transcript.messages].reverse().find((entry) => entry.role === 'assistant'); + const previewLines = transcript.messages.map((entry, idx) => ( + `${idx + 1}. [${entry.role}] ${entry.content.slice(0, 200)}` + )); + + return { + success: true, + output: [ + `Subagent summary (${formatSummary(transcript.session)}):`, + `- transcript_messages=${transcript.messages.length}`, + `- user_messages=${userCount}`, + `- assistant_messages=${assistantCount}`, + `- last_user=${lastUser ? JSON.stringify(lastUser.content.slice(0, 200)) : 'none'}`, + `- last_assistant=${lastAssistant ? JSON.stringify(lastAssistant.content.slice(0, 200)) : 'none'}`, + '', + 'Transcript:', + ...(previewLines.length > 0 ? previewLines : ['(empty)']), + ].join('\n'), + }; + } catch (error) { + return { + success: false, + output: '', + error: error instanceof Error ? error.message : String(error), + }; + } + }, + }; + const deleteTool: Tool = { name: 'subagent.delete', description: 'Delete a subagent session and clear its conversation state.', @@ -239,5 +299,5 @@ export function createSubagentTools(controller: SubagentController): Tool[] { }, }; - return [spawnTool, sendTool, listTool, cancelTool, deleteTool]; + return [spawnTool, sendTool, listTool, cancelTool, summaryTool, deleteTool]; } diff --git a/src/tools/policy.test.ts b/src/tools/policy.test.ts index 276a43d..b1a0ac1 100644 --- a/src/tools/policy.test.ts +++ b/src/tools/policy.test.ts @@ -113,6 +113,7 @@ describe('PROFILE_TOOLS', () => { expect(PROFILE_TOOLS.coding.has('file.write')).toBe(true); expect(PROFILE_TOOLS.coding.has('process.start')).toBe(true); expect(PROFILE_TOOLS.coding.has('subagent.send')).toBe(true); + expect(PROFILE_TOOLS.coding.has('subagent.summary')).toBe(true); }); it('full is empty (special: matches everything)', () => { diff --git a/src/tools/policy.ts b/src/tools/policy.ts index bf475b8..5c59ead 100644 --- a/src/tools/policy.ts +++ b/src/tools/policy.ts @@ -55,6 +55,7 @@ const PROFILE_TOOLS: Record> = { 'subagent.list', 'subagent.cancel', 'subagent.delete', + 'subagent.summary', ]), coding: new Set([ 'file.read', @@ -117,6 +118,7 @@ const PROFILE_TOOLS: Record> = { 'subagent.list', 'subagent.cancel', 'subagent.delete', + 'subagent.summary', ]), full: new Set(), // Special: matches everything }; @@ -146,6 +148,7 @@ export const TOOL_GROUPS: Record = { 'subagent.list', 'subagent.cancel', 'subagent.delete', + 'subagent.summary', ], };