feat(subagents): add idle ttl cleanup and summary tool

This commit is contained in:
William Valentin
2026-02-26 13:12:53 -08:00
parent 2171346116
commit b679261683
17 changed files with 226 additions and 15 deletions
+3
View File
@@ -804,12 +804,14 @@ Available tools:
- `subagent.list` — list active child sessions - `subagent.list` — list active child sessions
- `subagent.cancel` — request cancellation for a running child turn - `subagent.cancel` — request cancellation for a running child turn
- `subagent.delete` — remove a child session and clear its history - `subagent.delete` — remove a child session and clear its history
- `subagent.summary` — inspect transcript summary for a child session
Example flow: Example flow:
```json ```json
{"name":"subagent.spawn","args":{"agent":"research","subagent_id":"plan-research","task":"Survey backup strategies for a 3-node homelab."}} {"name":"subagent.spawn","args":{"agent":"research","subagent_id":"plan-research","task":"Survey backup strategies for a 3-node homelab."}}
{"name":"subagent.send","args":{"subagent_id":"plan-research","message":"Now compare operational risk and recovery-time tradeoffs."}} {"name":"subagent.send","args":{"subagent_id":"plan-research","message":"Now compare operational risk and recovery-time tradeoffs."}}
{"name":"subagent.summary","args":{"subagent_id":"plan-research","limit":20}}
{"name":"subagent.list","args":{}} {"name":"subagent.list","args":{}}
``` ```
@@ -820,6 +822,7 @@ agents:
subagents: subagents:
enabled: true enabled: true
max_active_sessions: 6 max_active_sessions: 6
idle_ttl_ms: 3600000
``` ```
## Running as Service ## Running as Service
+1
View File
@@ -301,6 +301,7 @@ agents:
subagents: subagents:
enabled: true enabled: true
max_active_sessions: 6 max_active_sessions: 6
idle_ttl_ms: 3600000
# ── Memory / Embeddings ────────────────────────────────────────────── # ── Memory / Embeddings ──────────────────────────────────────────────
# Enable hybrid keyword + vector search using local Ollama embeddings. # Enable hybrid keyword + vector search using local Ollama embeddings.
+1 -1
View File
@@ -41,7 +41,7 @@ The gateway serialises agent work **per session**, not per WebSocket connection:
- The gateway `agent.send` command path and channel-router path use the same runtime backend-mode command service; `flynn tui` forwards `/runtime ...` through this gateway path for parity. - The gateway `agent.send` command path and channel-router path use the same runtime backend-mode command service; `flynn tui` forwards `/runtime ...` through this gateway path for parity.
- Backend routing and fallback outcomes are emitted to audit logs (`backend.route`, `backend.success`, `backend.fallback`) for rollout evaluation; this telemetry is outside JSON-RPC response payloads. - Backend routing and fallback outcomes are emitted to audit logs (`backend.route`, `backend.success`, `backend.fallback`) for rollout evaluation; this telemetry is outside JSON-RPC response payloads.
- Session-start memory injection (`user/profile` + `user/working`) is server-side and controlled by `memory.user_namespace`; it does not affect protocol payloads. - Session-start memory injection (`user/profile` + `user/working`) is server-side and controlled by `memory.user_namespace`; it does not affect protocol payloads.
- Multi-turn child agents are exposed through tool calls (`subagent.spawn/send/list/cancel/delete`) inside the agent loop; they do not add new JSON-RPC methods. - Multi-turn child agents are exposed through tool calls (`subagent.spawn/send/list/cancel/delete/summary`) inside the agent loop; they do not add new JSON-RPC methods.
This is implemented via a per-lane queue (`LaneQueue`) in the gateway server, and used by `agent.send` and `agent.cancel`. This is implemented via a per-lane queue (`LaneQueue`) in the gateway server, and used by `agent.send` and `agent.cancel`.
+1 -1
View File
@@ -137,7 +137,7 @@ Tool Calls (inside NativeAgent loop)
+---------------------------> AuditLogger (redacted) +---------------------------> AuditLogger (redacted)
Subagent sessions (multi-turn child agents) Subagent sessions (multi-turn child agents)
parent AgentOrchestrator -> subagent.* tools -> SubagentManager parent AgentOrchestrator -> subagent.* tools -> SubagentManager (TTL cleanup)
SubagentManager -> child AgentOrchestrator (session namespace: subagent:<parent>:<id>) SubagentManager -> child AgentOrchestrator (session namespace: subagent:<parent>:<id>)
child AgentOrchestrator -> NativeAgent/tool loop (same policy engine, recursion tools removed) child AgentOrchestrator -> NativeAgent/tool loop (same policy engine, recursion tools removed)
@@ -17,7 +17,7 @@ If you only want the protocol surface, see `docs/api/PROTOCOL.md`.
- Backend routing outcomes are auditable via `backend.route` / `backend.success` / `backend.fallback`, which enables offline canary evaluation without changing gateway protocol methods. - Backend routing outcomes are auditable via `backend.route` / `backend.success` / `backend.fallback`, which enables offline canary evaluation without changing gateway protocol methods.
- Run lifecycle/cancel intent and reaction decisions are emitted to audit logs, and aggregated into `system.metrics` counters (runStates, cancelLatencyMs, reactions) for dashboards. - Run lifecycle/cancel intent and reaction decisions are emitted to audit logs, and aggregated into `system.metrics` counters (runStates, cancelLatencyMs, reactions) for dashboards.
- Reaction matching is deterministic (priority + cooldown + recursion guard) before intent/agent routing. - Reaction matching is deterministic (priority + cooldown + recursion guard) before intent/agent routing.
- `subagent.*` tools create child orchestrators scoped to the parent conversation (`subagent:<parentSessionId>:<childId>`); this is tool-loop behavior, not a separate gateway RPC session lane. - `subagent.*` tools create child orchestrators scoped to the parent conversation (`subagent:<parentSessionId>:<childId>`) with idle TTL cleanup; this is tool-loop behavior, not a separate gateway RPC session lane.
- Companion `node.*` registration is per WebSocket connection; reconnects must re-register capabilities before invoking node RPC methods. - Companion `node.*` registration is per WebSocket connection; reconnects must re-register capabilities before invoking node RPC methods.
- Canvas artifacts are persisted per session under the gateway data directory for UI recovery across restarts. - Canvas artifacts are persisted per session under the gateway data directory for UI recovery across restarts.
- TTS output is best-effort; synthesis failures fall back to text-only responses. - TTS output is best-effort; synthesis failures fall back to text-only responses.
@@ -20,7 +20,7 @@ The following were previously treated as gaps but are already implemented in Fly
2. Voice UX is functional but not yet a polished, end-to-end daily-driver experience across surfaces. 2. Voice UX is functional but not yet a polished, end-to-end daily-driver experience across surfaces.
3. Browser tools exist but lack task-level reliability primitives (checkpoints/retries/guardrails) for autonomous workflows. 3. Browser tools exist but lack task-level reliability primitives (checkpoints/retries/guardrails) for autonomous workflows.
4. Onboarding lacks a "first success" guided path that validates real integrations live during setup. 4. Onboarding lacks a "first success" guided path that validates real integrations live during setup.
5. Subagent sessions are now available (`subagent.*`) but need lifecycle hardening (TTL/budgeting/UI visibility) for larger autonomous workflows. 5. Subagent sessions are now available (`subagent.*`) with idle TTL cleanup and transcript summary support, but still need budgeting/UI visibility for larger autonomous workflows.
## Product Goal ## Product Goal
@@ -1,7 +1,7 @@
# Subagents Support Plan (Flynn) # Subagents Support Plan (Flynn)
Date: 2026-02-26 Date: 2026-02-26
Status: phase 1 implemented Status: phase 1 implemented, phase 2 partially implemented
Scope: add OpenClaw-style multi-turn subagent session support in Flynn without changing channel surface scope (Telegram-first) Scope: add OpenClaw-style multi-turn subagent session support in Flynn without changing channel surface scope (Telegram-first)
## Constraints ## Constraints
@@ -32,10 +32,10 @@ Scope: add OpenClaw-style multi-turn subagent session support in Flynn without c
## Phase 2 (Next) ## Phase 2 (Next)
1. Add per-subagent TTL/idle eviction and auto-cleanup metrics. 1. Add per-subagent TTL/idle eviction and auto-cleanup metrics. (implemented: TTL eviction)
2. Add optional transcript export/summarization (`subagent.summary`). 2. Add optional transcript export/summarization (`subagent.summary`). (implemented)
3. Add per-subagent tool-profile override (read-only by default for risky workloads). 3. Add per-subagent tool-profile override (read-only by default for risky workloads). (pending)
4. Add parent-child trace IDs in audit events for easier debugging. 4. Add parent-child trace IDs in audit events for easier debugging. (pending)
## Phase 3 (Stretch) ## Phase 3 (Stretch)
+5 -5
View File
@@ -6800,7 +6800,7 @@
"status": "completed", "status": "completed",
"date": "2026-02-26", "date": "2026-02-26",
"updated": "2026-02-26", "updated": "2026-02-26",
"summary": "Implemented Phase 1 subagent support: added a SubagentManager with multi-turn child sessions, new `subagent.*` tools (spawn/send/list/cancel/delete), routing wiring, config guardrails, policy/profile integration, docs/diagram updates, and focused test coverage.", "summary": "Implemented Phase 1 and partial Phase 2 subagent support: added a SubagentManager with multi-turn child sessions, idle TTL cleanup, new `subagent.*` tools (spawn/send/list/cancel/delete/summary), routing wiring, config guardrails, policy/profile integration, docs/diagram updates, and focused test coverage.",
"files_modified": [ "files_modified": [
"src/backends/native/subagents.ts", "src/backends/native/subagents.ts",
"src/backends/native/subagents.test.ts", "src/backends/native/subagents.test.ts",
@@ -6824,11 +6824,11 @@
"docs/plans/2026-02-26-personal-assistant-productization-plan.md", "docs/plans/2026-02-26-personal-assistant-productization-plan.md",
"docs/plans/state.json" "docs/plans/state.json"
], ],
"test_status": "pnpm test:run src/backends/native/subagents.test.ts src/tools/builtin/subagents.test.ts src/tools/policy.test.ts src/config/schema.test.ts passing" "test_status": "pnpm test:run src/backends/native/subagents.test.ts src/tools/builtin/subagents.test.ts src/tools/policy.test.ts src/config/schema.test.ts src/daemon/routing.test.ts passing + pnpm typecheck"
} }
}, },
"overall_progress": { "overall_progress": {
"total_test_count": 2531, "total_test_count": 2533,
"all_tests_passing": true, "all_tests_passing": true,
"p0_completion": "3/3 (100%)", "p0_completion": "3/3 (100%)",
"p1_completion": "4/4 (100%)", "p1_completion": "4/4 (100%)",
@@ -6843,7 +6843,7 @@
"tier2_completion": "4/4 (100%) \u2014 inbound webhooks, vector memory search, Dockerfile, heartbeat monitor", "tier2_completion": "4/4 (100%) \u2014 inbound webhooks, vector memory search, Dockerfile, heartbeat monitor",
"tier3_completion": "5/5 (100%) \u2014 lane queue, credential redaction, web UI token dashboard, xAI (Grok) provider, Voyage AI embeddings", "tier3_completion": "5/5 (100%) \u2014 lane queue, credential redaction, web UI token dashboard, xAI (Grok) provider, Voyage AI embeddings",
"tier4_completion": "4/4 (100%) \u2014 gateway lock, shell completion, Tailscale Serve/Funnel, DM pairing codes", "tier4_completion": "4/4 (100%) \u2014 gateway lock, shell completion, Tailscale Serve/Funnel, DM pairing codes",
"feature_gap_scorecard": "rebaselined 2026-02-26 — channel breadth, setup wizard, baseline browser automation, and phase-1 multi-turn subagent sessions (`subagent.*`) are implemented; remaining high-impact personal-assistant gaps center on shipped companion apps (desktop/mobile), voice UX polish, browser workflow reliability primitives, and first-success onboarding funnel optimization.", "feature_gap_scorecard": "rebaselined 2026-02-26 — channel breadth, setup wizard, baseline browser automation, and partial phase-2 subagent support (`subagent.*` + idle TTL cleanup + transcript summary) are implemented; remaining high-impact personal-assistant gaps center on shipped companion apps (desktop/mobile), voice UX polish, browser workflow reliability primitives, and first-success onboarding funnel optimization.",
"operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete \u2014 milestone done", "operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete \u2014 milestone done",
"dashboard_observability": "completed \u2014 service health graphs + core service log viewer added to web UI via observability RPCs and bounded backend sampling", "dashboard_observability": "completed \u2014 service health graphs + core service log viewer added to web UI via observability RPCs and bounded backend sampling",
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram", "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
@@ -6877,7 +6877,7 @@
"deeper_surfaces_phase4_rollout": "completed \u2014 phase 4 rollout and operator readiness plan documented: canary rollout plan by feature flag/surface, explicit rollback playbook, operator docs and architecture/protocol docs synchronized", "deeper_surfaces_phase4_rollout": "completed \u2014 phase 4 rollout and operator readiness plan documented: canary rollout plan by feature flag/surface, explicit rollback playbook, operator docs and architecture/protocol docs synchronized",
"post_phase_test_fixes": "completed \u2014 fixed 4 test failures introduced by phases 1-3: iOS/Android push listNodes (missing publishHeartbeat before platform-filtered query), server.test agent.send (run_state events now precede done; added sendAndWaitForDone helper), httpBody 413 (req.destroy() closed socket before response could be sent; replaced with Connection: close header on 413 responses)", "post_phase_test_fixes": "completed \u2014 fixed 4 test failures introduced by phases 1-3: iOS/Android push listNodes (missing publishHeartbeat before platform-filtered query), server.test agent.send (run_state events now precede done; added sendAndWaitForDone helper), httpBody 413 (req.destroy() closed socket before response could be sent; replaced with Connection: close header on 413 responses)",
"personal_assistant_productization_plan": "proposed \u2014 8-10 week phased roadmap defined (companion MVP surfaces, voice reliability hardening, browser workflow reliability layer, onboarding 2.0 first-success funnel) with measurable exit gates.", "personal_assistant_productization_plan": "proposed \u2014 8-10 week phased roadmap defined (companion MVP surfaces, voice reliability hardening, browser workflow reliability layer, onboarding 2.0 first-success funnel) with measurable exit gates.",
"subagents_support": "completed \u2014 phase-1 subagent runtime support added with `subagent.spawn/send/list/cancel/delete`, per-parent child-session orchestration, config guardrails (`agents.subagents.*`), and focused regression tests." "subagents_support": "completed \u2014 phase-1 plus partial phase-2 subagent runtime support added with `subagent.spawn/send/list/cancel/delete/summary`, per-parent child-session orchestration, idle TTL cleanup (`agents.subagents.idle_ttl_ms`), config guardrails, and focused regression tests."
}, },
"soul_md_and_cron_create": { "soul_md_and_cron_create": {
"date": "2026-02-11", "date": "2026-02-11",
+38
View File
@@ -136,6 +136,7 @@ describe('SubagentManager', () => {
'subagent.list', 'subagent.list',
'subagent.cancel', 'subagent.cancel',
'subagent.delete', 'subagent.delete',
'subagent.summary',
]) { ]) {
tools.register({ tools.register({
name, name,
@@ -163,6 +164,7 @@ describe('SubagentManager', () => {
defaultPrimaryTier: 'default', defaultPrimaryTier: 'default',
maxIterations: 12, maxIterations: 12,
maxActiveSessions: 2, maxActiveSessions: 2,
idleTtlMs: 60000,
}); });
const spawned = manager.spawn({ agent: 'research', subagentId: 'planner' }); const spawned = manager.spawn({ agent: 'research', subagentId: 'planner' });
@@ -177,6 +179,7 @@ describe('SubagentManager', () => {
expect(childToolNames).not.toContain('agent.delegate'); expect(childToolNames).not.toContain('agent.delegate');
expect(childToolNames).not.toContain('council.run'); expect(childToolNames).not.toContain('council.run');
expect(childToolNames).not.toContain('subagent.spawn'); expect(childToolNames).not.toContain('subagent.spawn');
expect(childToolNames).not.toContain('subagent.summary');
const firstSend = await manager.send('planner', 'Draft a rollout plan'); const firstSend = await manager.send('planner', 'Draft a rollout plan');
expect(firstSend.content).toBe('subagent:Draft a rollout plan'); expect(firstSend.content).toBe('subagent:Draft a rollout plan');
@@ -186,6 +189,10 @@ describe('SubagentManager', () => {
expect(listed).toHaveLength(1); expect(listed).toHaveLength(1);
expect(listed[0].id).toBe('planner'); expect(listed[0].id).toBe('planner');
expect(listed[0].messageCount).toBe(2); expect(listed[0].messageCount).toBe(2);
const transcript = manager.getTranscript('planner');
expect(transcript.messages).toHaveLength(2);
expect(transcript.messages[0].role).toBe('user');
expect(transcript.messages[1].role).toBe('assistant');
expect(manager.cancel('planner')).toBe(true); expect(manager.cancel('planner')).toBe(true);
expect(mocks.cancelCalls).toBe(1); expect(mocks.cancelCalls).toBe(1);
@@ -214,6 +221,7 @@ describe('SubagentManager', () => {
maxDelegationDepth: 3, maxDelegationDepth: 3,
defaultPrimaryTier: 'default', defaultPrimaryTier: 'default',
maxActiveSessions: 1, maxActiveSessions: 1,
idleTtlMs: 60000,
}); });
manager.spawn({ agent: 'helper', subagentId: 'one' }); manager.spawn({ agent: 'helper', subagentId: 'one' });
@@ -242,8 +250,38 @@ describe('SubagentManager', () => {
maxDelegationDepth: 3, maxDelegationDepth: 3,
defaultPrimaryTier: 'default', defaultPrimaryTier: 'default',
maxActiveSessions: 3, maxActiveSessions: 3,
idleTtlMs: 60000,
}); });
expect(() => manager.spawn({ agent: 'unknown' })).toThrow('not found'); expect(() => manager.spawn({ agent: 'unknown' })).toThrow('not found');
}); });
it('evicts idle subagent sessions based on ttl', async () => {
  // Short TTL so that a simulated clock two seconds ahead is already past expiry.
  const idleTtlMs = 1000;
  const subagentId = 'ttl-one';
  const sessionMock = createSessionManagerMock();

  const manager = new SubagentManager({
    parentSessionId: 'telegram:dave',
    modelRouter: {} as never,
    sessionManager: sessionMock.api as never,
    toolRegistry: new ToolRegistry(),
    toolExecutor: {} as never,
    agentConfigRegistry: createAgentRegistryMock() as never,
    delegation: {
      compaction: 'fast',
      memory_extraction: 'fast',
      classification: 'fast',
      tool_summarisation: 'fast',
      complex_reasoning: 'complex',
    },
    maxDelegationDepth: 3,
    defaultPrimaryTier: 'default',
    maxActiveSessions: 3,
    idleTtlMs,
  });

  // Create the child session and run a single turn through it.
  manager.spawn({ agent: 'helper', subagentId });
  await manager.send(subagentId, 'hello');

  // Hand cleanup an explicit "now" instead of waiting for real time to pass.
  const simulatedNow = Date.now() + 2000;
  const evicted = manager.cleanupExpired(simulatedNow);

  expect(evicted).toEqual([subagentId]);
  expect(manager.list()).toEqual([]);
});
}); });
+65
View File
@@ -1,5 +1,6 @@
import { randomUUID } from 'node:crypto'; import { randomUUID } from 'node:crypto';
import type { AgentConfigRegistry } from '../../agents/registry.js'; import type { AgentConfigRegistry } from '../../agents/registry.js';
import type { Message } from '../../models/types.js';
import type { ToolPolicyContext } from '../../tools/policy.js'; import type { ToolPolicyContext } from '../../tools/policy.js';
import type { ModelRouter, ModelTier } from '../../models/router.js'; import type { ModelRouter, ModelTier } from '../../models/router.js';
import type { SessionManager } from '../../session/manager.js'; import type { SessionManager } from '../../session/manager.js';
@@ -17,6 +18,7 @@ const BLOCKED_SUBAGENT_TOOL_NAMES = [
'subagent.list', 'subagent.list',
'subagent.cancel', 'subagent.cancel',
'subagent.delete', 'subagent.delete',
'subagent.summary',
]; ];
export interface SubagentManagerConfig { export interface SubagentManagerConfig {
@@ -31,6 +33,7 @@ export interface SubagentManagerConfig {
defaultPrimaryTier: ModelTier; defaultPrimaryTier: ModelTier;
maxIterations?: number; maxIterations?: number;
maxActiveSessions: number; maxActiveSessions: number;
idleTtlMs: number;
toolPolicyContext?: ToolPolicyContext; toolPolicyContext?: ToolPolicyContext;
} }
@@ -67,6 +70,17 @@ export interface SubagentSendResult {
session: SubagentSessionSummary; session: SubagentSessionSummary;
} }
/**
 * One message of a subagent transcript, reduced to plain fields.
 */
export interface SubagentTranscriptEntry {
// Role string taken from the underlying history message.
role: string;
// Message text; non-string content is reported as the placeholder '[multipart]'.
content: string;
// Timestamp carried over from the history message; may be absent.
timestamp?: number;
}
/**
 * Result of `SubagentManager.getTranscript`: the session summary together
 * with the selected trailing slice of the session's message history.
 */
export interface SubagentTranscriptResult {
// Summary of the subagent session the transcript belongs to.
session: SubagentSessionSummary;
// Transcript entries in history order (the trailing `limit` messages, or the whole history).
messages: SubagentTranscriptEntry[];
}
/** /**
* Manages multi-turn child subagent sessions scoped to a parent session. * Manages multi-turn child subagent sessions scoped to a parent session.
*/ */
@@ -76,6 +90,8 @@ export class SubagentManager {
constructor(private readonly config: SubagentManagerConfig) {} constructor(private readonly config: SubagentManagerConfig) {}
spawn(request: SpawnSubagentRequest): SubagentSessionSummary { spawn(request: SpawnSubagentRequest): SubagentSessionSummary {
this.cleanupExpired();
const agentName = request.agent.trim(); const agentName = request.agent.trim();
if (!agentName) { if (!agentName) {
throw new Error('agent is required'); throw new Error('agent is required');
@@ -149,6 +165,8 @@ export class SubagentManager {
} }
async send(subagentId: string, message: string): Promise<SubagentSendResult> { async send(subagentId: string, message: string): Promise<SubagentSendResult> {
this.cleanupExpired();
const subagent = this.requireSubagent(subagentId); const subagent = this.requireSubagent(subagentId);
const trimmed = message.trim(); const trimmed = message.trim();
if (!trimmed) { if (!trimmed) {
@@ -170,6 +188,8 @@ export class SubagentManager {
} }
cancel(subagentId: string): boolean { cancel(subagentId: string): boolean {
this.cleanupExpired();
const subagent = this.requireSubagent(subagentId); const subagent = this.requireSubagent(subagentId);
if (!subagent.orchestrator.isCancellable()) { if (!subagent.orchestrator.isCancellable()) {
return false; return false;
@@ -197,11 +217,48 @@ export class SubagentManager {
} }
list(): SubagentSessionSummary[] { list(): SubagentSessionSummary[] {
this.cleanupExpired();
return [...this.sessions.values()] return [...this.sessions.values()]
.map((entry) => this.getSummary(entry)) .map((entry) => this.getSummary(entry))
.sort((a, b) => a.id.localeCompare(b.id)); .sort((a, b) => a.id.localeCompare(b.id));
} }
/**
 * Returns the trailing part of a subagent's message history along with the
 * session summary.
 *
 * Expired sessions are evicted first, so asking for an evicted session fails
 * the same way as asking for an unknown one (via `requireSubagent`).
 *
 * @param subagentId ID of the subagent session to read.
 * @param limit Number of trailing messages to return. Anything that is not a
 *   finite number greater than zero selects the full history; fractional
 *   values are floored.
 * @returns The session summary and the selected transcript entries.
 */
getTranscript(subagentId: string, limit?: number): SubagentTranscriptResult {
  this.cleanupExpired();

  const target = this.requireSubagent(subagentId);
  const history = this.config.sessionManager
    .getSession(SUBAGENT_FRONTEND, target.sessionUserId)
    .getHistory();

  // Default to the whole history unless a usable positive limit was supplied.
  let wanted = history.length;
  if (typeof limit === 'number' && Number.isFinite(limit) && limit > 0) {
    wanted = Math.floor(limit);
  }

  const startIndex = Math.max(0, history.length - wanted);
  const messages = history
    .slice(startIndex)
    .map((message) => this.toTranscriptEntry(message));

  return { session: this.getSummary(target), messages };
}
/**
 * Deletes every non-busy session whose idle time exceeds the configured TTL.
 *
 * @param nowMs Reference time in milliseconds; defaults to `Date.now()`.
 *   Accepting it as a parameter lets callers evaluate expiry against a
 *   simulated clock.
 * @returns IDs of the sessions that were deleted, in iteration order. Always
 *   empty when `idleTtlMs` is zero or negative.
 */
cleanupExpired(nowMs: number = Date.now()): string[] {
  const evicted: string[] = [];
  const ttlMs = this.config.idleTtlMs;
  if (ttlMs <= 0) {
    return evicted;
  }

  for (const [sessionId, entry] of this.sessions.entries()) {
    // Busy sessions are never evicted; otherwise a session expires once its
    // idle time is no longer within the TTL.
    const expired = !entry.busy && !((nowMs - entry.updatedAt) <= ttlMs);
    if (expired) {
      this.delete(sessionId);
      evicted.push(sessionId);
    }
  }

  return evicted;
}
private resolveSubagentId(rawId: string | undefined): string { private resolveSubagentId(rawId: string | undefined): string {
const explicit = rawId?.trim(); const explicit = rawId?.trim();
if (explicit) { if (explicit) {
@@ -236,4 +293,12 @@ export class SubagentManager {
busy: subagent.busy, busy: subagent.busy,
}; };
} }
/**
 * Converts a history message into a transcript entry.
 *
 * String content is passed through unchanged; any other content shape is
 * replaced by the placeholder `'[multipart]'`.
 */
private toTranscriptEntry(entry: Message): SubagentTranscriptEntry {
  const { role, content, timestamp } = entry;

  let text: string;
  if (typeof content === 'string') {
    text = content;
  } else {
    text = '[multipart]';
  }

  return { role, content: text, timestamp };
}
} }
+14
View File
@@ -1700,6 +1700,7 @@ describe('configSchema — agents truthfulness/autonomy', () => {
expect(result.agents.sensitive_mode).toBe('confirm_without_elevation'); expect(result.agents.sensitive_mode).toBe('confirm_without_elevation');
expect(result.agents.subagents.enabled).toBe(true); expect(result.agents.subagents.enabled).toBe(true);
expect(result.agents.subagents.max_active_sessions).toBe(6); expect(result.agents.subagents.max_active_sessions).toBe(6);
expect(result.agents.subagents.idle_ttl_ms).toBe(3600000);
expect(result.agents.immutable_denylist).toEqual( expect(result.agents.immutable_denylist).toEqual(
expect.arrayContaining([ expect.arrayContaining([
expect.objectContaining({ tool: 'shell.exec', args_pattern: 'git push origin main' }), expect.objectContaining({ tool: 'shell.exec', args_pattern: 'git push origin main' }),
@@ -1718,6 +1719,7 @@ describe('configSchema — agents truthfulness/autonomy', () => {
subagents: { subagents: {
enabled: false, enabled: false,
max_active_sessions: 3, max_active_sessions: 3,
idle_ttl_ms: 120000,
}, },
immutable_denylist: [ immutable_denylist: [
{ tool: 'shell.exec', args_pattern: 'rm -rf /', reason: 'too destructive' }, { tool: 'shell.exec', args_pattern: 'rm -rf /', reason: 'too destructive' },
@@ -1730,6 +1732,7 @@ describe('configSchema — agents truthfulness/autonomy', () => {
expect(result.agents.sensitive_mode).toBe('confirm_without_elevation'); expect(result.agents.sensitive_mode).toBe('confirm_without_elevation');
expect(result.agents.subagents.enabled).toBe(false); expect(result.agents.subagents.enabled).toBe(false);
expect(result.agents.subagents.max_active_sessions).toBe(3); expect(result.agents.subagents.max_active_sessions).toBe(3);
expect(result.agents.subagents.idle_ttl_ms).toBe(120000);
expect(result.agents.immutable_denylist).toEqual([ expect(result.agents.immutable_denylist).toEqual([
{ tool: 'shell.exec', args_pattern: 'rm -rf /', reason: 'too destructive' }, { tool: 'shell.exec', args_pattern: 'rm -rf /', reason: 'too destructive' },
]); ]);
@@ -1746,6 +1749,17 @@ describe('configSchema — agents truthfulness/autonomy', () => {
})).toThrow(); })).toThrow();
}); });
it('rejects invalid subagent idle ttl', () => {
  // An idle TTL of 5000 ms is expected to fail schema validation.
  const configWithBadTtl = {
    ...minimalConfig,
    agents: {
      subagents: { idle_ttl_ms: 5000 },
    },
  };

  const parseBadConfig = () => configSchema.parse(configWithBadTtl);

  expect(parseBadConfig).toThrow();
});
it('rejects invalid truthfulness_mode', () => { it('rejects invalid truthfulness_mode', () => {
expect(() => configSchema.parse({ expect(() => configSchema.parse({
...minimalConfig, ...minimalConfig,
+1
View File
@@ -538,6 +538,7 @@ const agentsSchema = z.object({
subagents: z.object({ subagents: z.object({
enabled: z.boolean().default(true), enabled: z.boolean().default(true),
max_active_sessions: z.number().min(1).max(32).default(6), max_active_sessions: z.number().min(1).max(32).default(6),
idle_ttl_ms: z.number().min(60_000).max(86_400_000).default(3_600_000),
}).default({}), }).default({}),
auto_escalate: z.boolean().default(false), auto_escalate: z.boolean().default(false),
max_delegation_depth: z.number().min(1).max(10).default(3), max_delegation_depth: z.number().min(1).max(10).default(3),
+2
View File
@@ -697,6 +697,7 @@ export function createMessageRouter(deps: {
defaultPrimaryTier: effectiveTier, defaultPrimaryTier: effectiveTier,
maxIterations: deps.config.agents.max_iterations, maxIterations: deps.config.agents.max_iterations,
maxActiveSessions: maxSubagentSessions, maxActiveSessions: maxSubagentSessions,
idleTtlMs: deps.config.agents.subagents?.idle_ttl_ms ?? 3_600_000,
}); });
for (const tool of createSubagentTools(subagentManager)) { for (const tool of createSubagentTools(subagentManager)) {
effectiveToolRegistry.register(tool); effectiveToolRegistry.register(tool);
@@ -1049,6 +1050,7 @@ export function createMessageRouter(deps: {
names.add('subagent.list'); names.add('subagent.list');
names.add('subagent.cancel'); names.add('subagent.cancel');
names.add('subagent.delete'); names.add('subagent.delete');
names.add('subagent.summary');
} }
const sorted = [...names].sort(); const sorted = [...names].sort();
return [ return [
+23
View File
@@ -5,6 +5,7 @@ const mockController = {
spawn: vi.fn(), spawn: vi.fn(),
send: vi.fn(), send: vi.fn(),
list: vi.fn(), list: vi.fn(),
getTranscript: vi.fn(),
cancel: vi.fn(), cancel: vi.fn(),
delete: vi.fn(), delete: vi.fn(),
}; };
@@ -14,6 +15,7 @@ describe('subagent tools', () => {
mockController.spawn.mockReset(); mockController.spawn.mockReset();
mockController.send.mockReset(); mockController.send.mockReset();
mockController.list.mockReset(); mockController.list.mockReset();
mockController.getTranscript.mockReset();
mockController.cancel.mockReset(); mockController.cancel.mockReset();
mockController.delete.mockReset(); mockController.delete.mockReset();
}); });
@@ -89,12 +91,28 @@ describe('subagent tools', () => {
]); ]);
mockController.cancel.mockReturnValue(true); mockController.cancel.mockReturnValue(true);
mockController.delete.mockReturnValue(true); mockController.delete.mockReturnValue(true);
mockController.getTranscript.mockReturnValue({
session: {
id: 'planner',
agent: 'research',
tier: 'complex',
messageCount: 4,
createdAt: 1,
updatedAt: 3,
busy: false,
},
messages: [
{ role: 'user', content: 'Refine the plan' },
{ role: 'assistant', content: 'Follow-up answer' },
],
});
const tools = createSubagentTools(mockController); const tools = createSubagentTools(mockController);
const send = tools.find((tool) => tool.name === 'subagent.send'); const send = tools.find((tool) => tool.name === 'subagent.send');
const list = tools.find((tool) => tool.name === 'subagent.list'); const list = tools.find((tool) => tool.name === 'subagent.list');
const cancel = tools.find((tool) => tool.name === 'subagent.cancel'); const cancel = tools.find((tool) => tool.name === 'subagent.cancel');
const summary = tools.find((tool) => tool.name === 'subagent.summary');
const del = tools.find((tool) => tool.name === 'subagent.delete'); const del = tools.find((tool) => tool.name === 'subagent.delete');
const sendResult = await send!.execute({ subagent_id: 'planner', message: 'Refine the plan' }); const sendResult = await send!.execute({ subagent_id: 'planner', message: 'Refine the plan' });
@@ -109,6 +127,11 @@ describe('subagent tools', () => {
expect(cancelResult.success).toBe(true); expect(cancelResult.success).toBe(true);
expect(cancelResult.output).toContain('Cancellation requested'); expect(cancelResult.output).toContain('Cancellation requested');
const summaryResult = await summary!.execute({ subagent_id: 'planner' });
expect(summaryResult.success).toBe(true);
expect(summaryResult.output).toContain('Subagent summary');
expect(summaryResult.output).toContain('transcript_messages=2');
const deleteResult = await del!.execute({ subagent_id: 'planner' }); const deleteResult = await del!.execute({ subagent_id: 'planner' });
expect(deleteResult.success).toBe(true); expect(deleteResult.success).toBe(true);
expect(deleteResult.output).toContain('Deleted subagent session'); expect(deleteResult.output).toContain('Deleted subagent session');
+61 -1
View File
@@ -23,6 +23,14 @@ interface SubagentController {
session: SubagentSessionSummary; session: SubagentSessionSummary;
}>; }>;
list(): SubagentSessionSummary[]; list(): SubagentSessionSummary[];
getTranscript(subagentId: string, limit?: number): {
session: SubagentSessionSummary;
messages: Array<{
role: string;
content: string;
timestamp?: number;
}>;
};
cancel(subagentId: string): boolean; cancel(subagentId: string): boolean;
delete(subagentId: string): boolean; delete(subagentId: string): boolean;
} }
@@ -44,6 +52,11 @@ interface SessionArgs {
subagent_id: string; subagent_id: string;
} }
/** Arguments accepted by the `subagent.summary` tool. */
interface SummaryArgs {
/** ID of the subagent session to summarize. */
subagent_id: string;
/** Maximum number of trailing messages to include; the tool applies 20 when omitted. */
limit?: number;
}
function formatSummary(summary: SubagentSessionSummary): string { function formatSummary(summary: SubagentSessionSummary): string {
return [ return [
`id=${summary.id}`, `id=${summary.id}`,
@@ -204,6 +217,53 @@ export function createSubagentTools(controller: SubagentController): Tool[] {
}, },
}; };
/**
 * `subagent.summary` tool: renders a compact, human-readable digest of a
 * subagent session's transcript.
 *
 * The digest contains the session header (via `formatSummary`), message
 * counts per role, a preview of the last user and last assistant message,
 * and a numbered preview of every returned transcript entry.
 *
 * Failures (invalid arguments or anything thrown by the controller) are
 * reported as `{ success: false, error }` rather than being rethrown.
 */
const summaryTool: Tool = {
  name: 'subagent.summary',
  description: 'Get a compact transcript summary for a subagent session.',
  inputSchema: {
    type: 'object',
    properties: {
      subagent_id: { type: 'string', description: 'Subagent session ID to summarize.' },
      limit: { type: 'number', description: 'Maximum number of trailing messages to include (default 20).' },
    },
    required: ['subagent_id'],
  },
  /**
   * @param rawArgs Untrusted tool arguments; expected to match `SummaryArgs`.
   * @returns A successful result whose `output` is the rendered digest, or a
   *          failed result carrying the error message.
   */
  execute: async (rawArgs: unknown): Promise<ToolResult> => {
    // Number of trailing messages requested when `limit` is absent or unusable.
    const defaultLimit = 20;
    // Maximum characters of message content shown per preview.
    const previewChars = 200;
    const preview = (text: string): string => text.slice(0, previewChars);

    try {
      // Arguments come from the caller unchecked, so validate at runtime
      // instead of trusting the static type.
      const args = (rawArgs ?? {}) as Partial<SummaryArgs>;
      if (typeof args.subagent_id !== 'string' || args.subagent_id.trim() === '') {
        throw new Error('subagent_id is required and must be a non-empty string');
      }

      // Only a finite number >= 1 is a meaningful message count. Anything else
      // (NaN, Infinity, zero, negatives, non-numbers) falls back to the default,
      // and fractional values are floored to a whole count.
      const requested: unknown = args.limit;
      const limit =
        typeof requested === 'number' && Number.isFinite(requested) && requested >= 1
          ? Math.floor(requested)
          : defaultLimit;

      const { session, messages } = controller.getTranscript(args.subagent_id, limit);

      // Single pass: tally per-role counts and remember the latest entry of each role.
      let userCount = 0;
      let assistantCount = 0;
      let lastUser: (typeof messages)[number] | undefined;
      let lastAssistant: (typeof messages)[number] | undefined;
      for (const entry of messages) {
        if (entry.role === 'user') {
          userCount += 1;
          lastUser = entry;
        } else if (entry.role === 'assistant') {
          assistantCount += 1;
          lastAssistant = entry;
        }
      }

      const previewLines = messages.map(
        (entry, idx) => `${idx + 1}. [${entry.role}] ${preview(entry.content)}`,
      );

      return {
        success: true,
        output: [
          `Subagent summary (${formatSummary(session)}):`,
          `- transcript_messages=${messages.length}`,
          `- user_messages=${userCount}`,
          `- assistant_messages=${assistantCount}`,
          // JSON.stringify keeps multi-line content on one escaped line.
          `- last_user=${lastUser ? JSON.stringify(preview(lastUser.content)) : 'none'}`,
          `- last_assistant=${lastAssistant ? JSON.stringify(preview(lastAssistant.content)) : 'none'}`,
          '',
          'Transcript:',
          ...(previewLines.length > 0 ? previewLines : ['(empty)']),
        ].join('\n'),
      };
    } catch (error) {
      return {
        success: false,
        output: '',
        error: error instanceof Error ? error.message : String(error),
      };
    }
  },
};
const deleteTool: Tool = { const deleteTool: Tool = {
name: 'subagent.delete', name: 'subagent.delete',
description: 'Delete a subagent session and clear its conversation state.', description: 'Delete a subagent session and clear its conversation state.',
@@ -239,5 +299,5 @@ export function createSubagentTools(controller: SubagentController): Tool[] {
}, },
}; };
return [spawnTool, sendTool, listTool, cancelTool, deleteTool]; return [spawnTool, sendTool, listTool, cancelTool, summaryTool, deleteTool];
} }
+1
View File
@@ -113,6 +113,7 @@ describe('PROFILE_TOOLS', () => {
expect(PROFILE_TOOLS.coding.has('file.write')).toBe(true); expect(PROFILE_TOOLS.coding.has('file.write')).toBe(true);
expect(PROFILE_TOOLS.coding.has('process.start')).toBe(true); expect(PROFILE_TOOLS.coding.has('process.start')).toBe(true);
expect(PROFILE_TOOLS.coding.has('subagent.send')).toBe(true); expect(PROFILE_TOOLS.coding.has('subagent.send')).toBe(true);
expect(PROFILE_TOOLS.coding.has('subagent.summary')).toBe(true);
}); });
it('full is empty (special: matches everything)', () => { it('full is empty (special: matches everything)', () => {
+3
View File
@@ -55,6 +55,7 @@ const PROFILE_TOOLS: Record<ToolProfile, Set<string>> = {
'subagent.list', 'subagent.list',
'subagent.cancel', 'subagent.cancel',
'subagent.delete', 'subagent.delete',
'subagent.summary',
]), ]),
coding: new Set([ coding: new Set([
'file.read', 'file.read',
@@ -117,6 +118,7 @@ const PROFILE_TOOLS: Record<ToolProfile, Set<string>> = {
'subagent.list', 'subagent.list',
'subagent.cancel', 'subagent.cancel',
'subagent.delete', 'subagent.delete',
'subagent.summary',
]), ]),
full: new Set(), // Special: matches everything full: new Set(), // Special: matches everything
}; };
@@ -146,6 +148,7 @@ export const TOOL_GROUPS: Record<string, string[]> = {
'subagent.list', 'subagent.list',
'subagent.cancel', 'subagent.cancel',
'subagent.delete', 'subagent.delete',
'subagent.summary',
], ],
}; };