From 1c2f54fae327184881da3a53ebecb20da37313f4 Mon Sep 17 00:00:00 2001
From: William Valentin <william.valentin.info@gmail.com>
Date: Sat, 7 Feb 2026 13:35:00 -0800
Subject: [PATCH] feat: implement tier 1 quick wins (tool groups, typing,
 pruning, verbose, think)

Five additive features with no breaking changes:

- Tool groups: group:fs, group:runtime, group:web, group:memory syntactic
  sugar for allow/deny lists in tool policy config
- Typing indicators: Discord sendTyping() and WhatsApp sendStateTyping()
  on message receipt for better UX feedback
- Session pruning: TTL-based auto-cleanup via sessions.ttl config with
  hourly daemon timer and SQLite GROUP BY pruning
- /verbose command: TUI command parser toggle for raw streaming display
- !!think prefix: per-message extended thinking mode wired through
  Anthropic (budget_tokens), OpenAI/GitHub (reasoning_effort), and
  Gemini (thinkingConfig) providers

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../2026-02-07-tier1-quick-wins-design.md     | 284 ++++++++++++++++++
 src/backends/native/agent.ts                  |  25 +-
 src/channels/discord/adapter.ts               |   7 +
 src/channels/slack/adapter.ts                 |   2 +
 src/channels/whatsapp/adapter.ts              |   6 +
 src/config/schema.ts                          |  20 ++
 src/daemon/index.ts                           |  19 +-
 src/frontends/tui/commands.test.ts            |   5 +
 src/frontends/tui/commands.ts                 |   9 +
 src/models/anthropic.ts                       |  11 +
 src/models/gemini.ts                          |  13 +-
 src/models/github.ts                          |   5 +
 src/models/openai.ts                          |   5 +
 src/models/types.ts                           |   4 +
 src/session/index.ts                          |   2 +-
 src/session/manager.ts                        |   7 +
 src/session/store.ts                          |  30 ++
 src/tools/policy.test.ts                      |  84 ++++++
 src/tools/policy.ts                           |  45 ++-
 19 files changed, 563 insertions(+), 20 deletions(-)
 create mode 100644 docs/plans/2026-02-07-tier1-quick-wins-design.md

diff --git a/docs/plans/2026-02-07-tier1-quick-wins-design.md b/docs/plans/2026-02-07-tier1-quick-wins-design.md
new file mode 100644
index 0000000..b7e779f
--- /dev/null
+++ b/docs/plans/2026-02-07-tier1-quick-wins-design.md
@@ -0,0 +1,284 @@
+# Tier 1 Quick Wins — Design
+
+**Date:** 2026-02-07
+**Status:** Draft
+**Scope:** 5 additive features, no breaking changes
+
+---
+
+## 1. Per-message thinking mode (`!!think` prefix)
+
+### Trigger
+
+User prefixes a message with `!!think`. The prefix is stripped before the message reaches the model.
+
+### Data flow
+
+1. Frontend/channel adapter detects `!!think` prefix, strips it, sets `thinking: true` on the message metadata
+2. Agent loop passes `thinking` flag through to `ChatRequest`
+3. Each provider client checks the flag:
+   - **Anthropic:** sets `thinking.budget_tokens` (default 4096)
+   - **OpenAI/GitHub Models:** sets `reasoning_effort` (default `'medium'`)
+   - **Gemini:** sets `thinkingConfig.thinkingBudget` (default 4096)
+   - **Bedrock:** sets via Anthropic thinking params
+   - **Ollama/llama.cpp:** no-op (silently ignored)
+4. Response thinking/reasoning content is included in the reply (displayed as a collapsible block in TUI/WebChat, omitted in channel adapters)
+
+### Config additions
+
+All optional — controls per-provider defaults when `!!think` is active:
+
+```yaml
+models:
+  thinking:
+    anthropic:
+      budgetTokens: 4096
+    openai:
+      reasoningEffort: medium   # low | medium | high
+    gemini:
+      budgetTokens: 4096
+```
+
+### Types changes
+
+```typescript
+// src/models/types.ts — ChatRequest
+export interface ChatRequest {
+  messages: Message[];
+  system?: string;
+  maxTokens?: number;
+  tools?: ToolDefinition[];
+  thinking?: boolean;           // NEW
+}
+
+// src/models/types.ts — ChatResponse
+export interface ChatResponse {
+  content: string;
+  toolCalls?: ToolCall[];
+  stopReason?: string;
+  usage?: TokenUsage;
+  thinkingContent?: string;     // NEW — raw thinking/reasoning output
+}
+```
+
+### Provider implementation
+
+Each client checks `request.thinking` and maps to native API:
+
+- **`anthropic.ts`**: Add `thinking: { type: 'enabled', budget_tokens }` to `messages.create()` params. Parse `thinking` content blocks from response.
+- **`openai.ts`**: Add `reasoning_effort` to `chat.completions.create()`. Parse `reasoning` from response.
+- **`github.ts`**: Same as OpenAI (uses OpenAI SDK).
+- **`gemini.ts`**: Add `thinkingConfig` to `generationConfig`. Parse thinking parts from response.
+- **`bedrock.ts`**: Add thinking params via Anthropic Converse API format.
+- **`ollama.ts` / `llamacpp.ts`**: Ignore the flag.
+
+### Files affected
+
+- `src/models/types.ts` — Add `thinking` to ChatRequest, `thinkingContent` to ChatResponse
+- `src/models/anthropic.ts` — Wire `budget_tokens`, parse thinking blocks
+- `src/models/openai.ts` — Wire `reasoning_effort`, parse reasoning
+- `src/models/github.ts` — Pass through to OpenAI client
+- `src/models/gemini.ts` — Wire `thinkingConfig`
+- `src/models/bedrock.ts` — Wire thinking params
+- `src/config/schema.ts` — Add `models.thinking` config section
+- `src/backends/native/agent.ts` — Pass `thinking` flag from message metadata to ChatRequest
+- `src/frontends/tui/commands.ts` — Detect and strip `!!think` prefix
+- Channel adapters — Detect and strip `!!think` prefix
+- TUI/WebChat — Display `thinkingContent` as collapsible block
+
+---
+
+## 2. Verbose streaming mode (`/verbose`)
+
+### Trigger
+
+`/verbose` command toggles a boolean in the frontend's local state. Not persisted to session or config.
+
+### Effect when on
+
+- Raw streaming chunks displayed as they arrive, including tool call JSON being generated
+- Tool arguments and raw results shown in full (no summarization)
+
+### Scope
+
+TUI and WebChat only. Channel adapters (Telegram, Discord, Slack, WhatsApp) do not support this.
+
+### Implementation
+
+- Add `verbose: boolean` to TUI and WebChat frontend state (default `false`)
+- Add `/verbose` to command parser — toggles the flag, prints current status
+- Streaming renderer checks the flag:
+  - **On:** emit raw chunks as-is, display full tool call JSON and results
+  - **Off:** current behavior (summarized tool output, clean text display)
+- No backend changes — purely a display concern
+
+### Files affected
+
+- `src/frontends/tui/commands.ts` — Add `verbose` command type and parsing
+- `src/frontends/tui/minimal.ts` — Handle `/verbose`, toggle state, modify streaming display
+- `src/gateway/ui/pages/chat.js` — WebChat verbose toggle and raw display mode
+- WebSocket message handler — Pass raw chunks when verbose is active
+
+---
+
+## 3. Typing indicators
+
+### When
+
+Immediately on receiving a user message. Sustained until the response is fully sent.
+
+### Per-adapter implementation
+
+| Adapter | API | Notes |
+|---------|-----|-------|
+| **Discord** | `channel.sendTyping()` | Auto-expires after 10s. Re-fire on a 9s interval while processing. |
+| **Slack** | — | No typing-indicator API is available to bots; no indicator is sent. |
+| **WhatsApp** | `chat.sendStateTyping()` (via `message.getChat()`) | Fire on receipt, call `chat.clearState()` on response. |
+| **Telegram** | grammY `sendChatAction('typing')` | Already implemented. No changes needed. |
+
+### Implementation pattern
+
+Each adapter's message handler calls `sendTyping()` before dispatching to the agent loop. A cleanup/cancel mechanism (interval clear or presence update) stops the indicator once the response is sent.
+
+```typescript
+// Pseudocode for Discord adapter
+async handleMessage(msg) {
+  const typingInterval = setInterval(() => msg.channel.sendTyping(), 9000);
+  msg.channel.sendTyping(); // immediate first call
+  try {
+    await this.dispatch(msg);
+  } finally {
+    clearInterval(typingInterval);
+  }
+}
+```
+
+### Files affected
+
+- `src/channels/discord/adapter.ts` — Add typing interval in message handler
+- `src/channels/slack/adapter.ts` — No typing indicator (no bot API for it); leave a note in the message handler
+- `src/channels/whatsapp/adapter.ts` — Add presence composing/paused in message handler
+
+---
+
+## 4. Session pruning (TTL-based)
+
+### Config addition
+
+```yaml
+sessions:
+  ttl: 30d    # duration string. Default: 30d. Set to 0 or false to disable.
+```
+
+Supported formats: `"30d"`, `"7d"`, `"12h"`, `"0"` (disabled).
+
+### Mechanism
+
+1. Daemon startup schedules a periodic timer (every 1 hour)
+2. Timer calls `SessionStore.pruneStale(cutoffTimestamp)`
+3. SQLite query finds all `session_id`s where `MAX(created_at) < cutoff`
+4. Deletes all messages for stale sessions
+5. Evicts pruned sessions from `SessionManager`'s in-memory cache
+6. Logs: `"Pruned 3 stale sessions (TTL: 30d)"`
+
+### Duration parsing
+
+Simple regex parser for duration strings — no external library:
+
+```typescript
+function parseDuration(s: string): number | null {
+  const match = s.match(/^(\d+)(h|d)$/);
+  if (!match) return null;
+  const [, n, unit] = match;
+  const ms = unit === 'h' ? Number(n) * 3600000 : Number(n) * 86400000;
+  return ms;
+}
+```
+
+### New SessionStore method
+
+```typescript
+async pruneStale(beforeTimestamp: number): Promise<string[]> {
+  // Returns list of pruned session IDs
+  const stale = db.prepare(`
+    SELECT session_id FROM messages
+    GROUP BY session_id
+    HAVING MAX(created_at) < ?
+  `).all(beforeTimestamp);
+
+  for (const { session_id } of stale) {
+    db.prepare('DELETE FROM messages WHERE session_id = ?').run(session_id);
+  }
+  return stale.map(r => r.session_id);
+}
+```
+
+### Files affected
+
+- `src/config/schema.ts` — Add `sessions.ttl` field
+- `src/session/store.ts` — Add `pruneStale()` method
+- `src/session/manager.ts` — Add `evictSessions(ids)` to clear in-memory cache
+- `src/daemon/index.ts` — Schedule pruning timer on startup
+
+---
+
+## 5. Tool groups
+
+### Group definitions
+
+Static map in `policy.ts`:
+
+```typescript
+export const TOOL_GROUPS: Record<string, string[]> = {
+  'group:fs':      ['file.read', 'file.write', 'file.edit', 'file.list'],
+  'group:runtime': ['shell.exec', 'process.start', 'process.output', 'process.status', 'process.kill', 'process.list'],
+  'group:web':     ['web.fetch', 'web.search', 'browser.navigate', 'browser.screenshot', 'browser.click', 'browser.type', 'browser.content', 'browser.eval'],
+  'group:memory':  ['memory.read', 'memory.write', 'memory.search'],
+};
+```
+
+### Resolution
+
+`ToolPolicy` expands `group:*` entries in allow/deny lists before applying filters. Expansion happens early in the resolution pipeline, before any set operations.
+
+```typescript
+function expandGroups(names: string[]): string[] {
+  return names.flatMap(n => TOOL_GROUPS[n] ?? [n]);
+}
+```
+
+Works in all scopes: global allow/deny, per-agent overrides, per-provider overrides.
+
+### Config usage example
+
+```yaml
+tools:
+  profile: minimal
+  allow: ['group:web']
+  agents:
+    fast:
+      allow: ['group:fs']
+      deny: ['shell.exec']
+  providers:
+    ollama:
+      deny: ['group:web']
+```
+
+### Files affected
+
+- `src/tools/policy.ts` — Add `TOOL_GROUPS` map, `expandGroups()` helper, integrate into resolution pipeline
+- `src/tools/policy.test.ts` — Tests for group expansion in all scopes
+
+---
+
+## Implementation order
+
+Recommended order by independence and risk:
+
+1. **Tool groups** — Isolated to `policy.ts`, no cross-cutting concerns
+2. **Typing indicators** — Per-adapter, independent changes
+3. **Session pruning** — Self-contained, touches store/manager/daemon
+4. **`/verbose`** — Frontend-only, no backend changes
+5. **`!!think`** — Largest scope, touches all providers + agent loop + frontends
+
+Features 1–3 can be implemented in parallel. Feature 4 is independent. Feature 5 depends on understanding the streaming path touched by feature 4.
diff --git a/src/backends/native/agent.ts b/src/backends/native/agent.ts
index 6c6d04b..9eb620f 100644
--- a/src/backends/native/agent.ts
+++ b/src/backends/native/agent.ts
@@ -51,6 +51,7 @@ export class NativeAgent {
   private _callCount: number = 0;
   private _toolPolicyContext?: ToolPolicyContext;
   private _attachmentCollector?: OutboundAttachmentCollector;
+  private _thinking: boolean = false;
 
   constructor(config: NativeAgentConfig) {
     this.modelClient = config.modelClient;
@@ -69,6 +70,14 @@ export class NativeAgent {
   }
 
   async process(userMessage: string, attachments?: Attachment[]): Promise<string> {
+    // Detect and strip !!think prefix for per-message thinking mode
+    if (userMessage.startsWith('!!think ') || userMessage === '!!think') {
+      this._thinking = true;
+      userMessage = userMessage.replace(/^!!think\s*/, '').trim() || 'Think about this.';
+    } else {
+      this._thinking = false;
+    }
+
     const userMsg = buildUserMessage(userMessage, attachments);
 
     if (this.session) {
@@ -89,6 +98,7 @@ export class NativeAgent {
     const request: ChatRequest = {
       messages: this.history,
       system: this.systemPrompt,
+      ...(this._thinking ? { thinking: true } : {}),
     };
 
     const response = await this.chatWithRouter(request);
@@ -101,10 +111,16 @@ export class NativeAgent {
       console.warn(`[Flynn] ${response.fallbackReason}`);
     }
 
+    // Prepend thinking content if present
+    let finalContent = response.content;
+    if (response.thinkingContent) {
+      finalContent = `<thinking>\n${response.thinkingContent}\n</thinking>\n\n${response.content}`;
+    }
+
     const assistantMsg: Message = { role: 'assistant', content: response.content };
     this.addToHistory(assistantMsg);
 
-    return response.content;
+    return finalContent;
   }
 
   private async toolLoop(): Promise<string> {
@@ -124,6 +140,7 @@ export class NativeAgent {
         messages: loopMessages as unknown as Message[],
         system: this.systemPrompt,
         tools,
+        ...(this._thinking ? { thinking: true } : {}),
       };
 
       const response = await this.chatWithRouter(request);
@@ -138,9 +155,13 @@ export class NativeAgent {
 
       // If the model didn't request tool use, we're done
       if (response.stopReason !== 'tool_use' || !response.toolCalls?.length) {
+        let finalContent = response.content;
+        if (response.thinkingContent) {
+          finalContent = `<thinking>\n${response.thinkingContent}\n</thinking>\n\n${response.content}`;
+        }
         const assistantMsg: Message = { role: 'assistant', content: response.content };
         this.addToHistory(assistantMsg);
-        return response.content;
+        return finalContent;
       }
 
       // Build the assistant message with tool_use content blocks
diff --git a/src/channels/discord/adapter.ts b/src/channels/discord/adapter.ts
index 6c90173..0385e63 100644
--- a/src/channels/discord/adapter.ts
+++ b/src/channels/discord/adapter.ts
@@ -196,6 +196,13 @@ export class DiscordAdapter implements ChannelAdapter {
       }
     }
 
+    // Best-effort typing indicator. Discord expires it after ~10s and it is not re-sent, so it may lapse on slow replies.
+    try {
+      if ('sendTyping' in message.channel) {
+        (message.channel as any).sendTyping().catch(() => { /* async failure: ignore, typing is cosmetic */ });
+      }
+    } catch { /* ignore typing errors */ }
+
     // Strip bot mention from the message text
     const text = message.content.replace(/<@!?\d+>/g, '').trim();
 
diff --git a/src/channels/slack/adapter.ts b/src/channels/slack/adapter.ts
index 447cb14..c143a9f 100644
--- a/src/channels/slack/adapter.ts
+++ b/src/channels/slack/adapter.ts
@@ -287,6 +287,8 @@ export class SlackAdapter implements ChannelAdapter {
       }
     }
 
+    // Note: Slack doesn't expose a typing indicator API for bots
+
     // Build peer ID: channelId:threadTs (thread-aware)
     const threadTs = message.thread_ts ?? message.ts ?? '';
     const peerId = `${channelId}:${threadTs}`;
diff --git a/src/channels/whatsapp/adapter.ts b/src/channels/whatsapp/adapter.ts
index 37b2f0e..e3f03e2 100644
--- a/src/channels/whatsapp/adapter.ts
+++ b/src/channels/whatsapp/adapter.ts
@@ -236,6 +236,12 @@ export class WhatsAppAdapter implements ChannelAdapter {
       }
     }
 
+    // Send typing indicator
+    try {
+      const chat = await (message as any).getChat();
+      await chat.sendStateTyping();
+    } catch { /* ignore typing errors */ }
+
     // Strip bot mention from message body for group messages
     let text = message.body ?? '';
     if (isGroup && this.botId) {
diff --git a/src/config/schema.ts b/src/config/schema.ts
index 26e0f6d..44a2e1d 100644
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -33,6 +33,18 @@ const modelConfigSchema = modelConfigBaseSchema.extend({
   fallback: modelConfigBaseSchema.optional(),
 });
 
+const thinkingSchema = z.object({ // per-provider settings for extended thinking; every level defaults, so the whole section may be omitted
+  anthropic: z.object({
+    budgetTokens: z.number().default(4096), // NOTE(review): AnthropicClient in this change hard-codes budget_tokens: 4096 and does not read this — TODO wire up
+  }).default({}),
+  openai: z.object({
+    reasoningEffort: z.enum(['low', 'medium', 'high']).default('medium'), // NOTE(review): OpenAI/GitHub clients in this change hard-code 'medium' — TODO wire up
+  }).default({}),
+  gemini: z.object({
+    budgetTokens: z.number().default(4096), // NOTE(review): GeminiClient in this change hard-codes thinkingBudget: 4096 — TODO wire up
+  }).default({}),
+}).default({});
+
 const modelsSchema = z.object({
   local: modelConfigSchema.optional(),
   fast: modelConfigSchema.optional(),
@@ -40,6 +52,7 @@ const modelsSchema = z.object({
   complex: modelConfigSchema.optional(),
   fallback_chain: z.array(z.string()).default(['anthropic']),
   local_providers: z.record(z.string(), modelConfigSchema).optional(),
+  thinking: thinkingSchema,
 });
 
 const backendsSchema = z.object({
@@ -250,6 +263,10 @@ const promptSchema = z.object({
   })).default([]),
 }).default({});
 
+const sessionsSchema = z.object({ // session retention settings; ttl is a duration string such as '30d' or '12h'
+  ttl: z.preprocess((v) => (typeof v === 'number' || typeof v === 'boolean' ? String(v) : v), z.string()).default('30d'), // YAML `ttl: 0` / `ttl: false` arrive as non-strings; stringify them so they reach the parser as '0' / 'false' (disabled)
+}).default({});
+
 export const configSchema = z.object({
   telegram: telegramSchema,
   discord: discordSchema,
@@ -275,6 +292,7 @@ export const configSchema = z.object({
   sandbox: sandboxSchema,
   agent_configs: agentConfigsSchema,
   routing: routingSchema,
+  sessions: sessionsSchema,
 });
 
 export type Config = z.infer<typeof configSchema>;
@@ -300,3 +318,5 @@ export type SandboxConfig = z.infer<typeof sandboxSchema>;
 export type AgentConfigEntry = z.infer<typeof agentConfigEntrySchema>;
 export type RoutingConfig = z.infer<typeof routingSchema>;
 export type ServerConfig = z.infer<typeof serverSchema>;
+export type SessionsConfig = z.infer<typeof sessionsSchema>;
+export type ThinkingConfig = z.infer<typeof thinkingSchema>;
diff --git a/src/daemon/index.ts b/src/daemon/index.ts
index 9292a25..8ef79f7 100644
--- a/src/daemon/index.ts
+++ b/src/daemon/index.ts
@@ -7,7 +7,7 @@ import { AnthropicClient, OpenAIClient, OllamaClient, LlamaCppClient, GeminiClie
 import type { ModelClient, RetryConfig, ModelTier } from '../models/index.js';
 import { AgentOrchestrator, type DelegationConfig } from '../backends/index.js';
 import { OutboundAttachmentCollector } from '../backends/native/attachments.js';
-import { SessionStore, SessionManager } from '../session/index.js';
+import { SessionStore, SessionManager, parseDuration } from '../session/index.js';
 import { HookEngine } from '../hooks/index.js';
 import { ToolRegistry, ToolExecutor, ToolPolicy, allBuiltinTools, createWebSearchTools, createProcessTools, ProcessManager, BrowserManager, createBrowserTools, createMediaSendTool, createSessionTools, createAgentsListTool, createMessageSendTool, createCronTools } from '../tools/index.js';
 import type { Tool } from '../tools/types.js';
@@ -453,6 +453,23 @@ export async function startDaemon(config: Config): Promise<DaemonContext> {
     console.log('Session store closed');
   });
 
+  // Session pruning timer (TTL-based cleanup). A TTL that parses to null or 0 disables pruning entirely.
+  const ttlMs = parseDuration(config.sessions?.ttl ?? '30d');
+  if (ttlMs) {
+    const pruneInterval = setInterval(() => {
+      try { // an exception escaping a timer callback is uncaught and would take the daemon down
+        const pruned = sessionStore.pruneStale(Math.floor((Date.now() - ttlMs) / 1000)); // created_at is unix seconds
+        if (pruned.length === 0) return;
+        sessionManager.evictSessions(pruned);
+        console.log(`Pruned ${pruned.length} stale session(s) (TTL: ${config.sessions?.ttl ?? '30d'})`);
+      } catch (err) { console.error('Session pruning failed:', err); }
+    }, 3_600_000); // every hour
+
+    lifecycle.onShutdown(async () => {
+      clearInterval(pruneInterval);
+    });
+  }
+
   // Initialize hook engine
   const hookEngine = new HookEngine(config.hooks);
 
diff --git a/src/frontends/tui/commands.test.ts b/src/frontends/tui/commands.test.ts
index 8ae9adf..2914116 100644
--- a/src/frontends/tui/commands.test.ts
+++ b/src/frontends/tui/commands.test.ts
@@ -34,6 +34,10 @@ describe('parseCommand', () => {
     expect(parseCommand('/usage')).toEqual({ type: 'usage' });
   });
 
+  it('parses /verbose command', () => {
+    expect(parseCommand('/verbose')).toEqual({ type: 'verbose' });
+  });
+
   it('parses /model command without argument', () => {
     expect(parseCommand('/model')).toEqual({ type: 'model' });
   });
@@ -100,6 +104,7 @@ describe('getHelpText', () => {
     expect(help).toContain('/reset');
     expect(help).toContain('/compact');
     expect(help).toContain('/usage');
+    expect(help).toContain('/verbose');
     expect(help).toContain('/quit');
   });
 });
diff --git a/src/frontends/tui/commands.ts b/src/frontends/tui/commands.ts
index 6c5edaa..cd7f9d3 100644
--- a/src/frontends/tui/commands.ts
+++ b/src/frontends/tui/commands.ts
@@ -6,6 +6,7 @@ export type Command =
   | { type: 'fullscreen' }
   | { type: 'compact' }
   | { type: 'usage' }
+  | { type: 'verbose' }
   | { type: 'model'; name?: string; providerModel?: string }
   | { type: 'backend'; provider?: string }
   | { type: 'login'; provider?: string }
@@ -51,6 +52,11 @@ export function parseCommand(input: string): Command | null {
     return { type: 'usage' };
   }
 
+  // Verbose
+  if (trimmed === '/verbose') {
+    return { type: 'verbose' };
+  }
+
   // Model (with optional argument)
   if (trimmed === '/model') {
     return { type: 'model' };
@@ -108,6 +114,7 @@ Commands:
   /reset, /clear, /new   Clear conversation history
   /compact               Compact conversation history
   /usage                 Show token usage and estimated cost
+  /verbose               Toggle verbose mode (show raw streaming and tool output)
   /status                Show session info and token usage
   /fullscreen, /fs       Switch to fullscreen mode
   /transfer <dest>       Transfer session to another frontend
@@ -127,6 +134,7 @@ export const SLASH_COMMANDS = [
   '/new',
   '/compact',
   '/usage',
+  '/verbose',
   '/status',
   '/fullscreen',
   '/fs',
@@ -146,6 +154,7 @@ export const COMMAND_TOOLTIPS: Record<string, string> = {
   '/new': 'Start a new conversation',
   '/compact': 'Compact conversation history to save context space',
   '/usage': 'Show token usage and estimated cost',
+  '/verbose': 'Toggle verbose mode (show raw streaming and tool output)',
   '/status': 'Show session info and token usage',
   '/fullscreen': 'Switch to fullscreen mode',
   '/fs': 'Switch to fullscreen mode',
diff --git a/src/models/anthropic.ts b/src/models/anthropic.ts
index 08aac81..47a5356 100644
--- a/src/models/anthropic.ts
+++ b/src/models/anthropic.ts
@@ -74,11 +74,21 @@ export class AnthropicClient implements ModelClient {
       params.tools = request.tools;
     }
 
+    // Extended thinking mode — enable thinking with a budget
+    if (request.thinking) {
+      params.max_tokens = Math.max(params.max_tokens as number, 16384);
+      (params as any).thinking = { type: 'enabled', budget_tokens: 4096 };
+    }
+
     const response = await this.client.messages.create(params as unknown as Parameters<typeof this.client.messages.create>[0]) as AnthropicMessage;
 
     const textContent = response.content.find((c) => c.type === 'text');
     const content = textContent?.type === 'text' ? textContent.text : '';
 
+    // Extract thinking content if present. A thinking block carries its text in the `thinking` field, not `text`.
+    const thinkingBlock = response.content.find((c) => c.type === 'thinking');
+    const thinkingContent = thinkingBlock && 'thinking' in thinkingBlock ? (thinkingBlock as any).thinking : undefined;
+
     const toolCalls = response.content
       .filter((c): c is { type: 'tool_use'; id: string; name: string; input: unknown } => c.type === 'tool_use')
       .map(c => ({ id: c.id, name: c.name, args: c.input }));
@@ -91,6 +101,7 @@ export class AnthropicClient implements ModelClient {
         outputTokens: response.usage.output_tokens,
       },
       ...(toolCalls.length > 0 ? { toolCalls } : {}),
+      ...(thinkingContent ? { thinkingContent } : {}),
     };
   }
 
diff --git a/src/models/gemini.ts b/src/models/gemini.ts
index 474d835..bc7b63e 100644
--- a/src/models/gemini.ts
+++ b/src/models/gemini.ts
@@ -25,13 +25,20 @@ export class GeminiClient implements ModelClient {
       ? [{ functionDeclarations: request.tools.map(t => convertToolDefinition(t)) }]
       : undefined;
 
+    const generationConfig: Record<string, unknown> = {
+      maxOutputTokens: request.maxTokens ?? this.defaultMaxTokens,
+    };
+
+    // Extended thinking mode
+    if (request.thinking) {
+      generationConfig.thinkingConfig = { thinkingBudget: 4096 };
+    }
+
     return this.genAI.getGenerativeModel({
       model: this.model,
       systemInstruction: request.system || undefined,
       tools,
-      generationConfig: {
-        maxOutputTokens: request.maxTokens ?? this.defaultMaxTokens,
-      },
+      generationConfig,
     });
   }
 
diff --git a/src/models/github.ts b/src/models/github.ts
index 1c50fac..dd4fe8b 100644
--- a/src/models/github.ts
+++ b/src/models/github.ts
@@ -137,6 +137,11 @@ export class GitHubModelsClient implements ModelClient {
       }));
     }
 
+    // Extended thinking/reasoning mode
+    if (request.thinking) {
+      (params as any).reasoning_effort = 'medium';
+    }
+
     const response = await this.client.chat.completions.create(params);
 
     const choice = response.choices[0];
diff --git a/src/models/openai.ts b/src/models/openai.ts
index dfdd032..e812ca8 100644
--- a/src/models/openai.ts
+++ b/src/models/openai.ts
@@ -79,6 +79,11 @@ export class OpenAIClient implements ModelClient {
       }));
     }
 
+    // Extended thinking/reasoning mode for o1/o3 models
+    if (request.thinking) {
+      (params as any).reasoning_effort = 'medium';
+    }
+
     const response = await this.client.chat.completions.create(params);
 
     const choice = response.choices[0];
diff --git a/src/models/types.ts b/src/models/types.ts
index 152bd37..b7a6079 100644
--- a/src/models/types.ts
+++ b/src/models/types.ts
@@ -66,6 +66,8 @@ export interface ChatRequest {
   system?: string;
   maxTokens?: number;
   tools?: ToolDefinition[];
+  /** Enable extended thinking/reasoning mode for this request. */
+  thinking?: boolean;
 }
 
 export interface ChatResponse {
@@ -77,6 +79,8 @@ export interface ChatResponse {
   fallback?: boolean;
   /** Human-readable reason for the fallback. */
   fallbackReason?: string;
+  /** Raw thinking/reasoning output from extended thinking mode. */
+  thinkingContent?: string;
 }
 
 export interface TokenUsage {
diff --git a/src/session/index.ts b/src/session/index.ts
index c836905..c792131 100644
--- a/src/session/index.ts
+++ b/src/session/index.ts
@@ -1,2 +1,2 @@
-export { SessionStore } from './store.js';
+export { SessionStore, parseDuration } from './store.js';
 export { SessionManager, ManagedSession, type Session } from './manager.js';
diff --git a/src/session/manager.ts b/src/session/manager.ts
index bbd4236..23ec368 100644
--- a/src/session/manager.ts
+++ b/src/session/manager.ts
@@ -98,4 +98,11 @@ export class SessionManager {
     const id = this.makeSessionId(frontend, userId);
     this.sessions.delete(id);
   }
+
+  /** Remove the given session IDs from the in-memory session cache; persisted messages are not touched here. */
+  evictSessions(sessionIds: string[]): void {
+    for (const id of sessionIds) {
+      this.sessions.delete(id); // NOTE(review): assumes these IDs use the same key format as makeSessionId() — confirm against callers
+    }
+  }
 }
diff --git a/src/session/store.ts b/src/session/store.ts
index cab1218..ee904e1 100644
--- a/src/session/store.ts
+++ b/src/session/store.ts
@@ -1,6 +1,15 @@
 import Database from 'better-sqlite3';
 import type { Message } from '../models/types.js';
 
+/** Parse a whole-hour or whole-day duration ('12h', '7d', '30d') to milliseconds. Returns null for '0', 'false', or any unrecognized string; a zero count such as '0d' yields 0. */
+export function parseDuration(s: string): number | null {
+  if (s === '0' || s === 'false') return null; // explicit "disabled" spellings
+  const match = s.match(/^(\d+)(h|d)$/); // one or more digits followed by exactly one unit letter
+  if (!match) return null;
+  const [, n, unit] = match;
+  return unit === 'h' ? Number(n) * 3600_000 : Number(n) * 86_400_000; // ms per hour / ms per day
+}
+
 export class SessionStore {
   private db: Database.Database;
 
@@ -71,6 +80,27 @@ export class SessionStore {
     return rows.map(row => row.session_id);
   }
 
+  /** Delete every message of each session whose newest message has created_at earlier than beforeTimestamp. Returns the pruned session IDs (empty array if none). */
+  pruneStale(beforeTimestamp: number): string[] {
+    const stale = this.db.prepare(`
+      SELECT session_id FROM messages
+      GROUP BY session_id
+      HAVING MAX(created_at) < ?
+    `).all(beforeTimestamp) as Array<{ session_id: string }>;
+
+    if (stale.length === 0) return []; // nothing to prune; skip the transaction
+
+    const deleteStmt = this.db.prepare('DELETE FROM messages WHERE session_id = ?'); // prepared once, reused per session
+    const transaction = this.db.transaction(() => {
+      for (const { session_id } of stale) {
+        deleteStmt.run(session_id);
+      }
+    });
+    transaction(); // run all deletes inside a single transaction
+
+    return stale.map(r => r.session_id);
+  }
+
   close(): void {
     this.db.close();
   }
diff --git a/src/tools/policy.test.ts b/src/tools/policy.test.ts
index 002a97b..d3e61f0 100644
--- a/src/tools/policy.test.ts
+++ b/src/tools/policy.test.ts
@@ -409,6 +409,90 @@ describe('ToolPolicy', () => {
     });
   });
 
+  describe('tool groups', () => {
+    it('expands group:fs in allow list', () => {
+      const policy = new ToolPolicy(defaultConfig({
+        profile: 'minimal',
+        allow: ['group:fs'],
+      }));
+      const result = policy.filterTools(ALL_TOOLS);
+      const names = result.map(t => t.name);
+      expect(names).toContain('file.read');
+      expect(names).toContain('file.write');
+      expect(names).toContain('file.edit');
+      expect(names).toContain('file.list');
+      expect(names).not.toContain('shell.exec');
+    });
+
+    it('expands group:runtime in deny list', () => {
+      const policy = new ToolPolicy(defaultConfig({
+        deny: ['group:runtime'],
+      }));
+      const result = policy.filterTools(ALL_TOOLS);
+      const names = result.map(t => t.name);
+      expect(names).not.toContain('shell.exec');
+      expect(names).not.toContain('process.start');
+      expect(names).not.toContain('process.status');
+      expect(names).not.toContain('process.output');
+      expect(names).not.toContain('process.kill');
+      expect(names).not.toContain('process.list');
+      expect(names).toContain('file.read');
+    });
+
+    it('expands groups in agent overrides', () => {
+      const policy = new ToolPolicy(defaultConfig({
+        agents: {
+          fast: { profile: 'minimal', allow: ['group:memory'], deny: [] },
+        },
+      }));
+      const result = policy.filterTools(ALL_TOOLS, { agent: 'fast' });
+      const names = result.map(t => t.name);
+      expect(names).toContain('memory.read');
+      expect(names).toContain('memory.write');
+      expect(names).toContain('memory.search');
+      expect(names).toContain('file.read'); // from minimal profile
+      expect(names).not.toContain('shell.exec');
+    });
+
+    it('expands groups in provider deny', () => {
+      const policy = new ToolPolicy(defaultConfig({
+        providers: {
+          ollama: { allow: [], deny: ['group:web'] },
+        },
+      }));
+      const result = policy.filterTools(ALL_TOOLS, { provider: 'ollama' });
+      const names = result.map(t => t.name);
+      expect(names).not.toContain('web.fetch');
+      expect(names).not.toContain('web.search');
+      expect(names).toContain('file.read');
+      expect(names).toContain('shell.exec');
+    });
+
+    it('mixes groups with individual names', () => {
+      const policy = new ToolPolicy(defaultConfig({
+        profile: 'minimal',
+        allow: ['group:memory', 'shell.exec'],
+      }));
+      const result = policy.filterTools(ALL_TOOLS);
+      const names = result.map(t => t.name);
+      expect(names).toContain('memory.read');
+      expect(names).toContain('shell.exec');
+      expect(names).toContain('file.read'); // from minimal
+    });
+
+    it('unknown group name passes through as literal', () => {
+      const policy = new ToolPolicy(defaultConfig({
+        profile: 'minimal',
+        allow: ['group:nonexistent'],
+      }));
+      const result = policy.filterTools(ALL_TOOLS);
+      // Should only have minimal tools — 'group:nonexistent' doesn't match any real tool
+      const names = result.map(t => t.name);
+      expect(names).toContain('file.read');
+      expect(names).not.toContain('shell.exec');
+    });
+  });
+
   describe('edge cases', () => {
     it('handles empty tool list', () => {
       const policy = new ToolPolicy(defaultConfig());
diff --git a/src/tools/policy.ts b/src/tools/policy.ts
index 5431d45..16fef39 100644
--- a/src/tools/policy.ts
+++ b/src/tools/policy.ts
@@ -45,6 +45,21 @@ const PROFILE_TOOLS: Record<ToolProfile, Set<string>> = {
   full: new Set(), // Special: matches everything
 };
 
+// ── Tool groups ─────────────────────────────────────────────────────
+
+/** Maps each 'group:*' alias usable in allow/deny lists (e.g. 'group:fs') to the tool names it stands for. */
+export const TOOL_GROUPS: Record<string, string[]> = {
+  'group:fs': ['file.read', 'file.write', 'file.edit', 'file.list'],
+  'group:runtime': ['shell.exec', 'process.start', 'process.output', 'process.status', 'process.kill', 'process.list'],
+  'group:web': ['web.fetch', 'web.search', 'browser.navigate', 'browser.screenshot', 'browser.click', 'browser.type', 'browser.content', 'browser.eval'],
+  'group:memory': ['memory.read', 'memory.write', 'memory.search'],
+};
+
+/** Expand own-key TOOL_GROUPS references; other entries (even 'constructor', 'toString') pass through as literals. */
+function expandGroups(names: string[]): string[] {
+  return names.flatMap(n => (Object.prototype.hasOwnProperty.call(TOOL_GROUPS, n) ? (TOOL_GROUPS[n] ?? [n]) : [n]));
+}
+
 // ── Glob matching ───────────────────────────────────────────────────
 
 /**
@@ -122,19 +137,21 @@ export class ToolPolicy {
     // Step 1: Start from global profile
     let allowed = this.applyProfile(this.config.profile, allToolNames);
 
-    // Step 2: Apply global allow (adds tools)
-    if (this.config.allow.length > 0) {
+    // Step 2: Apply global allow (adds tools) — expand groups first
+    const globalAllow = expandGroups(this.config.allow);
+    if (globalAllow.length > 0) {
       for (const name of allToolNames) {
-        if (matchesAnyPattern(name, this.config.allow)) {
+        if (matchesAnyPattern(name, globalAllow)) {
           allowed.add(name);
         }
       }
     }
 
-    // Step 3: Apply global deny (removes tools)
-    if (this.config.deny.length > 0) {
+    // Step 3: Apply global deny (removes tools) — expand groups first
+    const globalDeny = expandGroups(this.config.deny);
+    if (globalDeny.length > 0) {
       allowed = new Set(
-        [...allowed].filter(name => !matchesAnyPattern(name, this.config.deny)),
+        [...allowed].filter(name => !matchesAnyPattern(name, globalDeny)),
       );
     }
 
@@ -197,19 +214,21 @@ export class ToolPolicy {
     const baseProfile = override.profile ?? this.config.profile;
     let allowed = this.applyProfile(baseProfile, allToolNames);
 
-    // Apply override allow
-    if (override.allow.length > 0) {
+    // Apply override allow — expand groups first
+    const overrideAllow = expandGroups(override.allow);
+    if (overrideAllow.length > 0) {
       for (const name of allToolNames) {
-        if (matchesAnyPattern(name, override.allow)) {
+        if (matchesAnyPattern(name, overrideAllow)) {
           allowed.add(name);
         }
       }
     }
 
-    // Apply override deny (deny always wins)
-    if (override.deny.length > 0) {
+    // Apply override deny (deny always wins) — expand groups first
+    const overrideDeny = expandGroups(override.deny);
+    if (overrideDeny.length > 0) {
       allowed = new Set(
-        [...allowed].filter(name => !matchesAnyPattern(name, override.deny)),
+        [...allowed].filter(name => !matchesAnyPattern(name, overrideDeny)),
       );
     }
 
@@ -232,4 +251,4 @@ function intersect(a: Set<string>, b: Set<string>): Set<string> {
 /**
  * Exported for testing and for use in HookEngine (DRY).
  */
-export { patternToRegex, matchesAnyPattern, PROFILE_TOOLS };
+export { patternToRegex, matchesAnyPattern, PROFILE_TOOLS, expandGroups };