From 1c2f54fae327184881da3a53ebecb20da37313f4 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Sat, 7 Feb 2026 13:35:00 -0800 Subject: [PATCH] feat: implement tier 1 quick wins (tool groups, typing, pruning, verbose, think) Five additive features with no breaking changes: - Tool groups: group:fs, group:runtime, group:web, group:memory syntactic sugar for allow/deny lists in tool policy config - Typing indicators: Discord sendTyping() and WhatsApp sendStateTyping() on message receipt for better UX feedback - Session pruning: TTL-based auto-cleanup via sessions.ttl config with hourly daemon timer and SQLite GROUP BY pruning - /verbose command: TUI command parser toggle for raw streaming display - !!think prefix: per-message extended thinking mode wired through Anthropic (budget_tokens), OpenAI/GitHub (reasoning_effort), and Gemini (thinkingConfig) providers Co-Authored-By: Claude Opus 4.6 --- .../2026-02-07-tier1-quick-wins-design.md | 284 ++++++++++++++++++ src/backends/native/agent.ts | 25 +- src/channels/discord/adapter.ts | 7 + src/channels/slack/adapter.ts | 2 + src/channels/whatsapp/adapter.ts | 6 + src/config/schema.ts | 20 ++ src/daemon/index.ts | 19 +- src/frontends/tui/commands.test.ts | 5 + src/frontends/tui/commands.ts | 9 + src/models/anthropic.ts | 11 + src/models/gemini.ts | 13 +- src/models/github.ts | 5 + src/models/openai.ts | 5 + src/models/types.ts | 4 + src/session/index.ts | 2 +- src/session/manager.ts | 7 + src/session/store.ts | 30 ++ src/tools/policy.test.ts | 84 ++++++ src/tools/policy.ts | 45 ++- 19 files changed, 563 insertions(+), 20 deletions(-) create mode 100644 docs/plans/2026-02-07-tier1-quick-wins-design.md diff --git a/docs/plans/2026-02-07-tier1-quick-wins-design.md b/docs/plans/2026-02-07-tier1-quick-wins-design.md new file mode 100644 index 0000000..b7e779f --- /dev/null +++ b/docs/plans/2026-02-07-tier1-quick-wins-design.md @@ -0,0 +1,284 @@ +# Tier 1 Quick Wins — Design + +**Date:** 2026-02-07 +**Status:** Draft 
+**Scope:** 5 additive features, no breaking changes + +--- + +## 1. Per-message thinking mode (`!!think` prefix) + +### Trigger + +User prefixes a message with `!!think`. The prefix is stripped before the message reaches the model. + +### Data flow + +1. Frontend/channel adapter detects `!!think` prefix, strips it, sets `thinking: true` on the message metadata +2. Agent loop passes `thinking` flag through to `ChatRequest` +3. Each provider client checks the flag: + - **Anthropic:** sets `thinking.budget_tokens` (default 4096) + - **OpenAI/GitHub Models:** sets `reasoning_effort` (default `'medium'`) + - **Gemini:** sets `thinkingConfig.thinkingBudget` (default 4096) + - **Bedrock:** sets via Anthropic thinking params + - **Ollama/llama.cpp:** no-op (silently ignored) +4. Response thinking/reasoning content is included in the reply (displayed as a collapsible block in TUI/WebChat, omitted in channel adapters) + +### Config additions + +All optional — controls per-provider defaults when `!!think` is active: + +```yaml +models: + thinking: + anthropic: + budgetTokens: 4096 + openai: + reasoningEffort: medium # low | medium | high + gemini: + budgetTokens: 4096 +``` + +### Types changes + +```typescript +// src/models/types.ts — ChatRequest +export interface ChatRequest { + messages: Message[]; + system?: string; + maxTokens?: number; + tools?: ToolDefinition[]; + thinking?: boolean; // NEW +} + +// src/models/types.ts — ChatResponse +export interface ChatResponse { + content: string; + toolCalls?: ToolCall[]; + stopReason?: string; + usage?: TokenUsage; + thinkingContent?: string; // NEW — raw thinking/reasoning output +} +``` + +### Provider implementation + +Each client checks `request.thinking` and maps to native API: + +- **`anthropic.ts`**: Add `thinking: { type: 'enabled', budget_tokens }` to `messages.create()` params. Parse `thinking` content blocks from response. +- **`openai.ts`**: Add `reasoning_effort` to `chat.completions.create()`. 
Parse `reasoning` from response. +- **`github.ts`**: Same as OpenAI (uses OpenAI SDK). +- **`gemini.ts`**: Add `thinkingConfig` to `generationConfig`. Parse thinking parts from response. +- **`bedrock.ts`**: Add thinking params via Anthropic Converse API format. +- **`ollama.ts` / `llamacpp.ts`**: Ignore the flag. + +### Files affected + +- `src/models/types.ts` — Add `thinking` to ChatRequest, `thinkingContent` to ChatResponse +- `src/models/anthropic.ts` — Wire `budget_tokens`, parse thinking blocks +- `src/models/openai.ts` — Wire `reasoning_effort`, parse reasoning +- `src/models/github.ts` — Pass through to OpenAI client +- `src/models/gemini.ts` — Wire `thinkingConfig` +- `src/models/bedrock.ts` — Wire thinking params +- `src/config/schema.ts` — Add `models.thinking` config section +- `src/backends/native/agent.ts` — Pass `thinking` flag from message metadata to ChatRequest +- `src/frontends/tui/commands.ts` — Detect and strip `!!think` prefix +- Channel adapters — Detect and strip `!!think` prefix +- TUI/WebChat — Display `thinkingContent` as collapsible block + +--- + +## 2. Verbose streaming mode (`/verbose`) + +### Trigger + +`/verbose` command toggles a boolean in the frontend's local state. Not persisted to session or config. + +### Effect when on + +- Raw streaming chunks displayed as they arrive, including tool call JSON being generated +- Tool arguments and raw results shown in full (no summarization) + +### Scope + +TUI and WebChat only. Channel adapters (Telegram, Discord, Slack, WhatsApp) do not support this. 
+ +### Implementation + +- Add `verbose: boolean` to TUI and WebChat frontend state (default `false`) +- Add `/verbose` to command parser — toggles the flag, prints current status +- Streaming renderer checks the flag: + - **On:** emit raw chunks as-is, display full tool call JSON and results + - **Off:** current behavior (summarized tool output, clean text display) +- No backend changes — purely a display concern + +### Files affected + +- `src/frontends/tui/commands.ts` — Add `verbose` command type and parsing +- `src/frontends/tui/minimal.ts` — Handle `/verbose`, toggle state, modify streaming display +- `src/gateway/ui/pages/chat.js` — WebChat verbose toggle and raw display mode +- WebSocket message handler — Pass raw chunks when verbose is active + +--- + +## 3. Typing indicators + +### When + +Immediately on receiving a user message. Sustained until the response is fully sent. + +### Per-adapter implementation + +| Adapter | API | Notes | +|---------|-----|-------| +| **Discord** | `channel.sendTyping()` | Auto-expires after 10s. Re-fire on a 9s interval while processing. | +| **Slack** | Bolt typing indicator API | Fire on receipt, cancel on response. | +| **WhatsApp** | `sock.sendPresenceUpdate('composing', jid)` | Fire on receipt, send `'paused'` on response. | +| **Telegram** | grammY `sendChatAction('typing')` | Already implemented. No changes needed. | + +### Implementation pattern + +Each adapter's message handler calls `sendTyping()` before dispatching to the agent loop. A cleanup/cancel mechanism (interval clear or presence update) stops the indicator once the response is sent. 
+ +```typescript +// Pseudocode for Discord adapter +async handleMessage(msg) { + const typingInterval = setInterval(() => msg.channel.sendTyping(), 9000); + msg.channel.sendTyping(); // immediate first call + try { + await this.dispatch(msg); + } finally { + clearInterval(typingInterval); + } +} +``` + +### Files affected + +- `src/channels/discord/adapter.ts` — Add typing interval in message handler +- `src/channels/slack/adapter.ts` — Add typing indicator in message handler +- `src/channels/whatsapp/adapter.ts` — Add presence composing/paused in message handler + +--- + +## 4. Session pruning (TTL-based) + +### Config addition + +```yaml +sessions: + ttl: 30d # duration string. Default: 30d. Set to 0 or false to disable. +``` + +Supported formats: `"30d"`, `"7d"`, `"12h"`, `"0"` (disabled). + +### Mechanism + +1. Daemon startup schedules a periodic timer (every 1 hour) +2. Timer calls `SessionStore.pruneStale(cutoffTimestamp)` +3. SQLite query finds all `session_id`s where `MAX(created_at) < cutoff` +4. Deletes all messages for stale sessions +5. Evicts pruned sessions from `SessionManager`'s in-memory cache +6. Logs: `"Pruned 3 stale sessions (TTL: 30d)"` + +### Duration parsing + +Simple regex parser for duration strings — no external library: + +```typescript +function parseDuration(s: string): number | null { + const match = s.match(/^(\d+)(h|d)$/); + if (!match) return null; + const [, n, unit] = match; + const ms = unit === 'h' ? Number(n) * 3600000 : Number(n) * 86400000; + return ms; +} +``` + +### New SessionStore method + +```typescript +async pruneStale(beforeTimestamp: number): Promise<string[]> { + // Returns list of pruned session IDs + const stale = db.prepare(` + SELECT session_id FROM messages + GROUP BY session_id + HAVING MAX(created_at) < ? 
+ `).all(beforeTimestamp); + + for (const { session_id } of stale) { + db.prepare('DELETE FROM messages WHERE session_id = ?').run(session_id); + } + return stale.map(r => r.session_id); +} +``` + +### Files affected + +- `src/config/schema.ts` — Add `sessions.ttl` field +- `src/session/store.ts` — Add `pruneStale()` method +- `src/session/manager.ts` — Add `evictSessions(ids)` to clear in-memory cache +- `src/daemon/index.ts` — Schedule pruning timer on startup + +--- + +## 5. Tool groups + +### Group definitions + +Static map in `policy.ts`: + +```typescript +export const TOOL_GROUPS: Record<string, string[]> = { + 'group:fs': ['file.read', 'file.write', 'file.edit', 'file.list'], + 'group:runtime': ['shell.exec', 'process.start', 'process.output', 'process.status', 'process.kill', 'process.list'], + 'group:web': ['web.fetch', 'web.search', 'browser.navigate', 'browser.click', 'browser.type', 'browser.screenshot', 'browser.evaluate'], + 'group:memory': ['memory.read', 'memory.write', 'memory.search'], +}; +``` + +### Resolution + +`ToolPolicy` expands `group:*` entries in allow/deny lists before applying filters. Expansion happens early in the resolution pipeline, before any set operations. + +```typescript +function expandGroups(names: string[]): string[] { + return names.flatMap(n => TOOL_GROUPS[n] ?? [n]); +} +``` + +Works in all scopes: global allow/deny, per-agent overrides, per-provider overrides. + +### Config usage example + +```yaml +tools: + profile: minimal + allow: ['group:web'] + agents: + fast: + allow: ['group:fs'] + deny: ['shell.exec'] + providers: + ollama: + deny: ['group:web'] +``` + +### Files affected + +- `src/tools/policy.ts` — Add `TOOL_GROUPS` map, `expandGroups()` helper, integrate into resolution pipeline +- `src/tools/policy.test.ts` — Tests for group expansion in all scopes + +--- + +## Implementation order + +Recommended order by independence and risk: + +1. **Tool groups** — Isolated to `policy.ts`, no cross-cutting concerns +2. 
**Typing indicators** — Per-adapter, independent changes +3. **Session pruning** — Self-contained, touches store/manager/daemon +4. **`/verbose`** — Frontend-only, no backend changes +5. **`!!think`** — Largest scope, touches all providers + agent loop + frontends + +Features 1–3 can be implemented in parallel. Feature 4 is independent. Feature 5 depends on understanding the streaming path touched by feature 4. diff --git a/src/backends/native/agent.ts b/src/backends/native/agent.ts index 6c6d04b..9eb620f 100644 --- a/src/backends/native/agent.ts +++ b/src/backends/native/agent.ts @@ -51,6 +51,7 @@ export class NativeAgent { private _callCount: number = 0; private _toolPolicyContext?: ToolPolicyContext; private _attachmentCollector?: OutboundAttachmentCollector; + private _thinking: boolean = false; constructor(config: NativeAgentConfig) { this.modelClient = config.modelClient; @@ -69,6 +70,14 @@ export class NativeAgent { } async process(userMessage: string, attachments?: Attachment[]): Promise { + // Detect and strip !!think prefix for per-message thinking mode + if (userMessage.startsWith('!!think ') || userMessage === '!!think') { + this._thinking = true; + userMessage = userMessage.replace(/^!!think\s*/, '').trim() || 'Think about this.'; + } else { + this._thinking = false; + } + const userMsg = buildUserMessage(userMessage, attachments); if (this.session) { @@ -89,6 +98,7 @@ export class NativeAgent { const request: ChatRequest = { messages: this.history, system: this.systemPrompt, + ...(this._thinking ? 
{ thinking: true } : {}), }; const response = await this.chatWithRouter(request); @@ -101,10 +111,16 @@ export class NativeAgent { console.warn(`[Flynn] ${response.fallbackReason}`); } + // Prepend thinking content if present + let finalContent = response.content; + if (response.thinkingContent) { + finalContent = `\n${response.thinkingContent}\n\n\n${response.content}`; + } + const assistantMsg: Message = { role: 'assistant', content: response.content }; this.addToHistory(assistantMsg); - return response.content; + return finalContent; } private async toolLoop(): Promise { @@ -124,6 +140,7 @@ export class NativeAgent { messages: loopMessages as unknown as Message[], system: this.systemPrompt, tools, + ...(this._thinking ? { thinking: true } : {}), }; const response = await this.chatWithRouter(request); @@ -138,9 +155,13 @@ export class NativeAgent { // If the model didn't request tool use, we're done if (response.stopReason !== 'tool_use' || !response.toolCalls?.length) { + let finalContent = response.content; + if (response.thinkingContent) { + finalContent = `\n${response.thinkingContent}\n\n\n${response.content}`; + } const assistantMsg: Message = { role: 'assistant', content: response.content }; this.addToHistory(assistantMsg); - return response.content; + return finalContent; } // Build the assistant message with tool_use content blocks diff --git a/src/channels/discord/adapter.ts b/src/channels/discord/adapter.ts index 6c90173..0385e63 100644 --- a/src/channels/discord/adapter.ts +++ b/src/channels/discord/adapter.ts @@ -196,6 +196,13 @@ export class DiscordAdapter implements ChannelAdapter { } } + // Send typing indicator (lasts 10 seconds, no need for interval) + try { + if ('sendTyping' in message.channel) { + (message.channel as any).sendTyping(); + } + } catch { /* ignore typing errors */ } + // Strip bot mention from the message text const text = message.content.replace(/<@!?\d+>/g, '').trim(); diff --git a/src/channels/slack/adapter.ts 
b/src/channels/slack/adapter.ts index 447cb14..c143a9f 100644 --- a/src/channels/slack/adapter.ts +++ b/src/channels/slack/adapter.ts @@ -287,6 +287,8 @@ export class SlackAdapter implements ChannelAdapter { } } + // Note: Slack doesn't expose a typing indicator API for bots + // Build peer ID: channelId:threadTs (thread-aware) const threadTs = message.thread_ts ?? message.ts ?? ''; const peerId = `${channelId}:${threadTs}`; diff --git a/src/channels/whatsapp/adapter.ts b/src/channels/whatsapp/adapter.ts index 37b2f0e..e3f03e2 100644 --- a/src/channels/whatsapp/adapter.ts +++ b/src/channels/whatsapp/adapter.ts @@ -236,6 +236,12 @@ export class WhatsAppAdapter implements ChannelAdapter { } } + // Send typing indicator + try { + const chat = await (message as any).getChat(); + await chat.sendStateTyping(); + } catch { /* ignore typing errors */ } + // Strip bot mention from message body for group messages let text = message.body ?? ''; if (isGroup && this.botId) { diff --git a/src/config/schema.ts b/src/config/schema.ts index 26e0f6d..44a2e1d 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -33,6 +33,18 @@ const modelConfigSchema = modelConfigBaseSchema.extend({ fallback: modelConfigBaseSchema.optional(), }); +const thinkingSchema = z.object({ + anthropic: z.object({ + budgetTokens: z.number().default(4096), + }).default({}), + openai: z.object({ + reasoningEffort: z.enum(['low', 'medium', 'high']).default('medium'), + }).default({}), + gemini: z.object({ + budgetTokens: z.number().default(4096), + }).default({}), +}).default({}); + const modelsSchema = z.object({ local: modelConfigSchema.optional(), fast: modelConfigSchema.optional(), @@ -40,6 +52,7 @@ const modelsSchema = z.object({ complex: modelConfigSchema.optional(), fallback_chain: z.array(z.string()).default(['anthropic']), local_providers: z.record(z.string(), modelConfigSchema).optional(), + thinking: thinkingSchema, }); const backendsSchema = z.object({ @@ -250,6 +263,10 @@ const 
promptSchema = z.object({ })).default([]), }).default({}); +const sessionsSchema = z.object({ + ttl: z.string().default('30d'), +}).default({}); + export const configSchema = z.object({ telegram: telegramSchema, discord: discordSchema, @@ -275,6 +292,7 @@ export const configSchema = z.object({ sandbox: sandboxSchema, agent_configs: agentConfigsSchema, routing: routingSchema, + sessions: sessionsSchema, }); export type Config = z.infer; @@ -300,3 +318,5 @@ export type SandboxConfig = z.infer; export type AgentConfigEntry = z.infer; export type RoutingConfig = z.infer; export type ServerConfig = z.infer; +export type SessionsConfig = z.infer; +export type ThinkingConfig = z.infer; diff --git a/src/daemon/index.ts b/src/daemon/index.ts index 9292a25..8ef79f7 100644 --- a/src/daemon/index.ts +++ b/src/daemon/index.ts @@ -7,7 +7,7 @@ import { AnthropicClient, OpenAIClient, OllamaClient, LlamaCppClient, GeminiClie import type { ModelClient, RetryConfig, ModelTier } from '../models/index.js'; import { AgentOrchestrator, type DelegationConfig } from '../backends/index.js'; import { OutboundAttachmentCollector } from '../backends/native/attachments.js'; -import { SessionStore, SessionManager } from '../session/index.js'; +import { SessionStore, SessionManager, parseDuration } from '../session/index.js'; import { HookEngine } from '../hooks/index.js'; import { ToolRegistry, ToolExecutor, ToolPolicy, allBuiltinTools, createWebSearchTools, createProcessTools, ProcessManager, BrowserManager, createBrowserTools, createMediaSendTool, createSessionTools, createAgentsListTool, createMessageSendTool, createCronTools } from '../tools/index.js'; import type { Tool } from '../tools/types.js'; @@ -453,6 +453,23 @@ export async function startDaemon(config: Config): Promise { console.log('Session store closed'); }); + // Session pruning timer (TTL-based cleanup) + const ttlMs = parseDuration(config.sessions?.ttl ?? 
'30d'); + if (ttlMs) { + const pruneInterval = setInterval(() => { + const cutoff = Math.floor((Date.now() - ttlMs) / 1000); // created_at is unix seconds + const pruned = sessionStore.pruneStale(cutoff); + if (pruned.length > 0) { + sessionManager.evictSessions(pruned); + console.log(`Pruned ${pruned.length} stale session(s) (TTL: ${config.sessions?.ttl ?? '30d'})`); + } + }, 3_600_000); // every hour + + lifecycle.onShutdown(async () => { + clearInterval(pruneInterval); + }); + } + // Initialize hook engine const hookEngine = new HookEngine(config.hooks); diff --git a/src/frontends/tui/commands.test.ts b/src/frontends/tui/commands.test.ts index 8ae9adf..2914116 100644 --- a/src/frontends/tui/commands.test.ts +++ b/src/frontends/tui/commands.test.ts @@ -34,6 +34,10 @@ describe('parseCommand', () => { expect(parseCommand('/usage')).toEqual({ type: 'usage' }); }); + it('parses /verbose command', () => { + expect(parseCommand('/verbose')).toEqual({ type: 'verbose' }); + }); + it('parses /model command without argument', () => { expect(parseCommand('/model')).toEqual({ type: 'model' }); }); @@ -100,6 +104,7 @@ describe('getHelpText', () => { expect(help).toContain('/reset'); expect(help).toContain('/compact'); expect(help).toContain('/usage'); + expect(help).toContain('/verbose'); expect(help).toContain('/quit'); }); }); diff --git a/src/frontends/tui/commands.ts b/src/frontends/tui/commands.ts index 6c5edaa..cd7f9d3 100644 --- a/src/frontends/tui/commands.ts +++ b/src/frontends/tui/commands.ts @@ -6,6 +6,7 @@ export type Command = | { type: 'fullscreen' } | { type: 'compact' } | { type: 'usage' } + | { type: 'verbose' } | { type: 'model'; name?: string; providerModel?: string } | { type: 'backend'; provider?: string } | { type: 'login'; provider?: string } @@ -51,6 +52,11 @@ export function parseCommand(input: string): Command | null { return { type: 'usage' }; } + // Verbose + if (trimmed === '/verbose') { + return { type: 'verbose' }; + } + // Model (with optional 
argument) if (trimmed === '/model') { return { type: 'model' }; @@ -108,6 +114,7 @@ Commands: /reset, /clear, /new Clear conversation history /compact Compact conversation history /usage Show token usage and estimated cost + /verbose Toggle verbose mode (show raw streaming and tool output) /status Show session info and token usage /fullscreen, /fs Switch to fullscreen mode /transfer Transfer session to another frontend @@ -127,6 +134,7 @@ export const SLASH_COMMANDS = [ '/new', '/compact', '/usage', + '/verbose', '/status', '/fullscreen', '/fs', @@ -146,6 +154,7 @@ export const COMMAND_TOOLTIPS: Record = { '/new': 'Start a new conversation', '/compact': 'Compact conversation history to save context space', '/usage': 'Show token usage and estimated cost', + '/verbose': 'Toggle verbose mode (show raw streaming and tool output)', '/status': 'Show session info and token usage', '/fullscreen': 'Switch to fullscreen mode', '/fs': 'Switch to fullscreen mode', diff --git a/src/models/anthropic.ts b/src/models/anthropic.ts index 08aac81..47a5356 100644 --- a/src/models/anthropic.ts +++ b/src/models/anthropic.ts @@ -74,11 +74,21 @@ export class AnthropicClient implements ModelClient { params.tools = request.tools; } + // Extended thinking mode — enable thinking with a budget + if (request.thinking) { + params.max_tokens = Math.max(params.max_tokens as number, 16384); + (params as any).thinking = { type: 'enabled', budget_tokens: 4096 }; + } + const response = await this.client.messages.create(params as unknown as Parameters[0]) as AnthropicMessage; const textContent = response.content.find((c) => c.type === 'text'); const content = textContent?.type === 'text' ? textContent.text : ''; + // Extract thinking content if present + const thinkingBlock = response.content.find((c) => c.type === 'thinking'); + const thinkingContent = thinkingBlock && 'thinking' in thinkingBlock ? 
(thinkingBlock as any).text : undefined; + const toolCalls = response.content .filter((c): c is { type: 'tool_use'; id: string; name: string; input: unknown } => c.type === 'tool_use') .map(c => ({ id: c.id, name: c.name, args: c.input })); @@ -91,6 +101,7 @@ export class AnthropicClient implements ModelClient { outputTokens: response.usage.output_tokens, }, ...(toolCalls.length > 0 ? { toolCalls } : {}), + ...(thinkingContent ? { thinkingContent } : {}), }; } diff --git a/src/models/gemini.ts b/src/models/gemini.ts index 474d835..bc7b63e 100644 --- a/src/models/gemini.ts +++ b/src/models/gemini.ts @@ -25,13 +25,20 @@ export class GeminiClient implements ModelClient { ? [{ functionDeclarations: request.tools.map(t => convertToolDefinition(t)) }] : undefined; + const generationConfig: Record = { + maxOutputTokens: request.maxTokens ?? this.defaultMaxTokens, + }; + + // Extended thinking mode + if (request.thinking) { + generationConfig.thinkingConfig = { thinkingBudget: 4096 }; + } + return this.genAI.getGenerativeModel({ model: this.model, systemInstruction: request.system || undefined, tools, - generationConfig: { - maxOutputTokens: request.maxTokens ?? 
this.defaultMaxTokens, - }, + generationConfig, }); } diff --git a/src/models/github.ts b/src/models/github.ts index 1c50fac..dd4fe8b 100644 --- a/src/models/github.ts +++ b/src/models/github.ts @@ -137,6 +137,11 @@ export class GitHubModelsClient implements ModelClient { })); } + // Extended thinking/reasoning mode + if (request.thinking) { + (params as any).reasoning_effort = 'medium'; + } + const response = await this.client.chat.completions.create(params); const choice = response.choices[0]; diff --git a/src/models/openai.ts b/src/models/openai.ts index dfdd032..e812ca8 100644 --- a/src/models/openai.ts +++ b/src/models/openai.ts @@ -79,6 +79,11 @@ export class OpenAIClient implements ModelClient { })); } + // Extended thinking/reasoning mode for o1/o3 models + if (request.thinking) { + (params as any).reasoning_effort = 'medium'; + } + const response = await this.client.chat.completions.create(params); const choice = response.choices[0]; diff --git a/src/models/types.ts b/src/models/types.ts index 152bd37..b7a6079 100644 --- a/src/models/types.ts +++ b/src/models/types.ts @@ -66,6 +66,8 @@ export interface ChatRequest { system?: string; maxTokens?: number; tools?: ToolDefinition[]; + /** Enable extended thinking/reasoning mode for this request. */ + thinking?: boolean; } export interface ChatResponse { @@ -77,6 +79,8 @@ export interface ChatResponse { fallback?: boolean; /** Human-readable reason for the fallback. */ fallbackReason?: string; + /** Raw thinking/reasoning output from extended thinking mode. 
*/ + thinkingContent?: string; } export interface TokenUsage { diff --git a/src/session/index.ts b/src/session/index.ts index c836905..c792131 100644 --- a/src/session/index.ts +++ b/src/session/index.ts @@ -1,2 +1,2 @@ -export { SessionStore } from './store.js'; +export { SessionStore, parseDuration } from './store.js'; export { SessionManager, ManagedSession, type Session } from './manager.js'; diff --git a/src/session/manager.ts b/src/session/manager.ts index bbd4236..23ec368 100644 --- a/src/session/manager.ts +++ b/src/session/manager.ts @@ -98,4 +98,11 @@ export class SessionManager { const id = this.makeSessionId(frontend, userId); this.sessions.delete(id); } + + /** Remove sessions from the in-memory cache by their IDs. */ + evictSessions(sessionIds: string[]): void { + for (const id of sessionIds) { + this.sessions.delete(id); + } + } } diff --git a/src/session/store.ts b/src/session/store.ts index cab1218..ee904e1 100644 --- a/src/session/store.ts +++ b/src/session/store.ts @@ -1,6 +1,15 @@ import Database from 'better-sqlite3'; import type { Message } from '../models/types.js'; +/** Parse a duration string like '30d', '7d', '12h' to milliseconds. Returns null if invalid or '0'. */ +export function parseDuration(s: string): number | null { + if (s === '0' || s === 'false') return null; + const match = s.match(/^(\d+)(h|d)$/); + if (!match) return null; + const [, n, unit] = match; + return unit === 'h' ? Number(n) * 3600_000 : Number(n) * 86_400_000; +} + export class SessionStore { private db: Database.Database; @@ -71,6 +80,27 @@ export class SessionStore { return rows.map(row => row.session_id); } + /** Delete all messages for sessions with no activity since the given timestamp. Returns pruned session IDs. */ + pruneStale(beforeTimestamp: number): string[] { + const stale = this.db.prepare(` + SELECT session_id FROM messages + GROUP BY session_id + HAVING MAX(created_at) < ? 
+ `).all(beforeTimestamp) as Array<{ session_id: string }>; + + if (stale.length === 0) return []; + + const deleteStmt = this.db.prepare('DELETE FROM messages WHERE session_id = ?'); + const transaction = this.db.transaction(() => { + for (const { session_id } of stale) { + deleteStmt.run(session_id); + } + }); + transaction(); + + return stale.map(r => r.session_id); + } + close(): void { this.db.close(); } diff --git a/src/tools/policy.test.ts b/src/tools/policy.test.ts index 002a97b..d3e61f0 100644 --- a/src/tools/policy.test.ts +++ b/src/tools/policy.test.ts @@ -409,6 +409,90 @@ describe('ToolPolicy', () => { }); }); + describe('tool groups', () => { + it('expands group:fs in allow list', () => { + const policy = new ToolPolicy(defaultConfig({ + profile: 'minimal', + allow: ['group:fs'], + })); + const result = policy.filterTools(ALL_TOOLS); + const names = result.map(t => t.name); + expect(names).toContain('file.read'); + expect(names).toContain('file.write'); + expect(names).toContain('file.edit'); + expect(names).toContain('file.list'); + expect(names).not.toContain('shell.exec'); + }); + + it('expands group:runtime in deny list', () => { + const policy = new ToolPolicy(defaultConfig({ + deny: ['group:runtime'], + })); + const result = policy.filterTools(ALL_TOOLS); + const names = result.map(t => t.name); + expect(names).not.toContain('shell.exec'); + expect(names).not.toContain('process.start'); + expect(names).not.toContain('process.status'); + expect(names).not.toContain('process.output'); + expect(names).not.toContain('process.kill'); + expect(names).not.toContain('process.list'); + expect(names).toContain('file.read'); + }); + + it('expands groups in agent overrides', () => { + const policy = new ToolPolicy(defaultConfig({ + agents: { + fast: { profile: 'minimal', allow: ['group:memory'], deny: [] }, + }, + })); + const result = policy.filterTools(ALL_TOOLS, { agent: 'fast' }); + const names = result.map(t => t.name); + 
expect(names).toContain('memory.read'); + expect(names).toContain('memory.write'); + expect(names).toContain('memory.search'); + expect(names).toContain('file.read'); // from minimal profile + expect(names).not.toContain('shell.exec'); + }); + + it('expands groups in provider deny', () => { + const policy = new ToolPolicy(defaultConfig({ + providers: { + ollama: { allow: [], deny: ['group:web'] }, + }, + })); + const result = policy.filterTools(ALL_TOOLS, { provider: 'ollama' }); + const names = result.map(t => t.name); + expect(names).not.toContain('web.fetch'); + expect(names).not.toContain('web.search'); + expect(names).toContain('file.read'); + expect(names).toContain('shell.exec'); + }); + + it('mixes groups with individual names', () => { + const policy = new ToolPolicy(defaultConfig({ + profile: 'minimal', + allow: ['group:memory', 'shell.exec'], + })); + const result = policy.filterTools(ALL_TOOLS); + const names = result.map(t => t.name); + expect(names).toContain('memory.read'); + expect(names).toContain('shell.exec'); + expect(names).toContain('file.read'); // from minimal + }); + + it('unknown group name passes through as literal', () => { + const policy = new ToolPolicy(defaultConfig({ + profile: 'minimal', + allow: ['group:nonexistent'], + })); + const result = policy.filterTools(ALL_TOOLS); + // Should only have minimal tools — 'group:nonexistent' doesn't match any real tool + const names = result.map(t => t.name); + expect(names).toContain('file.read'); + expect(names).not.toContain('shell.exec'); + }); + }); + describe('edge cases', () => { it('handles empty tool list', () => { const policy = new ToolPolicy(defaultConfig()); diff --git a/src/tools/policy.ts b/src/tools/policy.ts index 5431d45..16fef39 100644 --- a/src/tools/policy.ts +++ b/src/tools/policy.ts @@ -45,6 +45,21 @@ const PROFILE_TOOLS: Record> = { full: new Set(), // Special: matches everything }; +// ── Tool groups ───────────────────────────────────────────────────── + +/** Named 
groups for use in allow/deny lists (e.g. 'group:fs'). */ +export const TOOL_GROUPS: Record = { + 'group:fs': ['file.read', 'file.write', 'file.edit', 'file.list'], + 'group:runtime': ['shell.exec', 'process.start', 'process.output', 'process.status', 'process.kill', 'process.list'], + 'group:web': ['web.fetch', 'web.search', 'browser.navigate', 'browser.screenshot', 'browser.click', 'browser.type', 'browser.content', 'browser.eval'], + 'group:memory': ['memory.read', 'memory.write', 'memory.search'], +}; + +/** Expand group references in a list of tool names/patterns. */ +function expandGroups(names: string[]): string[] { + return names.flatMap(n => TOOL_GROUPS[n] ?? [n]); +} + // ── Glob matching ─────────────────────────────────────────────────── /** @@ -122,19 +137,21 @@ export class ToolPolicy { // Step 1: Start from global profile let allowed = this.applyProfile(this.config.profile, allToolNames); - // Step 2: Apply global allow (adds tools) - if (this.config.allow.length > 0) { + // Step 2: Apply global allow (adds tools) — expand groups first + const globalAllow = expandGroups(this.config.allow); + if (globalAllow.length > 0) { for (const name of allToolNames) { - if (matchesAnyPattern(name, this.config.allow)) { + if (matchesAnyPattern(name, globalAllow)) { allowed.add(name); } } } - // Step 3: Apply global deny (removes tools) - if (this.config.deny.length > 0) { + // Step 3: Apply global deny (removes tools) — expand groups first + const globalDeny = expandGroups(this.config.deny); + if (globalDeny.length > 0) { allowed = new Set( - [...allowed].filter(name => !matchesAnyPattern(name, this.config.deny)), + [...allowed].filter(name => !matchesAnyPattern(name, globalDeny)), ); } @@ -197,19 +214,21 @@ export class ToolPolicy { const baseProfile = override.profile ?? 
this.config.profile; let allowed = this.applyProfile(baseProfile, allToolNames); - // Apply override allow - if (override.allow.length > 0) { + // Apply override allow — expand groups first + const overrideAllow = expandGroups(override.allow); + if (overrideAllow.length > 0) { for (const name of allToolNames) { - if (matchesAnyPattern(name, override.allow)) { + if (matchesAnyPattern(name, overrideAllow)) { allowed.add(name); } } } - // Apply override deny (deny always wins) - if (override.deny.length > 0) { + // Apply override deny (deny always wins) — expand groups first + const overrideDeny = expandGroups(override.deny); + if (overrideDeny.length > 0) { allowed = new Set( - [...allowed].filter(name => !matchesAnyPattern(name, override.deny)), + [...allowed].filter(name => !matchesAnyPattern(name, overrideDeny)), ); } @@ -232,4 +251,4 @@ function intersect(a: Set, b: Set): Set { /** * Exported for testing and for use in HookEngine (DRY). */ -export { patternToRegex, matchesAnyPattern, PROFILE_TOOLS }; +export { patternToRegex, matchesAnyPattern, PROFILE_TOOLS, expandGroups };