feat: implement tier 1 quick wins (tool groups, typing, pruning, verbose, think)

Five additive features with no breaking changes:

- Tool groups: group:fs, group:runtime, group:web, and group:memory as
  syntactic sugar for allow/deny lists in the tool policy config
- Typing indicators: Discord sendTyping() and WhatsApp sendStateTyping()
  on message receipt for better UX feedback
- Session pruning: TTL-based auto-cleanup via sessions.ttl config with
  hourly daemon timer and SQLite GROUP BY pruning
- /verbose command: TUI command parser toggle for raw streaming display
- !!think prefix: per-message extended thinking mode wired through
  Anthropic (budget_tokens), OpenAI/GitHub (reasoning_effort), and
  Gemini (thinkingConfig) providers

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
William Valentin
2026-02-07 13:35:00 -08:00
parent 6bb424cddc
commit 1c2f54fae3
19 changed files with 563 additions and 20 deletions
+11
View File
@@ -74,11 +74,21 @@ export class AnthropicClient implements ModelClient {
params.tools = request.tools;
}
// Extended thinking mode — enable thinking with a budget
if (request.thinking) {
params.max_tokens = Math.max(params.max_tokens as number, 16384);
(params as any).thinking = { type: 'enabled', budget_tokens: 4096 };
}
const response = await this.client.messages.create(params as unknown as Parameters<typeof this.client.messages.create>[0]) as AnthropicMessage;
const textContent = response.content.find((c) => c.type === 'text');
const content = textContent?.type === 'text' ? textContent.text : '';
// Extract thinking content if present. Anthropic `thinking` content blocks
// carry their text in the `thinking` property (not `text`), so read that
// field; reading `.text` here would always yield undefined.
const thinkingBlock = response.content.find((c) => c.type === 'thinking');
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- the local AnthropicMessage type may not declare thinking blocks
const rawThinking: unknown = thinkingBlock && 'thinking' in thinkingBlock ? (thinkingBlock as any).thinking : undefined;
// Only surface the value when it is actually a string, so callers get `string | undefined`.
const thinkingContent = typeof rawThinking === 'string' ? rawThinking : undefined;
const toolCalls = response.content
.filter((c): c is { type: 'tool_use'; id: string; name: string; input: unknown } => c.type === 'tool_use')
.map(c => ({ id: c.id, name: c.name, args: c.input }));
@@ -91,6 +101,7 @@ export class AnthropicClient implements ModelClient {
outputTokens: response.usage.output_tokens,
},
...(toolCalls.length > 0 ? { toolCalls } : {}),
...(thinkingContent ? { thinkingContent } : {}),
};
}
+10 -3
View File
@@ -25,13 +25,20 @@ export class GeminiClient implements ModelClient {
? [{ functionDeclarations: request.tools.map(t => convertToolDefinition(t)) }]
: undefined;
const generationConfig: Record<string, unknown> = {
maxOutputTokens: request.maxTokens ?? this.defaultMaxTokens,
};
// Extended thinking mode
if (request.thinking) {
generationConfig.thinkingConfig = { thinkingBudget: 4096 };
}
return this.genAI.getGenerativeModel({
model: this.model,
systemInstruction: request.system || undefined,
tools,
generationConfig: {
maxOutputTokens: request.maxTokens ?? this.defaultMaxTokens,
},
generationConfig,
});
}
+5
View File
@@ -137,6 +137,11 @@ export class GitHubModelsClient implements ModelClient {
}));
}
// Extended thinking/reasoning mode
if (request.thinking) {
(params as any).reasoning_effort = 'medium';
}
const response = await this.client.chat.completions.create(params);
const choice = response.choices[0];
+5
View File
@@ -79,6 +79,11 @@ export class OpenAIClient implements ModelClient {
}));
}
// Extended thinking/reasoning mode for o1/o3 models
if (request.thinking) {
(params as any).reasoning_effort = 'medium';
}
const response = await this.client.chat.completions.create(params);
const choice = response.choices[0];
+4
View File
@@ -66,6 +66,8 @@ export interface ChatRequest {
system?: string;
maxTokens?: number;
tools?: ToolDefinition[];
/** Enable extended thinking/reasoning mode for this request. */
thinking?: boolean;
}
export interface ChatResponse {
@@ -77,6 +79,8 @@ export interface ChatResponse {
fallback?: boolean;
/** Human-readable reason for the fallback. */
fallbackReason?: string;
/** Raw thinking/reasoning output from extended thinking mode. */
thinkingContent?: string;
}
export interface TokenUsage {