feat(session): persist model tier overrides per session

Store per-session config in SQLite and route /model and /reset through command fast-paths so channel sessions keep independent model selection across reconnects and restarts.
This commit is contained in:
William Valentin
2026-02-13 01:04:26 -08:00
parent 3472a0b926
commit 9f81c01603
35 changed files with 1438 additions and 144 deletions
+96 -3
View File
@@ -4,7 +4,10 @@ import { ModelRouter } from '../../models/router.js';
import type { ChatResponse, ModelClient } from '../../models/types.js';
import { ToolRegistry, ToolExecutor } from '../../tools/index.js';
import { HookEngine } from '../../hooks/engine.js';
import type { SubAgentRequest } from './orchestrator.js';
import { MemoryStore } from '../../memory/store.js';
import { mkdtempSync, rmSync } from 'fs';
import { tmpdir } from 'os';
import { join } from 'path';
describe('AgentOrchestrator', () => {
let mockDefaultClient: ModelClient;
@@ -33,6 +36,14 @@ describe('AgentOrchestrator', () => {
});
});
/**
 * Looks up the model client registered for `tier` on the shared mock router.
 * Throws instead of returning `undefined`, so tests get a descriptive failure
 * rather than relying on a non-null assertion.
 */
const requireClient = (tier: 'default' | 'fast' | 'complex'): ModelClient => {
  const resolved = mockRouter.getClient(tier);
  if (resolved) {
    return resolved;
  }
  throw new Error(`Expected ${tier} model client to exist in test router`);
};
describe('delegate()', () => {
it('routes to the correct tier when specified', async () => {
const orchestrator = new AgentOrchestrator({
@@ -69,7 +80,7 @@ describe('AgentOrchestrator', () => {
});
const mockToolExecutor = new ToolExecutor(mockToolRegistry, hooks);
const mockFastChatClient = mockRouter.getClient('fast')!;
const mockFastChatClient = requireClient('fast');
const mockFastChatFn = vi.fn().mockResolvedValue({
content: 'response with tools',
stopReason: 'end_turn',
@@ -298,7 +309,7 @@ describe('AgentOrchestrator', () => {
describe('process()', () => {
it('proxies to NativeAgent for user messages', async () => {
const mockDefaultChatClient = mockRouter.getClient('default')!;
const mockDefaultChatClient = requireClient('default');
const mockDefaultChatFn = vi.fn().mockResolvedValue({
content: 'Agent response',
stopReason: 'end_turn',
@@ -355,6 +366,88 @@ describe('AgentOrchestrator', () => {
expect(history[4]).toEqual({ role: 'user', content: 'Tell me about yourself' });
expect(history[5]).toEqual({ role: 'assistant', content: 'default response' });
});
it('uses adaptive memory injection strategy when configured', async () => {
  // Scratch directory handed to the MemoryStore; created outside the try so
  // there is nothing to clean up if creation itself fails.
  const tempDir = mkdtempSync(join(tmpdir(), 'flynn-orchestrator-memory-'));

  try {
    const memoryStore = new MemoryStore({ dir: tempDir, maxContextTokens: 2000 });
    memoryStore.writeCategory('user', 'preferences', 'User prefers concise output.', 'replace');

    // Swap the default-tier client's chat() for a mock so the system prompt
    // passed to the model can be inspected.
    const mockDefaultChatClient = requireClient('default');
    const mockDefaultChatFn = vi.fn().mockResolvedValue({
      content: 'Agent response',
      stopReason: 'end_turn',
      usage: { inputTokens: 50, outputTokens: 25 },
    } as ChatResponse);
    Object.assign(mockDefaultChatClient, { chat: mockDefaultChatFn });

    const orchestrator = new AgentOrchestrator({
      modelRouter: mockRouter,
      systemPrompt: 'You are a helpful agent.',
      primaryTier: 'default',
      delegation: {
        compaction: 'fast',
        memory_extraction: 'default',
        classification: 'complex',
        tool_summarisation: 'default',
        complex_reasoning: 'complex',
      },
      maxDelegationDepth: 10,
      memoryStore,
      memoryInjectionStrategy: 'adaptive',
      memoryMaxInjectionTokens: 100,
    });

    await orchestrator.process('Keep this concise please');

    expect(mockDefaultChatFn).toHaveBeenCalled();
    const callArgs = mockDefaultChatFn.mock.calls[0][0];
    expect(callArgs.system).toContain('# Memory Context');
    expect(callArgs.system).toContain('concise');
  } finally {
    // Always remove the scratch directory, even when an assertion above fails.
    rmSync(tempDir, { recursive: true, force: true });
  }
});
it('falls back to default memory context when adaptive injection errors', async () => {
  // Scratch directory handed to the MemoryStore; created outside the try so
  // there is nothing to clean up if creation itself fails.
  const tempDir = mkdtempSync(join(tmpdir(), 'flynn-orchestrator-memory-fallback-'));

  try {
    const memoryStore = new MemoryStore({ dir: tempDir, maxContextTokens: 2000 });
    memoryStore.write('user', 'Fallback memory content', 'replace');

    // Make the first getPromptSections() call throw to simulate a failure
    // during adaptive injection; later calls use the real implementation.
    const getPromptSectionsSpy = vi.spyOn(memoryStore, 'getPromptSections').mockImplementationOnce(() => {
      throw new Error('boom');
    });

    try {
      const mockDefaultChatClient = requireClient('default');
      const mockDefaultChatFn = vi.fn().mockResolvedValue({
        content: 'Agent response',
        stopReason: 'end_turn',
        usage: { inputTokens: 50, outputTokens: 25 },
      } as ChatResponse);
      Object.assign(mockDefaultChatClient, { chat: mockDefaultChatFn });

      const orchestrator = new AgentOrchestrator({
        modelRouter: mockRouter,
        systemPrompt: 'You are a helpful agent.',
        primaryTier: 'default',
        delegation: {
          compaction: 'fast',
          memory_extraction: 'default',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'complex',
        },
        maxDelegationDepth: 10,
        memoryStore,
        memoryInjectionStrategy: 'adaptive',
        memoryMaxInjectionTokens: 100,
      });

      await orchestrator.process('test message');

      // Without this check the test could pass on the happy path without
      // ever exercising the fallback branch.
      expect(getPromptSectionsSpy).toHaveBeenCalled();
      expect(mockDefaultChatFn).toHaveBeenCalled();
      const callArgs = mockDefaultChatFn.mock.calls[0][0];
      expect(callArgs.system).toContain('Fallback memory content');
    } finally {
      // Restore the real method even if an assertion above fails.
      getPromptSectionsSpy.mockRestore();
    }
  } finally {
    // Always remove the scratch directory, even when the test fails.
    rmSync(tempDir, { recursive: true, force: true });
  }
});
});
describe('reset()', () => {
+45 -3
View File
@@ -13,6 +13,7 @@ import { shouldCompact } from '../../context/tokens.js';
import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
import { estimateCost } from '../../models/costs.js';
import { auditLogger } from '../../audit/index.js';
import { buildAdaptiveMemoryContext, buildRecentMemoryContext } from '../../memory/adaptive.js';
// ── Public types ──────────────────────────────────────────────────────
@@ -91,6 +92,10 @@ export interface OrchestratorConfig {
contextWindow?: number;
/** Optional memory store for injecting persistent memory into the system prompt. */
memoryStore?: MemoryStore;
/** Strategy for memory prompt injection. */
memoryInjectionStrategy?: 'all' | 'recent' | 'adaptive';
/** Maximum tokens allowed for injected memory context. */
memoryMaxInjectionTokens?: number;
/** Policy context for tool filtering (agent tier, provider). */
toolPolicyContext?: ToolPolicyContext;
/** Collector for outbound attachments queued by tools (e.g. media.send). */
@@ -118,6 +123,8 @@ export class AgentOrchestrator {
private _modelName?: string;
private _contextWindow?: number;
private _memoryStore?: MemoryStore;
private _memoryInjectionStrategy: 'all' | 'recent' | 'adaptive';
private _memoryMaxInjectionTokens: number;
private _systemPromptBase: string;
private _usageByTier: Map<string, TierUsageStats> = new Map();
@@ -131,6 +138,8 @@ export class AgentOrchestrator {
this._modelName = config.modelName;
this._contextWindow = config.contextWindow;
this._memoryStore = config.memoryStore;
this._memoryInjectionStrategy = config.memoryInjectionStrategy ?? 'all';
this._memoryMaxInjectionTokens = config.memoryMaxInjectionTokens ?? 2000;
this._systemPromptBase = config.systemPrompt;
// Create the primary NativeAgent for user-facing conversation
@@ -216,7 +225,7 @@ export class AgentOrchestrator {
* exceeds the context window threshold and compacts it before processing.
*/
async process(userMessage: string, attachments?: Attachment[]): Promise<string> {
// NOTE(review): this view lists the earlier zero-argument call next to its
// replacement; the updated _injectMemoryContext signature takes the user
// message, so only the call below it is expected to remain — confirm.
this._injectMemoryContext();
// Refresh the system prompt's memory section before the turn, passing the
// incoming message along to the injection step.
this._injectMemoryContext(userMessage);
// Run the compaction check before the message is handed to the agent.
await this.compactIfNeeded();
// The primary agent produces the actual reply.
return this._agent.process(userMessage, attachments);
}
@@ -355,12 +364,34 @@ export class AgentOrchestrator {
* system prompt. If no memory store is configured or no memory content
* exists, restores the original base prompt.
*/
private _injectMemoryContext(): void {
private _injectMemoryContext(userMessage: string): void {
if (!this._memoryStore) {
return;
}
const memoryContext = this._memoryStore.getContextForPrompt();
let memoryContext = '';
try {
if (this._memoryInjectionStrategy === 'recent') {
memoryContext = buildRecentMemoryContext(this._memoryStore, this._memoryMaxInjectionTokens);
} else if (this._memoryInjectionStrategy === 'adaptive') {
memoryContext = buildAdaptiveMemoryContext({
store: this._memoryStore,
userMessage,
recentMessages: this.getHistory(),
config: {
maxTokens: this._memoryMaxInjectionTokens,
},
});
} else {
memoryContext = this._memoryStore.getContextForPrompt();
}
} catch (error) {
console.warn('[Flynn:memory] Adaptive memory injection failed, falling back to default context:', error);
memoryContext = this._memoryStore.getContextForPrompt();
}
memoryContext = this._clipMemoryContext(memoryContext);
if (!memoryContext) {
this._agent.setSystemPrompt(this._systemPromptBase);
return;
@@ -370,6 +401,17 @@ export class AgentOrchestrator {
this._agent.setSystemPrompt(enrichedPrompt);
}
/**
 * Hard-caps the memory context at the configured injection budget.
 *
 * The token budget is converted to a character limit at four characters per
 * token. Text within the limit is returned unchanged; longer text is cut at
 * the limit, backing off by one UTF-16 code unit when the cut would otherwise
 * separate a surrogate pair.
 *
 * @param context - Memory context text; may be empty.
 * @returns The context, truncated if necessary. A non-positive (or NaN)
 *   budget yields an empty string for any non-empty input.
 */
private _clipMemoryContext(context: string): string {
  if (!context) {
    return context;
  }

  const maxChars = Math.floor(this._memoryMaxInjectionTokens * 4);

  // A negative end index would make slice() count from the end of the string
  // and return most of the text, so treat any non-positive budget as "no room".
  if (!(maxChars > 0)) {
    return '';
  }
  if (context.length <= maxChars) {
    return context;
  }

  // Do not leave a dangling high surrogate at the end of the clipped text.
  const lastKept = context.charCodeAt(maxChars - 1);
  const endsOnHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
  return context.slice(0, endsOnHighSurrogate ? maxChars - 1 : maxChars);
}
/**
* Check whether automatic compaction should run, and if so, compact.
* Called before each `process()` call when compaction is configured.