feat(session): persist model tier overrides per session
Store per-session config in SQLite and route /model and /reset through command fast-paths so channel sessions keep independent model selection across reconnects and restarts.
This commit is contained in:
@@ -4,7 +4,10 @@ import { ModelRouter } from '../../models/router.js';
|
||||
import type { ChatResponse, ModelClient } from '../../models/types.js';
|
||||
import { ToolRegistry, ToolExecutor } from '../../tools/index.js';
|
||||
import { HookEngine } from '../../hooks/engine.js';
|
||||
import type { SubAgentRequest } from './orchestrator.js';
|
||||
import { MemoryStore } from '../../memory/store.js';
|
||||
import { mkdtempSync, rmSync } from 'fs';
|
||||
import { tmpdir } from 'os';
|
||||
import { join } from 'path';
|
||||
|
||||
describe('AgentOrchestrator', () => {
|
||||
let mockDefaultClient: ModelClient;
|
||||
@@ -33,6 +36,14 @@ describe('AgentOrchestrator', () => {
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Look up the model client registered for `tier` on the shared test router.
 *
 * Throws instead of returning a missing client, so a misconfigured router
 * fails the test with a descriptive message rather than a later null access.
 */
const requireClient = (tier: 'default' | 'fast' | 'complex'): ModelClient => {
  const resolved = mockRouter.getClient(tier);
  if (resolved) {
    return resolved;
  }
  throw new Error(`Expected ${tier} model client to exist in test router`);
};
|
||||
|
||||
describe('delegate()', () => {
|
||||
it('routes to the correct tier when specified', async () => {
|
||||
const orchestrator = new AgentOrchestrator({
|
||||
@@ -69,7 +80,7 @@ describe('AgentOrchestrator', () => {
|
||||
});
|
||||
const mockToolExecutor = new ToolExecutor(mockToolRegistry, hooks);
|
||||
|
||||
const mockFastChatClient = mockRouter.getClient('fast')!;
|
||||
const mockFastChatClient = requireClient('fast');
|
||||
const mockFastChatFn = vi.fn().mockResolvedValue({
|
||||
content: 'response with tools',
|
||||
stopReason: 'end_turn',
|
||||
@@ -298,7 +309,7 @@ describe('AgentOrchestrator', () => {
|
||||
|
||||
describe('process()', () => {
|
||||
it('proxies to NativeAgent for user messages', async () => {
|
||||
const mockDefaultChatClient = mockRouter.getClient('default')!;
|
||||
const mockDefaultChatClient = requireClient('default');
|
||||
const mockDefaultChatFn = vi.fn().mockResolvedValue({
|
||||
content: 'Agent response',
|
||||
stopReason: 'end_turn',
|
||||
@@ -355,6 +366,88 @@ describe('AgentOrchestrator', () => {
|
||||
expect(history[4]).toEqual({ role: 'user', content: 'Tell me about yourself' });
|
||||
expect(history[5]).toEqual({ role: 'assistant', content: 'default response' });
|
||||
});
|
||||
|
||||
// With memoryInjectionStrategy 'adaptive', the system prompt sent to the
// default-tier client must contain the memory section header and the stored
// preference text.
it('uses adaptive memory injection strategy when configured', async () => {
  const tempDir = mkdtempSync(join(tmpdir(), 'flynn-orchestrator-memory-'));

  try {
    const memoryStore = new MemoryStore({ dir: tempDir, maxContextTokens: 2000 });
    memoryStore.writeCategory('user', 'preferences', 'User prefers concise output.', 'replace');

    // Replace the default-tier client's chat() so the request can be inspected.
    const mockDefaultChatClient = requireClient('default');
    const mockDefaultChatFn = vi.fn().mockResolvedValue({
      content: 'Agent response',
      stopReason: 'end_turn',
      usage: { inputTokens: 50, outputTokens: 25 },
    } as ChatResponse);
    Object.assign(mockDefaultChatClient, { chat: mockDefaultChatFn });

    const orchestrator = new AgentOrchestrator({
      modelRouter: mockRouter,
      systemPrompt: 'You are a helpful agent.',
      primaryTier: 'default',
      delegation: {
        compaction: 'fast',
        memory_extraction: 'default',
        classification: 'complex',
        tool_summarisation: 'default',
        complex_reasoning: 'complex',
      },
      maxDelegationDepth: 10,
      memoryStore,
      memoryInjectionStrategy: 'adaptive',
      memoryMaxInjectionTokens: 100,
    });

    await orchestrator.process('Keep this concise please');

    expect(mockDefaultChatFn).toHaveBeenCalled();
    const callArgs = mockDefaultChatFn.mock.calls[0][0];
    expect(callArgs.system).toContain('# Memory Context');
    expect(callArgs.system).toContain('concise');
  } finally {
    // Always remove the temp directory, even when an assertion above fails.
    rmSync(tempDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
// When building the adaptive context throws, the orchestrator must still
// inject the store's default memory context into the system prompt.
it('falls back to default memory context when adaptive injection errors', async () => {
  const tempDir = mkdtempSync(join(tmpdir(), 'flynn-orchestrator-memory-fallback-'));

  try {
    const memoryStore = new MemoryStore({ dir: tempDir, maxContextTokens: 2000 });
    memoryStore.write('user', 'Fallback memory content', 'replace');

    // Make the first getPromptSections() call throw.
    // NOTE(review): presumably the adaptive builder calls this method - confirm.
    const getPromptSectionsSpy = vi.spyOn(memoryStore, 'getPromptSections').mockImplementationOnce(() => {
      throw new Error('boom');
    });

    try {
      // Replace the default-tier client's chat() so the request can be inspected.
      const mockDefaultChatClient = requireClient('default');
      const mockDefaultChatFn = vi.fn().mockResolvedValue({
        content: 'Agent response',
        stopReason: 'end_turn',
        usage: { inputTokens: 50, outputTokens: 25 },
      } as ChatResponse);
      Object.assign(mockDefaultChatClient, { chat: mockDefaultChatFn });

      const orchestrator = new AgentOrchestrator({
        modelRouter: mockRouter,
        systemPrompt: 'You are a helpful agent.',
        primaryTier: 'default',
        delegation: {
          compaction: 'fast',
          memory_extraction: 'default',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'complex',
        },
        maxDelegationDepth: 10,
        memoryStore,
        memoryInjectionStrategy: 'adaptive',
        memoryMaxInjectionTokens: 100,
      });

      await orchestrator.process('test message');

      // Fail with a clear assertion rather than a TypeError on calls[0][0].
      expect(mockDefaultChatFn).toHaveBeenCalled();
      const callArgs = mockDefaultChatFn.mock.calls[0][0];
      expect(callArgs.system).toContain('Fallback memory content');
    } finally {
      // Restore the real method even when an assertion above fails.
      getPromptSectionsSpy.mockRestore();
    }
  } finally {
    // Always remove the temp directory, even when the test body throws.
    rmSync(tempDir, { recursive: true, force: true });
  }
});
|
||||
});
|
||||
|
||||
describe('reset()', () => {
|
||||
|
||||
@@ -13,6 +13,7 @@ import { shouldCompact } from '../../context/tokens.js';
|
||||
import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
|
||||
import { estimateCost } from '../../models/costs.js';
|
||||
import { auditLogger } from '../../audit/index.js';
|
||||
import { buildAdaptiveMemoryContext, buildRecentMemoryContext } from '../../memory/adaptive.js';
|
||||
|
||||
// ── Public types ──────────────────────────────────────────────────────
|
||||
|
||||
@@ -91,6 +92,10 @@ export interface OrchestratorConfig {
|
||||
contextWindow?: number;
|
||||
/** Optional memory store for injecting persistent memory into the system prompt. */
|
||||
memoryStore?: MemoryStore;
|
||||
/** Strategy for memory prompt injection. */
|
||||
memoryInjectionStrategy?: 'all' | 'recent' | 'adaptive';
|
||||
/** Maximum tokens allowed for injected memory context. */
|
||||
memoryMaxInjectionTokens?: number;
|
||||
/** Policy context for tool filtering (agent tier, provider). */
|
||||
toolPolicyContext?: ToolPolicyContext;
|
||||
/** Collector for outbound attachments queued by tools (e.g. media.send). */
|
||||
@@ -118,6 +123,8 @@ export class AgentOrchestrator {
|
||||
private _modelName?: string;
|
||||
private _contextWindow?: number;
|
||||
private _memoryStore?: MemoryStore;
|
||||
private _memoryInjectionStrategy: 'all' | 'recent' | 'adaptive';
|
||||
private _memoryMaxInjectionTokens: number;
|
||||
private _systemPromptBase: string;
|
||||
private _usageByTier: Map<string, TierUsageStats> = new Map();
|
||||
|
||||
@@ -131,6 +138,8 @@ export class AgentOrchestrator {
|
||||
this._modelName = config.modelName;
|
||||
this._contextWindow = config.contextWindow;
|
||||
this._memoryStore = config.memoryStore;
|
||||
this._memoryInjectionStrategy = config.memoryInjectionStrategy ?? 'all';
|
||||
this._memoryMaxInjectionTokens = config.memoryMaxInjectionTokens ?? 2000;
|
||||
this._systemPromptBase = config.systemPrompt;
|
||||
|
||||
// Create the primary NativeAgent for user-facing conversation
|
||||
@@ -216,7 +225,7 @@ export class AgentOrchestrator {
|
||||
* exceeds the context window threshold and compacts it before processing.
|
||||
*/
|
||||
async process(userMessage: string, attachments?: Attachment[]): Promise<string> {
  // Refresh the system prompt with memory context before the agent runs.
  // The user message is passed so the injection step can use it; the stale
  // zero-argument call is dropped because it no longer matches the method
  // signature and would inject memory a second time.
  this._injectMemoryContext(userMessage);
  // Compact history first if needed, then hand the message to the agent.
  await this.compactIfNeeded();
  return this._agent.process(userMessage, attachments);
}
|
||||
@@ -355,12 +364,34 @@ export class AgentOrchestrator {
|
||||
* system prompt. If no memory store is configured or no memory content
|
||||
* exists, restores the original base prompt.
|
||||
*/
|
||||
private _injectMemoryContext(): void {
|
||||
private _injectMemoryContext(userMessage: string): void {
|
||||
if (!this._memoryStore) {
|
||||
return;
|
||||
}
|
||||
|
||||
const memoryContext = this._memoryStore.getContextForPrompt();
|
||||
let memoryContext = '';
|
||||
try {
|
||||
if (this._memoryInjectionStrategy === 'recent') {
|
||||
memoryContext = buildRecentMemoryContext(this._memoryStore, this._memoryMaxInjectionTokens);
|
||||
} else if (this._memoryInjectionStrategy === 'adaptive') {
|
||||
memoryContext = buildAdaptiveMemoryContext({
|
||||
store: this._memoryStore,
|
||||
userMessage,
|
||||
recentMessages: this.getHistory(),
|
||||
config: {
|
||||
maxTokens: this._memoryMaxInjectionTokens,
|
||||
},
|
||||
});
|
||||
} else {
|
||||
memoryContext = this._memoryStore.getContextForPrompt();
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn('[Flynn:memory] Adaptive memory injection failed, falling back to default context:', error);
|
||||
memoryContext = this._memoryStore.getContextForPrompt();
|
||||
}
|
||||
|
||||
memoryContext = this._clipMemoryContext(memoryContext);
|
||||
|
||||
if (!memoryContext) {
|
||||
this._agent.setSystemPrompt(this._systemPromptBase);
|
||||
return;
|
||||
@@ -370,6 +401,17 @@ export class AgentOrchestrator {
|
||||
this._agent.setSystemPrompt(enrichedPrompt);
|
||||
}
|
||||
|
||||
/**
 * Truncate memory context so it fits the configured injection budget.
 *
 * The budget is `_memoryMaxInjectionTokens`, converted to a character limit
 * with a rough four-characters-per-token estimate.
 *
 * @param context - Memory context text to clip; empty input is returned as-is.
 * @returns The context unchanged when it fits, otherwise its leading portion.
 *   Returns an empty string when the budget is zero, negative or NaN.
 */
private _clipMemoryContext(context: string): string {
  if (!context) {
    return context;
  }

  const maxChars = Math.floor(this._memoryMaxInjectionTokens * 4);

  // A negative limit would make slice() count from the end of the string and
  // return almost the whole context, so treat any non-positive (or NaN)
  // budget as "inject nothing".
  if (!(maxChars > 0)) {
    return '';
  }

  if (context.length <= maxChars) {
    return context;
  }

  // Avoid cutting between the two halves of a UTF-16 surrogate pair, which
  // would leave a lone high surrogate at the end of the prompt.
  const lastKept = context.charCodeAt(maxChars - 1);
  const firstDropped = context.charCodeAt(maxChars);
  const splitsPair =
    lastKept >= 0xd800 && lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 && firstDropped <= 0xdfff;

  return context.slice(0, splitsPair ? maxChars - 1 : maxChars);
}
|
||||
|
||||
/**
|
||||
* Check whether automatic compaction should run, and if so, compact.
|
||||
* Called before each `process()` call when compaction is configured.
|
||||
|
||||
Reference in New Issue
Block a user