feat(session): persist model tier overrides per session

Store per-session config in SQLite and route /model and /reset through command fast-paths so channel sessions keep independent model selection across reconnects and restarts.
This commit is contained in:
William Valentin
2026-02-13 01:04:26 -08:00
parent 3472a0b926
commit 9f81c01603
35 changed files with 1438 additions and 144 deletions
+45 -3
View File
@@ -13,6 +13,7 @@ import { shouldCompact } from '../../context/tokens.js';
import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
import { estimateCost } from '../../models/costs.js';
import { auditLogger } from '../../audit/index.js';
import { buildAdaptiveMemoryContext, buildRecentMemoryContext } from '../../memory/adaptive.js';
// ── Public types ──────────────────────────────────────────────────────
@@ -91,6 +92,10 @@ export interface OrchestratorConfig {
contextWindow?: number;
/** Optional memory store for injecting persistent memory into the system prompt. */
memoryStore?: MemoryStore;
/** Strategy for memory prompt injection. */
memoryInjectionStrategy?: 'all' | 'recent' | 'adaptive';
/** Maximum tokens allowed for injected memory context. */
memoryMaxInjectionTokens?: number;
/** Policy context for tool filtering (agent tier, provider). */
toolPolicyContext?: ToolPolicyContext;
/** Collector for outbound attachments queued by tools (e.g. media.send). */
@@ -118,6 +123,8 @@ export class AgentOrchestrator {
private _modelName?: string;
private _contextWindow?: number;
private _memoryStore?: MemoryStore;
private _memoryInjectionStrategy: 'all' | 'recent' | 'adaptive';
private _memoryMaxInjectionTokens: number;
private _systemPromptBase: string;
private _usageByTier: Map<string, TierUsageStats> = new Map();
@@ -131,6 +138,8 @@ export class AgentOrchestrator {
this._modelName = config.modelName;
this._contextWindow = config.contextWindow;
this._memoryStore = config.memoryStore;
this._memoryInjectionStrategy = config.memoryInjectionStrategy ?? 'all';
this._memoryMaxInjectionTokens = config.memoryMaxInjectionTokens ?? 2000;
this._systemPromptBase = config.systemPrompt;
// Create the primary NativeAgent for user-facing conversation
@@ -216,7 +225,7 @@ export class AgentOrchestrator {
* exceeds the context window threshold and compacts it before processing.
*/
async process(userMessage: string, attachments?: Attachment[]): Promise<string> {
// Refresh the memory-derived system prompt before the agent handles this message.
this._injectMemoryContext();
this._injectMemoryContext(userMessage);
// Run the automatic compaction check before handing the message to the agent.
await this.compactIfNeeded();
// Delegate the message (and any attachments) to the underlying agent and return its result.
return this._agent.process(userMessage, attachments);
}
@@ -355,12 +364,34 @@ export class AgentOrchestrator {
* system prompt. If no memory store is configured, the current prompt is
* left untouched; if the store yields no memory content, restores the
* original base prompt.
*/
private _injectMemoryContext(): void {
private _injectMemoryContext(userMessage: string): void {
if (!this._memoryStore) {
return;
}
const memoryContext = this._memoryStore.getContextForPrompt();
let memoryContext = '';
try {
if (this._memoryInjectionStrategy === 'recent') {
memoryContext = buildRecentMemoryContext(this._memoryStore, this._memoryMaxInjectionTokens);
} else if (this._memoryInjectionStrategy === 'adaptive') {
memoryContext = buildAdaptiveMemoryContext({
store: this._memoryStore,
userMessage,
recentMessages: this.getHistory(),
config: {
maxTokens: this._memoryMaxInjectionTokens,
},
});
} else {
memoryContext = this._memoryStore.getContextForPrompt();
}
} catch (error) {
console.warn('[Flynn:memory] Adaptive memory injection failed, falling back to default context:', error);
memoryContext = this._memoryStore.getContextForPrompt();
}
memoryContext = this._clipMemoryContext(memoryContext);
if (!memoryContext) {
this._agent.setSystemPrompt(this._systemPromptBase);
return;
@@ -370,6 +401,17 @@ export class AgentOrchestrator {
this._agent.setSystemPrompt(enrichedPrompt);
}
/**
 * Clip a memory context string to the configured injection budget.
 *
 * The budget (`_memoryMaxInjectionTokens`) is converted to a character
 * limit by multiplying by 4 (presumably a rough characters-per-token
 * estimate). Strings within the limit are returned unchanged.
 *
 * @param context - Memory context text destined for the system prompt.
 * @returns `context` itself when it fits, otherwise a prefix of it that is
 *   at most the character limit long and never ends in a dangling high
 *   surrogate.
 */
private _clipMemoryContext(context: string): string {
  if (!context) {
    return context;
  }

  // A negative budget would make slice() count from the END of the string
  // (dropping only the tail); treat it as "no room at all" instead.
  const maxChars = Math.max(0, this._memoryMaxInjectionTokens * 4);
  if (context.length <= maxChars) {
    return context;
  }

  let end = Math.floor(maxChars);

  // Do not cut a UTF-16 surrogate pair in half: if the last kept code unit
  // is a high surrogate, drop it so the prompt stays well-formed.
  if (end > 0) {
    const lastUnit = context.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      end -= 1;
    }
  }

  return context.slice(0, end);
}
/**
* Check whether automatic compaction should run, and if so, compact.
* Called before each `process()` call when compaction is configured.