From 027f7ad2835df36b047d3280ea44021bab4b5642 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Thu, 19 Feb 2026 09:52:45 -0800 Subject: [PATCH] feat(models): add background task model override config and runtime wiring --- src/backends/native/orchestrator.ts | 41 +++++++- src/config/schema.ts | 32 ++++++ src/context/compaction.ts | 2 + src/daemon/routing.ts | 66 +++++++++++- src/gateway/handlers/config.ts | 152 +++++++++++++++++++++++++++- src/gateway/session-bridge.ts | 78 +++++++++++++- 6 files changed, 367 insertions(+), 4 deletions(-) diff --git a/src/backends/native/orchestrator.ts b/src/backends/native/orchestrator.ts index e7b1d54..ed41577 100644 --- a/src/backends/native/orchestrator.ts +++ b/src/backends/native/orchestrator.ts @@ -1,5 +1,5 @@ import type { ModelRouter, ModelTier } from '../../models/router.js'; -import type { ChatRequest, Message, TokenUsage } from '../../models/types.js'; +import type { ChatRequest, Message, ModelClient, TokenUsage } from '../../models/types.js'; import type { Session } from '../../session/index.js'; import type { ToolRegistry } from '../../tools/registry.js'; import type { ToolExecutor } from '../../tools/executor.js'; @@ -22,6 +22,7 @@ import { CONTEXT_CHECKPOINT_PROMPT, MEMORY_EXTRACTION_PROMPT } from './prompts.j /** A single-turn, stateless request to a sub-agent at a specific tier. */ export interface SubAgentRequest { tier: ModelTier; + task?: keyof DelegationConfig; systemPrompt: string; message: string; maxTokens?: number; @@ -106,6 +107,12 @@ export interface OrchestratorConfig { primaryTier: ModelTier; /** Which tier to use for each delegation task type. */ delegation: DelegationConfig; + /** Optional direct provider/model overrides for specific delegation task types. */ + backgroundModelOverrides?: Partial>; /** Maximum nesting depth for delegation calls (safety guard). */ maxDelegationDepth: number; onToolUse?: (event: ToolUseEvent) => void; @@ -157,6 +164,11 @@ export class AgentOrchestrator { private _agent: NativeAgent; private _modelRouter: ModelRouter; private _delegation: DelegationConfig; + private _backgroundModelOverrides: Partial>; private _maxDelegationDepth: number; private _toolRegistry?: ToolRegistry; private _session?: Session; @@ -185,6 +197,7 @@ export class AgentOrchestrator { constructor(config: OrchestratorConfig) { this._modelRouter = config.modelRouter; this._delegation = config.delegation; + this._backgroundModelOverrides = config.backgroundModelOverrides ?? {}; this._maxDelegationDepth = config.maxDelegationDepth; this._toolRegistry = config.toolRegistry; this._session = config.session; @@ -262,6 +275,30 @@ export class AgentOrchestrator { chatRequest.tools = this._toolRegistry.filteredToAnthropicFormat(policyContext); } + const override = request.task ? this._backgroundModelOverrides[request.task] : undefined; + if (override) { + try { + const response = await override.client.chat(chatRequest); + this._trackUsage(tier, response.usage); + console.log( + `[Flynn:delegate] task=${request.task} provider_model=${override.label} fallback_tier=${override.fallbackTier} ` + + `tokens=${response.usage.inputTokens}+${response.usage.outputTokens}`, + ); + return { + content: response.content, + usage: response.usage, + tier, + }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.warn( + `[Flynn:delegate] task=${request.task} provider/model override failed (${override.label}): ${message}; ` + + `falling back to tier='${override.fallbackTier}'`, + ); + tier = override.fallbackTier; + } + } + const response = await this._modelRouter.chat(chatRequest, tier); // Track cumulative usage for this tier @@ -645,6 +682,7 @@ export class AgentOrchestrator { try { const extractionTier = this.getDelegationTier('memory_extraction'); const extraction = await this.delegate({ + task: 'memory_extraction', tier: extractionTier, systemPrompt: MEMORY_EXTRACTION_PROMPT, message: this._buildExtractionInput(userMessage, assistantText, toolCallsInRun), @@ -861,6 +899,7 @@ export class AgentOrchestrator { try { const tier = this.getDelegationTier('compaction'); const result = await this.delegate({ + task: 'compaction', tier, systemPrompt: CONTEXT_CHECKPOINT_PROMPT, message: conversation, diff --git a/src/config/schema.ts b/src/config/schema.ts index b5d5a49..cc00aac 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -490,6 +490,38 @@ const agentsSchema = z.object({ tool_summarisation: 'fast', complex_reasoning: 'complex', }), + background_models: z.object({ + compaction: z.object({ + enabled: z.boolean().default(true), + provider: z.enum(MODEL_PROVIDERS), + model: z.string().min(1), + fallback_tier: modelTierEnum.default('fast'), + }).optional(), + memory_extraction: z.object({ + enabled: z.boolean().default(true), + provider: z.enum(MODEL_PROVIDERS), + model: z.string().min(1), + fallback_tier: modelTierEnum.default('fast'), + }).optional(), + classification: z.object({ + enabled: z.boolean().default(true), + provider: z.enum(MODEL_PROVIDERS), + model: z.string().min(1), + fallback_tier: modelTierEnum.default('fast'), + }).optional(), + tool_summarisation: z.object({ + enabled: z.boolean().default(true), + provider: z.enum(MODEL_PROVIDERS), + model: z.string().min(1), + fallback_tier: modelTierEnum.default('fast'), + }).optional(), + complex_reasoning: z.object({ + enabled: z.boolean().default(true), + provider: z.enum(MODEL_PROVIDERS), + model: z.string().min(1), + fallback_tier: modelTierEnum.default('fast'), + }).optional(), + }).default({}), auto_escalate: z.boolean().default(false), max_delegation_depth: z.number().min(1).max(10).default(3), /** Maximum tool-loop iterations before the agent stops. */ diff --git a/src/context/compaction.ts b/src/context/compaction.ts index 97d5b69..e959bbb 100644 --- a/src/context/compaction.ts +++ b/src/context/compaction.ts @@ -117,6 +117,7 @@ export async function compactHistory(opts: { const tier = orchestrator.getDelegationTier('compaction'); const result = await orchestrator.delegate({ + task: 'compaction', tier, systemPrompt: COMPACTION_SYSTEM_PROMPT, message: formattedConversation, @@ -133,6 +134,7 @@ export async function compactHistory(opts: { try { const extractionTier = orchestrator.getDelegationTier('memory_extraction'); const extraction = await orchestrator.delegate({ + task: 'memory_extraction', tier: extractionTier, systemPrompt: MEMORY_EXTRACTION_PROMPT, message: `Extract persistent facts from this conversation:\n\n${formattedConversation}`, diff --git a/src/daemon/routing.ts b/src/daemon/routing.ts index d430572..ba18a1d 100644 --- a/src/daemon/routing.ts +++ b/src/daemon/routing.ts @@ -13,7 +13,7 @@ import { createMediaSendTool, createAgentDelegateTool } from '../tools/index.js' import type { AgentDelegateDeps } from '../tools/index.js'; import { createSandboxedShellTool, createSandboxedProcessStartTool, SandboxManager } from '../sandbox/index.js'; import { MODEL_PROVIDERS, type Config, type ModelConfig, type ModelProvider } from '../config/index.js'; -import { ModelRouter, type ModelTier } from '../models/index.js'; +import { ModelRouter, type ModelClient, type ModelTier } from '../models/index.js'; import { ToolRegistry, ToolExecutor } from '../tools/index.js'; import { SessionManager } from '../session/index.js'; import { AgentConfigRegistry, AgentRouter } from '../agents/index.js'; @@ -77,6 +77,55 @@ function tierFromUseCase(config: Config, useCaseRaw: unknown): ModelTier | undef return undefined; } +function buildBackgroundModelOverrides(config: Config): Partial> { + const overrides: Partial> = {}; + const configured = config.agents?.background_models ?? {}; + const providerConfigs = buildProviderConfigMap(config); + const tasks: Array = [ + 'compaction', + 'memory_extraction', + 'classification', + 'tool_summarisation', + 'complex_reasoning', + ]; + + for (const task of tasks) { + const entry = configured[task]; + if (!entry || entry.enabled === false) { + continue; + } + + const template = providerConfigs[entry.provider]; + try { + const client = createClientFromConfig( + template + ? { ...template, provider: entry.provider, model: entry.model } + : { provider: entry.provider, model: entry.model }, + ); + overrides[task] = { + client, + label: `${entry.provider}/${entry.model}`, + fallbackTier: entry.fallback_tier, + }; + } catch (error) { + console.warn( + `[Flynn:routing] Failed to initialize background model override for ${task} ` + + `(${entry.provider}/${entry.model}): ${error instanceof Error ? error.message : String(error)}`, + ); + } + } + + return overrides; +} + function parseResearchPrefix(text: string): string | undefined { const trimmed = text.trim(); const researchMatch = trimmed.match(/^research(?:\s*[:,-])?\s+(.+)$/i); @@ -174,6 +223,7 @@ export function createMessageRouter(deps: { // Cache agents by session ID + agent config name to avoid recreating on every message const agents = new Map(); const talkModeUntil = new Map(); + const activeRuns = new Map(); async function maybeBuildTtsAttachment(responseText: string, channel: string) { if (!isTtsEnabledForChannel(deps.config, channel)) { @@ -261,6 +311,7 @@ export function createMessageRouter(deps: { tool_summarisation: deps.config.agents.delegation.tool_summarisation ?? 'fast', complex_reasoning: deps.config.agents.delegation.complex_reasoning ?? 'complex', }; + const backgroundModelOverrides = buildBackgroundModelOverrides(deps.config); // Clone the tool registry and replace high-risk tools with sandboxed versions if configured. let effectiveToolRegistry = deps.toolRegistry; @@ -377,6 +428,7 @@ export function createMessageRouter(deps: { toolExecutor: deps.toolExecutor, primaryTier: effectiveTier, delegation: delegationConfig, + backgroundModelOverrides, maxDelegationDepth: deps.config.agents.max_delegation_depth ?? 3, maxIterations: deps.config.agents.max_iterations, compaction: deps.config.compaction.enabled ? { @@ -419,6 +471,7 @@ export function createMessageRouter(deps: { } const handler = async (msg: InboundMessage, reply: (response: OutboundMessage) => Promise): Promise => { + const sessionIdForRun = `${msg.channel}:${msg.senderId}`; let incomingText = msg.text; let matchedReactionName: string | undefined; const talkMode = deps.config.audio?.talk_mode; @@ -721,6 +774,14 @@ export function createMessageRouter(deps: { session.deleteConfig('modelTier'); return ''; }, + cancelRun: () => { + const run = activeRuns.get(session.id); + if (!run || !run.isCancellable()) { + return 'No active operation to cancel.'; + } + run.cancel(); + return 'Cancellation requested. The active operation will stop at the next safe point.'; + }, delegateAgent: async (agentName: string, task: string) => { const target = agentName.trim(); @@ -1293,6 +1354,7 @@ export function createMessageRouter(deps: { } let response: string; + activeRuns.set(sessionIdForRun, agent); try { response = await agent.process(messageText, attachments); } catch (error) { @@ -1322,6 +1384,8 @@ export function createMessageRouter(deps: { text: 'Sorry, an error occurred while processing your message.', replyTo: msg.id, }); + } finally { + activeRuns.delete(sessionIdForRun); } }; diff --git a/src/gateway/handlers/config.ts b/src/gateway/handlers/config.ts index 981fbe5..34c5cdb 100644 --- a/src/gateway/handlers/config.ts +++ b/src/gateway/handlers/config.ts @@ -1,6 +1,6 @@ import type { GatewayRequest, OutboundMessage } from '../protocol.js'; import { makeResponse, makeError, ErrorCode } from '../protocol.js'; -import type { Config } from '../../config/index.js'; +import { MODEL_PROVIDERS, type Config, type ModelProvider } from '../../config/index.js'; export interface ConfigHandlerDeps { config: Config; @@ -163,6 +163,156 @@ const PATCHABLE_KEYS: Record boolean config.server.nodes.push.enabled = value; return true; }, + 'agents.primary_tier': (config, value) => { + if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;} + config.agents.primary_tier = value; + return true; + }, + 'agents.delegation.compaction': (config, value) => { + if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;} + config.agents.delegation.compaction = value; + return true; + }, + 'agents.delegation.memory_extraction': (config, value) => { + if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;} + config.agents.delegation.memory_extraction = value; + return true; + }, + 'agents.delegation.classification': (config, value) => { + if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;} + config.agents.delegation.classification = value; + return true; + }, + 'agents.delegation.tool_summarisation': (config, value) => { + if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;} + config.agents.delegation.tool_summarisation = value; + return true; + }, + 'agents.delegation.complex_reasoning': (config, value) => { + if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;} + config.agents.delegation.complex_reasoning = value; + return true; + }, + 'agents.background_models.compaction.enabled': (config, value) => { + if (typeof value !== 'boolean') {return false;} + config.agents.background_models.compaction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.compaction.enabled = value; + return true; + }, + 'agents.background_models.compaction.provider': (config, value) => { + if (!MODEL_PROVIDERS.includes(String(value) as ModelProvider)) {return false;} + config.agents.background_models.compaction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.compaction.provider = value as ModelProvider; + return true; + }, + 'agents.background_models.compaction.model': (config, value) => { + if (typeof value !== 'string' || value.trim().length === 0) {return false;} + config.agents.background_models.compaction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.compaction.model = value.trim(); + return true; + }, + 'agents.background_models.compaction.fallback_tier': (config, value) => { + if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;} + config.agents.background_models.compaction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.compaction.fallback_tier = value; + return true; + }, + 'agents.background_models.memory_extraction.enabled': (config, value) => { + if (typeof value !== 'boolean') {return false;} + config.agents.background_models.memory_extraction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.memory_extraction.enabled = value; + return true; + }, + 'agents.background_models.memory_extraction.provider': (config, value) => { + if (!MODEL_PROVIDERS.includes(String(value) as ModelProvider)) {return false;} + config.agents.background_models.memory_extraction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.memory_extraction.provider = value as ModelProvider; + return true; + }, + 'agents.background_models.memory_extraction.model': (config, value) => { + if (typeof value !== 'string' || value.trim().length === 0) {return false;} + config.agents.background_models.memory_extraction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.memory_extraction.model = value.trim(); + return true; + }, + 'agents.background_models.memory_extraction.fallback_tier': (config, value) => { + if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;} + config.agents.background_models.memory_extraction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.memory_extraction.fallback_tier = value; + return true; + }, + 'agents.background_models.classification.enabled': (config, value) => { + if (typeof value !== 'boolean') {return false;} + config.agents.background_models.classification ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.classification.enabled = value; + return true; + }, + 'agents.background_models.classification.provider': (config, value) => { + if (!MODEL_PROVIDERS.includes(String(value) as ModelProvider)) {return false;} + config.agents.background_models.classification ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.classification.provider = value as ModelProvider; + return true; + }, + 'agents.background_models.classification.model': (config, value) => { + if (typeof value !== 'string' || value.trim().length === 0) {return false;} + config.agents.background_models.classification ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.classification.model = value.trim(); + return true; + }, + 'agents.background_models.classification.fallback_tier': (config, value) => { + if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;} + config.agents.background_models.classification ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.classification.fallback_tier = value; + return true; + }, + 'agents.background_models.tool_summarisation.enabled': (config, value) => { + if (typeof value !== 'boolean') {return false;} + config.agents.background_models.tool_summarisation ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.tool_summarisation.enabled = value; + return true; + }, + 'agents.background_models.tool_summarisation.provider': (config, value) => { + if (!MODEL_PROVIDERS.includes(String(value) as ModelProvider)) {return false;} + config.agents.background_models.tool_summarisation ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.tool_summarisation.provider = value as ModelProvider; + return true; + }, + 'agents.background_models.tool_summarisation.model': (config, value) => { + if (typeof value !== 'string' || value.trim().length === 0) {return false;} + config.agents.background_models.tool_summarisation ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.tool_summarisation.model = value.trim(); + return true; + }, + 'agents.background_models.tool_summarisation.fallback_tier': (config, value) => { + if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;} + config.agents.background_models.tool_summarisation ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.tool_summarisation.fallback_tier = value; + return true; + }, + 'agents.background_models.complex_reasoning.enabled': (config, value) => { + if (typeof value !== 'boolean') {return false;} + config.agents.background_models.complex_reasoning ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.complex_reasoning.enabled = value; + return true; + }, + 'agents.background_models.complex_reasoning.provider': (config, value) => { + if (!MODEL_PROVIDERS.includes(String(value) as ModelProvider)) {return false;} + config.agents.background_models.complex_reasoning ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.complex_reasoning.provider = value as ModelProvider; + return true; + }, + 'agents.background_models.complex_reasoning.model': (config, value) => { + if (typeof value !== 'string' || value.trim().length === 0) {return false;} + config.agents.background_models.complex_reasoning ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.complex_reasoning.model = value.trim(); + return true; + }, + 'agents.background_models.complex_reasoning.fallback_tier': (config, value) => { + if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;} + config.agents.background_models.complex_reasoning ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' }; + config.agents.background_models.complex_reasoning.fallback_tier = value; + return true; + }, 'automation.delivery_mode': (config, value) => { if (value !== 'shared_session' && value !== 'isolated_job' && value !== 'announce') {return false;} config.automation ??= {} as Config['automation']; diff --git a/src/gateway/session-bridge.ts b/src/gateway/session-bridge.ts index 9ddc9e7..2c1ae81 100644 --- a/src/gateway/session-bridge.ts +++ b/src/gateway/session-bridge.ts @@ -2,12 +2,13 @@ import { randomUUID } from 'crypto'; import type { SessionManager } from '../session/manager.js'; import type { ModelClient } from '../models/types.js'; import type { ModelRouter, ModelTier } from '../models/router.js'; +import { createClientFromConfig } from '../daemon/models.js'; +import type { Config, ModelConfig, ModelProvider } from '../config/index.js'; import type { ToolRegistry } from '../tools/registry.js'; import type { ToolExecutor } from '../tools/executor.js'; import { AgentOrchestrator, type DelegationConfig } from '../backends/native/orchestrator.js'; import type { ToolUseEvent } from '../backends/native/agent.js'; import type { MemoryStore } from '../memory/store.js'; -import type { Config } from '../config/index.js'; import { summarizeSessionOnEnd, type SessionEndSummaryConfig } from '../session/endSummary.js'; export interface SessionBridgeConfig { @@ -284,6 +285,7 @@ export class SessionBridge { tool_summarisation: config?.agents.delegation.tool_summarisation ?? 'fast', complex_reasoning: config?.agents.delegation.complex_reasoning ?? 'complex', }; + const backgroundModelOverrides = this.buildBackgroundModelOverrides(); agent = new AgentOrchestrator({ modelRouter: this.config.modelClient as ModelRouter, @@ -293,6 +295,7 @@ export class SessionBridge { toolExecutor: this.config.toolExecutor, primaryTier, delegation: delegationConfig, + backgroundModelOverrides, maxDelegationDepth: config?.agents.max_delegation_depth ?? 3, maxIterations: config?.agents.max_iterations, compaction: config?.compaction.enabled ? { @@ -337,4 +340,77 @@ export class SessionBridge { } return agent; } + + private buildBackgroundModelOverrides(): Partial> { + const runtimeConfig = this.config.config; + const overrides: Partial> = {}; + if (!runtimeConfig) { + return overrides; + } + + const providerConfigs = this.buildProviderConfigMap(runtimeConfig); + const configured = runtimeConfig.agents?.background_models ?? {}; + const tasks: Array = [ + 'compaction', + 'memory_extraction', + 'classification', + 'tool_summarisation', + 'complex_reasoning', + ]; + + for (const task of tasks) { + const entry = configured[task]; + if (!entry || entry.enabled === false) { + continue; + } + const template = providerConfigs[entry.provider]; + try { + const client = createClientFromConfig( + template + ? { ...template, provider: entry.provider, model: entry.model } + : { provider: entry.provider, model: entry.model }, + ); + overrides[task] = { + client, + label: `${entry.provider}/${entry.model}`, + fallbackTier: entry.fallback_tier, + }; + } catch (error) { + console.warn( + `[Flynn:gateway] Failed to initialize background model override for ${task} ` + + `(${entry.provider}/${entry.model}): ${error instanceof Error ? error.message : String(error)}`, + ); + } + } + + return overrides; + } + + private buildProviderConfigMap(config: Config): Partial> { + const providerConfigs: Partial> = {}; + const modelConfigs: ModelConfig[] = [ + config.models.default, + ...(config.models.fast ? [config.models.fast] : []), + ...(config.models.complex ? [config.models.complex] : []), + ...(config.models.local ? [config.models.local] : []), + ...Object.values(config.models.local_providers ?? {}), + ]; + + for (const modelConfig of modelConfigs) { + providerConfigs[modelConfig.provider] = modelConfig; + if (modelConfig.fallback) { + providerConfigs[modelConfig.fallback.provider] = modelConfig.fallback; + } + } + + return providerConfigs; + } }