feat(models): add background task model override config and runtime wiring

This commit is contained in:
William Valentin
2026-02-19 09:52:45 -08:00
parent 4df1291e64
commit 027f7ad283
6 changed files with 367 additions and 4 deletions
+40 -1
View File
@@ -1,5 +1,5 @@
import type { ModelRouter, ModelTier } from '../../models/router.js';
import type { ChatRequest, Message, TokenUsage } from '../../models/types.js';
import type { ChatRequest, Message, ModelClient, TokenUsage } from '../../models/types.js';
import type { Session } from '../../session/index.js';
import type { ToolRegistry } from '../../tools/registry.js';
import type { ToolExecutor } from '../../tools/executor.js';
@@ -22,6 +22,7 @@ import { CONTEXT_CHECKPOINT_PROMPT, MEMORY_EXTRACTION_PROMPT } from './prompts.j
/** A single-turn, stateless request to a sub-agent at a specific tier. */
export interface SubAgentRequest {
tier: ModelTier;
task?: keyof DelegationConfig;
systemPrompt: string;
message: string;
maxTokens?: number;
@@ -106,6 +107,12 @@ export interface OrchestratorConfig {
primaryTier: ModelTier;
/** Which tier to use for each delegation task type. */
delegation: DelegationConfig;
/** Optional direct provider/model overrides for specific delegation task types. */
backgroundModelOverrides?: Partial<Record<keyof DelegationConfig, {
client: ModelClient;
label: string;
fallbackTier: ModelTier;
}>>;
/** Maximum nesting depth for delegation calls (safety guard). */
maxDelegationDepth: number;
onToolUse?: (event: ToolUseEvent) => void;
@@ -157,6 +164,11 @@ export class AgentOrchestrator {
private _agent: NativeAgent;
private _modelRouter: ModelRouter;
private _delegation: DelegationConfig;
private _backgroundModelOverrides: Partial<Record<keyof DelegationConfig, {
client: ModelClient;
label: string;
fallbackTier: ModelTier;
}>>;
private _maxDelegationDepth: number;
private _toolRegistry?: ToolRegistry;
private _session?: Session;
@@ -185,6 +197,7 @@ export class AgentOrchestrator {
constructor(config: OrchestratorConfig) {
this._modelRouter = config.modelRouter;
this._delegation = config.delegation;
this._backgroundModelOverrides = config.backgroundModelOverrides ?? {};
this._maxDelegationDepth = config.maxDelegationDepth;
this._toolRegistry = config.toolRegistry;
this._session = config.session;
@@ -262,6 +275,30 @@ export class AgentOrchestrator {
chatRequest.tools = this._toolRegistry.filteredToAnthropicFormat(policyContext);
}
// Per-task model override: when the request names a delegation task and an
// override client is configured for it, try that client before the tier router.
const override = request.task ? this._backgroundModelOverrides[request.task] : undefined;
if (override) {
try {
const response = await override.client.chat(chatRequest);
// Usage is recorded under the requested tier, not under the override's label.
this._trackUsage(tier, response.usage);
console.log(
`[Flynn:delegate] task=${request.task} provider_model=${override.label} fallback_tier=${override.fallbackTier} ` +
`tokens=${response.usage.inputTokens}+${response.usage.outputTokens}`,
);
// The returned tier is also the requested tier, even though the override client answered.
return {
content: response.content,
usage: response.usage,
tier,
};
} catch (error) {
// Best-effort: an override failure is logged, then execution falls through to the
// router call that follows this block, using the override's fallback tier.
const message = error instanceof Error ? error.message : String(error);
console.warn(
`[Flynn:delegate] task=${request.task} provider/model override failed (${override.label}): ${message}; ` +
`falling back to tier='${override.fallbackTier}'`,
);
tier = override.fallbackTier;
}
}
const response = await this._modelRouter.chat(chatRequest, tier);
// Track cumulative usage for this tier
@@ -645,6 +682,7 @@ export class AgentOrchestrator {
try {
const extractionTier = this.getDelegationTier('memory_extraction');
const extraction = await this.delegate({
task: 'memory_extraction',
tier: extractionTier,
systemPrompt: MEMORY_EXTRACTION_PROMPT,
message: this._buildExtractionInput(userMessage, assistantText, toolCallsInRun),
@@ -861,6 +899,7 @@ export class AgentOrchestrator {
try {
const tier = this.getDelegationTier('compaction');
const result = await this.delegate({
task: 'compaction',
tier,
systemPrompt: CONTEXT_CHECKPOINT_PROMPT,
message: conversation,
+32
View File
@@ -490,6 +490,38 @@ const agentsSchema = z.object({
tool_summarisation: 'fast',
complex_reasoning: 'complex',
}),
/**
 * Optional per-task provider/model overrides. Every task accepts the same shape:
 * `enabled` (default true), a known `provider`, a non-empty `model`, and a
 * `fallback_tier` (default 'fast'). Omitted tasks have no override.
 */
background_models: (() => {
  // All five tasks share one schema shape, so build it once and reuse it.
  const taskOverrideSchema = z.object({
    enabled: z.boolean().default(true),
    provider: z.enum(MODEL_PROVIDERS),
    model: z.string().min(1),
    fallback_tier: modelTierEnum.default('fast'),
  }).optional();

  return z.object({
    compaction: taskOverrideSchema,
    memory_extraction: taskOverrideSchema,
    classification: taskOverrideSchema,
    tool_summarisation: taskOverrideSchema,
    complex_reasoning: taskOverrideSchema,
  }).default({});
})(),
auto_escalate: z.boolean().default(false),
max_delegation_depth: z.number().min(1).max(10).default(3),
/** Maximum tool-loop iterations before the agent stops. */
+2
View File
@@ -117,6 +117,7 @@ export async function compactHistory(opts: {
const tier = orchestrator.getDelegationTier('compaction');
const result = await orchestrator.delegate({
task: 'compaction',
tier,
systemPrompt: COMPACTION_SYSTEM_PROMPT,
message: formattedConversation,
@@ -133,6 +134,7 @@ export async function compactHistory(opts: {
try {
const extractionTier = orchestrator.getDelegationTier('memory_extraction');
const extraction = await orchestrator.delegate({
task: 'memory_extraction',
tier: extractionTier,
systemPrompt: MEMORY_EXTRACTION_PROMPT,
message: `Extract persistent facts from this conversation:\n\n${formattedConversation}`,
+65 -1
View File
@@ -13,7 +13,7 @@ import { createMediaSendTool, createAgentDelegateTool } from '../tools/index.js'
import type { AgentDelegateDeps } from '../tools/index.js';
import { createSandboxedShellTool, createSandboxedProcessStartTool, SandboxManager } from '../sandbox/index.js';
import { MODEL_PROVIDERS, type Config, type ModelConfig, type ModelProvider } from '../config/index.js';
import { ModelRouter, type ModelTier } from '../models/index.js';
import { ModelRouter, type ModelClient, type ModelTier } from '../models/index.js';
import { ToolRegistry, ToolExecutor } from '../tools/index.js';
import { SessionManager } from '../session/index.js';
import { AgentConfigRegistry, AgentRouter } from '../agents/index.js';
@@ -77,6 +77,55 @@ function tierFromUseCase(config: Config, useCaseRaw: unknown): ModelTier | undef
return undefined;
}
/**
 * Builds the table of per-task model overrides from `config.agents.background_models`.
 *
 * For every delegation task with an entry that is not explicitly disabled, a client is
 * created from the entry's provider/model. If a model config for the same provider is
 * already known, it is used as a template with `provider` and `model` replaced.
 * A task whose client cannot be created is logged and left out of the result.
 *
 * @param config Loaded application configuration.
 * @returns Map from delegation task to its override client, display label and fallback tier.
 */
function buildBackgroundModelOverrides(config: Config): Partial<Record<keyof DelegationConfig, {
  client: ModelClient;
  label: string;
  fallbackTier: ModelTier;
}>> {
  const result: ReturnType<typeof buildBackgroundModelOverrides> = {};
  const backgroundModels = config.agents?.background_models ?? {};
  const templatesByProvider = buildProviderConfigMap(config);

  const taskNames: Array<keyof DelegationConfig> = [
    'compaction',
    'memory_extraction',
    'classification',
    'tool_summarisation',
    'complex_reasoning',
  ];

  taskNames.forEach((taskName) => {
    const settings = backgroundModels[taskName];
    if (!settings || settings.enabled === false) {
      return;
    }

    const baseConfig = templatesByProvider[settings.provider];
    const label = `${settings.provider}/${settings.model}`;
    try {
      // Reuse the known provider settings when available; otherwise start from the bare pair.
      const client = createClientFromConfig(
        baseConfig
          ? { ...baseConfig, provider: settings.provider, model: settings.model }
          : { provider: settings.provider, model: settings.model },
      );
      result[taskName] = { client, label, fallbackTier: settings.fallback_tier };
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      console.warn(
        `[Flynn:routing] Failed to initialize background model override for ${taskName} ` +
        `(${label}): ${reason}`,
      );
    }
  });

  return result;
}
function parseResearchPrefix(text: string): string | undefined {
const trimmed = text.trim();
const researchMatch = trimmed.match(/^research(?:\s*[:,-])?\s+(.+)$/i);
@@ -174,6 +223,7 @@ export function createMessageRouter(deps: {
// Cache agents by session ID + agent config name to avoid recreating on every message
const agents = new Map<string, { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector }>();
const talkModeUntil = new Map<string, number>();
const activeRuns = new Map<string, AgentOrchestrator>();
async function maybeBuildTtsAttachment(responseText: string, channel: string) {
if (!isTtsEnabledForChannel(deps.config, channel)) {
@@ -261,6 +311,7 @@ export function createMessageRouter(deps: {
tool_summarisation: deps.config.agents.delegation.tool_summarisation ?? 'fast',
complex_reasoning: deps.config.agents.delegation.complex_reasoning ?? 'complex',
};
const backgroundModelOverrides = buildBackgroundModelOverrides(deps.config);
// Clone the tool registry and replace high-risk tools with sandboxed versions if configured.
let effectiveToolRegistry = deps.toolRegistry;
@@ -377,6 +428,7 @@ export function createMessageRouter(deps: {
toolExecutor: deps.toolExecutor,
primaryTier: effectiveTier,
delegation: delegationConfig,
backgroundModelOverrides,
maxDelegationDepth: deps.config.agents.max_delegation_depth ?? 3,
maxIterations: deps.config.agents.max_iterations,
compaction: deps.config.compaction.enabled ? {
@@ -419,6 +471,7 @@ export function createMessageRouter(deps: {
}
const handler = async (msg: InboundMessage, reply: (response: OutboundMessage) => Promise<void>): Promise<void> => {
const sessionIdForRun = `${msg.channel}:${msg.senderId}`;
let incomingText = msg.text;
let matchedReactionName: string | undefined;
const talkMode = deps.config.audio?.talk_mode;
@@ -721,6 +774,14 @@ export function createMessageRouter(deps: {
session.deleteConfig('modelTier');
return '';
},
// Requests cancellation of the run registered for this session, if there is one.
// Returns a user-facing status string in both cases.
// NOTE(review): runs are stored in activeRuns under sessionIdForRun
// (`${msg.channel}:${msg.senderId}`) but looked up here by session.id; confirm the two
// keys are always identical, otherwise this lookup never finds the active run.
cancelRun: () => {
const run = activeRuns.get(session.id);
if (!run || !run.isCancellable()) {
return 'No active operation to cancel.';
}
// Only signals the run; the message below says it stops at the next safe point.
run.cancel();
return 'Cancellation requested. The active operation will stop at the next safe point.';
},
delegateAgent: async (agentName: string, task: string) => {
const target = agentName.trim();
@@ -1293,6 +1354,7 @@ export function createMessageRouter(deps: {
}
let response: string;
activeRuns.set(sessionIdForRun, agent);
try {
response = await agent.process(messageText, attachments);
} catch (error) {
@@ -1322,6 +1384,8 @@ export function createMessageRouter(deps: {
text: 'Sorry, an error occurred while processing your message.',
replyTo: msg.id,
});
} finally {
activeRuns.delete(sessionIdForRun);
}
};
+151 -1
View File
@@ -1,6 +1,6 @@
import type { GatewayRequest, OutboundMessage } from '../protocol.js';
import { makeResponse, makeError, ErrorCode } from '../protocol.js';
import type { Config } from '../../config/index.js';
import { MODEL_PROVIDERS, type Config, type ModelProvider } from '../../config/index.js';
export interface ConfigHandlerDeps {
config: Config;
@@ -163,6 +163,156 @@ const PATCHABLE_KEYS: Record<string, (config: Config, value: unknown) => boolean
config.server.nodes.push.enabled = value;
return true;
},
// Tier patch handlers: each accepts only 'fast' | 'default' | 'complex' | 'local',
// writes it to the matching config field and reports success; any other value is rejected.
'agents.primary_tier': (config, value) => {
  if (value === 'fast' || value === 'default' || value === 'complex' || value === 'local') {
    config.agents.primary_tier = value;
    return true;
  }
  return false;
},
'agents.delegation.compaction': (config, value) => {
  if (value === 'fast' || value === 'default' || value === 'complex' || value === 'local') {
    config.agents.delegation.compaction = value;
    return true;
  }
  return false;
},
'agents.delegation.memory_extraction': (config, value) => {
  if (value === 'fast' || value === 'default' || value === 'complex' || value === 'local') {
    config.agents.delegation.memory_extraction = value;
    return true;
  }
  return false;
},
'agents.delegation.classification': (config, value) => {
  if (value === 'fast' || value === 'default' || value === 'complex' || value === 'local') {
    config.agents.delegation.classification = value;
    return true;
  }
  return false;
},
'agents.delegation.tool_summarisation': (config, value) => {
  if (value === 'fast' || value === 'default' || value === 'complex' || value === 'local') {
    config.agents.delegation.tool_summarisation = value;
    return true;
  }
  return false;
},
'agents.delegation.complex_reasoning': (config, value) => {
  if (value === 'fast' || value === 'default' || value === 'complex' || value === 'local') {
    config.agents.delegation.complex_reasoning = value;
    return true;
  }
  return false;
},
// Patch handlers for the `compaction` background-model override. Each handler validates
// the raw value, creates the entry with placeholder defaults (openai / gpt-4o-mini / fast,
// enabled) if it does not exist yet, writes one field, and returns whether it was accepted.
'agents.background_models.compaction.enabled': (config, value) => {
  if (typeof value !== 'boolean') {return false;}
  config.agents.background_models.compaction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.compaction.enabled = value;
  return true;
},
'agents.background_models.compaction.provider': (config, value) => {
  // Require an actual string. Validating String(value) would let a non-string such as
  // ['openai'] pass the membership test and then be stored unchanged in the config.
  if (typeof value !== 'string' || !MODEL_PROVIDERS.includes(value as ModelProvider)) {return false;}
  config.agents.background_models.compaction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.compaction.provider = value as ModelProvider;
  return true;
},
'agents.background_models.compaction.model': (config, value) => {
  // Whitespace-only names are rejected; accepted names are stored trimmed.
  if (typeof value !== 'string' || value.trim().length === 0) {return false;}
  config.agents.background_models.compaction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.compaction.model = value.trim();
  return true;
},
'agents.background_models.compaction.fallback_tier': (config, value) => {
  if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;}
  config.agents.background_models.compaction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.compaction.fallback_tier = value;
  return true;
},
// Patch handlers for the `memory_extraction` background-model override. Each handler
// validates the raw value, creates the entry with placeholder defaults (openai /
// gpt-4o-mini / fast, enabled) if it does not exist yet, writes one field, and returns
// whether it was accepted.
'agents.background_models.memory_extraction.enabled': (config, value) => {
  if (typeof value !== 'boolean') {return false;}
  config.agents.background_models.memory_extraction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.memory_extraction.enabled = value;
  return true;
},
'agents.background_models.memory_extraction.provider': (config, value) => {
  // Require an actual string. Validating String(value) would let a non-string such as
  // ['openai'] pass the membership test and then be stored unchanged in the config.
  if (typeof value !== 'string' || !MODEL_PROVIDERS.includes(value as ModelProvider)) {return false;}
  config.agents.background_models.memory_extraction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.memory_extraction.provider = value as ModelProvider;
  return true;
},
'agents.background_models.memory_extraction.model': (config, value) => {
  // Whitespace-only names are rejected; accepted names are stored trimmed.
  if (typeof value !== 'string' || value.trim().length === 0) {return false;}
  config.agents.background_models.memory_extraction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.memory_extraction.model = value.trim();
  return true;
},
'agents.background_models.memory_extraction.fallback_tier': (config, value) => {
  if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;}
  config.agents.background_models.memory_extraction ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.memory_extraction.fallback_tier = value;
  return true;
},
// Patch handlers for the `classification` background-model override. Each handler
// validates the raw value, creates the entry with placeholder defaults (openai /
// gpt-4o-mini / fast, enabled) if it does not exist yet, writes one field, and returns
// whether it was accepted.
'agents.background_models.classification.enabled': (config, value) => {
  if (typeof value !== 'boolean') {return false;}
  config.agents.background_models.classification ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.classification.enabled = value;
  return true;
},
'agents.background_models.classification.provider': (config, value) => {
  // Require an actual string. Validating String(value) would let a non-string such as
  // ['openai'] pass the membership test and then be stored unchanged in the config.
  if (typeof value !== 'string' || !MODEL_PROVIDERS.includes(value as ModelProvider)) {return false;}
  config.agents.background_models.classification ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.classification.provider = value as ModelProvider;
  return true;
},
'agents.background_models.classification.model': (config, value) => {
  // Whitespace-only names are rejected; accepted names are stored trimmed.
  if (typeof value !== 'string' || value.trim().length === 0) {return false;}
  config.agents.background_models.classification ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.classification.model = value.trim();
  return true;
},
'agents.background_models.classification.fallback_tier': (config, value) => {
  if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;}
  config.agents.background_models.classification ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.classification.fallback_tier = value;
  return true;
},
// Patch handlers for the `tool_summarisation` background-model override. Each handler
// validates the raw value, creates the entry with placeholder defaults (openai /
// gpt-4o-mini / fast, enabled) if it does not exist yet, writes one field, and returns
// whether it was accepted.
'agents.background_models.tool_summarisation.enabled': (config, value) => {
  if (typeof value !== 'boolean') {return false;}
  config.agents.background_models.tool_summarisation ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.tool_summarisation.enabled = value;
  return true;
},
'agents.background_models.tool_summarisation.provider': (config, value) => {
  // Require an actual string. Validating String(value) would let a non-string such as
  // ['openai'] pass the membership test and then be stored unchanged in the config.
  if (typeof value !== 'string' || !MODEL_PROVIDERS.includes(value as ModelProvider)) {return false;}
  config.agents.background_models.tool_summarisation ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.tool_summarisation.provider = value as ModelProvider;
  return true;
},
'agents.background_models.tool_summarisation.model': (config, value) => {
  // Whitespace-only names are rejected; accepted names are stored trimmed.
  if (typeof value !== 'string' || value.trim().length === 0) {return false;}
  config.agents.background_models.tool_summarisation ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.tool_summarisation.model = value.trim();
  return true;
},
'agents.background_models.tool_summarisation.fallback_tier': (config, value) => {
  if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;}
  config.agents.background_models.tool_summarisation ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.tool_summarisation.fallback_tier = value;
  return true;
},
// Patch handlers for the `complex_reasoning` background-model override. Each handler
// validates the raw value, creates the entry with placeholder defaults (openai /
// gpt-4o-mini / fast, enabled) if it does not exist yet, writes one field, and returns
// whether it was accepted.
'agents.background_models.complex_reasoning.enabled': (config, value) => {
  if (typeof value !== 'boolean') {return false;}
  config.agents.background_models.complex_reasoning ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.complex_reasoning.enabled = value;
  return true;
},
'agents.background_models.complex_reasoning.provider': (config, value) => {
  // Require an actual string. Validating String(value) would let a non-string such as
  // ['openai'] pass the membership test and then be stored unchanged in the config.
  if (typeof value !== 'string' || !MODEL_PROVIDERS.includes(value as ModelProvider)) {return false;}
  config.agents.background_models.complex_reasoning ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.complex_reasoning.provider = value as ModelProvider;
  return true;
},
'agents.background_models.complex_reasoning.model': (config, value) => {
  // Whitespace-only names are rejected; accepted names are stored trimmed.
  if (typeof value !== 'string' || value.trim().length === 0) {return false;}
  config.agents.background_models.complex_reasoning ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.complex_reasoning.model = value.trim();
  return true;
},
'agents.background_models.complex_reasoning.fallback_tier': (config, value) => {
  if (value !== 'fast' && value !== 'default' && value !== 'complex' && value !== 'local') {return false;}
  config.agents.background_models.complex_reasoning ??= { enabled: true, provider: 'openai', model: 'gpt-4o-mini', fallback_tier: 'fast' };
  config.agents.background_models.complex_reasoning.fallback_tier = value;
  return true;
},
'automation.delivery_mode': (config, value) => {
if (value !== 'shared_session' && value !== 'isolated_job' && value !== 'announce') {return false;}
config.automation ??= {} as Config['automation'];
+77 -1
View File
@@ -2,12 +2,13 @@ import { randomUUID } from 'crypto';
import type { SessionManager } from '../session/manager.js';
import type { ModelClient } from '../models/types.js';
import type { ModelRouter, ModelTier } from '../models/router.js';
import { createClientFromConfig } from '../daemon/models.js';
import type { Config, ModelConfig, ModelProvider } from '../config/index.js';
import type { ToolRegistry } from '../tools/registry.js';
import type { ToolExecutor } from '../tools/executor.js';
import { AgentOrchestrator, type DelegationConfig } from '../backends/native/orchestrator.js';
import type { ToolUseEvent } from '../backends/native/agent.js';
import type { MemoryStore } from '../memory/store.js';
import type { Config } from '../config/index.js';
import { summarizeSessionOnEnd, type SessionEndSummaryConfig } from '../session/endSummary.js';
export interface SessionBridgeConfig {
@@ -284,6 +285,7 @@ export class SessionBridge {
tool_summarisation: config?.agents.delegation.tool_summarisation ?? 'fast',
complex_reasoning: config?.agents.delegation.complex_reasoning ?? 'complex',
};
const backgroundModelOverrides = this.buildBackgroundModelOverrides();
agent = new AgentOrchestrator({
modelRouter: this.config.modelClient as ModelRouter,
@@ -293,6 +295,7 @@ export class SessionBridge {
toolExecutor: this.config.toolExecutor,
primaryTier,
delegation: delegationConfig,
backgroundModelOverrides,
maxDelegationDepth: config?.agents.max_delegation_depth ?? 3,
maxIterations: config?.agents.max_iterations,
compaction: config?.compaction.enabled ? {
@@ -337,4 +340,77 @@ export class SessionBridge {
}
return agent;
}
/**
 * Builds the per-task model override table from the bridge's runtime config.
 *
 * Returns an empty table when no runtime config is present. Otherwise, each delegation
 * task with an entry that is not explicitly disabled gets a client created from its
 * provider/model, using an already-known config for that provider as a template when
 * one exists. Tasks whose client cannot be created are logged and omitted.
 */
private buildBackgroundModelOverrides(): Partial<Record<keyof DelegationConfig, {
  client: ModelClient;
  label: string;
  fallbackTier: ModelTier;
}>> {
  const result: Partial<Record<keyof DelegationConfig, {
    client: ModelClient;
    label: string;
    fallbackTier: ModelTier;
  }>> = {};

  const cfg = this.config.config;
  if (!cfg) {
    return result;
  }

  const templatesByProvider = this.buildProviderConfigMap(cfg);
  const backgroundModels = cfg.agents?.background_models ?? {};
  const taskNames: Array<keyof DelegationConfig> = [
    'compaction',
    'memory_extraction',
    'classification',
    'tool_summarisation',
    'complex_reasoning',
  ];

  for (const taskName of taskNames) {
    const settings = backgroundModels[taskName];
    const isActive = Boolean(settings) && settings?.enabled !== false;
    if (!settings || !isActive) {
      continue;
    }

    const baseConfig = templatesByProvider[settings.provider];
    const label = `${settings.provider}/${settings.model}`;
    try {
      // Start from the known provider settings when available, overriding provider and model.
      const client = createClientFromConfig(
        baseConfig
          ? { ...baseConfig, provider: settings.provider, model: settings.model }
          : { provider: settings.provider, model: settings.model },
      );
      result[taskName] = { client, label, fallbackTier: settings.fallback_tier };
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      console.warn(
        `[Flynn:gateway] Failed to initialize background model override for ${taskName} ` +
        `(${label}): ${reason}`,
      );
    }
  }

  return result;
}
/**
 * Indexes the configured model configs by provider.
 *
 * Configs are visited in the order default, fast, complex, local, then local_providers;
 * each config's `fallback` (when present) is recorded right after the config itself.
 * When several configs share a provider, the one visited last wins.
 *
 * @param config Runtime configuration whose `models` section is scanned.
 * @returns Map from provider to one model config for that provider.
 */
private buildProviderConfigMap(config: Config): Partial<Record<ModelProvider, ModelConfig>> {
  const { models } = config;

  // The default config is always included; the optional tiers only when set.
  const candidates: ModelConfig[] = [models.default];
  for (const tierConfig of [models.fast, models.complex, models.local]) {
    if (tierConfig) {
      candidates.push(tierConfig);
    }
  }
  candidates.push(...Object.values(models.local_providers ?? {}));

  const byProvider: Partial<Record<ModelProvider, ModelConfig>> = {};
  candidates.forEach((candidate) => {
    byProvider[candidate.provider] = candidate;
    const { fallback } = candidate;
    if (fallback) {
      byProvider[fallback.provider] = fallback;
    }
  });
  return byProvider;
}
}