Improve in-flight cancel latency by propagating the run's abort signal to provider requests

This commit is contained in:
William Valentin
2026-02-19 12:24:39 -08:00
parent 290303c14e
commit 2c3a00f6dd
12 changed files with 148 additions and 20 deletions
+4 -1
View File
@@ -100,7 +100,10 @@ export class AnthropicClient implements ModelClient {
params.thinking = { type: 'enabled', budget_tokens: 4096 };
}
const response = await this.client.messages.create(params) as AnthropicMessage;
const response = await this.client.messages.create(
params,
request.signal ? { signal: request.signal } : undefined,
) as AnthropicMessage;
const textContent = response.content.find((c) => c.type === 'text');
const content = textContent?.type === 'text' ? textContent.text : '';
+8 -2
View File
@@ -65,7 +65,10 @@ export class BedrockClient implements ModelClient {
}
const command = new ConverseCommand(params);
const response = await this.client.send(command);
const response = await this.client.send(
command,
request.signal ? { abortSignal: request.signal } : undefined,
);
// Extract text and tool_use content from the response
const outputContent = response.output?.message?.content ?? [];
@@ -126,7 +129,10 @@ export class BedrockClient implements ModelClient {
try {
const command = new ConverseStreamCommand(params);
const response = await this.client.send(command);
const response = await this.client.send(
command,
request.signal ? { abortSignal: request.signal } : undefined,
);
let inputTokens = 0;
let outputTokens = 0;
+8 -2
View File
@@ -163,7 +163,10 @@ export class GitHubModelsClient implements ModelClient {
(params as OpenAI.ChatCompletionCreateParamsNonStreaming & { reasoning_effort?: 'low' | 'medium' | 'high' }).reasoning_effort = 'medium';
}
const response = await this.client.chat.completions.create(params);
const response = await this.client.chat.completions.create(
params,
request.signal ? { signal: request.signal } : undefined,
);
const choice = response.choices[0];
const content = choice?.message?.content ?? '';
@@ -237,7 +240,10 @@ export class GitHubModelsClient implements ModelClient {
}
try {
const stream = await this.client.chat.completions.create(params);
const stream = await this.client.chat.completions.create(
params,
request.signal ? { signal: request.signal } : undefined,
);
let totalInputTokens = 0;
let totalOutputTokens = 0;
+5 -1
View File
@@ -247,13 +247,16 @@ export class LlamaCppClient implements ModelClient {
}
const controller = new AbortController();
const signal = request.signal
? AbortSignal.any([request.signal, controller.signal])
: controller.signal;
const timer = setTimeout(() => controller.abort(), this.requestTimeout);
try {
response = await fetch(`${this.endpoint}/v1/chat/completions`, {
method: 'POST',
headers,
body: JSON.stringify(body),
signal: controller.signal,
signal,
});
} finally {
clearTimeout(timer);
@@ -331,6 +334,7 @@ export class LlamaCppClient implements ModelClient {
method: 'POST',
headers,
body: JSON.stringify(body),
signal: request.signal,
});
if (!response.ok) {
+5 -1
View File
@@ -140,6 +140,7 @@ export class OpenAIClient implements ModelClient {
method: 'POST',
headers,
body: JSON.stringify(body),
signal: request.signal,
});
if (!res.ok) {
@@ -277,7 +278,10 @@ export class OpenAIClient implements ModelClient {
let response: OpenAI.ChatCompletion;
try {
response = await this.client.chat.completions.create(params);
response = await this.client.chat.completions.create(
params,
request.signal ? { signal: request.signal } : undefined,
);
} catch (error) {
const status = typeof (error as { status?: unknown })?.status === 'number'
? (error as { status: number }).status
+2
View File
@@ -80,6 +80,8 @@ export interface ChatRequest {
tools?: ToolDefinition[];
/** Enable extended thinking/reasoning mode for this request. */
thinking?: boolean;
/** Optional abort signal for cancelling in-flight provider requests. */
signal?: AbortSignal;
}
export interface ChatResponse {