Improve in-flight cancel latency by propagating the run's abort signal to provider requests
This commit is contained in:
@@ -100,7 +100,10 @@ export class AnthropicClient implements ModelClient {
|
||||
params.thinking = { type: 'enabled', budget_tokens: 4096 };
|
||||
}
|
||||
|
||||
const response = await this.client.messages.create(params) as AnthropicMessage;
|
||||
const response = await this.client.messages.create(
|
||||
params,
|
||||
request.signal ? { signal: request.signal } : undefined,
|
||||
) as AnthropicMessage;
|
||||
|
||||
const textContent = response.content.find((c) => c.type === 'text');
|
||||
const content = textContent?.type === 'text' ? textContent.text : '';
|
||||
|
||||
@@ -65,7 +65,10 @@ export class BedrockClient implements ModelClient {
|
||||
}
|
||||
|
||||
const command = new ConverseCommand(params);
|
||||
const response = await this.client.send(command);
|
||||
const response = await this.client.send(
|
||||
command,
|
||||
request.signal ? { abortSignal: request.signal } : undefined,
|
||||
);
|
||||
|
||||
// Extract text and tool_use content from the response
|
||||
const outputContent = response.output?.message?.content ?? [];
|
||||
@@ -126,7 +129,10 @@ export class BedrockClient implements ModelClient {
|
||||
|
||||
try {
|
||||
const command = new ConverseStreamCommand(params);
|
||||
const response = await this.client.send(command);
|
||||
const response = await this.client.send(
|
||||
command,
|
||||
request.signal ? { abortSignal: request.signal } : undefined,
|
||||
);
|
||||
|
||||
let inputTokens = 0;
|
||||
let outputTokens = 0;
|
||||
|
||||
@@ -163,7 +163,10 @@ export class GitHubModelsClient implements ModelClient {
|
||||
(params as OpenAI.ChatCompletionCreateParamsNonStreaming & { reasoning_effort?: 'low' | 'medium' | 'high' }).reasoning_effort = 'medium';
|
||||
}
|
||||
|
||||
const response = await this.client.chat.completions.create(params);
|
||||
const response = await this.client.chat.completions.create(
|
||||
params,
|
||||
request.signal ? { signal: request.signal } : undefined,
|
||||
);
|
||||
|
||||
const choice = response.choices[0];
|
||||
const content = choice?.message?.content ?? '';
|
||||
@@ -237,7 +240,10 @@ export class GitHubModelsClient implements ModelClient {
|
||||
}
|
||||
|
||||
try {
|
||||
const stream = await this.client.chat.completions.create(params);
|
||||
const stream = await this.client.chat.completions.create(
|
||||
params,
|
||||
request.signal ? { signal: request.signal } : undefined,
|
||||
);
|
||||
|
||||
let totalInputTokens = 0;
|
||||
let totalOutputTokens = 0;
|
||||
|
||||
@@ -247,13 +247,16 @@ export class LlamaCppClient implements ModelClient {
|
||||
}
|
||||
|
||||
const controller = new AbortController();
|
||||
const signal = request.signal
|
||||
? AbortSignal.any([request.signal, controller.signal])
|
||||
: controller.signal;
|
||||
const timer = setTimeout(() => controller.abort(), this.requestTimeout);
|
||||
try {
|
||||
response = await fetch(`${this.endpoint}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify(body),
|
||||
signal: controller.signal,
|
||||
signal,
|
||||
});
|
||||
} finally {
|
||||
clearTimeout(timer);
|
||||
@@ -331,6 +334,7 @@ export class LlamaCppClient implements ModelClient {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify(body),
|
||||
signal: request.signal,
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
|
||||
@@ -140,6 +140,7 @@ export class OpenAIClient implements ModelClient {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify(body),
|
||||
signal: request.signal,
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
@@ -277,7 +278,10 @@ export class OpenAIClient implements ModelClient {
|
||||
|
||||
let response: OpenAI.ChatCompletion;
|
||||
try {
|
||||
response = await this.client.chat.completions.create(params);
|
||||
response = await this.client.chat.completions.create(
|
||||
params,
|
||||
request.signal ? { signal: request.signal } : undefined,
|
||||
);
|
||||
} catch (error) {
|
||||
const status = typeof (error as { status?: unknown })?.status === 'number'
|
||||
? (error as { status: number }).status
|
||||
|
||||
@@ -80,6 +80,8 @@ export interface ChatRequest {
|
||||
tools?: ToolDefinition[];
|
||||
/** Enable extended thinking/reasoning mode for this request. */
|
||||
thinking?: boolean;
|
||||
/** Optional abort signal for cancelling in-flight provider requests. */
|
||||
signal?: AbortSignal;
|
||||
}
|
||||
|
||||
export interface ChatResponse {
|
||||
|
||||
Reference in New Issue
Block a user