feat(security): harden tool provenance and skill isolation
This commit is contained in:
@@ -143,6 +143,10 @@ export class NativeAgent {
|
||||
private async toolLoop(): Promise<string> {
|
||||
const tools = this.toolRegistry!.filteredToAnthropicFormat(this._toolPolicyContext);
|
||||
|
||||
// Track whether untrusted fetched content (results of web.fetch / web.search / browser.content) has been introduced
|
||||
// during this run. Used to harden against prompt injection.
|
||||
let untrustedContentSeen = false;
|
||||
|
||||
// Detect tool inventory changes to combat conversational inertia in long sessions.
|
||||
// When tools change (e.g. new tools added between restarts), the model's prior messages
|
||||
// saying "I can't do that" can override tool definitions. Injecting a system note fixes this.
|
||||
@@ -262,11 +266,24 @@ export class NativeAgent {
|
||||
const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name;
|
||||
this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args });
|
||||
|
||||
const result = await this.toolExecutor!.execute(internalName, tc.args, this._toolPolicyContext);
|
||||
const perCallContext: ToolPolicyContext | undefined = this._toolPolicyContext
|
||||
? { ...this._toolPolicyContext, untrustedContent: untrustedContentSeen }
|
||||
: undefined;
|
||||
|
||||
const result = await this.toolExecutor!.execute(internalName, tc.args, perCallContext);
|
||||
|
||||
this.onToolUse?.({ type: 'end', tool: internalName, result });
|
||||
|
||||
const resultContent = result.success ? result.output : (result.error ?? 'Unknown error');
|
||||
const provenance = (internalName === 'web.fetch' || internalName === 'web.search' || internalName === 'browser.content')
|
||||
? 'fetched_content'
|
||||
: 'tool_output';
|
||||
|
||||
if (provenance === 'fetched_content') {
|
||||
untrustedContentSeen = true;
|
||||
}
|
||||
|
||||
const rawContent = result.success ? result.output : (result.error ?? 'Unknown error');
|
||||
const resultContent = `[provenance=${provenance} tool=${internalName} untrusted=${provenance === 'fetched_content' ? 'true' : 'false'}]\n${rawContent}\n[/provenance]`;
|
||||
toolResultBlocks.push({
|
||||
type: 'tool_result',
|
||||
tool_use_id: tc.id,
|
||||
|
||||
Reference in New Issue
Block a user