From 56f3bfbc29a466af07b7b84c95549315eb430058 Mon Sep 17 00:00:00 2001
From: daniel-lxs
Date: Sat, 7 Feb 2026 02:28:31 -0500
Subject: [PATCH 1/2] refactor: migrate Anthropic provider to @ai-sdk/anthropic

Replace the raw @anthropic-ai/sdk implementation with @ai-sdk/anthropic
(Vercel AI SDK) for consistency with other providers (Bedrock, DeepSeek,
Mistral, etc.).

Changes:
- Replace the Anthropic() client with createAnthropic() from @ai-sdk/anthropic
- Replace manual stream parsing with streamText() + processAiSdkStreamPart()
- Replace client.messages.create() with generateText() for completePrompt()
- Use convertToAiSdkMessages() and convertToolsForAiSdk() for format conversion
- Handle prompt caching via AI SDK providerOptions (cacheControl on messages)
- Handle extended thinking via providerOptions.anthropic.thinking
- Add getThoughtSignature() and getRedactedThinkingBlocks() for thinking
  signature round-tripping (matching the Bedrock pattern; improves on the
  original, which had a TODO for this)
- Add isAiSdkProvider() returning true
- Update tests to mock @ai-sdk/anthropic and ai instead of the raw SDK
---
 pnpm-lock.yaml                                |   3 +
 src/api/providers/__tests__/anthropic.spec.ts | 962 ++++++++----------
 src/api/providers/anthropic.ts                | 558 +++++-----
 src/package.json                              |   1 +
 4 files changed, 693 insertions(+), 831 deletions(-)

diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 7f48e153c9a..ed15a95c3f1 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -749,6 +749,9 @@ importers:
       '@ai-sdk/amazon-bedrock':
         specifier: ^4.0.51
         version: 4.0.51(zod@3.25.76)
+      '@ai-sdk/anthropic':
+        specifier: ^3.0.38
+        version: 3.0.38(zod@3.25.76)
       '@ai-sdk/baseten':
         specifier: ^1.0.31
         version: 1.0.31(zod@3.25.76)
diff --git a/src/api/providers/__tests__/anthropic.spec.ts b/src/api/providers/__tests__/anthropic.spec.ts
index 7a107edbc8b..c6d07bc82e0 100644
--- a/src/api/providers/__tests__/anthropic.spec.ts
+++ b/src/api/providers/__tests__/anthropic.spec.ts
@@ -12,79 +12,80 @@ vitest.mock("@roo-code/telemetry", () => ({
 	},
 }))
 
-const mockCreate = vitest.fn()
-
-vitest.mock("@anthropic-ai/sdk", () => {
-	const mockAnthropicConstructor = vitest.fn().mockImplementation(() => ({
-		messages: {
-			create: mockCreate.mockImplementation(async (options) => {
-				if (!options.stream) {
-					return {
-						id: "test-completion",
-						content: [{ type: "text", text: "Test response" }],
-						role: "assistant",
-						model: options.model,
-						usage: {
-							input_tokens: 10,
-							output_tokens: 5,
-						},
-					}
-				}
-				return {
-					async *[Symbol.asyncIterator]() {
-						yield {
-							type: "message_start",
-							message: {
-								usage: {
-									input_tokens: 100,
-									output_tokens: 50,
-									cache_creation_input_tokens: 20,
-									cache_read_input_tokens: 10,
-								},
-							},
-						}
-						yield {
-							type: "content_block_start",
-							index: 0,
-							content_block: {
-								type: "text",
-								text: "Hello",
-							},
-						}
-						yield {
-							type: "content_block_delta",
-							delta: {
-								type: "text_delta",
-								text: " world",
-							},
-						}
-					},
-				}
-			}),
-		},
-	}))
+// Mock the AI SDK
+const mockStreamText = vitest.fn()
+const mockGenerateText = vitest.fn()
+
+vitest.mock("ai", () => ({
+	streamText: (...args: any[]) => mockStreamText(...args),
+	generateText: (...args: any[]) => mockGenerateText(...args),
+	tool: vitest.fn(),
+	jsonSchema: vitest.fn(),
+	ToolSet: {},
+}))
 
-	return {
-		Anthropic: mockAnthropicConstructor,
-	}
-})
+// Mock the @ai-sdk/anthropic provider
+const mockCreateAnthropic = vitest.fn()
+
+vitest.mock("@ai-sdk/anthropic", () => ({
+	createAnthropic: (...args: any[]) => mockCreateAnthropic(...args),
+}))
+
+// Mock ai-sdk transform utilities
+vitest.mock("../../transform/ai-sdk", () => ({ + convertToAiSdkMessages: vitest.fn().mockReturnValue([{ role: "user", content: [{ type: "text", text: "Hello" }] }]), + convertToolsForAiSdk: vitest.fn().mockReturnValue(undefined), + processAiSdkStreamPart: vitest.fn().mockImplementation(function* (part: any) { + if (part.type === "text-delta") { + yield { type: "text", text: part.text } + } else if (part.type === "reasoning-delta") { + yield { type: "reasoning", text: part.text } + } else if (part.type === "tool-input-start") { + yield { type: "tool_call_start", id: part.id, name: part.toolName } + } else if (part.type === "tool-input-delta") { + yield { type: "tool_call_delta", id: part.id, delta: part.delta } + } else if (part.type === "tool-input-end") { + yield { type: "tool_call_end", id: part.id } + } + }), + mapToolChoice: vitest.fn().mockReturnValue(undefined), + handleAiSdkError: vitest.fn().mockImplementation((error: any) => error), +})) -// Import after mock +// Import mocked modules +import { convertToAiSdkMessages, convertToolsForAiSdk, mapToolChoice } from "../../transform/ai-sdk" import { Anthropic } from "@anthropic-ai/sdk" -const mockAnthropicConstructor = vitest.mocked(Anthropic) +// Helper: create a mock provider function +function createMockProviderFn() { + const providerFn = vitest.fn().mockReturnValue("mock-model") + return providerFn +} describe("AnthropicHandler", () => { let handler: AnthropicHandler let mockOptions: ApiHandlerOptions + let mockProviderFn: ReturnType beforeEach(() => { mockOptions = { apiKey: "test-api-key", apiModelId: "claude-3-5-sonnet-20241022", } + + mockProviderFn = createMockProviderFn() + mockCreateAnthropic.mockReturnValue(mockProviderFn) + handler = new AnthropicHandler(mockOptions) vitest.clearAllMocks() + + // Re-set mock defaults after clearAllMocks + mockCreateAnthropic.mockReturnValue(mockProviderFn) + vitest + .mocked(convertToAiSdkMessages) + .mockReturnValue([{ role: "user", content: [{ type: "text", text: "Hello" }] }]) + vitest.mocked(convertToolsForAiSdk).mockReturnValue(undefined) + vitest.mocked(mapToolChoice).mockReturnValue(undefined) }) describe("constructor", () => { @@ -94,7 +95,6 @@ describe("AnthropicHandler", () => { }) it("should initialize with undefined API key", () => { - // The SDK will handle API key validation, so we just verify it initializes const handlerWithoutKey = new AnthropicHandler({ ...mockOptions, apiKey: undefined, @@ -112,44 +112,132 @@ describe("AnthropicHandler", () => { }) it("use apiKey for passing token if anthropicUseAuthToken is not set", () => { - const handlerWithCustomUrl = new AnthropicHandler({ + mockCreateAnthropic.mockClear() + const _ = new AnthropicHandler({ ...mockOptions, }) - expect(handlerWithCustomUrl).toBeInstanceOf(AnthropicHandler) - expect(mockAnthropicConstructor).toHaveBeenCalledTimes(1) - expect(mockAnthropicConstructor.mock.calls[0]![0]!.apiKey).toEqual("test-api-key") - expect(mockAnthropicConstructor.mock.calls[0]![0]!.authToken).toBeUndefined() + expect(mockCreateAnthropic).toHaveBeenCalledTimes(1) + const callArgs = mockCreateAnthropic.mock.calls[0]![0]! 
+ expect(callArgs.apiKey).toEqual("test-api-key") + expect(callArgs.authToken).toBeUndefined() }) it("use apiKey for passing token if anthropicUseAuthToken is set but custom base URL is not given", () => { - const handlerWithCustomUrl = new AnthropicHandler({ + mockCreateAnthropic.mockClear() + const _ = new AnthropicHandler({ ...mockOptions, anthropicUseAuthToken: true, }) - expect(handlerWithCustomUrl).toBeInstanceOf(AnthropicHandler) - expect(mockAnthropicConstructor).toHaveBeenCalledTimes(1) - expect(mockAnthropicConstructor.mock.calls[0]![0]!.apiKey).toEqual("test-api-key") - expect(mockAnthropicConstructor.mock.calls[0]![0]!.authToken).toBeUndefined() + expect(mockCreateAnthropic).toHaveBeenCalledTimes(1) + const callArgs = mockCreateAnthropic.mock.calls[0]![0]! + expect(callArgs.apiKey).toEqual("test-api-key") + expect(callArgs.authToken).toBeUndefined() }) it("use authToken for passing token if both of anthropicBaseUrl and anthropicUseAuthToken are set", () => { + mockCreateAnthropic.mockClear() const customBaseUrl = "https://custom.anthropic.com" - const handlerWithCustomUrl = new AnthropicHandler({ + const _ = new AnthropicHandler({ ...mockOptions, anthropicBaseUrl: customBaseUrl, anthropicUseAuthToken: true, }) - expect(handlerWithCustomUrl).toBeInstanceOf(AnthropicHandler) - expect(mockAnthropicConstructor).toHaveBeenCalledTimes(1) - expect(mockAnthropicConstructor.mock.calls[0]![0]!.authToken).toEqual("test-api-key") - expect(mockAnthropicConstructor.mock.calls[0]![0]!.apiKey).toBeUndefined() + expect(mockCreateAnthropic).toHaveBeenCalledTimes(1) + const callArgs = mockCreateAnthropic.mock.calls[0]![0]! + expect(callArgs.authToken).toEqual("test-api-key") + expect(callArgs.apiKey).toBeUndefined() + }) + + it("should include 1M context beta header when enabled", () => { + mockCreateAnthropic.mockClear() + const _ = new AnthropicHandler({ + ...mockOptions, + apiModelId: "claude-sonnet-4-5", + anthropicBeta1MContext: true, + }) + expect(mockCreateAnthropic).toHaveBeenCalledTimes(1) + const callArgs = mockCreateAnthropic.mock.calls[0]![0]! + expect(callArgs.headers["anthropic-beta"]).toContain("context-1m-2025-08-07") + }) + + it("should include output-128k beta for thinking model", () => { + mockCreateAnthropic.mockClear() + const _ = new AnthropicHandler({ + ...mockOptions, + apiModelId: "claude-3-7-sonnet-20250219:thinking", + }) + expect(mockCreateAnthropic).toHaveBeenCalledTimes(1) + const callArgs = mockCreateAnthropic.mock.calls[0]![0]! + expect(callArgs.headers["anthropic-beta"]).toContain("output-128k-2025-02-19") }) }) describe("createMessage", () => { const systemPrompt = "You are a helpful assistant." 
+		function setupStreamTextMock(parts: any[], usage?: any, providerMetadata?: any) {
+			const asyncIterable = {
+				async *[Symbol.asyncIterator]() {
+					for (const part of parts) {
+						yield part
+					}
+				},
+			}
+			mockStreamText.mockReturnValue({
+				fullStream: asyncIterable,
+				usage: Promise.resolve(usage || { inputTokens: 100, outputTokens: 50 }),
+				providerMetadata: Promise.resolve(
+					providerMetadata || {
+						anthropic: {
+							cacheCreationInputTokens: 20,
+							cacheReadInputTokens: 10,
+						},
+					},
+				),
+			})
+		}
+
+		it("should stream text content using AI SDK", async () => {
+			setupStreamTextMock([
+				{ type: "text-delta", text: "Hello" },
+				{ type: "text-delta", text: " world" },
+			])
+
+			const stream = handler.createMessage(systemPrompt, [
+				{
+					role: "user",
+					content: [{ type: "text" as const, text: "First message" }],
+				},
+			])
+
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify text content
+			const textChunks = chunks.filter((chunk) => chunk.type === "text")
+			expect(textChunks).toHaveLength(2)
+			expect(textChunks[0].text).toBe("Hello")
+			expect(textChunks[1].text).toBe(" world")
+
+			// Verify usage information
+			const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
+			expect(usageChunks.length).toBeGreaterThan(0)
+		})
+
 		it("should handle prompt caching for supported models", async () => {
+			setupStreamTextMock(
+				[{ type: "text-delta", text: "Hello" }],
+				{ inputTokens: 100, outputTokens: 50 },
+				{
+					anthropic: {
+						cacheCreationInputTokens: 20,
+						cacheReadInputTokens: 10,
+					},
+				},
+			)
+
 			const stream = handler.createMessage(systemPrompt, [
 				{
 					role: "user",
@@ -170,56 +258,276 @@ describe("AnthropicHandler", () => {
 				chunks.push(chunk)
 			}
 
-			// Verify usage information
-			const usageChunk = chunks.find((chunk) => chunk.type === "usage")
+			// Verify usage information includes cache metrics
+			const usageChunk = chunks.find(
+				(chunk) => chunk.type === "usage" && (chunk.cacheWriteTokens || chunk.cacheReadTokens),
+			)
 			expect(usageChunk).toBeDefined()
-			expect(usageChunk?.inputTokens).toBe(100)
-			expect(usageChunk?.outputTokens).toBe(50)
 			expect(usageChunk?.cacheWriteTokens).toBe(20)
 			expect(usageChunk?.cacheReadTokens).toBe(10)
 
-			// Verify text content
-			const textChunks = chunks.filter((chunk) => chunk.type === "text")
-			expect(textChunks).toHaveLength(2)
-			expect(textChunks[0].text).toBe("Hello")
-			expect(textChunks[1].text).toBe(" world")
+			// Verify streamText was called
+			expect(mockStreamText).toHaveBeenCalled()
+		})
+
+		it("should pass tools via AI SDK when tools are provided", async () => {
+			const mockTools = [
+				{
+					type: "function" as const,
+					function: {
+						name: "get_weather",
+						description: "Get the current weather",
+						parameters: {
+							type: "object",
+							properties: {
+								location: { type: "string" },
+							},
+							required: ["location"],
+						},
+					},
+				},
+			]
+
+			setupStreamTextMock([{ type: "text-delta", text: "Weather check" }])
+
+			const stream = handler.createMessage(
+				systemPrompt,
+				[{ role: "user", content: [{ type: "text" as const, text: "What's the weather?" }] }],
+				{ taskId: "test-task", tools: mockTools },
+			)
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
 
-			// Verify API
-			expect(mockCreate).toHaveBeenCalled()
+			// Verify tools were converted
+			expect(convertToolsForAiSdk).toHaveBeenCalled()
+			expect(mockStreamText).toHaveBeenCalled()
+		})
+
+		it("should handle tool_choice mapping", async () => {
+			setupStreamTextMock([{ type: "text-delta", text: "test" }])
+
+			const stream = handler.createMessage(
+				systemPrompt,
+				[{ role: "user", content: [{ type: "text" as const, text: "test" }] }],
+				{ taskId: "test-task", tool_choice: "auto" },
+			)
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			expect(mapToolChoice).toHaveBeenCalledWith("auto")
+		})
+
+		it("should disable parallel tool use when parallelToolCalls is false", async () => {
+			setupStreamTextMock([{ type: "text-delta", text: "test" }])
+
+			const stream = handler.createMessage(
+				systemPrompt,
+				[{ role: "user", content: [{ type: "text" as const, text: "test" }] }],
+				{ taskId: "test-task", parallelToolCalls: false },
+			)
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			expect(mockStreamText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					providerOptions: expect.objectContaining({
+						anthropic: expect.objectContaining({
+							disableParallelToolUse: true,
+						}),
+					}),
+				}),
+			)
+		})
+
+		it("should not set disableParallelToolUse when parallelToolCalls is true or undefined", async () => {
+			setupStreamTextMock([{ type: "text-delta", text: "test" }])
+
+			const stream = handler.createMessage(
+				systemPrompt,
+				[{ role: "user", content: [{ type: "text" as const, text: "test" }] }],
+				{ taskId: "test-task", parallelToolCalls: true },
+			)
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			// providerOptions should not include disableParallelToolUse
+			const callArgs = mockStreamText.mock.calls[0]![0]
+			const anthropicOptions = callArgs?.providerOptions?.anthropic
+			expect(anthropicOptions?.disableParallelToolUse).toBeUndefined()
+		})
+
+		it("should handle tool call streaming via AI SDK", async () => {
+			setupStreamTextMock([
+				{ type: "tool-input-start", id: "toolu_123", toolName: "get_weather" },
+				{ type: "tool-input-delta", id: "toolu_123", delta: '{"location":' },
+				{ type: "tool-input-delta", id: "toolu_123", delta: '"London"}' },
+				{ type: "tool-input-end", id: "toolu_123" },
+			])
+
+			const stream = handler.createMessage(
+				systemPrompt,
+				[{ role: "user", content: [{ type: "text" as const, text: "What's the weather?" }] }],
+				{ taskId: "test-task" },
+			)
+
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const startChunk = chunks.find((c) => c.type === "tool_call_start")
+			expect(startChunk).toBeDefined()
+			expect(startChunk?.id).toBe("toolu_123")
+			expect(startChunk?.name).toBe("get_weather")
+
+			const deltaChunks = chunks.filter((c) => c.type === "tool_call_delta")
+			expect(deltaChunks).toHaveLength(2)
+
+			const endChunk = chunks.find((c) => c.type === "tool_call_end")
+			expect(endChunk).toBeDefined()
+		})
+
+		it("should capture thinking signature from stream events", async () => {
+			const testSignature = "test-thinking-signature"
+			setupStreamTextMock([
+				{
+					type: "reasoning-delta",
+					text: "thinking...",
+					providerMetadata: { anthropic: { signature: testSignature } },
+				},
+				{ type: "text-delta", text: "Answer" },
+			])
+
+			const stream = handler.createMessage(systemPrompt, [
+				{ role: "user", content: [{ type: "text" as const, text: "test" }] },
+			])
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			expect(handler.getThoughtSignature()).toBe(testSignature)
+		})
+
+		it("should capture redacted thinking blocks from stream events", async () => {
+			setupStreamTextMock([
+				{
+					type: "reasoning-delta",
+					text: "",
+					providerMetadata: { anthropic: { redactedData: "redacted-data-base64" } },
+				},
+				{ type: "text-delta", text: "Answer" },
+			])
+
+			const stream = handler.createMessage(systemPrompt, [
+				{ role: "user", content: [{ type: "text" as const, text: "test" }] },
+			])
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			const redactedBlocks = handler.getRedactedThinkingBlocks()
+			expect(redactedBlocks).toBeDefined()
+			expect(redactedBlocks).toHaveLength(1)
+			expect(redactedBlocks![0]).toEqual({
+				type: "redacted_thinking",
+				data: "redacted-data-base64",
+			})
+		})
+
+		it("should reset thinking state between requests", async () => {
+			// First request with signature
+			setupStreamTextMock([
+				{
+					type: "reasoning-delta",
+					text: "thinking...",
+					providerMetadata: { anthropic: { signature: "sig-1" } },
+				},
+			])
+
+			const stream1 = handler.createMessage(systemPrompt, [
+				{ role: "user", content: [{ type: "text" as const, text: "test 1" }] },
+			])
+			for await (const _chunk of stream1) {
+				// Consume
+			}
+			expect(handler.getThoughtSignature()).toBe("sig-1")
+
+			// Second request without signature
+			setupStreamTextMock([{ type: "text-delta", text: "plain answer" }])
+
+			const stream2 = handler.createMessage(systemPrompt, [
+				{ role: "user", content: [{ type: "text" as const, text: "test 2" }] },
+			])
+			for await (const _chunk of stream2) {
+				// Consume
+			}
+			expect(handler.getThoughtSignature()).toBeUndefined()
+		})
+
+		it("should include system message with cache control in the request", async () => {
+			setupStreamTextMock([{ type: "text-delta", text: "test" }])
+
+			const stream = handler.createMessage(systemPrompt, [
+				{ role: "user", content: [{ type: "text" as const, text: "test" }] },
+			])
+
+			for await (const _chunk of stream) {
+				// Consume
+			}
+
+			// Verify streamText was called with system message containing cache control
+			expect(mockStreamText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					messages: expect.arrayContaining([
+						expect.objectContaining({
+							role: "system",
+							content: systemPrompt,
+							providerOptions: {
+								anthropic: { cacheControl: { type: "ephemeral" } },
+							},
+						}),
+					]),
+				}),
+			)
 		})
 	})
 
 	describe("completePrompt", () => {
 		it("should complete prompt successfully", async () => {
+			mockGenerateText.mockResolvedValueOnce({
+				text: "Test response",
+			})
+
 			const result = await handler.completePrompt("Test prompt")
 			expect(result).toBe("Test response")
-			expect(mockCreate).toHaveBeenCalledWith({
-				model: mockOptions.apiModelId,
-				messages: [{ role: "user", content: "Test prompt" }],
-				max_tokens: 8192,
-				temperature: 0,
-				thinking: undefined,
-				stream: false,
-			})
+			expect(mockGenerateText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					prompt: "Test prompt",
+					temperature: 0,
+				}),
+			)
 		})
 
 		it("should handle API errors", async () => {
-			mockCreate.mockRejectedValueOnce(new Error("Anthropic completion error: API Error"))
-			await expect(handler.completePrompt("Test prompt")).rejects.toThrow("Anthropic completion error: API Error")
-		})
-
-		it("should handle non-text content", async () => {
-			mockCreate.mockImplementationOnce(async () => ({
-				content: [{ type: "image" }],
-			}))
-			const result = await handler.completePrompt("Test prompt")
-			expect(result).toBe("")
+			const error = new Error("Anthropic completion error: API Error")
+			mockGenerateText.mockRejectedValueOnce(error)
+			await expect(handler.completePrompt("Test prompt")).rejects.toThrow()
 		})
 
 		it("should handle empty response", async () => {
-			mockCreate.mockImplementationOnce(async () => ({
-				content: [{ type: "text", text: "" }],
-			}))
+			mockGenerateText.mockResolvedValueOnce({
+				text: "",
+			})
 			const result = await handler.completePrompt("Test prompt")
 			expect(result).toBe("")
 		})
@@ -299,447 +607,19 @@ describe("AnthropicHandler", () => {
 		})
 	})
 
-	describe("reasoning block filtering", () => {
-		const systemPrompt = "You are a helpful assistant."
-
-		it("should filter out internal reasoning blocks before sending to API", async () => {
-			handler = new AnthropicHandler({
-				apiKey: "test-api-key",
-				apiModelId: "claude-3-5-sonnet-20241022",
-			})
-
-			// Messages with internal reasoning blocks (from stored conversation history)
-			const messagesWithReasoning: Anthropic.Messages.MessageParam[] = [
-				{
-					role: "user",
-					content: "Hello",
-				},
-				{
-					role: "assistant",
-					content: [
-						{
-							type: "reasoning" as any,
-							text: "This is internal reasoning that should be filtered",
-						},
-						{
-							type: "text",
-							text: "This is the response",
-						},
-					],
-				},
-				{
-					role: "user",
-					content: "Continue",
-				},
-			]
-
-			const stream = handler.createMessage(systemPrompt, messagesWithReasoning)
-			const chunks: any[] = []
-
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Verify the API was called with filtered messages (no reasoning blocks)
-			const calledMessages = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0].messages
-			expect(calledMessages).toHaveLength(3)
-
-			// Check assistant message - should have reasoning block filtered out
-			const assistantMessage = calledMessages.find((m: any) => m.role === "assistant")
-			expect(assistantMessage).toBeDefined()
-			expect(assistantMessage.content).toEqual([{ type: "text", text: "This is the response" }])
-
-			// Verify reasoning blocks were NOT sent to the API
-			expect(assistantMessage.content).not.toContainEqual(expect.objectContaining({ type: "reasoning" }))
-		})
-
-		it("should filter empty messages after removing all reasoning blocks", async () => {
-			handler = new AnthropicHandler({
-				apiKey: "test-api-key",
-				apiModelId: "claude-3-5-sonnet-20241022",
-			})
-
-			// Message with only reasoning content (should be completely filtered)
-			const messagesWithOnlyReasoning: Anthropic.Messages.MessageParam[] = [
-				{
-					role: "user",
-					content: "Hello",
-				},
-				{
-					role: "assistant",
-					content: [
-						{
-							type: "reasoning" as any,
-							text: "Only reasoning, no actual text",
-						},
-					],
-				},
-				{
-					role: "user",
-					content: "Continue",
-				},
-			]
-
-			const stream = handler.createMessage(systemPrompt, messagesWithOnlyReasoning)
-			const chunks: any[] = []
-
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Verify empty message was filtered out
-			const calledMessages = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0].messages
-			expect(calledMessages.length).toBe(2) // Only the two user messages
-			expect(calledMessages.every((m: any) => m.role === "user")).toBe(true)
+	describe("isAiSdkProvider", () => {
+		it("should return true", () => {
+			expect(handler.isAiSdkProvider()).toBe(true)
 		})
 	})
 
-	describe("native tool calling", () => {
-		const systemPrompt = "You are a helpful assistant."
-		const messages: Anthropic.Messages.MessageParam[] = [
-			{
-				role: "user",
-				content: [{ type: "text" as const, text: "What's the weather in London?" }],
-			},
-		]
-
-		const mockTools = [
-			{
-				type: "function" as const,
-				function: {
-					name: "get_weather",
-					description: "Get the current weather",
-					parameters: {
-						type: "object",
-						properties: {
-							location: { type: "string" },
-						},
-						required: ["location"],
-					},
-				},
-			},
-		]
-
-		it("should include tools in request when tools are provided", async () => {
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tools: expect.arrayContaining([
-						expect.objectContaining({
-							name: "get_weather",
-							description: "Get the current weather",
-							input_schema: expect.objectContaining({
-								type: "object",
-								properties: expect.objectContaining({
-									location: { type: "string" },
-								}),
-							}),
-						}),
-					]),
-				}),
-				expect.anything(),
-			)
-		})
-
-		it("should include tools when tools are provided", async () => {
-			const xmlHandler = new AnthropicHandler({
-				...mockOptions,
-			})
-
-			const stream = xmlHandler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			// Tool calling is request-driven: if tools are provided, we should include them.
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tools: expect.arrayContaining([
-						expect.objectContaining({
-							name: "get_weather",
-						}),
-					]),
-				}),
-				expect.anything(),
-			)
-		})
-
-		it("should always include tools in request (tools are always present after PR #10841)", async () => {
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			// Tools are now always present (minimum 6 from ALWAYS_AVAILABLE_TOOLS)
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tools: expect.any(Array),
-					tool_choice: expect.any(Object),
-				}),
-				expect.anything(),
-			)
-		})
-
-		it("should convert tool_choice 'auto' to Anthropic format", async () => {
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-				tool_choice: "auto",
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tool_choice: { type: "auto", disable_parallel_tool_use: false },
-				}),
-				expect.anything(),
-			)
-		})
-
-		it("should convert tool_choice 'required' to Anthropic 'any' format", async () => {
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-				tool_choice: "required",
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tool_choice: { type: "any", disable_parallel_tool_use: false },
-				}),
-				expect.anything(),
-			)
-		})
-
-		it("should set tool_choice to undefined when tool_choice is 'none' (tools are still passed)", async () => {
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-				tool_choice: "none",
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			// Tools are now always present (minimum 6 from ALWAYS_AVAILABLE_TOOLS)
-			// When tool_choice is 'none', the converter returns undefined for tool_choice
-			// but tools are still passed since they're always present
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tools: expect.any(Array),
-					tool_choice: undefined,
-				}),
-				expect.anything(),
-			)
-		})
-
-		it("should convert specific tool_choice to Anthropic 'tool' format", async () => {
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-				tool_choice: { type: "function" as const, function: { name: "get_weather" } },
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tool_choice: { type: "tool", name: "get_weather", disable_parallel_tool_use: false },
-				}),
-				expect.anything(),
-			)
+	describe("thinking signature", () => {
+		it("should return undefined when no signature captured", () => {
+			expect(handler.getThoughtSignature()).toBeUndefined()
 		})
 
-		it("should enable parallel tool calls when parallelToolCalls is true", async () => {
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-				tool_choice: "auto",
-				parallelToolCalls: true,
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tool_choice: { type: "auto", disable_parallel_tool_use: false },
-				}),
-				expect.anything(),
-			)
-		})
-
-		it("should handle tool_use blocks in stream and emit tool_call_partial", async () => {
-			mockCreate.mockImplementationOnce(async () => ({
-				async *[Symbol.asyncIterator]() {
-					yield {
-						type: "message_start",
-						message: {
-							usage: {
-								input_tokens: 100,
-								output_tokens: 50,
-							},
-						},
-					}
-					yield {
-						type: "content_block_start",
-						index: 0,
-						content_block: {
-							type: "tool_use",
-							id: "toolu_123",
-							name: "get_weather",
-						},
-					}
-				},
-			}))
-
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-			})
-
-			const chunks: any[] = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Find the tool_call_partial chunk
-			const toolCallChunk = chunks.find((chunk) => chunk.type === "tool_call_partial")
-			expect(toolCallChunk).toBeDefined()
-			expect(toolCallChunk).toEqual({
-				type: "tool_call_partial",
-				index: 0,
-				id: "toolu_123",
-				name: "get_weather",
-				arguments: undefined,
-			})
-		})
-
-		it("should handle input_json_delta in stream and emit tool_call_partial arguments", async () => {
-			mockCreate.mockImplementationOnce(async () => ({
-				async *[Symbol.asyncIterator]() {
-					yield {
-						type: "message_start",
-						message: {
-							usage: {
-								input_tokens: 100,
-								output_tokens: 50,
-							},
-						},
-					}
-					yield {
-						type: "content_block_start",
-						index: 0,
-						content_block: {
-							type: "tool_use",
-							id: "toolu_123",
-							name: "get_weather",
-						},
-					}
-					yield {
-						type: "content_block_delta",
-						index: 0,
-						delta: {
-							type: "input_json_delta",
-							partial_json: '{"location":',
-						},
-					}
-					yield {
-						type: "content_block_delta",
-						index: 0,
-						delta: {
-							type: "input_json_delta",
-							partial_json: '"London"}',
-						},
-					}
-					yield {
-						type: "content_block_stop",
-						index: 0,
-					}
-				},
-			}))
-
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-			})
-
-			const chunks: any[] = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Find the tool_call_partial chunks
-			const toolCallChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
-			expect(toolCallChunks).toHaveLength(3)
-
-			// First chunk has id and name
-			expect(toolCallChunks[0]).toEqual({
-				type: "tool_call_partial",
-				index: 0,
-				id: "toolu_123",
-				name: "get_weather",
-				arguments: undefined,
-			})
-
-			// Subsequent chunks have arguments
-			expect(toolCallChunks[1]).toEqual({
-				type: "tool_call_partial",
-				index: 0,
-				id: undefined,
-				name: undefined,
-				arguments: '{"location":',
-			})
-
-			expect(toolCallChunks[2]).toEqual({
-				type: "tool_call_partial",
-				index: 0,
-				id: undefined,
-				name: undefined,
-				arguments: '"London"}',
-			})
+
+		it("should return undefined for redacted blocks when none captured", () => {
+			expect(handler.getRedactedThinkingBlocks()).toBeUndefined()
 		})
 	})
 })
diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts
index b2b158f0956..630cfdaf3cc 100644
--- a/src/api/providers/anthropic.ts
+++ b/src/api/providers/anthropic.ts
@@ -1,7 +1,6 @@
-import { Anthropic } from "@anthropic-ai/sdk"
-import { Stream as AnthropicStream } from "@anthropic-ai/sdk/streaming"
-import { CacheControlEphemeral } from "@anthropic-ai/sdk/resources"
-import OpenAI from "openai"
+import type { Anthropic } from "@anthropic-ai/sdk"
+import { createAnthropic } from "@ai-sdk/anthropic"
+import { streamText, generateText, ToolSet } from "ai"
 
 import {
 	type ModelInfo,
@@ -14,317 +13,281 @@ import {
 import { TelemetryService } from "@roo-code/telemetry"
 
 import type { ApiHandlerOptions } from "../../shared/api"
+import { shouldUseReasoningBudget } from "../../shared/api"
 
-import { ApiStream } from "../transform/stream"
+import type { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
-import { filterNonAnthropicBlocks } from "../transform/anthropic-filter"
-import { handleProviderError } from "./utils/error-handler"
+import {
+	convertToAiSdkMessages,
+	convertToolsForAiSdk,
+	processAiSdkStreamPart,
+	mapToolChoice,
+	handleAiSdkError,
+} from "../transform/ai-sdk"
+import { calculateApiCostAnthropic } from "../../shared/cost"
+import { DEFAULT_HEADERS } from "./constants"
 import { BaseProvider } from "./base-provider"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
-import { calculateApiCostAnthropic } from "../../shared/cost"
-import {
-	convertOpenAIToolsToAnthropic,
-	convertOpenAIToolChoiceToAnthropic,
-} from "../../core/prompts/tools/native-tools/converters"
 
 export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler {
 	private options: ApiHandlerOptions
-	private client: Anthropic
+	private provider: ReturnType<typeof createAnthropic>
 	private readonly providerName = "Anthropic"
+	private lastThoughtSignature: string | undefined
+	private lastRedactedThinkingBlocks: Array<{ type: "redacted_thinking"; data: string }> = []
 
 	constructor(options: ApiHandlerOptions) {
 		super()
 		this.options = options
 
-		const apiKeyFieldName =
-			this.options.anthropicBaseUrl && this.options.anthropicUseAuthToken ? "authToken" : "apiKey"
+		const useAuthToken = Boolean(options.anthropicBaseUrl && options.anthropicUseAuthToken)
 
-		this.client = new Anthropic({
-			baseURL: this.options.anthropicBaseUrl || undefined,
-			[apiKeyFieldName]: this.options.apiKey,
-		})
-	}
+		// Build beta headers for model-specific features
+		const betas: string[] = []
+		const modelId = options.apiModelId
+
+		if (modelId === "claude-3-7-sonnet-20250219:thinking") {
+			betas.push("output-128k-2025-02-19")
+		}
 
-	async *createMessage(
-		systemPrompt: string,
-		messages: Anthropic.Messages.MessageParam[],
-		metadata?: ApiHandlerCreateMessageMetadata,
-	): ApiStream {
-		let stream: AnthropicStream<Anthropic.Messages.RawMessageStreamEvent>
-		const cacheControl: CacheControlEphemeral = { type: "ephemeral" }
-		let {
-			id: modelId,
-			betas = ["fine-grained-tool-streaming-2025-05-14"],
-			maxTokens,
-			temperature,
-			reasoning: thinking,
-		} = this.getModel()
-
-		// Filter out non-Anthropic blocks (reasoning, thoughtSignature, etc.) before sending to the API
-		const sanitizedMessages = filterNonAnthropicBlocks(messages)
-
-		// Add 1M context beta flag if enabled for supported models (Claude Sonnet 4/4.5, Opus 4.6)
 		if (
 			(modelId === "claude-sonnet-4-20250514" || modelId === "claude-sonnet-4-5" || modelId === "claude-opus-4-6") &&
-			this.options.anthropicBeta1MContext
+			options.anthropicBeta1MContext
 		) {
 			betas.push("context-1m-2025-08-07")
 		}
 
-		const nativeToolParams = {
-			tools: convertOpenAIToolsToAnthropic(metadata?.tools ?? []),
-			tool_choice: convertOpenAIToolChoiceToAnthropic(metadata?.tool_choice, metadata?.parallelToolCalls),
+		this.provider = createAnthropic({
+			baseURL: options.anthropicBaseUrl || undefined,
+			...(useAuthToken ? { authToken: options.apiKey } : { apiKey: options.apiKey ?? "not-provided" }),
+			headers: {
+				...DEFAULT_HEADERS,
+				...(betas.length > 0 ? { "anthropic-beta": betas.join(",") } : {}),
+			},
+		})
+	}
+
+	override async *createMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		const modelConfig = this.getModel()
+
+		// Reset thinking state for this request
+		this.lastThoughtSignature = undefined
+		this.lastRedactedThinkingBlocks = []
+
+		// Convert messages to AI SDK format
+		const aiSdkMessages = convertToAiSdkMessages(messages)
+
+		// Convert tools to AI SDK format
+		const openAiTools = this.convertToolsForOpenAI(metadata?.tools)
+		const aiSdkTools = convertToolsForAiSdk(openAiTools) as ToolSet | undefined
+
+		// Build Anthropic provider options
+		const anthropicProviderOptions: Record<string, unknown> = {}
+
+		// Configure thinking/reasoning if the model supports it
+		const isThinkingEnabled =
+			shouldUseReasoningBudget({ model: modelConfig.info, settings: this.options }) &&
+			modelConfig.reasoning &&
+			modelConfig.reasoningBudget
+
+		if (isThinkingEnabled) {
+			anthropicProviderOptions.thinking = {
+				type: "enabled",
+				budgetTokens: modelConfig.reasoningBudget,
+			}
 		}
 
-		switch (modelId) {
-			case "claude-sonnet-4-5":
-			case "claude-sonnet-4-20250514":
-			case "claude-opus-4-6":
-			case "claude-opus-4-5-20251101":
-			case "claude-opus-4-1-20250805":
-			case "claude-opus-4-20250514":
-			case "claude-3-7-sonnet-20250219":
-			case "claude-3-5-sonnet-20241022":
-			case "claude-3-5-haiku-20241022":
-			case "claude-3-opus-20240229":
-			case "claude-haiku-4-5-20251001":
-			case "claude-3-haiku-20240307": {
-				/**
-				 * The latest message will be the new user message, one before
-				 * will be the assistant message from a previous request, and
-				 * the user message before that will be a previously cached user
-				 * message. So we need to mark the latest user message as
-				 * ephemeral to cache it for the next request, and mark the
-				 * second to last user message as ephemeral to let the server
-				 * know the last message to retrieve from the cache for the
-				 * current request.
-				 */
-				const userMsgIndices = sanitizedMessages.reduce(
-					(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
-					[] as number[],
-				)
+		// Forward parallelToolCalls setting
+		// When parallelToolCalls is explicitly false, disable parallel tool use
+		if (metadata?.parallelToolCalls === false) {
+			anthropicProviderOptions.disableParallelToolUse = true
+		}
+
+		// Apply cache control to user messages
+		// Strategy: cache the last 2 user messages (write-to-cache + read-from-cache)
+		const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }
+
+		const userMsgIndices = messages.reduce(
+			(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
+			[] as number[],
+		)
+
+		const targetIndices = new Set<number>()
+		const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
+		const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1
+
+		if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
+		if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)
+
+		if (targetIndices.size > 0) {
+			this.applyCacheControlToAiSdkMessages(messages, aiSdkMessages, targetIndices, cacheProviderOption)
+		}
+
+		// Prepend system prompt as a system message with cache control
+		const systemMessage = {
+			role: "system" as const,
+			content: systemPrompt,
+			providerOptions: {
+				anthropic: { cacheControl: { type: "ephemeral" } },
+			},
+		}
 
-				const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
-				const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1
-
-				try {
-					stream = await this.client.messages.create(
-						{
-							model: modelId,
-							max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
-							temperature,
-							thinking,
-							// Setting cache breakpoint for system prompt so new tasks can reuse it.
-							system: [{ text: systemPrompt, type: "text", cache_control: cacheControl }],
-							messages: sanitizedMessages.map((message, index) => {
-								if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) {
-									return {
-										...message,
-										content:
-											typeof message.content === "string"
-												? [{ type: "text", text: message.content, cache_control: cacheControl }]
-												: message.content.map((content, contentIndex) =>
-														contentIndex === message.content.length - 1
-															? { ...content, cache_control: cacheControl }
-															: content,
-													),
-									}
-								}
-								return message
-							}),
-							stream: true,
-							...nativeToolParams,
-						},
-						(() => {
-							// prompt caching: https://x.com/alexalbert__/status/1823751995901272068
-							// https://github.com/anthropics/anthropic-sdk-typescript?tab=readme-ov-file#default-headers
-							// https://github.com/anthropics/anthropic-sdk-typescript/commit/c920b77fc67bd839bfeb6716ceab9d7c9bbe7393
-
-							// Then check for models that support prompt caching
-							switch (modelId) {
-								case "claude-sonnet-4-5":
-								case "claude-sonnet-4-20250514":
-								case "claude-opus-4-6":
-								case "claude-opus-4-5-20251101":
-								case "claude-opus-4-1-20250805":
-								case "claude-opus-4-20250514":
-								case "claude-3-7-sonnet-20250219":
-								case "claude-3-5-sonnet-20241022":
-								case "claude-3-5-haiku-20241022":
-								case "claude-3-opus-20240229":
-								case "claude-haiku-4-5-20251001":
-								case "claude-3-haiku-20240307":
-									betas.push("prompt-caching-2024-07-31")
-									return { headers: { "anthropic-beta": betas.join(",") } }
-								default:
-									return undefined
-							}
-						})(),
-					)
-				} catch (error) {
-					TelemetryService.instance.captureException(
-						new ApiProviderError(
-							error instanceof Error ? error.message : String(error),
-							this.providerName,
-							modelId,
-							"createMessage",
-						),
-					)
-					throw error
+		// Build streamText request
+		const requestOptions: Parameters<typeof streamText>[0] = {
+			model: this.provider(modelConfig.id),
+			messages: [systemMessage, ...aiSdkMessages],
+			temperature: modelConfig.temperature,
+			maxOutputTokens: modelConfig.maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
+			tools: aiSdkTools,
+			toolChoice: mapToolChoice(metadata?.tool_choice),
+			...(Object.keys(anthropicProviderOptions).length > 0 && {
+				providerOptions: { anthropic: anthropicProviderOptions } as any,
+			}),
+		}
+
+		try {
+			const result = streamText(requestOptions)
+
+			for await (const part of result.fullStream) {
+				// Capture thinking signature from stream events
+				// The AI SDK's @ai-sdk/anthropic emits the signature as a reasoning-delta
+				// event with providerMetadata.anthropic.signature
+				const partAny = part as any
+				if (partAny.providerMetadata?.anthropic?.signature) {
+					this.lastThoughtSignature = partAny.providerMetadata.anthropic.signature
 				}
-				break
-			}
-			default: {
-				try {
-					stream = (await this.client.messages.create({
-						model: modelId,
-						max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
-						temperature,
-						system: [{ text: systemPrompt, type: "text" }],
-						messages: sanitizedMessages,
-						stream: true,
-						...nativeToolParams,
-					})) as any
-				} catch (error) {
-					TelemetryService.instance.captureException(
-						new ApiProviderError(
-							error instanceof Error ? error.message : String(error),
-							this.providerName,
-							modelId,
-							"createMessage",
-						),
-					)
-					throw error
+
+				// Capture redacted thinking blocks from stream events
+				if (partAny.providerMetadata?.anthropic?.redactedData) {
+					this.lastRedactedThinkingBlocks.push({
+						type: "redacted_thinking",
+						data: partAny.providerMetadata.anthropic.redactedData,
+					})
 				}
-				break
+
+				for (const chunk of processAiSdkStreamPart(part)) {
+					yield chunk
+				}
+			}
+
+			// Yield usage metrics at the end, including cache metrics from providerMetadata
+			const usage = await result.usage
+			const providerMetadata = await result.providerMetadata
+			if (usage) {
+				yield this.processUsageMetrics(usage, modelConfig.info, providerMetadata)
 			}
+		} catch (error) {
+			const errorMessage = error instanceof Error ? error.message : String(error)
+			TelemetryService.instance.captureException(
+				new ApiProviderError(errorMessage, this.providerName, modelConfig.id, "createMessage"),
+			)
+			throw handleAiSdkError(error, this.providerName)
 		}
+	}
 
-		let inputTokens = 0
-		let outputTokens = 0
-		let cacheWriteTokens = 0
-		let cacheReadTokens = 0
-
-		for await (const chunk of stream) {
-			switch (chunk.type) {
-				case "message_start": {
-					// Tells us cache reads/writes/input/output.
-					const {
-						input_tokens = 0,
-						output_tokens = 0,
-						cache_creation_input_tokens,
-						cache_read_input_tokens,
-					} = chunk.message.usage
-
-					yield {
-						type: "usage",
-						inputTokens: input_tokens,
-						outputTokens: output_tokens,
-						cacheWriteTokens: cache_creation_input_tokens || undefined,
-						cacheReadTokens: cache_read_input_tokens || undefined,
-					}
+	/**
+	 * Process usage metrics from the AI SDK response, including Anthropic's cache metrics.
+	 */
+	private processUsageMetrics(
+		usage: { inputTokens?: number; outputTokens?: number },
+		info: ModelInfo,
+		providerMetadata?: Record<string, Record<string, unknown>>,
+	): ApiStreamUsageChunk {
+		const inputTokens = usage.inputTokens ?? 0
+		const outputTokens = usage.outputTokens ?? 0
+
+		// Extract cache metrics from Anthropic's providerMetadata
+		const anthropicMeta = providerMetadata?.anthropic as
+			| { cacheCreationInputTokens?: number; cacheReadInputTokens?: number }
+			| undefined
+		const cacheWriteTokens = anthropicMeta?.cacheCreationInputTokens ?? 0
+		const cacheReadTokens = anthropicMeta?.cacheReadInputTokens ?? 0
+
+		const { totalCost } = calculateApiCostAnthropic(
+			info,
+			inputTokens,
+			outputTokens,
+			cacheWriteTokens,
+			cacheReadTokens,
+		)
 
-					inputTokens += input_tokens
-					outputTokens += output_tokens
-					cacheWriteTokens += cache_creation_input_tokens || 0
-					cacheReadTokens += cache_read_input_tokens || 0
+		return {
+			type: "usage",
+			inputTokens,
+			outputTokens,
+			cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined,
+			cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined,
+			totalCost,
+		}
+	}
 
-					break
-				}
-				case "message_delta":
-					// Tells us stop_reason, stop_sequence, and output tokens
-					// along the way and at the end of the message.
-					yield {
-						type: "usage",
-						inputTokens: 0,
-						outputTokens: chunk.usage.output_tokens || 0,
+	/**
+	 * Apply cacheControl providerOptions to the correct AI SDK messages by walking
+	 * the original Anthropic messages and converted AI SDK messages in parallel.
+	 *
+	 * convertToAiSdkMessages() can split a single Anthropic user message (containing
+	 * tool_results + text) into 2 AI SDK messages (tool role + user role). This method
+	 * accounts for that split so cache control lands on the right message.
+	 */
+	private applyCacheControlToAiSdkMessages(
+		originalMessages: Anthropic.Messages.MessageParam[],
+		aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
+		targetOriginalIndices: Set<number>,
+		cacheProviderOption: Record<string, Record<string, unknown>>,
+	): void {
+		let aiSdkIdx = 0
+		for (let origIdx = 0; origIdx < originalMessages.length; origIdx++) {
+			const origMsg = originalMessages[origIdx]
+
+			if (typeof origMsg.content === "string") {
+				if (targetOriginalIndices.has(origIdx) && aiSdkIdx < aiSdkMessages.length) {
+					aiSdkMessages[aiSdkIdx].providerOptions = {
+						...aiSdkMessages[aiSdkIdx].providerOptions,
+						...cacheProviderOption,
 					}
+				}
+				aiSdkIdx++
+			} else if (origMsg.role === "user") {
+				const hasToolResults = origMsg.content.some((part) => (part as { type: string }).type === "tool_result")
+				const hasNonToolContent = origMsg.content.some(
+					(part) => (part as { type: string }).type === "text" || (part as { type: string }).type === "image",
+				)
 
-					break
-				case "message_stop":
-					// No usage data, just an indicator that the message is done.
-					break
-				case "content_block_start":
-					switch (chunk.content_block.type) {
-						case "thinking":
-							// We may receive multiple text blocks, in which
-							// case just insert a line break between them.
-							if (chunk.index > 0) {
-								yield { type: "reasoning", text: "\n" }
-							}
-
-							yield { type: "reasoning", text: chunk.content_block.thinking }
-							break
-						case "text":
-							// We may receive multiple text blocks, in which
-							// case just insert a line break between them.
-							if (chunk.index > 0) {
-								yield { type: "text", text: "\n" }
-							}
-
-							yield { type: "text", text: chunk.content_block.text }
-							break
-						case "tool_use": {
-							// Emit initial tool call partial with id and name
-							yield {
-								type: "tool_call_partial",
-								index: chunk.index,
-								id: chunk.content_block.id,
-								name: chunk.content_block.name,
-								arguments: undefined,
-							}
-							break
+				if (hasToolResults && hasNonToolContent) {
+					const userMsgIdx = aiSdkIdx + 1
+					if (targetOriginalIndices.has(origIdx) && userMsgIdx < aiSdkMessages.length) {
+						aiSdkMessages[userMsgIdx].providerOptions = {
+							...aiSdkMessages[userMsgIdx].providerOptions,
+							...cacheProviderOption,
 						}
 					}
-					break
-				case "content_block_delta":
-					switch (chunk.delta.type) {
-						case "thinking_delta":
-							yield { type: "reasoning", text: chunk.delta.thinking }
-							break
-						case "text_delta":
-							yield { type: "text", text: chunk.delta.text }
-							break
-						case "input_json_delta": {
-							// Emit tool call partial chunks as arguments stream in
-							yield {
-								type: "tool_call_partial",
-								index: chunk.index,
-								id: undefined,
-								name: undefined,
-								arguments: chunk.delta.partial_json,
-							}
-							break
+					aiSdkIdx += 2
+				} else if (hasToolResults) {
+					if (targetOriginalIndices.has(origIdx) && aiSdkIdx < aiSdkMessages.length) {
+						aiSdkMessages[aiSdkIdx].providerOptions = {
+							...aiSdkMessages[aiSdkIdx].providerOptions,
+							...cacheProviderOption,
 						}
 					}
-
-					break
-				case "content_block_stop":
-					// Block complete - no action needed for now.
-					// NativeToolCallParser handles tool call completion
-					// Note: Signature for multi-turn thinking would require using stream.finalMessage()
-					// after iteration completes, which requires restructuring the streaming approach.
-					break
-			}
-		}
-
-		if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) {
-			const { totalCost } = calculateApiCostAnthropic(
-				this.getModel().info,
-				inputTokens,
-				outputTokens,
-				cacheWriteTokens,
-				cacheReadTokens,
-			)
-
-			yield {
-				type: "usage",
-				inputTokens: 0,
-				outputTokens: 0,
-				totalCost,
+					aiSdkIdx++
+				} else {
+					if (targetOriginalIndices.has(origIdx) && aiSdkIdx < aiSdkMessages.length) {
+						aiSdkMessages[aiSdkIdx].providerOptions = {
+							...aiSdkMessages[aiSdkIdx].providerOptions,
+							...cacheProviderOption,
+						}
+					}
+					aiSdkIdx++
+				}
+			} else {
+				aiSdkIdx++
 			}
 		}
 	}
@@ -339,7 +302,6 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 			(id === "claude-sonnet-4-20250514" || id === "claude-sonnet-4-5" || id === "claude-opus-4-6") &&
 			this.options.anthropicBeta1MContext
 		) {
-			// Use the tier pricing for 1M context
 			const tier = info.tiers?.[0]
 			if (tier) {
 				info = {
@@ -368,37 +330,53 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 		return {
 			id: id === "claude-3-7-sonnet-20250219:thinking" ? "claude-3-7-sonnet-20250219" : id,
 			info,
-			betas: id === "claude-3-7-sonnet-20250219:thinking" ? ["output-128k-2025-02-19"] : undefined,
 			...params,
 		}
 	}
 
-	async completePrompt(prompt: string) {
-		let { id: model, temperature } = this.getModel()
+	async completePrompt(prompt: string): Promise<string> {
+		const { id, temperature } = this.getModel()
 
-		let message
 		try {
-			message = await this.client.messages.create({
-				model,
-				max_tokens: ANTHROPIC_DEFAULT_MAX_TOKENS,
-				thinking: undefined,
+			const { text } = await generateText({
+				model: this.provider(id),
+				prompt,
+				maxOutputTokens: ANTHROPIC_DEFAULT_MAX_TOKENS,
 				temperature,
-				messages: [{ role: "user", content: prompt }],
-				stream: false,
 			})
+
+			return text
 		} catch (error) {
 			TelemetryService.instance.captureException(
 				new ApiProviderError(
 					error instanceof Error ? error.message : String(error),
 					this.providerName,
-					model,
+					id,
 					"completePrompt",
 				),
 			)
-			throw error
+			throw handleAiSdkError(error, this.providerName)
 		}
+	}
+
+	/**
+	 * Returns the thinking signature captured from the last Anthropic response.
+	 * Claude models with extended thinking return a cryptographic signature
+	 * which must be round-tripped back for multi-turn conversations with tool use.
+	 */
+	getThoughtSignature(): string | undefined {
+		return this.lastThoughtSignature
+	}
+
+	/**
+	 * Returns any redacted thinking blocks captured from the last Anthropic response.
+	 * Anthropic returns these when safety filters trigger on reasoning content.
+	 */
+	getRedactedThinkingBlocks(): Array<{ type: "redacted_thinking"; data: string }> | undefined {
+		return this.lastRedactedThinkingBlocks.length > 0 ? this.lastRedactedThinkingBlocks : undefined
+	}
 
-		const content = message.content.find(({ type }) => type === "text")
-		return content?.type === "text" ? content.text : ""
+	override isAiSdkProvider(): boolean {
+		return true
 	}
 }
diff --git a/src/package.json b/src/package.json
index 70cc99ba731..ca240be888f 100644
--- a/src/package.json
+++ b/src/package.json
@@ -451,6 +451,7 @@
 	},
 	"dependencies": {
 		"@ai-sdk/amazon-bedrock": "^4.0.51",
+		"@ai-sdk/anthropic": "^3.0.38",
 		"@ai-sdk/baseten": "^1.0.31",
 		"@ai-sdk/cerebras": "^2.0.31",
 		"@ai-sdk/deepseek": "^2.0.18",

From 0b92a2d674200057834a5d5b3aa23a4a14746772 Mon Sep 17 00:00:00 2001
From: daniel-lxs
Date: Mon, 9 Feb 2026 09:25:27 -0500
Subject: [PATCH 2/2] fix: address PR review - remove apiKey fallback and use
 system+systemProviderOptions pattern

---
 src/api/providers/__tests__/anthropic.spec.ts | 30 +++++++++----------
 src/api/providers/anthropic.ts                | 18 +++++------
 2 files changed, 21 insertions(+), 27 deletions(-)

diff --git a/src/api/providers/__tests__/anthropic.spec.ts b/src/api/providers/__tests__/anthropic.spec.ts
index c6d07bc82e0..b80dc205eb5 100644
--- a/src/api/providers/__tests__/anthropic.spec.ts
+++ b/src/api/providers/__tests__/anthropic.spec.ts
@@ -94,12 +94,15 @@ describe("AnthropicHandler", () => {
 			expect(handler.getModel().id).toBe(mockOptions.apiModelId)
 		})
 
-		it("should initialize with undefined API key", () => {
+		it("should initialize with undefined API key and pass it through for env-var fallback", () => {
+			mockCreateAnthropic.mockClear()
 			const handlerWithoutKey = new AnthropicHandler({
 				...mockOptions,
 				apiKey: undefined,
 			})
 			expect(handlerWithoutKey).toBeInstanceOf(AnthropicHandler)
+			const callArgs = mockCreateAnthropic.mock.calls[0]![0]!
+			expect(callArgs.apiKey).toBeUndefined()
 		})
 
 		it("should use custom base URL if provided", () => {
@@ -474,7 +477,7 @@ describe("AnthropicHandler", () => {
 			expect(handler.getThoughtSignature()).toBeUndefined()
 		})
 
-		it("should include system message with cache control in the request", async () => {
+		it("should pass system prompt via system param with systemProviderOptions for cache control", async () => {
 			setupStreamTextMock([{ type: "text-delta", text: "test" }])
 
 			const stream = handler.createMessage(systemPrompt, [
@@ -485,20 +488,15 @@ describe("AnthropicHandler", () => {
 				// Consume
 			}
 
-			// Verify streamText was called with system message containing cache control
-			expect(mockStreamText).toHaveBeenCalledWith(
-				expect.objectContaining({
-					messages: expect.arrayContaining([
-						expect.objectContaining({
-							role: "system",
-							content: systemPrompt,
-							providerOptions: {
-								anthropic: { cacheControl: { type: "ephemeral" } },
-							},
-						}),
-					]),
-				}),
-			)
+			// Verify streamText was called with system + systemProviderOptions (not as a message)
+			const callArgs = mockStreamText.mock.calls[0]![0]
+			expect(callArgs.system).toBe(systemPrompt)
+			expect(callArgs.systemProviderOptions).toEqual({
+				anthropic: { cacheControl: { type: "ephemeral" } },
+			})
+			// System prompt should NOT be in the messages array
+			const systemMessages = callArgs.messages.filter((m: any) => m.role === "system")
+			expect(systemMessages).toHaveLength(0)
 		})
 	})
 
diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts
index 630cfdaf3cc..f6ee47e130c 100644
--- a/src/api/providers/anthropic.ts
+++ b/src/api/providers/anthropic.ts
@@ -62,7 +62,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 
 		this.provider = createAnthropic({
 			baseURL: options.anthropicBaseUrl || undefined,
-			...(useAuthToken ? { authToken: options.apiKey } : { apiKey: options.apiKey ?? "not-provided" }),
+			...(useAuthToken ? { authToken: options.apiKey } : { apiKey: options.apiKey }),
 			headers: {
 				...DEFAULT_HEADERS,
 				...(betas.length > 0 ? { "anthropic-beta": betas.join(",") } : {}),
@@ -130,19 +130,15 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 			this.applyCacheControlToAiSdkMessages(messages, aiSdkMessages, targetIndices, cacheProviderOption)
 		}
 
-		// Prepend system prompt as a system message with cache control
-		const systemMessage = {
-			role: "system" as const,
-			content: systemPrompt,
-			providerOptions: {
-				anthropic: { cacheControl: { type: "ephemeral" } },
-			},
-		}
-
 		// Build streamText request
+		// Cast providerOptions to any to bypass strict JSONObject typing; the AI SDK accepts the correct runtime values
 		const requestOptions: Parameters<typeof streamText>[0] = {
 			model: this.provider(modelConfig.id),
-			messages: [systemMessage, ...aiSdkMessages],
+			system: systemPrompt,
+			...({
+				systemProviderOptions: { anthropic: { cacheControl: { type: "ephemeral" } } },
+			} as Record<string, unknown>),
+			messages: aiSdkMessages,
 			temperature: modelConfig.temperature,
 			maxOutputTokens: modelConfig.maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
 			tools: aiSdkTools,
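
Note for reviewers unfamiliar with the Vercel AI SDK surface this series targets: a minimal standalone sketch of the call pattern the migrated handler uses (assumes the ai v5 / @ai-sdk/anthropic APIs and the v5 stream part names exercised by the tests above; the model id, budget value, and env var here are placeholder choices, not values from these patches):

import { createAnthropic } from "@ai-sdk/anthropic"
import { streamText } from "ai"

// The provider factory is configured once (apiKey/authToken, baseURL, beta
// headers), then called per request with a model id, mirroring the handler's
// constructor/createMessage() split.
const anthropic = createAnthropic({ apiKey: process.env.ANTHROPIC_API_KEY })

const result = streamText({
	model: anthropic("claude-3-5-sonnet-20241022"),
	system: "You are a helpful assistant.",
	messages: [{ role: "user", content: "Hello" }],
	// Anthropic-specific settings travel under providerOptions.anthropic,
	// e.g. the extended-thinking budget the handler sets when enabled.
	providerOptions: {
		anthropic: { thinking: { type: "enabled", budgetTokens: 4096 } },
	},
})

for await (const part of result.fullStream) {
	// fullStream interleaves text-delta / reasoning-delta / tool-input-* parts;
	// the handler forwards these through processAiSdkStreamPart().
	if (part.type === "text-delta") process.stdout.write(part.text)
}

// usage (and cache metrics under providerMetadata.anthropic) resolve only
// after the stream completes, which is why the handler yields its usage
// chunk last.
console.log(await result.usage, await result.providerMetadata)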