Skip to content

Commit 559fd35

Browse files
mattapperson authored and claude committed
fix: improve Claude message detection and block conversion
- Improve isClaudeStyleMessages() heuristic to check ALL messages for Claude-specific features (tool_result, image with source, tool_use)
- Properly convert tool_use blocks to ResponsesOutputItemFunctionCall
- Convert image blocks in user messages to OpenResponsesInputMessageItem
- Handle assistant images via synthetic function call outputs
- Add TODO comment for cache_read_input_tokens mapping
- Add console.warn for JSON parsing failures in tool arguments
- Add E2E tests for getClaudeMessage() output format

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 5c9ae3b commit 559fd35

File tree

3 files changed

+232
-38
lines changed

3 files changed

+232
-38
lines changed

src/funcs/callModel.ts

Lines changed: 147 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -27,43 +27,61 @@ export type CallModelTools =
2727

2828
/**
2929
* Check if input is Anthropic Claude-style messages (ClaudeMessageParam[])
30-
* Claude messages have role of 'user' or 'assistant' only and content can be string or array of blocks
31-
* with Claude-specific block types ('text', 'image', 'tool_use', 'tool_result')
30+
*
31+
* Claude messages have only 'user' or 'assistant' roles (no 'system', 'tool', 'developer')
32+
* and may contain Claude-specific block types in content arrays.
33+
*
34+
* We check for Claude-ONLY features to distinguish from OpenAI format:
35+
* - 'tool_result' blocks (Claude-specific; OpenAI uses role: 'tool')
36+
* - 'image' blocks with 'source' object (Claude-specific structure)
3237
*/
3338
function isClaudeStyleMessages(
3439
input: CallModelInput
3540
): input is models.ClaudeMessageParam[] {
36-
if (!Array.isArray(input)) {
37-
return false;
38-
}
39-
if (input.length === 0) {
41+
if (!Array.isArray(input) || input.length === 0) {
4042
return false;
4143
}
4244

43-
const first = input[0] as Record<string, unknown>;
44-
// Must have role and no 'type' field at top level
45-
if (!first || !("role" in first) || "type" in first) {
46-
return false;
47-
}
45+
// Check ALL messages for Claude-specific features
46+
for (const msg of input) {
47+
const m = msg as Record<string, unknown>;
48+
if (!m || !("role" in m) || "type" in m) {
49+
continue;
50+
}
4851

49-
const content = first["content"];
50-
// If content is an array with Claude-specific block types, it's Claude-style
51-
if (Array.isArray(content) && content.length > 0) {
52-
const firstBlock = content[0] as Record<string, unknown>;
53-
const blockType = firstBlock?.["type"];
54-
// Claude blocks have specific types like 'text', 'image', 'tool_use', 'tool_result'
55-
if (
56-
blockType === "text" ||
57-
blockType === "image" ||
58-
blockType === "tool_use" ||
59-
blockType === "tool_result"
60-
) {
61-
return true;
52+
// OpenAI has 'system', 'developer', 'tool' roles that Claude doesn't have
53+
// If we see these roles, it's definitely NOT Claude format
54+
const role = m["role"];
55+
if (role === "system" || role === "developer" || role === "tool") {
56+
return false;
57+
}
58+
59+
const content = m["content"];
60+
if (!Array.isArray(content)) {
61+
continue;
62+
}
63+
64+
for (const block of content) {
65+
const b = block as Record<string, unknown>;
66+
const blockType = b?.["type"];
67+
// 'tool_result' is Claude-specific (OpenAI uses role: 'tool' messages instead)
68+
if (blockType === "tool_result") {
69+
return true;
70+
}
71+
// 'image' with 'source' object is Claude-specific
72+
// OpenAI uses 'image_url' structure instead
73+
if (blockType === "image" && typeof b?.["source"] === "object") {
74+
return true;
75+
}
76+
// 'tool_use' blocks are Claude-specific (OpenAI uses 'tool_calls' array on message)
77+
if (blockType === "tool_use" && typeof b?.["id"] === "string") {
78+
return true;
79+
}
6280
}
6381
}
6482

65-
// String content with only 'user' or 'assistant' roles could be either format
66-
// Default to Chat-style for ambiguous cases (more permissive)
83+
// No Claude-specific features found
84+
// Default to NOT Claude (prefer OpenAI chat format as it's more common)
6785
return false;
6886
}
6987

@@ -192,16 +210,41 @@ function convertChatToResponsesInput(
192210
) as models.OpenResponsesInput;
193211
}
194212

213+
/**
214+
* Convert a Claude image source to a URL string
215+
*/
216+
function claudeImageSourceToUrl(
217+
source: models.ClaudeBase64ImageSource | models.ClaudeURLImageSource
218+
): string {
219+
if (source.type === "url") {
220+
return source.url;
221+
}
222+
// Convert base64 to data URL
223+
return `data:${source.media_type};base64,${source.data}`;
224+
}
225+
226+
/**
227+
* Generate a unique ID for synthetic function calls
228+
*/
229+
let syntheticIdCounter = 0;
230+
function generateSyntheticId(): string {
231+
return `synthetic_${Date.now()}_${++syntheticIdCounter}`;
232+
}
233+
195234
/**
196235
* Convert Claude-style messages to responses-style input
236+
*
237+
* Handles:
238+
* - text blocks -> concatenated text content
239+
* - tool_result blocks -> OpenResponsesFunctionCallOutput
240+
* - tool_use blocks -> ResponsesOutputItemFunctionCall (preserves tool call history)
241+
* - image blocks in user messages -> OpenResponsesInputMessageItem with ResponseInputImage
242+
* - image blocks in assistant messages -> synthetic function call output with image data
197243
*/
198244
function convertClaudeToResponsesInput(
199245
messages: models.ClaudeMessageParam[]
200246
): models.OpenResponsesInput {
201-
const result: (
202-
| models.OpenResponsesEasyInputMessage
203-
| models.OpenResponsesFunctionCallOutput
204-
)[] = [];
247+
const result: models.OpenResponsesInput1[] = [];
205248

206249
for (const msg of messages) {
207250
const { role, content } = msg;
@@ -215,13 +258,16 @@ function convertClaudeToResponsesInput(
215258
continue;
216259
}
217260

218-
// Handle array content - extract text and handle tool results
261+
// Analyze content blocks
219262
const textParts: string[] = [];
263+
const imageBlocks: models.ClaudeImageBlockParam[] = [];
264+
const toolUseBlocks: models.ClaudeToolUseBlockParam[] = [];
265+
220266
for (const block of content) {
221267
if (block.type === "text") {
222268
textParts.push(block.text);
223269
} else if (block.type === "tool_result") {
224-
// Tool results need special handling - convert to function_call_output
270+
// Tool results -> function_call_output
225271
let toolContent: string;
226272
if (typeof block.content === "string") {
227273
toolContent = block.content;
@@ -236,13 +282,75 @@ function convertClaudeToResponsesInput(
236282
callId: block.tool_use_id,
237283
output: toolContent,
238284
} as models.OpenResponsesFunctionCallOutput);
285+
} else if (block.type === "tool_use") {
286+
toolUseBlocks.push(block);
287+
} else if (block.type === "image") {
288+
imageBlocks.push(block);
239289
}
240-
// Note: tool_use and image blocks in input are typically part of conversation history
241-
// They would come from previous assistant responses, we skip them for now
242290
}
243291

244-
// If we collected text parts, add them as a message
245-
if (textParts.length > 0) {
292+
// Handle tool_use blocks (from assistant messages in conversation history)
293+
for (const toolUse of toolUseBlocks) {
294+
result.push({
295+
type: "function_call",
296+
callId: toolUse.id,
297+
name: toolUse.name,
298+
arguments: JSON.stringify(toolUse.input),
299+
} as models.ResponsesOutputItemFunctionCall);
300+
}
301+
302+
// Handle images based on role
303+
if (imageBlocks.length > 0) {
304+
if (role === "user") {
305+
// User messages with images -> OpenResponsesInputMessageItem
306+
const contentParts: (models.ResponseInputText | models.ResponseInputImage)[] = [];
307+
308+
// Add text parts first
309+
for (const text of textParts) {
310+
contentParts.push({
311+
type: "input_text",
312+
text,
313+
} as models.ResponseInputText);
314+
}
315+
316+
// Add image parts
317+
for (const imgBlock of imageBlocks) {
318+
contentParts.push({
319+
type: "input_image",
320+
detail: "auto",
321+
imageUrl: claudeImageSourceToUrl(imgBlock.source),
322+
} as models.ResponseInputImage);
323+
}
324+
325+
result.push({
326+
type: "message",
327+
role: "user",
328+
content: contentParts,
329+
} as models.OpenResponsesInputMessageItem);
330+
} else {
331+
// Assistant messages with images -> synthetic function call outputs
332+
// First add text content if any
333+
if (textParts.length > 0) {
334+
result.push({
335+
role: "assistant",
336+
content: textParts.join(""),
337+
} as models.OpenResponsesEasyInputMessage);
338+
}
339+
340+
// Add images as synthetic tool outputs
341+
for (const imgBlock of imageBlocks) {
342+
result.push({
343+
type: "function_call_output",
344+
callId: generateSyntheticId(),
345+
output: JSON.stringify({
346+
type: "image",
347+
url: claudeImageSourceToUrl(imgBlock.source),
348+
}),
349+
} as models.OpenResponsesFunctionCallOutput);
350+
}
351+
}
352+
} else if (textParts.length > 0) {
353+
// No images, just text content
246354
result.push({
247355
role: role as "user" | "assistant",
248356
content: textParts.join(""),
@@ -400,6 +508,9 @@ export function callModel<TTools extends readonly Tool[] = Tool[]>(
400508
};
401509

402510
// Only pass enhanced tools to wrapper (needed for auto-execution)
511+
// Double assertion needed because TTools is a generic extending readonly Tool[],
512+
// while enhancedTools is Tool[]. TypeScript can't verify the specific TTools subtype
513+
// at runtime, but we know it's safe since we extracted these tools from the input.
403514
if (enhancedTools) {
404515
wrapperOptions.tools = enhancedTools as unknown as TTools;
405516
}

src/lib/stream-transformers.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -428,8 +428,9 @@ export function convertToClaudeMessage(
428428

429429
try {
430430
parsedInput = JSON.parse(fnCall.arguments);
431-
} catch {
432-
// If parsing fails, keep as empty object
431+
} catch (e) {
432+
// JSON parsing failed - likely malformed arguments from model
433+
console.warn(`[OpenRouter SDK] Failed to parse tool arguments for ${fnCall.name}: ${e}`);
433434
parsedInput = {};
434435
}
435436

@@ -470,6 +471,8 @@ export function convertToClaudeMessage(
470471
input_tokens: response.usage?.inputTokens ?? 0,
471472
output_tokens: response.usage?.outputTokens ?? 0,
472473
cache_creation_input_tokens: response.usage?.inputTokensDetails?.cachedTokens ?? 0,
474+
// TODO: OpenResponses doesn't expose cache_read separately from cachedTokens.
475+
// Anthropic distinguishes cache creation vs read; OpenResponses combines them.
473476
cache_read_input_tokens: 0,
474477
},
475478
};

tests/e2e/callModel.test.ts

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,86 @@ describe('callModel E2E Tests', () => {
279279
});
280280
});
281281

282+
describe('getClaudeMessage - Claude output format', () => {
283+
it('should return ClaudeMessage with correct structure', async () => {
284+
const response = client.callModel({
285+
model: 'meta-llama/llama-3.2-1b-instruct',
286+
input: "Say 'hello' and nothing else.",
287+
});
288+
289+
const claudeMessage = await response.getClaudeMessage();
290+
291+
expect(claudeMessage.type).toBe('message');
292+
expect(claudeMessage.role).toBe('assistant');
293+
expect(claudeMessage.content).toBeInstanceOf(Array);
294+
expect(claudeMessage.content.length).toBeGreaterThan(0);
295+
expect(claudeMessage.content[0]?.type).toBe('text');
296+
expect(claudeMessage.stop_reason).toBeDefined();
297+
expect(claudeMessage.usage).toBeDefined();
298+
expect(claudeMessage.usage.input_tokens).toBeGreaterThan(0);
299+
expect(claudeMessage.usage.output_tokens).toBeGreaterThan(0);
300+
}, 30000);
301+
302+
it('should include text content in ClaudeMessage', async () => {
303+
const response = client.callModel({
304+
model: 'meta-llama/llama-3.2-1b-instruct',
305+
input: "Say the word 'banana' and nothing else.",
306+
});
307+
308+
const claudeMessage = await response.getClaudeMessage();
309+
const textBlock = claudeMessage.content.find((b) => b.type === 'text');
310+
311+
expect(textBlock).toBeDefined();
312+
if (textBlock && textBlock.type === 'text') {
313+
expect(textBlock.text.toLowerCase()).toContain('banana');
314+
}
315+
}, 30000);
316+
317+
it('should include tool_use blocks when tools are called', async () => {
318+
const response = client.callModel({
319+
model: 'openai/gpt-4o-mini',
320+
input: "What's the weather in Paris?",
321+
tools: [
322+
{
323+
type: ToolType.Function,
324+
function: {
325+
name: 'get_weather',
326+
description: 'Get weather for a location',
327+
inputSchema: z.object({
328+
location: z.string(),
329+
}),
330+
},
331+
},
332+
],
333+
maxToolRounds: 0, // Don't execute tools, just get the tool call
334+
});
335+
336+
const claudeMessage = await response.getClaudeMessage();
337+
338+
const toolUseBlock = claudeMessage.content.find((b) => b.type === 'tool_use');
339+
expect(toolUseBlock).toBeDefined();
340+
expect(claudeMessage.stop_reason).toBe('tool_use');
341+
342+
if (toolUseBlock && toolUseBlock.type === 'tool_use') {
343+
expect(toolUseBlock.name).toBe('get_weather');
344+
expect(toolUseBlock.id).toBeDefined();
345+
expect(toolUseBlock.input).toBeDefined();
346+
}
347+
}, 30000);
348+
349+
it('should have correct model field in ClaudeMessage', async () => {
350+
const response = client.callModel({
351+
model: 'meta-llama/llama-3.2-1b-instruct',
352+
input: "Say 'test'",
353+
});
354+
355+
const claudeMessage = await response.getClaudeMessage();
356+
357+
expect(claudeMessage.model).toBeDefined();
358+
expect(typeof claudeMessage.model).toBe('string');
359+
}, 30000);
360+
});
361+
282362
describe('response.text - Text extraction', () => {
283363
it('should successfully get text from a response', async () => {
284364
const response = client.callModel({

0 commit comments

Comments (0)