fix: improve Claude message detection and block conversion

mattapperson · claude · mattapperson · commit 559fd3592735 · 2025-12-16T10:29:17.000-05:00
- Improve isClaudeStyleMessages() heuristic to check ALL messages for Claude-specific features (tool_result, image with source, tool_use)
- Properly convert tool_use blocks to ResponsesOutputItemFunctionCall
- Convert image blocks in user messages to OpenResponsesInputMessageItem
- Handle assistant images via synthetic function call outputs
- Add TODO comment for cache_read_input_tokens mapping
- Add console.warn for JSON parsing failures in tool arguments
- Add E2E tests for getClaudeMessage() output format

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/src/funcs/callModel.ts b/src/funcs/callModel.ts
@@ -27,43 +27,61 @@ export type CallModelTools =
 
 /**
  * Check if input is Anthropic Claude-style messages (ClaudeMessageParam[])
- * Claude messages have role of 'user' or 'assistant' only and content can be string or array of blocks
- * with Claude-specific block types ('text', 'image', 'tool_use', 'tool_result')
+ *
+ * Claude messages have only 'user' or 'assistant' roles (no 'system', 'tool', 'developer')
+ * and may contain Claude-specific block types in content arrays.
+ *
+ * We check for Claude-ONLY features to distinguish from OpenAI format:
+ * - 'tool_result' blocks (Claude-specific; OpenAI uses role: 'tool')
+ * - 'image' blocks with 'source' object, or 'tool_use' blocks with a string 'id' (Claude-specific structures)
  */
 function isClaudeStyleMessages(
   input: CallModelInput
 ): input is models.ClaudeMessageParam[] {
-  if (!Array.isArray(input)) {
-    return false;
-  }
-  if (input.length === 0) {
+  if (!Array.isArray(input) || input.length === 0) {
     return false;
   }
 
-  const first = input[0] as Record<string, unknown>;
-  // Must have role and no 'type' field at top level
-  if (!first || !("role" in first) || "type" in first) {
-    return false;
-  }
+  // Check ALL messages for Claude-specific features
+  for (const msg of input) {
+    const m = msg as Record<string, unknown>;
+    if (!m || !("role" in m) || "type" in m) {
+      continue;
+    }
 
-  const content = first["content"];
-  // If content is an array with Claude-specific block types, it's Claude-style
-  if (Array.isArray(content) && content.length > 0) {
-    const firstBlock = content[0] as Record<string, unknown>;
-    const blockType = firstBlock?.["type"];
-    // Claude blocks have specific types like 'text', 'image', 'tool_use', 'tool_result'
-    if (
-      blockType === "text" ||
-      blockType === "image" ||
-      blockType === "tool_use" ||
-      blockType === "tool_result"
-    ) {
-      return true;
+    // OpenAI has 'system', 'developer', 'tool' roles that Claude doesn't have
+    // If we see these roles, it's definitely NOT Claude format
+    const role = m["role"];
+    if (role === "system" || role === "developer" || role === "tool") {
+      return false;
+    }
+
+    const content = m["content"];
+    if (!Array.isArray(content)) {
+      continue;
+    }
+
+    for (const block of content) {
+      const b = block as Record<string, unknown>;
+      const blockType = b?.["type"];
+      // 'tool_result' is Claude-specific (OpenAI uses role: 'tool' messages instead)
+      if (blockType === "tool_result") {
+        return true;
+      }
+      // 'image' with 'source' object is Claude-specific
+      // OpenAI uses 'image_url' structure instead
+      if (blockType === "image" && typeof b?.["source"] === "object") {
+        return true;
+      }
+      // 'tool_use' blocks are Claude-specific (OpenAI uses 'tool_calls' array on message)
+      if (blockType === "tool_use" && typeof b?.["id"] === "string") {
+        return true;
+      }
     }
   }
 
-  // String content with only 'user' or 'assistant' roles could be either format
-  // Default to Chat-style for ambiguous cases (more permissive)
+  // No Claude-specific features found
+  // Default to NOT Claude (prefer OpenAI chat format as it's more common)
   return false;
 }
 
@@ -192,16 +210,41 @@ function convertChatToResponsesInput(
   ) as models.OpenResponsesInput;
 }
 
+/**
+ * Convert a Claude image source to a URL string
+ */
+function claudeImageSourceToUrl(
+  source: models.ClaudeBase64ImageSource | models.ClaudeURLImageSource
+): string {
+  if (source.type === "url") {
+    return source.url;
+  }
+  // Convert base64 to data URL
+  return `data:${source.media_type};base64,${source.data}`;
+}
+
+/**
+ * Generate a callId for synthetic function_call_output items (timestamp plus a module-level counter)
+ */
+let syntheticIdCounter = 0;
+function generateSyntheticId(): string {
+  return `synthetic_${Date.now()}_${++syntheticIdCounter}`;
+}
+
 /**
  * Convert Claude-style messages to responses-style input
+ *
+ * Handles:
+ * - text blocks -> concatenated text content (separate input_text parts when a user message also has images)
+ * - tool_result blocks -> OpenResponsesFunctionCallOutput
+ * - tool_use blocks -> ResponsesOutputItemFunctionCall (preserves tool call history)
+ * - image blocks in user messages -> OpenResponsesInputMessageItem with ResponseInputImage
+ * - image blocks in assistant messages -> synthetic function call output with image data
  */
 function convertClaudeToResponsesInput(
   messages: models.ClaudeMessageParam[]
 ): models.OpenResponsesInput {
-  const result: (
-    | models.OpenResponsesEasyInputMessage
-    | models.OpenResponsesFunctionCallOutput
-  )[] = [];
+  const result: models.OpenResponsesInput1[] = [];
 
   for (const msg of messages) {
     const { role, content } = msg;
@@ -215,13 +258,16 @@ function convertClaudeToResponsesInput(
       continue;
     }
 
-    // Handle array content - extract text and handle tool results
+    // Analyze content blocks
     const textParts: string[] = [];
+    const imageBlocks: models.ClaudeImageBlockParam[] = [];
+    const toolUseBlocks: models.ClaudeToolUseBlockParam[] = [];
+
     for (const block of content) {
       if (block.type === "text") {
         textParts.push(block.text);
       } else if (block.type === "tool_result") {
-        // Tool results need special handling - convert to function_call_output
+        // Tool results -> function_call_output
         let toolContent: string;
         if (typeof block.content === "string") {
           toolContent = block.content;
@@ -236,13 +282,75 @@ function convertClaudeToResponsesInput(
           callId: block.tool_use_id,
           output: toolContent,
         } as models.OpenResponsesFunctionCallOutput);
+      } else if (block.type === "tool_use") {
+        toolUseBlocks.push(block);
+      } else if (block.type === "image") {
+        imageBlocks.push(block);
       }
-      // Note: tool_use and image blocks in input are typically part of conversation history
-      // They would come from previous assistant responses, we skip them for now
     }
 
-    // If we collected text parts, add them as a message
-    if (textParts.length > 0) {
+    // Handle tool_use blocks (from assistant messages in conversation history)
+    for (const toolUse of toolUseBlocks) {
+      result.push({
+        type: "function_call",
+        callId: toolUse.id,
+        name: toolUse.name,
+        arguments: JSON.stringify(toolUse.input),
+      } as models.ResponsesOutputItemFunctionCall);
+    }
+
+    // Handle images based on role
+    if (imageBlocks.length > 0) {
+      if (role === "user") {
+        // User messages with images -> OpenResponsesInputMessageItem
+        const contentParts: (models.ResponseInputText | models.ResponseInputImage)[] = [];
+
+        // Add text parts first
+        for (const text of textParts) {
+          contentParts.push({
+            type: "input_text",
+            text,
+          } as models.ResponseInputText);
+        }
+
+        // Add image parts
+        for (const imgBlock of imageBlocks) {
+          contentParts.push({
+            type: "input_image",
+            detail: "auto",
+            imageUrl: claudeImageSourceToUrl(imgBlock.source),
+          } as models.ResponseInputImage);
+        }
+
+        result.push({
+          type: "message",
+          role: "user",
+          content: contentParts,
+        } as models.OpenResponsesInputMessageItem);
+      } else {
+        // Assistant messages with images -> synthetic function call outputs
+        // First add text content if any
+        if (textParts.length > 0) {
+          result.push({
+            role: "assistant",
+            content: textParts.join(""),
+          } as models.OpenResponsesEasyInputMessage);
+        }
+
+        // Add images as synthetic tool outputs
+        for (const imgBlock of imageBlocks) {
+          result.push({
+            type: "function_call_output",
+            callId: generateSyntheticId(),
+            output: JSON.stringify({
+              type: "image",
+              url: claudeImageSourceToUrl(imgBlock.source),
+            }),
+          } as models.OpenResponsesFunctionCallOutput);
+        }
+      }
+    } else if (textParts.length > 0) {
+      // No images, just text content
       result.push({
         role: role as "user" | "assistant",
         content: textParts.join(""),
@@ -400,6 +508,9 @@ export function callModel<TTools extends readonly Tool[] = Tool[]>(
   };
 
   // Only pass enhanced tools to wrapper (needed for auto-execution)
+  // Double assertion needed because TTools is a generic extending readonly Tool[],
+  // while enhancedTools is Tool[]. TypeScript can't statically verify that Tool[] matches
+  // the specific TTools subtype, but we know it's safe since we extracted these tools from the input.
   if (enhancedTools) {
     wrapperOptions.tools = enhancedTools as unknown as TTools;
   }
diff --git a/src/lib/stream-transformers.ts b/src/lib/stream-transformers.ts
@@ -428,8 +428,9 @@ export function convertToClaudeMessage(
 
       try {
         parsedInput = JSON.parse(fnCall.arguments);
-      } catch {
-        // If parsing fails, keep as empty object
+      } catch (e) {
+        // JSON parsing failed - likely malformed arguments from model
+        console.warn(`[OpenRouter SDK] Failed to parse tool arguments for ${fnCall.name}: ${e}`);
         parsedInput = {};
       }
 
@@ -470,6 +471,8 @@ export function convertToClaudeMessage(
       input_tokens: response.usage?.inputTokens ?? 0,
       output_tokens: response.usage?.outputTokens ?? 0,
       cache_creation_input_tokens: response.usage?.inputTokensDetails?.cachedTokens ?? 0,
+      // TODO: OpenResponses doesn't expose cache_read separately from cachedTokens.
+      // Anthropic distinguishes cache creation vs read; OpenResponses combines them.
       cache_read_input_tokens: 0,
     },
   };
diff --git a/tests/e2e/callModel.test.ts b/tests/e2e/callModel.test.ts
@@ -279,6 +279,86 @@ describe('callModel E2E Tests', () => {
     });
   });
 
+  describe('getClaudeMessage - Claude output format', () => {
+    it('should return ClaudeMessage with correct structure', async () => {
+      const response = client.callModel({
+        model: 'meta-llama/llama-3.2-1b-instruct',
+        input: "Say 'hello' and nothing else.",
+      });
+
+      const claudeMessage = await response.getClaudeMessage();
+
+      expect(claudeMessage.type).toBe('message');
+      expect(claudeMessage.role).toBe('assistant');
+      expect(claudeMessage.content).toBeInstanceOf(Array);
+      expect(claudeMessage.content.length).toBeGreaterThan(0);
+      expect(claudeMessage.content[0]?.type).toBe('text');
+      expect(claudeMessage.stop_reason).toBeDefined();
+      expect(claudeMessage.usage).toBeDefined();
+      expect(claudeMessage.usage.input_tokens).toBeGreaterThan(0);
+      expect(claudeMessage.usage.output_tokens).toBeGreaterThan(0);
+    }, 30000);
+
+    it('should include text content in ClaudeMessage', async () => {
+      const response = client.callModel({
+        model: 'meta-llama/llama-3.2-1b-instruct',
+        input: "Say the word 'banana' and nothing else.",
+      });
+
+      const claudeMessage = await response.getClaudeMessage();
+      const textBlock = claudeMessage.content.find((b) => b.type === 'text');
+
+      expect(textBlock).toBeDefined();
+      if (textBlock && textBlock.type === 'text') {
+        expect(textBlock.text.toLowerCase()).toContain('banana');
+      }
+    }, 30000);
+
+    it('should include tool_use blocks when tools are called', async () => {
+      const response = client.callModel({
+        model: 'openai/gpt-4o-mini',
+        input: "What's the weather in Paris?",
+        tools: [
+          {
+            type: ToolType.Function,
+            function: {
+              name: 'get_weather',
+              description: 'Get weather for a location',
+              inputSchema: z.object({
+                location: z.string(),
+              }),
+            },
+          },
+        ],
+        maxToolRounds: 0, // Don't execute tools, just get the tool call
+      });
+
+      const claudeMessage = await response.getClaudeMessage();
+
+      const toolUseBlock = claudeMessage.content.find((b) => b.type === 'tool_use');
+      expect(toolUseBlock).toBeDefined();
+      expect(claudeMessage.stop_reason).toBe('tool_use');
+
+      if (toolUseBlock && toolUseBlock.type === 'tool_use') {
+        expect(toolUseBlock.name).toBe('get_weather');
+        expect(toolUseBlock.id).toBeDefined();
+        expect(toolUseBlock.input).toBeDefined();
+      }
+    }, 30000);
+
+    it('should have correct model field in ClaudeMessage', async () => {
+      const response = client.callModel({
+        model: 'meta-llama/llama-3.2-1b-instruct',
+        input: "Say 'test'",
+      });
+
+      const claudeMessage = await response.getClaudeMessage();
+
+      expect(claudeMessage.model).toBeDefined();
+      expect(typeof claudeMessage.model).toBe('string');
+    }, 30000);
+  });
+
   describe('response.text - Text extraction', () => {
     it('should successfully get text from a response', async () => {
       const response = client.callModel({