@@ -27,43 +27,61 @@ export type CallModelTools =
2727
/**
 * Report whether a content block carries a feature that only exists in the
 * Claude message format.
 *
 * Recognized features:
 * - `tool_result` blocks (OpenAI chat uses `role: 'tool'` messages instead)
 * - `image` blocks with a non-null `source` object (OpenAI chat uses `image_url`)
 * - `tool_use` blocks with a string `id` (OpenAI chat uses a `tool_calls` array)
 *
 * Non-object blocks never match.
 */
function isClaudeSpecificBlock(block: unknown): boolean {
  if (typeof block !== "object" || block === null) {
    return false;
  }

  const fields = block as Record<string, unknown>;
  switch (fields["type"]) {
    case "tool_result":
      return true;
    case "image": {
      // `typeof null === "object"`, so null has to be excluded explicitly.
      const source = fields["source"];
      return typeof source === "object" && source !== null;
    }
    case "tool_use":
      return typeof fields["id"] === "string";
    default:
      return false;
  }
}

/**
 * Check if input is Anthropic Claude-style messages (ClaudeMessageParam[]).
 *
 * Claude messages only use the 'user' and 'assistant' roles (no 'system',
 * 'tool' or 'developer') and may contain Claude-specific block types in their
 * content arrays.
 *
 * Detection rules:
 * - Non-arrays and empty arrays are never Claude-style.
 * - Entries that are not objects, have no `role`, or carry a top-level `type`
 *   are ignored.
 * - Any message with role 'system', 'developer' or 'tool' makes the whole
 *   input non-Claude, regardless of where it appears in the array.
 * - Otherwise the input is Claude-style only if at least one content block
 *   carries a Claude-only feature (see `isClaudeSpecificBlock`).
 *
 * Ambiguous input (plain string content, or only 'text' blocks) is reported
 * as NOT Claude-style, so callers fall back to the chat format.
 *
 * @param input - The value to classify.
 * @returns `true` when the input should be treated as Claude messages.
 */
function isClaudeStyleMessages(
  input: CallModelInput
): input is models.ClaudeMessageParam[] {
  if (!Array.isArray(input) || input.length === 0) {
    return false;
  }

  let sawClaudeFeature = false;

  // Scan every message: a disqualifying role anywhere must win over a
  // Claude-only block seen earlier, so the verdict cannot depend on order.
  for (const entry of input) {
    const candidate: unknown = entry;
    // The `in` operator throws on primitives, so filter them out first.
    if (typeof candidate !== "object" || candidate === null) {
      continue;
    }

    const message = candidate as Record<string, unknown>;
    if (!("role" in message) || "type" in message) {
      continue;
    }

    const role = message["role"];
    if (role === "system" || role === "developer" || role === "tool") {
      return false;
    }

    if (sawClaudeFeature) {
      // Already confirmed; keep looping only to look for disqualifying roles.
      continue;
    }

    const content = message["content"];
    if (Array.isArray(content)) {
      sawClaudeFeature = content.some(isClaudeSpecificBlock);
    }
  }

  return sawClaudeFeature;
}
6987
@@ -192,16 +210,41 @@ function convertChatToResponsesInput(
192210 ) as models . OpenResponsesInput ;
193211}
194212
/**
 * Convert a Claude image source to a URL string.
 *
 * - A source whose `type` is "url" is returned as its `url` unchanged.
 * - Any other source is treated as base64 and encoded as a data URL of the
 *   form `data:<media_type>;base64,<data>`. The URL must not contain any
 *   whitespace around the separators, otherwise it is not a valid data URL.
 *
 * @param source - The Claude image source (URL or base64 payload).
 * @returns A URL string referencing or embedding the image.
 */
function claudeImageSourceToUrl(
  source: models.ClaudeBase64ImageSource | models.ClaudeURLImageSource
): string {
  if (source.type === "url") {
    return source.url;
  }

  // Base64 payload -> data URL
  return `data:${source.media_type};base64,${source.data}`;
}
225+
/**
 * Module-level counter backing `generateSyntheticId`; incremented on every call.
 */
let syntheticIdCounter = 0;

/**
 * Generate an ID for synthetic function calls.
 *
 * The ID has the form `synthetic_<timestamp>_<counter>`, where `<timestamp>`
 * is `Date.now()` at call time and `<counter>` is a per-module sequence
 * number starting at 1. The counter increases on every call, so two IDs
 * produced by the same module instance differ even within one millisecond.
 *
 * @returns A new synthetic ID string containing no whitespace.
 */
function generateSyntheticId(): string {
  syntheticIdCounter += 1;
  return `synthetic_${Date.now()}_${syntheticIdCounter}`;
}
233+
195234/**
196235 * Convert Claude-style messages to responses-style input
236+ *
237+ * Handles:
238+ * - text blocks -> concatenated text content
239+ * - tool_result blocks -> OpenResponsesFunctionCallOutput
240+ * - tool_use blocks -> ResponsesOutputItemFunctionCall (preserves tool call history)
241+ * - image blocks in user messages -> OpenResponsesInputMessageItem with ResponseInputImage
242+ * - image blocks in assistant messages -> synthetic function call output with image data
197243 */
198244function convertClaudeToResponsesInput (
199245 messages : models . ClaudeMessageParam [ ]
200246) : models . OpenResponsesInput {
201- const result : (
202- | models . OpenResponsesEasyInputMessage
203- | models . OpenResponsesFunctionCallOutput
204- ) [ ] = [ ] ;
247+ const result : models . OpenResponsesInput1 [ ] = [ ] ;
205248
206249 for ( const msg of messages ) {
207250 const { role, content } = msg ;
@@ -215,13 +258,16 @@ function convertClaudeToResponsesInput(
215258 continue ;
216259 }
217260
218- // Handle array content - extract text and handle tool results
261+ // Analyze content blocks
219262 const textParts : string [ ] = [ ] ;
263+ const imageBlocks : models . ClaudeImageBlockParam [ ] = [ ] ;
264+ const toolUseBlocks : models . ClaudeToolUseBlockParam [ ] = [ ] ;
265+
220266 for ( const block of content ) {
221267 if ( block . type === "text" ) {
222268 textParts . push ( block . text ) ;
223269 } else if ( block . type === "tool_result" ) {
224- // Tool results need special handling - convert to function_call_output
270+ // Tool results -> function_call_output
225271 let toolContent : string ;
226272 if ( typeof block . content === "string" ) {
227273 toolContent = block . content ;
@@ -236,13 +282,75 @@ function convertClaudeToResponsesInput(
236282 callId : block . tool_use_id ,
237283 output : toolContent ,
238284 } as models . OpenResponsesFunctionCallOutput ) ;
285+ } else if ( block . type === "tool_use" ) {
286+ toolUseBlocks . push ( block ) ;
287+ } else if ( block . type === "image" ) {
288+ imageBlocks . push ( block ) ;
239289 }
240- // Note: tool_use and image blocks in input are typically part of conversation history
241- // They would come from previous assistant responses, we skip them for now
242290 }
243291
244- // If we collected text parts, add them as a message
245- if ( textParts . length > 0 ) {
292+ // Handle tool_use blocks (from assistant messages in conversation history)
293+ for ( const toolUse of toolUseBlocks ) {
294+ result . push ( {
295+ type : "function_call" ,
296+ callId : toolUse . id ,
297+ name : toolUse . name ,
298+ arguments : JSON . stringify ( toolUse . input ) ,
299+ } as models . ResponsesOutputItemFunctionCall ) ;
300+ }
301+
302+ // Handle images based on role
303+ if ( imageBlocks . length > 0 ) {
304+ if ( role === "user" ) {
305+ // User messages with images -> OpenResponsesInputMessageItem
306+ const contentParts : ( models . ResponseInputText | models . ResponseInputImage ) [ ] = [ ] ;
307+
308+ // Add text parts first
309+ for ( const text of textParts ) {
310+ contentParts . push ( {
311+ type : "input_text" ,
312+ text,
313+ } as models . ResponseInputText ) ;
314+ }
315+
316+ // Add image parts
317+ for ( const imgBlock of imageBlocks ) {
318+ contentParts . push ( {
319+ type : "input_image" ,
320+ detail : "auto" ,
321+ imageUrl : claudeImageSourceToUrl ( imgBlock . source ) ,
322+ } as models . ResponseInputImage ) ;
323+ }
324+
325+ result . push ( {
326+ type : "message" ,
327+ role : "user" ,
328+ content : contentParts ,
329+ } as models . OpenResponsesInputMessageItem ) ;
330+ } else {
331+ // Assistant messages with images -> synthetic function call outputs
332+ // First add text content if any
333+ if ( textParts . length > 0 ) {
334+ result . push ( {
335+ role : "assistant" ,
336+ content : textParts . join ( "" ) ,
337+ } as models . OpenResponsesEasyInputMessage ) ;
338+ }
339+
340+ // Add images as synthetic tool outputs
341+ for ( const imgBlock of imageBlocks ) {
342+ result . push ( {
343+ type : "function_call_output" ,
344+ callId : generateSyntheticId ( ) ,
345+ output : JSON . stringify ( {
346+ type : "image" ,
347+ url : claudeImageSourceToUrl ( imgBlock . source ) ,
348+ } ) ,
349+ } as models . OpenResponsesFunctionCallOutput ) ;
350+ }
351+ }
352+ } else if ( textParts . length > 0 ) {
353+ // No images, just text content
246354 result . push ( {
247355 role : role as "user" | "assistant" ,
248356 content : textParts . join ( "" ) ,
@@ -400,6 +508,9 @@ export function callModel<TTools extends readonly Tool[] = Tool[]>(
400508 } ;
401509
402510 // Only pass enhanced tools to wrapper (needed for auto-execution)
511+ // Double assertion needed because TTools is a generic type parameter extending
512+ // readonly Tool[], while enhancedTools is a plain Tool[]. The compiler cannot prove at
513+ // compile time that Tool[] is assignable to the caller's specific TTools subtype, but we
513+ // know the cast is safe since we extracted these tools from the input.
403514 if ( enhancedTools ) {
404515 wrapperOptions . tools = enhancedTools as unknown as TTools ;
405516 }
0 commit comments