diff --git a/src/core/assistant-message/XmlToolCallFallbackParser.ts b/src/core/assistant-message/XmlToolCallFallbackParser.ts new file mode 100644 index 00000000000..89c62477d47 --- /dev/null +++ b/src/core/assistant-message/XmlToolCallFallbackParser.ts @@ -0,0 +1,138 @@ +/** + * Fallback parser for XML-formatted tool calls in text responses. + * + * When a model doesn't support native function/tool calling (common with + * some OpenAI-compatible proxies), it may output tool calls as XML text + * instead of structured tool_call events. This parser extracts those + * XML tool calls from the text and converts them to ToolUse blocks. + * + * Supported format: + * + * value + * + * + * For example: + * + * src/main.ts + * + */ + +import { type ToolName, toolNames } from "@roo-code/types" + +import { type ToolUse, type ToolParamName, toolParamNames, TOOL_ALIASES } from "../../shared/tools" + +/** + * Set of all recognized tool names (canonical names + aliases). + */ +const ALL_TOOL_NAMES: Set = new Set([...toolNames, ...Object.keys(TOOL_ALIASES)]) + +/** + * Set of all recognized parameter names for quick lookup. + */ +const VALID_PARAM_NAMES: Set = new Set(toolParamNames) + +/** + * Resolve an alias to its canonical tool name, or return the name as-is if not aliased. + */ +function resolveAlias(name: string): ToolName | undefined { + if ((toolNames as readonly string[]).includes(name)) { + return name as ToolName + } + const aliased = TOOL_ALIASES[name] + return aliased ?? undefined +} + +/** + * Extract parameter values from the inner XML content of a tool call. + * + * Handles both single-line and multi-line parameter values: + * src/main.ts + * line 1\nline 2 + */ +function extractParams(innerXml: string): Partial> { + const params: Partial> = {} + + // Match XML parameter tags - supports multi-line content within params. + // Uses a non-greedy match to handle multiple params correctly. + const paramRegex = /<(\w+)>([\s\S]*?)<\/\1>/g + let match: RegExpExecArray | null + + while ((match = paramRegex.exec(innerXml)) !== null) { + const paramName = match[1] + const paramValue = match[2] + + if (VALID_PARAM_NAMES.has(paramName)) { + params[paramName as ToolParamName] = paramValue + } + } + + return params +} + +export interface XmlToolCallParseResult { + /** The tool uses parsed from the text. */ + toolUses: ToolUse[] + /** Whether any tool calls were found and parsed. */ + found: boolean +} + +/** + * Parse XML-formatted tool calls from text content. + * + * Scans the given text for patterns like `value` + * where tool_name is a recognized tool name. Returns an array of ToolUse blocks. + * + * @param text - The text content to scan for XML tool calls + * @returns Parse result with found tool uses + */ +export function parseXmlToolCalls(text: string): XmlToolCallParseResult { + const toolUses: ToolUse[] = [] + + if (!text || text.trim().length === 0) { + return { toolUses, found: false } + } + + // Build a regex that matches any known tool name as an XML tag. + // The tool name must be a complete word boundary to avoid false positives + // with tags like . + const toolNamePattern = [...ALL_TOOL_NAMES].join("|") + + // Match: ...content... + // - Tool name must be an exact match (not a substring of another tag) + // - Content between tags is captured (can be multi-line) + const toolCallRegex = new RegExp(`<(${toolNamePattern})>([\\s\\S]*?)<\\/\\1>`, "g") + + let match: RegExpExecArray | null + let idCounter = 0 + + while ((match = toolCallRegex.exec(text)) !== null) { + const rawToolName = match[1] + const innerContent = match[2] + + const canonicalName = resolveAlias(rawToolName) + if (!canonicalName) { + continue + } + + const params = extractParams(innerContent) + + idCounter++ + const toolUse: ToolUse = { + type: "tool_use", + id: `xml_fallback_${idCounter}_${Date.now()}`, + name: canonicalName, + params, + partial: false, + usedLegacyFormat: true, + } + + // If the alias differs from canonical, preserve it + if (rawToolName !== canonicalName) { + toolUse.originalName = rawToolName + } + + toolUses.push(toolUse) + } + + return { toolUses, found: toolUses.length > 0 } +} diff --git a/src/core/assistant-message/__tests__/XmlToolCallFallbackParser.spec.ts b/src/core/assistant-message/__tests__/XmlToolCallFallbackParser.spec.ts new file mode 100644 index 00000000000..bb11aef83ff --- /dev/null +++ b/src/core/assistant-message/__tests__/XmlToolCallFallbackParser.spec.ts @@ -0,0 +1,304 @@ +import { parseXmlToolCalls } from "../XmlToolCallFallbackParser" + +describe("XmlToolCallFallbackParser", () => { + describe("parseXmlToolCalls", () => { + it("should return empty result for empty text", () => { + expect(parseXmlToolCalls("").found).toBe(false) + expect(parseXmlToolCalls("").toolUses).toEqual([]) + }) + + it("should return empty result for text with no tool calls", () => { + const text = "I'll help you with that task. Let me think about the best approach." + const result = parseXmlToolCalls(text) + expect(result.found).toBe(false) + expect(result.toolUses).toEqual([]) + }) + + it("should parse a single read_file tool call", () => { + const text = `Let me read the file first. + + +src/main.ts +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].type).toBe("tool_use") + expect(result.toolUses[0].name).toBe("read_file") + expect(result.toolUses[0].params.path).toBe("src/main.ts") + expect(result.toolUses[0].partial).toBe(false) + expect(result.toolUses[0].usedLegacyFormat).toBe(true) + }) + + it("should parse a write_to_file tool call with multi-line content", () => { + const text = ` +src/hello.ts +export function hello() { + console.log("hello world") +} + +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].name).toBe("write_to_file") + expect(result.toolUses[0].params.path).toBe("src/hello.ts") + expect(result.toolUses[0].params.content).toContain("hello world") + }) + + it("should parse multiple tool calls in a single response", () => { + const text = `I'll read both files. + + +src/a.ts + + +Now let me also check the other file. + + +src/b.ts +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(2) + expect(result.toolUses[0].params.path).toBe("src/a.ts") + expect(result.toolUses[1].params.path).toBe("src/b.ts") + }) + + it("should parse execute_command tool call", () => { + const text = ` +npm test +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].name).toBe("execute_command") + expect(result.toolUses[0].params.command).toBe("npm test") + }) + + it("should parse search_files tool call with multiple params", () => { + const text = ` +src +function\\s+\\w+ +*.ts +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].name).toBe("search_files") + expect(result.toolUses[0].params.path).toBe("src") + expect(result.toolUses[0].params.regex).toBe("function\\s+\\w+") + expect(result.toolUses[0].params.file_pattern).toBe("*.ts") + }) + + it("should parse list_files tool call", () => { + const text = ` +src +true +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].name).toBe("list_files") + expect(result.toolUses[0].params.path).toBe("src") + expect(result.toolUses[0].params.recursive).toBe("true") + }) + + it("should parse attempt_completion tool call", () => { + const text = ` +I've completed the task successfully. +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].name).toBe("attempt_completion") + expect(result.toolUses[0].params.result).toBe("I've completed the task successfully.") + }) + + it("should parse apply_diff tool call with multi-line diff content", () => { + const text = ` +src/main.ts +<<<<<<< SEARCH +:start_line:1 +------- +old content +======= +new content +>>>>>>> REPLACE +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].name).toBe("apply_diff") + expect(result.toolUses[0].params.path).toBe("src/main.ts") + expect(result.toolUses[0].params.diff).toContain("SEARCH") + expect(result.toolUses[0].params.diff).toContain("REPLACE") + }) + + it("should handle tool aliases (write_file -> write_to_file)", () => { + const text = ` +src/test.ts +test content +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].name).toBe("write_to_file") + expect(result.toolUses[0].originalName).toBe("write_file") + }) + + it("should not match non-tool XML tags", () => { + const text = ` +VS Code version: 1.107.1 + + + +Let me analyze the situation. +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(false) + expect(result.toolUses).toEqual([]) + }) + + it("should handle tool calls inline with surrounding text", () => { + const text = `First, I'll check the directory. .false That should give us what we need.` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].name).toBe("list_files") + expect(result.toolUses[0].params.path).toBe(".") + }) + + it("should generate unique IDs for each tool call", () => { + const text = `a.ts +b.ts` + + const result = parseXmlToolCalls(text) + expect(result.toolUses).toHaveLength(2) + expect(result.toolUses[0].id).not.toBe(result.toolUses[1].id) + }) + + it("should parse ask_followup_question tool call", () => { + const text = ` +Which file would you like me to modify? +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].name).toBe("ask_followup_question") + expect(result.toolUses[0].params.question).toBe("Which file would you like me to modify?") + }) + + it("should parse switch_mode tool call", () => { + const text = ` +architect +Need to design the solution first +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].name).toBe("switch_mode") + expect(result.toolUses[0].params.mode_slug).toBe("architect") + expect(result.toolUses[0].params.reason).toBe("Need to design the solution first") + }) + + it("should parse new_task tool call", () => { + const text = ` +code +Implement the feature +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].name).toBe("new_task") + expect(result.toolUses[0].params.mode).toBe("code") + expect(result.toolUses[0].params.message).toBe("Implement the feature") + }) + + it("should parse use_mcp_tool tool call", () => { + const text = ` +my-server +my-tool +{"key": "value"} +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].name).toBe("use_mcp_tool") + expect(result.toolUses[0].params.server_name).toBe("my-server") + expect(result.toolUses[0].params.tool_name).toBe("my-tool") + expect(result.toolUses[0].params.arguments).toBe('{"key": "value"}') + }) + + it("should ignore unknown parameter names within tool calls", () => { + const text = ` +test.ts +should be ignored +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].params.path).toBe("test.ts") + expect(Object.keys(result.toolUses[0].params)).toHaveLength(1) + }) + + it("should handle whitespace in parameter values", () => { + const text = ` + src/main.ts +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + // Whitespace in params is preserved as-is (tool handlers may trim) + expect(result.toolUses[0].params.path).toBe(" src/main.ts ") + }) + + it("should handle execute_command with cwd parameter", () => { + const text = ` +ls -la +/home/user/project +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses[0].name).toBe("execute_command") + expect(result.toolUses[0].params.command).toBe("ls -la") + expect(result.toolUses[0].params.cwd).toBe("/home/user/project") + }) + + it("should handle null/undefined input gracefully", () => { + expect(parseXmlToolCalls(null as unknown as string).found).toBe(false) + expect(parseXmlToolCalls(undefined as unknown as string).found).toBe(false) + }) + + it("should handle codebase_search tool call", () => { + const text = ` +authentication middleware +src +` + + const result = parseXmlToolCalls(text) + expect(result.found).toBe(true) + expect(result.toolUses).toHaveLength(1) + expect(result.toolUses[0].name).toBe("codebase_search") + expect(result.toolUses[0].params.query).toBe("authentication middleware") + expect(result.toolUses[0].params.path).toBe("src") + }) + }) +}) diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index f4e41c1bfd7..7ca7611e851 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -105,6 +105,7 @@ import { RooIgnoreController } from "../ignore/RooIgnoreController" import { RooProtectedController } from "../protect/RooProtectedController" import { type AssistantMessageContent, presentAssistantMessage } from "../assistant-message" import { NativeToolCallParser } from "../assistant-message/NativeToolCallParser" +import { parseXmlToolCalls } from "../assistant-message/XmlToolCallFallbackParser" import { manageContext, willManageContext } from "../context-management" import { ClineProvider } from "../webview/ClineProvider" import { MultiSearchReplaceDiffStrategy } from "../diff/strategies/multi-search-replace" @@ -3425,10 +3426,45 @@ export class Task extends EventEmitter implements TaskLike { // Check if we have any content to process (text or tool uses) const hasTextContent = assistantMessage.length > 0 - const hasToolUses = this.assistantMessageContent.some( + let hasToolUses = this.assistantMessageContent.some( (block) => block.type === "tool_use" || block.type === "mcp_tool_use", ) + // XML tool call fallback: When the model doesn't support native function calling + // (common with some OpenAI-compatible proxies), it may output tool calls as XML + // text. If we have text content but no native tool uses, try to parse XML tool + // calls from the text as a fallback. (See: GitHub issue #11187) + if (hasTextContent && !hasToolUses) { + const fallbackResult = parseXmlToolCalls(assistantMessage) + if (fallbackResult.found) { + console.log( + `[Task#${this.taskId}] XML tool call fallback: parsed ${fallbackResult.toolUses.length} tool(s) from text`, + ) + + // Replace the text block(s) with the parsed tool uses + // Keep only non-text blocks (if any) from the original content + this.assistantMessageContent = this.assistantMessageContent.filter( + (block) => block.type !== "text", + ) + + // Add the parsed tool uses + for (const toolUse of fallbackResult.toolUses) { + this.assistantMessageContent.push(toolUse) + } + + // Present each tool call so they are processed by presentAssistantMessage + this.userMessageContentReady = false + for (const toolUse of fallbackResult.toolUses) { + presentAssistantMessage(this) + } + + // Re-check hasToolUses now that we've added fallback-parsed tools + hasToolUses = this.assistantMessageContent.some( + (block) => block.type === "tool_use" || block.type === "mcp_tool_use", + ) + } + } + if (hasTextContent || hasToolUses) { // Reset counter when we get a successful response with content this.consecutiveNoAssistantMessagesCount = 0