From 5e6d64881ff868be6c70c81d979ae8499c9ff857 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Fri, 9 Jan 2026 18:54:02 +0000
Subject: [PATCH] feat: improve message history pruning to keep tool pairs together

- Add `pruneMessages` function to `llm-executor.ts`
- Update `executeLlmStep` to use robust pruning instead of naive slicing
- Ensure orphaned tool results are re-attached to their parent assistant message
- Fall back to naive slicing if the parent is not found or the history is malformed
---
 src/runner/executors/llm-executor.ts | 40 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 5 deletions(-)

diff --git a/src/runner/executors/llm-executor.ts b/src/runner/executors/llm-executor.ts
index 1659796..791bbb8 100644
--- a/src/runner/executors/llm-executor.ts
+++ b/src/runner/executors/llm-executor.ts
@@ -130,6 +130,36 @@ function mapToCoreMessages(messages: LLMMessage[]): any[] {
   return coreMessages;
 }
 
+// --- Helper Functions ---
+
+/**
+ * Prunes the message history to the last N messages, ensuring that tool
+ * calls and their tool results are kept together.
+ */
+export function pruneMessages(messages: LLMMessage[], maxHistory: number): LLMMessage[] {
+  if (messages.length <= maxHistory) {
+    return messages;
+  }
+
+  let startIndex = messages.length - maxHistory;
+
+  // Backtrack past tool messages so the window never opens on an orphaned tool result
+  while (startIndex > 0 && messages[startIndex].role === 'tool') {
+    startIndex--;
+  }
+
+  // Check whether we landed on a valid parent (an assistant message with tool_calls)
+  const candidate = messages[startIndex];
+  if (candidate.role === 'assistant' && candidate.tool_calls && candidate.tool_calls.length > 0) {
+    // Found the parent; include it and everything after it
+    return messages.slice(startIndex);
+  }
+
+  // Fall back to naive slicing if we can't find a clean parent connection
+  // (matches the previous behavior for malformed histories, avoiding regressions)
+  return messages.slice(messages.length - maxHistory);
+}
+
 // --- Main Execution Logic ---
 
 export async function executeLlmStep(
@@ -255,11 +285,11 @@
   // Enforce maxMessageHistory to prevent context window exhaustion
   let messagesForTurn = currentMessages;
   if (step.maxMessageHistory && currentMessages.length > step.maxMessageHistory) {
-    // Keep the last N messages
-    // Note: This naive slicing might cut off a tool_call that corresponds to a tool_result
-    // but robust models should handle it or we accept the degradation for stability.
-    messagesForTurn = currentMessages.slice(-step.maxMessageHistory);
-    logger.debug(`  ✂️ Pruned context to last ${step.maxMessageHistory} messages`);
+    // Keep the last N messages (with robust pruning to keep tool pairs together)
+    messagesForTurn = pruneMessages(currentMessages, step.maxMessageHistory);
+    logger.debug(
+      `  ✂️ Pruned context to last ${messagesForTurn.length} messages (maxHistory=${step.maxMessageHistory})`
+    );
   }
 
   const coreMessages = mapToCoreMessages(messagesForTurn);
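
Usage sketch for reviewers (not part of the patch): a minimal example of the
pruning behavior. The message literals are hypothetical; the `content` and
`tool_call_id` fields are assumed from a typical LLMMessage shape, and only
`role` and `tool_calls` are taken from the diff above.

  import { pruneMessages } from './src/runner/executors/llm-executor';

  // Hypothetical history: one assistant message fans out into two tool results.
  const history = [
    { role: 'user', content: 'Summarize the repo' },
    { role: 'assistant', content: '', tool_calls: [{ id: 'a' }, { id: 'b' }] },
    { role: 'tool', tool_call_id: 'a', content: 'result A' },
    { role: 'tool', tool_call_id: 'b', content: 'result B' },
    { role: 'assistant', content: 'Done.' },
  ] as any[]; // cast only because the literals omit fields LLMMessage may require

  // A naive slice(-3) would open the window on the orphaned tool result for
  // call 'a'. pruneMessages backtracks to the parent assistant message instead:
  const pruned = pruneMessages(history, 3);
  // => 4 messages: assistant(tool_calls a,b), tool(a), tool(b), assistant('Done.')

Note that the pruned window can exceed maxHistory by a few messages when the
parent is re-attached; that is the intended trade-off over dropping tool results.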