Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions src/ares/code_agents/mini_swe_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@

# Copied from minisweagent's default config.
_TIMEOUT_TEMPLATE = """
The last command <command>{action}</command> timed out and has been killed.
The last command <command>{{ action }}</command> timed out and has been killed.
The output of the command was:
<output>
{output}
{{ output }}
</output>
Please try another command and make sure to avoid those requiring interactive input.
""".strip()
Expand Down Expand Up @@ -106,8 +106,19 @@ def _render_format_error_template(format_error_template: str, actions: list[str]


def _render_timeout_template(action: str, output: str) -> str:
# TODO: Use jinja2, and allow updating of configuration.
return _TIMEOUT_TEMPLATE.format(action=action, output=output)
"""Render the timeout error message using Jinja2.

Args:
action: The action/command that timed out
output: Any partial output from the command (may be empty)

Returns:
Rendered timeout error message
"""
return jinja2.Template(_TIMEOUT_TEMPLATE, undefined=jinja2.StrictUndefined).render(
action=action,
output=output,
)


@dataclasses.dataclass(kw_only=True)
Expand Down
11 changes: 6 additions & 5 deletions src/ares/code_agents/terminus2/terminus2_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ class Terminus2Agent(code_agent_base.CodeAgent):

container: containers.Container
llm_client: llm_clients.LLMClient
# TODO: Actually use the stat tracker in the agent.
tracker: stat_tracker.StatTracker = dataclasses.field(default_factory=stat_tracker.NullStatTracker)
parser_format: Literal["json", "xml"] = "json"
max_turns: int = 1_000_000 # Match terminal-bench reference (effectively unlimited)
Expand Down Expand Up @@ -489,7 +488,8 @@ async def run(self, task: str) -> None:
self._original_instruction = task # Store for summarization

# Initialize tmux session to capture initial terminal state
await self._ensure_tmux_session()
with self.tracker.timeit("t2/setup"):
await self._ensure_tmux_session()

# Capture initial terminal state using incremental output
# First call returns "Current Terminal Screen:\n{visible}" automatically
Expand Down Expand Up @@ -680,9 +680,10 @@ async def _query_llm(self) -> response.LLMResponse:
)

try:
response = await self.llm_client(
request.LLMRequest(messages=self._messages, system_prompt=self._system_prompt)
)
with self.tracker.timeit("t2/llm_request"):
response = await self.llm_client(
request.LLMRequest(messages=self._messages, system_prompt=self._system_prompt)
)
_LOGGER.debug("[%d] Received LLM response", id(self))
return response

Expand Down
20 changes: 10 additions & 10 deletions src/ares/code_agents/terminus2/terminus2_agent_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def handle_command(self, command: str) -> containers.ExecResult:
session_name = match.group(1)
self.sessions[session_name] = "active"
self.panes[session_name] = ""
return containers.ExecResult(output="", exit_code=0)
return containers.ExecResult(stdout="", stderr="", exit_code=0)

elif "tmux send-keys" in command and "-l" in command:
match = re.search(r"-t\s+(\S+)", command)
Expand All @@ -44,7 +44,7 @@ def handle_command(self, command: str) -> containers.ExecResult:
if text_match and session_name in self.panes:
text = text_match.group(1) or text_match.group(2) or ""
self.panes[session_name] += text
return containers.ExecResult(output="", exit_code=0)
return containers.ExecResult(stdout="", stderr="", exit_code=0)

elif "tmux send-keys" in command and "Enter" in command:
match = re.search(r"-t\s+(\S+)", command)
Expand All @@ -55,40 +55,40 @@ def handle_command(self, command: str) -> containers.ExecResult:
self.panes[session_name] += "\n"
if typed_command.strip():
self.panes[session_name] += f"[executed: {typed_command}]\n"
return containers.ExecResult(output="", exit_code=0)
return containers.ExecResult(stdout="", stderr="", exit_code=0)

elif "tmux capture-pane" in command:
match = re.search(r"-t\s+(\S+)", command)
output = ""
if match:
session_name = match.group(1)
output = self.panes.get(session_name, "")
return containers.ExecResult(output=output, exit_code=0)
return containers.ExecResult(stdout=output, stderr="", exit_code=0)

elif "tmux kill-session" in command:
match = re.search(r"-t\s+(\S+)", command)
if match:
session_name = match.group(1)
self.sessions.pop(session_name, None)
self.panes.pop(session_name, None)
return containers.ExecResult(output="", exit_code=0)
return containers.ExecResult(stdout="", stderr="", exit_code=0)

elif "tmux has-session" in command:
match = re.search(r"-t\s+(\S+)", command)
if match:
session_name = match.group(1)
exit_code = 0 if session_name in self.sessions else 1
return containers.ExecResult(output="", exit_code=exit_code)
return containers.ExecResult(output="", exit_code=1)
return containers.ExecResult(stdout="", stderr="", exit_code=exit_code)
return containers.ExecResult(stdout="", stderr="", exit_code=1)

elif "which tmux" in command:
return containers.ExecResult(output="/usr/bin/tmux", exit_code=0)
return containers.ExecResult(stdout="/usr/bin/tmux", stderr="", exit_code=0)

elif "tmux set-option" in command:
return containers.ExecResult(output="", exit_code=0)
return containers.ExecResult(stdout="", stderr="", exit_code=0)

# Default success for other commands
return containers.ExecResult(output="", exit_code=0)
return containers.ExecResult(stdout="", stderr="", exit_code=0)


class TestTerminus2AgentBasics:
Expand Down
24 changes: 22 additions & 2 deletions src/ares/containers/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,30 @@

@dataclasses.dataclass(frozen=True)
class ExecResult:
# TODO: Maybe stdout/stderr?
output: str
"""Result of executing a command in a container.

Attributes:
stdout: Standard output from the command.
stderr: Standard error output from the command.
exit_code: Exit code of the command (0 typically means success).
output: Combined stdout + stderr, kept for backward compatibility.
This is a read-only computed property, not a constructor field - prefer using stdout/stderr directly.
"""

stdout: str
stderr: str
exit_code: int

@property
def output(self) -> str:
"""Combined stdout and stderr for backward compatibility.

Returns stdout + stderr concatenated. For new code, prefer accessing
stdout and stderr separately for better error handling.
"""
# Combine with stderr second so errors appear at the end
return self.stdout + self.stderr


@dataclasses.dataclass(frozen=True)
class Resources:
Expand Down
4 changes: 3 additions & 1 deletion src/ares/containers/daytona.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@ async def exec_run(
if float(int_exit_code) != exit_code:
raise ValueError(f"Exit code is not an integer: {exit_code}")

return containers.ExecResult(output=result.result, exit_code=int_exit_code)
# Daytona returns stdout and stderr combined in the `result` field.
# Store the combined text in stdout and leave stderr empty for now.
return containers.ExecResult(stdout=result.result, stderr="", exit_code=int_exit_code)

def stop_and_remove(self) -> None:
"""Stop and remove the container."""
Expand Down
4 changes: 3 additions & 1 deletion src/ares/containers/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,9 @@ async def exec_run(
timeout=timeout_s,
)
result_str = result.output.decode("utf-8", errors="replace")
return containers.ExecResult(output=result_str, exit_code=result.exit_code)
# Docker returns stdout and stderr combined in the `output` field.
# Store the combined text in stdout and leave stderr empty for now.
return containers.ExecResult(stdout=result_str, stderr="", exit_code=result.exit_code)

def stop_and_remove(self) -> None:
"""Stop and remove the container."""
Expand Down
Loading
Loading