diff --git a/README.md b/README.md
index 54c5077..9feb612 100644
--- a/README.md
+++ b/README.md
@@ -195,7 +195,7 @@ AgenticLoop/
├── agent/ # Agent implementations
│ ├── base.py # BaseAgent abstract class
│ ├── context.py # Context injection
-│ ├── react_agent.py # ReAct mode
+│ ├── agent.py # ReAct agent + Ralph verification loop
│ ├── plan_execute_agent.py # Plan-and-Execute mode
│ ├── tool_executor.py # Tool execution engine
│ └── todo.py # Todo list management
diff --git a/agent/react_agent.py b/agent/agent.py
similarity index 96%
rename from agent/react_agent.py
rename to agent/agent.py
index 4ebac24..3196860 100644
--- a/agent/react_agent.py
+++ b/agent/agent.py
@@ -1,5 +1,6 @@
"""ReAct (Reasoning + Acting) agent implementation."""
+from config import Config
from llm import LLMMessage
from utils import terminal_ui
@@ -146,13 +147,14 @@ async def run(self, task: str) -> str:
tools = self.tool_executor.get_tool_schemas()
- # Use the generic ReAct loop implementation
- result = await self._react_loop(
+ # Use the Ralph loop (outer verification wrapping the inner ReAct loop)
+ result = await self._ralph_loop(
messages=[], # Not used when use_memory=True
tools=tools,
use_memory=True,
save_to_memory=True,
task=task,
+ max_iterations=Config.RALPH_LOOP_MAX_ITERATIONS,
)
self._print_memory_stats()
diff --git a/agent/base.py b/agent/base.py
index 4f088dc..7598e06 100644
--- a/agent/base.py
+++ b/agent/base.py
@@ -12,6 +12,7 @@
from .todo import TodoList
from .tool_executor import ToolExecutor
+from .verification import LLMVerifier, VerificationResult, Verifier
if TYPE_CHECKING:
from llm import LiteLLMAdapter, ModelManager
@@ -300,3 +301,99 @@ def get_current_model_info(self) -> Optional[dict]:
"provider": profile.provider,
}
return None
+
+ async def _ralph_loop(
+ self,
+ messages: List[LLMMessage],
+ tools: List,
+ use_memory: bool = True,
+ save_to_memory: bool = True,
+ task: str = "",
+ max_iterations: int = 3,
+ verifier: Optional[Verifier] = None,
+ ) -> str:
+ """Outer verification loop that wraps _react_loop.
+
+ After _react_loop returns a final answer, a verifier judges whether the
+ original task is satisfied. If not, feedback is injected and the inner
+ loop re-enters.
+
+ Args:
+ messages: Initial message list (passed through to _react_loop).
+ tools: List of available tool schemas.
+ use_memory: If True, use self.memory for context.
+ save_to_memory: If True, save messages to self.memory.
+ task: The original task description.
+ max_iterations: Maximum number of outer verification iterations.
+ verifier: Optional custom Verifier instance. Defaults to LLMVerifier.
+
+ Returns:
+ Final answer as a string.
+ """
+ if verifier is None:
+ verifier = LLMVerifier(self.llm, terminal_ui)
+
+ previous_results: List[VerificationResult] = []
+
+ for iteration in range(1, max_iterations + 1):
+ logger.debug(f"Ralph loop iteration {iteration}/{max_iterations}")
+
+ result = await self._react_loop(
+ messages=messages,
+ tools=tools,
+ use_memory=use_memory,
+ save_to_memory=save_to_memory,
+ task=task,
+ )
+
+ # Skip verification on last iteration — just return whatever we got
+ if iteration == max_iterations:
+ logger.debug("Ralph loop: max iterations reached, returning result")
+ terminal_ui.console.print(
+ f"\n[bold dark_orange]⚠ Verification skipped "
+ f"(max iterations {max_iterations} reached), returning last result[/bold dark_orange]"
+ )
+ return result
+
+ verification = await verifier.verify(
+ task=task,
+ result=result,
+ iteration=iteration,
+ previous_results=previous_results,
+ )
+ previous_results.append(verification)
+
+ if verification.complete:
+ logger.debug(f"Ralph loop: verified complete — {verification.reason}")
+ terminal_ui.console.print(
+ f"\n[bold green]✓ Verification passed "
+ f"(attempt {iteration}/{max_iterations}): {verification.reason}[/bold green]"
+ )
+ return result
+
+ # Print the incomplete result so the user can see what the agent produced
+ terminal_ui.print_unfinished_answer(result)
+
+ # Inject feedback as a user message so the next _react_loop iteration
+ # picks it up from memory.
+ feedback = (
+ f"Your previous answer was reviewed and found incomplete. "
+ f"Feedback: {verification.reason}\n\n"
+ f"Please address the feedback and provide a complete answer."
+ )
+
+ logger.debug(f"Ralph loop: injecting feedback — {verification.reason}")
+ terminal_ui.console.print(
+ f"\n[bold yellow]⟳ Verification feedback (attempt {iteration}/{max_iterations}): "
+ f"{verification.reason}[/bold yellow]"
+ )
+
+ if use_memory and save_to_memory:
+ await self.memory.add_message(LLMMessage(role="user", content=feedback))
+ else:
+ messages.append(LLMMessage(role="user", content=feedback))
+
+ # Should not reach here, but return last result as safety fallback
+ return result # type: ignore[possibly-undefined]
diff --git a/agent/verification.py b/agent/verification.py
new file mode 100644
index 0000000..5d9608c
--- /dev/null
+++ b/agent/verification.py
@@ -0,0 +1,135 @@
+"""Verification interface and default LLM verifier for the Ralph Loop.
+
+The verifier judges whether the agent's final answer truly satisfies the
+original task. If not, feedback is returned so the outer loop can re-enter
+the inner ReAct loop with corrective guidance.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Protocol, runtime_checkable
+
+from llm import LLMMessage
+from utils import get_logger
+from utils.tui.progress import AsyncSpinner
+
+if TYPE_CHECKING:
+ from llm import LiteLLMAdapter
+ from utils.tui.terminal_ui import TerminalUI
+
+logger = get_logger(__name__)
+
+
+@dataclass
+class VerificationResult:
+ """Result of a verification check."""
+
+ complete: bool
+ reason: str
+
+
+@runtime_checkable
+class Verifier(Protocol):
+ """Protocol for task-completion verifiers."""
+
+ async def verify(
+ self,
+ task: str,
+ result: str,
+ iteration: int,
+ previous_results: list[VerificationResult],
+ ) -> VerificationResult:
+ """Judge whether *result* satisfies *task*.
+
+ Args:
+ task: The original user task description.
+ result: The agent's final answer from the inner loop.
+ iteration: Current outer-loop iteration (1-indexed).
+ previous_results: Verification results from earlier iterations.
+
+ Returns:
+ VerificationResult indicating completion status and reasoning.
+ """
+ ... # pragma: no cover
+
+
+_VERIFICATION_PROMPT = """\
+You are a strict verification assistant. Your job is to determine whether an \
+AI agent's answer fully and correctly completes the user's original task.
+
+Original task:
+{task}
+
+Agent's answer:
+{result}
+
+{previous_context}
+
+Guidelines:
+1. If the task is a ONE-TIME request (e.g. "calculate 1+1", "summarize this file"), \
+judge whether the answer is correct and complete.
+
+2. If the task requires MULTIPLE steps and only some were done, respond INCOMPLETE \
+with specific feedback on what remains.
+
+Respond with EXACTLY one line, in one of these forms:
+- COMPLETE: <brief reason the task is satisfied>
+- INCOMPLETE: <specific feedback on what is missing or wrong>
+
+Do NOT restate the answer. Only judge it."""
+
+
+class LLMVerifier:
+ """Default verifier that uses a lightweight LLM call (no tools)."""
+
+ def __init__(self, llm: LiteLLMAdapter, terminal_ui: TerminalUI | None = None):
+ self.llm = llm
+ self._tui = terminal_ui
+
+ async def verify(
+ self,
+ task: str,
+ result: str,
+ iteration: int,
+ previous_results: list[VerificationResult],
+ ) -> VerificationResult:
+ previous_context = ""
+ if previous_results:
+ lines = []
+ for i, pr in enumerate(previous_results, 1):
+ status = "complete" if pr.complete else "incomplete"
+ lines.append(f" Attempt {i}: {status} — {pr.reason}")
+ previous_context = "Previous verification attempts:\n" + "\n".join(lines)
+
+ prompt = _VERIFICATION_PROMPT.format(
+ task=task,
+ result=result[:4000], # Truncate to avoid excessive tokens
+ previous_context=previous_context,
+ )
+
+ messages = [
+ LLMMessage(role="system", content="You are a task-completion verifier."),
+ LLMMessage(role="user", content=prompt),
+ ]
+
+ console = self._tui.console if self._tui else None
+ if console:
+ async with AsyncSpinner(console, "Verifying completion..."):
+ response = await self.llm.call_async(messages=messages, tools=None, max_tokens=512)
+ else:
+ response = await self.llm.call_async(messages=messages, tools=None, max_tokens=512)
+
+ text = (response.content or "").strip()
+ logger.debug(f"Verification response (iter {iteration}): {text}")
+
+ upper = text.upper()
+ if upper.startswith("COMPLETE"):
+ reason = text.split(":", 1)[1].strip() if ":" in text else text
+ return VerificationResult(complete=True, reason=reason)
+ else:
+ reason = text.split(":", 1)[1].strip() if ":" in text else text
+ return VerificationResult(complete=False, reason=reason)
diff --git a/config.py b/config.py
index 3bb987a..5dccdd3 100644
--- a/config.py
+++ b/config.py
@@ -17,6 +17,9 @@
TOOL_TIMEOUT=600
MAX_ITERATIONS=1000
+
+# Ralph Loop (outer verification loop — re-checks task completion)
+# RALPH_LOOP_MAX_ITERATIONS=3
"""
@@ -75,6 +78,9 @@ class Config:
# Agent Configuration
MAX_ITERATIONS = int(_cfg.get("MAX_ITERATIONS", "1000"))
+ # Ralph Loop (outer verification loop)
+ RALPH_LOOP_MAX_ITERATIONS = int(_cfg.get("RALPH_LOOP_MAX_ITERATIONS", "3"))
+
# Retry Configuration
RETRY_MAX_ATTEMPTS = int(_cfg.get("RETRY_MAX_ATTEMPTS", "3"))
RETRY_INITIAL_DELAY = float(_cfg.get("RETRY_INITIAL_DELAY", "1.0"))
diff --git a/docs/configuration.md b/docs/configuration.md
index fb36680..a7d6182 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -91,6 +91,16 @@ Open `.aloop/models.yaml` in your editor, then it will auto-reload after you sav
Add/remove/default are done by editing `.aloop/models.yaml` directly.
+## Ralph Loop (Outer Verification)
+
+An outer loop verifies that the agent's final answer actually satisfies
+the original task. If the answer is judged incomplete, the verifier's
+feedback is injected and the inner ReAct loop runs again. Enabled by default.
+
+```bash
+RALPH_LOOP_MAX_ITERATIONS=3 # Max verification attempts before returning
+```
+
## Email Notification Configuration (Resend)
Used by the `notify` tool to send emails via [Resend](https://resend.com):
diff --git a/docs/examples.md b/docs/examples.md
index d1a42a1..b3d4781 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -229,7 +229,7 @@ See [Memory Management](memory-management.md) for more details.
```python
import asyncio
-from agent.react_agent import ReActAgent
+from agent.agent import ReActAgent
from llm import LiteLLMAdapter, ModelManager
from tools import CalculatorTool, FileReadTool
from config import Config
diff --git a/docs/extending.md b/docs/extending.md
index 155f087..8ed1fa0 100644
--- a/docs/extending.md
+++ b/docs/extending.md
@@ -274,7 +274,7 @@ Here's an agent that breaks tasks into subtasks and delegates:
```python
# agent/collaborative_agent.py
from .base import BaseAgent
-from .react_agent import ReActAgent
+from .agent import ReActAgent
class CollaborativeAgent(BaseAgent):
"""Agent that breaks tasks into subtasks and delegates to specialists."""
diff --git a/docs/memory-management.md b/docs/memory-management.md
index b556305..e8bf522 100644
--- a/docs/memory-management.md
+++ b/docs/memory-management.md
@@ -356,7 +356,7 @@ This provides exact token counts instead of estimates.
```python
from memory import MemoryManager, MemoryConfig
-from agent.react_agent import ReActAgent
+from agent.agent import ReActAgent
# Create memory config
config = MemoryConfig(
diff --git a/examples/react_example.py b/examples/react_example.py
index 011925b..ad6e7a8 100644
--- a/examples/react_example.py
+++ b/examples/react_example.py
@@ -7,7 +7,7 @@
# Add parent directory to path to import modules
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from agent.react_agent import ReActAgent
+from agent.agent import ReActAgent
from llm import LiteLLMAdapter, ModelManager
from tools.calculator import CalculatorTool
from tools.file_ops import FileReadTool, FileWriteTool
diff --git a/examples/web_fetch_example.py b/examples/web_fetch_example.py
index 030bdcb..b6097ff 100644
--- a/examples/web_fetch_example.py
+++ b/examples/web_fetch_example.py
@@ -6,7 +6,7 @@
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from agent.react_agent import ReActAgent
+from agent.agent import ReActAgent
from llm import LiteLLMAdapter, ModelManager
from tools.web_fetch import WebFetchTool
diff --git a/main.py b/main.py
index e51e35a..b33049e 100644
--- a/main.py
+++ b/main.py
@@ -4,7 +4,7 @@
import asyncio
import warnings
-from agent.react_agent import ReActAgent
+from agent.agent import ReActAgent
from config import Config
from interactive import run_interactive_mode, run_model_setup_mode
from llm import LiteLLMAdapter, ModelManager
@@ -18,7 +18,6 @@
from tools.shell import ShellTool
from tools.shell_background import BackgroundTaskManager, ShellTaskStatusTool
from tools.smart_edit import SmartEditTool
-from tools.timer import TimerTool
from tools.web_fetch import WebFetchTool
from tools.web_search import WebSearchTool
from utils import get_log_file_path, setup_logger, terminal_ui
@@ -54,7 +53,6 @@ def create_agent(model_id: str | None = None):
CodeNavigatorTool(),
ShellTool(task_manager=task_manager),
ShellTaskStatusTool(task_manager=task_manager),
- TimerTool(),
NotifyTool(),
]
diff --git a/rfc/003-asyncio-migration.md b/rfc/003-asyncio-migration.md
index 90a2dc0..a73e2cc 100644
--- a/rfc/003-asyncio-migration.md
+++ b/rfc/003-asyncio-migration.md
@@ -54,7 +54,7 @@ Current execution paths contain multiple blocking operations (HTTP, subprocess,
This RFC targets the runtime path that executes agent loops and tools:
- **Entrypoints**: `main.py`, `cli.py`, `interactive.py`
-- **Agent runtime**: `agent/base.py`, `agent/react_agent.py`, `agent/plan_execute_agent.py`, `agent/tool_executor.py`
+- **Agent runtime**: `agent/base.py`, `agent/agent.py`, `agent/plan_execute_agent.py`, `agent/tool_executor.py`
- **LLM layer**: `llm/litellm_adapter.py`, `llm/retry.py`
- **Memory/persistence**: `memory/manager.py`, `memory/store.py`
- **Tools**: `tools/*` (prioritized conversions, not all at once)
diff --git a/rfc/006-ralph-loop.md b/rfc/006-ralph-loop.md
new file mode 100644
index 0000000..e27974f
--- /dev/null
+++ b/rfc/006-ralph-loop.md
@@ -0,0 +1,78 @@
+# RFC 006 — Ralph Loop (Outer Verification Loop)
+
+**Status:** Implemented
+**Created:** 2026-01-30
+**Author:** —
+
+## Abstract
+
+Add an outer verification loop ("Ralph Loop") that checks whether the inner ReAct loop's final answer truly satisfies the original task. If the verifier deems the answer incomplete, feedback is injected and the inner loop re-enters. This provides an automated quality gate without changing the core ReAct loop.
+
+## Problem Statement
+
+Task completion in AgenticLoop is entirely LLM-driven: the inner ReAct loop terminates when the model emits `StopReason.STOP`. This means the model alone decides when it is "done," with no independent check that the answer is correct or complete. For complex tasks the model may stop prematurely — producing a partial answer, missing a subtask, or satisfying a surface reading of the prompt while missing deeper intent.
+
+An outer loop that independently verifies completion and injects corrective feedback addresses this gap without adding complexity to the inner loop itself.
+
+## Design Goals
+
+1. **Non-invasive** — the inner `_react_loop` remains unchanged.
+2. **On by default** — `run()` always routes through the wrapper; when verification passes on the first attempt, behavior matches the previous single-pass flow plus one lightweight check.
+3. **Pluggable verification** — ships with an LLM-based verifier but accepts any object matching the `Verifier` Protocol.
+4. **Bounded** — a configurable iteration cap (`RALPH_LOOP_MAX_ITERATIONS`, default 3) prevents runaway loops. On the final iteration, verification is skipped.
+5. **Minimal token overhead** — the verification call uses no tools, a short system prompt, and a 512-token cap.
+
+## Architecture
+
+```
+_ralph_loop (outer)
+ └─ for iteration in 1..max_iterations:
+ 1. _react_loop() → result
+ 2. if last iteration → return result
+ 3. verifier.verify(task, result)
+ ├─ complete → return result
+ └─ incomplete → inject feedback as user message, continue
+```
+
+### Verification Interface
+
+- `VerificationResult` dataclass: `complete: bool`, `reason: str`
+- `Verifier` runtime-checkable Protocol: `async def verify(task, result, iteration, previous_results) -> VerificationResult`
+- `LLMVerifier`: default implementation — lightweight LLM call (no tools, max 512 tokens). Truncates the agent answer to 4,000 characters and includes previous attempt context.
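+
+For illustration, a custom verifier can be as small as the sketch below. The `KeywordVerifier` class is hypothetical; only the `verify` signature and `VerificationResult` come from `agent/verification.py`. Because `Verifier` is a runtime-checkable structural Protocol, no subclassing is required:
+
+```python
+from agent.verification import VerificationResult
+
+
+class KeywordVerifier:
+    """Toy verifier: the answer passes only if it mentions a required keyword."""
+
+    def __init__(self, keyword: str) -> None:
+        self.keyword = keyword
+
+    async def verify(self, task, result, iteration, previous_results) -> VerificationResult:
+        # Any object with a matching async `verify` method satisfies the Protocol.
+        if self.keyword.lower() in result.lower():
+            return VerificationResult(complete=True, reason=f"Mentions '{self.keyword}'")
+        return VerificationResult(complete=False, reason=f"Answer must mention '{self.keyword}'")
+```
+
+An instance can then be passed as the `verifier` argument of `_ralph_loop` to replace the default `LLMVerifier`.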
+
+### Feedback Injection
+
+Works naturally with the existing memory system:
+
+1. `_react_loop()` completes → assistant's final message is already in memory.
+2. `_ralph_loop()` appends a `user` message containing the verifier's feedback.
+3. The next `_react_loop()` invocation picks up the full context via `memory.get_context_for_llm()`.
+
+No changes to the memory system are required.
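+
+As a rough illustration (content abridged), the conversation in memory after one failed verification looks like:
+
+```
+user:      <original task>
+assistant: <final answer from the inner loop>
+user:      Your previous answer was reviewed and found incomplete.
+           Feedback: <verifier reason>
+           Please address the feedback and provide a complete answer.
+```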
+
+### Configuration
+
+One new key in `config.py` (and the `.aloop/config` template):
+
+| Key | Default | Description |
+|---|---|---|
+| `RALPH_LOOP_MAX_ITERATIONS` | `3` | Maximum outer iterations before returning |
+
+## Alternatives Considered
+
+1. **Post-hoc tool call** — add a "verify" tool the agent can call itself. Rejected because the agent already decides when to stop; giving it a verify tool doesn't solve the "stops too early" problem.
+2. **Prompt engineering only** — add "double-check your answer" instructions. Unreliable; the model may still skip verification.
+3. **Opt-in config flag** — gate the loop behind an enable/disable switch, off by default. Dropped in favor of always-on: the verification call is lightweight (no tools, 512-token cap) and the iteration cap bounds worst-case overhead.
+
+## Risks and Open Questions
+
+- **Cost**: each outer iteration adds one verification LLM call plus a full re-run of the inner loop. The default cap of 3 bounds worst-case overhead.
+- **Verifier quality**: the LLM verifier uses the same model. A weaker/faster model could be used in future for cost savings.
+- **Feedback loop divergence**: the agent could oscillate if feedback is contradictory. The iteration cap mitigates this.
+
+## Future Directions
+
+- Support a separate (cheaper/faster) model for verification.
+- Structured verification output (JSON) for richer feedback.
+- Per-task opt-in via CLI flag (`--verify`).
diff --git a/test/test_basic.py b/test/test_basic.py
index 0a58547..42a16e3 100644
--- a/test/test_basic.py
+++ b/test/test_basic.py
@@ -7,7 +7,7 @@
async def test_imports():
- from agent.react_agent import ReActAgent # noqa: F401
+ from agent.agent import ReActAgent # noqa: F401
from config import Config # noqa: F401
from tools.calculator import CalculatorTool # noqa: F401
from tools.file_ops import FileReadTool, FileWriteTool # noqa: F401
diff --git a/test/test_ralph_loop.py b/test/test_ralph_loop.py
new file mode 100644
index 0000000..903a355
--- /dev/null
+++ b/test/test_ralph_loop.py
@@ -0,0 +1,285 @@
+"""Tests for the Ralph Loop (outer verification loop)."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from agent.verification import LLMVerifier, VerificationResult, Verifier
+from llm import LLMMessage, LLMResponse, StopReason
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_llm_response(content: str) -> LLMResponse:
+ """Create a simple LLMResponse that triggers StopReason.STOP."""
+ return LLMResponse(
+ content=content,
+ stop_reason=StopReason.STOP,
+ usage={"input_tokens": 10, "output_tokens": 5},
+ )
+
+
+class _StubVerifier:
+ """Verifier that returns a pre-programmed sequence of results."""
+
+ def __init__(self, results: list[VerificationResult]):
+ self._results = list(results)
+ self._call_count = 0
+
+ async def verify(
+ self,
+ task: str,
+ result: str,
+ iteration: int,
+ previous_results: list[VerificationResult],
+ ) -> VerificationResult:
+ vr = self._results[self._call_count]
+ self._call_count += 1
+ return vr
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture()
+def mock_agent():
+ """Create a minimal BaseAgent-like object for testing _ralph_loop.
+
+ We patch the heavy dependencies (LLM, memory, tools) so only the loop
+ logic is exercised.
+ """
+ from agent.base import BaseAgent
+
+ # Concrete subclass so we can instantiate without hitting ABC restriction
+ class _ConcreteAgent(BaseAgent):
+ async def run(self, task: str) -> str:
+ raise NotImplementedError
+
+ agent = object.__new__(_ConcreteAgent)
+
+ # Minimal stubs
+ agent.llm = MagicMock()
+ agent.llm.extract_text = lambda r: r.content or ""
+
+ agent.memory = MagicMock()
+ agent.memory.add_message = AsyncMock()
+ agent.memory.get_context_for_llm = MagicMock(return_value=[])
+
+ return agent
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_ralph_loop_passes_on_first_attempt(mock_agent):
+ """Verification passes on the first attempt — returns immediately."""
+ mock_agent._react_loop = AsyncMock(return_value="The answer is 42.")
+
+ verifier = _StubVerifier([VerificationResult(complete=True, reason="Correct")])
+
+ result = await mock_agent._ralph_loop(
+ messages=[],
+ tools=[],
+ use_memory=False,
+ save_to_memory=False,
+ task="What is the answer?",
+ max_iterations=3,
+ verifier=verifier,
+ )
+
+ assert result == "The answer is 42."
+ assert mock_agent._react_loop.await_count == 1
+
+
+@pytest.mark.asyncio
+async def test_ralph_loop_retries_then_passes(mock_agent):
+ """Verification fails once, feedback injected, then passes on second attempt."""
+ mock_agent._react_loop = AsyncMock(
+ side_effect=["Incomplete answer", "Complete answer with details"]
+ )
+
+ verifier = _StubVerifier(
+ [
+ VerificationResult(complete=False, reason="Missing details"),
+ VerificationResult(complete=True, reason="Now complete"),
+ ]
+ )
+
+ result = await mock_agent._ralph_loop(
+ messages=[],
+ tools=[],
+ use_memory=False,
+ save_to_memory=False,
+ task="Explain X",
+ max_iterations=3,
+ verifier=verifier,
+ )
+
+ assert result == "Complete answer with details"
+ assert mock_agent._react_loop.await_count == 2
+
+
+@pytest.mark.asyncio
+async def test_ralph_loop_max_iterations_skips_verification(mock_agent):
+ """On the last iteration, verification is skipped and the result is returned."""
+ mock_agent._react_loop = AsyncMock(side_effect=["first", "second", "third"])
+
+ # Verifier always says incomplete — but the 3rd iteration should skip it
+ verifier = _StubVerifier(
+ [
+ VerificationResult(complete=False, reason="nope"),
+ VerificationResult(complete=False, reason="still nope"),
+ # This should never be reached
+ VerificationResult(complete=False, reason="unreachable"),
+ ]
+ )
+
+ result = await mock_agent._ralph_loop(
+ messages=[],
+ tools=[],
+ use_memory=False,
+ save_to_memory=False,
+ task="Do something",
+ max_iterations=3,
+ verifier=verifier,
+ )
+
+ assert result == "third"
+ assert mock_agent._react_loop.await_count == 3
+ # Only 2 verify calls (iterations 1 and 2; iteration 3 skips verification)
+ assert verifier._call_count == 2
+
+
+@pytest.mark.asyncio
+async def test_ralph_loop_custom_verifier_protocol(mock_agent):
+ """A custom verifier following the Verifier Protocol works correctly."""
+
+ class MyVerifier:
+ async def verify(self, task, result, iteration, previous_results):
+ return VerificationResult(complete=True, reason="custom verifier says yes")
+
+ assert isinstance(MyVerifier(), Verifier)
+
+ mock_agent._react_loop = AsyncMock(return_value="answer")
+
+ result = await mock_agent._ralph_loop(
+ messages=[],
+ tools=[],
+ use_memory=False,
+ save_to_memory=False,
+ task="task",
+ max_iterations=3,
+ verifier=MyVerifier(),
+ )
+
+ assert result == "answer"
+
+
+@pytest.mark.asyncio
+async def test_ralph_loop_injects_feedback_into_messages(mock_agent):
+ """When verification fails, feedback is appended as a user message."""
+ messages: list[LLMMessage] = []
+ mock_agent._react_loop = AsyncMock(side_effect=["bad", "good"])
+
+ verifier = _StubVerifier(
+ [
+ VerificationResult(complete=False, reason="Missing X"),
+ VerificationResult(complete=True, reason="OK"),
+ ]
+ )
+
+ await mock_agent._ralph_loop(
+ messages=messages,
+ tools=[],
+ use_memory=False,
+ save_to_memory=False,
+ task="Do Y",
+ max_iterations=3,
+ verifier=verifier,
+ )
+
+ # One feedback message should have been appended
+ assert len(messages) == 1
+ assert messages[0].role == "user"
+ assert "Missing X" in messages[0].content
+
+
+@pytest.mark.asyncio
+async def test_run_dispatches_to_ralph_loop():
+ """ReActAgent.run() always uses _ralph_loop."""
+ from agent.agent import ReActAgent
+
+ agent = object.__new__(ReActAgent)
+ agent.llm = MagicMock()
+ agent.memory = MagicMock()
+ agent.memory.system_messages = ["sys"]
+ agent.memory.add_message = AsyncMock()
+ agent.memory.save_memory = AsyncMock()
+ agent.memory.get_stats = MagicMock(return_value={})
+ agent.tool_executor = MagicMock()
+ agent.tool_executor.get_tool_schemas = MagicMock(return_value=[])
+
+ agent._ralph_loop = AsyncMock(return_value="ralph result")
+ agent._print_memory_stats = MagicMock()
+
+ with patch("agent.agent.Config") as mock_config:
+ mock_config.RALPH_LOOP_MAX_ITERATIONS = 3
+ result = await agent.run("test task")
+
+ assert result == "ralph result"
+ agent._ralph_loop.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_llm_verifier_complete():
+ """LLMVerifier parses a COMPLETE response correctly."""
+ mock_llm = MagicMock()
+ mock_llm.call_async = AsyncMock(
+ return_value=LLMResponse(
+ content="COMPLETE: The answer correctly solves the task.",
+ stop_reason=StopReason.STOP,
+ )
+ )
+
+ verifier = LLMVerifier(mock_llm)
+ result = await verifier.verify(
+ task="Calculate 1+1",
+ result="2",
+ iteration=1,
+ previous_results=[],
+ )
+
+ assert result.complete is True
+ assert "correctly solves" in result.reason
+
+
+@pytest.mark.asyncio
+async def test_llm_verifier_incomplete():
+ """LLMVerifier parses an INCOMPLETE response correctly."""
+ mock_llm = MagicMock()
+ mock_llm.call_async = AsyncMock(
+ return_value=LLMResponse(
+ content="INCOMPLETE: The answer does not show the work.",
+ stop_reason=StopReason.STOP,
+ )
+ )
+
+ verifier = LLMVerifier(mock_llm)
+ result = await verifier.verify(
+ task="Show your work for 1+1",
+ result="2",
+ iteration=1,
+ previous_results=[],
+ )
+
+ assert result.complete is False
+ assert "does not show" in result.reason
diff --git a/test/test_smart_edit_integration.py b/test/test_smart_edit_integration.py
index ef8556f..7804f25 100644
--- a/test/test_smart_edit_integration.py
+++ b/test/test_smart_edit_integration.py
@@ -7,7 +7,7 @@
import pytest
-from agent.react_agent import ReActAgent
+from agent.agent import ReActAgent
from llm import LiteLLMAdapter, ModelManager
from tools.file_ops import FileReadTool, FileWriteTool
from tools.smart_edit import SmartEditTool
diff --git a/test/test_timer_tool.py b/test/test_timer_tool.py
deleted file mode 100644
index ded098e..0000000
--- a/test/test_timer_tool.py
+++ /dev/null
@@ -1,100 +0,0 @@
-"""Tests for the TimerTool."""
-
-import time
-
-import pytest
-
-from tools.timer import TimerTool
-
-
-@pytest.fixture
-def timer_tool():
- return TimerTool()
-
-
-class TestTimerToolProperties:
- def test_name(self, timer_tool):
- assert timer_tool.name == "timer"
-
- def test_description(self, timer_tool):
- assert "timer" in timer_tool.description.lower()
-
- def test_parameters(self, timer_tool):
- params = timer_tool.parameters
- assert "mode" in params
- assert "value" in params
- assert "task" in params
-
- def test_schema(self, timer_tool):
- schema = timer_tool.to_anthropic_schema()
- assert schema["name"] == "timer"
- assert "mode" in schema["input_schema"]["properties"]
-
-
-class TestTimerDelay:
- async def test_delay_returns_task(self, timer_tool):
- result = await timer_tool.execute(mode="delay", value="0", task="do something")
- assert "Timer triggered" in result
- assert "do something" in result
-
- async def test_delay_no_recurring_instruction(self, timer_tool):
- result = await timer_tool.execute(mode="delay", value="0", task="one-shot")
- assert "MUST call the timer tool again" not in result
-
- async def test_delay_waits(self, timer_tool):
- start = time.monotonic()
- await timer_tool.execute(mode="delay", value="0.1", task="test")
- elapsed = time.monotonic() - start
- assert elapsed >= 0.09
-
- async def test_delay_invalid_value(self, timer_tool):
- result = await timer_tool.execute(mode="delay", value="abc", task="test")
- assert "Error" in result
-
- async def test_delay_negative_value(self, timer_tool):
- result = await timer_tool.execute(mode="delay", value="-1", task="test")
- assert "Error" in result
-
-
-class TestTimerInterval:
- async def test_interval_returns_task(self, timer_tool):
- result = await timer_tool.execute(mode="interval", value="0", task="repeat this")
- assert "Timer triggered" in result
- assert "repeat this" in result
-
- async def test_interval_includes_recurring_instruction(self, timer_tool):
- result = await timer_tool.execute(mode="interval", value="0", task="repeat this")
- assert "MUST call the timer tool again" in result
- assert 'mode="interval"' in result
-
- async def test_interval_invalid_value(self, timer_tool):
- result = await timer_tool.execute(mode="interval", value="abc", task="test")
- assert "Error" in result
-
- async def test_interval_negative_value(self, timer_tool):
- result = await timer_tool.execute(mode="interval", value="-1", task="test")
- assert "Error" in result
-
-
-class TestTimerCron:
- async def test_cron_invalid_expression(self, timer_tool):
- result = await timer_tool.execute(mode="cron", value="not a cron", task="test")
- assert "Error" in result
- assert "invalid cron" in result
-
- async def test_cron_valid_expression(self, timer_tool):
- result = await timer_tool.execute(mode="cron", value="* * * * *", task="cron task")
- assert "Timer triggered" in result
- assert "cron task" in result
-
- async def test_cron_includes_recurring_instruction(self, timer_tool):
- result = await timer_tool.execute(mode="cron", value="* * * * *", task="cron task")
- assert "MUST call the timer tool again" in result
- assert 'mode="cron"' in result
-
-
-class TestTimerUnknownMode:
- async def test_unknown_mode(self, timer_tool):
- result = await timer_tool.execute(mode="bogus", value="1", task="test")
- assert "Error" in result
- assert "unknown mode" in result
diff --git a/tools/notify.py b/tools/notify.py
index c82f827..8c8586f 100644
--- a/tools/notify.py
+++ b/tools/notify.py
@@ -19,10 +19,7 @@ def name(self) -> str:
@property
def description(self) -> str:
- return (
- "Send an email notification via Resend. "
- "Requires RESEND_API_KEY and NOTIFY_EMAIL_FROM in .aloop/config."
- )
+ return "Send an email notification via Resend."
@property
def parameters(self) -> Dict[str, Any]:
diff --git a/tools/timer.py b/tools/timer.py
deleted file mode 100644
index 612e8ab..0000000
--- a/tools/timer.py
+++ /dev/null
@@ -1,95 +0,0 @@
-"""Timer tool for scheduling delayed or periodic agent tasks."""
-
-import asyncio
-import time
-from typing import Any, Dict
-
-from croniter import croniter
-
-from tools.base import BaseTool
-
-
-class TimerTool(BaseTool):
- """Wait until a specified time or duration, then return the task description."""
-
- @property
- def name(self) -> str:
- return "timer"
-
- @property
- def description(self) -> str:
- return (
- "Set a timer to trigger after a delay or at a cron-scheduled time. "
- "Modes: 'delay' (wait N seconds), 'interval' (wait N seconds, agent loops), "
- "'cron' (wait until next cron match). Returns the task description when triggered."
- )
-
- @property
- def parameters(self) -> Dict[str, Any]:
- return {
- "mode": {
- "type": "string",
- "description": (
- "Timer mode: 'delay' (one-shot, fire once after N seconds), "
- "'interval' (recurring, fire every N seconds — you must call timer again after each task), "
- "'cron' (recurring, fire on cron schedule — you must call timer again after each task)"
- ),
- "enum": ["delay", "interval", "cron"],
- },
- "value": {
- "type": "string",
- "description": (
- "For delay/interval: number of seconds (e.g. '60'). "
- "For cron: a cron expression (e.g. '0 9 * * *' for daily at 9 AM)."
- ),
- },
- "task": {
- "type": "string",
- "description": "Task description to return when the timer triggers.",
- },
- }
-
- async def execute(self, mode: str, value: str, task: str) -> str:
- if mode == "delay":
- try:
- seconds = float(value)
- except ValueError:
- return f"Error: value must be a number for delay mode, got '{value}'"
- if seconds < 0:
- return f"Error: value must be non-negative, got {seconds}"
- await asyncio.sleep(seconds)
- return f"Timer triggered. Task to execute: {task}"
-
- if mode == "interval":
- try:
- seconds = float(value)
- except ValueError:
- return f"Error: value must be a number for interval mode, got '{value}'"
- if seconds < 0:
- return f"Error: value must be non-negative, got {seconds}"
- await asyncio.sleep(seconds)
- return (
- f"Timer triggered. Task to execute: {task}\n\n"
- f"[IMPORTANT: This is a recurring interval timer. "
- f"After completing the task above, you MUST call the timer tool again "
- f'with the same parameters (mode="interval", value="{value}", '
- f'task="{task}") to continue the cycle.]'
- )
-
- if mode == "cron":
- if not croniter.is_valid(value):
- return f"Error: invalid cron expression '{value}'"
- now = time.time()
- cron = croniter(value, now)
- next_fire = cron.get_next(float)
- wait_seconds = max(0, next_fire - now)
- await asyncio.sleep(wait_seconds)
- return (
- f"Timer triggered. Task to execute: {task}\n\n"
- f"[IMPORTANT: This is a recurring cron timer. "
- f"After completing the task above, you MUST call the timer tool again "
- f'with the same parameters (mode="cron", value="{value}", '
- f'task="{task}") to continue the schedule.]'
- )
-
- return f"Error: unknown mode '{mode}'. Use 'delay', 'interval', or 'cron'."
diff --git a/utils/terminal_ui.py b/utils/terminal_ui.py
index cde1c58..ab36858 100644
--- a/utils/terminal_ui.py
+++ b/utils/terminal_ui.py
@@ -182,6 +182,26 @@ def print_final_answer(answer: str) -> None:
)
+def print_unfinished_answer(answer: str) -> None:
+ """Print an intermediate answer that did not pass verification.
+
+ Args:
+ answer: Answer text (supports Markdown)
+ """
+ colors = _get_colors()
+ console.print()
+ md = Markdown(answer)
+ console.print(
+ Panel(
+ md,
+ title=f"[bold {colors.warning}]Unfinished Answer[/bold {colors.warning}]",
+ border_style=colors.warning,
+ box=box.ROUNDED,
+ padding=(1, 2),
+ )
+ )
+
+
def print_memory_stats(stats: Dict[str, Any]) -> None:
"""Print memory statistics in a formatted table.