-
Notifications
You must be signed in to change notification settings - Fork 0
feat: add Ralph Loop outer verification for task completion #48
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b20c52c
335ef94
ac95cd8
122cd8e
7f2ff61
ad69b39
51edb10
9f8ff8c
bd1e2d4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,7 @@ | |
|
|
||
| from .todo import TodoList | ||
| from .tool_executor import ToolExecutor | ||
| from .verification import LLMVerifier, VerificationResult, Verifier | ||
|
|
||
| if TYPE_CHECKING: | ||
| from llm import LiteLLMAdapter, ModelManager | ||
|
|
@@ -300,3 +301,99 @@ def get_current_model_info(self) -> Optional[dict]: | |
| "provider": profile.provider, | ||
| } | ||
| return None | ||
|
|
||
| async def _ralph_loop( | ||
| self, | ||
| messages: List[LLMMessage], | ||
| tools: List, | ||
| use_memory: bool = True, | ||
| save_to_memory: bool = True, | ||
| task: str = "", | ||
| max_iterations: int = 3, | ||
| verifier: Optional[Verifier] = None, | ||
| ) -> str: | ||
| """Outer verification loop that wraps _react_loop. | ||
|
|
||
| After _react_loop returns a final answer, a verifier judges whether the | ||
| original task is satisfied. If not, feedback is injected and the inner | ||
| loop re-enters. | ||
|
|
||
| Args: | ||
| messages: Initial message list (passed through to _react_loop). | ||
| tools: List of available tool schemas. | ||
| use_memory: If True, use self.memory for context. | ||
| save_to_memory: If True, save messages to self.memory. | ||
| task: The original task description. | ||
| max_iterations: Maximum number of outer verification iterations. | ||
| verifier: Optional custom Verifier instance. Defaults to LLMVerifier. | ||
|
|
||
| Returns: | ||
| Final answer as a string. | ||
| """ | ||
| if verifier is None: | ||
| verifier = LLMVerifier(self.llm, terminal_ui) | ||
|
|
||
| previous_results: List[VerificationResult] = [] | ||
|
|
||
| for iteration in range(1, max_iterations + 1): | ||
| logger.debug(f"Ralph loop iteration {iteration}/{max_iterations}") | ||
|
|
||
| result = await self._react_loop( | ||
| messages=messages, | ||
| tools=tools, | ||
| use_memory=use_memory, | ||
| save_to_memory=save_to_memory, | ||
| task=task, | ||
| ) | ||
|
|
||
| # Skip verification on last iteration — just return whatever we got | ||
| if iteration == max_iterations: | ||
| logger.debug("Ralph loop: max iterations reached, returning result") | ||
| terminal_ui.console.print( | ||
| f"\n[bold dark_orange]⚠ Verification skipped " | ||
| f"(max iterations {max_iterations} reached), returning last result[/bold dark_orange]" | ||
| ) | ||
|
Comment on lines
+352
to
+355
|
||
| return result | ||
|
|
||
| verification = await verifier.verify( | ||
| task=task, | ||
| result=result, | ||
| iteration=iteration, | ||
| previous_results=previous_results, | ||
| ) | ||
| previous_results.append(verification) | ||
|
|
||
| if verification.complete: | ||
| logger.debug(f"Ralph loop: verified complete — {verification.reason}") | ||
| terminal_ui.console.print( | ||
| f"\n[bold green]✓ Verification passed " | ||
| f"(attempt {iteration}/{max_iterations}): {verification.reason}[/bold green]" | ||
| ) | ||
|
Comment on lines
+368
to
+371
|
||
| return result | ||
|
|
||
| # Print the incomplete result so the user can see what the agent produced | ||
| terminal_ui.print_unfinished_answer(result) | ||
|
|
||
| # Inject feedback as a user message so the next _react_loop iteration | ||
| # picks it up from memory. | ||
| feedback = ( | ||
| f"Your previous answer was reviewed and found incomplete. " | ||
| f"Feedback: {verification.reason}\n\n" | ||
| f"Please address the feedback and provide a complete answer." | ||
| ) | ||
| # Print the incomplete result so the user can see what the agent produced | ||
| terminal_ui.print_unfinished_answer(result) | ||
|
||
|
|
||
| logger.debug(f"Ralph loop: injecting feedback — {verification.reason}") | ||
| terminal_ui.console.print( | ||
| f"\n[bold yellow]⟳ Verification feedback (attempt {iteration}/{max_iterations}): " | ||
| f"{verification.reason}[/bold yellow]" | ||
| ) | ||
|
Comment on lines
+388
to
+391
|
||
|
|
||
| if use_memory and save_to_memory: | ||
| await self.memory.add_message(LLMMessage(role="user", content=feedback)) | ||
| else: | ||
| messages.append(LLMMessage(role="user", content=feedback)) | ||
|
|
||
| # Should not reach here, but return last result as safety fallback | ||
| return result # type: ignore[possibly-undefined] | ||
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| """Verification interface and default LLM verifier for the Ralph Loop. | ||
|
|
||
| The verifier judges whether the agent's final answer truly satisfies the | ||
| original task. If not, feedback is returned so the outer loop can re-enter | ||
| the inner ReAct loop with corrective guidance. | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from dataclasses import dataclass | ||
| from typing import TYPE_CHECKING, Protocol, runtime_checkable | ||
|
|
||
| from llm import LLMMessage | ||
| from utils import get_logger | ||
| from utils.tui.progress import AsyncSpinner | ||
|
|
||
| if TYPE_CHECKING: | ||
| from llm import LiteLLMAdapter | ||
| from utils.tui.terminal_ui import TerminalUI | ||
|
|
||
| logger = get_logger(__name__) | ||
|
|
||
|
|
||
@dataclass
class VerificationResult:
    """Result of a verification check."""

    # True when the verifier judged the original task fully satisfied.
    complete: bool
    # Verdict text after the "COMPLETE:"/"INCOMPLETE:" prefix — a brief
    # justification when complete, corrective feedback when not.
    reason: str
|
|
||
|
|
||
@runtime_checkable
class Verifier(Protocol):
    """Protocol for task-completion verifiers.

    Any object with a matching async ``verify`` method satisfies this protocol
    (structural typing); ``runtime_checkable`` additionally allows
    ``isinstance`` checks against it.
    """

    async def verify(
        self,
        task: str,
        result: str,
        iteration: int,
        previous_results: list[VerificationResult],
    ) -> VerificationResult:
        """Judge whether *result* satisfies *task*.

        Args:
            task: The original user task description.
            result: The agent's final answer from the inner loop.
            iteration: Current outer-loop iteration (1-indexed).
            previous_results: Verification results from earlier iterations.

        Returns:
            VerificationResult indicating completion status and reasoning.
        """
        ...  # pragma: no cover
|
|
||
|
|
||
# Prompt template for LLMVerifier. Placeholders filled via str.format:
#   {task}             — the original user request
#   {result}           — the agent's final answer (truncated by the caller)
#   {previous_context} — summary of earlier verification attempts, or ""
# The trailing backslashes are line continuations inside the triple-quoted
# string, so those sentences render on a single line.
_VERIFICATION_PROMPT = """\
You are a strict verification assistant. Your job is to determine whether an \
AI agent's answer fully and correctly completes the user's original task.

<task>
{task}
</task>

<agent_answer>
{result}
</agent_answer>

{previous_context}

<judgment_rules>
1. If the task is a ONE-TIME request (e.g. "calculate 1+1", "summarize this file"), \
judge whether the answer is correct and complete.

2. If the task requires MULTIPLE steps and only some were done, respond INCOMPLETE \
with specific feedback on what remains.
</judgment_rules>

Respond with EXACTLY one of:
- COMPLETE: <brief reason why the task is satisfied>
- INCOMPLETE: <specific feedback on what is missing or wrong>

Do NOT restate the answer. Only judge it."""
|
|
||
|
|
||
class LLMVerifier:
    """Default verifier that uses a lightweight LLM call (no tools)."""

    def __init__(self, llm: LiteLLMAdapter, terminal_ui: TerminalUI | None = None):
        # Adapter used for the single verification call.
        self.llm = llm
        # Optional terminal UI; when present, a spinner is shown during the call.
        self._tui = terminal_ui

    @staticmethod
    def _parse_verdict(text: str) -> tuple[bool, str]:
        """Parse the verifier LLM's reply into ``(complete, reason)``.

        A reply starting with "COMPLETE" (case-insensitive) counts as complete;
        anything else — including "INCOMPLETE: ..." — counts as incomplete.
        The reason is whatever follows the first colon, or the whole text when
        no colon is present. (Extracted so the COMPLETE/INCOMPLETE branches no
        longer duplicate the same expression, and so parsing is unit-testable.)

        Args:
            text: Stripped response text from the verification LLM call.

        Returns:
            Tuple of (completion flag, reason string).
        """
        complete = text.upper().startswith("COMPLETE")
        reason = text.split(":", 1)[1].strip() if ":" in text else text
        return complete, reason

    async def verify(
        self,
        task: str,
        result: str,
        iteration: int,
        previous_results: list[VerificationResult],
    ) -> VerificationResult:
        """Judge whether *result* satisfies *task* with one tool-free LLM call.

        Args:
            task: The original user task description.
            result: The agent's final answer from the inner loop.
            iteration: Current outer-loop iteration (1-indexed), used for logging.
            previous_results: Verification results from earlier iterations,
                summarized into the prompt so the model sees prior feedback.

        Returns:
            VerificationResult with the parsed verdict and reason.
        """
        previous_context = ""
        if previous_results:
            lines = []
            for i, pr in enumerate(previous_results, 1):
                status = "complete" if pr.complete else "incomplete"
                lines.append(f"  Attempt {i}: {status} — {pr.reason}")
            previous_context = "Previous verification attempts:\n" + "\n".join(lines)

        prompt = _VERIFICATION_PROMPT.format(
            task=task,
            result=result[:4000],  # Truncate to avoid excessive tokens
            previous_context=previous_context,
        )

        messages = [
            LLMMessage(role="system", content="You are a task-completion verifier."),
            LLMMessage(role="user", content=prompt),
        ]

        # Show a spinner only when a terminal UI was supplied.
        console = self._tui.console if self._tui else None
        if console:
            async with AsyncSpinner(console, "Verifying completion..."):
                response = await self.llm.call_async(messages=messages, tools=None, max_tokens=512)
        else:
            response = await self.llm.call_async(messages=messages, tools=None, max_tokens=512)

        text = (response.content or "").strip()
        logger.debug(f"Verification response (iter {iteration}): {text}")

        complete, reason = self._parse_verdict(text)
        return VerificationResult(complete=complete, reason=reason)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The variable
`terminal_ui` is used without being imported or defined in this scope. It should likely be `self._tui` or imported from the utils module.