packages/prime/src/prime_cli/commands/env.py (176 additions, 10 deletions)
@@ -1,3 +1,4 @@
import asyncio
import hashlib
import json
import os
@@ -28,6 +29,11 @@
from ..utils.env_metadata import find_environment_metadata
from ..utils.eval_push import push_eval_results_to_hub
from ..utils.formatters import format_file_size
from ..utils.hosted_eval import (
HostedEvalConfig,
print_hosted_result,
run_hosted_evaluation,
)

app = typer.Typer(help="Manage verifiers environments", no_args_is_help=True)
console = Console()
@@ -2076,9 +2082,17 @@ def run_eval(
env_path: Optional[str],
endpoints_path: Optional[str] = None,
headers: Optional[List[str]] = None,
hosted: bool = False,
poll_interval: float = 10.0,
no_stream_logs: bool = False,
timeout_minutes: Optional[int] = None,
allow_sandbox_access: bool = False,
allow_instances_access: bool = False,
custom_secrets: Optional[str] = None,
eval_name: Optional[str] = None,
) -> None:
"""
Run verifiers' vf-eval with Prime Inference (local) or as a hosted evaluation on the platform.
"""
is_slug = (
"/" in environment and not environment.startswith("./") and not environment.startswith("/")
@@ -2087,31 +2101,177 @@ def run_eval(
upstream_owner = None
upstream_name = None
env_name_for_vf_eval = environment
environment_id = None
requested_version = "latest"

if is_slug:
env_slug = environment
if "@" in environment:
env_slug, requested_version = environment.rsplit("@", 1)

parts = env_slug.split("/")
if len(parts) == 2 and parts[0] and parts[1]:
upstream_owner, upstream_name = parts
env_name_for_vf_eval = upstream_name
else:
console.print(f"[red]Invalid environment slug format: {environment}[/red]")
raise typer.Exit(1)

if hosted:
if not is_slug or not upstream_owner or not upstream_name:
metadata = find_environment_metadata(
env_name=environment,
env_path=Path(env_path) if env_path else None,
)

console.print(
f"[dim]Using upstream environment {upstream_owner}/{upstream_name}[/dim]\n"
"[red]Error: Hosted evaluations require environment slug (owner/name).[/red]"
)

if metadata and metadata.get("owner") and metadata.get("name"):
suggested_slug = f"{metadata['owner']}/{metadata['name']}"
console.print("[yellow]Tip:[/yellow] Found local environment metadata.")
console.print(f"[dim]Try:[/dim] prime eval {suggested_slug} --hosted")
else:
console.print(
f"[dim]Example: prime eval primeintellect/{environment} --hosted[/dim]"
)

raise typer.Exit(1)

client = APIClient(require_auth=False)
try:
env_details = fetch_environment_details(
client, upstream_owner, upstream_name, requested_version
)
environment_id = env_details.get("id")
except APIError as e:
console.print(f"[red]Error: Environment '{environment}' not found on the hub.[/red]")
console.print(f"[dim]{e}[/dim]")
console.print()

metadata = find_environment_metadata(
env_name=upstream_name, env_path=Path(env_path) if env_path else None
)

if metadata and metadata.get("owner") == upstream_owner:
console.print(
"[yellow]Found local environment that hasn't been pushed yet.[/yellow]"
)
console.print()

should_push = typer.confirm(
"Would you like to push this environment to the hub now?", default=True
)

if should_push:
console.print()
console.print("[cyan]Pushing environment to hub...[/cyan]")

env_dir = env_path if env_path else Path.cwd()
result = subprocess.run(
["prime", "env", "push"], cwd=env_dir, capture_output=False, text=True
)

if result.returncode != 0:
console.print("[red]Failed to push environment.[/red]")
raise typer.Exit(1)

console.print()
console.print("[green]✓ Environment pushed successfully![/green]")
console.print("[cyan]Continuing with hosted evaluation...[/cyan]")
console.print()

try:
env_details = fetch_environment_details(
client, upstream_owner, upstream_name, requested_version
)
environment_id = env_details.get("id")
except APIError as e2:
console.print(
f"[red]Error: Still couldn't find environment after push: {e2}[/red]"
)
raise typer.Exit(1)
else:
console.print()
console.print("[yellow]Cancelled. To push manually, run:[/yellow]")
console.print(" prime env push")
raise typer.Exit(1)
else:
console.print("[dim]To publish your environment, run:[/dim]")
console.print(" prime env push")
raise typer.Exit(1)

if not environment_id:
console.print(f"[red]Error: Could not get environment ID for '{environment}'[/red]")
raise typer.Exit(1)

console.print(f"[dim]Using environment {upstream_owner}/{upstream_name}[/dim]\n")

# Parse env_args JSON if provided
parsed_env_args = None
if env_args:
try:
parsed_env_args = json.loads(env_args)
except json.JSONDecodeError as e:
console.print(f"[red]Error parsing --env-args: {e}[/red]")
raise typer.Exit(1)

# Parse custom_secrets JSON if provided
parsed_custom_secrets = None
if custom_secrets:
try:
parsed_custom_secrets = json.loads(custom_secrets)
except json.JSONDecodeError as e:
console.print(f"[red]Error parsing --custom-secrets: {e}[/red]")
raise typer.Exit(1)

# Create hosted eval config
hosted_config = HostedEvalConfig(
environment_id=environment_id,
inference_model=model,
num_examples=num_examples if num_examples is not None else 5,
rollouts_per_example=rollouts_per_example if rollouts_per_example is not None else 3,
env_args=parsed_env_args,
name=eval_name,
timeout_minutes=timeout_minutes,
allow_sandbox_access=allow_sandbox_access,
allow_instances_access=allow_instances_access,
custom_secrets=parsed_custom_secrets,
)

try:
result = asyncio.run(
run_hosted_evaluation(
config=hosted_config,
poll_interval=poll_interval,
stream_logs=not no_stream_logs,
)
)
print_hosted_result(result)

if result.status != "COMPLETED":
raise typer.Exit(1)
except APIError as e:
console.print(f"[red]Hosted evaluation failed: {e}[/red]")
raise typer.Exit(1)

return

if is_slug:
console.print(f"[dim]Using upstream environment {upstream_owner}/{upstream_name}[/dim]\n")

requested_version = "latest"
if "@" in environment:
_, requested_version = environment.rsplit("@", 1)

if not _is_environment_installed(env_name_for_vf_eval, requested_version):
console.print(f"[cyan]Installing {environment}...[/cyan]")
if not _install_single_environment(environment):
raise typer.Exit(1)
console.print()

is_resolved = True
else:
check_path = Path(env_path) if env_path else Path.cwd()
is_resolved = display_upstream_environment_info(
@@ -2392,6 +2552,11 @@ def eval_env(
"(used to locate .prime/.env-metadata.json for upstream resolution)"
),
),
hosted: bool = typer.Option(
False,
"--hosted",
help="Run evaluation on the Prime Intellect platform instead of locally",
),
) -> None:
"""Use 'prime eval' instead."""

@@ -2423,4 +2588,5 @@
env_path=env_path,
endpoints_path=None,
headers=None,
hosted=hosted,
)
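
The hosted path above is built on the new ..utils.hosted_eval helpers (HostedEvalConfig, run_hosted_evaluation, print_hosted_result), which are imported at the top of env.py but are not part of this diff. Below is a minimal sketch of the interface the call sites in run_eval appear to assume; field types, defaults, and the HostedEvalResult class are inferred from usage here, not taken from the real module.

```python
# Hypothetical sketch of the ..utils.hosted_eval module, reconstructed only from
# how run_eval uses it above. HostedEvalResult is an invented name; run_eval only
# reads `.status` from whatever run_hosted_evaluation returns.
from dataclasses import dataclass
from typing import Any, Dict, Optional


@dataclass
class HostedEvalConfig:
    # Mirrors the keyword arguments run_eval passes when it builds the config.
    environment_id: str
    inference_model: str
    num_examples: int = 5
    rollouts_per_example: int = 3
    env_args: Optional[Dict[str, Any]] = None
    name: Optional[str] = None
    timeout_minutes: Optional[int] = None
    allow_sandbox_access: bool = False
    allow_instances_access: bool = False
    custom_secrets: Optional[Dict[str, str]] = None


@dataclass
class HostedEvalResult:
    status: str  # run_eval treats anything other than "COMPLETED" as failure


async def run_hosted_evaluation(
    config: HostedEvalConfig,
    poll_interval: float = 10.0,
    stream_logs: bool = True,
) -> HostedEvalResult:
    """Submit the evaluation, poll its status every `poll_interval` seconds,
    and optionally stream logs until a terminal state (placeholder body)."""
    raise NotImplementedError


def print_hosted_result(result: HostedEvalResult) -> None:
    """Render a human-readable summary of the finished evaluation."""
    print(f"Hosted evaluation finished with status: {result.status}")
```

run_eval drives this with asyncio.run(...) and exits non-zero whenever the returned status is not "COMPLETED".
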
packages/prime/src/prime_cli/commands/evals.py (50 additions, 1 deletion)
@@ -641,13 +641,54 @@ def run_eval_cmd(
"--header",
help="Extra HTTP header for inference API ('Name: Value'). Repeatable.",
),
hosted: bool = typer.Option(
False,
"--hosted",
help="Run evaluation on the platform instead of locally",
),
poll_interval: float = typer.Option(
10.0,
"--poll-interval",
help="Polling interval in seconds for hosted evaluation status",
),
no_stream_logs: bool = typer.Option(
False,
"--no-stream-logs",
help="Disable log streaming for hosted evaluations",
),
timeout_minutes: Optional[int] = typer.Option(
None,
"--timeout-minutes",
help="Timeout in minutes for hosted evaluation (default: 120, max: 1440)",
),
allow_sandbox_access: bool = typer.Option(
False,
"--allow-sandbox-access",
help="Allow sandbox read/write access for hosted evaluations",
),
allow_instances_access: bool = typer.Option(
False,
"--allow-instances-access",
help="Allow pod/instance creation and management for hosted evaluations",
),
custom_secrets: Optional[str] = typer.Option(
None,
"--custom-secrets",
help='Custom secrets for hosted eval as JSON (e.g., \'{"API_KEY": "xxx"}\')',
),
eval_name: Optional[str] = typer.Option(
None,
"--eval-name",
help="Custom name for the hosted evaluation",
),
) -> None:
"""
Run verifiers' vf-eval with Prime Inference (local) or on the platform (--hosted).

Examples:
prime eval run primeintellect/wordle -m openai/gpt-4.1-mini -n 5
prime eval run wordle -m openai/gpt-4.1-mini -n 2 -r 3 -t 1024 -T 0.7
prime eval run primeintellect/gsm8k --hosted -m openai/gpt-4.1-mini -n 10
"""
run_eval(
environment=environment,
@@ -677,4 +718,12 @@ def run_eval_cmd(
env_path=env_path,
endpoints_path=endpoints_path,
headers=header,
hosted=hosted,
poll_interval=poll_interval,
no_stream_logs=no_stream_logs,
timeout_minutes=timeout_minutes,
allow_sandbox_access=allow_sandbox_access,
allow_instances_access=allow_instances_access,
custom_secrets=custom_secrets,
eval_name=eval_name,
)
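
Both --env-args and --custom-secrets arrive as JSON strings on the command line; run_eval parses them with json.loads and passes the resulting dicts into HostedEvalConfig. A small illustration of the expected shapes (the max_turns key is made up for this example; the secrets shape follows the --custom-secrets help text):

```python
import json

# Roughly what the CLI receives after shell quoting, e.g.
#   prime eval run primeintellect/gsm8k --hosted -m openai/gpt-4.1-mini -n 10 \
#       --env-args '{"max_turns": 8}' --custom-secrets '{"API_KEY": "xxx"}'
env_args = '{"max_turns": 8}'            # hypothetical environment argument
custom_secrets = '{"API_KEY": "xxx"}'    # shape from the --custom-secrets help text

parsed_env_args = json.loads(env_args)              # {"max_turns": 8}
parsed_custom_secrets = json.loads(custom_secrets)  # {"API_KEY": "xxx"}

# Malformed JSON is rejected up front, before anything is submitted:
try:
    json.loads("{not valid json}")
except json.JSONDecodeError as e:
    print(f"Error parsing --env-args: {e}")
```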