Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 39 additions & 9 deletions packages/prime/src/prime_cli/commands/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from ..utils.env_metadata import find_environment_metadata
from ..utils.eval_push import push_eval_results_to_hub
from ..utils.formatters import format_file_size
from ..utils.remote_env import init_sandbox_environment, init_ts_environment

app = typer.Typer(help="Manage verifiers environments", no_args_is_help=True)
console = Console()
Expand Down Expand Up @@ -893,20 +894,49 @@ def init(
rewrite_readme: bool = typer.Option(
False, "--rewrite-readme", help="Overwrite README.md with template if it already exists"
),
sandbox: bool = typer.Option(
False, "--sandbox", help="Create a remote sandbox environment template"
),
ts: bool = typer.Option(
False, "--ts", help="Create a TypeScript sandbox environment template"
),
) -> None:
"""Initialize a new verifiers environment from template"""
try:
# this import is slow, so we do it inside the command
from verifiers.scripts.init import init_environment
if sandbox and ts:
console.print("[red]Error: Cannot use both --sandbox and --ts flags[/red]")
raise typer.Exit(1)

if sandbox:
created_path = init_sandbox_environment(name, path)
console.print(f"""
[green]✓ Created sandbox environment template in {created_path}/[/green]
""")
console.print("\nNext steps:")
console.print(f" cd {created_path}")
console.print(" # Edit sandbox/setup.sh to configure your environment")
console.print(" prime env push")
elif ts:
created_path = init_ts_environment(name, path)
console.print(f"""
[green]✓ Created TypeScript environment template in {created_path}/[/green]
""")
console.print("\nNext steps:")
console.print(f" cd {created_path}")
console.print(" # Edit sandbox/src/index.ts to define your tools and rewards")
console.print(" prime env push")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

--rewrite-readme option silently ignored with sandbox/ts flags

Low Severity

The rewrite_readme parameter is defined as a CLI option but is only used when neither --sandbox nor --ts flags are set. When using --sandbox or --ts, the option is accepted but silently ignored. The init_sandbox_environment and init_ts_environment functions always overwrite all files (including README.md) using write_text(), which is inconsistent with the regular init path that respects the --rewrite-readme flag. Users could specify --sandbox --rewrite-readme and expect specific behavior that doesn't occur.

Fix in Cursor Fix in Web

else:
# this import is slow, so we do it inside the command
from verifiers.scripts.init import init_environment

created_path = init_environment(name, path, rewrite_readme)
created_path = init_environment(name, path, rewrite_readme)

console.print(f"[green]✓ Created environment template in {created_path}/[/green]")
console.print("\nNext steps:")
console.print(f" cd {created_path}")
filename = f"{name}.py".replace("-", "_")
console.print(f" # Edit the {filename} file to implement your environment")
console.print(" prime env push")
console.print(f"[green]✓ Created environment template in {created_path}/[/green]")
console.print("\nNext steps:")
console.print(f" cd {created_path}")
filename = f"{name}.py".replace("-", "_")
console.print(f" # Edit the {filename} file to implement your environment")
console.print(" prime env push")

except FileNotFoundError as e:
console.print(f"[red]File not found: {e}[/red]")
Expand Down
284 changes: 284 additions & 0 deletions packages/prime/src/prime_cli/utils/remote_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
from pathlib import Path


def init_sandbox_environment(name: str, path: str, rewrite_readme: bool = False) -> Path:
    """Scaffold a remote sandbox environment template on disk.

    Creates ``<path>/<name with '-' replaced by '_'>/`` containing
    ``pyproject.toml``, ``README.md``, ``<module>.py`` and ``sandbox/setup.sh``.

    Files that already exist are left untouched so that re-running the command
    inside an existing environment never clobbers user edits. The only
    exception is ``README.md``, which is regenerated when ``rewrite_readme``
    is true.

    Args:
        name: Environment name; dashes are converted to underscores for the
            directory and module names.
        path: Parent directory in which the environment directory is created.
        rewrite_readme: Overwrite an existing ``README.md`` with the template.

    Returns:
        The path of the environment directory.

    Raises:
        ValueError: If ``name`` is empty or only whitespace.
    """
    # An empty name would make ``Path(path) / ""`` collapse to ``path`` itself,
    # scattering the template files (and a hidden ".py") into the parent dir.
    if not name or not name.strip():
        raise ValueError("Environment name must not be empty")

    env_id_underscore = name.replace("-", "_")
    local_dir = Path(path) / env_id_underscore
    sandbox_dir = local_dir / "sandbox"
    # parents=True creates local_dir as well.
    sandbox_dir.mkdir(parents=True, exist_ok=True)

    def _write_template(target: Path, content: str, force: bool = False) -> None:
        """Write ``content`` to ``target`` unless it already exists."""
        if target.exists() and not force:
            return
        # Explicit UTF-8 and LF endings: the platform defaults would emit CRLF
        # on Windows, which breaks the shebang line of setup.sh in the sandbox.
        with target.open("w", encoding="utf-8", newline="\n") as handle:
            handle.write(content)

    pyproject_content = f'''[project]
name = "{name}"
version = "0.1.0"
requires-python = ">=3.10"
dependencies = ["verifiers"]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build]
include = ["{env_id_underscore}.py", "pyproject.toml", "sandbox/**/*"]
'''
    _write_template(local_dir / "pyproject.toml", pyproject_content)

    readme_content = f'''# {name}

A remote sandbox environment.

## Structure

- `{env_id_underscore}.py` - Environment definition using RemoteEnv
- `sandbox/setup.sh` - Setup script that runs in the sandbox

## Usage

Edit `sandbox/setup.sh` to install dependencies and start your service.
The last command in setup.sh should start your long-running process.
'''
    _write_template(local_dir / "README.md", readme_content, force=rewrite_readme)

    env_py_content = '''from pathlib import Path
from verifiers.envs.experimental.remote_envs import RemoteEnv


def load_environment(**kwargs):
    return RemoteEnv(
        sandbox_path=Path(__file__).parent / "sandbox",
        **kwargs
    )
'''
    _write_template(local_dir / f"{env_id_underscore}.py", env_py_content)

    setup_sh_content = '''#!/bin/bash
set -e

echo "Setup complete. Add your start command here."
'''
    _write_template(sandbox_dir / "setup.sh", setup_sh_content)

    return local_dir


def init_ts_environment(name: str, path: str, rewrite_readme: bool = False) -> Path:
    """Scaffold a TypeScript sandbox environment template on disk.

    Creates ``<path>/<name with '-' replaced by '_'>/`` containing
    ``pyproject.toml``, ``README.md``, ``<module>.py``, ``sandbox/setup.sh``,
    ``sandbox/package.json`` and ``sandbox/src/index.ts``.

    Files that already exist are left untouched so that re-running the command
    inside an existing environment never clobbers user edits. The only
    exception is ``README.md``, which is regenerated when ``rewrite_readme``
    is true.

    Args:
        name: Environment name; dashes are converted to underscores for the
            directory and module names.
        path: Parent directory in which the environment directory is created.
        rewrite_readme: Overwrite an existing ``README.md`` with the template.

    Returns:
        The path of the environment directory.

    Raises:
        ValueError: If ``name`` is empty or only whitespace.
    """
    # An empty name would make ``Path(path) / ""`` collapse to ``path`` itself,
    # scattering the template files (and a hidden ".py") into the parent dir.
    if not name or not name.strip():
        raise ValueError("Environment name must not be empty")

    env_id_underscore = name.replace("-", "_")
    local_dir = Path(path) / env_id_underscore
    sandbox_dir = local_dir / "sandbox"
    src_dir = sandbox_dir / "src"
    # parents=True creates local_dir and sandbox_dir as well.
    src_dir.mkdir(parents=True, exist_ok=True)

    def _write_template(target: Path, content: str, force: bool = False) -> None:
        """Write ``content`` to ``target`` unless it already exists."""
        if target.exists() and not force:
            return
        # Explicit UTF-8 and LF endings: the platform defaults would emit CRLF
        # on Windows, which breaks the shebang line of setup.sh in the sandbox.
        with target.open("w", encoding="utf-8", newline="\n") as handle:
            handle.write(content)

    pyproject_content = f'''[project]
name = "{name}"
version = "0.1.0"
requires-python = ">=3.10"
dependencies = ["verifiers"]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build]
include = ["{env_id_underscore}.py", "pyproject.toml", "sandbox/**/*"]
'''
    _write_template(local_dir / "pyproject.toml", pyproject_content)

    readme_content = f'''# {name}

A TypeScript sandbox environment with REST API tool/reward discovery.

## Structure

- `{env_id_underscore}.py` - Environment definition using TypeScriptEnv
- `sandbox/setup.sh` - Installs Node.js and starts the server
- `sandbox/src/index.ts` - REST API with tool and reward endpoints

## REST API Contract

Your TypeScript server must implement:

- `GET /tools` - Returns tool definitions
- `POST /tools/:name` - Executes a tool
- `GET /rewards` - Returns reward function definitions
- `POST /rewards/:name` - Calculates a reward

## Usage

Edit `sandbox/src/index.ts` to define your tools and rewards.
'''
    _write_template(local_dir / "README.md", readme_content, force=rewrite_readme)

    env_py_content = '''from pathlib import Path
from verifiers.envs.experimental.remote_envs import TypeScriptEnv


def load_environment(**kwargs):
    return TypeScriptEnv(
        sandbox_path=Path(__file__).parent / "sandbox",
        **kwargs
    )
'''
    _write_template(local_dir / f"{env_id_underscore}.py", env_py_content)

    setup_sh_content = '''#!/bin/bash
set -e

apt-get update && apt-get install -y curl unzip
curl -fsSL https://bun.sh/install | bash
export PATH="$HOME/.bun/bin:$PATH"

bun install
bun run src/index.ts
'''
    _write_template(sandbox_dir / "setup.sh", setup_sh_content)

    package_json_content = f'''{{
  "name": "{name}",
  "version": "1.0.0",
  "dependencies": {{
    "zod": "^3.23.0",
    "zod-to-json-schema": "^3.23.0"
  }}
}}
'''
    _write_template(sandbox_dir / "package.json", package_json_content)

    index_ts_content = '''import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";

const PORT = 3000;

// =============================================================================
// Tools - Define your tools here
// =============================================================================

const EchoArgs = z.object({
  message: z.string().describe("Message to echo back"),
});

function echo(args: z.infer<typeof EchoArgs>): string {
  return args.message;
}

const AddArgs = z.object({
  x: z.number().describe("First number"),
  y: z.number().describe("Second number"),
});

function add(args: z.infer<typeof AddArgs>): string {
  return String(args.x + args.y);
}

type ToolDef = {
  description: string;
  schema: z.ZodObject<any>;
  fn: (args: any) => string;
};

const tools: Record<string, ToolDef> = {
  echo: {
    description: "Echoes back the input message",
    schema: EchoArgs,
    fn: echo,
  },
  add: {
    description: "Adds two numbers together",
    schema: AddArgs,
    fn: add,
  },
};

// =============================================================================
// Rewards - Define your reward functions here
// =============================================================================

function correctness(prompt: any, completion: any, answer: any, state: any): number {
  const lastMessage = completion[completion.length - 1];
  const content = lastMessage?.content || "";
  return content.includes(answer) ? 1.0 : 0.0;
}

type RewardDef = {
  weight: number;
  fn: (prompt: any, completion: any, answer: any, state: any) => number;
};

const rewards: Record<string, RewardDef> = {
  correctness: {
    weight: 1.0,
    fn: correctness,
  },
};

// =============================================================================
// Server - No need to modify below
// =============================================================================

function getToolList() {
  return Object.entries(tools).map(([name, tool]) => ({
    type: "function",
    function: {
      name,
      description: tool.description,
      parameters: zodToJsonSchema(tool.schema, { $refStrategy: "none" }),
    },
  }));
}

function getRewardList() {
  return Object.entries(rewards).map(([name, reward]) => ({
    name,
    weight: reward.weight,
  }));
}

Bun.serve({
  port: PORT,
  async fetch(req) {
    const url = new URL(req.url);
    const path = url.pathname;

    if (path === "/tools" && req.method === "GET") {
      return Response.json({ tools: getToolList() });
    }

    if (path.startsWith("/tools/") && req.method === "POST") {
      const name = path.slice("/tools/".length);
      const tool = tools[name];
      if (!tool) {
        return Response.json({ error: `Tool ${name} not found` }, { status: 404 });
      }
      const { args } = await req.json();
      const parsed = tool.schema.parse(args);
      const result = tool.fn(parsed);
      return Response.json({ result });
    }

    if (path === "/rewards" && req.method === "GET") {
      return Response.json({ rewards: getRewardList() });
    }

    if (path.startsWith("/rewards/") && req.method === "POST") {
      const name = path.slice("/rewards/".length);
      const reward = rewards[name];
      if (!reward) {
        return Response.json({ error: `Reward ${name} not found` }, { status: 404 });
      }
      const { prompt, completion, answer, state } = await req.json();
      const score = reward.fn(prompt, completion, answer, state);
      return Response.json({ score });
    }

    return Response.json({ error: "Not found" }, { status: 404 });
  },
});

console.log(`Server running on port ${PORT}`);
'''
    _write_template(src_dir / "index.ts", index_ts_content)

    return local_dir
Loading