diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py index aa6ade25..cb1d6142 100644 --- a/tutorials/examples/train_hypergrid.py +++ b/tutorials/examples/train_hypergrid.py @@ -29,6 +29,7 @@ import logging import os +import random import time from argparse import ArgumentParser from math import ceil @@ -314,57 +315,48 @@ def get_exact_P_T(env: HyperGrid, gflownet: GFlowNet) -> torch.Tensor: return (u * probabilities[..., -1]).detach().cpu() -def _sample_new_strategy( - args, - agent_group_id: int, - iteration: int, - prev_eps: float, - prev_temp: float, - prev_noisy: int, -) -> dict: - """Select a new exploration strategy, including noisy layers. +def _sample_new_strategy(args, rng: random.Random) -> dict: + """Sample a new exploration strategy by independently sampling each parameter. - The strategy only defines exploration-time parameters and the count of - noisy layers to use when building/rebuilding the networks. + Each parameter (epsilon, temperature, n_noisy_layers) is sampled from a + normal distribution with mean and std specified in args. Values are clamped + to valid ranges. - We pick deterministically from a small candidate pool, excluding the - previous configuration when possible, to ensure diversity across - restarts without requiring synchronization. + Args: + args: Argument namespace containing mean/std for each parameter: + - epsilon, strategy_epsilon_std + - temperature, strategy_temperature_std + - n_noisy_layers, strategy_n_noisy_layers_std + - noisy_std_init (optional, default 0.5) + rng: Random number generator instance to use for sampling. Returns: - A dict with keys: name, epsilon, temperature, n_noisy_layers, - and noisy_std_init (if present in args, default 0.5 otherwise). + A dict with keys: name, epsilon, temperature, n_noisy_layers, noisy_std_init. """ - # TODO: Generate a new exploration strategy instead of selecting from a pre-defined - # list. 
- candidates = [ - {"name": "on_policy", "epsilon": 0.0, "temperature": 1.0, "n_noisy_layers": 0}, - {"name": "epsilon_0.1", "epsilon": 0.1, "temperature": 1.0, "n_noisy_layers": 0}, - {"name": "temp_1.5", "epsilon": 0.0, "temperature": 1.5, "n_noisy_layers": 0}, - {"name": "noisy_1", "epsilon": 0.0, "temperature": 1.0, "n_noisy_layers": 1}, - { - "name": "noisy_2_temp_1.5", - "epsilon": 0.0, - "temperature": 1.5, - "n_noisy_layers": 2, - }, - ] - choices = [ - c - for c in candidates - if ( - c["epsilon"] != prev_eps - or c["temperature"] != prev_temp - or c["n_noisy_layers"] != prev_noisy - ) - ] - if not choices: - choices = candidates - idx_seed = int(args.seed) + int(agent_group_id) * 7919 + int(iteration) * 104729 - idx = idx_seed % len(choices) - strat = choices[idx] - strat["noisy_std_init"] = float(getattr(args, "agent_noisy_std_init", 0.5)) - return strat + # Get mean/std from args with sensible defaults. + eps_mean = float(getattr(args, "epsilon", 0.1)) + eps_std = float(getattr(args, "strategy_epsilon_std", 0.05)) + temp_mean = float(getattr(args, "temperature", 1.5)) + temp_std = float(getattr(args, "strategy_temperature_std", 0.5)) + noisy_mean = float(getattr(args, "n_noisy_layers", 1.0)) + noisy_std = float(getattr(args, "strategy_n_noisy_layers_std", 1.0)) + noisy_std_init = float(getattr(args, "noisy_std_init", 0.5)) + + # Sample from normal distribution and clamp to valid ranges. + epsilon = max(0.0, rng.gauss(eps_mean, eps_std)) + temperature = max(0.01, rng.gauss(temp_mean, temp_std)) # temperature > 0 + n_noisy_layers = max(0, round(rng.gauss(noisy_mean, noisy_std))) + + # Build a descriptive name for the strategy. 
+ name = f"eps_{epsilon:.3f}_temp_{temperature:.3f}_noisy_{n_noisy_layers}" + + return { + "name": name, + "epsilon": epsilon, + "temperature": temperature, + "n_noisy_layers": n_noisy_layers, + "noisy_std_init": noisy_std_init, + } def _make_optimizer_for(gflownet, args) -> torch.optim.Optimizer: @@ -480,26 +472,21 @@ def set_up_logF_estimator( return ScalarEstimator(module=module, preprocessor=preprocessor) -def set_up_gflownet(args, env, preprocessor, agent_group_list, my_agent_group_id): +def set_up_gflownet( + args, env, preprocessor, agent_group_list, my_agent_group_id, strategy_rng +): """Returns a GFlowNet complete with the required estimators.""" # Initialize per-agent exploration strategy. # Default (tests stable): on-policy, no noisy layers. # When --use_random_strategies is provided, sample a random initial strategy. if getattr(args, "use_random_strategies", False): - cfg = _sample_new_strategy( - args, - agent_group_id=my_agent_group_id, - iteration=0, - prev_eps=9999.0, - prev_temp=9999.0, - prev_noisy=9999, - ) + cfg = _sample_new_strategy(args, strategy_rng) else: cfg = { - "epsilon": 0.0, - "temperature": 1.0, - "n_noisy_layers": 0, - "noisy_std_init": 0.5, + "epsilon": args.epsilon, + "temperature": args.temperature, + "n_noisy_layers": args.n_noisy_layers, + "noisy_std_init": args.noisy_std_init, } args.agent_epsilon = float(cfg.get("epsilon", 0.0)) @@ -672,6 +659,10 @@ def main(args) -> dict: # noqa: C901 set_seed(args.seed + distributed_context.my_rank) + # Create RNG for strategy sampling (seeded deterministically per agent group). + agent_group_id = distributed_context.agent_group_id or 0 + strategy_rng = random.Random(args.seed + agent_group_id) + # Initialize the environment. 
env = HyperGrid( args.ndim, @@ -767,6 +758,7 @@ def _model_builder() -> Tuple[GFlowNet, torch.optim.Optimizer]: preprocessor, distributed_context.agent_groups, distributed_context.agent_group_id, + strategy_rng, ) if use_wandb: import wandb @@ -1424,6 +1416,48 @@ def cleanup(): action="store_true", help="Use a random strategy for the initial gflownet and restarts.", ) + parser.add_argument( + "--epsilon", + type=float, + default=0.0, + help="Mean epsilon for strategy sampling (default: 0.0).", + ) + parser.add_argument( + "--temperature", + type=float, + default=1.0, + help="Mean temperature for strategy sampling (default: 1.0).", + ) + parser.add_argument( + "--n_noisy_layers", + type=float, + default=0, + help="Mean number of noisy layers for strategy sampling (default: 0).", + ) + parser.add_argument( + "--noisy_std_init", + type=float, + default=0.5, + help="Initial std for noisy layers (default: 0.5).", + ) + parser.add_argument( + "--strategy_epsilon_std", + type=float, + default=0.1, + help="Std of epsilon for strategy sampling (default: 0.1).", + ) + parser.add_argument( + "--strategy_temperature_std", + type=float, + default=1.0, + help="Std of temperature for strategy sampling (default: 1.0).", + ) + parser.add_argument( + "--strategy_n_noisy_layers_std", + type=float, + default=1.0, + help="Std of number of noisy layers for strategy sampling (default: 1.0).", + ) parser.add_argument( "--use_restarts", action="store_true",