From 638405439fc49c54af058c481e729caf0c403235 Mon Sep 17 00:00:00 2001 From: Omar Younis Date: Thu, 5 Feb 2026 15:24:39 +0100 Subject: [PATCH 1/8] customize random strategy --- tutorials/examples/train_hypergrid.py | 150 ++++++++++++++++---------- 1 file changed, 91 insertions(+), 59 deletions(-) diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py index aa6ade25..1d73e780 100644 --- a/tutorials/examples/train_hypergrid.py +++ b/tutorials/examples/train_hypergrid.py @@ -33,6 +33,7 @@ from argparse import ArgumentParser from math import ceil from typing import Optional, Tuple, cast +import random import matplotlib.pyplot as plt import torch @@ -314,57 +315,48 @@ def get_exact_P_T(env: HyperGrid, gflownet: GFlowNet) -> torch.Tensor: return (u * probabilities[..., -1]).detach().cpu() -def _sample_new_strategy( - args, - agent_group_id: int, - iteration: int, - prev_eps: float, - prev_temp: float, - prev_noisy: int, -) -> dict: - """Select a new exploration strategy, including noisy layers. +def _sample_new_strategy(args, rng: random.Random) -> dict: + """Sample a new exploration strategy by independently sampling each parameter. - The strategy only defines exploration-time parameters and the count of - noisy layers to use when building/rebuilding the networks. + Each parameter (epsilon, temperature, n_noisy_layers) is sampled from a + normal distribution with mean and std specified in args. Values are clamped + to valid ranges. - We pick deterministically from a small candidate pool, excluding the - previous configuration when possible, to ensure diversity across - restarts without requiring synchronization. + Args: + args: Argument namespace containing mean/std for each parameter: + - epsilon, strategy_epsilon_std + - temperature, strategy_temperature_std + - n_noisy_layers, strategy_n_noisy_layers_std + - strategy_noisy_std_init (optional, default 0.5) + rng: Random number generator instance to use for sampling. Returns: - A dict with keys: name, epsilon, temperature, n_noisy_layers, - and noisy_std_init (if present in args, default 0.5 otherwise). + A dict with keys: name, epsilon, temperature, n_noisy_layers, noisy_std_init. """ - # TODO: Generate a new exploration strategy instead of selecting from a pre-defined - # list. - candidates = [ - {"name": "on_policy", "epsilon": 0.0, "temperature": 1.0, "n_noisy_layers": 0}, - {"name": "epsilon_0.1", "epsilon": 0.1, "temperature": 1.0, "n_noisy_layers": 0}, - {"name": "temp_1.5", "epsilon": 0.0, "temperature": 1.5, "n_noisy_layers": 0}, - {"name": "noisy_1", "epsilon": 0.0, "temperature": 1.0, "n_noisy_layers": 1}, - { - "name": "noisy_2_temp_1.5", - "epsilon": 0.0, - "temperature": 1.5, - "n_noisy_layers": 2, - }, - ] - choices = [ - c - for c in candidates - if ( - c["epsilon"] != prev_eps - or c["temperature"] != prev_temp - or c["n_noisy_layers"] != prev_noisy - ) - ] - if not choices: - choices = candidates - idx_seed = int(args.seed) + int(agent_group_id) * 7919 + int(iteration) * 104729 - idx = idx_seed % len(choices) - strat = choices[idx] - strat["noisy_std_init"] = float(getattr(args, "agent_noisy_std_init", 0.5)) - return strat + # Get mean/std from args with sensible defaults. + eps_mean = float(getattr(args, "epsilon_mean", 0.1)) + eps_std = float(getattr(args, "strategy_epsilon_std", 0.05)) + temp_mean = float(getattr(args, "temperature", 1.5)) + temp_std = float(getattr(args, "strategy_temperature_std", 0.5)) + noisy_mean = float(getattr(args, "n_noisy_layers", 1.0)) + noisy_std = float(getattr(args, "strategy_n_noisy_layers_std", 1.0)) + noisy_std_init = float(getattr(args, "noisy_std_init", 0.5)) + + # Sample from normal distribution and clamp to valid ranges. + epsilon = max(0.0, rng.gauss(eps_mean, eps_std)) + temperature = max(0.01, rng.gauss(temp_mean, temp_std)) # temperature > 0 + n_noisy_layers = max(0, round(rng.gauss(noisy_mean, noisy_std))) + + # Build a descriptive name for the strategy. + name = f"eps_{epsilon:.3f}_temp_{temperature:.3f}_noisy_{n_noisy_layers}" + + return { + "name": name, + "epsilon": epsilon, + "temperature": temperature, + "n_noisy_layers": n_noisy_layers, + "noisy_std_init": noisy_std_init, + } def _make_optimizer_for(gflownet, args) -> torch.optim.Optimizer: @@ -480,26 +472,19 @@ def set_up_logF_estimator( return ScalarEstimator(module=module, preprocessor=preprocessor) -def set_up_gflownet(args, env, preprocessor, agent_group_list, my_agent_group_id): +def set_up_gflownet(args, env, preprocessor, agent_group_list, my_agent_group_id, strategy_rng): """Returns a GFlowNet complete with the required estimators.""" # Initialize per-agent exploration strategy. # Default (tests stable): on-policy, no noisy layers. # When --use_random_strategies is provided, sample a random initial strategy. if getattr(args, "use_random_strategies", False): - cfg = _sample_new_strategy( - args, - agent_group_id=my_agent_group_id, - iteration=0, - prev_eps=9999.0, - prev_temp=9999.0, - prev_noisy=9999, - ) + cfg = _sample_new_strategy(args, strategy_rng) else: cfg = { - "epsilon": 0.0, - "temperature": 1.0, - "n_noisy_layers": 0, - "noisy_std_init": 0.5, + "epsilon": args.epsilon, + "temperature": args.temperature, + "n_noisy_layers": args.n_noisy_layers, + "noisy_std_init": args.noisy_std_init, } args.agent_epsilon = float(cfg.get("epsilon", 0.0)) @@ -672,6 +657,10 @@ def main(args) -> dict: # noqa: C901 set_seed(args.seed + distributed_context.my_rank) + # Create RNG for strategy sampling (seeded deterministically per agent group). + agent_group_id = distributed_context.agent_group_id or 0 + strategy_rng = random.Random(args.seed + agent_group_id) + # Initialize the environment. env = HyperGrid( args.ndim, @@ -767,6 +756,7 @@ def _model_builder() -> Tuple[GFlowNet, torch.optim.Optimizer]: preprocessor, distributed_context.agent_groups, distributed_context.agent_group_id, + strategy_rng, ) if use_wandb: import wandb @@ -1424,6 +1414,48 @@ def cleanup(): action="store_true", help="Use a random strategy for the initial gflownet and restarts.", ) + parser.add_argument( + "--epsilon", + type=float, + default=0.0, + help="Mean epsilon for strategy sampling (default: 0.1).", + ) + parser.add_argument( + "--temperature", + type=float, + default=1.0, + help="Mean temperature for strategy sampling (default: 1.5).", + ) + parser.add_argument( + "--n_noisy_layers", + type=float, + default=0, + help="Mean number of noisy layers for strategy sampling (default: 1.0).", + ) + parser.add_argument( + "--noisy_std_init", + type=float, + default=0.5, + help="Initial std for noisy layers (default: 0.5).", + ) + parser.add_argument( + "--strategy_epsilon_std", + type=float, + default=0.1, + help="Std of epsilon for strategy sampling (default: 0.05).", + ) + parser.add_argument( + "--strategy_temperature_std", + type=float, + default=1.0, + help="Std of temperature for strategy sampling (default: 0.5).", + ) + parser.add_argument( + "--strategy_n_noisy_layers_std", + type=float, + default=1.0, + help="Std of number of noisy layers for strategy sampling (default: 1.0).", + ) parser.add_argument( "--use_restarts", action="store_true", From 513fa6c3ba82b42133ed92b9b214ce606743bc10 Mon Sep 17 00:00:00 2001 From: Omar Younis Date: Thu, 5 Feb 2026 15:27:27 +0100 Subject: [PATCH 2/8] isort --- tutorials/examples/train_hypergrid.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py index 1d73e780..b6be78ce 100644 --- a/tutorials/examples/train_hypergrid.py +++ b/tutorials/examples/train_hypergrid.py @@ -29,11 +29,11 @@ import logging import os +import random import time from argparse import ArgumentParser from math import ceil from typing import Optional, Tuple, cast -import random import matplotlib.pyplot as plt import torch @@ -472,7 +472,9 @@ def set_up_logF_estimator( return ScalarEstimator(module=module, preprocessor=preprocessor) -def set_up_gflownet(args, env, preprocessor, agent_group_list, my_agent_group_id, strategy_rng): +def set_up_gflownet( + args, env, preprocessor, agent_group_list, my_agent_group_id, strategy_rng +): """Returns a GFlowNet complete with the required estimators.""" # Initialize per-agent exploration strategy. # Default (tests stable): on-policy, no noisy layers. From 123232cb510557293b76afecd45df342db920275 Mon Sep 17 00:00:00 2001 From: Omar Younis <42100908+younik@users.noreply.github.com> Date: Thu, 5 Feb 2026 15:39:04 +0100 Subject: [PATCH 3/8] Apply suggestion from @graphite-app[bot] Co-authored-by: graphite-app[bot] <96075541+graphite-app[bot]@users.noreply.github.com> --- tutorials/examples/train_hypergrid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py index b6be78ce..da2bbcdd 100644 --- a/tutorials/examples/train_hypergrid.py +++ b/tutorials/examples/train_hypergrid.py @@ -334,7 +334,7 @@ def _sample_new_strategy(args, rng: random.Random) -> dict: A dict with keys: name, epsilon, temperature, n_noisy_layers, noisy_std_init. """ # Get mean/std from args with sensible defaults. - eps_mean = float(getattr(args, "epsilon_mean", 0.1)) + eps_mean = float(getattr(args, "epsilon", 0.1)) eps_std = float(getattr(args, "strategy_epsilon_std", 0.05)) temp_mean = float(getattr(args, "temperature", 1.5)) temp_std = float(getattr(args, "strategy_temperature_std", 0.5)) From 8d57ad600ea91aabf6a6ffc6a655dbe2443d8ca2 Mon Sep 17 00:00:00 2001 From: Omar Younis <42100908+younik@users.noreply.github.com> Date: Thu, 5 Feb 2026 15:39:17 +0100 Subject: [PATCH 4/8] Apply suggestion from @graphite-app[bot] Co-authored-by: graphite-app[bot] <96075541+graphite-app[bot]@users.noreply.github.com> --- tutorials/examples/train_hypergrid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py index da2bbcdd..21aba5ae 100644 --- a/tutorials/examples/train_hypergrid.py +++ b/tutorials/examples/train_hypergrid.py @@ -1420,7 +1420,7 @@ def cleanup(): "--epsilon", type=float, default=0.0, - help="Mean epsilon for strategy sampling (default: 0.1).", + help="Mean epsilon for strategy sampling (default: 0.0).", ) parser.add_argument( "--temperature", From 4789fe1bb46ec33164f12c7d966629185bb16d6f Mon Sep 17 00:00:00 2001 From: Omar Younis <42100908+younik@users.noreply.github.com> Date: Thu, 5 Feb 2026 15:39:28 +0100 Subject: [PATCH 5/8] Apply suggestion from @graphite-app[bot] Co-authored-by: graphite-app[bot] <96075541+graphite-app[bot]@users.noreply.github.com> --- tutorials/examples/train_hypergrid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py index 21aba5ae..0cfc0d14 100644 --- a/tutorials/examples/train_hypergrid.py +++ b/tutorials/examples/train_hypergrid.py @@ -1432,7 +1432,7 @@ def cleanup(): "--n_noisy_layers", type=float, default=0, - help="Mean number of noisy layers for strategy sampling (default: 1.0).", + help="Mean number of noisy layers for strategy sampling (default: 0).", ) parser.add_argument( "--noisy_std_init", From 2d18bc36af71ed05ce6e8192dc8913de688c3ee2 Mon Sep 17 00:00:00 2001 From: Omar Younis <42100908+younik@users.noreply.github.com> Date: Thu, 5 Feb 2026 15:39:38 +0100 Subject: [PATCH 6/8] Apply suggestion from @graphite-app[bot] Co-authored-by: graphite-app[bot] <96075541+graphite-app[bot]@users.noreply.github.com> --- tutorials/examples/train_hypergrid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py index 0cfc0d14..be5bb38e 100644 --- a/tutorials/examples/train_hypergrid.py +++ b/tutorials/examples/train_hypergrid.py @@ -1426,7 +1426,7 @@ def cleanup(): "--temperature", type=float, default=1.0, - help="Mean temperature for strategy sampling (default: 1.5).", + help="Mean temperature for strategy sampling (default: 1.0).", ) parser.add_argument( "--n_noisy_layers", From e9427945f43b16f8bcf06db2b2c251185a060564 Mon Sep 17 00:00:00 2001 From: Omar Younis <42100908+younik@users.noreply.github.com> Date: Thu, 5 Feb 2026 15:39:47 +0100 Subject: [PATCH 7/8] Apply suggestion from @graphite-app[bot] Co-authored-by: graphite-app[bot] <96075541+graphite-app[bot]@users.noreply.github.com> --- tutorials/examples/train_hypergrid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py index be5bb38e..94cc2e89 100644 --- a/tutorials/examples/train_hypergrid.py +++ b/tutorials/examples/train_hypergrid.py @@ -1444,7 +1444,7 @@ def cleanup(): "--strategy_epsilon_std", type=float, default=0.1, - help="Std of epsilon for strategy sampling (default: 0.05).", + help="Std of epsilon for strategy sampling (default: 0.1).", ) parser.add_argument( "--strategy_temperature_std", From c7bd799946b0871a3f5b2b1f0cea0a97bbf9acae Mon Sep 17 00:00:00 2001 From: Omar Younis <42100908+younik@users.noreply.github.com> Date: Thu, 5 Feb 2026 15:40:02 +0100 Subject: [PATCH 8/8] Apply suggestion from @graphite-app[bot] Co-authored-by: graphite-app[bot] <96075541+graphite-app[bot]@users.noreply.github.com> --- tutorials/examples/train_hypergrid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py index 94cc2e89..cb1d6142 100644 --- a/tutorials/examples/train_hypergrid.py +++ b/tutorials/examples/train_hypergrid.py @@ -1450,7 +1450,7 @@ def cleanup(): "--strategy_temperature_std", type=float, default=1.0, - help="Std of temperature for strategy sampling (default: 0.5).", + help="Std of temperature for strategy sampling (default: 1.0).", ) parser.add_argument( "--strategy_n_noisy_layers_std",