From 638405439fc49c54af058c481e729caf0c403235 Mon Sep 17 00:00:00 2001
From: Omar Younis <omar.younis98@gmail.com>
Date: Thu, 5 Feb 2026 15:24:39 +0100
Subject: [PATCH 1/8] customize random strategy

---
 tutorials/examples/train_hypergrid.py | 150 ++++++++++++++++----------
 1 file changed, 91 insertions(+), 59 deletions(-)

diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py
index aa6ade25..1d73e780 100644
--- a/tutorials/examples/train_hypergrid.py
+++ b/tutorials/examples/train_hypergrid.py
@@ -33,6 +33,7 @@
 from argparse import ArgumentParser
 from math import ceil
 from typing import Optional, Tuple, cast
+import random
 
 import matplotlib.pyplot as plt
 import torch
@@ -314,57 +315,48 @@ def get_exact_P_T(env: HyperGrid, gflownet: GFlowNet) -> torch.Tensor:
     return (u * probabilities[..., -1]).detach().cpu()
 
 
-def _sample_new_strategy(
-    args,
-    agent_group_id: int,
-    iteration: int,
-    prev_eps: float,
-    prev_temp: float,
-    prev_noisy: int,
-) -> dict:
-    """Select a new exploration strategy, including noisy layers.
+def _sample_new_strategy(args, rng: random.Random) -> dict:
+    """Sample a new exploration strategy by independently sampling each parameter.
 
-    The strategy only defines exploration-time parameters and the count of
-    noisy layers to use when building/rebuilding the networks.
+    Each parameter (epsilon, temperature, n_noisy_layers) is drawn from a normal
+    distribution whose mean and std come from args. Epsilon is clamped to >= 0,
+    temperature to >= 0.01, and n_noisy_layers is rounded and clamped to >= 0.
 
-    We pick deterministically from a small candidate pool, excluding the
-    previous configuration when possible, to ensure diversity across
-    restarts without requiring synchronization.
+    Args:
+        args: Argument namespace containing mean/std for each parameter:
+            - epsilon, strategy_epsilon_std
+            - temperature, strategy_temperature_std
+            - n_noisy_layers, strategy_n_noisy_layers_std
+            - noisy_std_init (optional, default 0.5)
+        rng: Random number generator instance to use for sampling.
 
     Returns:
-        A dict with keys: name, epsilon, temperature, n_noisy_layers,
-        and noisy_std_init (if present in args, default 0.5 otherwise).
+        A dict with keys: name, epsilon, temperature, n_noisy_layers, noisy_std_init.
     """
-    # TODO: Generate a new exploration strategy instead of selecting from a pre-defined
-    # list.
-    candidates = [
-        {"name": "on_policy", "epsilon": 0.0, "temperature": 1.0, "n_noisy_layers": 0},
-        {"name": "epsilon_0.1", "epsilon": 0.1, "temperature": 1.0, "n_noisy_layers": 0},
-        {"name": "temp_1.5", "epsilon": 0.0, "temperature": 1.5, "n_noisy_layers": 0},
-        {"name": "noisy_1", "epsilon": 0.0, "temperature": 1.0, "n_noisy_layers": 1},
-        {
-            "name": "noisy_2_temp_1.5",
-            "epsilon": 0.0,
-            "temperature": 1.5,
-            "n_noisy_layers": 2,
-        },
-    ]
-    choices = [
-        c
-        for c in candidates
-        if (
-            c["epsilon"] != prev_eps
-            or c["temperature"] != prev_temp
-            or c["n_noisy_layers"] != prev_noisy
-        )
-    ]
-    if not choices:
-        choices = candidates
-    idx_seed = int(args.seed) + int(agent_group_id) * 7919 + int(iteration) * 104729
-    idx = idx_seed % len(choices)
-    strat = choices[idx]
-    strat["noisy_std_init"] = float(getattr(args, "agent_noisy_std_init", 0.5))
-    return strat
+    # Get mean/std from args with sensible defaults.
+    eps_mean = float(getattr(args, "epsilon_mean", 0.1))
+    eps_std = float(getattr(args, "strategy_epsilon_std", 0.05))
+    temp_mean = float(getattr(args, "temperature", 1.5))
+    temp_std = float(getattr(args, "strategy_temperature_std", 0.5))
+    noisy_mean = float(getattr(args, "n_noisy_layers", 1.0))
+    noisy_std = float(getattr(args, "strategy_n_noisy_layers_std", 1.0))
+    noisy_std_init = float(getattr(args, "noisy_std_init", 0.5))
+
+    # Sample from normal distribution and clamp to valid ranges.
+    epsilon = max(0.0, rng.gauss(eps_mean, eps_std))
+    temperature = max(0.01, rng.gauss(temp_mean, temp_std))  # temperature > 0
+    n_noisy_layers = max(0, round(rng.gauss(noisy_mean, noisy_std)))
+
+    # Build a descriptive name for the strategy.
+    name = f"eps_{epsilon:.3f}_temp_{temperature:.3f}_noisy_{n_noisy_layers}"
+
+    return {
+        "name": name,
+        "epsilon": epsilon,
+        "temperature": temperature,
+        "n_noisy_layers": n_noisy_layers,
+        "noisy_std_init": noisy_std_init,
+    }
 
 
 def _make_optimizer_for(gflownet, args) -> torch.optim.Optimizer:
@@ -480,26 +472,19 @@ def set_up_logF_estimator(
     return ScalarEstimator(module=module, preprocessor=preprocessor)
 
 
-def set_up_gflownet(args, env, preprocessor, agent_group_list, my_agent_group_id):
+def set_up_gflownet(args, env, preprocessor, agent_group_list, my_agent_group_id, strategy_rng):
     """Returns a GFlowNet complete with the required estimators."""
     # Initialize per-agent exploration strategy.
     # Default (tests stable): on-policy, no noisy layers.
     # When --use_random_strategies is provided, sample a random initial strategy.
     if getattr(args, "use_random_strategies", False):
-        cfg = _sample_new_strategy(
-            args,
-            agent_group_id=my_agent_group_id,
-            iteration=0,
-            prev_eps=9999.0,
-            prev_temp=9999.0,
-            prev_noisy=9999,
-        )
+        cfg = _sample_new_strategy(args, strategy_rng)
     else:
         cfg = {
-            "epsilon": 0.0,
-            "temperature": 1.0,
-            "n_noisy_layers": 0,
-            "noisy_std_init": 0.5,
+            "epsilon": args.epsilon,
+            "temperature": args.temperature,
+            "n_noisy_layers": args.n_noisy_layers,
+            "noisy_std_init": args.noisy_std_init,
         }
 
     args.agent_epsilon = float(cfg.get("epsilon", 0.0))
@@ -672,6 +657,10 @@ def main(args) -> dict:  # noqa: C901
 
     set_seed(args.seed + distributed_context.my_rank)
 
+    # Create RNG for strategy sampling (seeded deterministically per agent group).
+    agent_group_id = distributed_context.agent_group_id or 0
+    strategy_rng = random.Random(args.seed + agent_group_id)
+
     # Initialize the environment.
     env = HyperGrid(
         args.ndim,
@@ -767,6 +756,7 @@ def _model_builder() -> Tuple[GFlowNet, torch.optim.Optimizer]:
             preprocessor,
             distributed_context.agent_groups,
             distributed_context.agent_group_id,
+            strategy_rng,
         )
         if use_wandb:
             import wandb
@@ -1424,6 +1414,48 @@ def cleanup():
         action="store_true",
         help="Use a random strategy for the initial gflownet and restarts.",
     )
+    parser.add_argument(
+        "--epsilon",
+        type=float,
+        default=0.0,
+        help="Mean epsilon for strategy sampling (default: 0.1).",
+    )
+    parser.add_argument(
+        "--temperature",
+        type=float,
+        default=1.0,
+        help="Mean temperature for strategy sampling (default: 1.5).",
+    )
+    parser.add_argument(
+        "--n_noisy_layers",
+        type=float,
+        default=0,
+        help="Mean number of noisy layers for strategy sampling (default: 1.0).",
+    )
+    parser.add_argument(
+        "--noisy_std_init",
+        type=float,
+        default=0.5,
+        help="Initial std for noisy layers (default: 0.5).",
+    )
+    parser.add_argument(
+        "--strategy_epsilon_std",
+        type=float,
+        default=0.1,
+        help="Std of epsilon for strategy sampling (default: 0.05).",
+    )
+    parser.add_argument(
+        "--strategy_temperature_std",
+        type=float,
+        default=1.0,
+        help="Std of temperature for strategy sampling (default: 0.5).",
+    )
+    parser.add_argument(
+        "--strategy_n_noisy_layers_std",
+        type=float,
+        default=1.0,
+        help="Std of number of noisy layers for strategy sampling (default: 1.0).",
+    )
     parser.add_argument(
         "--use_restarts",
         action="store_true",

From 513fa6c3ba82b42133ed92b9b214ce606743bc10 Mon Sep 17 00:00:00 2001
From: Omar Younis <omar.younis98@gmail.com>
Date: Thu, 5 Feb 2026 15:27:27 +0100
Subject: [PATCH 2/8] isort imports and wrap set_up_gflownet signature

---
 tutorials/examples/train_hypergrid.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py
index 1d73e780..b6be78ce 100644
--- a/tutorials/examples/train_hypergrid.py
+++ b/tutorials/examples/train_hypergrid.py
@@ -29,11 +29,11 @@
 
 import logging
 import os
+import random
 import time
 from argparse import ArgumentParser
 from math import ceil
 from typing import Optional, Tuple, cast
-import random
 
 import matplotlib.pyplot as plt
 import torch
@@ -472,7 +472,9 @@ def set_up_logF_estimator(
     return ScalarEstimator(module=module, preprocessor=preprocessor)
 
 
-def set_up_gflownet(args, env, preprocessor, agent_group_list, my_agent_group_id, strategy_rng):
+def set_up_gflownet(
+    args, env, preprocessor, agent_group_list, my_agent_group_id, strategy_rng
+):
     """Returns a GFlowNet complete with the required estimators."""
     # Initialize per-agent exploration strategy.
     # Default (tests stable): on-policy, no noisy layers.

From 123232cb510557293b76afecd45df342db920275 Mon Sep 17 00:00:00 2001
From: Omar Younis <42100908+younik@users.noreply.github.com>
Date: Thu, 5 Feb 2026 15:39:04 +0100
Subject: [PATCH 3/8] Apply suggestion from @graphite-app[bot]: read epsilon mean from args.epsilon

Co-authored-by: graphite-app[bot] <96075541+graphite-app[bot]@users.noreply.github.com>
---
 tutorials/examples/train_hypergrid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py
index b6be78ce..da2bbcdd 100644
--- a/tutorials/examples/train_hypergrid.py
+++ b/tutorials/examples/train_hypergrid.py
@@ -334,7 +334,7 @@ def _sample_new_strategy(args, rng: random.Random) -> dict:
         A dict with keys: name, epsilon, temperature, n_noisy_layers, noisy_std_init.
     """
     # Get mean/std from args with sensible defaults.
-    eps_mean = float(getattr(args, "epsilon_mean", 0.1))
+    eps_mean = float(getattr(args, "epsilon", 0.1))
     eps_std = float(getattr(args, "strategy_epsilon_std", 0.05))
     temp_mean = float(getattr(args, "temperature", 1.5))
     temp_std = float(getattr(args, "strategy_temperature_std", 0.5))

From 8d57ad600ea91aabf6a6ffc6a655dbe2443d8ca2 Mon Sep 17 00:00:00 2001
From: Omar Younis <42100908+younik@users.noreply.github.com>
Date: Thu, 5 Feb 2026 15:39:17 +0100
Subject: [PATCH 4/8] Apply suggestion from @graphite-app[bot]: fix --epsilon help default

Co-authored-by: graphite-app[bot] <96075541+graphite-app[bot]@users.noreply.github.com>
---
 tutorials/examples/train_hypergrid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py
index da2bbcdd..21aba5ae 100644
--- a/tutorials/examples/train_hypergrid.py
+++ b/tutorials/examples/train_hypergrid.py
@@ -1420,7 +1420,7 @@ def cleanup():
         "--epsilon",
         type=float,
         default=0.0,
-        help="Mean epsilon for strategy sampling (default: 0.1).",
+        help="Mean epsilon for strategy sampling (default: 0.0).",
     )
     parser.add_argument(
         "--temperature",

From 4789fe1bb46ec33164f12c7d966629185bb16d6f Mon Sep 17 00:00:00 2001
From: Omar Younis <42100908+younik@users.noreply.github.com>
Date: Thu, 5 Feb 2026 15:39:28 +0100
Subject: [PATCH 5/8] Apply suggestion from @graphite-app[bot]: fix --n_noisy_layers help default

Co-authored-by: graphite-app[bot] <96075541+graphite-app[bot]@users.noreply.github.com>
---
 tutorials/examples/train_hypergrid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py
index 21aba5ae..0cfc0d14 100644
--- a/tutorials/examples/train_hypergrid.py
+++ b/tutorials/examples/train_hypergrid.py
@@ -1432,7 +1432,7 @@ def cleanup():
         "--n_noisy_layers",
         type=float,
         default=0,
-        help="Mean number of noisy layers for strategy sampling (default: 1.0).",
+        help="Mean number of noisy layers for strategy sampling (default: 0).",
     )
     parser.add_argument(
         "--noisy_std_init",

From 2d18bc36af71ed05ce6e8192dc8913de688c3ee2 Mon Sep 17 00:00:00 2001
From: Omar Younis <42100908+younik@users.noreply.github.com>
Date: Thu, 5 Feb 2026 15:39:38 +0100
Subject: [PATCH 6/8] Apply suggestion from @graphite-app[bot]: fix --temperature help default

Co-authored-by: graphite-app[bot] <96075541+graphite-app[bot]@users.noreply.github.com>
---
 tutorials/examples/train_hypergrid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py
index 0cfc0d14..be5bb38e 100644
--- a/tutorials/examples/train_hypergrid.py
+++ b/tutorials/examples/train_hypergrid.py
@@ -1426,7 +1426,7 @@ def cleanup():
         "--temperature",
         type=float,
         default=1.0,
-        help="Mean temperature for strategy sampling (default: 1.5).",
+        help="Mean temperature for strategy sampling (default: 1.0).",
     )
     parser.add_argument(
         "--n_noisy_layers",

From e9427945f43b16f8bcf06db2b2c251185a060564 Mon Sep 17 00:00:00 2001
From: Omar Younis <42100908+younik@users.noreply.github.com>
Date: Thu, 5 Feb 2026 15:39:47 +0100
Subject: [PATCH 7/8] Apply suggestion from @graphite-app[bot]: fix --strategy_epsilon_std help default

Co-authored-by: graphite-app[bot] <96075541+graphite-app[bot]@users.noreply.github.com>
---
 tutorials/examples/train_hypergrid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py
index be5bb38e..94cc2e89 100644
--- a/tutorials/examples/train_hypergrid.py
+++ b/tutorials/examples/train_hypergrid.py
@@ -1444,7 +1444,7 @@ def cleanup():
         "--strategy_epsilon_std",
         type=float,
         default=0.1,
-        help="Std of epsilon for strategy sampling (default: 0.05).",
+        help="Std of epsilon for strategy sampling (default: 0.1).",
     )
     parser.add_argument(
         "--strategy_temperature_std",

From c7bd799946b0871a3f5b2b1f0cea0a97bbf9acae Mon Sep 17 00:00:00 2001
From: Omar Younis <42100908+younik@users.noreply.github.com>
Date: Thu, 5 Feb 2026 15:40:02 +0100
Subject: [PATCH 8/8] Apply suggestion from @graphite-app[bot]: fix --strategy_temperature_std help default

Co-authored-by: graphite-app[bot] <96075541+graphite-app[bot]@users.noreply.github.com>
---
 tutorials/examples/train_hypergrid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py
index 94cc2e89..cb1d6142 100644
--- a/tutorials/examples/train_hypergrid.py
+++ b/tutorials/examples/train_hypergrid.py
@@ -1450,7 +1450,7 @@ def cleanup():
         "--strategy_temperature_std",
         type=float,
         default=1.0,
-        help="Std of temperature for strategy sampling (default: 0.5).",
+        help="Std of temperature for strategy sampling (default: 1.0).",
     )
     parser.add_argument(
         "--strategy_n_noisy_layers_std",