Merged
Commits
120 commits
9ae28b2
including Graphs as States for torchgfn
alip67 Nov 6, 2024
de6ab1c
add GraphEnv
younik Nov 7, 2024
24e23e8
add deps and reformat
younik Nov 7, 2024
1f7b220
add test, fix errors, add valid action check
younik Nov 8, 2024
63e4f1c
fix formatting
younik Nov 8, 2024
8034fb2
add GraphAction
younik Nov 14, 2024
d179671
fix batching mechanism
younik Nov 14, 2024
e018f4e
Merge branch 'GFNOrg:master' into graph-states
alip67 Nov 15, 2024
7ff96d5
add support for EXIT action
younik Nov 16, 2024
cf482da
Merge branch 'graph-states' of https://github.com/alip67/torchgfn int…
younik Nov 16, 2024
dacbbf7
add GraphActionPolicyEstimator
younik Nov 19, 2024
98ea448
Merge branch 'GFNOrg:master' into graph-states
alip67 Nov 19, 2024
e74e500
Sampler integration work
younik Nov 22, 2024
a862bb4
Merge branch 'graph-states' of https://github.com/alip67/torchgfn int…
younik Nov 22, 2024
5e64c84
use TensorDict
younik Nov 26, 2024
81f8b71
solve some errors
younik Nov 28, 2024
34781ef
use tensordict in actions
younik Nov 28, 2024
3e584f2
handle sf
younik Dec 2, 2024
d5e438f
remove Data
younik Dec 3, 2024
fba5d50
categorical action type
younik Dec 6, 2024
478bd14
change batching
younik Dec 10, 2024
dd80f28
fix stacking
younik Dec 11, 2024
616551c
fix graph stacking
younik Dec 11, 2024
77611d4
fix test graph env
younik Dec 12, 2024
5874ff6
add ring example
younik Dec 19, 2024
9d42332
remove check edge_features
younik Dec 20, 2024
2d44242
fix GraphStates set
younik Dec 20, 2024
173d4fb
remove debug
younik Dec 20, 2024
7265857
fix add_edge action
younik Dec 20, 2024
2b3208f
fix edge_index after get
younik Dec 20, 2024
b84246f
push updated code
younik Dec 22, 2024
fa0d22a
add rendering
younik Dec 27, 2024
27d192a
fix gradient propagation
younik Jan 6, 2025
5d99739
Merge remote-tracking branch 'origin/master' into graph-states
younik Jan 12, 2025
f4fc3ab
fix formatting
younik Jan 12, 2025
8f1c62c
address comments
younik Jan 12, 2025
6482834
fix test
younik Jan 12, 2025
6db601d
fix test
younik Jan 13, 2025
c7f8243
fix pre-commit
younik Jan 13, 2025
c3df427
Merge remote-tracking branch 'origin/master' into graph-states
younik Jan 13, 2025
78b729a
fix merging issues
younik Jan 13, 2025
38dd2b0
fix toml
younik Jan 13, 2025
12c49b7
add dep & address issue
younik Jan 13, 2025
fe237ed
fix toml
younik Jan 13, 2025
9bbc48d
fix pyproject.toml
younik Jan 13, 2025
5e4fc4e
address comments
younik Jan 14, 2025
705b4cc
add tests for action
younik Jan 15, 2025
d765330
fix test after added dummy action
younik Jan 15, 2025
4ee6987
add GraphPreprocessor
younik Jan 15, 2025
fe9713c
added TODO
younik Jan 15, 2025
1425eb6
add complete masks
younik Jan 19, 2025
36c42ec
pre-commit hook
younik Jan 19, 2025
5747e97
adress comments
younik Jan 19, 2025
e9f9951
pre-commit
younik Jan 19, 2025
406cfca
address comments
younik Jan 20, 2025
08e519b
fix ring example
younik Jan 24, 2025
17e07ad
make edge_index global
younik Jan 29, 2025
46e3698
make edge_index global
younik Jan 29, 2025
1c33d98
Merge remote-tracking branch 'origin/graph-states-fix' into graph-states
younik Jan 29, 2025
e6d909b
fix test_env
younik Jan 29, 2025
da66adb
add global edge + pair programming session
younik Feb 4, 2025
7130281
pair programming session
younik Feb 5, 2025
1fa01df
fix node_index
younik Feb 6, 2025
48ceafd
add comments
younik Feb 10, 2025
2ace3ca
Merge remote-tracking branch 'origin/master' into graph-states
younik Feb 10, 2025
2271732
fix linter
younik Feb 10, 2025
4be8f9b
add two tested RingPolicyEstimator
younik Feb 11, 2025
e7465b8
push tentatives
younik Feb 11, 2025
38041e8
trying TBGFN
younik Feb 12, 2025
2f4b3f9
increased capacity of RingPolicyEstimator
josephdviviano Feb 14, 2025
d0313d1
make undirected graph
younik Feb 17, 2025
f29432e
Merge remote-tracking branch 'alip67/graph-states' into graph-states
younik Feb 17, 2025
9d994da
merge over
josephdviviano Feb 18, 2025
6e1bbc2
merged
josephdviviano Feb 18, 2025
0625f5c
allows for configurable policy capacity, and the code simultaneously …
josephdviviano Feb 18, 2025
dae3299
spelling
josephdviviano Feb 18, 2025
671917b
allows for configurable policy capacity, and the code simultaneously …
josephdviviano Feb 18, 2025
623f50e
directed and undirected graphs now co-implemented -- but the model wi…
josephdviviano Feb 19, 2025
74d9b13
black
josephdviviano Feb 19, 2025
9b7ce68
change docstring and fix argument
saleml Feb 19, 2025
704cf66
add the possibility of layernorm in MLP
saleml Feb 19, 2025
7bbd72d
remove create_mlp function
saleml Feb 19, 2025
3e84ebd
Merge branch 'graph-states' of https://github.com/alip67/torchgfn int…
saleml Feb 19, 2025
7bdf5b5
new architecture
josephdviviano Feb 19, 2025
7e99d68
MLP changes
josephdviviano Feb 19, 2025
af26f6e
MLP changes
josephdviviano Feb 19, 2025
a6ffc72
fix magnitude issue
younik Feb 19, 2025
8ba06a1
changed docs
josephdviviano Feb 20, 2025
201553b
Merge branch 'graph-states' of https://github.com/alip67/torchgfn int…
josephdviviano Feb 20, 2025
a2d44d5
some bugfixes in extend of trajectories
josephdviviano Feb 20, 2025
bf92366
black
josephdviviano Feb 20, 2025
cfd4740
black
josephdviviano Feb 20, 2025
ab40005
removed print
josephdviviano Feb 20, 2025
8c83049
change types in env and states to allow for graph states
saleml Feb 20, 2025
7263ff2
merge recent chances
saleml Feb 20, 2025
e2ac8be
make batch_shape a property in general states, to make explicit the t…
saleml Feb 20, 2025
1543e49
fix batch_shape in GraphStates
saleml Feb 20, 2025
5dd5aba
fix batch_shape in GraphRing
saleml Feb 20, 2025
cda28e3
removed unused code from forward_masks
josephdviviano Feb 20, 2025
9ba3004
fix extends
younik Feb 20, 2025
3515c54
normal replay buffer can also be prioritized
josephdviviano Feb 22, 2025
b40118f
directed GCN working on small graphs
josephdviviano Feb 22, 2025
f7cf25e
Merge branch 'master' into pr/alip67/210
saleml Feb 23, 2025
799531f
Merge branch 'graph-states' of https://github.com/alip67/torchgfn int…
saleml Feb 23, 2025
de092a0
fix extend
younik Feb 24, 2025
39f91d3
add adjacency matrix module
saleml Feb 25, 2025
80b2a9e
Simplify ring graph reward calculation logic
saleml Feb 25, 2025
c24297d
fix forward mask
younik Feb 25, 2025
3c39064
add docstrings
saleml Feb 25, 2025
e43c937
Merge branch 'graph-states' of https://github.com/alip67/torchgfn int…
saleml Feb 25, 2025
0a1f8e1
switch from torch_geoemtric to TensorDict
younik Feb 26, 2025
382be45
update train_graph_ring
younik Feb 27, 2025
939bdab
fix all problems
younik Feb 27, 2025
0f57485
remove node_index
younik Feb 27, 2025
911157c
move dep
younik Feb 27, 2025
313562e
Merge branch 'graph-states' into graph-states
younik Feb 27, 2025
8806cda
fix pyproject.toml
hyeok9855 Feb 28, 2025
ce287eb
change state type hinting from Tensordict to torch_geometric Data
hyeok9855 Feb 28, 2025
7607945
add settings that achieve 95% in the ring generation (directed) with …
hyeok9855 Feb 28, 2025
e918ac7
rename test_state to test_graph_states
hyeok9855 Mar 3, 2025
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ classifiers = [
einops = ">=0.6.1"
numpy = ">=1.21.2"
python = "^3.10"
torch = ">=1.9.0"
torch = ">=2.6.0"
tensordict = ">=0.6.1"
torch_geometric = ">=2.6.1"

# dev dependencies.
black = { version = "24.3", optional = true }
Expand Down
155 changes: 155 additions & 0 deletions src/gfn/actions.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from __future__ import annotations # This allows to use the class name in type hints

import enum
from abc import ABC
from math import prod
from typing import ClassVar, Sequence

import torch
from tensordict import TensorDict


class Actions(ABC):
Expand Down Expand Up @@ -170,3 +172,156 @@ def is_exit(self) -> torch.Tensor:
*self.batch_shape, *((1,) * len(self.__class__.action_shape))
)
return self.compare(exit_actions_tensor)


class GraphActionType(enum.IntEnum):
ADD_NODE = 0
ADD_EDGE = 1
EXIT = 2
DUMMY = 3


class GraphActions(Actions):
"""Actions for graph-based environments.

Each action is one of:
- ADD_NODE: Add a node with given features
- ADD_EDGE: Add an edge between two nodes with given features
- EXIT: Terminate the trajectory
- DUMMY: Placeholder action used to pad batches where no real action applies

Attributes:
features_dim: Dimension of node/edge features
tensor: TensorDict containing:
- action_type: Type of action (GraphActionType)
- features: Features for nodes/edges
- edge_index: Source/target nodes for edges
"""

features_dim: ClassVar[int]

def __init__(self, tensor: TensorDict):
"""Initializes a GraphActions object.

Args:
tensor: a TensorDict with the following keys:
- "action_type": a tensor of GraphActionType values indicating the type of each action.
- "features": a tensor of shape (*batch_shape, features_dim) holding the node or edge features, depending on the action type. May be omitted for EXIT and DUMMY actions.
- "edge_index": a tensor of shape (*batch_shape, 2) representing the edge to add. Must be defined if and only if the action type is GraphActionType.ADD_EDGE.
"""
self.batch_shape = tensor["action_type"].shape
features = tensor.get("features", None)
if features is None:
assert torch.all(
torch.logical_or(
tensor["action_type"] == GraphActionType.EXIT,
tensor["action_type"] == GraphActionType.DUMMY,
)
)
features = torch.zeros((*self.batch_shape, self.features_dim))
edge_index = tensor.get("edge_index", None)
if edge_index is None:
assert torch.all(tensor["action_type"] != GraphActionType.ADD_EDGE)
edge_index = torch.zeros((*self.batch_shape, 2), dtype=torch.long)

self.tensor = TensorDict(
{
"action_type": tensor["action_type"],
"features": features,
"edge_index": edge_index,
},
batch_size=self.batch_shape,
)

def __repr__(self):
return f"GraphActions object with batch_shape {self.batch_shape}."

@property
def device(self) -> torch.device | None:
"""Returns the device of the features tensor."""
return self.tensor.device

def __len__(self) -> int:
"""Returns the number of actions in the batch."""
return int(prod(self.batch_shape))

def __getitem__(self, index: int | Sequence[int] | Sequence[bool]) -> GraphActions:
"""Get particular actions of the batch."""
return GraphActions(self.tensor[index])

def __setitem__(
self, index: int | Sequence[int] | Sequence[bool], action: GraphActions
) -> None:
"""Set particular actions of the batch."""
self.tensor[index] = action.tensor

def compare(self, other: GraphActions) -> torch.Tensor:
"""Compares the actions to another GraphActions object.

Args:
other: the GraphActions object to compare against.

Returns: a boolean tensor of shape batch_shape indicating whether the actions are equal.
"""
equal = self.tensor == other.tensor
return (
equal["action_type"]
& ((self.action_type == GraphActionType.EXIT) | equal["features"].all(dim=-1))
& ((self.action_type != GraphActionType.ADD_EDGE) | equal["edge_index"].all(dim=-1))
)

@property
def is_exit(self) -> torch.Tensor:
"""Returns a boolean tensor of shape `batch_shape` indicating whether the actions are exit actions."""
return self.action_type == GraphActionType.EXIT

@property
def is_dummy(self) -> torch.Tensor:
"""Returns a boolean tensor of shape `batch_shape` indicating whether the actions are dummy actions."""
return self.action_type == GraphActionType.DUMMY

@property
def action_type(self) -> torch.Tensor:
"""Returns the action type tensor."""
return self.tensor["action_type"]

@property
def features(self) -> torch.Tensor:
"""Returns the features tensor."""
return self.tensor["features"]

@property
def edge_index(self) -> torch.Tensor:
"""Returns the edge index tensor."""
return self.tensor["edge_index"]

@classmethod
def make_dummy_actions(cls, batch_shape: tuple[int]) -> GraphActions:
"""Creates a GraphActions object of dummy actions with the given batch shape."""
return cls(
TensorDict(
{
"action_type": torch.full(
batch_shape, fill_value=GraphActionType.DUMMY
),
},
batch_size=batch_shape,
)
)

@classmethod
def make_exit_actions(cls, batch_shape: tuple[int]) -> GraphActions:
"""Creates a GraphActions object of exit actions with the given batch shape."""
return cls(
TensorDict(
{
"action_type": torch.full(
batch_shape, fill_value=GraphActionType.EXIT
),
},
batch_size=batch_shape,
)
)
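The defaulting rules in `GraphActions.__init__` above can be summarized without the torch/tensordict dependencies: EXIT and DUMMY actions may omit `features` (zeros are filled in), and `edge_index` is only required — and only meaningful — for ADD_EDGE. The sketch below is a minimal, dependency-free mirror of that validation; `fill_action_defaults` and its `features_dim=4` default are hypothetical names for illustration, not part of the torchgfn API.

```python
from enum import IntEnum


class GraphActionType(IntEnum):
    ADD_NODE = 0
    ADD_EDGE = 1
    EXIT = 2
    DUMMY = 3


def fill_action_defaults(action_type, features=None, edge_index=None, features_dim=4):
    """Mirror of the GraphActions.__init__ defaulting logic (plain-Python sketch)."""
    # Only EXIT/DUMMY actions may omit features; they default to zeros.
    if features is None:
        assert action_type in (GraphActionType.EXIT, GraphActionType.DUMMY)
        features = [0.0] * features_dim
    # edge_index is only meaningful for ADD_EDGE; others get a placeholder.
    if edge_index is None:
        assert action_type != GraphActionType.ADD_EDGE
        edge_index = [0, 0]
    return {"action_type": action_type, "features": features, "edge_index": edge_index}


exit_action = fill_action_defaults(GraphActionType.EXIT)
add_edge = fill_action_defaults(
    GraphActionType.ADD_EDGE, features=[1.0, 0.0, 0.0, 0.0], edge_index=[0, 1]
)
```

In the real class the same batch-wide checks are done with `torch.all` over a `TensorDict`, but the per-action invariants are the ones shown here.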
2 changes: 1 addition & 1 deletion src/gfn/containers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .replay_buffer import PrioritizedReplayBuffer, ReplayBuffer
from .replay_buffer import NormBasedDiversePrioritizedReplayBuffer, ReplayBuffer
from .trajectories import Trajectories
from .transitions import Transitions
87 changes: 45 additions & 42 deletions src/gfn/containers/replay_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@ class ReplayBuffer:
training_objects: the buffer of objects used for training.
terminating_states: a States class representation of $s_f$.
objects_type: the type of buffer (transitions, trajectories, or states).
prioritized: whether the buffer is prioritized by log_reward or not.
"""

def __init__(
self,
env: Env,
objects_type: Literal["transitions", "trajectories", "states"],
capacity: int = 1000,
prioritized: bool = False,
):
"""Instantiates a replay buffer.
Args:
Expand All @@ -53,34 +55,61 @@ def __init__(
raise ValueError(f"Unknown objects_type: {objects_type}")

self._is_full = False
self.prioritized = prioritized

def __repr__(self):
return f"ReplayBuffer(capacity={self.capacity}, containing {len(self)} {self.objects_type})"

def __len__(self):
return len(self.training_objects)

def add(self, training_objects: Transitions | Trajectories | tuple[States]):
def _add_objs(
self,
training_objects: Transitions | Trajectories | tuple[States],
):
"""Adds a training object to the buffer."""
terminating_states = None
if isinstance(training_objects, tuple):
assert self.objects_type == "states" and self.terminating_states is not None
training_objects, terminating_states = training_objects # pyright: ignore

to_add = len(training_objects)

self._is_full |= len(self) + to_add >= self.capacity

# Adds the objects to the buffer.
self.training_objects.extend(training_objects) # pyright: ignore

# Sort elements by logreward, capping the size at the defined capacity.
if self.prioritized:

if (
self.training_objects.log_rewards is None
or training_objects.log_rewards is None # pyright: ignore
):
raise ValueError("log_rewards must be defined for prioritized replay.")

# Ascending sort.
ix = torch.argsort(self.training_objects.log_rewards) # pyright: ignore
self.training_objects = self.training_objects[ix] # pyright: ignore
self.training_objects = self.training_objects[
-self.capacity :
-self.capacity : # Ascending sort, so we retain the final elements.
] # pyright: ignore

# Add the terminating states to the buffer.
if self.terminating_states is not None:
assert terminating_states is not None
self.terminating_states.extend(terminating_states) # pyright: ignore
self.terminating_states.extend(terminating_states)

# Sort terminating states by logreward as well.
if self.prioritized:
self.terminating_states = self.terminating_states[ix]

self.terminating_states = self.terminating_states[-self.capacity :]

def add(self, training_objects: Transitions | Trajectories | tuple[States]):
"""Adds a training object to the buffer."""
self._add_objs(training_objects)

def sample(self, n_trajectories: int) -> Transitions | Trajectories | tuple[States]:
"""Samples `n_trajectories` training objects from the buffer."""
if self.terminating_states is not None:
Expand Down Expand Up @@ -113,8 +142,8 @@ def load(self, directory: str):
)


class PrioritizedReplayBuffer(ReplayBuffer):
"""A replay buffer of trajectories or transitions.
class NormBasedDiversePrioritizedReplayBuffer(ReplayBuffer):
"""A prioritized replay buffer of trajectories or transitions that also encourages diversity.

Attributes:
env: the Environment instance.
Expand Down Expand Up @@ -152,53 +181,27 @@ def __init__(
super().__init__(env, objects_type, capacity)
self.cutoff_distance = cutoff_distance
self.p_norm_distance = p_norm_distance
self._prioritized = True

def _add_objs(
self,
training_objects: Transitions | Trajectories | tuple[States],
terminating_states: States | None = None,
):
"""Adds a training object to the buffer."""
# Adds the objects to the buffer.
self.training_objects.extend(training_objects) # pyright: ignore

# Sort elements by logreward, capping the size at the defined capacity.
ix = torch.argsort(self.training_objects.log_rewards) # pyright: ignore
self.training_objects = self.training_objects[ix] # pyright: ignore
self.training_objects = self.training_objects[
-self.capacity :
] # pyright: ignore

# Add the terminating states to the buffer.
if self.terminating_states is not None:
assert terminating_states is not None
self.terminating_states.extend(terminating_states)

# Sort terminating states by logreward as well.
self.terminating_states = self.terminating_states[ix]
self.terminating_states = self.terminating_states[-self.capacity :]
@property
def prioritized(self) -> bool:
return self._prioritized

def add(self, training_objects: Transitions | Trajectories | tuple[States]):
"""Adds a training object to the buffer."""
terminating_states = None
if isinstance(training_objects, tuple):
assert self.objects_type == "states" and self.terminating_states is not None
training_objects, terminating_states = training_objects # pyright: ignore

to_add = len(training_objects)
self._is_full |= len(self) + to_add >= self.capacity

# The buffer isn't full yet.
if len(self.training_objects) < self.capacity:
self._add_objs(training_objects, terminating_states)
self._add_objs(training_objects)

# Our buffer is full and we will prioritize diverse, high reward additions.
else:
if (
self.training_objects.log_rewards is None
or training_objects.log_rewards is None # pyright: ignore
):
raise ValueError("log_rewards must be defined for prioritized replay.")
terminating_states = None
if isinstance(training_objects, tuple):
assert self.objects_type == "states" and self.terminating_states is not None
training_objects, terminating_states = training_objects # pyright: ignore

# Sort the incoming elements by their logrewards.
ix = torch.argsort(
Expand Down
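The retention rule that the refactored `_add_objs` applies when `prioritized=True` is an ascending argsort by log-reward followed by keeping the last `capacity` elements. The helper below is a hypothetical, list-based sketch of that pattern (the buffer itself indexes `Trajectories` objects with `torch.argsort`):

```python
def prioritized_retain(log_rewards, capacity):
    """Return indices of the `capacity` highest-log-reward items,
    using the buffer's ascending-argsort-then-truncate pattern."""
    ix = sorted(range(len(log_rewards)), key=lambda i: log_rewards[i])  # ascending
    return ix[-capacity:]  # keep the tail, i.e. the highest log-rewards


kept = prioritized_retain([0.1, 2.0, -1.0, 0.5], capacity=2)  # -> [3, 1]
```

Because the same permutation `ix` is reused for `terminating_states`, training objects and their terminal states stay aligned after truncation.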
4 changes: 2 additions & 2 deletions src/gfn/containers/trajectories.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def __init__(
assert (
log_probs.shape == (self.max_length, self.n_trajectories)
and log_probs.dtype == torch.float
)
), f"log_probs.shape={log_probs.shape}, self.max_length={self.max_length}, self.n_trajectories={self.n_trajectories}"
else:
log_probs = torch.full(size=(0, 0), fill_value=0, dtype=torch.float)
self.log_probs: torch.Tensor = log_probs
Expand Down Expand Up @@ -256,7 +256,7 @@ def extend(self, other: Trajectories) -> None:

# TODO: The replay buffer is storing `dones` - this wastes a lot of space.
self.actions.extend(other.actions)
self.states.extend(other.states)
self.states.extend(other.states) # n_trajectories comes from this.
self.when_is_done = torch.cat((self.when_is_done, other.when_is_done), dim=0)

# For log_probs, we first need to make the first dimensions of self.log_probs
Expand Down