cp

daveey · daveey · commit 0fdcc0bbae3b · 2025-12-22T20:40:38.000-08:00
diff --git a/packages/mettagrid/cpp/bindings/mettagrid_c.cpp b/packages/mettagrid/cpp/bindings/mettagrid_c.cpp
@@ -68,7 +68,7 @@ MettaGrid::MettaGrid(const GameConfig& game_config, const py::list map, unsigned
 
   _grid = std::make_unique<Grid>(height, width);
   _obs_encoder = std::make_unique<ObservationEncoder>(
-      game_config.protocol_details_obs, resource_names, game_config.feature_ids, game_config.token_value_max);
+      game_config.protocol_details_obs, resource_names, game_config.feature_ids, game_config.token_value_base);
 
   // Initialize ObservationFeature namespace with feature IDs
   ObservationFeature::Initialize(game_config.feature_ids);
diff --git a/packages/mettagrid/cpp/include/mettagrid/config/mettagrid_config.hpp b/packages/mettagrid/cpp/include/mettagrid/config/mettagrid_config.hpp
@@ -53,7 +53,7 @@ struct GameConfig {
   std::shared_ptr<ClipperConfig> clipper = nullptr;
 
   // Observation encoding settings
-  unsigned int token_value_max = 255;  // Maximum value per inventory token (base for encoding)
+  unsigned int token_value_base = 256;  // Base for multi-token inventory encoding (value per token: 0 to base-1)
 };
 
 namespace py = pybind11;
@@ -127,7 +127,7 @@ inline void bind_game_config(py::module& m) {
            py::arg("clipper") = std::shared_ptr<ClipperConfig>(nullptr),
 
            // Observation encoding
-           py::arg("token_value_max") = 255)
+           py::arg("token_value_base") = 256)
       .def_readwrite("num_agents", &GameConfig::num_agents)
       .def_readwrite("max_steps", &GameConfig::max_steps)
       .def_readwrite("episode_truncates", &GameConfig::episode_truncates)
@@ -158,7 +158,7 @@ inline void bind_game_config(py::module& m) {
       .def_readwrite("clipper", &GameConfig::clipper)
 
       // Observation encoding
-      .def_readwrite("token_value_max", &GameConfig::token_value_max);
+      .def_readwrite("token_value_base", &GameConfig::token_value_base);
 }
 
 #endif  // PACKAGES_METTAGRID_CPP_INCLUDE_METTAGRID_CONFIG_METTAGRID_CONFIG_HPP_
diff --git a/packages/mettagrid/cpp/include/mettagrid/objects/chest.hpp b/packages/mettagrid/cpp/include/mettagrid/objects/chest.hpp
@@ -1,6 +1,7 @@
 #ifndef PACKAGES_METTAGRID_CPP_INCLUDE_METTAGRID_OBJECTS_CHEST_HPP_
 #define PACKAGES_METTAGRID_CPP_INCLUDE_METTAGRID_OBJECTS_CHEST_HPP_
 
+#include <algorithm>
 #include <set>
 #include <unordered_map>
 #include <vector>
@@ -125,7 +126,8 @@ class Chest : public GridObject, public Usable, public HasInventory {
       throw std::runtime_error("Observation encoder not set for chest");
     }
     std::vector<PartialObservationToken> features;
-    features.reserve(1 + this->inventory.get().size() + this->tag_ids.size() + 3);
+    features.reserve(1 + this->inventory.get().size() * this->obs_encoder->get_num_inventory_tokens() +
+                     this->tag_ids.size() + (this->vibe != 0 ? 1 : 0));
 
     if (this->vibe != 0) features.push_back({ObservationFeature::Vibe, static_cast<ObservationType>(this->vibe)});
 
diff --git a/packages/mettagrid/cpp/include/mettagrid/objects/inventory.hpp b/packages/mettagrid/cpp/include/mettagrid/objects/inventory.hpp
@@ -1,6 +1,7 @@
 #ifndef PACKAGES_METTAGRID_CPP_INCLUDE_METTAGRID_OBJECTS_INVENTORY_HPP_
 #define PACKAGES_METTAGRID_CPP_INCLUDE_METTAGRID_OBJECTS_INVENTORY_HPP_
 
+#include <limits>
 #include <string>
 #include <unordered_map>
 #include <vector>
diff --git a/packages/mettagrid/cpp/include/mettagrid/systems/observation_encoder.hpp b/packages/mettagrid/cpp/include/mettagrid/systems/observation_encoder.hpp
@@ -54,8 +54,8 @@ class ObservationEncoder {
 
     // Build inventory feature ID maps using multi-token encoding
     // inv:{resource} = base token (always emitted)
-    // inv:{resource}:p1 = first power token (emitted if amount >= token_value_max)
-    // inv:{resource}:p2 = second power token (emitted if amount >= token_value_max^2)
+    // inv:{resource}:p1 = first power token (emitted if amount >= token_value_base)
+    // inv:{resource}:p2 = second power token (emitted if amount >= token_value_base^2)
     // etc.
     _inventory_feature_ids.resize(resource_names.size());
     _inventory_power_feature_ids.resize(resource_names.size());
@@ -158,9 +158,9 @@ class ObservationEncoder {
   }
 
   // Encode inventory amount using multi-token encoding with configurable base.
-  // inv:{resource} = amount % token_value_max (always emitted)
-  // inv:{resource}:p1 = (amount / token_value_max) % token_value_max (only emitted if amount >= token_value_max)
-  // inv:{resource}:p2 = (amount / token_value_max^2) % token_value_max (only emitted if amount >= token_value_max^2)
+  // inv:{resource} = amount % token_value_base (always emitted)
+  // inv:{resource}:p1 = (amount / token_value_base) % token_value_base (only emitted if amount >= token_value_base)
+  // inv:{resource}:p2 = (amount / token_value_base^2) % token_value_base (only emitted if amount >= token_value_base^2)
   // etc.
   void append_inventory_tokens(std::vector<PartialObservationToken>& features,
                                InventoryItem item,
@@ -179,7 +179,7 @@ class ObservationEncoder {
     }
   }
 
-  unsigned int get_token_value_max() const {
+  unsigned int get_token_value_base() const {
     return _token_value_base;
   }
 
@@ -195,7 +195,8 @@ class ObservationEncoder {
   size_t _num_inventory_tokens;
   std::vector<ObservationType> _input_feature_ids;
   std::vector<ObservationType> _output_feature_ids;
-  std::vector<ObservationType> _inventory_feature_ids;  // Maps item index to base feature ID (amount % token_value_max)
+  std::vector<ObservationType>
+      _inventory_feature_ids;  // Maps item index to base feature ID (amount % token_value_base)
   std::vector<std::vector<ObservationType>> _inventory_power_feature_ids;  // Maps item index to power feature IDs
 };
 
diff --git a/packages/mettagrid/cpp/src/mettagrid/objects/agent.cpp b/packages/mettagrid/cpp/src/mettagrid/objects/agent.cpp
@@ -209,7 +209,8 @@ std::vector<PartialObservationToken> Agent::obs_features() const {
   if (!this->obs_encoder) {
     throw std::runtime_error("Observation encoder not set for agent");
   }
-  const size_t num_tokens = this->inventory.get().size() + this->tag_ids.size() + 5;
+  const size_t num_tokens =
+      this->inventory.get().size() * this->obs_encoder->get_num_inventory_tokens() + this->tag_ids.size() + 5;
 
   std::vector<PartialObservationToken> features;
   features.reserve(num_tokens);
diff --git a/packages/mettagrid/docs/observations.md b/packages/mettagrid/docs/observations.md
@@ -116,36 +116,36 @@ depend on your game configuration (number of resources, whether protocol details
 | `cooldown_remaining`              | Remaining cooldown time for objects                                             | assembler, extractors     | Value capped at 255                                                             |
 | `clipped`                         | Whether an assembler is clipped or not                                          | extractors                |                                                                                 |
 | `remaining_uses`                  | Remaining uses for objects with use limits                                      | extractors                | Value capped at 255. Only emitted if `max_uses > 0`                             |
-| `inv:{resource_name}`             | Base inventory amount (amount % token_value_max)                                | agents, chests            | One feature per resource. See [Inventory Encoding](#inventory-encoding) below.  |
+| `inv:{resource_name}`             | Base inventory amount (amount % token_value_base)                               | agents, chests            | One feature per resource. See [Inventory Encoding](#inventory-encoding) below.  |
 | `inv:{resource_name}:p1`          | Power 1 component ((amount / B) % B)                                            | agents, chests            | Only emitted if amount >= B. See [Inventory Encoding](#inventory-encoding).     |
 | `inv:{resource_name}:p2`          | Power 2 component ((amount / B²) % B)                                           | agents, chests            | Only emitted if amount >= B². See [Inventory Encoding](#inventory-encoding).    |
 | `protocol_input:{resource_name}`  | Required input resource amount for current protocol                             | assembler, extractors     | One feature per resource                                                        |
 | `protocol_output:{resource_name}` | Output resource amount for current protocol                                     | assembler, extractors     | One feature per resource                                                        |
 
 ### Inventory Encoding
 
-Inventory values are encoded using a multi-token scheme with a configurable base (`ObsConfig.token_value_max`, default
-255). This allows representing large amounts while keeping individual token values bounded. The number of tokens is
+Inventory values are encoded using a multi-token scheme with a configurable base (`ObsConfig.token_value_base`, default
+256). This allows representing large amounts while keeping individual token values bounded. The number of tokens is
 dynamically computed based on the maximum inventory value (uint16_t max = 65535).
 
 - **`inv:{resource}`**: Base value = `amount % B` (always emitted if amount > 0)
 - **`inv:{resource}:p1`**: Power 1 = `(amount / B) % B` (only emitted if amount >= B)
 - **`inv:{resource}:p2`**: Power 2 = `(amount / B²) % B` (only emitted if amount >= B²)
 - etc.
 
-Where B = `token_value_max` (default 255).
+Where B = `token_value_base` (default 256).
 
 The full value is reconstructed as: `base + p1 * B + p2 * B² + ...`
 
-**Examples with token_value_max=255:**
+**Examples with token_value_base=256 (default):**
 
-| Amount | `inv:food` | `inv:food:p1` | `inv:food:p2` | Reconstruction            |
-| ------ | ---------- | ------------- | ------------- | ------------------------- |
-| 42     | 42         | (not emitted) | (not emitted) | 42                        |
-| 1234   | 214        | 4             | (not emitted) | 214 + 4 \* 255 = 1234     |
-| 65535  | 0          | 2             | 1             | 0 + 2 \* 255 + 1 \* 65025 |
+| Amount | `inv:food` | `inv:food:p1` | `inv:food:p2` | Reconstruction           |
+| ------ | ---------- | ------------- | ------------- | ------------------------ |
+| 42     | 42         | (not emitted) | (not emitted) | 42                       |
+| 1234   | 210        | 4             | (not emitted) | 210 + 4 \* 256 = 1234    |
+| 65535  | 255        | 255           | (not emitted) | 255 + 255 \* 256 = 65535 |
 
-**Examples with token_value_max=100:**
+**Examples with token_value_base=100:**
 
 | Amount | `inv:food` | `inv:food:p1` | `inv:food:p2` | Reconstruction              |
 | ------ | ---------- | ------------- | ------------- | --------------------------- |
diff --git a/packages/mettagrid/docs/simulator_api.md b/packages/mettagrid/docs/simulator_api.md
@@ -265,9 +265,11 @@ obs = agent.observation
 for token in obs.tokens:
     if token.feature.name == "object_type":
         print(f"Object at ({token.col()}, {token.row()}): {token.value}")
-    elif token.feature.name.startswith("inv:"):
-        resource = token.feature.name[4:]  # Remove "inv:" prefix
-        print(f"Inventory {resource}: {token.value}")
+
+# For inventory, use the agent.inventory property, which reconstructs full amounts from the multi-token encoding
+inventory = agent.inventory
+for resource, amount in inventory.items():
+    print(f"Inventory {resource}: {amount}")
 ```
 
 ## Event Handling
diff --git a/packages/mettagrid/python/src/mettagrid/config/id_map.py b/packages/mettagrid/python/src/mettagrid/config/id_map.py
@@ -15,20 +15,20 @@
     from mettagrid.config.mettagrid_config import GameConfig
 
 
-def num_inventory_tokens_needed(max_inventory_value: int, token_value_max: int) -> int:
+def num_inventory_tokens_needed(max_inventory_value: int, token_value_base: int) -> int:
     """Calculate how many tokens are needed to encode max_inventory_value with given base.
 
     Args:
         max_inventory_value: Maximum inventory value to encode (e.g., 65535 for uint16_t)
-        token_value_max: Maximum value per token (base for encoding)
+        token_value_base: Base for encoding (value per token: 0 to base-1)
 
     Returns:
         Number of tokens needed
     """
     if max_inventory_value == 0:
         return 1
     # Need ceil(log_base(max_value + 1)) tokens
-    return math.ceil(math.log(max_inventory_value + 1, token_value_max))
+    return math.ceil(math.log(max_inventory_value + 1, token_value_base))
 
 
 class ObservationFeatureSpec(BaseModel):
@@ -137,14 +137,14 @@ def _compute_features(self) -> list[ObservationFeatureSpec]:
         feature_id += 1
 
         # Inventory features using multi-token encoding with configurable base
-        # inv:{resource} = amount % token_value_max (always emitted)
-        # inv:{resource}:p1 = (amount / token_value_max) % token_value_max (emitted if amount >= token_value_max)
-        # inv:{resource}:p2 = (amount / token_value_max^2) % token_value_max (emitted if amount >= token_value_max^2)
+        # inv:{resource} = amount % token_value_base (always emitted)
+        # inv:{resource}:p1 = (amount / token_value_base) % token_value_base (emitted if amount >= token_value_base)
+        # inv:{resource}:p2 = (amount / token_value_base^2) % token_value_base (emitted if amount >= token_value_base^2)
         # etc.
         # Number of tokens is computed based on max uint16_t value (65535)
-        token_value_max = self._config.obs.token_value_max
-        num_inv_tokens = num_inventory_tokens_needed(65535, token_value_max)
-        normalization = float(token_value_max)
+        token_value_base = self._config.obs.token_value_base
+        num_inv_tokens = num_inventory_tokens_needed(65535, token_value_base)
+        normalization = float(token_value_base)
         for resource_name in self._config.resource_names:
             # Base token (always present)
             name = f"inv:{resource_name}"
diff --git a/packages/mettagrid/python/src/mettagrid/config/mettagrid_c_config.py b/packages/mettagrid/python/src/mettagrid/config/mettagrid_c_config.py
@@ -348,9 +348,7 @@ def convert_to_cpp_game_config(mettagrid_config: dict | GameConfig):
                         if name in resource_name_to_id
                     }
                     limit_defs.append(
-                        CppLimitDef(
-                            resources=resource_ids, base_limit=min(resource_limit.limit, 255), modifiers=modifier_ids
-                        )
+                        CppLimitDef(resources=resource_ids, base_limit=resource_limit.limit, modifiers=modifier_ids)
                     )
 
             inventory_config = CppInventoryConfig()
@@ -383,7 +381,7 @@ def convert_to_cpp_game_config(mettagrid_config: dict | GameConfig):
         game_cpp_params["obs_width"] = obs_config["width"]
         game_cpp_params["obs_height"] = obs_config["height"]
         game_cpp_params["num_observation_tokens"] = obs_config["num_tokens"]
-        game_cpp_params["token_value_max"] = obs_config.get("token_value_max", 255)
+        game_cpp_params["token_value_base"] = obs_config.get("token_value_base", 256)
         # Note: token_dim is not used by C++ GameConfig, it's only used in Python
 
     # Convert observation features from Python to C++
diff --git a/packages/mettagrid/python/src/mettagrid/config/obs_config.py b/packages/mettagrid/python/src/mettagrid/config/obs_config.py
@@ -18,5 +18,8 @@ class ObsConfig(Config):
     height: int = Field(default=11)
     token_dim: int = Field(default=3)
     num_tokens: int = Field(default=200)
-    token_value_max: int = Field(default=255)
-    """Maximum value per inventory token (base for encoding). Default 255 for efficient byte packing."""
+    token_value_base: int = Field(default=256)
+    """Base for multi-token inventory encoding (value per token: 0 to base-1).
+
+    Default 256 for efficient byte packing.
+    """
diff --git a/packages/mettagrid/python/src/mettagrid/simulator/simulator.py b/packages/mettagrid/python/src/mettagrid/simulator/simulator.py
@@ -388,15 +388,15 @@ def inventory(self) -> Dict[str, int]:
         Returns a dictionary mapping resource names to their quantities.
 
         Inventory values are encoded using multi-token encoding:
-        - inv:{resource} contains the base value (amount % token_value_max)
-        - inv:{resource}:p1 contains power 1 ((amount / token_value_max) % token_value_max)
-        - inv:{resource}:p2 contains power 2 ((amount / token_value_max^2) % token_value_max)
+        - inv:{resource} contains the base value (amount % token_value_base)
+        - inv:{resource}:p1 contains power 1 ((amount / token_value_base) % token_value_base)
+        - inv:{resource}:p2 contains power 2 ((amount / token_value_base^2) % token_value_base)
         - etc.
-        The full value is reconstructed as: base + p1 * B + p2 * B^2 + ... where B = token_value_max
+        The full value is reconstructed as: base + p1 * B + p2 * B^2 + ... where B = token_value_base
         """
         import re
 
-        token_value_max = self._sim._config.game.obs.token_value_max
+        token_value_base = self._sim._config.game.obs.token_value_base
 
         # Collect tokens by resource name and power
         inv_values: Dict[str, Dict[int, int]] = {}  # resource_name -> {power -> value}
@@ -423,7 +423,7 @@ def inventory(self) -> Dict[str, int]:
         for resource_name, power_values in inv_values.items():
             total = 0
             for power, value in power_values.items():
-                total += value * (token_value_max**power)
+                total += value * (token_value_base**power)
             inv[resource_name] = total
 
         return inv
diff --git a/packages/mettagrid/tests/test_mettagrid.cpp b/packages/mettagrid/tests/test_mettagrid.cpp
@@ -796,7 +796,7 @@ TEST_F(MettaGridCppTest, AssemblerProtocolObservationsEnabled) {
   auto resource_names = create_test_resource_names();
   std::unordered_map<std::string, ObservationType> proto_feature_ids;
   // Assign arbitrary, unique feature ids for protocol input/output per resource
-  // Use multi-token encoding with :p1, :p2 suffixes (default token_value_max=255 needs 3 tokens for uint16_t)
+  // Multi-token encoding: default token_value_base=256 needs 2 tokens for uint16_t (base token plus :p1)
   for (size_t i = 0; i < resource_names.size(); ++i) {
     proto_feature_ids[std::string("protocol_input:") + resource_names[i]] = static_cast<ObservationType>(100 + i);
     proto_feature_ids[std::string("protocol_output:") + resource_names[i]] = static_cast<ObservationType>(120 + i);
diff --git a/packages/mettagrid/tests/test_set_inventory.py b/packages/mettagrid/tests/test_set_inventory.py

Original file line number	Diff line number	Diff line change
`@@ -209,7 +209,8 @@ std::vector<PartialObservationToken> Agent::obs_features() const {`
`209`	`209`	`if (!this->obs_encoder) {`
`210`	`210`	`throw std::runtime_error("Observation encoder not set for agent");`
`211`	`211`	`}`
`212`		`- const size_t num_tokens = this->inventory.get().size() + this->tag_ids.size() + 5;`
	`212`	`+ const size_t num_tokens =`
	`213`	`+ this->inventory.get().size() * this->obs_encoder->get_num_inventory_tokens() + this->tag_ids.size() + 5;`
`213`	`214`
`214`	`215`	`std::vector<PartialObservationToken> features;`
`215`	`216`	`features.reserve(num_tokens);`