Merged
Changes from all commits
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -50,7 +50,7 @@ repos:
hooks:
- id: add-license-header
name: Add License Header
entry: python .github/scripts/add_license_header.py
entry: python3 .github/scripts/add_license_header.py
language: system
types: [python]
pass_filenames: true
2 changes: 1 addition & 1 deletion configs/examples/dev_config.json
@@ -9,7 +9,7 @@
"weights": [
1.0
],
"action_freq": 30.0,
"action_freq": 30.0,
"image_resample_strategy": "nearest",
"vector_resample_strategy": "nearest"
},
2 changes: 0 additions & 2 deletions docs/source/concepts.rst
@@ -87,5 +87,3 @@ Environments
Environments wrap simulation or real-robot interfaces compatible with OpenAI Gym/Gymnasium.
The factory ``lerobot/common/envs/factory.py`` creates vectorized environments for efficient training and evaluation.
Currently, only `Libero <https://libero-project.github.io/main.html>`_ is supported and it is configured via ``opentau.envs.configs.LiberoEnv``.


14 changes: 14 additions & 0 deletions docs/source/conf.py
@@ -1,3 +1,17 @@
# Copyright 2026 Tensor Auto Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
2 changes: 1 addition & 1 deletion docs/source/contributing.rst
@@ -20,7 +20,7 @@ Some of the ways you can contribute to OpenTau:
* Contributing to the examples or to the documentation.
* Submitting issues related to bugs or desired new features.

Following the guides below, feel free to open issues and PRs.
Following the guides below, feel free to open issues and PRs.

Submitting a new issue or feature request
-----------------------------------------
4 changes: 1 addition & 3 deletions docs/source/tutorials/datasets.rst
@@ -4,7 +4,7 @@ Datasets
.. note::
Make sure you have followed the :doc:`/installation` guide before proceeding.

Building a dataset mixture
Building a dataset mixture
--------------------------

You can define a dataset mixture in your configuration file using the ``dataset_mixture`` key. Here is an example:
@@ -73,5 +73,3 @@ Each training config should contain a dataset mixture definition. To evaluate th
--num_workers=10

This will output a token count for each language key in the dataset mixture, and save it to ``outputs/stats/token_count.json``.


1 change: 0 additions & 1 deletion docs/source/tutorials/evaluation.rst
@@ -46,4 +46,3 @@ OpenTau currently supports the `LIBERO benchmark <https://libero-project.github.
This will run the 0th task and 2nd task in ``libero_spatial``. Each task will run for 8 simulations in parallel.

When launched with accelerate, each GPU process will only work on its fraction of the tasks, improving throughput.
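The per-process split that accelerate enables can be sketched as follows (`shard_tasks` is a hypothetical helper for illustration, not the script's actual function; the real division of work may differ):

```python
def shard_tasks(task_ids: list[int], rank: int, world_size: int) -> list[int]:
    # Each process takes every world_size-th task starting at its rank,
    # so the union over all ranks covers every task exactly once.
    return task_ids[rank::world_size]

# With 2 GPU processes and 5 tasks:
# rank 0 handles tasks [0, 2, 4]; rank 1 handles tasks [1, 3]
```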

1 change: 0 additions & 1 deletion docs/source/tutorials/training.rst
@@ -51,4 +51,3 @@ Training can be resumed by running:

.. note::
When resuming training from a checkpoint, the training step count will continue from the checkpoint's step, but the dataloader will be reset.

26 changes: 13 additions & 13 deletions pyproject.toml
@@ -76,24 +76,24 @@ dependencies = [
"onnxruntime-gpu>=1.22.0 ; ((sys_platform == 'linux' and platform_machine == 'x86_64') or (sys_platform == 'win32' and (platform_machine == 'AMD64' or platform_machine == 'x86_64'))) ",
"onnxscript>=0.3.1",
"onnx-ir>=0.1.4",
"transformers @ git+https://github.com/huggingface/transformers.git@dcddb97",
"scipy>=1.15.2",
"pytest>=8.1.0",
"pytest-cov>=5.0.0",
"pyserial>=3.5",
"transformers @ git+https://github.com/huggingface/transformers.git@dcddb97",
"scipy>=1.15.2",
"pytest>=8.1.0",
"pytest-cov>=5.0.0",
"pyserial>=3.5",
"pytest-xdist>=3.8.0",
"scikit-image>=0.23.2",
"pandas>=2.2.2",
"accelerate>=1.4.0",
"scikit-image>=0.23.2",
"pandas>=2.2.2",
"accelerate>=1.4.0",
"deepspeed>=0.17.1"
]

[project.optional-dependencies]
dev = ["pre-commit>=3.7.0",
"debugpy>=1.8.1",
"pytest>=8.1.0",
"pytest-cov>=5.0.0",
"pyserial>=3.5",
dev = ["pre-commit>=3.7.0",
"debugpy>=1.8.1",
"pytest>=8.1.0",
"pytest-cov>=5.0.0",
"pyserial>=3.5",
"pytest-xdist>=3.8.0",
"sphinx>=8.1.3",
"sphinx-rtd-theme>=3.0.1",
1 change: 1 addition & 0 deletions src/opentau/datasets/dataset_mixture.py
@@ -59,6 +59,7 @@
>>> mixture = WeightedDatasetMixture(cfg, datasets, weights, action_freq=30.0)
>>> dataloader = mixture.get_dataloader()
"""

import logging
from typing import List, Optional

1 change: 0 additions & 1 deletion src/opentau/datasets/grounding/__init__.py
@@ -65,4 +65,3 @@
>>> print(list(available_grounding_datasets.keys()))
['clevr', 'cocoqa', 'dummy', 'pixmo', 'vsr']
"""

2 changes: 0 additions & 2 deletions src/opentau/datasets/grounding/clevr.py
@@ -1,4 +1,3 @@

# Copyright 2026 Tensor Auto Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -38,7 +37,6 @@
>>> dataset = make_dataset(cfg, train_cfg)
"""


import logging

import numpy as np
2 changes: 0 additions & 2 deletions src/opentau/datasets/grounding/cocoqa.py
@@ -1,4 +1,3 @@

# Copyright 2026 Tensor Auto Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -40,7 +39,6 @@
>>> dataset = make_dataset(cfg, train_cfg)
"""


import logging
from typing import List

1 change: 0 additions & 1 deletion src/opentau/datasets/grounding/dummy.py
@@ -1,4 +1,3 @@

# Copyright 2026 Tensor Auto Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
1 change: 0 additions & 1 deletion src/opentau/datasets/grounding/pixmo.py
@@ -1,4 +1,3 @@

# Copyright 2026 Tensor Auto Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
1 change: 0 additions & 1 deletion src/opentau/datasets/grounding/vsr.py
@@ -1,4 +1,3 @@

# Copyright 2026 Tensor Auto Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
1 change: 1 addition & 0 deletions src/opentau/datasets/video_utils.py
@@ -101,6 +101,7 @@
from datasets.features.features import register_feature
from PIL import Image


def get_safe_default_codec() -> str:
"""Get the default video codec backend, falling back to pyav if torchcodec is unavailable.

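A minimal sketch of what such a fallback might look like (assuming only that `torchcodec` importability decides the backend; the real function may apply additional checks):

```python
import importlib.util

def get_safe_default_codec() -> str:
    # Prefer torchcodec when it is importable; otherwise fall back to pyav.
    if importlib.util.find_spec("torchcodec") is not None:
        return "torchcodec"
    return "pyav"
```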
4 changes: 2 additions & 2 deletions src/opentau/envs/configs.py
@@ -31,7 +31,7 @@
class EnvConfig(draccus.ChoiceRegistry, abc.ABC):
"""Base configuration for an environment.

Attributes:
Args:
import_name: Name under which the environment should be imported. For LIBERO, this doesn't need to be set.
make_id: Gymnasium/Gym environment id (e.g., ``"CartPole-v1"``) when using ``gym.make``-style construction.
task: Optional task or suite identifier understood by the environment.
@@ -83,7 +83,7 @@ def gym_kwargs(self) -> dict:
class LiberoEnv(EnvConfig):
r"""Configuration for the LIBERO environment.

Attributes:
Args:
task: The LIBERO task or suite to use (e.g., ``"libero_10"``).
task_ids: Optional list of specific task IDs within the suite to use (if ``None``, all tasks in the suite are used).
fps: Target frames-per-second for stepping/rendering.
1 change: 1 addition & 0 deletions src/opentau/policies/normalize.py
@@ -22,6 +22,7 @@
"""

import sys

import numpy as np
import torch
from torch import Tensor, nn
6 changes: 5 additions & 1 deletion src/opentau/policies/pi0/modeling_pi0.py
@@ -64,7 +64,11 @@ def create_sinusoidal_pos_embedding(
if time.ndim != 1:
raise ValueError("The time tensor is expected to be of shape `(batch_size, )`.")

dtype = get_safe_dtype(torch.float64, device.type) if isinstance(device, torch.device) else get_safe_dtype(torch.float64, device)
dtype = (
get_safe_dtype(torch.float64, device.type)
if isinstance(device, torch.device)
else get_safe_dtype(torch.float64, device)
)
fraction = torch.linspace(0.0, 1.0, dimension // 2, dtype=dtype, device=device)
period = min_period * (max_period / min_period) ** fraction

1 change: 0 additions & 1 deletion src/opentau/policies/pi0/paligemma_with_expert.py
@@ -21,7 +21,6 @@
action generation and conditioning.
"""


import torch
import torch.version
from pytest import Cache
22 changes: 14 additions & 8 deletions src/opentau/policies/pi05/modeling_pi05.py
@@ -70,7 +70,11 @@ def create_sinusoidal_pos_embedding(
if time.ndim != 1:
raise ValueError("The time tensor is expected to be of shape `(batch_size, )`.")

dtype = get_safe_dtype(torch.float64, device.type) if isinstance(device, torch.device) else get_safe_dtype(torch.float64, device)
dtype = (
get_safe_dtype(torch.float64, device.type)
if isinstance(device, torch.device)
else get_safe_dtype(torch.float64, device)
)
fraction = torch.linspace(0.0, 1.0, dimension // 2, dtype=dtype, device=device)
period = min_period * (max_period / min_period) ** fraction

@@ -636,8 +640,8 @@ def forward(
def prepare_discrete_state(self, batch: dict[str, Tensor]) -> list[str]:
"""Discretizes the state into bins and converts it to a string representation.

Each dimension of the state vector is discretized into 256 bins.
The values of each dimension of the state are expected to be in the range [-1, 1].
Each dimension of the state vector is discretized into 256 bins.
The values of each dimension of the state are expected to be in the range [-1, 1].
The discretization bins are linearly spaced between -1 and 1.
The index of the bin for each dimension is then concatenated into a space-separated string.

@@ -655,7 +659,9 @@ def prepare_discrete_state(self, batch: dict[str, Tensor]) -> list[str]:
if np.any(state_np < -1.0) or np.any(state_np > 1.0):
raise ValueError("State values are not normalized. All state values should be in [-1, 1].")
discretized_states = np.digitize(state_np, bins=np.linspace(-1, 1, 256 + 1)[:-1]) - 1
return [" ".join(map(str, row)) for row in discretized_states] # TODO: return a tensor instead of a list of strings?
return [
" ".join(map(str, row)) for row in discretized_states
] # TODO: return a tensor instead of a list of strings?
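The binning described in the docstring can be sketched standalone (a hedged illustration; `discretize_state` is a hypothetical name, but the edge construction follows the `np.digitize` call shown in the diff):

```python
import numpy as np

def discretize_state(state: np.ndarray, n_bins: int = 256) -> list[str]:
    # State values must already be normalized to [-1, 1].
    if np.any(state < -1.0) or np.any(state > 1.0):
        raise ValueError("All state values should be in [-1, 1].")
    # n_bins linearly spaced edges over [-1, 1); digitize - 1 yields indices in [0, n_bins - 1].
    edges = np.linspace(-1, 1, n_bins + 1)[:-1]
    indices = np.digitize(state, bins=edges) - 1
    # One space-separated string per batch row.
    return [" ".join(map(str, row)) for row in indices]
```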

def prepare_discrete_actions(self, batch: dict[str, Tensor]) -> tuple[Tensor, Tensor]:
"""Prepares discrete actions for the model by tokenizing and padding them.
@@ -671,7 +677,9 @@ def prepare_discrete_actions(self, batch: dict[str, Tensor]) -> tuple[Tensor, Te
device = batch["discrete_actions"].device
discrete_actions = batch["discrete_actions"].to(device="cpu", dtype=torch.float32)
tokens = self.discrete_action_processor.__call__(discrete_actions)
discrete_action_tokens, discrete_action_masks = pad_discrete_tokens(tokens, self.config.discrete_action_max_length)
discrete_action_tokens, discrete_action_masks = pad_discrete_tokens(
tokens, self.config.discrete_action_max_length
)
return torch.from_numpy(discrete_action_tokens).to(device=device, dtype=torch.long), torch.from_numpy(
discrete_action_masks
).to(device=device, dtype=torch.bool)
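For context, `pad_discrete_tokens` plausibly right-pads variable-length token sequences to a fixed length and returns a parallel validity mask. A sketch under that assumption (the real signature, padding value, and truncation behavior are not shown in the diff):

```python
import numpy as np

def pad_discrete_tokens(tokens, max_length, pad_id=0):
    # Right-pad each sequence to max_length; mask is True on real tokens.
    padded = np.full((len(tokens), max_length), pad_id, dtype=np.int64)
    masks = np.zeros((len(tokens), max_length), dtype=bool)
    for i, seq in enumerate(tokens):
        seq = list(seq)[:max_length]  # truncate overly long sequences
        padded[i, : len(seq)] = seq
        masks[i, : len(seq)] = True
    return padded, masks
```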
@@ -965,9 +973,7 @@ def embed_prefix(

return embs, pad_masks, att_masks

def embed_suffix(
self, noisy_actions: Tensor, timestep: Tensor
) -> tuple[Tensor, Tensor, Tensor, Tensor]:
def embed_suffix(self, noisy_actions: Tensor, timestep: Tensor) -> tuple[Tensor, Tensor, Tensor, Tensor]:
"""Embed noisy_actions, timestep to prepare for Expert Gemma processing.

Args:
1 change: 0 additions & 1 deletion src/opentau/policies/pi05/paligemma_with_expert.py
@@ -22,7 +22,6 @@
action generation and conditioning.
"""


import torch
import torch.version
from pytest import Cache
4 changes: 2 additions & 2 deletions src/opentau/scripts/inference.py
@@ -20,6 +20,8 @@

import torch

from opentau.configs import parser
from opentau.configs.train import TrainPipelineConfig
from opentau.policies.factory import get_policy_class
from opentau.utils.random_utils import set_seed
from opentau.utils.utils import (
@@ -28,8 +30,6 @@
create_dummy_observation,
init_logging,
)
from opentau.configs import parser
from opentau.configs.train import TrainPipelineConfig


@parser.wrap()