From d115b778fffcbb4a1ed2fe2868dcf32e545afda5 Mon Sep 17 00:00:00 2001
From: akshay18iitg
Date: Wed, 4 Feb 2026 18:36:36 -0800
Subject: [PATCH 1/4] Only calculating response loss when predict_response
 config is set to true

---
 src/opentau/policies/pi05/modeling_pi05.py | 63 ++++++++++++----------
 1 file changed, 34 insertions(+), 29 deletions(-)

diff --git a/src/opentau/policies/pi05/modeling_pi05.py b/src/opentau/policies/pi05/modeling_pi05.py
index 8a02b3a..4f9f09b 100644
--- a/src/opentau/policies/pi05/modeling_pi05.py
+++ b/src/opentau/policies/pi05/modeling_pi05.py
@@ -1231,38 +1231,43 @@ def forward(
         # compute mean
         discrete_action_ce_loss = discrete_action_ce_loss.mean()
 
-        # compute cross entropy loss for response language
-        batch_size, seq_len = response_tokens.shape
-        response_token_start = -self.config.response_max_length - self.config.discrete_action_max_length
-        # The last token of language will predict token of response, so no need to include for loss calculation. Hence slice starts from -self.config.discrete_action_max_length - self.config.response_max_length.
-        # The last token of response predicts first token of discrete actions, so no need to include for loss calculation. Hence slice ends at -self.config.discrete_action_max_length - 1.
-        response_token_end = -self.config.discrete_action_max_length - 1
-        response_slice_object = slice(response_token_start, response_token_end)
-        response_out = prefix_out[
-            :,
-            response_slice_object,
-        ]
-        response_logits = self.paligemma_with_expert.paligemma.lm_head(response_out)
-        # response slice to exclude the token from response while calculating loss.
-        response_slice = slice(1, None)
-        response_logits = response_logits.to(dtype=torch.float32)  # upcast to float32 for loss calculation
-        response_logits = rearrange(response_logits, "b s d -> (b s) d")
-        response_labels = rearrange(response_tokens[:, response_slice], "b s -> (b s)")
-        response_ce_loss = F.cross_entropy(response_logits, response_labels, reduction="none")
-
-        response_ce_loss = rearrange(response_ce_loss, "(b s) -> b s", b=batch_size, s=seq_len - 1)
-
-        # remove pad tokens
-        response_is_pad = ~response_masks  # convert into format where value for pad is True
-        # helps to control loss for response tokens in case of robotic data and VQA data
-        response_ce_loss = response_ce_loss * ~response_is_pad[:, response_slice]
-
-        # compute mean
-        response_ce_loss = response_ce_loss.mean()
+        # compute cross entropy loss for response language only when predict_response is set to true
+        if self.config.predict_response:
+            batch_size, seq_len = response_tokens.shape
+            response_token_start = -self.config.response_max_length - self.config.discrete_action_max_length
+            # The last token of language will predict token of response, so no need to include for loss calculation. Hence slice starts from -self.config.discrete_action_max_length - self.config.response_max_length.
+            # The last token of response predicts first token of discrete actions, so no need to include for loss calculation. Hence slice ends at -self.config.discrete_action_max_length - 1.
+            response_token_end = -self.config.discrete_action_max_length - 1
+            response_slice_object = slice(response_token_start, response_token_end)
+            response_out = prefix_out[
+                :,
+                response_slice_object,
+            ]
+            response_logits = self.paligemma_with_expert.paligemma.lm_head(response_out)
+            # response slice to exclude the token from response while calculating loss.
+            response_slice = slice(1, None)
+            response_logits = response_logits.to(
+                dtype=torch.float32
+            )  # upcast to float32 for loss calculation
+            response_logits = rearrange(response_logits, "b s d -> (b s) d")
+            response_labels = rearrange(response_tokens[:, response_slice], "b s -> (b s)")
+            response_ce_loss = F.cross_entropy(response_logits, response_labels, reduction="none")
+
+            response_ce_loss = rearrange(response_ce_loss, "(b s) -> b s", b=batch_size, s=seq_len - 1)
+
+            # remove pad tokens
+            response_is_pad = ~response_masks  # convert into format where value for pad is True
+            # helps to control loss for response tokens in case of robotic data and VQA data
+            response_ce_loss = response_ce_loss * ~response_is_pad[:, response_slice]
+
+            # compute mean
+            response_ce_loss = response_ce_loss.mean()
 
         return {
             "MSE": losses,
-            "CE": (discrete_action_ce_loss + response_ce_loss),
+            "CE": discrete_action_ce_loss + response_ce_loss
+            if self.config.predict_response
+            else discrete_action_ce_loss,
         }
 
     def sample_actions(
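Note on [PATCH 1/4]: inside the new predict_response branch, the logits come from the response-token positions of the prefix output (excluding the last response position, which predicts the first discrete action token), the labels are the response tokens shifted by one (the logit at position i predicts token i + 1), and the per-token cross entropy is zeroed at padded positions before averaging. Below is a minimal, self-contained sketch of that shifted, pad-masked cross entropy; the shapes, random logits/tokens, and mask values are stand-ins, not the actual OpenTau model outputs or config.

import torch
import torch.nn.functional as F
from einops import rearrange

batch_size, response_max_length, vocab_size = 2, 6, 32

# Stand-ins for the model outputs: logits over the response slice of the prefix
# (one position per shifted label), the response token ids, and a mask that is
# True for real tokens and False for padding.
response_logits = torch.randn(batch_size, response_max_length - 1, vocab_size)
response_tokens = torch.randint(0, vocab_size, (batch_size, response_max_length))
response_masks = torch.tensor(
    [[1, 1, 1, 1, 0, 0], [1, 1, 1, 0, 0, 0]], dtype=torch.bool
)

# Shift labels by one position: the logit at position i predicts token i + 1.
response_slice = slice(1, None)
labels = rearrange(response_tokens[:, response_slice], "b s -> (b s)")
logits = rearrange(response_logits.to(dtype=torch.float32), "b s d -> (b s) d")

ce = F.cross_entropy(logits, labels, reduction="none")
ce = rearrange(ce, "(b s) -> b s", b=batch_size, s=response_max_length - 1)

# Zero the loss at padded positions (equivalent to multiplying by ~response_is_pad),
# then average over all positions.
ce = ce * response_masks[:, response_slice]
response_ce_loss = ce.mean()
print(response_ce_loss)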
From 65273a564a484003310f5b764b3c3951ade71532 Mon Sep 17 00:00:00 2001
From: akshay18iitg
Date: Thu, 5 Feb 2026 12:11:38 -0800
Subject: [PATCH 2/4] Updating response_ce_loss to 0 when predict_response is
 false

---
 README.md                                  | 2 +-
 src/opentau/policies/pi05/modeling_pi05.py | 9 +++------
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 5b52ef2..1166f37 100644
--- a/README.md
+++ b/README.md
@@ -46,8 +46,8 @@ OpenTau ($\tau$) is a tool developed by *[Tensor][1]* to bridge this gap, and we
 | Simulation Environments for Evaluating Models | ❌ | ✅ | ✅ |
 | Create Validation Splits During Training | ❌ | ❌ | ✅ |
 | $\pi^{*}_{0.6}$ style Reinforcement Learning Pipeline | ❌ | ❌ | ✅ |
+| Raw Robotic data to Structured LeRobot format conversion | ❌ | ✅ | ✅ |
 | Framework | Jax / PyTorch | PyTorch | PyTorch |
-| Raw Robotic data to Structured LeRobot format conversion | ❌ | ❌ | ✅ |
 
 ## Quick Start
 If you are familiar with LeRobot, getting started with OpenTau is very easy.
diff --git a/src/opentau/policies/pi05/modeling_pi05.py b/src/opentau/policies/pi05/modeling_pi05.py
index 4f9f09b..169e7e4 100644
--- a/src/opentau/policies/pi05/modeling_pi05.py
+++ b/src/opentau/policies/pi05/modeling_pi05.py
@@ -1262,13 +1262,10 @@ def forward(
 
             # compute mean
             response_ce_loss = response_ce_loss.mean()
+        else:
+            response_ce_loss = torch.tensor(0.0, device=losses.device)
 
-        return {
-            "MSE": losses,
-            "CE": discrete_action_ce_loss + response_ce_loss
-            if self.config.predict_response
-            else discrete_action_ce_loss,
-        }
+        return {"MSE": losses, "CE": discrete_action_ce_loss + response_ce_loss}
 
     def sample_actions(
         self,

From 9c98a98809969ed0cd31007d1ad08dd09f11c671 Mon Sep 17 00:00:00 2001
From: akshay18iitg
Date: Thu, 5 Feb 2026 12:13:39 -0800
Subject: [PATCH 3/4] Updating README: rename data conversion row to Rosbag
 conversion

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1166f37..cd7800e 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ OpenTau ($\tau$) is a tool developed by *[Tensor][1]* to bridge this gap, and we
 | Simulation Environments for Evaluating Models | ❌ | ✅ | ✅ |
 | Create Validation Splits During Training | ❌ | ❌ | ✅ |
 | $\pi^{*}_{0.6}$ style Reinforcement Learning Pipeline | ❌ | ❌ | ✅ |
-| Raw Robotic data to Structured LeRobot format conversion | ❌ | ✅ | ✅ |
+| Robotic Rosbag data to Structured LeRobot format conversion | ❌ | ✅ | ✅ |
 | Framework | Jax / PyTorch | PyTorch | PyTorch |
 
 ## Quick Start

From 085b658f44713e01187c9825789b52ac71f77e50 Mon Sep 17 00:00:00 2001
From: akshay18iitg
Date: Thu, 5 Feb 2026 15:21:46 -0800
Subject: [PATCH 4/4] Updating README: remove Rosbag conversion row from
 feature table

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index cd7800e..a06bca4 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,6 @@ OpenTau ($\tau$) is a tool developed by *[Tensor][1]* to bridge this gap, and we
 | Simulation Environments for Evaluating Models | ❌ | ✅ | ✅ |
 | Create Validation Splits During Training | ❌ | ❌ | ✅ |
 | $\pi^{*}_{0.6}$ style Reinforcement Learning Pipeline | ❌ | ❌ | ✅ |
-| Robotic Rosbag data to Structured LeRobot format conversion | ❌ | ✅ | ✅ |
 | Framework | Jax / PyTorch | PyTorch | PyTorch |
 
 ## Quick Start
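Note on [PATCH 2/4]: with the added else branch, response_ce_loss is always defined, so "CE" is always the sum discrete_action_ce_loss + response_ce_loss and callers never have to branch on predict_response. A minimal sketch of that fallback pattern follows; combine_losses and its stand-in arguments are hypothetical, not part of the OpenTau API.

import torch

def combine_losses(mse_losses, discrete_action_ce_loss, response_ce_loss=None):
    # When no response loss was computed (predict_response is false), fall back
    # to a zero tensor on the same device, so "CE" stays a well-defined sum and
    # the training loop never has to special-case the config.
    if response_ce_loss is None:
        response_ce_loss = torch.tensor(0.0, device=mse_losses.device)
    return {"MSE": mse_losses, "CE": discrete_action_ce_loss + response_ce_loss}

# Stand-in usage: no response loss was computed.
out = combine_losses(torch.zeros(2, 8), torch.tensor(1.3))
print(out["CE"])  # tensor(1.3000)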