From d115b778fffcbb4a1ed2fe2868dcf32e545afda5 Mon Sep 17 00:00:00 2001
From: akshay18iitg
Date: Wed, 4 Feb 2026 18:36:36 -0800
Subject: [PATCH 1/4] Only calculating response loss when predict_response
 config is set to true

---
 src/opentau/policies/pi05/modeling_pi05.py | 63 ++++++++++++----------
 1 file changed, 34 insertions(+), 29 deletions(-)

diff --git a/src/opentau/policies/pi05/modeling_pi05.py b/src/opentau/policies/pi05/modeling_pi05.py
index 8a02b3a..4f9f09b 100644
--- a/src/opentau/policies/pi05/modeling_pi05.py
+++ b/src/opentau/policies/pi05/modeling_pi05.py
@@ -1231,38 +1231,43 @@ def forward(
         # compute mean
         discrete_action_ce_loss = discrete_action_ce_loss.mean()
 
-        # compute cross entropy loss for response language
-        batch_size, seq_len = response_tokens.shape
-        response_token_start = -self.config.response_max_length - self.config.discrete_action_max_length
-        # The last token of language will predict token of response, so no need to include for loss calculation. Hence slice starts from -self.config.discrete_action_max_length - self.config.response_max_length.
-        # The last token of response predicts first token of discrete actions, so no need to include for loss calculation. Hence slice ends at -self.config.discrete_action_max_length - 1.
-        response_token_end = -self.config.discrete_action_max_length - 1
-        response_slice_object = slice(response_token_start, response_token_end)
-        response_out = prefix_out[
-            :,
-            response_slice_object,
-        ]
-        response_logits = self.paligemma_with_expert.paligemma.lm_head(response_out)
-        # response slice to exclude the token from response while calculating loss.
-        response_slice = slice(1, None)
-        response_logits = response_logits.to(dtype=torch.float32)  # upcast to float32 for loss calculation
-        response_logits = rearrange(response_logits, "b s d -> (b s) d")
-        response_labels = rearrange(response_tokens[:, response_slice], "b s -> (b s)")
-        response_ce_loss = F.cross_entropy(response_logits, response_labels, reduction="none")
-
-        response_ce_loss = rearrange(response_ce_loss, "(b s) -> b s", b=batch_size, s=seq_len - 1)
-
-        # remove pad tokens
-        response_is_pad = ~response_masks  # convert into format where value for pad is True
-        # helps to control loss for response tokens in case of robotic data and VQA data
-        response_ce_loss = response_ce_loss * ~response_is_pad[:, response_slice]
-
-        # compute mean
-        response_ce_loss = response_ce_loss.mean()
+        # compute cross entropy loss for response language only when predict_response is set to true
+        if self.config.predict_response:
+            batch_size, seq_len = response_tokens.shape
+            response_token_start = -self.config.response_max_length - self.config.discrete_action_max_length
+            # The last token of language will predict token of response, so no need to include for loss calculation. Hence slice starts from -self.config.discrete_action_max_length - self.config.response_max_length.
+            # The last token of response predicts first token of discrete actions, so no need to include for loss calculation. Hence slice ends at -self.config.discrete_action_max_length - 1.
+            response_token_end = -self.config.discrete_action_max_length - 1
+            response_slice_object = slice(response_token_start, response_token_end)
+            response_out = prefix_out[
+                :,
+                response_slice_object,
+            ]
+            response_logits = self.paligemma_with_expert.paligemma.lm_head(response_out)
+            # response slice to exclude the token from response while calculating loss.
+            response_slice = slice(1, None)
+            response_logits = response_logits.to(
+                dtype=torch.float32
+            )  # upcast to float32 for loss calculation
+            response_logits = rearrange(response_logits, "b s d -> (b s) d")
+            response_labels = rearrange(response_tokens[:, response_slice], "b s -> (b s)")
+            response_ce_loss = F.cross_entropy(response_logits, response_labels, reduction="none")
+
+            response_ce_loss = rearrange(response_ce_loss, "(b s) -> b s", b=batch_size, s=seq_len - 1)
+
+            # remove pad tokens
+            response_is_pad = ~response_masks  # convert into format where value for pad is True
+            # helps to control loss for response tokens in case of robotic data and VQA data
+            response_ce_loss = response_ce_loss * ~response_is_pad[:, response_slice]
+
+            # compute mean
+            response_ce_loss = response_ce_loss.mean()
 
         return {
             "MSE": losses,
-            "CE": (discrete_action_ce_loss + response_ce_loss),
+            "CE": discrete_action_ce_loss + response_ce_loss
+            if self.config.predict_response
+            else discrete_action_ce_loss,
         }
 
     def sample_actions(
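Note on [PATCH 1/4]: inside the new predict_response branch, the logits come from the response-token positions of the prefix output (excluding the last response position, which predicts the first discrete action token), the labels are the response tokens shifted by one (the logit at position i predicts token i + 1), and the per-token cross entropy is zeroed at padded positions before averaging. Below is a minimal, self-contained sketch of that shifted, pad-masked cross entropy; the shapes, random logits/tokens, and mask values are stand-ins, not the actual OpenTau model outputs or config.

import torch
import torch.nn.functional as F
from einops import rearrange

batch_size, response_max_length, vocab_size = 2, 6, 32

# Stand-ins for the model outputs: logits over the response slice of the prefix
# (one position per shifted label), the response token ids, and a mask that is
# True for real tokens and False for padding.
response_logits = torch.randn(batch_size, response_max_length - 1, vocab_size)
response_tokens = torch.randint(0, vocab_size, (batch_size, response_max_length))
response_masks = torch.tensor(
    [[1, 1, 1, 1, 0, 0], [1, 1, 1, 0, 0, 0]], dtype=torch.bool
)

# Shift labels by one position: the logit at position i predicts token i + 1.
response_slice = slice(1, None)
labels = rearrange(response_tokens[:, response_slice], "b s -> (b s)")
logits = rearrange(response_logits.to(dtype=torch.float32), "b s d -> (b s) d")

ce = F.cross_entropy(logits, labels, reduction="none")
ce = rearrange(ce, "(b s) -> b s", b=batch_size, s=response_max_length - 1)

# Zero the loss at padded positions (equivalent to multiplying by ~response_is_pad),
# then average over all positions.
ce = ce * response_masks[:, response_slice]
response_ce_loss = ce.mean()
print(response_ce_loss)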
From 65273a564a484003310f5b764b3c3951ade71532 Mon Sep 17 00:00:00 2001
From: akshay18iitg
Date: Thu, 5 Feb 2026 12:11:38 -0800
Subject: [PATCH 2/4] Updating response_ce_loss to 0 when predict_response is
 false

---
 README.md                                  | 2 +-
 src/opentau/policies/pi05/modeling_pi05.py | 9 +++------
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 5b52ef2..1166f37 100644
--- a/README.md
+++ b/README.md
@@ -46,8 +46,8 @@ OpenTau ($\tau$) is a tool developed by *[Tensor][1]* to bridge this gap, and we
 | Simulation Environments for Evaluating Models | ❌ | ✅ | ✅ |
 | Create Validation Splits During Training | ❌ | ❌ | ✅ |
 | $\pi^{*}_{0.6}$ style Reinforcement Learning Pipeline | ❌ | ❌ | ✅ |
+| Raw Robotic data to Structured LeRobot format conversion | ❌ | ✅ | ✅ |
 | Framework | Jax / PyTorch | PyTorch | PyTorch |
-| Raw Robotic data to Structured LeRobot format conversion | ❌ | ❌ | ✅ |
 
 ## Quick Start
 If you are familiar with LeRobot, getting started with OpenTau is very easy.
diff --git a/src/opentau/policies/pi05/modeling_pi05.py b/src/opentau/policies/pi05/modeling_pi05.py
index 4f9f09b..169e7e4 100644
--- a/src/opentau/policies/pi05/modeling_pi05.py
+++ b/src/opentau/policies/pi05/modeling_pi05.py
@@ -1262,13 +1262,10 @@ def forward(
 
             # compute mean
             response_ce_loss = response_ce_loss.mean()
+        else:
+            response_ce_loss = torch.tensor(0.0, device=losses.device)
 
-        return {
-            "MSE": losses,
-            "CE": discrete_action_ce_loss + response_ce_loss
-            if self.config.predict_response
-            else discrete_action_ce_loss,
-        }
+        return {"MSE": losses, "CE": discrete_action_ce_loss + response_ce_loss}
 
     def sample_actions(
         self,

From 9c98a98809969ed0cd31007d1ad08dd09f11c671 Mon Sep 17 00:00:00 2001
From: akshay18iitg
Date: Thu, 5 Feb 2026 12:13:39 -0800
Subject: [PATCH 3/4] Updating README: rename data conversion row to Rosbag
 conversion

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1166f37..cd7800e 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ OpenTau ($\tau$) is a tool developed by *[Tensor][1]* to bridge this gap, and we
 | Simulation Environments for Evaluating Models | ❌ | ✅ | ✅ |
 | Create Validation Splits During Training | ❌ | ❌ | ✅ |
 | $\pi^{*}_{0.6}$ style Reinforcement Learning Pipeline | ❌ | ❌ | ✅ |
-| Raw Robotic data to Structured LeRobot format conversion | ❌ | ✅ | ✅ |
+| Robotic Rosbag data to Structured LeRobot format conversion | ❌ | ✅ | ✅ |
 | Framework | Jax / PyTorch | PyTorch | PyTorch |
 
 ## Quick Start

From 085b658f44713e01187c9825789b52ac71f77e50 Mon Sep 17 00:00:00 2001
From: akshay18iitg
Date: Thu, 5 Feb 2026 15:21:46 -0800
Subject: [PATCH 4/4] Updating README: remove Rosbag conversion row from
 feature table

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index cd7800e..a06bca4 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,6 @@ OpenTau ($\tau$) is a tool developed by *[Tensor][1]* to bridge this gap, and we
 | Simulation Environments for Evaluating Models | ❌ | ✅ | ✅ |
 | Create Validation Splits During Training | ❌ | ❌ | ✅ |
 | $\pi^{*}_{0.6}$ style Reinforcement Learning Pipeline | ❌ | ❌ | ✅ |
-| Robotic Rosbag data to Structured LeRobot format conversion | ❌ | ✅ | ✅ |
 | Framework | Jax / PyTorch | PyTorch | PyTorch |
 
 ## Quick Start
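Note on [PATCH 2/4]: with the added else branch, response_ce_loss is always defined, so "CE" is always the sum discrete_action_ce_loss + response_ce_loss and callers never have to branch on predict_response. A minimal sketch of that fallback pattern follows; combine_losses and its stand-in arguments are hypothetical, not part of the OpenTau API.

import torch

def combine_losses(mse_losses, discrete_action_ce_loss, response_ce_loss=None):
    # When no response loss was computed (predict_response is false), fall back
    # to a zero tensor on the same device, so "CE" stays a well-defined sum and
    # the training loop never has to special-case the config.
    if response_ce_loss is None:
        response_ce_loss = torch.tensor(0.0, device=mse_losses.device)
    return {"MSE": mse_losses, "CE": discrete_action_ce_loss + response_ce_loss}

# Stand-in usage: no response loss was computed.
out = combine_losses(torch.zeros(2, 8), torch.tensor(1.3))
print(out["CE"])  # tensor(1.3000)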