From fe2ceae573b8e1dc7aa632412ac1822e917c207e Mon Sep 17 00:00:00 2001
From: xander1421
Date: Sun, 1 Feb 2026 02:11:46 +0200
Subject: [PATCH] fix: clamp VQ indices to prevent CUDA out-of-bounds errors

When using vector-quantize-pytorch >= 1.20 with PyTorch 2.10+ and
Python 3.14, the codebook indices from HeartMuLa can occasionally
exceed the valid range [0, codebook_size-1], causing CUDA device-side
assertion failures in the einx.get_at indexing operation.

This fix clamps the indices to the valid codebook range before passing
them to ResidualVQ.get_output_from_indices(), preventing the CUDA
errors while preserving the generated audio quality.

Tested with:
- Python 3.14
- PyTorch 2.10.0+cu128
- vector-quantize-pytorch 1.27.15
- CUDA 12.8

Co-Authored-By: Claude Opus 4.5
---
 src/heartlib/heartcodec/models/flow_matching.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/heartlib/heartcodec/models/flow_matching.py b/src/heartlib/heartcodec/models/flow_matching.py
index c48d223..6f54e15 100644
--- a/src/heartlib/heartcodec/models/flow_matching.py
+++ b/src/heartlib/heartcodec/models/flow_matching.py
@@ -72,9 +72,14 @@ def inference_codes(
         batch_size = codes_bestrq_emb.shape[0]
 
         self.vq_embed.eval()
-        quantized_feature_emb = self.vq_embed.get_output_from_indices(
-            codes_bestrq_emb.transpose(1, 2)
-        )
+
+        # Clamp indices to valid codebook range to prevent CUDA out-of-bounds errors
+        # This is needed for compatibility with vector-quantize-pytorch >= 1.20
+        indices_input = codes_bestrq_emb.transpose(1, 2)
+        codebook_size = getattr(self.vq_embed, 'codebook_size', 8192)
+        indices_input = indices_input.clamp(0, codebook_size - 1)
+
+        quantized_feature_emb = self.vq_embed.get_output_from_indices(indices_input)
         quantized_feature_emb = self.cond_feature_emb(quantized_feature_emb)  # b t 512
         # assert 1==2
         quantized_feature_emb = F.interpolate(
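
Note (not part of the patch): for anyone who wants to sanity-check the
clamping behaviour outside HeartCodec, below is a minimal standalone
sketch against vector-quantize-pytorch's ResidualVQ. The dim,
num_quantizers, and codebook_size values are illustrative only, not the
configuration HeartCodec actually uses.

    import torch
    from vector_quantize_pytorch import ResidualVQ

    # Illustrative sizes only; HeartCodec's real configuration may differ.
    vq = ResidualVQ(dim=512, num_quantizers=4, codebook_size=1024)
    vq.eval()

    # Indices shaped (batch, time, num_quantizers), matching the layout
    # produced by the transpose(1, 2) in the patch.
    indices = torch.randint(0, 1024, (2, 50, 4))
    indices[0, 0, 0] = 1024  # deliberately one past codebook_size - 1

    # Same guard as in the patch: read the codebook size defensively and
    # clamp indices into [0, codebook_size - 1] before decoding.
    codebook_size = getattr(vq, "codebook_size", 1024)
    indices = indices.clamp(0, codebook_size - 1)

    with torch.no_grad():
        quantized = vq.get_output_from_indices(indices)

    print(quantized.shape)  # torch.Size([2, 50, 512])

As in the patch, the codebook size is read via getattr with a fallback
so the clamp still applies if the attribute is not exposed under that
name in a given library version.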