diff --git a/lib/statsd/instrument/compiled_metric.rb b/lib/statsd/instrument/compiled_metric.rb index 22de9bc8..ff935335 100644 --- a/lib/statsd/instrument/compiled_metric.rb +++ b/lib/statsd/instrument/compiled_metric.rb @@ -151,12 +151,10 @@ def self.#{method}(__value__#{default_val_assignment}, #{tag_names.map { |name| __return_value__ = StatsD::Instrument::VOID #{generate_block_handler if allow_block} - # Compute hash of tag values for cache lookup using rotate-left + XOR. - # Rotation makes it order-dependent (unlike plain XOR), preventing collisions - # when tag values are swapped. We mask to 32 bits to avoid Bignum allocations - # from left shifts on 64-bit hash values. - __cache_key__ = #{tag_names.first}.hash & 0xFFFFFFFF - #{tag_names.drop(1).map { |name| "__cache_key__ = (((__cache_key__ << 5) | (__cache_key__ >> 27)) ^ #{name}.hash) & 0xFFFFFFFF" }.join("\n")} + # Compute hash of tag values for cache lookup using XOR with position-based shifts. + # Shifting each hash by its position makes it order-dependent, preventing collisions + # when tag values are swapped (e.g., (a, b) vs (b, a)). + __cache_key__ = #{tag_names.map.with_index { |name, i| i.zero? ? "#{name}.hash" : "(#{name}.hash >> #{i})" }.join(" ^ ")} # Look up or create a PrecompiledDatagram __datagram__ = diff --git a/test/compiled_metric_test.rb b/test/compiled_metric_test.rb index 5bf541aa..4c390b6e 100644 --- a/test/compiled_metric_test.rb +++ b/test/compiled_metric_test.rb @@ -285,10 +285,10 @@ def test_emits_metric_on_hash_collision cache = metric.instance_variable_get(:@tag_combination_cache) - # Compute cache keys using the rotate-left + XOR formula (32-bit bounded) - # For single tag, it's the hash value masked to 32 bits - cache_key_for_1 = 1.hash & 0xFFFFFFFF - cache_key_for_2 = 2.hash & 0xFFFFFFFF + # Compute cache keys using XOR with position-based shifts + # For single tag (position 0), it's just the hash value with no shift + cache_key_for_1 = 1.hash + cache_key_for_2 = 2.hash # Store the cached datagram under the collision key cached_datagram = cache[cache_key_for_1]