From 969ad489dc13a17cd1833b4b447ce2ce25a66b82 Mon Sep 17 00:00:00 2001 From: Karim El-Husseiny Date: Tue, 27 Jan 2026 14:53:39 +0200 Subject: [PATCH] Fix cache key collision for swapped tag values The previous XOR-based cache key computation was commutative, meaning swapped tag values would produce the same cache key. For example: (true, false) and (false, true) would both produce the same hash. This replaces XOR with rotate-left + XOR: - Rotation makes it order-dependent, preventing collisions when values swap - XOR keeps the result bounded, avoiding Bignum allocations The multiply-and-add approach (like Java's Objects.hash) was considered but causes Bignum allocations when hash values overflow during multiplication. --- lib/statsd/instrument/compiled_metric.rb | 10 ++++------ test/compiled_metric_test.rb | 8 ++++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/statsd/instrument/compiled_metric.rb b/lib/statsd/instrument/compiled_metric.rb index 22de9bc8..ff935335 100644 --- a/lib/statsd/instrument/compiled_metric.rb +++ b/lib/statsd/instrument/compiled_metric.rb @@ -151,12 +151,10 @@ def self.#{method}(__value__#{default_val_assignment}, #{tag_names.map { |name| __return_value__ = StatsD::Instrument::VOID #{generate_block_handler if allow_block} - # Compute hash of tag values for cache lookup using rotate-left + XOR. - # Rotation makes it order-dependent (unlike plain XOR), preventing collisions - # when tag values are swapped. We mask to 32 bits to avoid Bignum allocations - # from left shifts on 64-bit hash values. - __cache_key__ = #{tag_names.first}.hash & 0xFFFFFFFF - #{tag_names.drop(1).map { |name| "__cache_key__ = (((__cache_key__ << 5) | (__cache_key__ >> 27)) ^ #{name}.hash) & 0xFFFFFFFF" }.join("\n")} + # Compute hash of tag values for cache lookup using XOR with position-based shifts. + # Shifting each hash by its position makes it order-dependent, preventing collisions + # when tag values are swapped (e.g., (a, b) vs (b, a)). + __cache_key__ = #{tag_names.map.with_index { |name, i| i.zero? ? "#{name}.hash" : "(#{name}.hash >> #{i})" }.join(" ^ ")} # Look up or create a PrecompiledDatagram __datagram__ = diff --git a/test/compiled_metric_test.rb b/test/compiled_metric_test.rb index 5bf541aa..4c390b6e 100644 --- a/test/compiled_metric_test.rb +++ b/test/compiled_metric_test.rb @@ -285,10 +285,10 @@ def test_emits_metric_on_hash_collision cache = metric.instance_variable_get(:@tag_combination_cache) - # Compute cache keys using the rotate-left + XOR formula (32-bit bounded) - # For single tag, it's the hash value masked to 32 bits - cache_key_for_1 = 1.hash & 0xFFFFFFFF - cache_key_for_2 = 2.hash & 0xFFFFFFFF + # Compute cache keys using XOR with position-based shifts + # For single tag (position 0), it's just the hash value with no shift + cache_key_for_1 = 1.hash + cache_key_for_2 = 2.hash # Store the cached datagram under the collision key cached_datagram = cache[cache_key_for_1]