more tests

SwayamInSync · SwayamInSync · commit 4d34a850989b · 2025-12-23T12:43:21.000+05:30
diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -17,6 +17,7 @@
 #include "dtype.h"
 #include "lock.h"
 #include "utilities.h"
+#include "constants.hpp"
 
 
 QuadPrecisionObject *
@@ -624,6 +625,119 @@ static PyGetSetDef QuadPrecision_getset[] = {
     {NULL}  /* Sentinel */
 };
 
+/*
+ * Hash function for QuadPrecision scalars.
+ * 
+ * This implements the same algorithm as CPython's _Py_HashDouble, adapted for
+ * quad precision (128-bit) floating point. The algorithm computes a hash based
+ * on the reduction of the value modulo the prime P = 2**PYHASH_BITS - 1.
+ * 
+ * Key invariant: hash(x) == hash(y) whenever x and y are numerically equal,
+ * even if x and y have different types. This ensures that:
+ *   hash(QuadPrecision(1.0)) == hash(1.0) == hash(1)
+ * 
+ * The algorithm:
+ * 1. Handle special cases: inf returns PYHASH_INF, nan uses pointer hash
+ * 2. Extract mantissa m in [0.5, 1.0) and exponent e via frexp(v) = m * 2^e
+ * 3. Process mantissa 28 bits at a time, accumulating into hash value x
+ * 4. Adjust for exponent using bit rotation (since 2^PYHASH_BITS ≡ 1 mod P)
+ * 5. Apply sign and handle the special case of -1 -> -2
+ */
+
+#if SIZEOF_VOID_P >= 8
+#  define PYHASH_BITS 61
+#else
+#  define PYHASH_BITS 31
+#endif
+#define PYHASH_MODULUS (((Py_uhash_t)1 << PYHASH_BITS) - 1)
+#define PYHASH_INF 314159
+
+static Py_hash_t
+QuadPrecision_hash(QuadPrecisionObject *self)
+{
+    Sleef_quad value;
+    int sign = 1;
+    
+    if (self->backend == BACKEND_SLEEF) {
+        value = self->value.sleef_value;
+    }
+    else {
+        value = Sleef_cast_from_doubleq1((double)self->value.longdouble_value);
+    }
+    
+    // Check for NaN - use pointer hash (each NaN instance gets unique hash)
+    // This prevents hash table catastrophic pileups from NaN instances
+    if (Sleef_iunordq1(value, value)) {
+        return _Py_HashPointer((void *)self);
+    }
+    
+    if (Sleef_icmpeqq1(value, QUAD_PRECISION_INF)) {
+        return PYHASH_INF;
+    }
+    if (Sleef_icmpeqq1(value, QUAD_PRECISION_NINF)) {
+        return -PYHASH_INF;
+    }
+    
+    // Handle sign
+    Sleef_quad zero = Sleef_cast_from_int64q1(0);
+    if (Sleef_icmpltq1(value, zero)) {
+        sign = -1;
+        value = Sleef_negq1(value);
+    }
+    
+    // Get mantissa and exponent: value = m * 2^e, where 0.5 <= m < 1.0
+    int exponent;
+    Sleef_quad mantissa = Sleef_frexpq1(value, &exponent);
+    
+    // Process 28 bits at a time (same as CPython's _Py_HashDouble)
+    // This works well for both binary and hexadecimal floating point
+    Py_uhash_t x = 0;
+    // 2^28 = 268435456 - exactly representable in double, so cast is safe
+    Sleef_quad multiplier = Sleef_cast_from_int64q1(1LL << 28);
+    
+    // Continue until mantissa becomes zero (all bits processed)
+    while (Sleef_icmpneq1(mantissa, zero)) {
+        // Rotate x left by 28 bits within PYHASH_MODULUS
+        x = ((x << 28) & PYHASH_MODULUS) | (x >> (PYHASH_BITS - 28));
+        
+        // Scale mantissa by 2^28
+        mantissa = Sleef_mulq1_u05(mantissa, multiplier);
+        exponent -= 28;
+        
+        // Extract integer part
+        Sleef_quad int_part = Sleef_truncq1(mantissa);
+        Py_uhash_t y = (Py_uhash_t)Sleef_cast_to_int64q1(int_part);
+        
+        // Remove integer part from mantissa (keep fractional part)
+        mantissa = Sleef_subq1_u05(mantissa, int_part);
+        
+        // Accumulate
+        x += y;
+        if (x >= PYHASH_MODULUS) {
+            x -= PYHASH_MODULUS;
+        }
+    }
+    
+    // Adjust for exponent: reduce e modulo PYHASH_BITS
+    // For negative exponents: PYHASH_BITS - 1 - ((-1 - e) % PYHASH_BITS)
+    int e = exponent >= 0 
+            ? exponent % PYHASH_BITS 
+            : PYHASH_BITS - 1 - ((-1 - exponent) % PYHASH_BITS);
+    
+    // Rotate x left by e bits
+    x = ((x << e) & PYHASH_MODULUS) | (x >> (PYHASH_BITS - e));
+    
+    // Apply sign
+    x = x * sign;
+    
+    // -1 is reserved for errors, so use -2 instead
+    if (x == (Py_uhash_t)-1) {
+        x = (Py_uhash_t)-2;
+    }
+    
+    return (Py_hash_t)x;
+}
+
 PyTypeObject QuadPrecision_Type = {
         PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecision",
         .tp_basicsize = sizeof(QuadPrecisionObject),
@@ -632,6 +746,7 @@ PyTypeObject QuadPrecision_Type = {
         .tp_dealloc = (destructor)QuadPrecision_dealloc,
         .tp_repr = (reprfunc)QuadPrecision_repr_dragon4,
         .tp_str = (reprfunc)QuadPrecision_str_dragon4,
+        .tp_hash = (hashfunc)QuadPrecision_hash,
         .tp_as_number = &quad_as_scalar,
         .tp_as_buffer = &QuadPrecision_as_buffer,
         .tp_richcompare = (richcmpfunc)quad_richcompare,
diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py
@@ -5229,3 +5229,121 @@ def test_add_regression_zero_plus_small(self):
         
         assert result_yx == result_xy, f"0 + x = {result_yx}, but x + 0 = {result_xy}"
         assert result_yx == x, f"0 + x = {result_yx}, expected {x}"
+
+
+class TestQuadPrecisionHash:
+    """Test suite for QuadPrecision hash function.
+    
+    The hash implementation follows CPython's _Py_HashDouble algorithm to ensure
+    the invariant: hash(x) == hash(y) when x and y are numerically equal,
+    even across different types.
+    """
+
+    @pytest.mark.parametrize("value", [
+        # Values that are exactly representable in binary floating point
+        "0.0", "1.0", "-1.0", "2.0", "-2.0",
+        "0.5", "0.25", "1.5", "-0.5",
+        "100.0", "-100.0",
+        # Powers of 2 are exactly representable
+        "0.125", "0.0625", "4.0", "8.0",
+    ])
+    def test_hash_matches_float(self, value):
+        """Test that hash(QuadPrecision) == hash(float) for exactly representable values.
+        
+        Note: Only values that are exactly representable in both float64 and float128
+        should match. Values like 0.1, 0.3 will have different hashes because they
+        have different binary representations at different precisions.
+        """
+        quad_val = QuadPrecision(value)
+        float_val = float(value)
+        assert hash(quad_val) == hash(float_val)
+
+    @pytest.mark.parametrize("value", [0.1, 0.3, 0.7, 1.1, 2.3])
+    def test_hash_matches_float_from_float(self, value):
+        """Test that QuadPrecision created from float has same hash as that float.
+        
+        When creating QuadPrecision from a Python float, the value is converted
+        from the float's double precision representation, so they should be
+        numerically equal and have the same hash.
+        """
+        quad_val = QuadPrecision(value)  # Created from float, not string
+        assert hash(quad_val) == hash(value)
+
+    @pytest.mark.parametrize("value", [0, 1, -1, 2, -2, 100, -100, 1000, -1000])
+    def test_hash_matches_int(self, value):
+        """Test that hash(QuadPrecision) == hash(int) for integer values."""
+        quad_val = QuadPrecision(value)
+        assert hash(quad_val) == hash(value)
+
+    def test_hash_matches_large_int(self):
+        """Test that hash(QuadPrecision) == hash(int) for large integers."""
+        big_int = 10**20
+        quad_val = QuadPrecision(str(big_int))
+        assert hash(quad_val) == hash(big_int)
+
+    def test_hash_infinity(self):
+        """Test that infinity hash matches Python's float infinity hash."""
+        assert hash(QuadPrecision("inf")) == hash(float("inf"))
+        assert hash(QuadPrecision("-inf")) == hash(float("-inf"))
+        # Standard PyHASH_INF values
+        assert hash(QuadPrecision("inf")) == 314159
+        assert hash(QuadPrecision("-inf")) == -314159
+
+    def test_hash_nan_unique(self):
+        """Test that each NaN instance gets a unique hash (pointer-based)."""
+        nan1 = QuadPrecision("nan")
+        nan2 = QuadPrecision("nan")
+        # NaN instances should have different hashes (based on object identity)
+        assert hash(nan1) != hash(nan2)
+
+    def test_hash_nan_same_instance(self):
+        """Test that the same NaN instance has consistent hash."""
+        nan = QuadPrecision("nan")
+        assert hash(nan) == hash(nan)
+
+    def test_hash_negative_one(self):
+        """Test that hash(-1) returns -2 (Python's hash convention)."""
+        # In Python, hash(-1) returns -2 because -1 is reserved for errors
+        assert hash(QuadPrecision(-1.0)) == -2
+        assert hash(QuadPrecision("-1.0")) == -2
+
+    def test_hash_set_membership(self):
+        """Test that QuadPrecision values work correctly in sets."""
+        vals = [QuadPrecision(1.0), QuadPrecision(2.0), QuadPrecision(1.0)]
+        unique_set = set(vals)
+        assert len(unique_set) == 2
+
+    def test_hash_set_cross_type(self):
+        """Test that QuadPrecision and float with same value are in same set bucket."""
+        s = {QuadPrecision(1.0)}
+        s.add(1.0)
+        assert len(s) == 1
+
+    def test_hash_dict_key(self):
+        """Test that QuadPrecision values work as dict keys."""
+        d = {QuadPrecision(1.0): "one", QuadPrecision(2.0): "two"}
+        assert d[QuadPrecision(1.0)] == "one"
+        assert d[QuadPrecision(2.0)] == "two"
+
+    def test_hash_dict_cross_type_lookup(self):
+        """Test that dict lookup works with float keys when hash matches."""
+        d = {QuadPrecision(1.0): "one"}
+        # Float lookup should work if hash and eq both work
+        assert d.get(1.0) == "one"
+
+    @pytest.mark.parametrize("value", [
+        "1e-100", "-1e-100",
+        "1e100", "-1e100",
+        "1e-300", "-1e-300",
+    ])
+    def test_hash_extreme_values(self, value):
+        """Test hash works for extreme values without errors."""
+        quad_val = QuadPrecision(value)
+        h = hash(quad_val)
+        assert isinstance(h, int)
+
+    @pytest.mark.parametrize("backend", ["sleef", "longdouble"])
+    def test_hash_backends(self, backend):
+        """Test hash works for both backends."""
+        quad_val = QuadPrecision(1.5, backend=backend)
+        assert hash(quad_val) == hash(1.5)