From 4aa20402d4c406a123ea82540cb06fd103f5d270 Mon Sep 17 00:00:00 2001 From: Ajay Nair Date: Sun, 4 May 2025 20:47:12 -0300 Subject: [PATCH 1/2] Added reset --- src/lib.rs | 6 ++++++ src/main.rs | 1 + 2 files changed, 7 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 9a80b7b..f26ffc7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -91,6 +91,7 @@ impl HyperLogLog> { } impl HyperLogLog { + /// Creates a new `HyperLogLog` with `p` bits. /// Panics if `p < 4` or if `p` is too large to shift safely. pub fn with_hasher(p: u32, hasher_builder: S) -> Self { @@ -103,6 +104,7 @@ impl HyperLogLog { // which means max leading zeros is 64 but the smallest data type rust handles is u8 let buckets = vec![0u8; m]; + HyperLogLog { p, m, buckets, hasher_builder, _marker: PhantomData } } @@ -175,4 +177,8 @@ impl HyperLogLog { return Ok(()) } + + pub fn reset(&mut self) { + self.buckets.fill(0); + } } diff --git a/src/main.rs b/src/main.rs index ab061dd..ab08c5a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,6 +25,7 @@ fn main() { println!("Time to insert: {:.2?}", insertion_end); println!("Time to calculate cardinality: {:.2?}", duration_cardinality); + hll.reset(); let serialized = serde_json::to_string(&hll).unwrap(); println!("{}", serialized); let desserialized: HyperLogLog = serde_json::from_str(&serialized).unwrap(); From 532b72c4ea785656190b7251e77a5a13ff025d0c Mon Sep 17 00:00:00 2001 From: Ajay Nair Date: Sun, 4 May 2025 22:36:36 -0300 Subject: [PATCH 2/2] Added tests --- src/lib.rs | 12 +++++++ tests/reset_tests.rs | 74 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 tests/reset_tests.rs diff --git a/src/lib.rs b/src/lib.rs index f26ffc7..b26e698 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -181,4 +181,16 @@ impl HyperLogLog { pub fn reset(&mut self) { self.buckets.fill(0); } + + pub fn get_buckets(&self) -> Vec { + self.buckets.clone() + } + + pub fn get_p(&self) -> u32 { + self.p.clone() + } + + pub fn get_m(&self) -> usize { + self.m.clone() + } } diff --git a/tests/reset_tests.rs b/tests/reset_tests.rs new file mode 100644 index 0000000..7e891db --- /dev/null +++ b/tests/reset_tests.rs @@ -0,0 +1,74 @@ +use hyperloglog::HyperLogLog; + +#[test] +fn test_reset_clears_buckets() { + let mut hll = HyperLogLog::::new(10); + + // Insert some elements + hll.insert(1); + hll.insert(2); + + + let are_buckets_filled: bool = hll.get_buckets().iter().any(|x| *x > 0u8); + + // Ensure buckets are not in the default state before reset + assert!(are_buckets_filled == true); + + // Call reset + hll.reset(); + + let are_buckets_filled: bool = hll.get_buckets().iter().any(|x| *x > 0); + + assert!(are_buckets_filled == false); + +} + +#[test] +fn test_reset_does_not_affect_other_fields() { + let mut hll = HyperLogLog::::new(10); + + let original_p = hll.get_p(); + let original_m = hll.get_m(); + + // Call reset + hll.reset(); + + let new_p = hll.get_p(); + let new_m = hll.get_m(); + + assert!(original_p == new_p, "original p: {}, not equal to p after reset: {}", original_p, new_p); + assert!(original_m == new_m, "original m: {}, not equal to m after reset: {}", original_m, new_m); +} + +#[test] +fn test_reset_after_inserting_elements() { + let mut hll = HyperLogLog::::new(10); + + // Insert elements into the HyperLogLog + hll.insert(1); + hll.insert(2); + + // Ensure the cardinality estimate is non-zero + let cardinality_before_reset = hll.calculate_cardinality(); + assert!(cardinality_before_reset > 0); + + // Call reset + hll.reset(); + + // Ensure the cardinality estimate is zero after reset + let cardinality_after_reset = hll.calculate_cardinality(); + assert_eq!(cardinality_after_reset, 0); +} + +#[test] +fn test_reset_multiple_times() { + let mut hll = HyperLogLog::::new(10); + + // Call reset multiple times + hll.reset(); + hll.reset(); + hll.reset(); + + // Ensure buckets are still zero after multiple resets + assert_eq!(hll.get_buckets()[0], 0); +} \ No newline at end of file