Skip to content

Commit 508e9c8

Browse files
authored
Merge pull request #14 from xzeck/bug-fix/mismatched-data-type-on-serialize-and-deserialize
Fixed mismatched data type serializing and deserializing bug
2 parents a325a11 + 44fbd89 commit 508e9c8

File tree

6 files changed

+56
-3
lines changed

6 files changed

+56
-3
lines changed

examples/custom_type.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ impl ToBytes for Person {
1313
v.extend(self.name.as_bytes());
1414
v
1515
}
16+
17+
const TYPE_ID: &'static [u8] = b"Person";
1618
}
1719

1820
fn main() {

src/lib.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ pub use tobytes::ToBytes;
44

55
use std::{hash::{BuildHasher, BuildHasherDefault, DefaultHasher, Hasher}, marker::PhantomData};
66
use serde::{Deserialize, Deserializer, Serialize, Serializer};
7-
use serde::de::{Error as DeError};
7+
use serde::de::Error as DeError;
88

99
/// HyperLogLog is a probabilistic data structure for estimating cardinality.
1010
/// This implementation uses the HyperLogLog algorithm to estimate the
@@ -32,10 +32,11 @@ impl<T: ToBytes, S: BuildHasher + Default> Serialize for HyperLogLog<T, S> {
3232
where
3333
Ser: Serializer,
3434
{
35-
// generating a finger print
35+
// generating a fingerprint
3636
// This is so that if the state is saved and then reloaded, we can ensure the same hashing function is used to maintain consistency
3737
let mut hasher = self.hasher_builder.build_hasher();
3838
hasher.write(b"__hyperloglog_fingerprint__");
39+
hasher.write(T::TYPE_ID);
3940
let fingerprint = hasher.finish();
4041

4142
// generating serializable structure
@@ -61,10 +62,11 @@ impl<'de, T: ToBytes, S: BuildHasher + Default> Deserialize<'de> for HyperLogLog
6162
// Recompute fingerprint using S::default()
6263
let mut hasher = S::default().build_hasher();
6364
hasher.write(b"__hyperloglog_fingerprint__");
65+
hasher.write(T::TYPE_ID);
6466
let expected_fingerprint = hasher.finish();
6567

6668
if expected_fingerprint != data.fingerprint {
67-
return Err(D::Error::custom("Hasher mismatch: incompatible hasher used during deserialization"));
69+
return Err(D::Error::custom("Hasher mismatch: incompatible hasher or datatype used during deserialization"));
6870
}
6971

7072
Ok(Self {

src/tobytes.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,90 +1,121 @@
11
pub trait ToBytes {
22
fn to_bytes(&self) -> Vec<u8>;
3+
4+
const TYPE_ID: &'static [u8];
35
}
46

57
// Integer types (Little Endian)
68
impl ToBytes for u8 {
79
fn to_bytes(&self) -> Vec<u8> {
810
vec![*self]
911
}
12+
13+
const TYPE_ID: &'static [u8] = b"u8";
1014
}
1115

1216
impl ToBytes for u16 {
1317
fn to_bytes(&self) -> Vec<u8> {
1418
self.to_le_bytes().to_vec()
1519
}
20+
21+
const TYPE_ID: &'static [u8] = b"u16";
1622
}
1723

1824
impl ToBytes for u32 {
1925
fn to_bytes(&self) -> Vec<u8> {
2026
self.to_le_bytes().to_vec()
2127
}
28+
29+
const TYPE_ID: &'static [u8] = b"u32";
2230
}
2331

2432
impl ToBytes for u64 {
2533
fn to_bytes(&self) -> Vec<u8> {
2634
self.to_le_bytes().to_vec()
2735
}
36+
37+
const TYPE_ID: &'static [u8] = b"u64";
2838
}
2939

3040
impl ToBytes for u128 {
3141
fn to_bytes(&self) -> Vec<u8> {
3242
self.to_le_bytes().to_vec()
3343
}
44+
45+
const TYPE_ID: &'static [u8] = b"u128";
3446
}
3547

3648
impl ToBytes for i8 {
3749
fn to_bytes(&self) -> Vec<u8> {
3850
vec![*self as u8]
3951
}
52+
53+
const TYPE_ID: &'static [u8] = b"i8";
54+
4055
}
4156

4257
impl ToBytes for i16 {
4358
fn to_bytes(&self) -> Vec<u8> {
4459
self.to_le_bytes().to_vec()
4560
}
61+
62+
const TYPE_ID: &'static [u8] = b"i16";
4663
}
4764

4865
impl ToBytes for i32 {
4966
fn to_bytes(&self) -> Vec<u8> {
5067
self.to_le_bytes().to_vec()
5168
}
69+
70+
const TYPE_ID: &'static [u8] = b"i32";
5271
}
5372

5473
impl ToBytes for i64 {
5574
fn to_bytes(&self) -> Vec<u8> {
5675
self.to_le_bytes().to_vec()
5776
}
77+
78+
const TYPE_ID: &'static [u8] = b"i64";
5879
}
5980

6081
impl ToBytes for i128 {
6182
fn to_bytes(&self) -> Vec<u8> {
6283
self.to_le_bytes().to_vec()
6384
}
85+
86+
const TYPE_ID: &'static [u8] = b"i128";
6487
}
6588

6689
// Floating point types (Native Endian)
6790
impl ToBytes for f32 {
6891
fn to_bytes(&self) -> Vec<u8> {
6992
self.to_ne_bytes().to_vec()
7093
}
94+
95+
const TYPE_ID: &'static [u8] = b"f32";
7196
}
7297

7398
impl ToBytes for f64 {
7499
fn to_bytes(&self) -> Vec<u8> {
75100
self.to_ne_bytes().to_vec()
76101
}
102+
103+
const TYPE_ID: &'static [u8] = b"f64";
77104
}
78105

79106

80107
impl ToBytes for &str {
81108
fn to_bytes(&self) -> Vec<u8> {
82109
self.as_bytes().to_vec()
83110
}
111+
112+
const TYPE_ID: &'static [u8] = b"&str";
84113
}
85114

86115
impl ToBytes for String {
87116
fn to_bytes(&self) -> Vec<u8> {
88117
self.as_bytes().to_vec()
89118
}
119+
120+
const TYPE_ID: &'static [u8] = b"String";
90121
}

tests/integer_tests.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ impl ToBytes for Colliding {
1010
fn to_bytes(&self) -> Vec<u8> {
1111
vec![0; 8]
1212
}
13+
14+
const TYPE_ID: &'static [u8] = b"Colliding";
1315
}
1416

1517
/// Sequential test of insert and cardinality

tests/serializing_deserializing_test.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ fn test_roundtrip_json_identity() {
5858
let recovered: HyperLogLog<i64, BuildHasherDefault<DefaultHasher>> =
5959
serde_json::from_str(&original).unwrap();
6060
let round_trip = serde_json::to_string(&recovered).unwrap();
61+
6162
assert_eq!(original, round_trip, "Serialize→deserialize→serialize should preserve exact JSON");
6263
}
6364

@@ -91,4 +92,17 @@ fn test_xxh3_roundtrip_succeeds() {
9192
hll.calculate_cardinality(),
9293
"XXH3→serde→XXH3 roundtrip should work and preserve cardinality"
9394
);
95+
}
96+
97+
#[test]
98+
fn test_error_on_deserializing_mismatched_element_type() {
99+
let p = 4;
100+
let mut hll: HyperLogLog<i64> = HyperLogLog::new(p);
101+
102+
let json = serde_json::to_string(&hll).unwrap();
103+
104+
let res: Result<HyperLogLog<f64>, _> = serde_json::from_str(&json);
105+
106+
assert!(res.is_err(), "Deserializing different datatype, should fail");
107+
94108
}

tests/string_tests.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ impl ToBytes for CollidingString {
111111
fn to_bytes(&self) -> Vec<u8> {
112112
Vec::new() // all collide
113113
}
114+
115+
const TYPE_ID: &'static [u8] = b"CollidingString";
114116
}
115117

116118
#[test]

0 commit comments

Comments
 (0)