From fb0573d7d676cc962b68b0ea90b010e706f02944 Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Wed, 15 Oct 2025 00:18:36 +0530 Subject: [PATCH 1/9] Resolve merge with refactor --- Cargo.lock | 25 ++ Cargo.toml | 1 + crates/defs/src/error.rs | 1 + crates/index/Cargo.toml | 1 + crates/index/src/kd_tree.rs | 516 ++++++++++++++++++++++-------------- crates/index/src/lib.rs | 3 +- 6 files changed, 343 insertions(+), 204 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1b3e627..4b28e53 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21,6 +21,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "bincode" version = "1.3.3" @@ -181,6 +187,7 @@ name = "index" version = "0.1.0" dependencies = [ "defs", + "ordered-float", ] [[package]] @@ -295,12 +302,30 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "ordered-float" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2c1f9f56e534ac6a9b8a4600bdf0f530fb393b5f393e7b4d03489c3cf0c3f01" +dependencies = [ + "num-traits", +] + [[package]] name = "peeking_take_while" version = "0.1.2" diff --git a/Cargo.toml b/Cargo.toml index 670d962..1ecb74c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ members = [ "crates/server", ] + # You can define shared dependencies for all crates here [workspace.dependencies] # tokio = { version = "1.37.0", features = ["full"] } diff --git a/crates/defs/src/error.rs b/crates/defs/src/error.rs index 1079c5e..ff15895 100644 --- a/crates/defs/src/error.rs +++ b/crates/defs/src/error.rs @@ -6,4 +6,5 @@ pub enum DbError { DeserializationError, IndexError(String), LockError, + IndexInitError, //TODO: Change this } diff --git a/crates/index/Cargo.toml b/crates/index/Cargo.toml index b54e3c8..c81c18f 100644 --- a/crates/index/Cargo.toml +++ b/crates/index/Cargo.toml @@ -6,5 +6,6 @@ version = "0.1.0" edition = "2021" [dependencies] +ordered-float = "5.0.0" defs = { path = "../defs" } diff --git a/crates/index/src/kd_tree.rs b/crates/index/src/kd_tree.rs index 444f578..92b30b8 100644 --- a/crates/index/src/kd_tree.rs +++ b/crates/index/src/kd_tree.rs @@ -1,252 +1,362 @@ -use std::cmp::Ordering; -use std::cmp::Ordering::Less; - -use serde_derive::{Deserialize, Serialize}; - -#[derive(Serialize, Deserialize)] -pub struct KDTreeInternals { - pub kd_tree_allow_update: bool, - pub current_number_of_kd_tree_nodes: usize, - pub rebuild_threshold: f32, - pub previous_tree_size: usize, - pub rebuild_counter: usize, +use core::{DbError, DenseVector, IndexedVector, PointId, Similarity}; +use std::{ + cmp::Ordering, + collections::{BinaryHeap, HashMap}, + vec, +}; + +use crate::{distance, VectorIndex}; + +pub struct KDTree { + dim: usize, + root: Option>, + // An in memory point map for lookup during delete + point_map: HashMap, } -#[derive(Serialize, Deserialize)] +// the node which will be the part of the KD Tree pub struct KDTreeNode { - pub left: Option>, - pub right: Option>, - pub key: String, - pub vector: Vec, - pub dim: usize, + indexed_vector: IndexedVector, + split_dim: usize, + left: Option>, + right: Option>, + is_deleted: bool, } -impl KDTreeNode { - // Add the logic here to create a new db and insert the tree into the database - fn new(data: (String, Vec), dim: usize) -> KDTreeNode { - KDTreeNode { - left: None, - right: None, - key: data.0, - vector: data.1, - dim, - } +#[derive(Debug, Clone, PartialEq)] +struct Neighbor { + id: PointId, + distance: f32, +} + +impl Eq for Neighbor {} + +// Custom Ord implementation for the max-heap +impl Ord for Neighbor { + fn cmp(&self, other: &Self) -> Ordering { + self.distance + .partial_cmp(&other.distance) + .unwrap_or(Ordering::Equal) } } -pub struct KDTree { - pub _root: Option>, - pub _internals: KDTreeInternals, - pub is_debug_run: bool, - pub dim: usize, +impl PartialOrd for Neighbor { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } impl KDTree { - // Create an empty tree with default values - pub fn new() -> KDTree { + pub fn mock() { + //here is the mock code + } + + // Build an empty index with no points + pub fn build_empty(dim: usize) -> Self { KDTree { - _root: None, - _internals: KDTreeInternals { - kd_tree_allow_update: true, - current_number_of_kd_tree_nodes: 0, - rebuild_threshold: 2.0f32, - previous_tree_size: 0, - rebuild_counter: 0, - }, - is_debug_run: true, - dim: 0, + dim, + root: None, + point_map: HashMap::new(), } } - // Add a node - // If the dimension of the tree is zero, then it becomes equal to the input data - pub fn add_node(&mut self, data: (String, Vec), depth: usize) { - if self._root.is_none() { - self.dim = data.1.len(); - self._root = Some(Box::new(KDTreeNode::new(data, 0))); - self._internals.current_number_of_kd_tree_nodes += 1; - return; - } + // Builds the vector index from provided vectors, there should atleast be single vector for dim calculation + pub fn build(mut vectors: Vec) -> Result { + if vectors.is_empty() { + Err(DbError::IndexInitError) + } else { + let dim = vectors[0].vector.len(); - assert_eq!(self.dim, data.1.len()); + let mut point_map = HashMap::with_capacity(vectors.len()); + for iv in vectors.iter() { + point_map.insert(iv.id, iv.vector.clone()); + } + let root_node = Self::build_recursive(&mut vectors, 0, dim); + Ok(KDTree { + dim, + root: Some(root_node), + point_map, + }) + } + } - if !self._internals.kd_tree_allow_update { - println!("KDTree is locked for rebuild"); - return; + // Builds the tree recursively with given vectors and returns the pointer of the root node + pub fn build_recursive( + vectors: &mut [IndexedVector], + depth: usize, + dim: usize, + ) -> Box { + if vectors.is_empty() { + panic!("Cannot build from an empty slice recursively"); } - if self._internals.previous_tree_size != 0 { - let current_ratio: f32 = self._internals.current_number_of_kd_tree_nodes as f32 - / self._internals.previous_tree_size as f32; - if current_ratio > self._internals.rebuild_threshold { - self._internals.previous_tree_size = - self._internals.current_number_of_kd_tree_nodes; - self.rebuild(); - } + let axis = depth % dim; + let mid_idx = vectors.len() / 2; + + vectors.select_nth_unstable_by(mid_idx, |a, b| { + let a_at_axis = a.vector[axis]; + let b_at_axis = b.vector[axis]; + a_at_axis.partial_cmp(&b_at_axis).unwrap_or(Ordering::Equal) + }); + + // Using swap so that we don't need to clone the whole vector + let mut median_vec = IndexedVector { + id: 0, + vector: vec![], + }; // dummy + std::mem::swap(&mut vectors[mid_idx], &mut median_vec); + + let (left_points, right_points_with_median) = vectors.split_at_mut(mid_idx); + let right_points = &mut right_points_with_median[1..]; // Exclude the swapped-out median + + let left = if left_points.is_empty() { + None } else { - self._internals.previous_tree_size = self._internals.current_number_of_kd_tree_nodes; + Some(Self::build_recursive(left_points, depth + 1, dim)) + }; + + let right = if right_points.is_empty() { + None + } else { + Some(Self::build_recursive(right_points, depth + 1, dim)) + }; + + Box::new(KDTreeNode { + indexed_vector: median_vec, + split_dim: axis, + left, + right, + is_deleted: false, + }) + } + + pub fn insert_point(&mut self, new_vector: IndexedVector) { + // use a traverse function to get the final leaf where this belongs + if self.root.is_none() { + self.root = Some(Box::new(KDTreeNode { + indexed_vector: new_vector, + split_dim: 0, + left: None, + right: None, + is_deleted: false, + })); + return; } - self._internals.current_number_of_kd_tree_nodes += 1; - - let mut current_node = self._root.as_deref_mut().unwrap(); - let mut current_depth = depth; - loop { - let current_dimension = current_depth % self.dim; - if data.1[current_dimension] < current_node.vector[current_dimension] { - if current_node.left.is_none() { - current_node.left = Some(Box::new(KDTreeNode::new(data, current_dimension))); - break; - } else { - current_node = current_node.left.as_deref_mut().unwrap(); - current_depth += 1; - } + let mut current_link = &mut self.root; + let mut depth = 0; + let dim = self.dim; + + while let Some(ref mut node_box) = current_link { + let axis = depth % dim; + let current_node = node_box.as_mut(); + + let va = new_vector.vector[axis]; + let vb = current_node.indexed_vector.vector[axis]; + + if va <= vb { + current_link = &mut current_node.left; } else { - if current_node.right.is_none() { - current_node.right = Some(Box::new(KDTreeNode::new(data, current_dimension))); - break; - } else { - current_node = current_node.right.as_deref_mut().unwrap(); - current_depth += 1; - } + current_link = &mut current_node.right; } + depth += 1; } + + // Assign the new node to current link which is &mut Option> + let axis = depth % dim; + *current_link = Some(Box::new(KDTreeNode { + indexed_vector: new_vector, + split_dim: axis, + left: None, + right: None, + is_deleted: false, + })) } - // rebuild tree - fn rebuild(&mut self) { - self._internals.kd_tree_allow_update = false; - self._internals.rebuild_counter += 1; - if self.is_debug_run { - println!( - "Rebuilding tree..., Rebuild counter: {:?}", - self._internals.rebuild_counter + // Deletes the point by first finding the corresponding node using DFS and then deleting + // Returns true if point found and deleted, else false + // First make a lookup of vector from map, then traverse the tree to obtain the point and mark it as deleted + pub fn delete_point(&mut self, point_id: PointId) -> bool { + if let Some(vector_to_delete) = self.point_map.get(&point_id) { + let found_and_deleted = Self::find_and_mark_recursive( + &mut self.root, + vector_to_delete, + point_id, + 0, + self.dim, ); + + if found_and_deleted { + self.point_map.remove(&point_id); + } + + return found_and_deleted; } - let mut points = Vec::into_boxed_slice(self.traversal(0)); - self._root = Some(Box::new(create_tree_helper(points.as_mut(), 0))); - self._internals.kd_tree_allow_update = true; + false } - // traversal - pub fn traversal(&self, k_value: usize) -> Vec<(String, Vec)> { - let mut result: Vec<(String, Vec)> = Vec::new(); - inorder_traversal_helper(self._root.as_deref(), &mut result, k_value); - result - } + // Recursively finds and marks a node as deleted, + fn find_and_mark_recursive( + node_opt: &mut Option>, + target_vector: &DenseVector, + target_id: PointId, + depth: usize, + dim: usize, + ) -> bool { + if let Some(node) = node_opt { + if node.indexed_vector.id == target_id { + node.is_deleted = true; + return true; + } - // delete a node - pub fn delete_node(&mut self, data: String) { - self._internals.kd_tree_allow_update = false; - let mut points = self.traversal(0); - let index = points.iter().position(|x| *x.0 == data).unwrap(); - points.remove(index); - let mut points = Vec::into_boxed_slice(points); - self._root = Some(Box::new(create_tree_helper(points.as_mut(), 0))); - self._internals.kd_tree_allow_update = true; - } + let axis = depth % dim; + let target_val = target_vector[axis]; + let node_val = node.indexed_vector.vector[axis]; - // print data for debug - pub fn print_tree_for_debug(&self) { - let iterated: Vec<(String, Vec)> = self.traversal(0); - for iter in iterated { - println!("{}", iter.0); + if target_val < node_val { + Self::find_and_mark_recursive( + &mut node.left, + target_vector, + target_id, + depth + 1, + dim, + ) + } else if target_val > node_val { + Self::find_and_mark_recursive( + &mut node.right, + target_vector, + target_id, + depth + 1, + dim, + ) + } else { + // Need to check both right and left nodes in this case + let left_found = Self::find_and_mark_recursive( + &mut node.left, + target_vector, + target_id, + depth + 1, + dim, + ); + let right_found = Self::find_and_mark_recursive( + &mut node.right, + target_vector, + target_id, + depth + 1, + dim, + ); + left_found || right_found + } + } else { + false } } - // different methods of knn -} + pub fn search_top_k( + &self, + query_vector: DenseVector, + k: usize, + dist_type: Similarity, + ) -> Vec<(PointId, f32)> { + //Searches for top k closest vectors according to specified metric -// Traversal helper function -fn inorder_traversal_helper( - node: Option<&KDTreeNode>, - result: &mut Vec<(String, Vec)>, - k_value: usize, -) -> Option { - if node.is_none() { - return None; - } - if k_value != 0 && k_value <= result.len() { - return None; - } - let current_node = node.unwrap(); - inorder_traversal_helper(current_node.to_owned().left.as_deref(), result, k_value); - result.push((current_node.key.clone(), current_node.vector.clone())); - inorder_traversal_helper(current_node.to_owned().right.as_deref(), result, k_value); + if self.root.is_none() || k == 0 { + return Vec::new(); + } - Some(true) -} + let mut best_neighbours = BinaryHeap::with_capacity(k); -// Rebuild tree helper functions -fn create_tree_helper(points: &mut [(String, Vec)], dim: usize) -> KDTreeNode { - let points_len = points.len(); - if points_len == 1 { - return KDTreeNode { - key: points[0].0.clone(), - vector: points[0].1.clone(), - left: None, - right: None, - dim, - }; + self.search_recursive( + &self.root, + &query_vector, + k, + &mut best_neighbours, + 0, + dist_type, + ); + + best_neighbours + .into_sorted_vec() + .iter() + .map(|neighbor| (neighbor.id, neighbor.distance)) + .collect() } - // Split around the median - let pivot = quickselect_by(points, points_len / 2, &|a, b| { - a.1[dim].partial_cmp(&b.1[dim]).unwrap() - }); - - let left = Some(Box::new(create_tree_helper( - &mut points[0..points_len / 2], - (dim + 1) % pivot.1.len(), - ))); - let right = if points.len() >= 3 { - Some(Box::new(create_tree_helper( - &mut points[points_len / 2 + 1..points_len], - (dim + 1) % pivot.1.len(), - ))) - } else { - None - }; - - KDTreeNode { - key: pivot.0, - vector: pivot.1, - left, - right, - dim, + fn search_recursive( + &self, + node_opt: &Option>, + query_vector: &DenseVector, + k: usize, + heap: &mut BinaryHeap, + depth: usize, + dist_type: Similarity, + ) { + // Base case is that we hit a leaf node don't do anything + if let Some(node) = node_opt { + let axis = depth % self.dim; + + let (near_side, far_side) = if query_vector[axis] <= node.indexed_vector.vector[axis] { + (&node.left, &node.right) + } else { + (&node.right, &node.left) + }; + + // Recurse on near side first + self.search_recursive(&near_side, query_vector, k, heap, depth + 1, dist_type); + + // Process the current node + if !node.is_deleted { + //TODO: Use square distance in distance, why is there overhead of square + let distance = distance(query_vector, &node.indexed_vector.vector, dist_type); + if heap.len() < k { + heap.push(Neighbor { + id: node.indexed_vector.id, + distance, + }); + } else if distance < heap.peek().unwrap().distance { + heap.pop(); + heap.push(Neighbor { + id: node.indexed_vector.id, + distance, + }); + } + } + + // Pruning on the farther side to check if there are better candidates + //TODO: Change this when implementing square distance + let dist_to_plane = match dist_type { + Similarity::Euclidean => query_vector[axis] - node.indexed_vector.vector[axis], + Similarity::Manhattan => 1.0, + _ => unreachable!(), + }; + + if heap.len() < k || dist_to_plane < heap.peek().unwrap().distance { + self.search_recursive(far_side, query_vector, k, heap, depth + 1, dist_type); + } + } } } -fn quickselect_by(arr: &mut [T], position: usize, cmp: &dyn Fn(&T, &T) -> Ordering) -> T -where - T: Clone, -{ - let mut pivot_index = 0; - // Need to wrap in another closure or we get ownership complaints. - // Tried using an unboxed closure to get around this but couldn't get it to work. - pivot_index = partition_by(arr, pivot_index, &|a: &T, b: &T| cmp(a, b)); - let array_len = arr.len(); - match position.cmp(&pivot_index) { - Ordering::Equal => arr[position].clone(), - Ordering::Less => quickselect_by(&mut arr[0..pivot_index], position, cmp), - Ordering::Greater => quickselect_by( - &mut arr[pivot_index + 1..array_len], - position - pivot_index - 1, - cmp, - ), +impl VectorIndex for KDTree { + fn insert(&mut self, vector: IndexedVector) -> Result<(), DbError> { + self.insert_point(vector); + Ok(()) + } + + fn delete(&mut self, point_id: PointId) -> Result { + Ok(self.delete_point(point_id)) } -} -fn partition_by(arr: &mut [T], pivot_index: usize, cmp: &dyn Fn(&T, &T) -> Ordering) -> usize { - let array_len = arr.len(); - arr.swap(pivot_index, array_len - 1); - let mut store_index = 0; - for i in 0..array_len - 1 { - if cmp(&arr[i], &arr[array_len - 1]) == Less { - arr.swap(i, store_index); - store_index += 1; + fn search( + &self, + query_vector: core::DenseVector, + similarity: Similarity, + k: usize, + ) -> Result, DbError> { + if matches!(similarity, Similarity::Cosine | Similarity::Hamming) { + panic!("Cosine and hamming are not suitable similariyt metric when using a KDTree") } + + Ok(vec![]) } - arr.swap(array_len - 1, store_index); - store_index } diff --git a/crates/index/src/lib.rs b/crates/index/src/lib.rs index ef93755..eac20f0 100644 --- a/crates/index/src/lib.rs +++ b/crates/index/src/lib.rs @@ -1,6 +1,7 @@ use defs::{DbError, DenseVector, IndexedVector, PointId, Similarity}; pub mod flat; +pub mod kd_tree; pub trait VectorIndex { fn insert(&mut self, vector: IndexedVector) -> Result<(), DbError>; @@ -19,7 +20,7 @@ pub trait VectorIndex { } /// Distance function to get the distance between two vectors (taken from old version) -pub fn distance(a: DenseVector, b: DenseVector, dist_type: Similarity) -> f32 { +pub fn distance(a: &DenseVector, b: &DenseVector, dist_type: Similarity) -> f32 { assert_eq!(a.len(), b.len()); match dist_type { Similarity::Euclidean => { From 799dc77e74787a09f3ce2a28d57f5e5a4ac050b1 Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Wed, 15 Oct 2025 00:28:15 +0530 Subject: [PATCH 2/9] Fix merge residues --- crates/index/src/flat.rs | 2 +- crates/index/src/kd_tree.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/index/src/flat.rs b/crates/index/src/flat.rs index 53dc361..682a4d3 100644 --- a/crates/index/src/flat.rs +++ b/crates/index/src/flat.rs @@ -47,7 +47,7 @@ impl VectorIndex for FlatIndex { .index .iter() .map(|point| DistanceOrderedVector { - distance: distance(point.vector.clone(), query_vector.clone(), similarity), + distance: distance(&point.vector, &query_vector, similarity), query_vector: &query_vector, point_id: Some(point.id), }) diff --git a/crates/index/src/kd_tree.rs b/crates/index/src/kd_tree.rs index 92b30b8..310ee4f 100644 --- a/crates/index/src/kd_tree.rs +++ b/crates/index/src/kd_tree.rs @@ -1,4 +1,4 @@ -use core::{DbError, DenseVector, IndexedVector, PointId, Similarity}; +use defs::{DbError, DenseVector, IndexedVector, PointId, Similarity}; use std::{ cmp::Ordering, collections::{BinaryHeap, HashMap}, @@ -349,7 +349,7 @@ impl VectorIndex for KDTree { fn search( &self, - query_vector: core::DenseVector, + query_vector: DenseVector, similarity: Similarity, k: usize, ) -> Result, DbError> { From 84b3069dd0ba9a8b6f62493b50f027077aac9ec9 Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Wed, 17 Dec 2025 12:14:03 +0000 Subject: [PATCH 3/9] Update Cargo.lock --- Cargo.lock | 522 +++++++++++++++++------------------- crates/index/src/kd_tree.rs | 10 +- 2 files changed, 255 insertions(+), 277 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c82b36b..f6a1628 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,9 +19,9 @@ checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] @@ -82,22 +82,15 @@ dependencies = [ "miniz_oxide", "object", "rustc-demangle", - "windows-link 0.2.1", + "windows-link", ] -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" - [[package]] name = "bincode" version = "1.3.3" @@ -134,7 +127,7 @@ version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cexpr", "clang-sys", "itertools 0.13.0", @@ -154,21 +147,21 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.4" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "bumpalo" -version = "3.19.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" [[package]] name = "bzip2-sys" @@ -197,9 +190,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.40" +version = "1.2.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d05d92f4b1fd76aad469d46cdd858ca761576082cd37df81416691e50199fb" +checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" dependencies = [ "find-msvc-tools", "jobserver", @@ -218,9 +211,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "chrono" @@ -233,7 +226,7 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -309,9 +302,7 @@ version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" dependencies = [ - "defs", - "ordered-float", - "bitflags 2.9.4", + "bitflags 2.10.0", "crossterm_winapi", "libc", "mio 0.8.11", @@ -405,9 +396,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "find-msvc-tools" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0399f9d26e5191ce32c498bebd31e7a3ceabc2745f0ac54af3f335126c3f24b3" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" [[package]] name = "fnv" @@ -456,34 +447,12 @@ dependencies = [ ] [[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] - -[[package]] -name = "once_cell" -version = "1.21.3" name = "futures-core" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] -name = "ordered-float" -version = "5.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2c1f9f56e534ac6a9b8a4600bdf0f530fb393b5f393e7b4d03489c3cf0c3f01" -dependencies = [ - "num-traits", -] - -[[package]] -name = "peeking_take_while" -version = "0.1.2" name = "futures-io" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -525,19 +494,19 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", ] [[package]] name = "getrandom" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", "r-efi", - "wasi 0.14.7+wasi-0.2.4", + "wasip2", ] [[package]] @@ -582,6 +551,12 @@ dependencies = [ "foldhash", ] +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + [[package]] name = "heck" version = "0.5.0" @@ -590,12 +565,11 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -630,9 +604,9 @@ checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" [[package]] name = "hyper" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" dependencies = [ "atomic-waker", "bytes", @@ -684,9 +658,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.17" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" +checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" dependencies = [ "base64", "bytes", @@ -734,9 +708,9 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" dependencies = [ "displaydoc", "potential_utf", @@ -747,9 +721,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" dependencies = [ "displaydoc", "litemap", @@ -760,11 +734,10 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" dependencies = [ - "displaydoc", "icu_collections", "icu_normalizer_data", "icu_properties", @@ -775,42 +748,38 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" [[package]] name = "icu_properties" -version = "2.0.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" dependencies = [ - "displaydoc", "icu_collections", "icu_locale_core", "icu_properties_data", "icu_provider", - "potential_utf", "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "2.0.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" [[package]] name = "icu_provider" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" dependencies = [ "displaydoc", "icu_locale_core", - "stable_deref_trait", - "tinystr", "writeable", "yoke", "zerofrom", @@ -850,28 +819,18 @@ name = "index" version = "0.1.0" dependencies = [ "defs", + "ordered-float", "uuid", ] [[package]] name = "indexmap" -version = "2.11.4" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", - "hashbrown", -] - -[[package]] -name = "io-uring" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" -dependencies = [ - "bitflags 2.9.4", - "cfg-if", - "libc", + "hashbrown 0.16.1", ] [[package]] @@ -882,9 +841,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" dependencies = [ "memchr", "serde", @@ -920,15 +879,15 @@ version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "libc", ] [[package]] name = "js-sys" -version = "0.3.81" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" dependencies = [ "once_cell", "wasm-bindgen", @@ -948,9 +907,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.176" +version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" +checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" [[package]] name = "libloading" @@ -959,7 +918,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" dependencies = [ "cfg-if", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -980,9 +939,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.22" +version = "1.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" +checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" dependencies = [ "cc", "pkg-config", @@ -997,9 +956,9 @@ checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "litemap" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" [[package]] name = "lock_api" @@ -1012,9 +971,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lru" @@ -1022,7 +981,7 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" dependencies = [ - "hashbrown", + "hashbrown 0.15.5", ] [[package]] @@ -1080,19 +1039,19 @@ checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", "log", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "windows-sys 0.48.0", ] [[package]] name = "mio" -version = "1.0.4" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi 0.11.1+wasi-snapshot-preview1", - "windows-sys 0.59.0", + "wasi", + "windows-sys 0.61.2", ] [[package]] @@ -1148,11 +1107,11 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "openssl" -version = "0.10.73" +version = "0.10.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8" +checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cfg-if", "foreign-types", "libc", @@ -1180,9 +1139,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-sys" -version = "0.9.109" +version = "0.9.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571" +checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" dependencies = [ "cc", "libc", @@ -1190,6 +1149,15 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "ordered-float" +version = "5.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" +dependencies = [ + "num-traits", +] + [[package]] name = "owo-colors" version = "4.2.3" @@ -1216,7 +1184,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -1257,9 +1225,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "potential_utf" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" dependencies = [ "zerovec", ] @@ -1276,18 +1244,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.41" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] @@ -1304,7 +1272,7 @@ version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f44c9e68fd46eda15c646fbb85e1040b657a58cdc8c98db1d97a55930d991eef" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cassowary", "compact_str", "crossterm", @@ -1324,14 +1292,14 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", ] [[package]] name = "regex" -version = "1.11.3" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", @@ -1341,9 +1309,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.11" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", @@ -1352,15 +1320,15 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.6" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "reqwest" -version = "0.12.23" +version = "0.12.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb" +checksum = "3b4c14b2d9afca6a60277086b0cc6a6ae0b568f6f7916c943a8cdc79f8be240f" dependencies = [ "base64", "bytes", @@ -1447,7 +1415,7 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "errno", "libc", "linux-raw-sys", @@ -1456,9 +1424,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.32" +version = "0.23.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd3c25631629d034ce7cd9940adc9d45762d46de2b0f57193c4443b92c6d4d40" +checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ "once_cell", "rustls-pki-types", @@ -1469,18 +1437,18 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.12.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" +checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" dependencies = [ "zeroize", ] [[package]] name = "rustls-webpki" -version = "0.103.7" +version = "0.103.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10b3f4191e8a80e6b43eebabfac91e5dcecebb27a71f04e820c47ec41d314bf" +checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" dependencies = [ "ring", "rustls-pki-types", @@ -1520,7 +1488,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "core-foundation", "core-foundation-sys", "libc", @@ -1628,9 +1596,9 @@ dependencies = [ [[package]] name = "signal-hook-mio" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd" +checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc" dependencies = [ "libc", "mio 0.8.11", @@ -1639,9 +1607,9 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.6" +version = "1.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" dependencies = [ "libc", ] @@ -1660,12 +1628,12 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket2" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -1732,9 +1700,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.106" +version = "2.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" dependencies = [ "proc-macro2", "quote", @@ -1767,7 +1735,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "core-foundation", "system-configuration-sys", ] @@ -1789,7 +1757,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "rustix", "windows-sys 0.61.2", @@ -1806,9 +1774,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" dependencies = [ "displaydoc", "zerovec", @@ -1816,29 +1784,26 @@ dependencies = [ [[package]] name = "tokio" -version = "1.47.1" +version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" dependencies = [ - "backtrace", "bytes", - "io-uring", "libc", - "mio 1.0.4", + "mio 1.1.1", "parking_lot", "pin-project-lite", "signal-hook-registry", - "slab", "socket2", "tokio-macros", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] name = "tokio-macros" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", @@ -1867,9 +1832,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.16" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" dependencies = [ "bytes", "futures-core", @@ -1895,11 +1860,11 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "bytes", "futures-util", "http", @@ -1925,9 +1890,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" dependencies = [ "pin-project-lite", "tracing-core", @@ -1935,9 +1900,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" dependencies = [ "once_cell", "valuable", @@ -1955,9 +1920,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.20" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" dependencies = [ "sharded-slab", "thread_local", @@ -1999,9 +1964,9 @@ checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" [[package]] name = "unicode-ident" -version = "1.0.19" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "unicode-segmentation" @@ -2052,13 +2017,13 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.18.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "js-sys", - "serde", + "serde_core", "wasm-bindgen", ] @@ -2089,15 +2054,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.7+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" -dependencies = [ - "wasip2", -] - [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" @@ -2109,9 +2065,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.104" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ "cfg-if", "once_cell", @@ -2120,25 +2076,11 @@ dependencies = [ "wasm-bindgen-shared", ] -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.104" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - [[package]] name = "wasm-bindgen-futures" -version = "0.4.54" +version = "0.4.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", @@ -2149,9 +2091,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.104" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2159,31 +2101,31 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.104" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.104" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" -version = "0.3.81" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", @@ -2219,9 +2161,9 @@ checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", - "windows-link 0.2.1", - "windows-result 0.4.1", - "windows-strings 0.5.1", + "windows-link", + "windows-result", + "windows-strings", ] [[package]] @@ -2246,12 +2188,6 @@ dependencies = [ "syn", ] -[[package]] -name = "windows-link" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" - [[package]] name = "windows-link" version = "0.2.1" @@ -2260,22 +2196,13 @@ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-registry" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" -dependencies = [ - "windows-link 0.1.3", - "windows-result 0.3.4", - "windows-strings 0.4.2", -] - -[[package]] -name = "windows-result" -version = "0.3.4" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" dependencies = [ - "windows-link 0.1.3", + "windows-link", + "windows-result", + "windows-strings", ] [[package]] @@ -2284,16 +2211,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link 0.2.1", -] - -[[package]] -name = "windows-strings" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" -dependencies = [ - "windows-link 0.1.3", + "windows-link", ] [[package]] @@ -2302,7 +2220,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -2325,11 +2243,11 @@ dependencies = [ [[package]] name = "windows-sys" -version = "0.59.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.52.6", + "windows-targets 0.53.5", ] [[package]] @@ -2338,7 +2256,7 @@ version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -2365,13 +2283,30 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -2384,6 +2319,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -2396,6 +2337,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -2408,12 +2355,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -2426,6 +2385,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -2438,6 +2403,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -2450,6 +2421,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -2462,6 +2439,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "wit-bindgen" version = "0.46.0" @@ -2470,17 +2453,16 @@ checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "writeable" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" [[package]] name = "yoke" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" dependencies = [ - "serde", "stable_deref_trait", "yoke-derive", "zerofrom", @@ -2488,9 +2470,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", @@ -2527,9 +2509,9 @@ checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" dependencies = [ "displaydoc", "yoke", @@ -2538,9 +2520,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.4" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" dependencies = [ "yoke", "zerofrom", @@ -2549,9 +2531,9 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", diff --git a/crates/index/src/kd_tree.rs b/crates/index/src/kd_tree.rs index 310ee4f..5f25edd 100644 --- a/crates/index/src/kd_tree.rs +++ b/crates/index/src/kd_tree.rs @@ -1,11 +1,11 @@ +use crate::{distance, VectorIndex}; use defs::{DbError, DenseVector, IndexedVector, PointId, Similarity}; use std::{ cmp::Ordering, collections::{BinaryHeap, HashMap}, vec, }; - -use crate::{distance, VectorIndex}; +use uuid::Uuid; pub struct KDTree { dim: usize, @@ -47,10 +47,6 @@ impl PartialOrd for Neighbor { } impl KDTree { - pub fn mock() { - //here is the mock code - } - // Build an empty index with no points pub fn build_empty(dim: usize) -> Self { KDTree { @@ -101,7 +97,7 @@ impl KDTree { // Using swap so that we don't need to clone the whole vector let mut median_vec = IndexedVector { - id: 0, + id: Uuid::new_v4(), vector: vec![], }; // dummy std::mem::swap(&mut vectors[mid_idx], &mut median_vec); From fd02eb0d05b7548e8718fa0a5b19aff7da2787d9 Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Wed, 17 Dec 2025 20:42:23 +0000 Subject: [PATCH 4/9] Debug without rebuild --- crates/defs/src/error.rs | 1 + crates/index/src/kd_tree.rs | 113 +++++++++++------------------------- 2 files changed, 34 insertions(+), 80 deletions(-) diff --git a/crates/defs/src/error.rs b/crates/defs/src/error.rs index ff15895..ef476e0 100644 --- a/crates/defs/src/error.rs +++ b/crates/defs/src/error.rs @@ -7,4 +7,5 @@ pub enum DbError { IndexError(String), LockError, IndexInitError, //TODO: Change this + UnsupportedSimilarity, } diff --git a/crates/index/src/kd_tree.rs b/crates/index/src/kd_tree.rs index 5f25edd..29f8fb3 100644 --- a/crates/index/src/kd_tree.rs +++ b/crates/index/src/kd_tree.rs @@ -2,7 +2,7 @@ use crate::{distance, VectorIndex}; use defs::{DbError, DenseVector, IndexedVector, PointId, Similarity}; use std::{ cmp::Ordering, - collections::{BinaryHeap, HashMap}, + collections::{BinaryHeap, HashSet}, vec, }; use uuid::Uuid; @@ -10,8 +10,8 @@ use uuid::Uuid; pub struct KDTree { dim: usize, root: Option>, - // An in memory point map for lookup during delete - point_map: HashMap, + // In memory point ids, to check existence before O(n) deletion logic + point_ids: HashSet, } // the node which will be the part of the KD Tree @@ -52,7 +52,7 @@ impl KDTree { KDTree { dim, root: None, - point_map: HashMap::new(), + point_ids: HashSet::new(), } } @@ -63,15 +63,16 @@ impl KDTree { } else { let dim = vectors[0].vector.len(); - let mut point_map = HashMap::with_capacity(vectors.len()); - for iv in vectors.iter() { - point_map.insert(iv.id, iv.vector.clone()); + let mut point_ids = HashSet::with_capacity(vectors.len()); + for indexed_vector in vectors.iter() { + point_ids.insert(indexed_vector.id); } + let root_node = Self::build_recursive(&mut vectors, 0, dim); Ok(KDTree { dim, root: Some(root_node), - point_map, + point_ids, }) } } @@ -127,6 +128,9 @@ impl KDTree { } pub fn insert_point(&mut self, new_vector: IndexedVector) { + // Add to point_ids + self.point_ids.insert(new_vector.id); + // use a traverse function to get the final leaf where this belongs if self.root.is_none() { self.root = Some(Box::new(KDTreeNode { @@ -166,83 +170,31 @@ impl KDTree { left: None, right: None, is_deleted: false, - })) + })); } - // Deletes the point by first finding the corresponding node using DFS and then deleting // Returns true if point found and deleted, else false - // First make a lookup of vector from map, then traverse the tree to obtain the point and mark it as deleted - pub fn delete_point(&mut self, point_id: PointId) -> bool { - if let Some(vector_to_delete) = self.point_map.get(&point_id) { - let found_and_deleted = Self::find_and_mark_recursive( - &mut self.root, - vector_to_delete, - point_id, - 0, - self.dim, - ); - - if found_and_deleted { - self.point_map.remove(&point_id); + pub fn delete_point(&mut self, point_id: &PointId) -> bool { + if self.point_ids.contains(point_id) { + let deleted = Self::find_and_mark_deleted(&mut self.root, *point_id); + if deleted { + self.point_ids.remove(point_id); } - - return found_and_deleted; + return deleted; } false } - // Recursively finds and marks a node as deleted, - fn find_and_mark_recursive( - node_opt: &mut Option>, - target_vector: &DenseVector, - target_id: PointId, - depth: usize, - dim: usize, - ) -> bool { + fn find_and_mark_deleted(node_opt: &mut Option>, target_id: PointId) -> bool { if let Some(node) = node_opt { if node.indexed_vector.id == target_id { node.is_deleted = true; return true; } - let axis = depth % dim; - let target_val = target_vector[axis]; - let node_val = node.indexed_vector.vector[axis]; - - if target_val < node_val { - Self::find_and_mark_recursive( - &mut node.left, - target_vector, - target_id, - depth + 1, - dim, - ) - } else if target_val > node_val { - Self::find_and_mark_recursive( - &mut node.right, - target_vector, - target_id, - depth + 1, - dim, - ) - } else { - // Need to check both right and left nodes in this case - let left_found = Self::find_and_mark_recursive( - &mut node.left, - target_vector, - target_id, - depth + 1, - dim, - ); - let right_found = Self::find_and_mark_recursive( - &mut node.right, - target_vector, - target_id, - depth + 1, - dim, - ); - left_found || right_found - } + // Search left first then right + Self::find_and_mark_deleted(&mut node.left, target_id) + || Self::find_and_mark_deleted(&mut node.right, target_id) } else { false } @@ -298,11 +250,11 @@ impl KDTree { }; // Recurse on near side first - self.search_recursive(&near_side, query_vector, k, heap, depth + 1, dist_type); + self.search_recursive(near_side, query_vector, k, heap, depth + 1, dist_type); // Process the current node if !node.is_deleted { - //TODO: Use square distance in distance, why is there overhead of square + // TODO: Possible overhead, here heap stores sqrt euclidean distance, we can eliminate that by storing squared distances in case of euclidean let distance = distance(query_vector, &node.indexed_vector.vector, dist_type); if heap.len() < k { heap.push(Neighbor { @@ -319,11 +271,11 @@ impl KDTree { } // Pruning on the farther side to check if there are better candidates - //TODO: Change this when implementing square distance + let axis_diff = query_vector[axis] - node.indexed_vector.vector[axis]; let dist_to_plane = match dist_type { - Similarity::Euclidean => query_vector[axis] - node.indexed_vector.vector[axis], - Similarity::Manhattan => 1.0, - _ => unreachable!(), + Similarity::Euclidean => axis_diff.abs(), + Similarity::Manhattan => axis_diff.abs(), + _ => 0.0, // Cosine/Hamming - no effective pruning, always search }; if heap.len() < k || dist_to_plane < heap.peek().unwrap().distance { @@ -340,7 +292,7 @@ impl VectorIndex for KDTree { } fn delete(&mut self, point_id: PointId) -> Result { - Ok(self.delete_point(point_id)) + Ok(self.delete_point(&point_id)) } fn search( @@ -350,9 +302,10 @@ impl VectorIndex for KDTree { k: usize, ) -> Result, DbError> { if matches!(similarity, Similarity::Cosine | Similarity::Hamming) { - panic!("Cosine and hamming are not suitable similariyt metric when using a KDTree") + return Err(DbError::UnsupportedSimilarity); } - Ok(vec![]) + let results = self.search_top_k(query_vector, k, similarity); + Ok(results.into_iter().map(|(id, _)| id).collect()) } } From 687e30c57af312aefad919d884d36a7dd3d0d47c Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Thu, 18 Dec 2025 10:12:34 +0000 Subject: [PATCH 5/9] Add rebuild --- crates/index/src/kd_tree.rs | 184 ++++++++++++++++++++++++++++++++++-- 1 file changed, 175 insertions(+), 9 deletions(-) diff --git a/crates/index/src/kd_tree.rs b/crates/index/src/kd_tree.rs index 29f8fb3..08d7b48 100644 --- a/crates/index/src/kd_tree.rs +++ b/crates/index/src/kd_tree.rs @@ -12,15 +12,19 @@ pub struct KDTree { root: Option>, // In memory point ids, to check existence before O(n) deletion logic point_ids: HashSet, + // Rebuild tracking + total_nodes: usize, + deleted_count: usize, } // the node which will be the part of the KD Tree pub struct KDTreeNode { indexed_vector: IndexedVector, - split_dim: usize, left: Option>, right: Option>, is_deleted: bool, + + subtree_size: usize, } #[derive(Debug, Clone, PartialEq)] @@ -47,12 +51,18 @@ impl PartialOrd for Neighbor { } impl KDTree { + // Rebuild threshold + const BALANCE_THRESHOLD: f32 = 0.7; + const DELETE_REBUILD_RATIO: f32 = 0.25; + // Build an empty index with no points pub fn build_empty(dim: usize) -> Self { KDTree { dim, root: None, point_ids: HashSet::new(), + total_nodes: 0, + deleted_count: 0, } } @@ -73,6 +83,8 @@ impl KDTree { dim, root: Some(root_node), point_ids, + total_nodes: vectors.len(), + deleted_count: 0, }) } } @@ -120,41 +132,50 @@ impl KDTree { Box::new(KDTreeNode { indexed_vector: median_vec, - split_dim: axis, left, right, is_deleted: false, + subtree_size: vectors.len(), }) } pub fn insert_point(&mut self, new_vector: IndexedVector) { // Add to point_ids self.point_ids.insert(new_vector.id); + self.total_nodes += 1; // use a traverse function to get the final leaf where this belongs if self.root.is_none() { self.root = Some(Box::new(KDTreeNode { indexed_vector: new_vector, - split_dim: 0, left: None, right: None, is_deleted: false, + subtree_size: 1, })); return; } + let mut path: Vec<(usize, bool)> = Vec::new(); + let dim = self.dim; + let mut current_link = &mut self.root; let mut depth = 0; - let dim = self.dim; + // let dim = self.dim; while let Some(ref mut node_box) = current_link { let axis = depth % dim; let current_node = node_box.as_mut(); + current_node.subtree_size += 1; + let va = new_vector.vector[axis]; let vb = current_node.indexed_vector.vector[axis]; - if va <= vb { + let go_left = va <= vb; + path.push((depth, go_left)); + + if go_left { current_link = &mut current_node.left; } else { current_link = &mut current_node.right; @@ -163,14 +184,144 @@ impl KDTree { } // Assign the new node to current link which is &mut Option> - let axis = depth % dim; - *current_link = Some(Box::new(KDTreeNode { + let new_node = Box::new(KDTreeNode { indexed_vector: new_vector, - split_dim: axis, left: None, right: None, is_deleted: false, - })); + subtree_size: 1, + }); + + *current_link = Some(new_node); + + self.check_and_rebalance(&path); + } + + // Rebuild helper methods + fn is_unbalanced(node: &KDTreeNode) -> bool { + let left_size = node.left.as_ref().map_or(0, |n| n.subtree_size); + let right_size = node.right.as_ref().map_or(0, |n| n.subtree_size); + let max_child = left_size.max(right_size); + + max_child as f32 > Self::BALANCE_THRESHOLD * node.subtree_size as f32 + } + + fn collect_recursive(node: KDTreeNode, result: &mut Vec) { + if !node.is_deleted { + result.push(node.indexed_vector); + } + if let Some(left) = node.left { + Self::collect_recursive(*left, result); + } + if let Some(right) = node.right { + Self::collect_recursive(*right, result); + } + } + + fn collect_active_vectors(node: KDTreeNode) -> Vec { + let mut result = Vec::with_capacity(node.subtree_size); + Self::collect_recursive(node, &mut result); + result + } + + fn rebuild_at_depth(&mut self, path: &[(usize, bool)], target_depth: usize) { + let dim = self.dim; + + // Navigate to parent of target node + if target_depth == 0 { + // Rebuild root + if let Some(root) = self.root.take() { + let old_size = root.subtree_size; + let mut vectors = Self::collect_active_vectors(*root); + let new_size = vectors.len(); + if !vectors.is_empty() { + self.root = Some(Self::build_recursive(&mut vectors, 0, dim)); + } + // Update global counts as deleted nodes were purged + self.total_nodes -= old_size - new_size; + self.deleted_count = 0; + } + } else { + // Navigate to target node + let mut current_link = &mut self.root; + for (_depth, go_left) in path.iter().take(target_depth) { + let node = current_link.as_mut().unwrap(); + current_link = if *go_left { + &mut node.left + } else { + &mut node.right + }; + } + + // Rebuild tree at current link + if let Some(subtree_root) = current_link.take() { + let old_size = subtree_root.subtree_size; + let mut vectors = Self::collect_active_vectors(*subtree_root); + let new_size = vectors.len(); + + if !vectors.is_empty() { + *current_link = Some(Self::build_recursive(&mut vectors, target_depth, dim)); + } + + // Only update ancestors if size changed (deleted nodes were purged) + if old_size != new_size { + let size_diff = old_size - new_size; + self.subtract_size_from_ancestors(path, target_depth, size_diff); + + self.total_nodes -= size_diff; + self.deleted_count = self.deleted_count.saturating_sub(size_diff); + } + } + } + } + + fn subtract_size_from_ancestors( + &mut self, + path: &[(usize, bool)], + up_to_depth: usize, + diff: usize, + ) { + let mut current = &mut self.root; + for (_, go_left) in path.iter().take(up_to_depth) { + if let Some(node) = current { + node.subtree_size -= diff; + current = if *go_left { + &mut node.left + } else { + &mut node.right + }; + } + } + } + + fn check_and_rebalance(&mut self, path: &[(usize, bool)]) { + // Find the lowest depth where imbalance occurs + let mut unbalaced_depth: Option = None; + + let mut current = self.root.as_ref(); + + for (depth, go_left) in path { + if let Some(node) = current { + if Self::is_unbalanced(node) { + unbalaced_depth = Some(*depth); + break; + } + current = if *go_left { + node.left.as_ref() + } else { + node.right.as_ref() + }; + } + } + + if let Some(target_depth) = unbalaced_depth { + self.rebuild_at_depth(path, target_depth); + } + } + + fn should_rebuild_global(&self) -> bool { + self.total_nodes > 0 + && (self.deleted_count as f32 / self.total_nodes as f32) > Self::DELETE_REBUILD_RATIO } // Returns true if point found and deleted, else false @@ -178,8 +329,22 @@ impl KDTree { if self.point_ids.contains(point_id) { let deleted = Self::find_and_mark_deleted(&mut self.root, *point_id); if deleted { + self.deleted_count += 1; self.point_ids.remove(point_id); } + + if Self::should_rebuild_global(self) { + if let Some(root) = self.root.take() { + let mut vectors = Self::collect_active_vectors(*root); + if !vectors.is_empty() { + self.root = Some(Self::build_recursive(&mut vectors, 0, self.dim)); + } + + self.total_nodes = vectors.len(); + self.deleted_count = 0; + } + } + return deleted; } false @@ -286,6 +451,7 @@ impl KDTree { } impl VectorIndex for KDTree { + //TODO: Recalculate the total counts and deleted in main KD tree after rebuilds fn insert(&mut self, vector: IndexedVector) -> Result<(), DbError> { self.insert_point(vector); Ok(()) From 496bfc2ae768301fb3b5b7b80b5dfaae7ca4d623 Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Thu, 18 Dec 2025 10:51:36 +0000 Subject: [PATCH 6/9] Add tests --- crates/index/src/kd_tree.rs | 500 ++++++++++++++++++++++++++++++++++-- 1 file changed, 484 insertions(+), 16 deletions(-) diff --git a/crates/index/src/kd_tree.rs b/crates/index/src/kd_tree.rs index 08d7b48..3e563b1 100644 --- a/crates/index/src/kd_tree.rs +++ b/crates/index/src/kd_tree.rs @@ -130,12 +130,15 @@ impl KDTree { Some(Self::build_recursive(right_points, depth + 1, dim)) }; + let left_size = left.as_ref().map_or(0, |n| n.subtree_size); + let right_size = right.as_ref().map_or(0, |n| n.subtree_size); + Box::new(KDTreeNode { indexed_vector: median_vec, left, right, is_deleted: false, - subtree_size: vectors.len(), + subtree_size: left_size + right_size + 1, }) } @@ -295,26 +298,44 @@ impl KDTree { } fn check_and_rebalance(&mut self, path: &[(usize, bool)]) { - // Find the lowest depth where imbalance occurs - let mut unbalaced_depth: Option = None; + // Find the shallowest (closest to root) depth where imbalance occurs + // so that rebuilding fixes the largest unbalanced subtree + let mut unbalanced_depth: Option = None; let mut current = self.root.as_ref(); - for (depth, go_left) in path { + // Check root first (depth 0) + if let Some(node) = current { + if Self::is_unbalanced(node) { + unbalanced_depth = Some(0); + } + } + + // Then traverse the path and check each node + // Once we find the shallowest unbalanced node, break immediately + for (idx, (_depth, go_left)) in path.iter().enumerate() { + if unbalanced_depth.is_some() { + break; + } + if let Some(node) = current { - if Self::is_unbalanced(node) { - unbalaced_depth = Some(*depth); - break; - } current = if *go_left { node.left.as_ref() } else { node.right.as_ref() }; + + // Check the child node we just moved to (at depth idx + 1) + if let Some(child) = current { + if Self::is_unbalanced(child) { + unbalanced_depth = Some(idx + 1); + break; + } + } } } - if let Some(target_depth) = unbalaced_depth { + if let Some(target_depth) = unbalanced_depth { self.rebuild_at_depth(path, target_depth); } } @@ -436,14 +457,17 @@ impl KDTree { } // Pruning on the farther side to check if there are better candidates - let axis_diff = query_vector[axis] - node.indexed_vector.vector[axis]; - let dist_to_plane = match dist_type { - Similarity::Euclidean => axis_diff.abs(), - Similarity::Manhattan => axis_diff.abs(), - _ => 0.0, // Cosine/Hamming - no effective pruning, always search + // For Euclidean: the heap stores sqrt distances, so we compare axis_diff with the heap's max distance + // For Manhattan: direct comparison works since it's a sum of absolute differences + let axis_diff = (query_vector[axis] - node.indexed_vector.vector[axis]).abs(); + let should_search_far = match dist_type { + Similarity::Euclidean | Similarity::Manhattan => { + heap.len() < k || axis_diff < heap.peek().unwrap().distance + } + _ => true, // Cosine/Hamming - no effective pruning, always search }; - if heap.len() < k || dist_to_plane < heap.peek().unwrap().distance { + if should_search_far { self.search_recursive(far_side, query_vector, k, heap, depth + 1, dist_type); } } @@ -451,7 +475,6 @@ impl KDTree { } impl VectorIndex for KDTree { - //TODO: Recalculate the total counts and deleted in main KD tree after rebuilds fn insert(&mut self, vector: IndexedVector) -> Result<(), DbError> { self.insert_point(vector); Ok(()) @@ -475,3 +498,448 @@ impl VectorIndex for KDTree { Ok(results.into_iter().map(|(id, _)| id).collect()) } } + +#[cfg(test)] +mod tests { + use super::*; + + fn make_vector(vector: Vec) -> IndexedVector { + IndexedVector { + id: Uuid::new_v4(), + vector, + } + } + + fn make_vector_with_id(id: Uuid, vector: Vec) -> IndexedVector { + IndexedVector { id, vector } + } + + // Build Tests + + #[test] + fn test_build_empty() { + let tree = KDTree::build_empty(3); + assert!(tree.root.is_none()); + assert_eq!(tree.dim, 3); + assert_eq!(tree.total_nodes, 0); + assert!(tree.point_ids.is_empty()); + } + + #[test] + fn test_build_with_empty_vectors_returns_error() { + let result = KDTree::build(vec![]); + assert!(result.is_err()); + } + + #[test] + fn test_build_single_vector() { + let id = Uuid::new_v4(); + let vectors = vec![make_vector_with_id(id, vec![1.0, 2.0, 3.0])]; + let tree = KDTree::build(vectors).unwrap(); + + assert!(tree.root.is_some()); + assert_eq!(tree.dim, 3); + assert_eq!(tree.total_nodes, 1); + assert!(tree.point_ids.contains(&id)); + } + + #[test] + fn test_build_multiple_vectors() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 2.0]), + make_vector_with_id(id2, vec![3.0, 4.0]), + make_vector_with_id(id3, vec![5.0, 6.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + assert!(tree.root.is_some()); + assert_eq!(tree.dim, 2); + assert_eq!(tree.total_nodes, 3); + assert!(tree.point_ids.contains(&id1)); + assert!(tree.point_ids.contains(&id2)); + assert!(tree.point_ids.contains(&id3)); + } + + // Insert Tests + + #[test] + fn test_insert_into_empty_tree() { + let mut tree = KDTree::build_empty(2); + let id = Uuid::new_v4(); + let vector = make_vector_with_id(id, vec![1.0, 2.0]); + + let result = tree.insert(vector); + assert!(result.is_ok()); + assert_eq!(tree.total_nodes, 1); + assert!(tree.point_ids.contains(&id)); + assert!(tree.root.is_some()); + } + + #[test] + fn test_insert_multiple_vectors() { + let mut tree = KDTree::build_empty(2); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + + tree.insert(make_vector_with_id(id1, vec![1.0, 2.0])) + .unwrap(); + tree.insert(make_vector_with_id(id2, vec![3.0, 4.0])) + .unwrap(); + tree.insert(make_vector_with_id(id3, vec![5.0, 6.0])) + .unwrap(); + + assert_eq!(tree.total_nodes, 3); + assert!(tree.point_ids.contains(&id1)); + assert!(tree.point_ids.contains(&id2)); + assert!(tree.point_ids.contains(&id3)); + } + + // Delete Tests + + #[test] + fn test_delete_existing_point() { + let mut ids = Vec::new(); + let mut vectors = Vec::new(); + + // Create enough vectors so deleting one doesn't trigger global rebuild + for i in 0..10 { + let id = Uuid::new_v4(); + ids.push(id); + vectors.push(make_vector_with_id(id, vec![i as f32, i as f32])); + } + + let mut tree = KDTree::build(vectors).unwrap(); + + let result = tree.delete(ids[0]).unwrap(); + assert!(result); + assert!(!tree.point_ids.contains(&ids[0])); + assert_eq!(tree.deleted_count, 1); + } + + #[test] + fn test_delete_non_existing_point() { + let id1 = Uuid::new_v4(); + let vectors = vec![make_vector_with_id(id1, vec![1.0, 2.0])]; + let mut tree = KDTree::build(vectors).unwrap(); + + let non_existing_id = Uuid::new_v4(); + let result = tree.delete(non_existing_id).unwrap(); + assert!(!result); + assert_eq!(tree.deleted_count, 0); + } + + #[test] + fn test_delete_from_empty_tree() { + let mut tree = KDTree::build_empty(2); + let result = tree.delete(Uuid::new_v4()).unwrap(); + assert!(!result); + } + + #[test] + fn test_deleted_point_not_in_search_results() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![0.0, 0.0]), + make_vector_with_id(id2, vec![1.0, 1.0]), + make_vector_with_id(id3, vec![10.0, 10.0]), + ]; + let mut tree = KDTree::build(vectors).unwrap(); + + // Delete the closest point + tree.delete(id1).unwrap(); + + // Search should not return the deleted point + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 2) + .unwrap(); + assert!(!results.contains(&id1)); + assert!(results.contains(&id2)); + } + + // Search Tests (VectorIndex trait) + + #[test] + fn test_search_empty_tree() { + let tree = KDTree::build_empty(2); + let results = tree + .search(vec![1.0, 2.0], Similarity::Euclidean, 5) + .unwrap(); + assert!(results.is_empty()); + } + + #[test] + fn test_search_euclidean() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![2.0, 2.0]), + make_vector_with_id(id3, vec![10.0, 10.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 2) + .unwrap(); + assert_eq!(results.len(), 2); + assert_eq!(results[0], id1); // Closest + assert_eq!(results[1], id2); // Second closest + } + + #[test] + fn test_search_manhattan() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![2.0, 2.0]), + make_vector_with_id(id3, vec![5.0, 5.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Manhattan, 2) + .unwrap(); + assert_eq!(results.len(), 2); + assert_eq!(results[0], id1); + assert_eq!(results[1], id2); + } + + #[test] + fn test_search_unsupported_similarity_cosine() { + let vectors = vec![make_vector(vec![1.0, 2.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let result = tree.search(vec![1.0, 2.0], Similarity::Cosine, 1); + assert!(matches!(result, Err(DbError::UnsupportedSimilarity))); + } + + #[test] + fn test_search_unsupported_similarity_hamming() { + let vectors = vec![make_vector(vec![1.0, 2.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let result = tree.search(vec![1.0, 2.0], Similarity::Hamming, 1); + assert!(matches!(result, Err(DbError::UnsupportedSimilarity))); + } + + #[test] + fn test_search_k_larger_than_tree_size() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![2.0, 2.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 10) + .unwrap(); + assert_eq!(results.len(), 2); // Should return all available points + } + + #[test] + fn test_search_exact_match() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![5.0, 5.0]), + make_vector_with_id(id2, vec![10.0, 10.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![5.0, 5.0], Similarity::Euclidean, 1) + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0], id1); + } + + // Search Correctness Tests + + #[test] + fn test_search_correctness_3d() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let id4 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![0.0, 0.0, 0.0]), + make_vector_with_id(id2, vec![1.0, 1.0, 1.0]), + make_vector_with_id(id3, vec![2.0, 2.0, 2.0]), + make_vector_with_id(id4, vec![10.0, 10.0, 10.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.5, 0.5, 0.5], Similarity::Euclidean, 2) + .unwrap(); + // id1 at distance sqrt(0.75) ≈ 0.866 + // id2 at distance sqrt(0.75) ≈ 0.866 + // Both are equidistant, should return both + assert_eq!(results.len(), 2); + assert!(results.contains(&id1) || results.contains(&id2)); + } + + #[test] + fn test_search_after_insert() { + let mut tree = KDTree::build_empty(2); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + + tree.insert(make_vector_with_id(id1, vec![10.0, 10.0])) + .unwrap(); + tree.insert(make_vector_with_id(id2, vec![1.0, 1.0])) + .unwrap(); + tree.insert(make_vector_with_id(id3, vec![5.0, 5.0])) + .unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 2) + .unwrap(); + assert_eq!(results[0], id2); // Closest to origin + assert_eq!(results[1], id3); // Second closest + } + + #[test] + fn test_search_high_dimensional() { + let dim = 10; + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + + let vectors = vec![ + make_vector_with_id(id1, vec![0.0; dim]), + make_vector_with_id(id2, vec![1.0; dim]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let query = vec![0.1; dim]; + let results = tree.search(query, Similarity::Euclidean, 1).unwrap(); + assert_eq!(results[0], id1); // Closer to all-zeros + } + + // Rebalancing Tests + + #[test] + fn test_many_inserts_maintains_searchability() { + let mut tree = KDTree::build_empty(2); + let mut ids = Vec::new(); + + // Insert many points that would cause imbalance + for i in 0..20 { + let id = Uuid::new_v4(); + ids.push(id); + tree.insert(make_vector_with_id(id, vec![i as f32, i as f32])) + .unwrap(); + } + + // Search should still work correctly + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 5) + .unwrap(); + assert_eq!(results.len(), 5); + // First result should be the point at (0, 0) + assert_eq!(results[0], ids[0]); + } + + #[test] + fn test_delete_triggers_rebuild() { + let mut ids = Vec::new(); + let mut vectors = Vec::new(); + + for i in 0..10 { + let id = Uuid::new_v4(); + ids.push(id); + vectors.push(make_vector_with_id(id, vec![i as f32, i as f32])); + } + + let mut tree = KDTree::build(vectors).unwrap(); + + // Delete enough points to trigger rebuild (> 25%) + for id in ids.iter().take(3) { + tree.delete(*id).unwrap(); + } + + // Tree should still function correctly + let results = tree + .search(vec![5.0, 5.0], Similarity::Euclidean, 3) + .unwrap(); + assert_eq!(results.len(), 3); + // Deleted points should not appear + for id in ids.iter().take(3) { + assert!(!results.contains(id)); + } + } + + // ==================== Edge Cases ==================== + + #[test] + fn test_single_point_search() { + let id = Uuid::new_v4(); + let vectors = vec![make_vector_with_id(id, vec![5.0, 5.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 1) + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0], id); + } + + #[test] + fn test_duplicate_coordinates() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![1.0, 1.0]), // Same coordinates + make_vector_with_id(id3, vec![2.0, 2.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![1.0, 1.0], Similarity::Euclidean, 2) + .unwrap(); + assert_eq!(results.len(), 2); + // Both id1 and id2 should be in results (both at distance 0) + assert!(results.contains(&id1) || results.contains(&id2)); + } + + #[test] + fn test_negative_coordinates() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![-1.0, -1.0]), + make_vector_with_id(id2, vec![1.0, 1.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![-0.5, -0.5], Similarity::Euclidean, 1) + .unwrap(); + assert_eq!(results[0], id1); + } + + #[test] + fn test_search_with_zero_k() { + let vectors = vec![make_vector(vec![1.0, 2.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![1.0, 2.0], Similarity::Euclidean, 0) + .unwrap(); + assert!(results.is_empty()); + } +} From 327bb031f08ac3059814b88548130a40e69a998f Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Sat, 27 Dec 2025 13:36:56 +0000 Subject: [PATCH 7/9] Refactor code --- Cargo.lock | 25 - crates/index/Cargo.toml | 3 +- .../src/{kd_tree.rs => kd_tree/index.rs} | 497 +------------ crates/index/src/kd_tree/mod.rs | 5 + crates/index/src/kd_tree/tests.rs | 703 ++++++++++++++++++ crates/index/src/kd_tree/types.rs | 37 + 6 files changed, 755 insertions(+), 515 deletions(-) rename crates/index/src/{kd_tree.rs => kd_tree/index.rs} (50%) create mode 100644 crates/index/src/kd_tree/mod.rs create mode 100644 crates/index/src/kd_tree/tests.rs create mode 100644 crates/index/src/kd_tree/types.rs diff --git a/Cargo.lock b/Cargo.lock index 94b4c6b..cf44b74 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -66,12 +66,6 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - [[package]] name = "axum" version = "0.8.8" @@ -896,7 +890,6 @@ name = "index" version = "0.1.0" dependencies = [ "defs", - "ordered-float", "uuid", ] @@ -1188,15 +1181,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] - [[package]] name = "object" version = "0.37.3" @@ -1256,15 +1240,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "ordered-float" -version = "5.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" -dependencies = [ - "num-traits", -] - [[package]] name = "owo-colors" version = "4.2.3" diff --git a/crates/index/Cargo.toml b/crates/index/Cargo.toml index bb610cf..35f9957 100644 --- a/crates/index/Cargo.toml +++ b/crates/index/Cargo.toml @@ -7,6 +7,5 @@ edition.workspace = true license.workspace = true [dependencies] -defs = { path = "../defs" } -ordered-float = "5.0.0" +defs.workspace = true uuid.workspace = true diff --git a/crates/index/src/kd_tree.rs b/crates/index/src/kd_tree/index.rs similarity index 50% rename from crates/index/src/kd_tree.rs rename to crates/index/src/kd_tree/index.rs index fbc1943..dc623a2 100644 --- a/crates/index/src/kd_tree.rs +++ b/crates/index/src/kd_tree/index.rs @@ -1,3 +1,4 @@ +use super::types::{KDTreeNode, Neighbor}; use crate::{VectorIndex, distance}; use defs::{DbError, DenseVector, IndexedVector, PointId, Similarity}; use std::{ @@ -8,46 +9,13 @@ use std::{ use uuid::Uuid; pub struct KDTree { - dim: usize, - root: Option>, + pub dim: usize, + pub root: Option>, // In memory point ids, to check existence before O(n) deletion logic - point_ids: HashSet, + pub point_ids: HashSet, // Rebuild tracking - total_nodes: usize, - deleted_count: usize, -} - -// the node which will be the part of the KD Tree -pub struct KDTreeNode { - indexed_vector: IndexedVector, - left: Option>, - right: Option>, - is_deleted: bool, - - subtree_size: usize, -} - -#[derive(Debug, Clone, PartialEq)] -struct Neighbor { - id: PointId, - distance: f32, -} - -impl Eq for Neighbor {} - -// Custom Ord implementation for the max-heap -impl Ord for Neighbor { - fn cmp(&self, other: &Self) -> Ordering { - self.distance - .partial_cmp(&other.distance) - .unwrap_or(Ordering::Equal) - } -} - -impl PartialOrd for Neighbor { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } + pub total_nodes: usize, + pub deleted_count: usize, } impl KDTree { @@ -164,7 +132,6 @@ impl KDTree { let mut current_link = &mut self.root; let mut depth = 0; - // let dim = self.dim; while let Some(node_box) = current_link { let axis = depth % dim; @@ -438,7 +405,6 @@ impl KDTree { // Recurse on near side first self.search_recursive(near_side, query_vector, k, heap, depth + 1, dist_type); - // Process the current node if !node.is_deleted { // TODO: Possible overhead, here heap stores sqrt euclidean distance, we can eliminate that by storing squared distances in case of euclidean let distance = distance(query_vector, &node.indexed_vector.vector, dist_type); @@ -457,12 +423,12 @@ impl KDTree { } // Pruning on the farther side to check if there are better candidates - // For Euclidean: the heap stores sqrt distances, so we compare axis_diff with the heap's max distance - // For Manhattan: direct comparison works since it's a sum of absolute differences + // Use <= to handle ties: when axis_diff == current worst distance, there could be + // a point on the far side with the same distance that should be included let axis_diff = (query_vector[axis] - node.indexed_vector.vector[axis]).abs(); let should_search_far = match dist_type { Similarity::Euclidean | Similarity::Manhattan => { - heap.len() < k || axis_diff < heap.peek().unwrap().distance + heap.len() < k || axis_diff <= heap.peek().unwrap().distance } _ => true, // Cosine/Hamming - no effective pruning, always search }; @@ -498,448 +464,3 @@ impl VectorIndex for KDTree { Ok(results.into_iter().map(|(id, _)| id).collect()) } } - -#[cfg(test)] -mod tests { - use super::*; - - fn make_vector(vector: Vec) -> IndexedVector { - IndexedVector { - id: Uuid::new_v4(), - vector, - } - } - - fn make_vector_with_id(id: Uuid, vector: Vec) -> IndexedVector { - IndexedVector { id, vector } - } - - // Build Tests - - #[test] - fn test_build_empty() { - let tree = KDTree::build_empty(3); - assert!(tree.root.is_none()); - assert_eq!(tree.dim, 3); - assert_eq!(tree.total_nodes, 0); - assert!(tree.point_ids.is_empty()); - } - - #[test] - fn test_build_with_empty_vectors_returns_error() { - let result = KDTree::build(vec![]); - assert!(result.is_err()); - } - - #[test] - fn test_build_single_vector() { - let id = Uuid::new_v4(); - let vectors = vec![make_vector_with_id(id, vec![1.0, 2.0, 3.0])]; - let tree = KDTree::build(vectors).unwrap(); - - assert!(tree.root.is_some()); - assert_eq!(tree.dim, 3); - assert_eq!(tree.total_nodes, 1); - assert!(tree.point_ids.contains(&id)); - } - - #[test] - fn test_build_multiple_vectors() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![1.0, 2.0]), - make_vector_with_id(id2, vec![3.0, 4.0]), - make_vector_with_id(id3, vec![5.0, 6.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - assert!(tree.root.is_some()); - assert_eq!(tree.dim, 2); - assert_eq!(tree.total_nodes, 3); - assert!(tree.point_ids.contains(&id1)); - assert!(tree.point_ids.contains(&id2)); - assert!(tree.point_ids.contains(&id3)); - } - - // Insert Tests - - #[test] - fn test_insert_into_empty_tree() { - let mut tree = KDTree::build_empty(2); - let id = Uuid::new_v4(); - let vector = make_vector_with_id(id, vec![1.0, 2.0]); - - let result = tree.insert(vector); - assert!(result.is_ok()); - assert_eq!(tree.total_nodes, 1); - assert!(tree.point_ids.contains(&id)); - assert!(tree.root.is_some()); - } - - #[test] - fn test_insert_multiple_vectors() { - let mut tree = KDTree::build_empty(2); - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - - tree.insert(make_vector_with_id(id1, vec![1.0, 2.0])) - .unwrap(); - tree.insert(make_vector_with_id(id2, vec![3.0, 4.0])) - .unwrap(); - tree.insert(make_vector_with_id(id3, vec![5.0, 6.0])) - .unwrap(); - - assert_eq!(tree.total_nodes, 3); - assert!(tree.point_ids.contains(&id1)); - assert!(tree.point_ids.contains(&id2)); - assert!(tree.point_ids.contains(&id3)); - } - - // Delete Tests - - #[test] - fn test_delete_existing_point() { - let mut ids = Vec::new(); - let mut vectors = Vec::new(); - - // Create enough vectors so deleting one doesn't trigger global rebuild - for i in 0..10 { - let id = Uuid::new_v4(); - ids.push(id); - vectors.push(make_vector_with_id(id, vec![i as f32, i as f32])); - } - - let mut tree = KDTree::build(vectors).unwrap(); - - let result = tree.delete(ids[0]).unwrap(); - assert!(result); - assert!(!tree.point_ids.contains(&ids[0])); - assert_eq!(tree.deleted_count, 1); - } - - #[test] - fn test_delete_non_existing_point() { - let id1 = Uuid::new_v4(); - let vectors = vec![make_vector_with_id(id1, vec![1.0, 2.0])]; - let mut tree = KDTree::build(vectors).unwrap(); - - let non_existing_id = Uuid::new_v4(); - let result = tree.delete(non_existing_id).unwrap(); - assert!(!result); - assert_eq!(tree.deleted_count, 0); - } - - #[test] - fn test_delete_from_empty_tree() { - let mut tree = KDTree::build_empty(2); - let result = tree.delete(Uuid::new_v4()).unwrap(); - assert!(!result); - } - - #[test] - fn test_deleted_point_not_in_search_results() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![0.0, 0.0]), - make_vector_with_id(id2, vec![1.0, 1.0]), - make_vector_with_id(id3, vec![10.0, 10.0]), - ]; - let mut tree = KDTree::build(vectors).unwrap(); - - // Delete the closest point - tree.delete(id1).unwrap(); - - // Search should not return the deleted point - let results = tree - .search(vec![0.0, 0.0], Similarity::Euclidean, 2) - .unwrap(); - assert!(!results.contains(&id1)); - assert!(results.contains(&id2)); - } - - // Search Tests (VectorIndex trait) - - #[test] - fn test_search_empty_tree() { - let tree = KDTree::build_empty(2); - let results = tree - .search(vec![1.0, 2.0], Similarity::Euclidean, 5) - .unwrap(); - assert!(results.is_empty()); - } - - #[test] - fn test_search_euclidean() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![1.0, 1.0]), - make_vector_with_id(id2, vec![2.0, 2.0]), - make_vector_with_id(id3, vec![10.0, 10.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![0.0, 0.0], Similarity::Euclidean, 2) - .unwrap(); - assert_eq!(results.len(), 2); - assert_eq!(results[0], id1); // Closest - assert_eq!(results[1], id2); // Second closest - } - - #[test] - fn test_search_manhattan() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![1.0, 1.0]), - make_vector_with_id(id2, vec![2.0, 2.0]), - make_vector_with_id(id3, vec![5.0, 5.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![0.0, 0.0], Similarity::Manhattan, 2) - .unwrap(); - assert_eq!(results.len(), 2); - assert_eq!(results[0], id1); - assert_eq!(results[1], id2); - } - - #[test] - fn test_search_unsupported_similarity_cosine() { - let vectors = vec![make_vector(vec![1.0, 2.0])]; - let tree = KDTree::build(vectors).unwrap(); - - let result = tree.search(vec![1.0, 2.0], Similarity::Cosine, 1); - assert!(matches!(result, Err(DbError::UnsupportedSimilarity))); - } - - #[test] - fn test_search_unsupported_similarity_hamming() { - let vectors = vec![make_vector(vec![1.0, 2.0])]; - let tree = KDTree::build(vectors).unwrap(); - - let result = tree.search(vec![1.0, 2.0], Similarity::Hamming, 1); - assert!(matches!(result, Err(DbError::UnsupportedSimilarity))); - } - - #[test] - fn test_search_k_larger_than_tree_size() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![1.0, 1.0]), - make_vector_with_id(id2, vec![2.0, 2.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![0.0, 0.0], Similarity::Euclidean, 10) - .unwrap(); - assert_eq!(results.len(), 2); // Should return all available points - } - - #[test] - fn test_search_exact_match() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![5.0, 5.0]), - make_vector_with_id(id2, vec![10.0, 10.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![5.0, 5.0], Similarity::Euclidean, 1) - .unwrap(); - assert_eq!(results.len(), 1); - assert_eq!(results[0], id1); - } - - // Search Correctness Tests - - #[test] - fn test_search_correctness_3d() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - let id4 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![0.0, 0.0, 0.0]), - make_vector_with_id(id2, vec![1.0, 1.0, 1.0]), - make_vector_with_id(id3, vec![2.0, 2.0, 2.0]), - make_vector_with_id(id4, vec![10.0, 10.0, 10.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![0.5, 0.5, 0.5], Similarity::Euclidean, 2) - .unwrap(); - // id1 at distance sqrt(0.75) ≈ 0.866 - // id2 at distance sqrt(0.75) ≈ 0.866 - // Both are equidistant, should return both - assert_eq!(results.len(), 2); - assert!(results.contains(&id1) || results.contains(&id2)); - } - - #[test] - fn test_search_after_insert() { - let mut tree = KDTree::build_empty(2); - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - - tree.insert(make_vector_with_id(id1, vec![10.0, 10.0])) - .unwrap(); - tree.insert(make_vector_with_id(id2, vec![1.0, 1.0])) - .unwrap(); - tree.insert(make_vector_with_id(id3, vec![5.0, 5.0])) - .unwrap(); - - let results = tree - .search(vec![0.0, 0.0], Similarity::Euclidean, 2) - .unwrap(); - assert_eq!(results[0], id2); // Closest to origin - assert_eq!(results[1], id3); // Second closest - } - - #[test] - fn test_search_high_dimensional() { - let dim = 10; - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - - let vectors = vec![ - make_vector_with_id(id1, vec![0.0; dim]), - make_vector_with_id(id2, vec![1.0; dim]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let query = vec![0.1; dim]; - let results = tree.search(query, Similarity::Euclidean, 1).unwrap(); - assert_eq!(results[0], id1); // Closer to all-zeros - } - - // Rebalancing Tests - - #[test] - fn test_many_inserts_maintains_searchability() { - let mut tree = KDTree::build_empty(2); - let mut ids = Vec::new(); - - // Insert many points that would cause imbalance - for i in 0..20 { - let id = Uuid::new_v4(); - ids.push(id); - tree.insert(make_vector_with_id(id, vec![i as f32, i as f32])) - .unwrap(); - } - - // Search should still work correctly - let results = tree - .search(vec![0.0, 0.0], Similarity::Euclidean, 5) - .unwrap(); - assert_eq!(results.len(), 5); - // First result should be the point at (0, 0) - assert_eq!(results[0], ids[0]); - } - - #[test] - fn test_delete_triggers_rebuild() { - let mut ids = Vec::new(); - let mut vectors = Vec::new(); - - for i in 0..10 { - let id = Uuid::new_v4(); - ids.push(id); - vectors.push(make_vector_with_id(id, vec![i as f32, i as f32])); - } - - let mut tree = KDTree::build(vectors).unwrap(); - - // Delete enough points to trigger rebuild (> 25%) - for id in ids.iter().take(3) { - tree.delete(*id).unwrap(); - } - - // Tree should still function correctly - let results = tree - .search(vec![5.0, 5.0], Similarity::Euclidean, 3) - .unwrap(); - assert_eq!(results.len(), 3); - // Deleted points should not appear - for id in ids.iter().take(3) { - assert!(!results.contains(id)); - } - } - - // ==================== Edge Cases ==================== - - #[test] - fn test_single_point_search() { - let id = Uuid::new_v4(); - let vectors = vec![make_vector_with_id(id, vec![5.0, 5.0])]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![0.0, 0.0], Similarity::Euclidean, 1) - .unwrap(); - assert_eq!(results.len(), 1); - assert_eq!(results[0], id); - } - - #[test] - fn test_duplicate_coordinates() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![1.0, 1.0]), - make_vector_with_id(id2, vec![1.0, 1.0]), // Same coordinates - make_vector_with_id(id3, vec![2.0, 2.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![1.0, 1.0], Similarity::Euclidean, 2) - .unwrap(); - assert_eq!(results.len(), 2); - // Both id1 and id2 should be in results (both at distance 0) - assert!(results.contains(&id1) || results.contains(&id2)); - } - - #[test] - fn test_negative_coordinates() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![-1.0, -1.0]), - make_vector_with_id(id2, vec![1.0, 1.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![-0.5, -0.5], Similarity::Euclidean, 1) - .unwrap(); - assert_eq!(results[0], id1); - } - - #[test] - fn test_search_with_zero_k() { - let vectors = vec![make_vector(vec![1.0, 2.0])]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![1.0, 2.0], Similarity::Euclidean, 0) - .unwrap(); - assert!(results.is_empty()); - } -} diff --git a/crates/index/src/kd_tree/mod.rs b/crates/index/src/kd_tree/mod.rs new file mode 100644 index 0000000..8765acf --- /dev/null +++ b/crates/index/src/kd_tree/mod.rs @@ -0,0 +1,5 @@ +pub mod index; +pub mod types; + +#[cfg(test)] +mod tests; diff --git a/crates/index/src/kd_tree/tests.rs b/crates/index/src/kd_tree/tests.rs new file mode 100644 index 0000000..faefae5 --- /dev/null +++ b/crates/index/src/kd_tree/tests.rs @@ -0,0 +1,703 @@ +use super::index::KDTree; +use crate::VectorIndex; +use crate::distance; +use crate::flat::FlatIndex; +use defs::{DbError, IndexedVector, Similarity}; +use std::collections::HashSet; +use uuid::Uuid; + +fn make_vector(vector: Vec) -> IndexedVector { + IndexedVector { + id: Uuid::new_v4(), + vector, + } +} + +fn make_vector_with_id(id: Uuid, vector: Vec) -> IndexedVector { + IndexedVector { id, vector } +} + +// Build Tests + +#[test] +fn test_build_empty() { + let tree = KDTree::build_empty(3); + assert!(tree.root.is_none()); + assert_eq!(tree.dim, 3); + assert_eq!(tree.total_nodes, 0); + assert!(tree.point_ids.is_empty()); +} + +#[test] +fn test_build_with_empty_vectors_returns_error() { + let result = KDTree::build(vec![]); + assert!(result.is_err()); +} + +#[test] +fn test_build_single_vector() { + let id = Uuid::new_v4(); + let vectors = vec![make_vector_with_id(id, vec![1.0, 2.0, 3.0])]; + let tree = KDTree::build(vectors).unwrap(); + + assert!(tree.root.is_some()); + assert_eq!(tree.dim, 3); + assert_eq!(tree.total_nodes, 1); + assert!(tree.point_ids.contains(&id)); +} + +#[test] +fn test_build_multiple_vectors() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 2.0]), + make_vector_with_id(id2, vec![3.0, 4.0]), + make_vector_with_id(id3, vec![5.0, 6.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + assert!(tree.root.is_some()); + assert_eq!(tree.dim, 2); + assert_eq!(tree.total_nodes, 3); + assert!(tree.point_ids.contains(&id1)); + assert!(tree.point_ids.contains(&id2)); + assert!(tree.point_ids.contains(&id3)); +} + +// Insert Tests + +#[test] +fn test_insert_into_empty_tree() { + let mut tree = KDTree::build_empty(2); + let id = Uuid::new_v4(); + let vector = make_vector_with_id(id, vec![1.0, 2.0]); + + let result = tree.insert(vector); + assert!(result.is_ok()); + assert_eq!(tree.total_nodes, 1); + assert!(tree.point_ids.contains(&id)); + assert!(tree.root.is_some()); +} + +#[test] +fn test_insert_multiple_vectors() { + let mut tree = KDTree::build_empty(2); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + + tree.insert(make_vector_with_id(id1, vec![1.0, 2.0])) + .unwrap(); + tree.insert(make_vector_with_id(id2, vec![3.0, 4.0])) + .unwrap(); + tree.insert(make_vector_with_id(id3, vec![5.0, 6.0])) + .unwrap(); + + assert_eq!(tree.total_nodes, 3); + assert!(tree.point_ids.contains(&id1)); + assert!(tree.point_ids.contains(&id2)); + assert!(tree.point_ids.contains(&id3)); +} + +// Delete Tests + +#[test] +fn test_delete_existing_point() { + let mut ids = Vec::new(); + let mut vectors = Vec::new(); + + // Create enough vectors so deleting one doesn't trigger global rebuild + for i in 0..10 { + let id = Uuid::new_v4(); + ids.push(id); + vectors.push(make_vector_with_id(id, vec![i as f32, i as f32])); + } + + let mut tree = KDTree::build(vectors).unwrap(); + + let result = tree.delete(ids[0]).unwrap(); + assert!(result); + assert!(!tree.point_ids.contains(&ids[0])); + assert_eq!(tree.deleted_count, 1); +} + +#[test] +fn test_delete_non_existing_point() { + let id1 = Uuid::new_v4(); + let vectors = vec![make_vector_with_id(id1, vec![1.0, 2.0])]; + let mut tree = KDTree::build(vectors).unwrap(); + + let non_existing_id = Uuid::new_v4(); + let result = tree.delete(non_existing_id).unwrap(); + assert!(!result); + assert_eq!(tree.deleted_count, 0); +} + +#[test] +fn test_delete_from_empty_tree() { + let mut tree = KDTree::build_empty(2); + let result = tree.delete(Uuid::new_v4()).unwrap(); + assert!(!result); +} + +#[test] +fn test_deleted_point_not_in_search_results() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![0.0, 0.0]), + make_vector_with_id(id2, vec![1.0, 1.0]), + make_vector_with_id(id3, vec![10.0, 10.0]), + ]; + let mut tree = KDTree::build(vectors).unwrap(); + + // Delete the closest point + tree.delete(id1).unwrap(); + + // Search should not return the deleted point + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 2) + .unwrap(); + assert!(!results.contains(&id1)); + assert!(results.contains(&id2)); +} + +// Search Tests (VectorIndex trait) + +#[test] +fn test_search_empty_tree() { + let tree = KDTree::build_empty(2); + let results = tree + .search(vec![1.0, 2.0], Similarity::Euclidean, 5) + .unwrap(); + assert!(results.is_empty()); +} + +#[test] +fn test_search_euclidean() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![2.0, 2.0]), + make_vector_with_id(id3, vec![10.0, 10.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 2) + .unwrap(); + assert_eq!(results.len(), 2); + assert_eq!(results[0], id1); // Closest + assert_eq!(results[1], id2); // Second closest +} + +#[test] +fn test_search_manhattan() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![2.0, 2.0]), + make_vector_with_id(id3, vec![5.0, 5.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Manhattan, 2) + .unwrap(); + assert_eq!(results.len(), 2); + assert_eq!(results[0], id1); + assert_eq!(results[1], id2); +} + +#[test] +fn test_search_unsupported_similarity_cosine() { + let vectors = vec![make_vector(vec![1.0, 2.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let result = tree.search(vec![1.0, 2.0], Similarity::Cosine, 1); + assert!(matches!(result, Err(DbError::UnsupportedSimilarity))); +} + +#[test] +fn test_search_unsupported_similarity_hamming() { + let vectors = vec![make_vector(vec![1.0, 2.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let result = tree.search(vec![1.0, 2.0], Similarity::Hamming, 1); + assert!(matches!(result, Err(DbError::UnsupportedSimilarity))); +} + +#[test] +fn test_search_k_larger_than_tree_size() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![2.0, 2.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 10) + .unwrap(); + assert_eq!(results.len(), 2); // Should return all available points +} + +#[test] +fn test_search_exact_match() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![5.0, 5.0]), + make_vector_with_id(id2, vec![10.0, 10.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![5.0, 5.0], Similarity::Euclidean, 1) + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0], id1); +} + +// Search Correctness Tests + +#[test] +fn test_search_correctness_3d() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let id4 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![0.0, 0.0, 0.0]), + make_vector_with_id(id2, vec![1.0, 1.0, 1.0]), + make_vector_with_id(id3, vec![2.0, 2.0, 2.0]), + make_vector_with_id(id4, vec![10.0, 10.0, 10.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.5, 0.5, 0.5], Similarity::Euclidean, 2) + .unwrap(); + // id1 at distance sqrt(0.75) ≈ 0.866 + // id2 at distance sqrt(0.75) ≈ 0.866 + // Both are equidistant, should return both + assert_eq!(results.len(), 2); + assert!(results.contains(&id1) || results.contains(&id2)); +} + +#[test] +fn test_search_after_insert() { + let mut tree = KDTree::build_empty(2); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + + tree.insert(make_vector_with_id(id1, vec![10.0, 10.0])) + .unwrap(); + tree.insert(make_vector_with_id(id2, vec![1.0, 1.0])) + .unwrap(); + tree.insert(make_vector_with_id(id3, vec![5.0, 5.0])) + .unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 2) + .unwrap(); + assert_eq!(results[0], id2); // Closest to origin + assert_eq!(results[1], id3); // Second closest +} + +#[test] +fn test_search_high_dimensional() { + let dim = 10; + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + + let vectors = vec![ + make_vector_with_id(id1, vec![0.0; dim]), + make_vector_with_id(id2, vec![1.0; dim]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let query = vec![0.1; dim]; + let results = tree.search(query, Similarity::Euclidean, 1).unwrap(); + assert_eq!(results[0], id1); // Closer to all-zeros +} + +// Rebalancing Tests + +#[test] +fn test_many_inserts_maintains_searchability() { + let mut tree = KDTree::build_empty(2); + let mut ids = Vec::new(); + + // Insert many points that would cause imbalance + for i in 0..20 { + let id = Uuid::new_v4(); + ids.push(id); + tree.insert(make_vector_with_id(id, vec![i as f32, i as f32])) + .unwrap(); + } + + // Search should still work correctly + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 5) + .unwrap(); + assert_eq!(results.len(), 5); + // First result should be the point at (0, 0) + assert_eq!(results[0], ids[0]); +} + +#[test] +fn test_delete_triggers_rebuild() { + let mut ids = Vec::new(); + let mut vectors = Vec::new(); + + for i in 0..10 { + let id = Uuid::new_v4(); + ids.push(id); + vectors.push(make_vector_with_id(id, vec![i as f32, i as f32])); + } + + let mut tree = KDTree::build(vectors).unwrap(); + + // Delete enough points to trigger rebuild (> 25%) + for id in ids.iter().take(3) { + tree.delete(*id).unwrap(); + } + + // Tree should still function correctly + let results = tree + .search(vec![5.0, 5.0], Similarity::Euclidean, 3) + .unwrap(); + assert_eq!(results.len(), 3); + // Deleted points should not appear + for id in ids.iter().take(3) { + assert!(!results.contains(id)); + } +} + +// Edge Cases + +#[test] +fn test_single_point_search() { + let id = Uuid::new_v4(); + let vectors = vec![make_vector_with_id(id, vec![5.0, 5.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 1) + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0], id); +} + +#[test] +fn test_duplicate_coordinates() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![1.0, 1.0]), // Same coordinates + make_vector_with_id(id3, vec![2.0, 2.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![1.0, 1.0], Similarity::Euclidean, 2) + .unwrap(); + assert_eq!(results.len(), 2); + // Both id1 and id2 should be in results (both at distance 0) + assert!(results.contains(&id1) || results.contains(&id2)); +} + +#[test] +fn test_negative_coordinates() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![-1.0, -1.0]), + make_vector_with_id(id2, vec![1.0, 1.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![-0.5, -0.5], Similarity::Euclidean, 1) + .unwrap(); + assert_eq!(results[0], id1); +} + +#[test] +fn test_search_with_zero_k() { + let vectors = vec![make_vector(vec![1.0, 2.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![1.0, 2.0], Similarity::Euclidean, 0) + .unwrap(); + assert!(results.is_empty()); +} + +// Comparison Tests: KDTree vs FlatIndex + +/// Helper to create a fixed set of 10 vectors with known UUIDs for comparison tests +fn create_test_vectors_2d() -> Vec { + let ids: Vec = (0..10).map(|_| Uuid::new_v4()).collect(); + vec![ + make_vector_with_id(ids[0], vec![0.5, 0.5]), + make_vector_with_id(ids[1], vec![2.3, 1.7]), + make_vector_with_id(ids[2], vec![-1.0, 3.0]), + make_vector_with_id(ids[3], vec![4.5, -2.0]), + make_vector_with_id(ids[4], vec![7.0, 7.0]), + make_vector_with_id(ids[5], vec![-3.5, -1.5]), + make_vector_with_id(ids[6], vec![1.0, 5.0]), + make_vector_with_id(ids[7], vec![6.0, 2.0]), + make_vector_with_id(ids[8], vec![-2.0, -4.0]), + make_vector_with_id(ids[9], vec![3.0, 3.0]), + ] +} + +fn create_test_vectors_3d() -> Vec { + let ids: Vec = (0..10).map(|_| Uuid::new_v4()).collect(); + vec![ + make_vector_with_id(ids[0], vec![1.0, 2.0, 3.0]), + make_vector_with_id(ids[1], vec![-1.5, 0.5, 2.0]), + make_vector_with_id(ids[2], vec![4.0, 4.0, 4.0]), + make_vector_with_id(ids[3], vec![0.0, 0.0, 0.0]), + make_vector_with_id(ids[4], vec![2.5, -1.0, 3.5]), + make_vector_with_id(ids[5], vec![-2.0, 3.0, -1.0]), + make_vector_with_id(ids[6], vec![5.0, 1.0, 2.0]), + make_vector_with_id(ids[7], vec![3.0, 3.0, 3.0]), + make_vector_with_id(ids[8], vec![-0.5, -0.5, 1.0]), + make_vector_with_id(ids[9], vec![1.5, 2.5, 0.5]), + ] +} + +/// Helper to verify that two result sets are valid k-nearest neighbor results +/// Both should return the k closest points (by distance), but may differ on tie-breaking +fn verify_same_results( + tree_results: &[Uuid], + flat_results: &[Uuid], + vectors: &[IndexedVector], + query: &[f32], + similarity: Similarity, + k: usize, +) { + // Same length + assert_eq!( + tree_results.len(), + flat_results.len(), + "Result lengths differ" + ); + + // Both should return at most k results + assert!(tree_results.len() <= k); + + // Get distances for all results + let query_vec = query.to_vec(); + let get_distance = |id: &Uuid| -> f32 { + let vec = vectors.iter().find(|v| v.id == *id).unwrap(); + distance(&vec.vector, &query_vec, similarity) + }; + + // Verify tree results are sorted by distance + for i in 1..tree_results.len() { + let d1 = get_distance(&tree_results[i - 1]); + let d2 = get_distance(&tree_results[i]); + assert!( + d1 <= d2 + 1e-6, + "KDTree results not sorted: {} > {}", + d1, + d2 + ); + } + + // Verify flat results are sorted by distance + for i in 1..flat_results.len() { + let d1 = get_distance(&flat_results[i - 1]); + let d2 = get_distance(&flat_results[i]); + assert!(d1 <= d2 + 1e-6, "Flat results not sorted: {} > {}", d1, d2); + } + + // The maximum distance in both result sets should be the same (k-th nearest distance) + if !tree_results.is_empty() { + let tree_max_dist = get_distance(tree_results.last().unwrap()); + let flat_max_dist = get_distance(flat_results.last().unwrap()); + assert!( + (tree_max_dist - flat_max_dist).abs() < 1e-6, + "Max distances differ: tree={}, flat={}", + tree_max_dist, + flat_max_dist + ); + } + + // Verify that for each result in tree_results, either: + // 1. It's also in flat_results, OR + // 2. It has the same distance as the last element (tie-breaking difference) + let flat_set: HashSet<_> = flat_results.iter().collect(); + let flat_max_dist = if flat_results.is_empty() { + 0.0 + } else { + get_distance(flat_results.last().unwrap()) + }; + + for id in tree_results { + if !flat_set.contains(id) { + // This ID is not in flat results, verify it's a tie + let dist = get_distance(id); + assert!( + (dist - flat_max_dist).abs() < 1e-6, + "KDTree returned {:?} with distance {} but it's not in flat results and not a tie (flat max: {})", + id, + dist, + flat_max_dist + ); + } + } + + // Similarly verify flat_results + let tree_set: HashSet<_> = tree_results.iter().collect(); + let tree_max_dist = if tree_results.is_empty() { + 0.0 + } else { + get_distance(tree_results.last().unwrap()) + }; + + for id in flat_results { + if !tree_set.contains(id) { + let dist = get_distance(id); + assert!( + (dist - tree_max_dist).abs() < 1e-6, + "Flat returned {:?} with distance {} but it's not in tree results and not a tie (tree max: {})", + id, + dist, + tree_max_dist + ); + } + } +} + +#[test] +fn test_kdtree_vs_flat_euclidean_2d() { + let vectors = create_test_vectors_2d(); + let tree = KDTree::build(vectors.clone()).unwrap(); + let flat = FlatIndex::build(vectors.clone()); + + // Test multiple query points and different k values + let queries = vec![ + vec![0.0, 0.0], + vec![3.0, 3.0], + vec![-1.0, 2.0], + vec![5.0, 5.0], + ]; + + for query in queries { + for k in [1, 3, 5, 10] { + let tree_results = tree + .search(query.clone(), Similarity::Euclidean, k) + .unwrap(); + let flat_results = flat + .search(query.clone(), Similarity::Euclidean, k) + .unwrap(); + + verify_same_results( + &tree_results, + &flat_results, + &vectors, + &query, + Similarity::Euclidean, + k, + ); + } + } +} + +#[test] +fn test_kdtree_vs_flat_euclidean_3d() { + let vectors = create_test_vectors_3d(); + let tree = KDTree::build(vectors.clone()).unwrap(); + let flat = FlatIndex::build(vectors.clone()); + + let queries = vec![ + vec![0.0, 0.0, 0.0], + vec![2.0, 2.0, 2.0], + vec![-1.0, 1.0, 1.0], + vec![4.0, 3.0, 3.0], + ]; + + for query in queries { + for k in [1, 3, 5, 10] { + let tree_results = tree + .search(query.clone(), Similarity::Euclidean, k) + .unwrap(); + let flat_results = flat + .search(query.clone(), Similarity::Euclidean, k) + .unwrap(); + + verify_same_results( + &tree_results, + &flat_results, + &vectors, + &query, + Similarity::Euclidean, + k, + ); + } + } +} + +#[test] +fn test_kdtree_vs_flat_euclidean_5d() { + // Test with higher dimensionality + let ids: Vec = (0..10).map(|_| Uuid::new_v4()).collect(); + let vectors = vec![ + make_vector_with_id(ids[0], vec![1.0, 2.0, 3.0, 4.0, 5.0]), + make_vector_with_id(ids[1], vec![-1.0, 0.0, 1.0, 2.0, 3.0]), + make_vector_with_id(ids[2], vec![5.0, 4.0, 3.0, 2.0, 1.0]), + make_vector_with_id(ids[3], vec![0.0, 0.0, 0.0, 0.0, 0.0]), + make_vector_with_id(ids[4], vec![2.5, 2.5, 2.5, 2.5, 2.5]), + make_vector_with_id(ids[5], vec![-2.0, -1.0, 0.0, 1.0, 2.0]), + make_vector_with_id(ids[6], vec![3.0, 3.0, 3.0, 3.0, 3.0]), + make_vector_with_id(ids[7], vec![1.0, 1.0, 1.0, 1.0, 1.0]), + make_vector_with_id(ids[8], vec![4.0, 0.0, -1.0, 2.0, 5.0]), + make_vector_with_id(ids[9], vec![-0.5, 1.5, 2.5, 3.5, 4.5]), + ]; + + let tree = KDTree::build(vectors.clone()).unwrap(); + let flat = FlatIndex::build(vectors.clone()); + + let queries = vec![ + vec![0.0, 0.0, 0.0, 0.0, 0.0], + vec![2.0, 2.0, 2.0, 2.0, 2.0], + vec![1.0, 2.0, 3.0, 4.0, 5.0], + vec![-1.0, -1.0, 0.0, 1.0, 1.0], + ]; + + for query in queries { + for k in [1, 3, 5, 10] { + let tree_results = tree + .search(query.clone(), Similarity::Euclidean, k) + .unwrap(); + let flat_results = flat + .search(query.clone(), Similarity::Euclidean, k) + .unwrap(); + + verify_same_results( + &tree_results, + &flat_results, + &vectors, + &query, + Similarity::Euclidean, + k, + ); + } + } +} diff --git a/crates/index/src/kd_tree/types.rs b/crates/index/src/kd_tree/types.rs new file mode 100644 index 0000000..9999cb3 --- /dev/null +++ b/crates/index/src/kd_tree/types.rs @@ -0,0 +1,37 @@ +use std::cmp::Ordering; + +use defs::{IndexedVector, PointId}; + +// the node which will be the part of the KD Tree +pub struct KDTreeNode { + pub indexed_vector: IndexedVector, + pub left: Option>, + pub right: Option>, + pub is_deleted: bool, + + pub subtree_size: usize, +} + +// The struct definition which is present in max heap while search +#[derive(Debug, Clone, PartialEq)] +pub struct Neighbor { + pub id: PointId, + pub distance: f32, +} + +impl Eq for Neighbor {} + +// Custom Ord implementation for the max-heap +impl Ord for Neighbor { + fn cmp(&self, other: &Self) -> Ordering { + self.distance + .partial_cmp(&other.distance) + .unwrap_or(Ordering::Equal) + } +} + +impl PartialOrd for Neighbor { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} From dc178c2def815ddeb048234cb95af49491c2fb9d Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Mon, 19 Jan 2026 20:37:46 +0000 Subject: [PATCH 8/9] Add OrdF32, add axis field --- crates/defs/src/types.rs | 32 +++++++++++++ crates/index/src/kd_tree/helpers.rs | 39 +++++++++++++++ crates/index/src/kd_tree/index.rs | 74 ++++++++--------------------- crates/index/src/kd_tree/mod.rs | 1 + crates/index/src/kd_tree/types.rs | 28 ++--------- 5 files changed, 97 insertions(+), 77 deletions(-) create mode 100644 crates/index/src/kd_tree/helpers.rs diff --git a/crates/defs/src/types.rs b/crates/defs/src/types.rs index ae69f17..f78ce69 100644 --- a/crates/defs/src/types.rs +++ b/crates/defs/src/types.rs @@ -89,3 +89,35 @@ impl<'q> Eq for DistanceOrderedVector<'q> {} // Discovery(DiscoveryQuery), // Context(ContextQuery), // } + +#[derive(Clone, Copy, Debug, PartialEq, Default)] +pub struct OrdF32(pub f32); + +impl OrdF32 { + pub fn new(x: f32) -> Self { + Self(x) + } + pub fn into_inner(self) -> f32 { + self.0 + } +} + +impl Eq for OrdF32 {} + +impl Ord for OrdF32 { + fn cmp(&self, other: &Self) -> Ordering { + self.0.total_cmp(&other.0) + } +} + +impl PartialOrd for OrdF32 { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl From for OrdF32 { + fn from(x: f32) -> Self { + Self(x) + } +} diff --git a/crates/index/src/kd_tree/helpers.rs b/crates/index/src/kd_tree/helpers.rs new file mode 100644 index 0000000..4865184 --- /dev/null +++ b/crates/index/src/kd_tree/helpers.rs @@ -0,0 +1,39 @@ +use super::types::KDTreeNode; +use defs::IndexedVector; + +pub const BALANCE_THRESHOLD: f32 = 0.7; +pub const DELETE_REBUILD_RATIO: f32 = 0.25; + +/// Checks if a node is unbalanced based on the balance threshold +pub fn is_unbalanced(node: &KDTreeNode) -> bool { + let left_size = node.left.as_ref().map_or(0, |n| n.subtree_size); + let right_size = node.right.as_ref().map_or(0, |n| n.subtree_size); + let max_child = left_size.max(right_size); + + max_child as f32 > BALANCE_THRESHOLD * node.subtree_size as f32 +} + +/// Recursively collects non-deleted vectors from the tree +pub fn collect_recursive(node: KDTreeNode, result: &mut Vec) { + if !node.is_deleted { + result.push(node.indexed_vector); + } + if let Some(left) = node.left { + collect_recursive(*left, result); + } + if let Some(right) = node.right { + collect_recursive(*right, result); + } +} + +/// Collects all active (non-deleted) vectors from a subtree +pub fn collect_active_vectors(node: KDTreeNode) -> Vec { + let mut result = Vec::with_capacity(node.subtree_size); + collect_recursive(node, &mut result); + result +} + +/// Checks if the tree should be globally rebuilt based on deletion ratio +pub fn should_rebuild_global(total_nodes: usize, deleted_count: usize) -> bool { + total_nodes > 0 && (deleted_count as f32 / total_nodes as f32) > DELETE_REBUILD_RATIO +} diff --git a/crates/index/src/kd_tree/index.rs b/crates/index/src/kd_tree/index.rs index dc623a2..63ced1b 100644 --- a/crates/index/src/kd_tree/index.rs +++ b/crates/index/src/kd_tree/index.rs @@ -1,6 +1,7 @@ +use super::helpers::{collect_active_vectors, is_unbalanced, should_rebuild_global}; use super::types::{KDTreeNode, Neighbor}; use crate::{VectorIndex, distance}; -use defs::{DbError, DenseVector, IndexedVector, PointId, Similarity}; +use defs::{DbError, DenseVector, IndexedVector, OrdF32, PointId, Similarity}; use std::{ cmp::Ordering, collections::{BinaryHeap, HashSet}, @@ -19,10 +20,6 @@ pub struct KDTree { } impl KDTree { - // Rebuild threshold - const BALANCE_THRESHOLD: f32 = 0.7; - const DELETE_REBUILD_RATIO: f32 = 0.25; - // Build an empty index with no points pub fn build_empty(dim: usize) -> Self { KDTree { @@ -106,6 +103,7 @@ impl KDTree { left, right, is_deleted: false, + axis, subtree_size: left_size + right_size + 1, }) } @@ -122,6 +120,7 @@ impl KDTree { left: None, right: None, is_deleted: false, + axis: 0, subtree_size: 1, })); return; @@ -134,8 +133,8 @@ impl KDTree { let mut depth = 0; while let Some(node_box) = current_link { - let axis = depth % dim; let current_node = node_box.as_mut(); + let axis = current_node.axis; current_node.subtree_size += 1; @@ -159,6 +158,7 @@ impl KDTree { left: None, right: None, is_deleted: false, + axis: depth % dim, subtree_size: 1, }); @@ -167,33 +167,6 @@ impl KDTree { self.check_and_rebalance(&path); } - // Rebuild helper methods - fn is_unbalanced(node: &KDTreeNode) -> bool { - let left_size = node.left.as_ref().map_or(0, |n| n.subtree_size); - let right_size = node.right.as_ref().map_or(0, |n| n.subtree_size); - let max_child = left_size.max(right_size); - - max_child as f32 > Self::BALANCE_THRESHOLD * node.subtree_size as f32 - } - - fn collect_recursive(node: KDTreeNode, result: &mut Vec) { - if !node.is_deleted { - result.push(node.indexed_vector); - } - if let Some(left) = node.left { - Self::collect_recursive(*left, result); - } - if let Some(right) = node.right { - Self::collect_recursive(*right, result); - } - } - - fn collect_active_vectors(node: KDTreeNode) -> Vec { - let mut result = Vec::with_capacity(node.subtree_size); - Self::collect_recursive(node, &mut result); - result - } - fn rebuild_at_depth(&mut self, path: &[(usize, bool)], target_depth: usize) { let dim = self.dim; @@ -202,7 +175,7 @@ impl KDTree { // Rebuild root if let Some(root) = self.root.take() { let old_size = root.subtree_size; - let mut vectors = Self::collect_active_vectors(*root); + let mut vectors = collect_active_vectors(*root); let new_size = vectors.len(); if !vectors.is_empty() { self.root = Some(Self::build_recursive(&mut vectors, 0, dim)); @@ -226,7 +199,7 @@ impl KDTree { // Rebuild tree at current link if let Some(subtree_root) = current_link.take() { let old_size = subtree_root.subtree_size; - let mut vectors = Self::collect_active_vectors(*subtree_root); + let mut vectors = collect_active_vectors(*subtree_root); let new_size = vectors.len(); if !vectors.is_empty() { @@ -273,7 +246,7 @@ impl KDTree { // Check root first (depth 0) if let Some(node) = current - && Self::is_unbalanced(node) + && is_unbalanced(node) { unbalanced_depth = Some(0); } @@ -294,7 +267,7 @@ impl KDTree { // Check the child node we just moved to (at depth idx + 1) if let Some(child) = current - && Self::is_unbalanced(child) + && is_unbalanced(child) { unbalanced_depth = Some(idx + 1); break; @@ -307,11 +280,6 @@ impl KDTree { } } - fn should_rebuild_global(&self) -> bool { - self.total_nodes > 0 - && (self.deleted_count as f32 / self.total_nodes as f32) > Self::DELETE_REBUILD_RATIO - } - // Returns true if point found and deleted, else false pub fn delete_point(&mut self, point_id: &PointId) -> bool { if self.point_ids.contains(point_id) { @@ -321,10 +289,10 @@ impl KDTree { self.point_ids.remove(point_id); } - if Self::should_rebuild_global(self) + if should_rebuild_global(self.total_nodes, self.deleted_count) && let Some(root) = self.root.take() { - let mut vectors = Self::collect_active_vectors(*root); + let mut vectors = collect_active_vectors(*root); if !vectors.is_empty() { self.root = Some(Self::build_recursive(&mut vectors, 0, self.dim)); } @@ -372,14 +340,13 @@ impl KDTree { &query_vector, k, &mut best_neighbours, - 0, dist_type, ); best_neighbours .into_sorted_vec() .iter() - .map(|neighbor| (neighbor.id, neighbor.distance)) + .map(|neighbor| (neighbor.id, neighbor.distance.into_inner())) .collect() } @@ -389,12 +356,11 @@ impl KDTree { query_vector: &DenseVector, k: usize, heap: &mut BinaryHeap, - depth: usize, dist_type: Similarity, ) { // Base case is that we hit a leaf node don't do anything if let Some(node) = node_opt { - let axis = depth % self.dim; + let axis = node.axis; let (near_side, far_side) = if query_vector[axis] <= node.indexed_vector.vector[axis] { (&node.left, &node.right) @@ -403,21 +369,21 @@ impl KDTree { }; // Recurse on near side first - self.search_recursive(near_side, query_vector, k, heap, depth + 1, dist_type); + self.search_recursive(near_side, query_vector, k, heap, dist_type); if !node.is_deleted { // TODO: Possible overhead, here heap stores sqrt euclidean distance, we can eliminate that by storing squared distances in case of euclidean let distance = distance(query_vector, &node.indexed_vector.vector, dist_type); if heap.len() < k { heap.push(Neighbor { + distance: OrdF32::new(distance), id: node.indexed_vector.id, - distance, }); - } else if distance < heap.peek().unwrap().distance { + } else if distance < heap.peek().unwrap().distance.into_inner() { heap.pop(); heap.push(Neighbor { + distance: OrdF32::new(distance), id: node.indexed_vector.id, - distance, }); } } @@ -428,13 +394,13 @@ impl KDTree { let axis_diff = (query_vector[axis] - node.indexed_vector.vector[axis]).abs(); let should_search_far = match dist_type { Similarity::Euclidean | Similarity::Manhattan => { - heap.len() < k || axis_diff <= heap.peek().unwrap().distance + heap.len() < k || axis_diff <= heap.peek().unwrap().distance.into_inner() } _ => true, // Cosine/Hamming - no effective pruning, always search }; if should_search_far { - self.search_recursive(far_side, query_vector, k, heap, depth + 1, dist_type); + self.search_recursive(far_side, query_vector, k, heap, dist_type); } } } diff --git a/crates/index/src/kd_tree/mod.rs b/crates/index/src/kd_tree/mod.rs index 8765acf..fa8a23d 100644 --- a/crates/index/src/kd_tree/mod.rs +++ b/crates/index/src/kd_tree/mod.rs @@ -1,3 +1,4 @@ +pub mod helpers; pub mod index; pub mod types; diff --git a/crates/index/src/kd_tree/types.rs b/crates/index/src/kd_tree/types.rs index 9999cb3..aca187c 100644 --- a/crates/index/src/kd_tree/types.rs +++ b/crates/index/src/kd_tree/types.rs @@ -1,6 +1,4 @@ -use std::cmp::Ordering; - -use defs::{IndexedVector, PointId}; +use defs::{IndexedVector, OrdF32, PointId}; // the node which will be the part of the KD Tree pub struct KDTreeNode { @@ -8,30 +6,14 @@ pub struct KDTreeNode { pub left: Option>, pub right: Option>, pub is_deleted: bool, - + pub axis: usize, pub subtree_size: usize, } // The struct definition which is present in max heap while search -#[derive(Debug, Clone, PartialEq)] +// distance is first for correct Ord derivation (primary sort key) +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct Neighbor { + pub distance: OrdF32, pub id: PointId, - pub distance: f32, -} - -impl Eq for Neighbor {} - -// Custom Ord implementation for the max-heap -impl Ord for Neighbor { - fn cmp(&self, other: &Self) -> Ordering { - self.distance - .partial_cmp(&other.distance) - .unwrap_or(Ordering::Equal) - } -} - -impl PartialOrd for Neighbor { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } } From 28932adce5f6c2a8eeeae464000a5d52d35423c4 Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Tue, 27 Jan 2026 18:19:36 +0000 Subject: [PATCH 9/9] Move helpers to impl KDTree --- crates/index/src/kd_tree/helpers.rs | 62 +++++++++++++++-------------- crates/index/src/kd_tree/index.rs | 14 +++---- crates/index/src/lib.rs | 2 +- 3 files changed, 41 insertions(+), 37 deletions(-) diff --git a/crates/index/src/kd_tree/helpers.rs b/crates/index/src/kd_tree/helpers.rs index 4865184..8934ae7 100644 --- a/crates/index/src/kd_tree/helpers.rs +++ b/crates/index/src/kd_tree/helpers.rs @@ -1,39 +1,43 @@ +use crate::kd_tree::index::KDTree; + use super::types::KDTreeNode; use defs::IndexedVector; -pub const BALANCE_THRESHOLD: f32 = 0.7; -pub const DELETE_REBUILD_RATIO: f32 = 0.25; - -/// Checks if a node is unbalanced based on the balance threshold -pub fn is_unbalanced(node: &KDTreeNode) -> bool { - let left_size = node.left.as_ref().map_or(0, |n| n.subtree_size); - let right_size = node.right.as_ref().map_or(0, |n| n.subtree_size); - let max_child = left_size.max(right_size); +impl KDTree { + pub const BALANCE_THRESHOLD: f32 = 0.7; + pub const DELETE_REBUILD_RATIO: f32 = 0.25; - max_child as f32 > BALANCE_THRESHOLD * node.subtree_size as f32 -} + /// Checks if a node is unbalanced based on the balance threshold + pub fn is_unbalanced(node: &KDTreeNode) -> bool { + let left_size = node.left.as_ref().map_or(0, |n| n.subtree_size); + let right_size = node.right.as_ref().map_or(0, |n| n.subtree_size); + let max_child = left_size.max(right_size); -/// Recursively collects non-deleted vectors from the tree -pub fn collect_recursive(node: KDTreeNode, result: &mut Vec) { - if !node.is_deleted { - result.push(node.indexed_vector); + max_child as f32 > Self::BALANCE_THRESHOLD * node.subtree_size as f32 } - if let Some(left) = node.left { - collect_recursive(*left, result); - } - if let Some(right) = node.right { - collect_recursive(*right, result); + + /// Recursively collects non-deleted vectors from the tree + pub fn collect_recursive(node: KDTreeNode, result: &mut Vec) { + if !node.is_deleted { + result.push(node.indexed_vector); + } + if let Some(left) = node.left { + Self::collect_recursive(*left, result); + } + if let Some(right) = node.right { + Self::collect_recursive(*right, result); + } } -} -/// Collects all active (non-deleted) vectors from a subtree -pub fn collect_active_vectors(node: KDTreeNode) -> Vec { - let mut result = Vec::with_capacity(node.subtree_size); - collect_recursive(node, &mut result); - result -} + /// Collects all active (non-deleted) vectors from a subtree + pub fn collect_active_vectors(node: KDTreeNode) -> Vec { + let mut result = Vec::with_capacity(node.subtree_size); + Self::collect_recursive(node, &mut result); + result + } -/// Checks if the tree should be globally rebuilt based on deletion ratio -pub fn should_rebuild_global(total_nodes: usize, deleted_count: usize) -> bool { - total_nodes > 0 && (deleted_count as f32 / total_nodes as f32) > DELETE_REBUILD_RATIO + /// Checks if the tree should be globally rebuilt based on deletion ratio + pub fn should_rebuild_global(total_nodes: usize, deleted_count: usize) -> bool { + total_nodes > 0 && (deleted_count as f32 / total_nodes as f32) > Self::DELETE_REBUILD_RATIO + } } diff --git a/crates/index/src/kd_tree/index.rs b/crates/index/src/kd_tree/index.rs index 63ced1b..a188f01 100644 --- a/crates/index/src/kd_tree/index.rs +++ b/crates/index/src/kd_tree/index.rs @@ -1,4 +1,4 @@ -use super::helpers::{collect_active_vectors, is_unbalanced, should_rebuild_global}; +// use super::helpers::{collect_active_vectors, is_unbalanced, should_rebuild_global}; use super::types::{KDTreeNode, Neighbor}; use crate::{VectorIndex, distance}; use defs::{DbError, DenseVector, IndexedVector, OrdF32, PointId, Similarity}; @@ -175,7 +175,7 @@ impl KDTree { // Rebuild root if let Some(root) = self.root.take() { let old_size = root.subtree_size; - let mut vectors = collect_active_vectors(*root); + let mut vectors = Self::collect_active_vectors(*root); let new_size = vectors.len(); if !vectors.is_empty() { self.root = Some(Self::build_recursive(&mut vectors, 0, dim)); @@ -199,7 +199,7 @@ impl KDTree { // Rebuild tree at current link if let Some(subtree_root) = current_link.take() { let old_size = subtree_root.subtree_size; - let mut vectors = collect_active_vectors(*subtree_root); + let mut vectors = Self::collect_active_vectors(*subtree_root); let new_size = vectors.len(); if !vectors.is_empty() { @@ -246,7 +246,7 @@ impl KDTree { // Check root first (depth 0) if let Some(node) = current - && is_unbalanced(node) + && Self::is_unbalanced(node) { unbalanced_depth = Some(0); } @@ -267,7 +267,7 @@ impl KDTree { // Check the child node we just moved to (at depth idx + 1) if let Some(child) = current - && is_unbalanced(child) + && Self::is_unbalanced(child) { unbalanced_depth = Some(idx + 1); break; @@ -289,10 +289,10 @@ impl KDTree { self.point_ids.remove(point_id); } - if should_rebuild_global(self.total_nodes, self.deleted_count) + if Self::should_rebuild_global(self.total_nodes, self.deleted_count) && let Some(root) = self.root.take() { - let mut vectors = collect_active_vectors(*root); + let mut vectors = Self::collect_active_vectors(*root); if !vectors.is_empty() { self.root = Some(Self::build_recursive(&mut vectors, 0, self.dim)); } diff --git a/crates/index/src/lib.rs b/crates/index/src/lib.rs index 8d12581..4e59ced 100644 --- a/crates/index/src/lib.rs +++ b/crates/index/src/lib.rs @@ -1,8 +1,8 @@ use defs::{DbError, DenseVector, IndexedVector, PointId, Similarity}; pub mod flat; -pub mod kd_tree; pub mod hnsw; +pub mod kd_tree; pub trait VectorIndex: Send + Sync { fn insert(&mut self, vector: IndexedVector) -> Result<(), DbError>;