3 changes: 1 addition & 2 deletions pyproject.toml
@@ -86,7 +86,6 @@ url = "https://download.pytorch.org/whl/cu128"
 explicit = true
 
 [tool.ruff]
-exclude = [ "src/scloop/preprocessing/delve" ]
 format.indent-style = "space"
 format.quote-style = "double"
 lint.extend-select = [ "I" ]
@@ -123,4 +122,4 @@ venvPath = "/home/stanfish/Git/scloop"
 venv = ".venv"
 
 [tool.ty]
-src = { include = [ "src" ], exclude = [ "src/scloop/preprocessing/delve" ] }
+src = { include = [ "src" ] }
46 changes: 35 additions & 11 deletions src/scloop/computing/homology.py
@@ -15,8 +15,15 @@
     get_boundary_matrix,
     ripser,
 )
-from ..data.types import Count_t, Diameter_t, IndexListDistMatrix, LoopDistMethod
+from ..data.types import (
+    Count_t,
+    Diameter_t,
+    IndexListDistMatrix,
+    LoopDistMethod,
+    Percent_t,
+)
 from ..data.utils import encode_triangles_and_edges
+from ..preprocessing.delve.kh import kernel_herding_main
 from ..preprocessing.downsample import (
     sample_farthest_points,
     sample_farthest_points_randomized,
@@ -37,9 +44,11 @@ def compute_sparse_pairwise_distance(
     noise_scale: float = 1e-3,
     thresh: Diameter_t | None = None,
     bootstrap_sampling: str = "resample",
-    bootstrap_fps_fraction: float = 2 / 3,
+    bootstrap_downsample_fraction: Percent_t = 2 / 3,
     bootstrap_fps_top_k: int = 5,
     bootstrap_fps_alpha: float = 1.0,
+    bootstrap_herding_n_features: int = 1000,
+    bootstrap_herding_seed: int | None = None,
     **nei_kwargs,
 ) -> tuple[csr_matrix, IndexListDistMatrix | None]:
     # important, default is binary graph
@@ -65,9 +74,9 @@ def compute_sparse_pairwise_distance(
                 scale=std_X * noise_scale, size=X.shape
             )
         elif bootstrap_sampling == "fps":
-            if not (0 < bootstrap_fps_fraction <= 1):
-                raise ValueError("bootstrap_fps_fraction must be in (0, 1].")
-            n_keep = max(2, int(round(len(selected_indices) * bootstrap_fps_fraction)))
+            n_keep = max(
+                2, int(round(len(selected_indices) * bootstrap_downsample_fraction))
+            )
             n_keep = min(n_keep, len(selected_indices))
             sample_idx = sample_farthest_points(X, n_keep)
             boot_idx = [selected_indices[int(i)] for i in sample_idx.tolist()]
@@ -76,13 +85,13 @@ def compute_sparse_pairwise_distance(
                 scale=std_X * noise_scale, size=(n_keep, X.shape[1])
             )
         elif bootstrap_sampling == "fps_random":
-            if not (0 < bootstrap_fps_fraction <= 1):
-                raise ValueError("bootstrap_fps_fraction must be in (0, 1].")
             if bootstrap_fps_top_k <= 0:
                 raise ValueError("bootstrap_fps_top_k must be > 0.")
             if bootstrap_fps_alpha < 0:
                 raise ValueError("bootstrap_fps_alpha must be >= 0.")
-            n_keep = max(2, int(round(len(selected_indices) * bootstrap_fps_fraction)))
+            n_keep = max(
+                2, int(round(len(selected_indices) * bootstrap_downsample_fraction))
+            )
             n_keep = min(n_keep, len(selected_indices))
             sample_idx = sample_farthest_points_randomized(
                 X, n_keep, top_k=bootstrap_fps_top_k, alpha=bootstrap_fps_alpha
@@ -92,9 +101,24 @@ def compute_sparse_pairwise_distance(
             X = X[sample_idx] + np.random.normal(
                 scale=std_X * noise_scale, size=(n_keep, X.shape[1])
            )
-        else:
-            raise ValueError(
-                f"Unknown bootstrap_sampling={bootstrap_sampling!r}. Expected 'resample', 'fps', or 'fps_random'."
+        elif bootstrap_sampling == "herding":  # TODO: increase randomness of this approach
+            n_keep = max(
+                2, int(round(len(selected_indices) * bootstrap_downsample_fraction))
+            )
+            n_keep = min(n_keep, len(selected_indices))
+            if bootstrap_herding_seed is None:
+                bootstrap_herding_seed = int(np.random.randint(0, 1_000_000))
+            sample_idx = kernel_herding_main(
+                sample_set_ind=np.arange(len(selected_indices)),
+                X=X,
+                num_subsamples=n_keep,
+                frequency_seed=bootstrap_herding_seed,
+                n_features=int(bootstrap_herding_n_features),
+            )
+            boot_idx = [selected_indices[int(i)] for i in sample_idx.tolist()]
+            std_X = np.std(X, axis=0)
+            X = X[sample_idx] + np.random.normal(
+                scale=std_X * noise_scale, size=(n_keep, X.shape[1])
             )
     else:
         boot_idx = selected_indices
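Note on the new "herding" branch: `kernel_herding_main` lives in the vendored DELVE code (`src/scloop/preprocessing/delve/kh.py`) and is not shown in this diff. As a rough mental model only, kernel herding over random Fourier features can be sketched as below; the function name, bandwidth heuristic, and masking of already-chosen points are illustrative, not the vendored implementation:

```python
import numpy as np


def kernel_herding_sketch(
    X: np.ndarray, n_keep: int, n_features: int = 1000, seed: int = 0
) -> np.ndarray:
    """Illustrative kernel herding via random Fourier features (Rahimi & Recht)."""
    rng = np.random.default_rng(seed)
    # phi(x) = sqrt(2/D) * cos(Wx + b) approximates a Gaussian kernel.
    sigma = float(np.median(np.std(X, axis=0))) + 1e-12  # crude bandwidth heuristic
    W = rng.normal(scale=1.0 / sigma, size=(n_features, X.shape[1]))
    b = rng.uniform(0.0, 2.0 * np.pi, size=n_features)
    phi = np.sqrt(2.0 / n_features) * np.cos(X @ W.T + b)  # (n_cells, n_features)

    mu = phi.mean(axis=0)  # empirical kernel mean embedding
    w = mu.copy()
    chosen = []
    avail = np.ones(X.shape[0], dtype=bool)
    for _ in range(n_keep):
        scores = phi @ w
        scores[~avail] = -np.inf  # keep the subsample free of duplicates
        idx = int(np.argmax(scores))  # greedy step: most under-represented point
        chosen.append(idx)
        avail[idx] = False
        w += mu - phi[idx]  # standard herding update: w_{t+1} = w_t + mu - phi(x_t)
    return np.asarray(chosen)
```

Because the greedy argmax is deterministic for a fixed feature map, the per-bootstrap `frequency_seed` (drawn in the diff when `bootstrap_herding_seed is None`) is what varies the sketch across bootstrap iterations; that is also what the TODO about increasing randomness refers to.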
2 changes: 1 addition & 1 deletion src/scloop/matching/cross_dataset.py
@@ -201,7 +201,7 @@ def _to_dataframe(self) -> tuple[pd.DataFrame, pd.DataFrame]:
         try:
             cols = ["track_id"] + [f"dataset_{i}" for i in sorted_dataset_indices]
             tracks_df = tracks_df[cols]
-        except:
+        except KeyError:
             tracks_df = None
 
         match_rows = []
2 changes: 1 addition & 1 deletion src/scloop/matching/nf.py
@@ -35,7 +35,7 @@ def __init__(
         t_span: torch.Tensor,
         n_hidden=64,
         n_layers=1,
-        solver="rk4",
+        solver="dopri5",  # using the same solver and solver_adjoint is more robust
         solver_adjoint="dopri5",
         atol_adjoint=1e-4,
         rtol_adjoint=1e-4,
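The parameter names here (`solver`, `solver_adjoint`, `atol_adjoint`, `rtol_adjoint`) suggest this class wraps torchdyn's `NeuralODE`, though the wrapper itself is outside this diff. A minimal sketch of the matched-solver configuration under that assumption:

```python
import torch
import torch.nn as nn
from torchdyn.core import NeuralODE  # assumed backend, implied by the parameter names

vector_field = nn.Sequential(nn.Linear(2, 64), nn.Tanh(), nn.Linear(64, 2))

# With solver == solver_adjoint, the forward and backward (adjoint) integrations
# use the same adaptive scheme, so gradients stay consistent with the forward pass.
node = NeuralODE(
    vector_field,
    solver="dopri5",
    sensitivity="adjoint",
    solver_adjoint="dopri5",
    atol_adjoint=1e-4,
    rtol_adjoint=1e-4,
)
t_span = torch.linspace(0.0, 1.0, 10)
t_eval, trajectory = node(torch.randn(8, 2), t_span)
```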
24 changes: 22 additions & 2 deletions src/scloop/preprocessing/delve/delve.py
@@ -26,6 +26,7 @@ def delve_fs(
     random_state: int = 0,
     n_random_state: int = 10,
     n_pcs=None,
+    density_weighted_sketch: bool = False,
     n_jobs: int = -1,
 ):
     """Performs DELVE feature selection
@@ -81,6 +82,7 @@ def delve_fs(
         random_state=random_state,
         n_random_state=n_random_state,
         n_pcs=n_pcs,
+        density_weighted_sketch=density_weighted_sketch,
         n_jobs=n_jobs,
     )
 
@@ -115,6 +117,7 @@ def seed_select(
     random_state: int = 0,
     n_random_state: int = 10,
     n_pcs=None,
+    density_weighted_sketch: bool = False,
     n_jobs: int = -1,
 ):
     """Identifies dynamic seed clusters
@@ -173,6 +176,7 @@ def seed_select(
         num_subsamples=num_subsamples,
         random_state=random_state,
         n_pcs=n_pcs,
+        density_weighted_sketch=density_weighted_sketch,
         n_jobs=n_jobs,
     )
 
@@ -297,6 +301,7 @@ def delta_exp(
     num_subsamples: int = 1000,
     random_state: int = 0,
     n_pcs=None,
+    density_weighted_sketch: bool = False,
     n_jobs: int = -1,
 ):
     """Estimates change in expression of features across representative cellular neighborhoods
@@ -328,7 +333,13 @@
     ----------
     """
     # construct between cell affinity kNN graph according to all profiled features
-    W = construct_affinity(X=X, k=k, n_pcs=n_pcs, n_jobs=-1)
+    if density_weighted_sketch:
+        W, density = construct_affinity(
+            X=X, k=k, n_pcs=n_pcs, n_jobs=-1, return_density=True
+        )
+    else:
+        W = construct_affinity(X=X, k=k, n_pcs=n_pcs, n_jobs=-1)
+        density = None
 
     # compute neighborhood means
     n_bool = W.astype(bool)
@@ -340,6 +351,7 @@
         anndata.AnnData(n_mean),
         num_subsamples=num_subsamples,
         frequency_seed=random_state,
+        density=density,
         n_jobs=n_jobs,
     )
 
@@ -424,7 +436,12 @@ def laplacian_score(X=None, W=None):
 
 
 def construct_affinity(
-    X=None, k: int = 10, radius: int = 3, n_pcs=None, n_jobs: int = -1
+    X=None,
+    k: int = 10,
+    radius: int = 3,
+    n_pcs=None,
+    n_jobs: int = -1,
+    return_density: bool = False,
 ):
     """Computes between cell affinity knn graph using heat kernel
     Parameters
@@ -459,6 +476,7 @@
 
     # transform distances using heat kernel
     s = heat_kernel(dist, radius=radius)  # -||x_i - x_j||^2 / 2*sigma_i**2
+    density = np.sum(s, axis=1)
     rows = np.repeat(np.arange(X.shape[0]), k)
     cols = nn.reshape(-1)
     W = scipy.sparse.csr_matrix(
@@ -469,6 +487,8 @@
     bigger = W.transpose() > W
     W = W - W.multiply(bigger) + W.transpose().multiply(bigger)
 
+    if return_density:
+        return W, density
     return W
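Here `density` is simply the row sum of the heat-kernel affinities, i.e. a cheap per-cell local-density estimate that the downstream sketching step can use to weight its subsample. The sketch-side handling of `density=` is not part of this diff; a minimal standalone illustration of the weighting idea (the helper name is hypothetical):

```python
import numpy as np


def density_weighted_subsample(
    density: np.ndarray, n_keep: int, seed: int = 0
) -> np.ndarray:
    """Hypothetical helper: sample cells without replacement, proportional to density."""
    rng = np.random.default_rng(seed)
    p = density / density.sum()  # normalize heat-kernel row sums into probabilities
    return rng.choice(len(density), size=n_keep, replace=False, p=p)


# Intended call pattern inside delta_exp, per this diff:
#   W, density = construct_affinity(X=X, k=10, n_pcs=50, n_jobs=-1, return_density=True)
#   idx = density_weighted_subsample(density, n_keep=1000)
```

Sampling proportional to density biases the sketch toward well-populated regions of the kNN graph, which is presumably the point of the new `density_weighted_sketch` flag threaded through `delve_fs`, `seed_select`, and `delta_exp`.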