From 3d0cc8f22899e2c9fd8f1e62d5d181f2d35d8f68 Mon Sep 17 00:00:00 2001 From: Ian Date: Wed, 9 Apr 2025 20:06:30 +0000 Subject: [PATCH 1/7] added initial scaffolding for randomized SVD --- Cargo.lock | 18 ++ Cargo.toml | 1 + src/lib.rs | 2 + src/randomized/mod.rs | 593 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 614 insertions(+) create mode 100644 src/randomized/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 9c21ee7..7a94414 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -90,6 +90,12 @@ version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +[[package]] +name = "libm" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" + [[package]] name = "matrixmultiply" version = "0.3.9" @@ -198,6 +204,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -284,6 +291,16 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand", +] + [[package]] name = "rawpointer" version = "0.2.1" @@ -340,6 +357,7 @@ dependencies = [ "ndarray", "num-traits", "rand", + "rand_distr", "rayon", "thiserror", ] diff --git a/Cargo.toml b/Cargo.toml index 52835e4..221fcee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,5 +13,6 @@ nalgebra-sparse = "0.10.0" ndarray = "0.16.1" num-traits = "0.2.19" rand = "0.9.0" +rand_distr = "0.5.1" rayon = "1.10.0" thiserror = "2.0.9" diff --git a/src/lib.rs b/src/lib.rs index 50e59da..9422b98 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,8 @@ mod new; mod masked; pub(crate) mod utils; +pub mod randomized; + pub use new::*; pub use masked::*; diff --git a/src/randomized/mod.rs b/src/randomized/mod.rs new file mode 100644 index 0000000..c716397 --- /dev/null +++ b/src/randomized/mod.rs @@ -0,0 +1,593 @@ +use crate::error::SvdLibError; +use crate::{Diagnostics, SMat, SvdFloat, SvdRec}; +use ndarray::{s, Array, Array1, Array2, Axis}; +use num_traits::Float; +use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng}; +use rand_distr::{Distribution, Normal}; +use rayon::prelude::*; +use std::cmp::min; + +/// Computes a randomized SVD with default parameters +/// +/// # Parameters +/// - A: Sparse matrix +/// - rank: Number of singular values/vectors to compute +/// +/// # Returns +/// - SvdRec: Singular value decomposition (U, S, V) +pub fn randomized_svd(a: &M, rank: usize) -> Result, SvdLibError> +where + T: SvdFloat, + M: SMat, +{ + let oversampling = 10; + let n_power_iterations = 2; + let random_seed = 0; // Will be generated randomly + randomized_svd_full(a, rank, oversampling, n_power_iterations, random_seed) +} + +/// Computes a randomized SVD with control over oversampling and power iterations +/// +/// # Parameters +/// - A: Sparse matrix +/// - rank: Number of singular values/vectors to compute +/// - oversampling: Additional columns to sample for improved accuracy (typically 5-10) +/// - n_power_iterations: Number of power iterations to enhance accuracy for smaller singular values +/// +/// # Returns +/// - SvdRec: Singular value decomposition (U, S, V) +pub fn randomized_svd_with_params( 
+ a: &M, + rank: usize, + oversampling: usize, + n_power_iterations: usize, +) -> Result, SvdLibError> +where + T: SvdFloat, + M: SMat, +{ + randomized_svd_full(a, rank, oversampling, n_power_iterations, 0) +} + +/// Computes a randomized SVD with full parameter control +/// +/// # Parameters +/// - A: Sparse matrix +/// - rank: Number of singular values/vectors to compute +/// - oversampling: Additional columns to sample for improved accuracy +/// - n_power_iterations: Number of power iterations to enhance accuracy for smaller singular values +/// - random_seed: Seed for random number generation (0 for random seed) +/// +/// # Returns +/// - SvdRec: Singular value decomposition (U, S, V) +pub fn randomized_svd_full( + a: &M, + rank: usize, + oversampling: usize, + n_power_iterations: usize, + random_seed: u32, +) -> Result, SvdLibError> +where + T: SvdFloat, + M: SMat, +{ + // Determine the actual seed to use + let seed = if random_seed == 0 { + // Use the system time as a seed if none is provided + use std::time::{SystemTime, UNIX_EPOCH}; + let duration = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Time went backwards"); + duration.as_nanos() as u32 + } else { + random_seed + }; + + // Validate parameters + let nrows = a.nrows(); + let ncols = a.ncols(); + let min_dim = min(nrows, ncols); + + if rank == 0 { + return Err(SvdLibError::Las2Error( + "Rank must be greater than 0".to_string(), + )); + } + + if rank > min_dim { + return Err(SvdLibError::Las2Error(format!( + "Requested rank {} exceeds matrix dimensions {}x{}", + rank, nrows, ncols + ))); + } + + // The target rank with oversampling (can't exceed matrix dimensions) + let target_rank = min(rank + oversampling, min_dim); + + // Transpose large matrices for efficiency if needed + let transposed = ncols > nrows; + let (work_rows, work_cols) = if transposed { + (ncols, nrows) + } else { + (nrows, ncols) + }; + + // Stage 1: Generate a random projection matrix Omega + let omega = generate_random_matrix(work_cols, target_rank, seed)?; + + // Stage 2: Form Y = A * Omega (or Y = A^T * Omega if transposed) + let mut y = Array2::zeros((work_rows, target_rank)); + + // Fill Y by matrix-vector products + for j in 0..target_rank { + let omega_col = omega.slice(s![.., j]).to_vec(); + let mut y_col = vec![T::zero(); work_rows]; + + // Apply A or A^T + a.svd_opa(&omega_col, &mut y_col, transposed); + + // Copy result to column of Y + for i in 0..work_rows { + y[[i, j]] = y_col[i]; + } + } + + // Stage 3: Power iteration scheme to increase accuracy for smaller singular values + if n_power_iterations > 0 { + // Compute power iterations (Y = (A*A^T)^q * Y) + y = power_iteration(a, y, n_power_iterations, transposed)?; + } + + // Stage 4: Orthogonalize the basis using QR decomposition + let q = orthogonalize(y)?; + + // Stage 5: Form B = Q^T * A (or B = Q^T * A^T if transposed) + let mut b = Array2::zeros((target_rank, work_cols)); + + // Fill B by matrix-vector products + for i in 0..work_cols { + let mut e_i = vec![T::zero(); work_cols]; + e_i[i] = T::one(); + + let mut b_row = vec![T::zero(); target_rank]; + + // Apply A or A^T + a.svd_opa(&e_i, &mut b_row, !transposed); + + // Apply Q^T + let qt_b = matrix_vector_multiply(&q, &b_row, true); + + // Copy result to row of B + for j in 0..target_rank { + b[[j, i]] = qt_b[j]; + } + } + + // Stage 6: Compute the SVD of B + let (u_b, s_b, v_b) = compute_svd_dense(b, rank)?; + + // Stage 7: Form the SVD of A + let u_a = if transposed { + v_b + } else { + matrix_multiply(&q, &u_b) + }; + + 
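+    // Recovery step of the randomized scheme (descriptive note): B = Q^T * A was factored as
+    // B = U_B * S * V_B^T, so A is approximately Q * B = (Q * U_B) * S * V_B^T. The left
+    // singular vectors are therefore Q * U_B and the right singular vectors are V_B; when the
+    // input was processed transposed, the two roles are swapped, which is what this pair of
+    // branches does.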
let v_a = if transposed { + matrix_multiply(&q, &u_b) + } else { + v_b + }; + + // Return the SVD in the requested format + Ok(SvdRec { + d: rank, + ut: if transposed { v_a } else { u_a.t().to_owned() }, + s: s_b, + vt: if transposed { u_a.t().to_owned() } else { v_a }, + diagnostics: Diagnostics { + non_zero: a.nnz(), + dimensions: rank, + iterations: n_power_iterations, + transposed, + lanczos_steps: 0, // Not applicable for randomized SVD + ritz_values_stabilized: 0, // Not applicable + significant_values: rank, + singular_values: rank, + end_interval: [T::zero(), T::zero()], // Not applicable + kappa: T::from_f64(1e-6).unwrap(), // Standard value + random_seed: seed, + }, + }) +} + +// Helper functions (implementations to be added) + +fn generate_random_matrix( + n_rows: usize, + n_cols: usize, + seed: u32, +) -> Result, SvdLibError> { + // Create a Gaussian random matrix + let mut rng = StdRng::seed_from_u64(seed as u64); + let normal = Normal::new(0.0, 1.0).unwrap(); + + let mut omega = Array2::zeros((n_rows, n_cols)); + + // Fill with random normal values + omega.par_iter_mut().for_each(|x| { + // Using local RNG for each parallel task + let mut local_rng = StdRng::from_entropy(); + *x = T::from_f64(normal.sample(&mut local_rng)).unwrap(); + }); + + Ok(omega) +} + +fn power_iteration( + a: &M, + mut y: Array2, + n_iterations: usize, + transposed: bool, +) -> Result, SvdLibError> +where + T: SvdFloat, + M: SMat, +{ + let (n_rows, n_cols) = y.dim(); + + // Temporary arrays for matrix operations + let mut temp_rows = vec![T::zero(); a.nrows()]; + let mut temp_cols = vec![T::zero(); a.ncols()]; + + for _ in 0..n_iterations { + // Apply (A*A^T) to Y through two matrix-vector products + for j in 0..n_cols { + let y_col = y.slice(s![.., j]).to_vec(); + + // First multiply: temp = A^T * y_col (or A * y_col if transposed) + a.svd_opa(&y_col, &mut temp_cols, !transposed); + + // Second multiply: y_col = A * temp (or A^T * temp if transposed) + a.svd_opa(&temp_cols, &mut temp_rows, transposed); + + // Update Y + for i in 0..n_rows { + y[[i, j]] = temp_rows[i]; + } + } + + // Orthogonalize after each iteration for numerical stability + y = orthogonalize(y)?; + } + + Ok(y) +} + +fn orthogonalize(a: Array2) -> Result, SvdLibError> { + // Implement a modified Gram-Schmidt orthogonalization + // This is more numerically stable than the standard Gram-Schmidt process + let (n_rows, n_cols) = a.dim(); + let mut q = a.clone(); + + for j in 0..n_cols { + // Normalize the j-th column + let mut norm_squared = T::zero(); + for i in 0..n_rows { + norm_squared += q[[i, j]] * q[[i, j]]; + } + + let norm = norm_squared.sqrt(); + + // Handle near-zero columns + if norm <= T::from_f64(1e-10).unwrap() { + // If column is essentially zero, replace with random vector + let mut rng = StdRng::from_entropy(); + let normal = Normal::new(0.0, 1.0).unwrap(); + + for i in 0..n_rows { + q[[i, j]] = T::from_f64(normal.sample(&mut rng)).unwrap(); + } + + // Recursively orthogonalize this column against previous columns + for k in 0..j { + let mut dot = T::zero(); + for i in 0..n_rows { + dot += q[[i, j]] * q[[i, k]]; + } + + for i in 0..n_rows { + q[[i, j]] -= dot * q[[i, k]]; + } + } + + // Normalize again + norm_squared = T::zero(); + for i in 0..n_rows { + norm_squared += q[[i, j]] * q[[i, j]]; + } + let norm = norm_squared.sqrt(); + + for i in 0..n_rows { + q[[i, j]] /= norm; + } + } else { + // Normalize the column + for i in 0..n_rows { + q[[i, j]] /= norm; + } + + // Orthogonalize remaining columns against this one + 
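+            // (Modified Gram-Schmidt sweep: each later column k loses its component along the
+            // freshly normalized column j, i.e. q_k <- q_k - <q_k, q_j> * q_j.)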
for k in (j+1)..n_cols { + let mut dot = T::zero(); + for i in 0..n_rows { + dot += q[[i, j]] * q[[i, k]]; + } + + for i in 0..n_rows { + q[[i, k]] -= dot * q[[i, j]]; + } + } + } + } + + Ok(q) +} + +fn matrix_vector_multiply( + a: &Array2, + x: &[T], + transpose: bool, +) -> Vec { + let (n_rows, n_cols) = a.dim(); + + if !transpose { + // y = A * x + assert_eq!(x.len(), n_cols, "Vector length must match number of columns"); + + let mut y = vec![T::zero(); n_rows]; + + if n_rows > 1000 { + // Parallel implementation for large matrices + y.par_iter_mut().enumerate().for_each(|(i, y_i)| { + let row = a.row(i); + *y_i = row.iter().zip(x.iter()).map(|(&a_ij, &x_j)| a_ij * x_j).sum(); + }); + } else { + // Sequential for smaller matrices to avoid parallel overhead + for i in 0..n_rows { + let row = a.row(i); + y[i] = row.iter().zip(x.iter()).map(|(&a_ij, &x_j)| a_ij * x_j).sum(); + } + } + + y + } else { + // y = A^T * x + assert_eq!(x.len(), n_rows, "Vector length must match number of rows"); + + let mut y = vec![T::zero(); n_cols]; + + if n_cols > 1000 { + // Parallel implementation for large matrices + y.par_iter_mut().enumerate().for_each(|(j, y_j)| { + let col = a.column(j); + *y_j = col.iter().zip(x.iter()).map(|(&a_ij, &x_i)| a_ij * x_i).sum(); + }); + } else { + // Sequential for smaller matrices + for j in 0..n_cols { + let col = a.column(j); + y[j] = col.iter().zip(x.iter()).map(|(&a_ij, &x_i)| a_ij * x_i).sum(); + } + } + + y + } +} + +fn matrix_multiply(a: &Array2, b: &Array2) -> Array2 { + // Basic matrix multiplication C = A * B + let (a_rows, a_cols) = a.dim(); + let (b_rows, b_cols) = b.dim(); + + assert_eq!(a_cols, b_rows, "Matrix dimensions do not match for multiplication"); + + let mut c = Array2::zeros((a_rows, b_cols)); + + // For large matrices, use parallel execution + if a_rows * b_cols > 10000 { + c.axis_iter_mut(Axis(0)) + .into_par_iter() + .enumerate() + .for_each(|(i, mut row)| { + for j in 0..b_cols { + let mut sum = T::zero(); + for k in 0..a_cols { + sum += a[[i, k]] * b[[k, j]]; + } + row[j] = sum; + } + }); + } else { + // For smaller matrices, sequential is faster due to less overhead + for i in 0..a_rows { + for j in 0..b_cols { + let mut sum = T::zero(); + for k in 0..a_cols { + sum += a[[i, k]] * b[[k, j]]; + } + c[[i, j]] = sum; + } + } + } + + c +} + +fn compute_svd_dense( + a: Array2, + rank: usize, +) -> Result<(Array2, Array1, Array2), SvdLibError> { + // For the dense SVD computation, we'll use a simplified approach + // In a real implementation, you would use a high-quality SVD library + // such as LAPACK via ndarray-linalg or similar + + let (n_rows, n_cols) = a.dim(); + + // Form A^T * A (or A * A^T for tall matrices) + let mut ata = if n_rows <= n_cols { + // For wide matrices, compute A^T * A (smaller) + let mut ata = Array2::zeros((n_cols, n_cols)); + + for i in 0..n_cols { + for j in 0..=i { + let mut sum = T::zero(); + for k in 0..n_rows { + sum += a[[k, i]] * a[[k, j]]; + } + ata[[i, j]] = sum; + if i != j { + ata[[j, i]] = sum; // Symmetric matrix + } + } + } + ata + } else { + // For tall matrices, compute A * A^T (smaller) + let mut aat = Array2::zeros((n_rows, n_rows)); + + for i in 0..n_rows { + for j in 0..=i { + let mut sum = T::zero(); + for k in 0..n_cols { + sum += a[[i, k]] * a[[j, k]]; + } + aat[[i, j]] = sum; + if i != j { + aat[[j, i]] = sum; // Symmetric matrix + } + } + } + aat + }; + + // Compute eigendecomposition of A^T*A or A*A^T + // For simplicity, we'll use a basic power iteration method + // In practice, use a 
specialized eigenvalue solver + let (eigvals, eigvecs) = compute_eigen_decomposition(&mut ata, rank)?; + + let mut s = Array1::zeros(rank); + for i in 0..rank { + // Singular values are square roots of eigenvalues + s[i] = eigvals[i].abs().sqrt(); + } + + let mut v = if n_rows <= n_cols { + // Wide matrix case: we computed A^T*A, so eigvecs are V + eigvecs + } else { + // Tall matrix case: we computed A*A^T, so need to compute V = A^T*U*S^(-1) + let mut v = Array2::zeros((n_cols, rank)); + + for j in 0..rank { + if s[j] > T::from_f64(1e-10).unwrap() { + for i in 0..n_cols { + let mut sum = T::zero(); + for k in 0..n_rows { + sum += a[[k, i]] * eigvecs[[k, j]]; + } + v[[i, j]] = sum / s[j]; + } + } + } + v + }; + + let mut u = if n_rows <= n_cols { + // Wide matrix case: compute U = A*V*S^(-1) + let mut u = Array2::zeros((n_rows, rank)); + + for j in 0..rank { + if s[j] > T::from_f64(1e-10).unwrap() { + for i in 0..n_rows { + let mut sum = T::zero(); + for k in 0..n_cols { + sum += a[[i, k]] * v[[k, j]]; + } + u[[i, j]] = sum / s[j]; + } + } + } + u + } else { + // Tall matrix case: we computed A*A^T, so eigvecs are U + eigvecs + }; + + // Ensure orthogonality and normalize + u = orthogonalize(u)?; + v = orthogonalize(v)?; + + Ok((u, s, v)) +} + +fn compute_eigen_decomposition( + a: &mut Array2, + rank: usize, +) -> Result<(Vec, Array2), SvdLibError> { + let n = a.dim().0; + let mut eigenvalues = vec![T::zero(); n]; + let mut eigenvectors = Array2::zeros((n, n)); + + // Initialize eigenvectors to identity matrix + for i in 0..n { + eigenvectors[[i, i]] = T::one(); + } + + // We'll use the QR algorithm with shifts + // In practice, use a specialized library for this + + // For simplicity, we'll just compute a few iterations of QR decomposition + // This is a simplified approach - real implementations would use more robust methods + let max_iter = 30; + + for _ in 0..max_iter { + // For each iteration, perform a QR decomposition step + let q = orthogonalize(eigenvectors.clone())?; + + // Compute R = Q^T * A * Q (Rayleigh quotient) + let r = matrix_multiply(&matrix_multiply(&q.t().to_owned(), a), &q); + + // Update eigenvectors + eigenvectors = q; + + // Update matrix for next iteration + *a = r; + } + + // Extract eigenvalues from diagonal + for i in 0..n { + eigenvalues[i] = a[[i, i]]; + } + + // Sort eigenvalues and eigenvectors by decreasing eigenvalue magnitude + let mut indices: Vec = (0..n).collect(); + indices.sort_by(|&i, &j| eigenvalues[j].abs().partial_cmp(&eigenvalues[i].abs()).unwrap()); + + let mut sorted_values = vec![T::zero(); n]; + let mut sorted_vectors = Array2::zeros((n, n)); + + for (new_idx, &old_idx) in indices.iter().enumerate() { + sorted_values[new_idx] = eigenvalues[old_idx]; + for i in 0..n { + sorted_vectors[[i, new_idx]] = eigenvectors[[i, old_idx]]; + } + } + + // Return only the top 'rank' eigenvalues and eigenvectors + let top_values = sorted_values.into_iter().take(rank).collect(); + let top_vectors = sorted_vectors.slice(s![.., 0..rank]).to_owned(); + + Ok((top_values, top_vectors)) +} \ No newline at end of file From b5ad644c805575ff1191207da9fe1131d14e6ad8 Mon Sep 17 00:00:00 2001 From: Ian Date: Mon, 14 Apr 2025 18:26:40 +0000 Subject: [PATCH 2/7] First random SVD test --- Cargo.lock | 178 +++++++++- Cargo.toml | 6 +- src/lib.rs | 185 +++++----- src/new.rs | 63 +++- src/randomized/mod.rs | 775 ++++++++++++++---------------------------- 5 files changed, 551 insertions(+), 656 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a94414..b6aa8c3 100644 
--- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "anyhow" +version = "1.0.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" + [[package]] name = "approx" version = "0.5.1" @@ -11,6 +17,38 @@ dependencies = [ "num-traits", ] +[[package]] +name = "argmin" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "760a49d596b18b881d2fe6e9e6da4608fa64d4a7653ef5cd43bfaa4da018d596" +dependencies = [ + "anyhow", + "argmin-math", + "instant", + "num-traits", + "paste", + "rand 0.8.5", + "rand_xoshiro", + "rayon", + "thiserror 1.0.69", +] + +[[package]] +name = "argmin-math" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93a0d0269b60bd1cd674de70314e3f0da97406cf8c1936ce760d2a46e0f13fe" +dependencies = [ + "anyhow", + "cfg-if", + "num-complex", + "num-integer", + "num-traits", + "rand 0.8.5", + "thiserror 1.0.69", +] + [[package]] name = "autocfg" version = "1.4.0" @@ -35,6 +73,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + [[package]] name = "cfg-if" version = "1.0.0" @@ -72,6 +116,17 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.3.2" @@ -81,7 +136,27 @@ dependencies = [ "cfg-if", "libc", "r-efi", - "wasi", + "wasi 0.14.2+wasi-0.2.4", +] + +[[package]] +name = "image" +version = "0.25.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db35664ce6b9810857a38a906215e75a9c879f0696556a39f59c62829710251a" +dependencies = [ + "bytemuck", + "byteorder-lite", + "num-traits", +] + +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", ] [[package]] @@ -118,6 +193,7 @@ dependencies = [ "num-complex", "num-rational", "num-traits", + "rayon", "simba", "typenum", ] @@ -158,6 +234,17 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "nshare" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60f2f3256fd1d647fdaac74abad0682766a6ffde01f8ccc138f9e1c641ac997c" +dependencies = [ + "image", + "nalgebra", + "ndarray", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -215,9 +302,9 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "portable-atomic" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" +checksum = 
"350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" [[package]] name = "portable-atomic-util" @@ -261,17 +348,38 @@ version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + [[package]] name = "rand" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" dependencies = [ - "rand_chacha", - "rand_core", + "rand_chacha 0.9.0", + "rand_core 0.9.3", "zerocopy 0.8.24", ] +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + [[package]] name = "rand_chacha" version = "0.9.0" @@ -279,7 +387,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.3", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.15", ] [[package]] @@ -288,7 +405,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom", + "getrandom 0.3.2", ] [[package]] @@ -298,7 +415,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" dependencies = [ "num-traits", - "rand", + "rand 0.9.0", +] + +[[package]] +name = "rand_xoshiro" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" +dependencies = [ + "rand_core 0.6.4", ] [[package]] @@ -353,13 +479,17 @@ dependencies = [ name = "single-svdlib" version = "0.4.0" dependencies = [ + "anyhow", + "argmin", + "nalgebra", "nalgebra-sparse", "ndarray", + "nshare", "num-traits", - "rand", + "rand 0.9.0", "rand_distr", "rayon", - "thiserror", + "thiserror 2.0.9", ] [[package]] @@ -373,13 +503,33 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + [[package]] name = "thiserror" version = "2.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f072643fd0190df67a8bab670c20ef5d8737177d6ac6b2e9a236cb096206b2cc" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.9", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -405,6 +555,12 @@ version = "1.0.14" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + [[package]] name = "wasi" version = "0.14.2+wasi-0.2.4" diff --git a/Cargo.toml b/Cargo.toml index 221fcee..5e8b054 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,10 +9,14 @@ edition = "2021" license-file = "SVDLIBC-LICENSE.txt" [dependencies] +anyhow = "1.0.97" +argmin = { version = "0.10.0", features = ["rayon"] } nalgebra-sparse = "0.10.0" -ndarray = "0.16.1" num-traits = "0.2.19" rand = "0.9.0" rand_distr = "0.5.1" rayon = "1.10.0" thiserror = "2.0.9" +nshare = {version = "0.10.0", features = ["nalgebra", "ndarray"] } +ndarray = "0.16.1" +nalgebra = { version = "0.33.2", features = ["rayon"] } diff --git a/src/lib.rs b/src/lib.rs index 9422b98..6dbc8ae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,10 +4,11 @@ mod new; mod masked; pub(crate) mod utils; -pub mod randomized; +pub(crate) mod randomized; pub use new::*; pub use masked::*; +pub use randomized::{randomized_svd, PowerIterationNormalizer}; #[cfg(test)] mod simple_comparison_tests { @@ -17,6 +18,8 @@ mod simple_comparison_tests { use nalgebra_sparse::CsrMatrix; use rand::{Rng, SeedableRng}; use rand::rngs::StdRng; + use rayon::ThreadPoolBuilder; + use crate::randomized::randomized_svd; fn create_sparse_matrix(rows: usize, cols: usize, density: f64) -> nalgebra_sparse::coo::CooMatrix { use rand::{rngs::StdRng, Rng, SeedableRng}; @@ -152,8 +155,6 @@ mod simple_comparison_tests { } } - - #[test] fn test_real_sparse_matrix() { // Create a matrix with similar sparsity to your real one (99.02%) @@ -165,114 +166,86 @@ mod simple_comparison_tests { } #[test] - fn test_real_sparse_matrix_very_big() { - // Create a matrix with similar sparsity to your real one (99.02%) - let test_matrix = create_sparse_matrix(10000, 1000, 0.01); // 0.98% non-zeros - - // Should no longer fail with convergence error - let thread_pool = rayon::ThreadPoolBuilder::new().num_threads(3).build().unwrap(); - let result = thread_pool.install(|| { - svd_dim_seed(&test_matrix, 50, 42) - }); - assert!(result.is_ok(), "{}", format!("SVD failed on 99% sparse matrix, {:?}", result.err().unwrap())); - } - - #[test] - fn test_real_sparse_matrix_very_very_big() { - // Create a matrix with similar sparsity to your real one (99.02%) - let test_matrix = create_sparse_matrix(100000, 2500, 0.01); // 0.98% non-zeros - - // Should no longer fail with convergence error - - let result = svd(&test_matrix); - assert!(result.is_ok(), "{}", format!("SVD failed on 99% sparse matrix, {:?}", result.err().unwrap())); - } - - //#[test] - fn f32_precision_test() { - let seed = 12345; - let (nrows, ncols) = (40, 20); - let mut rng = StdRng::seed_from_u64(seed); - - // Create random sparse matrix with f32 values - let mut coo_f32 = CooMatrix::::new(nrows, ncols); - // And the same matrix with f64 values - let mut coo_f64 = CooMatrix::::new(nrows, ncols); - - // Insert the same random non-zero elements in both matrices - for _ in 0..(nrows * ncols / 4) { // ~25% density - let i = rng.gen_range(0..nrows); - let j = rng.gen_range(0..ncols); - let value = rng.gen_range(-10.0..10.0); - coo_f32.push(i, j, value as f32); - coo_f64.push(i, j, value); - } - - let csr_f32 = CsrMatrix::from(&coo_f32); - let csr_f64 = CsrMatrix::from(&coo_f64); - - 
// Calculate SVD for both types - let svd_f32 = svd_dim_seed(&csr_f32, 0, seed as u32).unwrap(); - let svd_f64 = svd_dim_seed(&csr_f64, 0, seed as u32).unwrap(); - - // Adaptive tolerance - increases for smaller singular values - // and values further down the list (which are more affected by accumulated errors) - fn calculate_tolerance(index: usize, magnitude: f64) -> f64 { - // Base tolerance is 0.2% - let base_tol = 0.002; - - // Scale up for smaller values (more affected by precision) - let magnitude_factor = if magnitude < 1.0 { - 2.0 // Double tolerance for very small values - } else if magnitude < 10.0 { - 1.5 // 1.5x tolerance for moderately small values - } else { - 1.0 // Normal tolerance for larger values - }; - - // Scale up for later indices (more affected by accumulated errors) - let index_factor = 1.0 + (index as f64 * 0.001); // Add 0.1% per index - - base_tol * magnitude_factor * index_factor - } - - println!("f32 rank: {}, f64 rank: {}", svd_f32.d, svd_f64.d); - - // Compare singular values up to the minimum rank - let min_rank = svd_f32.d.min(svd_f64.d); - for i in 0..min_rank { - let f32_val = svd_f32.s[i]; - let f64_val = svd_f64.s[i]; - let abs_diff = (f64_val - f32_val as f64).abs(); - let rel_diff = abs_diff / f64_val.max(f32_val as f64); - - // Calculate appropriate tolerance for this value - let tol = calculate_tolerance(i, f64_val); + fn test_random_svd_computation() { + use crate::{randomized_svd, PowerIterationNormalizer}; + + // Create a matrix with high sparsity (99%) + let test_matrix = create_sparse_matrix(1000, 250, 0.01); // 1% non-zeros + + // Convert to CSR for processing + let csr = CsrMatrix::from(&test_matrix); + + // Run randomized SVD with reasonable defaults for a sparse matrix + let result = randomized_svd( + &csr, + 50, // target rank + 10, // oversampling parameter + 3, // power iterations + PowerIterationNormalizer::QR, // use QR normalization + Some(42), // random seed + ); + + // Verify the computation succeeds on a highly sparse matrix + assert!( + result.is_ok(), + "Randomized SVD failed on 99% sparse matrix: {:?}", + result.err().unwrap() + ); + + // Additional checks on the result if successful + if let Ok(svd_result) = result { + // Verify dimensions match expectations + assert_eq!(svd_result.d, 50, "Expected rank of 50"); + + // Verify singular values are positive and in descending order + for i in 0..svd_result.s.len() { + assert!(svd_result.s[i] > 0.0, "Singular values should be positive"); + if i > 0 { + assert!( + svd_result.s[i-1] >= svd_result.s[i], + "Singular values should be in descending order" + ); + } + } - println!("Singular value {}: f32 = {}, f64 = {}, rel_diff = {:.6}%, tol = {:.6}%", - i, f32_val, f64_val, rel_diff * 100.0, tol * 100.0); + // Verify basics of U and V dimensions + assert_eq!(svd_result.ut.nrows(), 50, "U transpose should have 50 rows"); + assert_eq!(svd_result.ut.ncols(), 1000, "U transpose should have 1000 columns"); + assert_eq!(svd_result.vt.nrows(), 50, "V transpose should have 50 rows"); + assert_eq!(svd_result.vt.ncols(), 250, "V transpose should have 250 columns"); - assert!( - rel_diff <= tol, - "Singular value {} differs too much: f64 = {}, f32 = {}. 
Relative diff: {} > tolerance: {}", - i, f64_val, f32_val, rel_diff, tol - ); } + } - // Optional: Also check that overall behavior is reasonable - // For example, check that both implementations find similar condition numbers - if min_rank >= 2 { - let condition_f32 = svd_f32.s[0] / svd_f32.s[min_rank - 1]; - let condition_f64 = svd_f64.s[0] / svd_f64.s[min_rank - 1]; - let condition_rel_diff = ((condition_f64 - condition_f32 as f64) / condition_f64).abs(); + #[test] + fn test_randomized_svd_very_large_sparse_matrix() { + use crate::{randomized_svd, PowerIterationNormalizer}; + + // Create a very large matrix with high sparsity (99%) + let test_matrix = create_sparse_matrix(100000, 2500, 0.01); // 1% non-zeros + + // Convert to CSR for processing + let csr = CsrMatrix::from(&test_matrix); + + // Run randomized SVD with reasonable defaults for a sparse matrix + let threadpool = ThreadPoolBuilder::new().num_threads(10).build().unwrap(); + let result = threadpool.install(|| { + randomized_svd( + &csr, + 50, // target rank + 10, // oversampling parameter + 2, // power iterations + PowerIterationNormalizer::QR, // use QR normalization + Some(42), // random seed + ) + }); - println!("Condition number: f32 = {}, f64 = {}, rel_diff = {:.6}%", - condition_f32, condition_f64, condition_rel_diff * 100.0); - // Condition number can vary more, use 5% tolerance - assert!(condition_rel_diff <= 0.05, - "Condition numbers differ too much: f32 = {}, f64 = {}", - condition_f32, condition_f64); - } + // Simply verify that the computation succeeds on a highly sparse matrix + assert!( + result.is_ok(), + "Randomized SVD failed on 99% sparse matrix: {:?}", + result.err().unwrap() + ); } } \ No newline at end of file diff --git a/src/new.rs b/src/new.rs index faf5e53..e50b0b0 100644 --- a/src/new.rs +++ b/src/new.rs @@ -1157,7 +1157,7 @@ fn ritvec( let sparsity = T::one() - (T::from_usize(A.nnz()).unwrap() - / (T::from_usize(A.nrows()).unwrap() * T::from_usize(A.ncols()).unwrap())); + / (T::from_usize(A.nrows()).unwrap() * T::from_usize(A.ncols()).unwrap())); let epsilon = T::epsilon(); let adaptive_eps = if sparsity > T::from_f64(0.99).unwrap() { @@ -1322,7 +1322,7 @@ fn ritvec( S[i] = sval; let ut_offset = i * Ut.cols; let mut ut_vec = a_products[i].clone(); - + if sval > adaptive_eps { svd_dscal(T::one() / sval, &mut ut_vec); } else { @@ -1541,7 +1541,7 @@ impl SMat for nalgebra_sparse::cs } #[rustfmt::skip] -impl SMat for nalgebra_sparse::csr::CsrMatrix { +impl SMat for nalgebra_sparse::csr::CsrMatrix { fn nrows(&self) -> usize { self.nrows() } fn ncols(&self) -> usize { self.ncols() } fn nnz(&self) -> usize { self.nnz() } @@ -1556,23 +1556,60 @@ impl SMat for nalgebra_sparse::cs let (major_offsets, minor_indices, values) = self.csr_data(); - for y_val in y.iter_mut() { - *y_val = T::zero(); - } + y.fill(T::zero()); if !transposed { - for (i, yval) in y.iter_mut().enumerate() { - for j in major_offsets[i]..major_offsets[i + 1] { - *yval += values[j] * x[minor_indices[j]]; - } + let nrows = self.nrows(); + let chunk_size = crate::utils::determine_chunk_size(nrows); + + // Create thread-local vectors with results + let results: Vec<(usize, T)> = (0..nrows) + .into_par_iter() + .map(|i| { + let mut sum = T::zero(); + for j in major_offsets[i]..major_offsets[i + 1] { + sum += values[j] * x[minor_indices[j]]; + } + (i, sum) + }) + .collect(); + + // Apply the results to y + for (i, val) in results { + y[i] = val; } } else { - for (i, xval) in x.iter().enumerate() { - for j in major_offsets[i]..major_offsets[i + 
1] { - y[minor_indices[j]] += values[j] * *xval; + let nrows = self.nrows(); + let chunk_size = crate::utils::determine_chunk_size(nrows); + + // Process input in chunks and create partial results + let results: Vec> = (0..((nrows + chunk_size - 1) / chunk_size)) + .into_par_iter() + .map(|chunk_idx| { + let start = chunk_idx * chunk_size; + let end = (start + chunk_size).min(nrows); + + let mut local_y = vec![T::zero(); y.len()]; + for i in start..end { + let row_val = x[i]; + for j in major_offsets[i]..major_offsets[i + 1] { + let col = minor_indices[j]; + local_y[col] += values[j] * row_val; + } + } + local_y + }) + .collect(); + + // Combine partial results + for local_y in results { + for (idx, val) in local_y.iter().enumerate() { + if !val.is_zero() { + y[idx] += *val; } } } + } } } diff --git a/src/randomized/mod.rs b/src/randomized/mod.rs index c716397..1c15057 100644 --- a/src/randomized/mod.rs +++ b/src/randomized/mod.rs @@ -1,593 +1,318 @@ +use rayon::iter::ParallelIterator; use crate::error::SvdLibError; use crate::{Diagnostics, SMat, SvdFloat, SvdRec}; -use ndarray::{s, Array, Array1, Array2, Axis}; -use num_traits::Float; -use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng}; -use rand_distr::{Distribution, Normal}; -use rayon::prelude::*; -use std::cmp::min; - -/// Computes a randomized SVD with default parameters -/// -/// # Parameters -/// - A: Sparse matrix -/// - rank: Number of singular values/vectors to compute -/// -/// # Returns -/// - SvdRec: Singular value decomposition (U, S, V) -pub fn randomized_svd(a: &M, rank: usize) -> Result, SvdLibError> -where - T: SvdFloat, - M: SMat, -{ - let oversampling = 10; - let n_power_iterations = 2; - let random_seed = 0; // Will be generated randomly - randomized_svd_full(a, rank, oversampling, n_power_iterations, random_seed) +use nalgebra_sparse::na::{ComplexField, DMatrix, DVector, RealField}; +use ndarray::{Array1, Array2}; +use nshare::IntoNdarray2; +use rand::prelude::{Distribution, StdRng}; +use rand::SeedableRng; +use rand_distr::Normal; +use std::ops::Mul; +use rayon::current_num_threads; +use rayon::prelude::{IndexedParallelIterator, IntoParallelIterator}; + +pub enum PowerIterationNormalizer { + QR, + LU, + None, } -/// Computes a randomized SVD with control over oversampling and power iterations -/// -/// # Parameters -/// - A: Sparse matrix -/// - rank: Number of singular values/vectors to compute -/// - oversampling: Additional columns to sample for improved accuracy (typically 5-10) -/// - n_power_iterations: Number of power iterations to enhance accuracy for smaller singular values -/// -/// # Returns -/// - SvdRec: Singular value decomposition (U, S, V) -pub fn randomized_svd_with_params( - a: &M, - rank: usize, - oversampling: usize, - n_power_iterations: usize, -) -> Result, SvdLibError> -where - T: SvdFloat, - M: SMat, -{ - randomized_svd_full(a, rank, oversampling, n_power_iterations, 0) -} -/// Computes a randomized SVD with full parameter control -/// -/// # Parameters -/// - A: Sparse matrix -/// - rank: Number of singular values/vectors to compute -/// - oversampling: Additional columns to sample for improved accuracy -/// - n_power_iterations: Number of power iterations to enhance accuracy for smaller singular values -/// - random_seed: Seed for random number generation (0 for random seed) -/// -/// # Returns -/// - SvdRec: Singular value decomposition (U, S, V) -pub fn randomized_svd_full( - a: &M, - rank: usize, - oversampling: usize, - n_power_iterations: usize, - random_seed: u32, -) -> Result, 
SvdLibError> +const PARALLEL_THRESHOLD_ROWS: usize = 5000; +const PARALLEL_THRESHOLD_COLS: usize = 1000; +const PARALLEL_THRESHOLD_ELEMENTS: usize = 100_000; + +pub fn randomized_svd( + m: &M, + target_rank: usize, + n_oversamples: usize, + n_power_iters: usize, + power_iteration_normalizer: PowerIterationNormalizer, + seed: Option, +) -> anyhow::Result> where - T: SvdFloat, + T: SvdFloat + RealField, M: SMat, + T: ComplexField, { - // Determine the actual seed to use - let seed = if random_seed == 0 { - // Use the system time as a seed if none is provided - use std::time::{SystemTime, UNIX_EPOCH}; - let duration = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("Time went backwards"); - duration.as_nanos() as u32 - } else { - random_seed - }; - - // Validate parameters - let nrows = a.nrows(); - let ncols = a.ncols(); - let min_dim = min(nrows, ncols); - - if rank == 0 { - return Err(SvdLibError::Las2Error( - "Rank must be greater than 0".to_string(), - )); - } - - if rank > min_dim { - return Err(SvdLibError::Las2Error(format!( - "Requested rank {} exceeds matrix dimensions {}x{}", - rank, nrows, ncols - ))); - } - - // The target rank with oversampling (can't exceed matrix dimensions) - let target_rank = min(rank + oversampling, min_dim); + let m_rows = m.nrows(); + let m_cols = m.ncols(); - // Transpose large matrices for efficiency if needed - let transposed = ncols > nrows; - let (work_rows, work_cols) = if transposed { - (ncols, nrows) - } else { - (nrows, ncols) - }; + let rank = target_rank.min(m_rows.min(m_cols)); + let l = rank + n_oversamples; - // Stage 1: Generate a random projection matrix Omega - let omega = generate_random_matrix(work_cols, target_rank, seed)?; + let omega = generate_random_matrix(m_cols, l, seed); - // Stage 2: Form Y = A * Omega (or Y = A^T * Omega if transposed) - let mut y = Array2::zeros((work_rows, target_rank)); + let mut y = DMatrix::::zeros(m_rows, l); + multiply_matrix(m, &omega, &mut y, false); - // Fill Y by matrix-vector products - for j in 0..target_rank { - let omega_col = omega.slice(s![.., j]).to_vec(); - let mut y_col = vec![T::zero(); work_rows]; + if n_power_iters > 0 { + let mut z = DMatrix::::zeros(m_cols, l); - // Apply A or A^T - a.svd_opa(&omega_col, &mut y_col, transposed); + for _ in 0..n_power_iters { + multiply_matrix(m, &y, &mut z, true); + match power_iteration_normalizer { + PowerIterationNormalizer::QR => { + let qr = z.qr(); + z = qr.q(); + } + PowerIterationNormalizer::LU => { + normalize_columns(&mut z); + } + PowerIterationNormalizer::None => {} + } - // Copy result to column of Y - for i in 0..work_rows { - y[[i, j]] = y_col[i]; + multiply_matrix(m, &z, &mut y, false); + match power_iteration_normalizer { + PowerIterationNormalizer::QR => { + let qr = y.qr(); + y = qr.q(); + } + PowerIterationNormalizer::LU => normalize_columns(&mut y), + PowerIterationNormalizer::None => {} + } } } - // Stage 3: Power iteration scheme to increase accuracy for smaller singular values - if n_power_iterations > 0 { - // Compute power iterations (Y = (A*A^T)^q * Y) - y = power_iteration(a, y, n_power_iterations, transposed)?; - } - - // Stage 4: Orthogonalize the basis using QR decomposition - let q = orthogonalize(y)?; + let qr = y.qr(); + let q = qr.q(); - // Stage 5: Form B = Q^T * A (or B = Q^T * A^T if transposed) - let mut b = Array2::zeros((target_rank, work_cols)); + let mut b = DMatrix::::zeros(q.ncols(), m_cols); + multiply_transposed_by_matrix(&q, m, &mut b); - // Fill B by matrix-vector products - for i in 
0..work_cols { - let mut e_i = vec![T::zero(); work_cols]; - e_i[i] = T::one(); + let svd = b.svd(true, true); + let u_b = svd + .u + .ok_or_else(|| SvdLibError::Las2Error("SVD U computation failed".to_string()))?; + let singular_values = svd.singular_values; + let vt = svd + .v_t + .ok_or_else(|| SvdLibError::Las2Error("SVD V_t computation failed".to_string()))?; - let mut b_row = vec![T::zero(); target_rank]; + let actual_rank = target_rank.min(singular_values.len()); - // Apply A or A^T - a.svd_opa(&e_i, &mut b_row, !transposed); + let u_b_subset = u_b.columns(0, actual_rank); + let u = q.mul(&u_b_subset); - // Apply Q^T - let qt_b = matrix_vector_multiply(&q, &b_row, true); + let vt_subset = vt.rows(0, actual_rank).into_owned(); - // Copy result to row of B - for j in 0..target_rank { - b[[j, i]] = qt_b[j]; - } - } + // Convert to the format required by SvdRec + let d = actual_rank; - // Stage 6: Compute the SVD of B - let (u_b, s_b, v_b) = compute_svd_dense(b, rank)?; + let ut = u.transpose().into_ndarray2(); + let s = convert_singular_values(>::from(singular_values.rows(0, actual_rank)), actual_rank); + let vt = vt_subset.into_ndarray2(); - // Stage 7: Form the SVD of A - let u_a = if transposed { - v_b - } else { - matrix_multiply(&q, &u_b) - }; - - let v_a = if transposed { - matrix_multiply(&q, &u_b) - } else { - v_b - }; - - // Return the SVD in the requested format Ok(SvdRec { - d: rank, - ut: if transposed { v_a } else { u_a.t().to_owned() }, - s: s_b, - vt: if transposed { u_a.t().to_owned() } else { v_a }, - diagnostics: Diagnostics { - non_zero: a.nnz(), - dimensions: rank, - iterations: n_power_iterations, - transposed, - lanczos_steps: 0, // Not applicable for randomized SVD - ritz_values_stabilized: 0, // Not applicable - significant_values: rank, - singular_values: rank, - end_interval: [T::zero(), T::zero()], // Not applicable - kappa: T::from_f64(1e-6).unwrap(), // Standard value - random_seed: seed, - }, + d, + ut, + s, + vt, + diagnostics: create_diagnostics(m, d, target_rank, n_power_iters, seed.unwrap_or(0) as u32), }) } -// Helper functions (implementations to be added) - -fn generate_random_matrix( - n_rows: usize, - n_cols: usize, - seed: u32, -) -> Result, SvdLibError> { - // Create a Gaussian random matrix - let mut rng = StdRng::seed_from_u64(seed as u64); - let normal = Normal::new(0.0, 1.0).unwrap(); +fn convert_singular_values( + values: DVector, + size: usize, +) -> Array1 { + let mut array = Array1::zeros(size); - let mut omega = Array2::zeros((n_rows, n_cols)); - - // Fill with random normal values - omega.par_iter_mut().for_each(|x| { - // Using local RNG for each parallel task - let mut local_rng = StdRng::from_entropy(); - *x = T::from_f64(normal.sample(&mut local_rng)).unwrap(); - }); + for i in 0..size { + // Convert from RealField to T using f64 as intermediate + array[i] = T::from_real(values[i].clone()); + } - Ok(omega) + array } -fn power_iteration( +fn create_diagnostics>( a: &M, - mut y: Array2, - n_iterations: usize, - transposed: bool, -) -> Result, SvdLibError> + d: usize, + target_rank: usize, + power_iterations: usize, + seed: u32, +) -> Diagnostics where T: SvdFloat, - M: SMat, { - let (n_rows, n_cols) = y.dim(); - - // Temporary arrays for matrix operations - let mut temp_rows = vec![T::zero(); a.nrows()]; - let mut temp_cols = vec![T::zero(); a.ncols()]; - - for _ in 0..n_iterations { - // Apply (A*A^T) to Y through two matrix-vector products - for j in 0..n_cols { - let y_col = y.slice(s![.., j]).to_vec(); - - // First multiply: temp 
= A^T * y_col (or A * y_col if transposed) - a.svd_opa(&y_col, &mut temp_cols, !transposed); - - // Second multiply: y_col = A * temp (or A^T * temp if transposed) - a.svd_opa(&temp_cols, &mut temp_rows, transposed); - - // Update Y - for i in 0..n_rows { - y[[i, j]] = temp_rows[i]; - } - } - - // Orthogonalize after each iteration for numerical stability - y = orthogonalize(y)?; + Diagnostics { + non_zero: a.nnz(), + dimensions: target_rank, + iterations: power_iterations, + transposed: false, + lanczos_steps: 0, // we dont do that + ritz_values_stabilized: d, + significant_values: d, + singular_values: d, + end_interval: [T::from(-1e-30).unwrap(), T::from(1e-30).unwrap()], + kappa: T::from(1e-6).unwrap(), + random_seed: seed, } - - Ok(y) } -fn orthogonalize(a: Array2) -> Result, SvdLibError> { - // Implement a modified Gram-Schmidt orthogonalization - // This is more numerically stable than the standard Gram-Schmidt process - let (n_rows, n_cols) = a.dim(); - let mut q = a.clone(); - - for j in 0..n_cols { - // Normalize the j-th column - let mut norm_squared = T::zero(); - for i in 0..n_rows { - norm_squared += q[[i, j]] * q[[i, j]]; - } - - let norm = norm_squared.sqrt(); +fn normalize_columns(matrix: &mut DMatrix) { + let rows = matrix.nrows(); + let cols = matrix.ncols(); - // Handle near-zero columns - if norm <= T::from_f64(1e-10).unwrap() { - // If column is essentially zero, replace with random vector - let mut rng = StdRng::from_entropy(); - let normal = Normal::new(0.0, 1.0).unwrap(); + // Use sequential processing for small matrices + if rows < PARALLEL_THRESHOLD_ROWS && cols < PARALLEL_THRESHOLD_COLS { + for j in 0..cols { + let mut norm = T::zero(); - for i in 0..n_rows { - q[[i, j]] = T::from_f64(normal.sample(&mut rng)).unwrap(); + // Calculate column norm + for i in 0..rows { + norm += ComplexField::powi(matrix[(i, j)], 2); } + norm = ComplexField::sqrt(norm); - // Recursively orthogonalize this column against previous columns - for k in 0..j { - let mut dot = T::zero(); - for i in 0..n_rows { - dot += q[[i, j]] * q[[i, k]]; - } - - for i in 0..n_rows { - q[[i, j]] -= dot * q[[i, k]]; - } - } - - // Normalize again - norm_squared = T::zero(); - for i in 0..n_rows { - norm_squared += q[[i, j]] * q[[i, j]]; - } - let norm = norm_squared.sqrt(); - - for i in 0..n_rows { - q[[i, j]] /= norm; - } - } else { - // Normalize the column - for i in 0..n_rows { - q[[i, j]] /= norm; - } - - // Orthogonalize remaining columns against this one - for k in (j+1)..n_cols { - let mut dot = T::zero(); - for i in 0..n_rows { - dot += q[[i, j]] * q[[i, k]]; - } - - for i in 0..n_rows { - q[[i, k]] -= dot * q[[i, j]]; + // Normalize the column if the norm is not too small + if norm > T::from_f64(1e-10).unwrap() { + let scale = T::one() / norm; + for i in 0..rows { + matrix[(i, j)] *= scale; } } } + return; } - Ok(q) -} - -fn matrix_vector_multiply( - a: &Array2, - x: &[T], - transpose: bool, -) -> Vec { - let (n_rows, n_cols) = a.dim(); - - if !transpose { - // y = A * x - assert_eq!(x.len(), n_cols, "Vector length must match number of columns"); - - let mut y = vec![T::zero(); n_rows]; - - if n_rows > 1000 { - // Parallel implementation for large matrices - y.par_iter_mut().enumerate().for_each(|(i, y_i)| { - let row = a.row(i); - *y_i = row.iter().zip(x.iter()).map(|(&a_ij, &x_j)| a_ij * x_j).sum(); - }); - } else { - // Sequential for smaller matrices to avoid parallel overhead - for i in 0..n_rows { - let row = a.row(i); - y[i] = row.iter().zip(x.iter()).map(|(&a_ij, &x_j)| a_ij * 
x_j).sum(); + let norms: Vec = (0..cols) + .into_par_iter() + .map(|j| { + let mut norm = T::zero(); + for i in 0..rows { + let val = unsafe { *matrix.get_unchecked((i, j)) }; + norm += ComplexField::powi(val, 2); } - } - - y - } else { - // y = A^T * x - assert_eq!(x.len(), n_rows, "Vector length must match number of rows"); - - let mut y = vec![T::zero(); n_cols]; - - if n_cols > 1000 { - // Parallel implementation for large matrices - y.par_iter_mut().enumerate().for_each(|(j, y_j)| { - let col = a.column(j); - *y_j = col.iter().zip(x.iter()).map(|(&a_ij, &x_i)| a_ij * x_i).sum(); - }); - } else { - // Sequential for smaller matrices - for j in 0..n_cols { - let col = a.column(j); - y[j] = col.iter().zip(x.iter()).map(|(&a_ij, &x_i)| a_ij * x_i).sum(); + ComplexField::sqrt(norm) + }) + .collect(); + + // Now create a vector of (column_index, scale) pairs + let scales: Vec<(usize, T)> = norms + .into_iter() + .enumerate() + .filter_map(|(j, norm)| { + if norm > T::from_f64(1e-10).unwrap() { + Some((j, T::one() / norm)) + } else { + None // Skip columns with too small norms } - } - - y - } + }) + .collect(); + + // Apply normalization + scales + .iter() + .for_each(|(j, scale)| { + for i in 0..rows { + let value = matrix.get_mut((i,*j)).unwrap(); + *value = value.clone() * scale.clone(); + } + }); } -fn matrix_multiply(a: &Array2, b: &Array2) -> Array2 { - // Basic matrix multiplication C = A * B - let (a_rows, a_cols) = a.dim(); - let (b_rows, b_cols) = b.dim(); - - assert_eq!(a_cols, b_rows, "Matrix dimensions do not match for multiplication"); - - let mut c = Array2::zeros((a_rows, b_cols)); - - // For large matrices, use parallel execution - if a_rows * b_cols > 10000 { - c.axis_iter_mut(Axis(0)) - .into_par_iter() - .enumerate() - .for_each(|(i, mut row)| { - for j in 0..b_cols { - let mut sum = T::zero(); - for k in 0..a_cols { - sum += a[[i, k]] * b[[k, j]]; - } - row[j] = sum; - } - }); - } else { - // For smaller matrices, sequential is faster due to less overhead - for i in 0..a_rows { - for j in 0..b_cols { - let mut sum = T::zero(); - for k in 0..a_cols { - sum += a[[i, k]] * b[[k, j]]; +// ---------------------------------------- +// Utils Functions +// ---------------------------------------- + + +fn generate_random_matrix( + rows: usize, + cols: usize, + seed: Option, +) -> DMatrix { + //if rows < PARALLEL_THRESHOLD_ROWS && cols < PARALLEL_THRESHOLD_COLS && rows * cols < PARALLEL_THRESHOLD_ELEMENTS { + let mut rng = match seed { + Some(s) => StdRng::seed_from_u64(s), + None => StdRng::seed_from_u64(0), + }; + + let normal = Normal::new(0.0, 1.0).unwrap(); + return DMatrix::from_fn(rows, cols, |_, _| { + T::from_f64(normal.sample(&mut rng)).unwrap() + }); + //} + + /*let seed_value = seed.unwrap_or(0); + let mut matrix = DMatrix::::zeros(rows, cols); + let num_threads = current_num_threads(); + let chunk_size = (rows * cols + num_threads - 1) / num_threads; + + (0..(rows * cols)).into_par_iter() + .chunks(chunk_size) + .enumerate() + .for_each(|(chunk_idx, indices)| { + let thread_seed = seed_value.wrapping_add(chunk_idx as u64); + let mut rng = StdRng::seed_from_u64(thread_seed); + let normal = Normal::new(0.0, 1.0).unwrap(); + for idx in indices { + let i = idx / cols; + let j = idx % cols; + unsafe { + *matrix.get_unchecked_mut((i, j)) = T::from_f64(normal.sample(&mut rng)).unwrap(); } - c[[i, j]] = sum; } - } - } + }); + matrix*/ - c } -fn compute_svd_dense( - a: Array2, - rank: usize, -) -> Result<(Array2, Array1, Array2), SvdLibError> { - // For the dense SVD 
computation, we'll use a simplified approach - // In a real implementation, you would use a high-quality SVD library - // such as LAPACK via ndarray-linalg or similar - - let (n_rows, n_cols) = a.dim(); - - // Form A^T * A (or A * A^T for tall matrices) - let mut ata = if n_rows <= n_cols { - // For wide matrices, compute A^T * A (smaller) - let mut ata = Array2::zeros((n_cols, n_cols)); - - for i in 0..n_cols { - for j in 0..=i { - let mut sum = T::zero(); - for k in 0..n_rows { - sum += a[[k, i]] * a[[k, j]]; - } - ata[[i, j]] = sum; - if i != j { - ata[[j, i]] = sum; // Symmetric matrix - } - } - } - ata - } else { - // For tall matrices, compute A * A^T (smaller) - let mut aat = Array2::zeros((n_rows, n_rows)); - - for i in 0..n_rows { - for j in 0..=i { - let mut sum = T::zero(); - for k in 0..n_cols { - sum += a[[i, k]] * a[[j, k]]; - } - aat[[i, j]] = sum; - if i != j { - aat[[j, i]] = sum; // Symmetric matrix - } +fn multiply_matrix>( + sparse: &M, + dense: &DMatrix, + result: &mut DMatrix, + transpose_sparse: bool, +) { + let cols = dense.ncols(); + //let matrix_rows = if transpose_sparse { sparse.ncols() } else { sparse.nrows() }; + + //if matrix_rows < PARALLEL_THRESHOLD_ROWS && cols < PARALLEL_THRESHOLD_COLS { + let mut col_vec = vec![T::zero(); dense.nrows()]; + let mut result_vec = vec![T::zero(); result.nrows()]; + + for j in 0..cols { + // Extract column from dense matrix + for i in 0..dense.nrows() { + col_vec[i] = dense[(i, j)]; } - } - aat - }; - - // Compute eigendecomposition of A^T*A or A*A^T - // For simplicity, we'll use a basic power iteration method - // In practice, use a specialized eigenvalue solver - let (eigvals, eigvecs) = compute_eigen_decomposition(&mut ata, rank)?; - - let mut s = Array1::zeros(rank); - for i in 0..rank { - // Singular values are square roots of eigenvalues - s[i] = eigvals[i].abs().sqrt(); - } - let mut v = if n_rows <= n_cols { - // Wide matrix case: we computed A^T*A, so eigvecs are V - eigvecs - } else { - // Tall matrix case: we computed A*A^T, so need to compute V = A^T*U*S^(-1) - let mut v = Array2::zeros((n_cols, rank)); - - for j in 0..rank { - if s[j] > T::from_f64(1e-10).unwrap() { - for i in 0..n_cols { - let mut sum = T::zero(); - for k in 0..n_rows { - sum += a[[k, i]] * eigvecs[[k, j]]; - } - v[[i, j]] = sum / s[j]; - } - } - } - v - }; - - let mut u = if n_rows <= n_cols { - // Wide matrix case: compute U = A*V*S^(-1) - let mut u = Array2::zeros((n_rows, rank)); - - for j in 0..rank { - if s[j] > T::from_f64(1e-10).unwrap() { - for i in 0..n_rows { - let mut sum = T::zero(); - for k in 0..n_cols { - sum += a[[i, k]] * v[[k, j]]; - } - u[[i, j]] = sum / s[j]; - } + // Perform sparse matrix operation + sparse.svd_opa(&col_vec, &mut result_vec, transpose_sparse); + + // Store results + for i in 0..result.nrows() { + result[(i, j)] = result_vec[i]; } + + // Clear result vector for reuse + result_vec.iter_mut().for_each(|v| *v = T::zero()); } - u - } else { - // Tall matrix case: we computed A*A^T, so eigvecs are U - eigvecs - }; + return; + //} - // Ensure orthogonality and normalize - u = orthogonalize(u)?; - v = orthogonalize(v)?; - Ok((u, s, v)) } -fn compute_eigen_decomposition( - a: &mut Array2, - rank: usize, -) -> Result<(Vec, Array2), SvdLibError> { - let n = a.dim().0; - let mut eigenvalues = vec![T::zero(); n]; - let mut eigenvectors = Array2::zeros((n, n)); - - // Initialize eigenvectors to identity matrix - for i in 0..n { - eigenvectors[[i, i]] = T::one(); - } - - // We'll use the QR algorithm with shifts - // In 
practice, use a specialized library for this - - // For simplicity, we'll just compute a few iterations of QR decomposition - // This is a simplified approach - real implementations would use more robust methods - let max_iter = 30; - - for _ in 0..max_iter { - // For each iteration, perform a QR decomposition step - let q = orthogonalize(eigenvectors.clone())?; - - // Compute R = Q^T * A * Q (Rayleigh quotient) - let r = matrix_multiply(&matrix_multiply(&q.t().to_owned(), a), &q); - - // Update eigenvectors - eigenvectors = q; - - // Update matrix for next iteration - *a = r; - } - - // Extract eigenvalues from diagonal - for i in 0..n { - eigenvalues[i] = a[[i, i]]; - } - - // Sort eigenvalues and eigenvectors by decreasing eigenvalue magnitude - let mut indices: Vec = (0..n).collect(); - indices.sort_by(|&i, &j| eigenvalues[j].abs().partial_cmp(&eigenvalues[i].abs()).unwrap()); - - let mut sorted_values = vec![T::zero(); n]; - let mut sorted_vectors = Array2::zeros((n, n)); - - for (new_idx, &old_idx) in indices.iter().enumerate() { - sorted_values[new_idx] = eigenvalues[old_idx]; - for i in 0..n { - sorted_vectors[[i, new_idx]] = eigenvectors[[i, old_idx]]; +fn multiply_transposed_by_matrix>( + q: &DMatrix, + sparse: &M, + result: &mut DMatrix, +) { + for j in 0..sparse.ncols() { + let mut unit_vec = vec![T::zero(); sparse.ncols()]; + unit_vec[j] = T::one(); + + let mut col_vec = vec![T::zero(); sparse.nrows()]; + sparse.svd_opa(&unit_vec, &mut col_vec, false); + + for i in 0..q.ncols() { + let mut sum = T::zero(); + for k in 0..q.nrows() { + sum += q[(k, i)] * col_vec[k]; + } + result[(i, j)] = sum; } } - - // Return only the top 'rank' eigenvalues and eigenvectors - let top_values = sorted_values.into_iter().take(rank).collect(); - let top_vectors = sorted_vectors.slice(s![.., 0..rank]).to_owned(); - - Ok((top_values, top_vectors)) -} \ No newline at end of file +} From f834a2791e38cd453bf675f422e2464cc4b70db3 Mon Sep 17 00:00:00 2001 From: Ian Date: Mon, 14 Apr 2025 20:27:03 +0000 Subject: [PATCH 3/7] Added optimited random SVD algorithm --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/lib.rs | 34 ++++++- src/randomized/mod.rs | 207 +++++++++++++++++++++++++----------------- 4 files changed, 161 insertions(+), 84 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b6aa8c3..ea4eb52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -477,7 +477,7 @@ dependencies = [ [[package]] name = "single-svdlib" -version = "0.4.0" +version = "0.5.0" dependencies = [ "anyhow", "argmin", diff --git a/Cargo.toml b/Cargo.toml index 5e8b054..f304368 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ description = "A Rust port of LAS2 from SVDLIBC" keywords = ["svd"] categories = ["algorithms", "data-structures", "mathematics", "science"] name = "single-svdlib" -version = "0.4.0" +version = "0.5.0" edition = "2021" license-file = "SVDLIBC-LICENSE.txt" diff --git a/src/lib.rs b/src/lib.rs index 6dbc8ae..d78cb40 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -226,7 +226,7 @@ mod simple_comparison_tests { // Convert to CSR for processing let csr = CsrMatrix::from(&test_matrix); - + // Run randomized SVD with reasonable defaults for a sparse matrix let threadpool = ThreadPoolBuilder::new().num_threads(10).build().unwrap(); let result = threadpool.install(|| { @@ -241,6 +241,38 @@ mod simple_comparison_tests { }); + // Simply verify that the computation succeeds on a highly sparse matrix + assert!( + result.is_ok(), + "Randomized SVD failed on 99% sparse matrix: {:?}", + result.err().unwrap() + ); + } + + 
#[test] + fn test_randomized_svd_small_sparse_matrix() { + use crate::{randomized_svd, PowerIterationNormalizer}; + + // Create a very large matrix with high sparsity (99%) + let test_matrix = create_sparse_matrix(1000, 250, 0.01); // 1% non-zeros + + // Convert to CSR for processing + let csr = CsrMatrix::from(&test_matrix); + + // Run randomized SVD with reasonable defaults for a sparse matrix + let threadpool = ThreadPoolBuilder::new().num_threads(10).build().unwrap(); + let result = threadpool.install(|| { + randomized_svd( + &csr, + 50, // target rank + 10, // oversampling parameter + 2, // power iterations + PowerIterationNormalizer::QR, // use QR normalization + Some(42), // random seed + ) + }); + + // Simply verify that the computation succeeds on a highly sparse matrix assert!( result.is_ok(), diff --git a/src/randomized/mod.rs b/src/randomized/mod.rs index 1c15057..be6aad8 100644 --- a/src/randomized/mod.rs +++ b/src/randomized/mod.rs @@ -1,15 +1,15 @@ -use rayon::iter::ParallelIterator; use crate::error::SvdLibError; use crate::{Diagnostics, SMat, SvdFloat, SvdRec}; use nalgebra_sparse::na::{ComplexField, DMatrix, DVector, RealField}; -use ndarray::{Array1, Array2}; +use ndarray::Array1; use nshare::IntoNdarray2; use rand::prelude::{Distribution, StdRng}; use rand::SeedableRng; use rand_distr::Normal; -use std::ops::Mul; -use rayon::current_num_threads; +use rayon::iter::ParallelIterator; use rayon::prelude::{IndexedParallelIterator, IntoParallelIterator}; +use std::ops::Mul; +use crate::utils::determine_chunk_size; pub enum PowerIterationNormalizer { QR, @@ -17,7 +17,6 @@ pub enum PowerIterationNormalizer { None, } - const PARALLEL_THRESHOLD_ROWS: usize = 5000; const PARALLEL_THRESHOLD_COLS: usize = 1000; const PARALLEL_THRESHOLD_ELEMENTS: usize = 100_000; @@ -35,22 +34,30 @@ where M: SMat, T: ComplexField, { + let start = std::time::Instant::now(); // only for debugging let m_rows = m.nrows(); let m_cols = m.ncols(); let rank = target_rank.min(m_rows.min(m_cols)); let l = rank + n_oversamples; + println!("Basic statistics: {:?}", start.elapsed()); let omega = generate_random_matrix(m_cols, l, seed); + println!("Generated Random Matrix here: {:?}", start.elapsed()); let mut y = DMatrix::::zeros(m_rows, l); multiply_matrix(m, &omega, &mut y, false); + println!( + "First multiplication took: {:?}, Continuing for power iterations:", + start.elapsed() + ); if n_power_iters > 0 { let mut z = DMatrix::::zeros(m_cols, l); - for _ in 0..n_power_iters { + for w in 0..n_power_iters { multiply_matrix(m, &y, &mut z, true); + println!("{}-nd power-iteration forward: {:?}", w, start.elapsed()); match power_iteration_normalizer { PowerIterationNormalizer::QR => { let qr = z.qr(); @@ -61,8 +68,14 @@ where } PowerIterationNormalizer::None => {} } + println!( + "{}-nd power-iteration forward, normalization: {:?}", + w, + start.elapsed() + ); multiply_matrix(m, &z, &mut y, false); + println!("{}-nd power-iteration backward: {:?}", w, start.elapsed()); match power_iteration_normalizer { PowerIterationNormalizer::QR => { let qr = y.qr(); @@ -71,16 +84,30 @@ where PowerIterationNormalizer::LU => normalize_columns(&mut y), PowerIterationNormalizer::None => {} } + println!( + "{}-nd power-iteration backward, normalization: {:?}", + w, + start.elapsed() + ); } } - + println!( + "Finished power-iteration, continuing QR: {:?}", + start.elapsed() + ); let qr = y.qr(); + println!("QR finished: {:?}", start.elapsed()); let q = qr.q(); let mut b = DMatrix::::zeros(q.ncols(), m_cols); 
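+    // Project the sparse matrix onto the orthonormal range basis Q obtained above:
+    // B = Q^T * A is a small (l x n) dense matrix, and its SVD (computed next)
+    // gives the approximate singular values and right singular vectors of A.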
multiply_transposed_by_matrix(&q, m, &mut b); + println!( + "QMB matrix multiplication transposed: {:?}", + start.elapsed() + ); let svd = b.svd(true, true); + println!("SVD decomposition took: {:?}", start.elapsed()); let u_b = svd .u .ok_or_else(|| SvdLibError::Las2Error("SVD U computation failed".to_string()))?; @@ -98,10 +125,15 @@ where // Convert to the format required by SvdRec let d = actual_rank; + println!("SVD Result Cropping: {:?}", start.elapsed()); let ut = u.transpose().into_ndarray2(); - let s = convert_singular_values(>::from(singular_values.rows(0, actual_rank)), actual_rank); + let s = convert_singular_values( + >::from(singular_values.rows(0, actual_rank)), + actual_rank, + ); let vt = vt_subset.into_ndarray2(); + println!("Translation to ndarray: {:?}", start.elapsed()); Ok(SvdRec { d, @@ -203,60 +235,32 @@ fn normalize_columns(matrix: &mut DMatrix .collect(); // Apply normalization - scales - .iter() - .for_each(|(j, scale)| { - for i in 0..rows { - let value = matrix.get_mut((i,*j)).unwrap(); - *value = value.clone() * scale.clone(); - } - }); + scales.iter().for_each(|(j, scale)| { + for i in 0..rows { + let value = matrix.get_mut((i, *j)).unwrap(); + *value = value.clone() * scale.clone(); + } + }); } // ---------------------------------------- // Utils Functions // ---------------------------------------- - fn generate_random_matrix( rows: usize, cols: usize, seed: Option, ) -> DMatrix { - //if rows < PARALLEL_THRESHOLD_ROWS && cols < PARALLEL_THRESHOLD_COLS && rows * cols < PARALLEL_THRESHOLD_ELEMENTS { - let mut rng = match seed { - Some(s) => StdRng::seed_from_u64(s), - None => StdRng::seed_from_u64(0), - }; - - let normal = Normal::new(0.0, 1.0).unwrap(); - return DMatrix::from_fn(rows, cols, |_, _| { - T::from_f64(normal.sample(&mut rng)).unwrap() - }); - //} - - /*let seed_value = seed.unwrap_or(0); - let mut matrix = DMatrix::::zeros(rows, cols); - let num_threads = current_num_threads(); - let chunk_size = (rows * cols + num_threads - 1) / num_threads; - - (0..(rows * cols)).into_par_iter() - .chunks(chunk_size) - .enumerate() - .for_each(|(chunk_idx, indices)| { - let thread_seed = seed_value.wrapping_add(chunk_idx as u64); - let mut rng = StdRng::seed_from_u64(thread_seed); - let normal = Normal::new(0.0, 1.0).unwrap(); - for idx in indices { - let i = idx / cols; - let j = idx % cols; - unsafe { - *matrix.get_unchecked_mut((i, j)) = T::from_f64(normal.sample(&mut rng)).unwrap(); - } - } - }); - matrix*/ - + let mut rng = match seed { + Some(s) => StdRng::seed_from_u64(s), + None => StdRng::seed_from_u64(0), + }; + + let normal = Normal::new(0.0, 1.0).unwrap(); + DMatrix::from_fn(rows, cols, |_, _| { + T::from_f64(normal.sample(&mut rng)).unwrap() + }) } fn multiply_matrix>( @@ -266,53 +270,94 @@ fn multiply_matrix>( transpose_sparse: bool, ) { let cols = dense.ncols(); - //let matrix_rows = if transpose_sparse { sparse.ncols() } else { sparse.nrows() }; - //if matrix_rows < PARALLEL_THRESHOLD_ROWS && cols < PARALLEL_THRESHOLD_COLS { - let mut col_vec = vec![T::zero(); dense.nrows()]; - let mut result_vec = vec![T::zero(); result.nrows()]; + let results: Vec<(usize, Vec)> = (0..cols) + .into_par_iter() + .map(|j| { + let mut col_vec = vec![T::zero(); dense.nrows()]; + let mut result_vec = vec![T::zero(); result.nrows()]; - for j in 0..cols { - // Extract column from dense matrix for i in 0..dense.nrows() { col_vec[i] = dense[(i, j)]; } - // Perform sparse matrix operation sparse.svd_opa(&col_vec, &mut result_vec, transpose_sparse); - // Store results - 
for i in 0..result.nrows() { - result[(i, j)] = result_vec[i]; - } + (j, result_vec) + }) + .collect(); - // Clear result vector for reuse - result_vec.iter_mut().for_each(|v| *v = T::zero()); + for (j, col_result) in results { + for i in 0..result.nrows() { + result[(i, j)] = col_result[i]; } - return; - //} - - + } } fn multiply_transposed_by_matrix>( - q: &DMatrix, + q: &DMatrix, sparse: &M, result: &mut DMatrix, ) { - for j in 0..sparse.ncols() { - let mut unit_vec = vec![T::zero(); sparse.ncols()]; - unit_vec[j] = T::one(); - - let mut col_vec = vec![T::zero(); sparse.nrows()]; - sparse.svd_opa(&unit_vec, &mut col_vec, false); - - for i in 0..q.ncols() { - let mut sum = T::zero(); - for k in 0..q.nrows() { - sum += q[(k, i)] * col_vec[k]; + let q_rows = q.nrows(); + let q_cols = q.ncols(); + let sparse_rows = sparse.nrows(); + let sparse_cols = sparse.ncols(); + + eprintln!("Q dimensions: {} x {}", q_rows, q_cols); + eprintln!("Sparse dimensions: {} x {}", sparse_rows, sparse_cols); + eprintln!("Result dimensions: {} x {}", result.nrows(), result.ncols()); + + assert_eq!( + q_rows, sparse_rows, + "Dimension mismatch: Q has {} rows but sparse has {} rows", + q_rows, sparse_rows + ); + + assert_eq!( + result.nrows(), + q_cols, + "Result matrix has incorrect row count: expected {}, got {}", + q_cols, + result.nrows() + ); + assert_eq!( + result.ncols(), + sparse_cols, + "Result matrix has incorrect column count: expected {}, got {}", + sparse_cols, + result.ncols() + ); + + let chunk_size = determine_chunk_size(q_cols); + + let chunk_results: Vec)>> = (0..q_cols) + .into_par_iter() + .chunks(chunk_size) + .map(|chunk| { + let mut chunk_results = Vec::with_capacity(chunk.len()); + + for &col_idx in &chunk { + let mut q_col = vec![T::zero(); q_rows]; + for i in 0..q_rows { + q_col[i] = q[(i, col_idx)]; + } + + let mut result_row = vec![T::zero(); sparse_cols]; + + sparse.svd_opa(&q_col, &mut result_row, true); + + chunk_results.push((col_idx, result_row)); + } + chunk_results + }) + .collect(); + + for chunk_result in chunk_results { + for (row_idx, row_values) in chunk_result { + for j in 0..sparse_cols { + result[(row_idx, j)] = row_values[j]; } - result[(i, j)] = sum; } } } From ef2f5b55f74f8dfe4b036560de8b4e96066e7b32 Mon Sep 17 00:00:00 2001 From: Ian Date: Tue, 15 Apr 2025 06:39:37 +0000 Subject: [PATCH 4/7] Reorganized structure into multiple modules --- src/{ => laczos}/masked.rs | 10 +-- src/{new.rs => laczos/mod.rs} | 111 ++-------------------------------- src/lib.rs | 39 ++++++------ src/utils.rs | 106 ++++++++++++++++++++++++++++++++ 4 files changed, 134 insertions(+), 132 deletions(-) rename src/{ => laczos}/masked.rs (97%) rename src/{new.rs => laczos/mod.rs} (94%) diff --git a/src/masked.rs b/src/laczos/masked.rs similarity index 97% rename from src/masked.rs rename to src/laczos/masked.rs index d10006f..d8a21c8 100644 --- a/src/masked.rs +++ b/src/laczos/masked.rs @@ -76,7 +76,7 @@ impl<'a, T: Float + AddAssign + Sync + Send> SMat for MaskedCSRMatrix<'a, T> for i in 0..self.matrix.nrows() { for j in major_offsets[i]..major_offsets[i + 1] { - let col = minor_indices[j]; + let col = minor_indices[j]; if self.column_mask[col] { count += 1; } @@ -213,7 +213,7 @@ impl<'a, T: Float + AddAssign + Sync + Send> SMat for MaskedCSRMatrix<'a, T> #[cfg(test)] mod tests { use super::*; - use crate::{svd, SMat}; + use crate::{SMat}; use nalgebra_sparse::{coo::CooMatrix, csr::CsrMatrix}; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; @@ -243,7 +243,7 @@ mod tests { 
assert_eq!(masked.nnz(), 6); // Only entries in the selected columns // Test SVD on the masked matrix - let svd_result = svd(&masked); + let svd_result = crate::laczos::svd(&masked); assert!(svd_result.is_ok()); } @@ -304,8 +304,8 @@ mod tests { assert_eq!(masked_matrix.nnz(), physical_csr.nnz()); // Perform SVD on both - let svd_masked = svd(&masked_matrix).unwrap(); - let svd_physical = svd(&physical_csr).unwrap(); + let svd_masked = crate::laczos::svd(&masked_matrix).unwrap(); + let svd_physical = crate::laczos::svd(&physical_csr).unwrap(); // Compare SVD results - they should be very close but not exactly the same // due to potential differences in numerical computation diff --git a/src/new.rs b/src/laczos/mod.rs similarity index 94% rename from src/new.rs rename to src/laczos/mod.rs index e50b0b0..99789b5 100644 --- a/src/new.rs +++ b/src/laczos/mod.rs @@ -1,5 +1,8 @@ +pub mod masked; + use crate::error::SvdLibError; -use ndarray::{Array, Array1, Array2}; +use crate::{Diagnostics, SMat, SvdFloat, SvdRec}; +use ndarray::{Array, Array2}; use num_traits::real::Real; use num_traits::{Float, FromPrimitive, One, Zero}; use rand::rngs::StdRng; @@ -12,106 +15,7 @@ use std::iter::Sum; use std::mem; use std::ops::{AddAssign, MulAssign, Neg, SubAssign}; -pub trait SMat: Sync { - fn nrows(&self) -> usize; - fn ncols(&self) -> usize; - fn nnz(&self) -> usize; - fn svd_opa(&self, x: &[T], y: &mut [T], transposed: bool); // y = A*x -} - -/// Singular Value Decomposition Components -/// -/// # Fields -/// - d: Dimensionality (rank), the number of rows of both `ut`, `vt` and the length of `s` -/// - ut: Transpose of left singular vectors, the vectors are the rows of `ut` -/// - s: Singular values (length `d`) -/// - vt: Transpose of right singular vectors, the vectors are the rows of `vt` -/// - diagnostics: Computational diagnostics -#[derive(Debug, Clone, PartialEq)] -pub struct SvdRec { - pub d: usize, - pub ut: Array2, - pub s: Array1, - pub vt: Array2, - pub diagnostics: Diagnostics, -} - -/// Computational Diagnostics -/// -/// # Fields -/// - non_zero: Number of non-zeros in the matrix -/// - dimensions: Number of dimensions attempted (bounded by matrix shape) -/// - iterations: Number of iterations attempted (bounded by dimensions and matrix shape) -/// - transposed: True if the matrix was transposed internally -/// - lanczos_steps: Number of Lanczos steps performed -/// - ritz_values_stabilized: Number of ritz values -/// - significant_values: Number of significant values discovered -/// - singular_values: Number of singular values returned -/// - end_interval: left, right end of interval containing unwanted eigenvalues -/// - kappa: relative accuracy of ritz values acceptable as eigenvalues -/// - random_seed: Random seed provided or the seed generated -#[derive(Debug, Clone, PartialEq)] -pub struct Diagnostics { - pub non_zero: usize, - pub dimensions: usize, - pub iterations: usize, - pub transposed: bool, - pub lanczos_steps: usize, - pub ritz_values_stabilized: usize, - pub significant_values: usize, - pub singular_values: usize, - pub end_interval: [T; 2], - pub kappa: T, - pub random_seed: u32, -} - /// Trait for floating point types that can be used with the SVD algorithm -pub trait SvdFloat: - Float - + FromPrimitive - + Debug - + Send - + Sync - + Zero - + One - + AddAssign - + SubAssign - + MulAssign - + Neg - + Sum -{ - fn eps() -> Self; - fn eps34() -> Self; - fn compare(a: Self, b: Self) -> bool; -} - -impl SvdFloat for f32 { - fn eps() -> Self { - f32::EPSILON - } - - fn 
eps34() -> Self { - f32::EPSILON.powf(0.75) - } - - fn compare(a: Self, b: Self) -> bool { - (b - a).abs() < f32::EPSILON - } -} - -impl SvdFloat for f64 { - fn eps() -> Self { - f64::EPSILON - } - - fn eps34() -> Self { - f64::EPSILON.powf(0.75) - } - - fn compare(a: Self, b: Self) -> bool { - (b - a).abs() < f64::EPSILON - } -} /// SVD at full dimensionality, calls `svdLAS2` with the highlighted defaults /// @@ -1229,7 +1133,6 @@ fn ritvec( let significant_indices: Vec = (0..js) .into_par_iter() .filter(|&k| { - // Adaptive error bound check using relative tolerance let relative_bound = adaptive_kappa * wrk.ritz[k].abs().max(max_eigenvalue * adaptive_eps); wrk.bnd[k] <= relative_bound && k + 1 > js - neig @@ -1242,11 +1145,10 @@ fn ritvec( .into_par_iter() .map(|k| { let mut vec = vec![T::zero(); wrk.ncols]; - let mut idx = (jsq - js) + k + 1; for i in 0..js { - idx -= js; - // Non-zero check with adaptive threshold + let idx = k * js + i; + if s[idx].abs() > adaptive_eps { for (j, item) in store_vectors[i].iter().enumerate().take(wrk.ncols) { vec[j] += s[idx] * *item; @@ -1254,7 +1156,6 @@ fn ritvec( } } - // Return with position index (for proper ordering) (k, vec) }) .collect(); diff --git a/src/lib.rs b/src/lib.rs index d78cb40..17564c0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,14 +1,13 @@ pub mod legacy; pub mod error; -mod new; -mod masked; pub(crate) mod utils; -pub(crate) mod randomized; +pub mod randomized; + +pub mod laczos; + +pub use utils::*; -pub use new::*; -pub use masked::*; -pub use randomized::{randomized_svd, PowerIterationNormalizer}; #[cfg(test)] mod simple_comparison_tests { @@ -19,7 +18,6 @@ mod simple_comparison_tests { use rand::{Rng, SeedableRng}; use rand::rngs::StdRng; use rayon::ThreadPoolBuilder; - use crate::randomized::randomized_svd; fn create_sparse_matrix(rows: usize, cols: usize, density: f64) -> nalgebra_sparse::coo::CooMatrix { use rand::{rngs::StdRng, Rng, SeedableRng}; @@ -77,7 +75,7 @@ mod simple_comparison_tests { // Run both implementations with the same seed for deterministic behavior let seed = 42; - let current_result = svd_dim_seed(&test_matrix, 0, seed).unwrap(); + let current_result = laczos::svd_dim_seed(&test_matrix, 0, seed).unwrap(); let legacy_result = legacy::svd_dim_seed(&test_matrix, 0, seed).unwrap(); // Compare dimensions @@ -129,12 +127,12 @@ mod simple_comparison_tests { let csr = CsrMatrix::from(&coo); // Calculate SVD using original method - let legacy_svd = svd_dim_seed(&csr, 0, seed as u32).unwrap(); + let legacy_svd = laczos::svd_dim_seed(&csr, 0, seed as u32).unwrap(); // Calculate SVD using our masked method (using all columns) let mask = vec![true; ncols]; - let masked_matrix = MaskedCSRMatrix::new(&csr, mask); - let current_svd = svd_dim_seed(&masked_matrix, 0, seed as u32).unwrap(); + let masked_matrix = laczos::masked::MaskedCSRMatrix::new(&csr, mask); + let current_svd = laczos::svd_dim_seed(&masked_matrix, 0, seed as u32).unwrap(); // Compare with relative tolerance let rel_tol = 1e-3; // 0.1% relative tolerance @@ -161,13 +159,12 @@ mod simple_comparison_tests { let test_matrix = create_sparse_matrix(100, 100, 0.0098); // 0.98% non-zeros // Should no longer fail with convergence error - let result = svd_dim_seed(&test_matrix, 50, 42); + let result = laczos::svd_dim_seed(&test_matrix, 50, 42); assert!(result.is_ok(), "{}", format!("SVD failed on 99.02% sparse matrix, {:?}", result.err().unwrap())); } #[test] fn test_random_svd_computation() { - use crate::{randomized_svd, PowerIterationNormalizer}; // Create 
a matrix with high sparsity (99%) let test_matrix = create_sparse_matrix(1000, 250, 0.01); // 1% non-zeros @@ -176,12 +173,12 @@ mod simple_comparison_tests { let csr = CsrMatrix::from(&test_matrix); // Run randomized SVD with reasonable defaults for a sparse matrix - let result = randomized_svd( + let result = randomized::randomized_svd( &csr, 50, // target rank 10, // oversampling parameter 3, // power iterations - PowerIterationNormalizer::QR, // use QR normalization + randomized::PowerIterationNormalizer::QR, // use QR normalization Some(42), // random seed ); @@ -219,7 +216,6 @@ mod simple_comparison_tests { #[test] fn test_randomized_svd_very_large_sparse_matrix() { - use crate::{randomized_svd, PowerIterationNormalizer}; // Create a very large matrix with high sparsity (99%) let test_matrix = create_sparse_matrix(100000, 2500, 0.01); // 1% non-zeros @@ -230,12 +226,12 @@ mod simple_comparison_tests { // Run randomized SVD with reasonable defaults for a sparse matrix let threadpool = ThreadPoolBuilder::new().num_threads(10).build().unwrap(); let result = threadpool.install(|| { - randomized_svd( + randomized::randomized_svd( &csr, 50, // target rank 10, // oversampling parameter - 2, // power iterations - PowerIterationNormalizer::QR, // use QR normalization + 7, // power iterations + randomized::PowerIterationNormalizer::QR, // use QR normalization Some(42), // random seed ) }); @@ -251,7 +247,6 @@ mod simple_comparison_tests { #[test] fn test_randomized_svd_small_sparse_matrix() { - use crate::{randomized_svd, PowerIterationNormalizer}; // Create a very large matrix with high sparsity (99%) let test_matrix = create_sparse_matrix(1000, 250, 0.01); // 1% non-zeros @@ -262,12 +257,12 @@ mod simple_comparison_tests { // Run randomized SVD with reasonable defaults for a sparse matrix let threadpool = ThreadPoolBuilder::new().num_threads(10).build().unwrap(); let result = threadpool.install(|| { - randomized_svd( + randomized::randomized_svd( &csr, 50, // target rank 10, // oversampling parameter 2, // power iterations - PowerIterationNormalizer::QR, // use QR normalization + randomized::PowerIterationNormalizer::QR, // use QR normalization Some(42), // random seed ) }); diff --git a/src/utils.rs b/src/utils.rs index 5c7aae0..e6b8027 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,3 +1,9 @@ +use ndarray::{Array1, Array2}; +use num_traits::{Float, FromPrimitive, One, Zero}; +use std::fmt::Debug; +use std::iter::Sum; +use std::ops::{AddAssign, MulAssign, Neg, SubAssign}; + pub fn determine_chunk_size(nrows: usize) -> usize { let num_threads = rayon::current_num_threads(); @@ -9,3 +15,103 @@ pub fn determine_chunk_size(nrows: usize) -> usize { chunk_size.max(min_rows_per_thread) } + +pub trait SMat: Sync { + fn nrows(&self) -> usize; + fn ncols(&self) -> usize; + fn nnz(&self) -> usize; + fn svd_opa(&self, x: &[T], y: &mut [T], transposed: bool); // y = A*x +} + +/// Singular Value Decomposition Components +/// +/// # Fields +/// - d: Dimensionality (rank), the number of rows of both `ut`, `vt` and the length of `s` +/// - ut: Transpose of left singular vectors, the vectors are the rows of `ut` +/// - s: Singular values (length `d`) +/// - vt: Transpose of right singular vectors, the vectors are the rows of `vt` +/// - diagnostics: Computational diagnostics +#[derive(Debug, Clone, PartialEq)] +pub struct SvdRec { + pub d: usize, + pub ut: Array2, + pub s: Array1, + pub vt: Array2, + pub diagnostics: Diagnostics, +} + +/// Computational Diagnostics +/// +/// # Fields +/// - non_zero: 
Number of non-zeros in the matrix +/// - dimensions: Number of dimensions attempted (bounded by matrix shape) +/// - iterations: Number of iterations attempted (bounded by dimensions and matrix shape) +/// - transposed: True if the matrix was transposed internally +/// - lanczos_steps: Number of Lanczos steps performed +/// - ritz_values_stabilized: Number of ritz values +/// - significant_values: Number of significant values discovered +/// - singular_values: Number of singular values returned +/// - end_interval: left, right end of interval containing unwanted eigenvalues +/// - kappa: relative accuracy of ritz values acceptable as eigenvalues +/// - random_seed: Random seed provided or the seed generated +#[derive(Debug, Clone, PartialEq)] +pub struct Diagnostics { + pub non_zero: usize, + pub dimensions: usize, + pub iterations: usize, + pub transposed: bool, + pub lanczos_steps: usize, + pub ritz_values_stabilized: usize, + pub significant_values: usize, + pub singular_values: usize, + pub end_interval: [T; 2], + pub kappa: T, + pub random_seed: u32, +} + +pub trait SvdFloat: + Float + + FromPrimitive + + Debug + + Send + + Sync + + Zero + + One + + AddAssign + + SubAssign + + MulAssign + + Neg + + Sum +{ + fn eps() -> Self; + fn eps34() -> Self; + fn compare(a: Self, b: Self) -> bool; +} + +impl SvdFloat for f32 { + fn eps() -> Self { + f32::EPSILON + } + + fn eps34() -> Self { + f32::EPSILON.powf(0.75) + } + + fn compare(a: Self, b: Self) -> bool { + (b - a).abs() < f32::EPSILON + } +} + +impl SvdFloat for f64 { + fn eps() -> Self { + f64::EPSILON + } + + fn eps34() -> Self { + f64::EPSILON.powf(0.75) + } + + fn compare(a: Self, b: Self) -> bool { + (b - a).abs() < f64::EPSILON + } +} From 8336e54b057e6e0103f1709acff449322052f83d Mon Sep 17 00:00:00 2001 From: Ian Date: Tue, 15 Apr 2025 06:45:52 +0000 Subject: [PATCH 5/7] Updated README and bumped version up --- Cargo.lock | 2 +- Cargo.toml | 2 +- README.md | 215 +++++++++++++++++++++++++++-------------------------- 3 files changed, 113 insertions(+), 106 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea4eb52..f78e18f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -477,7 +477,7 @@ dependencies = [ [[package]] name = "single-svdlib" -version = "0.5.0" +version = "0.6.0" dependencies = [ "anyhow", "argmin", diff --git a/Cargo.toml b/Cargo.toml index f304368..a310c7b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ description = "A Rust port of LAS2 from SVDLIBC" keywords = ["svd"] categories = ["algorithms", "data-structures", "mathematics", "science"] name = "single-svdlib" -version = "0.5.0" +version = "0.6.0" edition = "2021" license-file = "SVDLIBC-LICENSE.txt" diff --git a/README.md b/README.md index 2ea6e0d..3e8a66f 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,27 @@ -# single_svdlib +# Single-SVDLib: Singular Value Decomposition for Sparse Matrices -A Rust library for performing Singular Value Decomposition (SVD) on sparse matrices using the Lanczos algorithm. It is build on the original library and expan +[![Crate](https://img.shields.io/crates/v/single-svdlib.svg)](https://crates.io/crates/single-svdlib) +[![Documentation](https://docs.rs/single-svdlib/badge.svg)](https://docs.rs/single-svdlib) +[![License](https://img.shields.io/crates/l/single-svdlib.svg)](LICENSE) -## Overview - -`svdlibrs` is a Rust port of LAS2 from SVDLIBC, originally developed by Doug Rohde. This library efficiently computes SVD on sparse matrices, particularly large ones, and returns the decomposition as ndarray components. 
- -This implementation extends the original [svdlibrs](https://github.com/dfarnham/svdlibrs) by Dave Farnham with: -- Updated dependency versions -- Support for a broader range of numeric types (f64, f32, others) -- Column masking capabilities for analyzing specific subsets of data +A high-performance Rust library for computing Singular Value Decomposition (SVD) on sparse matrices, with support for both Lanczos and randomized SVD algorithms. ## Features -- Performs SVD on sparse matrices using the Lanczos algorithm -- Works with various input formats: CSR, CSC, or COO matrices -- Column masking for dimension selection without data copying -- Generic implementation supporting different numeric types -- High numerical precision for critical calculations +- **Multiple SVD algorithms**: + - Lanczos algorithm (based on SVDLIBC) + - Randomized SVD for very large and sparse matrices +- **Sparse matrix support**: + - Compressed Sparse Row (CSR) format + - Compressed Sparse Column (CSC) format + - Coordinate (COO) format +- **Performance optimizations**: + - Parallel execution with Rayon + - Adaptive tuning for highly sparse matrices + - Column masking for subspace SVD +- **Generic interface**: + - Works with both `f32` and `f64` precision +- **Comprehensive error handling and diagnostics** ## Installation @@ -25,144 +29,147 @@ Add this to your `Cargo.toml`: ```toml [dependencies] -single-svdlib = "0.1.0" -nalgebra-sparse = "0.10.0" -ndarray = "0.16.1" +single-svdlib = "0.6.0" ``` -## Basic Usage +## Quick Start ```rust -use single_svdlib::{svd, svd_dim, svd_dim_seed}; use nalgebra_sparse::{coo::CooMatrix, csr::CsrMatrix}; +use single_svdlib::laczos::svd_dim_seed; -// Create a sparse matrix +// Create a matrix in COO format let mut coo = CooMatrix::::new(3, 3); coo.push(0, 0, 1.0); coo.push(0, 1, 16.0); coo.push(0, 2, 49.0); coo.push(1, 0, 4.0); coo.push(1, 1, 25.0); coo.push(1, 2, 64.0); coo.push(2, 0, 9.0); coo.push(2, 1, 36.0); coo.push(2, 2, 81.0); +// Convert to CSR for better performance let csr = CsrMatrix::from(&coo); -// Compute SVD -let svd_result = svd(&csr)?; +// Compute SVD with a fixed random seed +let svd = svd_dim_seed(&csr, 3, 42).unwrap(); // Access the results -println!("Rank: {}", svd_result.d); -println!("Singular values: {:?}", svd_result.s); -println!("Left singular vectors (U): {:?}", svd_result.ut.t()); -println!("Right singular vectors (V): {:?}", svd_result.vt.t()); +let singular_values = &svd.s; +let left_singular_vectors = &svd.ut; // Note: These are transposed +let right_singular_vectors = &svd.vt; // Note: These are transposed // Reconstruct the original matrix -let reconstructed = svd_result.recompose(); +let reconstructed = svd.recompose(); ``` -## Column Masking +## SVD Methods -The library supports analyzing specific columns without copying the data: +### Lanczos Algorithm (LAS2) -```rust -use single_svdlib::{svd, MaskedCSRMatrix}; -use nalgebra_sparse::{coo::CooMatrix, csr::CsrMatrix}; +The Lanczos algorithm is well-suited for sparse matrices of moderate size: -// Create a sparse matrix -let mut coo = CooMatrix::::new(3, 5); -coo.push(0, 0, 1.0); coo.push(0, 2, 2.0); coo.push(0, 4, 3.0); -coo.push(1, 1, 4.0); coo.push(1, 3, 5.0); -coo.push(2, 0, 6.0); coo.push(2, 2, 7.0); coo.push(2, 4, 8.0); +```rust +use single_svdlib::laczos; -let csr = CsrMatrix::from(&coo); +// Basic SVD computation (uses defaults) +let svd = laczos::svd(&matrix)?; -// Method 1: Using a boolean mask (true = include column) -let mask = vec![true, false, true, false, true]; // Only columns 0, 
2, 4 -let masked_matrix = MaskedCSRMatrix::new(&csr, mask); +// SVD with specified target rank +let svd = laczos::svd_dim(&matrix, 10)?; -// Method 2: Specifying which columns to include -let columns = vec![0, 2, 4]; -let masked_matrix = MaskedCSRMatrix::with_columns(&csr, &columns); +// SVD with specified target rank and fixed random seed +let svd = laczos::svd_dim_seed(&matrix, 10, 42)?; -// Run SVD on the masked matrix -let svd_result = svd(&masked_matrix)?; +// Full control over SVD parameters +let svd = laczos::svd_las2( + &matrix, + dimensions, // upper limit of desired number of dimensions + iterations, // number of Lanczos iterations + end_interval, // interval containing unwanted eigenvalues, e.g. [-1e-30, 1e-30] + kappa, // relative accuracy of eigenvalues, e.g. 1e-6 + random_seed, // random seed (0 for automatic) +)?; ``` -## Support for Different Numeric Types +### Randomized SVD -The library supports various numeric types: +For very large sparse matrices, the randomized SVD algorithm offers better performance: ```rust -// With f64 (double precision) -let csr_f64 = CsrMatrix::::from(&coo); -let svd_result = svd(&csr_f64)?; - -// With f32 (single precision) -let csr_f32 = CsrMatrix::::from(&coo); -let svd_result = svd(&csr_f32)?; - -// With integer types (converted internally) -let csr_i32 = CsrMatrix::::from(&coo); -let masked_i32 = MaskedCSRMatrix::with_columns(&csr_i32, &columns); -let svd_result = svd(&masked_i32)?; +use single_svdlib::randomized; + +let svd = randomized::randomized_svd( + &matrix, + target_rank, // desired rank + n_oversamples, // oversampling parameter (typically 5-10) + n_power_iterations, // number of power iterations (typically 2-4) + randomized::PowerIterationNormalizer::QR, // normalization method + Some(42), // random seed (None for automatic) +)?; ``` -## Advanced Usage +### Column Masking -For more control over the SVD computation: +For operations on specific columns of a matrix: ```rust -use single_svdlib::{svdLAS2, SvdRec}; - -// Customize the SVD calculation -let svd: SvdRec = svdLAS2( - &matrix, // sparse matrix - dimensions, // upper limit of desired dimensions (0 = max) - iterations, // number of algorithm iterations (0 = auto) - &[-1.0e-30, 1.0e-30], // interval for unwanted eigenvalues - 1.0e-6, // relative accuracy threshold - random_seed, // random seed (0 = auto-generate) -)?; +use single_svdlib::laczos::masked::MaskedCSRMatrix; + +// Create a mask for selected columns +let columns = vec![0, 2, 5, 7]; // Only use these columns +let masked_matrix = MaskedCSRMatrix::with_columns(&csr_matrix, &columns); + +// Compute SVD on the masked matrix +let svd = laczos::svd(&masked_matrix)?; ``` -## SVD Results and Diagnostics +## Result Structure -The SVD results are returned in a `SvdRec` struct: +The SVD result contains: ```rust -pub struct SvdRec { - pub d: usize, // Dimensionality (rank) - pub ut: Array2, // Transpose of left singular vectors - pub s: Array1, // Singular values - pub vt: Array2, // Transpose of right singular vectors - pub diagnostics: Diagnostics, // Computational diagnostics +struct SvdRec { + d: usize, // Rank (number of singular values) + ut: Array2, // Transpose of left singular vectors (d x m) + s: Array1, // Singular values (d) + vt: Array2, // Transpose of right singular vectors (d x n) + diagnostics: Diagnostics, // Computation diagnostics } ``` -The `Diagnostics` struct provides detailed information about the computation: +Note that `ut` and `vt` are returned in transposed form. 
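+
+As an illustrative sketch (not part of the crate's API), the rank-`d` approximation can also be assembled by hand from these fields, continuing from the `svd` value in the Quick Start example and assuming `ndarray` is a direct dependency of your project:
+
+```rust
+use ndarray::Array2;
+
+// A ≈ U * S * V^T, with `ut` (d x m) and `vt` (d x n) stored row-wise
+let u = svd.ut.t();                       // m x d
+let sigma = Array2::from_diag(&svd.s);    // d x d diagonal matrix of singular values
+let approx = u.dot(&sigma).dot(&svd.vt);  // m x n rank-d reconstruction
+```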
+ +## Diagnostics + +Each SVD computation returns detailed diagnostics: ```rust -pub struct Diagnostics { - pub non_zero: usize, // Number of non-zeros in the input matrix - pub dimensions: usize, // Number of dimensions attempted - pub iterations: usize, // Number of iterations attempted - pub transposed: bool, // True if the matrix was transposed internally - pub lanczos_steps: usize, // Number of Lanczos steps - pub ritz_values_stabilized: usize, // Number of ritz values - pub significant_values: usize, // Number of significant values - pub singular_values: usize, // Number of singular values - pub end_interval: [f64; 2], // Interval for unwanted eigenvalues - pub kappa: f64, // Relative accuracy threshold - pub random_seed: u32, // Random seed used -} +let svd = laczos::svd(&matrix)?; +println!("Non-zero elements: {}", svd.diagnostics.non_zero); +println!("Transposed during computation: {}", svd.diagnostics.transposed); +println!("Lanczos steps: {}", svd.diagnostics.lanczos_steps); +println!("Significant values found: {}", svd.diagnostics.significant_values); ``` -## License +## Performance Tips -This library is provided under the BSD License, as per the original SVDLIBC implementation. +1. **Choose the right algorithm**: + - For matrices up to ~10,000 x 10,000 with moderate sparsity, use the Lanczos algorithm + - For larger matrices or very high sparsity (>99%), use randomized SVD + +2. **Matrix format matters**: + - Convert COO matrices to CSR or CSC for computation + - CSR typically performs better for row-oriented operations + +3. **Adjust parameters for very sparse matrices**: + - Increase power iterations in randomized SVD (e.g., 5-7) + - Use a higher `kappa` value in Lanczos for very sparse matrices + +4. **Consider column masking** for operations that only need a subset of the data + +## License -## Acknowledgments +This crate is licensed under the BSD License, the same as the original SVDLIBC implementation. See the `SVDLIBC-LICENSE.txt` file for details. -- Dave Farnham for the original Rust port -- Doug Rohde for the original SVDLIBC implementation -- University of Tennessee Research Foundation for the underlying mathematical library +## Credits -[Latest Version]: https://img.shields.io/crates/v/single-svdlib.svg -[crates.io]: https://crates.io/crates/single-svdlib \ No newline at end of file +- Original SVDLIBC implementation by Doug Rohde +- Rust port maintainer of SVDLIBC: Dave Farnham +- Extensions and modifications of the original algorithm: Ian F. 
Diks From 74b8ec518aeaf9eb8f1b9dd1873af9f4b4e57dc9 Mon Sep 17 00:00:00 2001 From: Ian Date: Tue, 15 Apr 2025 09:21:14 +0200 Subject: [PATCH 6/7] Fixed typo --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/{laczos => lanczos}/masked.rs | 6 +++--- src/{laczos => lanczos}/mod.rs | 0 src/lib.rs | 12 ++++++------ 5 files changed, 11 insertions(+), 11 deletions(-) rename src/{laczos => lanczos}/masked.rs (98%) rename src/{laczos => lanczos}/mod.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index f78e18f..894bfd4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -477,7 +477,7 @@ dependencies = [ [[package]] name = "single-svdlib" -version = "0.6.0" +version = "0.7.0" dependencies = [ "anyhow", "argmin", diff --git a/Cargo.toml b/Cargo.toml index a310c7b..42cf8f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ description = "A Rust port of LAS2 from SVDLIBC" keywords = ["svd"] categories = ["algorithms", "data-structures", "mathematics", "science"] name = "single-svdlib" -version = "0.6.0" +version = "0.7.0" edition = "2021" license-file = "SVDLIBC-LICENSE.txt" diff --git a/src/laczos/masked.rs b/src/lanczos/masked.rs similarity index 98% rename from src/laczos/masked.rs rename to src/lanczos/masked.rs index d8a21c8..6f991e4 100644 --- a/src/laczos/masked.rs +++ b/src/lanczos/masked.rs @@ -243,7 +243,7 @@ mod tests { assert_eq!(masked.nnz(), 6); // Only entries in the selected columns // Test SVD on the masked matrix - let svd_result = crate::laczos::svd(&masked); + let svd_result = crate::lanczos::svd(&masked); assert!(svd_result.is_ok()); } @@ -304,8 +304,8 @@ mod tests { assert_eq!(masked_matrix.nnz(), physical_csr.nnz()); // Perform SVD on both - let svd_masked = crate::laczos::svd(&masked_matrix).unwrap(); - let svd_physical = crate::laczos::svd(&physical_csr).unwrap(); + let svd_masked = crate::lanczos::svd(&masked_matrix).unwrap(); + let svd_physical = crate::lanczos::svd(&physical_csr).unwrap(); // Compare SVD results - they should be very close but not exactly the same // due to potential differences in numerical computation diff --git a/src/laczos/mod.rs b/src/lanczos/mod.rs similarity index 100% rename from src/laczos/mod.rs rename to src/lanczos/mod.rs diff --git a/src/lib.rs b/src/lib.rs index 17564c0..bbba6c2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,7 @@ pub(crate) mod utils; pub mod randomized; -pub mod laczos; +pub mod lanczos; pub use utils::*; @@ -75,7 +75,7 @@ mod simple_comparison_tests { // Run both implementations with the same seed for deterministic behavior let seed = 42; - let current_result = laczos::svd_dim_seed(&test_matrix, 0, seed).unwrap(); + let current_result = lanczos::svd_dim_seed(&test_matrix, 0, seed).unwrap(); let legacy_result = legacy::svd_dim_seed(&test_matrix, 0, seed).unwrap(); // Compare dimensions @@ -127,12 +127,12 @@ mod simple_comparison_tests { let csr = CsrMatrix::from(&coo); // Calculate SVD using original method - let legacy_svd = laczos::svd_dim_seed(&csr, 0, seed as u32).unwrap(); + let legacy_svd = lanczos::svd_dim_seed(&csr, 0, seed as u32).unwrap(); // Calculate SVD using our masked method (using all columns) let mask = vec![true; ncols]; - let masked_matrix = laczos::masked::MaskedCSRMatrix::new(&csr, mask); - let current_svd = laczos::svd_dim_seed(&masked_matrix, 0, seed as u32).unwrap(); + let masked_matrix = lanczos::masked::MaskedCSRMatrix::new(&csr, mask); + let current_svd = lanczos::svd_dim_seed(&masked_matrix, 0, seed as u32).unwrap(); // Compare with relative tolerance let rel_tol = 1e-3; // 0.1% relative tolerance @@ 
-159,7 +159,7 @@ mod simple_comparison_tests { let test_matrix = create_sparse_matrix(100, 100, 0.0098); // 0.98% non-zeros // Should no longer fail with convergence error - let result = laczos::svd_dim_seed(&test_matrix, 50, 42); + let result = lanczos::svd_dim_seed(&test_matrix, 50, 42); assert!(result.is_ok(), "{}", format!("SVD failed on 99.02% sparse matrix, {:?}", result.err().unwrap())); } From cc1b3f4d975b4747b8fdf981c58c3cbd8b72ce13 Mon Sep 17 00:00:00 2001 From: Ian Date: Tue, 15 Apr 2025 09:29:31 +0200 Subject: [PATCH 7/7] Added derive for normalization method and version bump --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/randomized/mod.rs | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 894bfd4..2fa2cf3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -477,7 +477,7 @@ dependencies = [ [[package]] name = "single-svdlib" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "argmin", diff --git a/Cargo.toml b/Cargo.toml index 42cf8f1..b7a46a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ description = "A Rust port of LAS2 from SVDLIBC" keywords = ["svd"] categories = ["algorithms", "data-structures", "mathematics", "science"] name = "single-svdlib" -version = "0.7.0" +version = "0.8.0" edition = "2021" license-file = "SVDLIBC-LICENSE.txt" diff --git a/src/randomized/mod.rs b/src/randomized/mod.rs index be6aad8..a477bcc 100644 --- a/src/randomized/mod.rs +++ b/src/randomized/mod.rs @@ -11,6 +11,7 @@ use rayon::prelude::{IndexedParallelIterator, IntoParallelIterator}; use std::ops::Mul; use crate::utils::determine_chunk_size; +#[derive(Debug, Clone, Copy, PartialEq)] pub enum PowerIterationNormalizer { QR, LU,