diff --git a/Cargo.lock b/Cargo.lock index 9c21ee7..2fa2cf3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "anyhow" +version = "1.0.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" + [[package]] name = "approx" version = "0.5.1" @@ -11,6 +17,38 @@ dependencies = [ "num-traits", ] +[[package]] +name = "argmin" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "760a49d596b18b881d2fe6e9e6da4608fa64d4a7653ef5cd43bfaa4da018d596" +dependencies = [ + "anyhow", + "argmin-math", + "instant", + "num-traits", + "paste", + "rand 0.8.5", + "rand_xoshiro", + "rayon", + "thiserror 1.0.69", +] + +[[package]] +name = "argmin-math" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93a0d0269b60bd1cd674de70314e3f0da97406cf8c1936ce760d2a46e0f13fe" +dependencies = [ + "anyhow", + "cfg-if", + "num-complex", + "num-integer", + "num-traits", + "rand 0.8.5", + "thiserror 1.0.69", +] + [[package]] name = "autocfg" version = "1.4.0" @@ -35,6 +73,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + [[package]] name = "cfg-if" version = "1.0.0" @@ -72,6 +116,17 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.3.2" @@ -81,7 +136,27 @@ dependencies = [ "cfg-if", "libc", "r-efi", - "wasi", + "wasi 0.14.2+wasi-0.2.4", +] + +[[package]] +name = "image" +version = "0.25.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db35664ce6b9810857a38a906215e75a9c879f0696556a39f59c62829710251a" +dependencies = [ + "bytemuck", + "byteorder-lite", + "num-traits", +] + +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", ] [[package]] @@ -90,6 +165,12 @@ version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +[[package]] +name = "libm" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" + [[package]] name = "matrixmultiply" version = "0.3.9" @@ -112,6 +193,7 @@ dependencies = [ "num-complex", "num-rational", "num-traits", + "rayon", "simba", "typenum", ] @@ -152,6 +234,17 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "nshare" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60f2f3256fd1d647fdaac74abad0682766a6ffde01f8ccc138f9e1c641ac997c" +dependencies = [ + "image", + "nalgebra", + "ndarray", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -198,6 +291,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -208,9 +302,9 @@ checksum = 
"57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "portable-atomic" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" [[package]] name = "portable-atomic-util" @@ -254,17 +348,38 @@ version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + [[package]] name = "rand" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" dependencies = [ - "rand_chacha", - "rand_core", + "rand_chacha 0.9.0", + "rand_core 0.9.3", "zerocopy 0.8.24", ] +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + [[package]] name = "rand_chacha" version = "0.9.0" @@ -272,7 +387,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.3", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.15", ] [[package]] @@ -281,7 +405,26 @@ version = "0.9.3" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom", + "getrandom 0.3.2", +] + +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand 0.9.0", +] + +[[package]] +name = "rand_xoshiro" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" +dependencies = [ + "rand_core 0.6.4", ] [[package]] @@ -334,14 +477,19 @@ dependencies = [ [[package]] name = "single-svdlib" -version = "0.4.0" +version = "0.8.0" dependencies = [ + "anyhow", + "argmin", + "nalgebra", "nalgebra-sparse", "ndarray", + "nshare", "num-traits", - "rand", + "rand 0.9.0", + "rand_distr", "rayon", - "thiserror", + "thiserror 2.0.9", ] [[package]] @@ -355,13 +503,33 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + [[package]] name = "thiserror" version = "2.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f072643fd0190df67a8bab670c20ef5d8737177d6ac6b2e9a236cb096206b2cc" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.9", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -387,6 +555,12 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + [[package]] name = "wasi" version = "0.14.2+wasi-0.2.4" diff --git a/Cargo.toml b/Cargo.toml index 52835e4..b7a46a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,14 +4,19 @@ description = "A Rust port of LAS2 from SVDLIBC" keywords = ["svd"] categories = ["algorithms", "data-structures", "mathematics", "science"] name = "single-svdlib" -version = "0.4.0" +version = "0.8.0" edition = "2021" license-file = "SVDLIBC-LICENSE.txt" [dependencies] +anyhow = "1.0.97" +argmin = { version = "0.10.0", features = ["rayon"] } nalgebra-sparse = "0.10.0" -ndarray = "0.16.1" num-traits = "0.2.19" rand = "0.9.0" +rand_distr = "0.5.1" rayon = "1.10.0" thiserror = "2.0.9" +nshare = {version = "0.10.0", features = ["nalgebra", "ndarray"] } +ndarray = "0.16.1" +nalgebra = { version = "0.33.2", features = ["rayon"] } diff --git a/README.md b/README.md index 2ea6e0d..3e8a66f 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,27 @@ -# single_svdlib +# Single-SVDLib: Singular Value Decomposition for Sparse Matrices -A Rust library for performing Singular Value Decomposition (SVD) on sparse matrices using the Lanczos algorithm. It is build on the original library and expan +[![Crate](https://img.shields.io/crates/v/single-svdlib.svg)](https://crates.io/crates/single-svdlib) +[![Documentation](https://docs.rs/single-svdlib/badge.svg)](https://docs.rs/single-svdlib) +[![License](https://img.shields.io/crates/l/single-svdlib.svg)](LICENSE) -## Overview - -`svdlibrs` is a Rust port of LAS2 from SVDLIBC, originally developed by Doug Rohde. This library efficiently computes SVD on sparse matrices, particularly large ones, and returns the decomposition as ndarray components. 
- -This implementation extends the original [svdlibrs](https://github.com/dfarnham/svdlibrs) by Dave Farnham with: -- Updated dependency versions -- Support for a broader range of numeric types (f64, f32, others) -- Column masking capabilities for analyzing specific subsets of data +A high-performance Rust library for computing Singular Value Decomposition (SVD) on sparse matrices, with support for both Lanczos and randomized SVD algorithms. ## Features -- Performs SVD on sparse matrices using the Lanczos algorithm -- Works with various input formats: CSR, CSC, or COO matrices -- Column masking for dimension selection without data copying -- Generic implementation supporting different numeric types -- High numerical precision for critical calculations +- **Multiple SVD algorithms**: + - Lanczos algorithm (based on SVDLIBC) + - Randomized SVD for very large and sparse matrices +- **Sparse matrix support**: + - Compressed Sparse Row (CSR) format + - Compressed Sparse Column (CSC) format + - Coordinate (COO) format +- **Performance optimizations**: + - Parallel execution with Rayon + - Adaptive tuning for highly sparse matrices + - Column masking for subspace SVD +- **Generic interface**: + - Works with both `f32` and `f64` precision +- **Comprehensive error handling and diagnostics** ## Installation @@ -25,144 +29,147 @@ Add this to your `Cargo.toml`: ```toml [dependencies] -single-svdlib = "0.1.0" -nalgebra-sparse = "0.10.0" -ndarray = "0.16.1" +single-svdlib = "0.6.0" ``` -## Basic Usage +## Quick Start ```rust -use single_svdlib::{svd, svd_dim, svd_dim_seed}; use nalgebra_sparse::{coo::CooMatrix, csr::CsrMatrix}; +use single_svdlib::laczos::svd_dim_seed; -// Create a sparse matrix +// Create a matrix in COO format let mut coo = CooMatrix::::new(3, 3); coo.push(0, 0, 1.0); coo.push(0, 1, 16.0); coo.push(0, 2, 49.0); coo.push(1, 0, 4.0); coo.push(1, 1, 25.0); coo.push(1, 2, 64.0); coo.push(2, 0, 9.0); coo.push(2, 1, 36.0); coo.push(2, 2, 81.0); +// Convert to CSR 
for better performance let csr = CsrMatrix::from(&coo); -// Compute SVD -let svd_result = svd(&csr)?; +// Compute SVD with a fixed random seed +let svd = svd_dim_seed(&csr, 3, 42).unwrap(); // Access the results -println!("Rank: {}", svd_result.d); -println!("Singular values: {:?}", svd_result.s); -println!("Left singular vectors (U): {:?}", svd_result.ut.t()); -println!("Right singular vectors (V): {:?}", svd_result.vt.t()); +let singular_values = &svd.s; +let left_singular_vectors = &svd.ut; // Note: These are transposed +let right_singular_vectors = &svd.vt; // Note: These are transposed // Reconstruct the original matrix -let reconstructed = svd_result.recompose(); +let reconstructed = svd.recompose(); ``` -## Column Masking +## SVD Methods -The library supports analyzing specific columns without copying the data: +### Lanczos Algorithm (LAS2) -```rust -use single_svdlib::{svd, MaskedCSRMatrix}; -use nalgebra_sparse::{coo::CooMatrix, csr::CsrMatrix}; +The Lanczos algorithm is well-suited for sparse matrices of moderate size: -// Create a sparse matrix -let mut coo = CooMatrix::::new(3, 5); -coo.push(0, 0, 1.0); coo.push(0, 2, 2.0); coo.push(0, 4, 3.0); -coo.push(1, 1, 4.0); coo.push(1, 3, 5.0); -coo.push(2, 0, 6.0); coo.push(2, 2, 7.0); coo.push(2, 4, 8.0); +```rust +use single_svdlib::laczos; -let csr = CsrMatrix::from(&coo); +// Basic SVD computation (uses defaults) +let svd = laczos::svd(&matrix)?; -// Method 1: Using a boolean mask (true = include column) -let mask = vec![true, false, true, false, true]; // Only columns 0, 2, 4 -let masked_matrix = MaskedCSRMatrix::new(&csr, mask); +// SVD with specified target rank +let svd = laczos::svd_dim(&matrix, 10)?; -// Method 2: Specifying which columns to include -let columns = vec![0, 2, 4]; -let masked_matrix = MaskedCSRMatrix::with_columns(&csr, &columns); +// SVD with specified target rank and fixed random seed +let svd = laczos::svd_dim_seed(&matrix, 10, 42)?; -// Run SVD on the masked matrix -let svd_result 
= svd(&masked_matrix)?; +// Full control over SVD parameters +let svd = laczos::svd_las2( + &matrix, + dimensions, // upper limit of desired number of dimensions + iterations, // number of Lanczos iterations + end_interval, // interval containing unwanted eigenvalues, e.g. [-1e-30, 1e-30] + kappa, // relative accuracy of eigenvalues, e.g. 1e-6 + random_seed, // random seed (0 for automatic) +)?; ``` -## Support for Different Numeric Types +### Randomized SVD -The library supports various numeric types: +For very large sparse matrices, the randomized SVD algorithm offers better performance: ```rust -// With f64 (double precision) -let csr_f64 = CsrMatrix::::from(&coo); -let svd_result = svd(&csr_f64)?; - -// With f32 (single precision) -let csr_f32 = CsrMatrix::::from(&coo); -let svd_result = svd(&csr_f32)?; - -// With integer types (converted internally) -let csr_i32 = CsrMatrix::::from(&coo); -let masked_i32 = MaskedCSRMatrix::with_columns(&csr_i32, &columns); -let svd_result = svd(&masked_i32)?; +use single_svdlib::randomized; + +let svd = randomized::randomized_svd( + &matrix, + target_rank, // desired rank + n_oversamples, // oversampling parameter (typically 5-10) + n_power_iterations, // number of power iterations (typically 2-4) + randomized::PowerIterationNormalizer::QR, // normalization method + Some(42), // random seed (None for automatic) +)?; ``` -## Advanced Usage +### Column Masking -For more control over the SVD computation: +For operations on specific columns of a matrix: ```rust -use single_svdlib::{svdLAS2, SvdRec}; - -// Customize the SVD calculation -let svd: SvdRec = svdLAS2( - &matrix, // sparse matrix - dimensions, // upper limit of desired dimensions (0 = max) - iterations, // number of algorithm iterations (0 = auto) - &[-1.0e-30, 1.0e-30], // interval for unwanted eigenvalues - 1.0e-6, // relative accuracy threshold - random_seed, // random seed (0 = auto-generate) -)?; +use single_svdlib::laczos::masked::MaskedCSRMatrix; + +// Create a 
mask for selected columns +let columns = vec![0, 2, 5, 7]; // Only use these columns +let masked_matrix = MaskedCSRMatrix::with_columns(&csr_matrix, &columns); + +// Compute SVD on the masked matrix +let svd = laczos::svd(&masked_matrix)?; ``` -## SVD Results and Diagnostics +## Result Structure -The SVD results are returned in a `SvdRec` struct: +The SVD result contains: ```rust -pub struct SvdRec { - pub d: usize, // Dimensionality (rank) - pub ut: Array2, // Transpose of left singular vectors - pub s: Array1, // Singular values - pub vt: Array2, // Transpose of right singular vectors - pub diagnostics: Diagnostics, // Computational diagnostics +struct SvdRec { + d: usize, // Rank (number of singular values) + ut: Array2, // Transpose of left singular vectors (d x m) + s: Array1, // Singular values (d) + vt: Array2, // Transpose of right singular vectors (d x n) + diagnostics: Diagnostics, // Computation diagnostics } ``` -The `Diagnostics` struct provides detailed information about the computation: +Note that `ut` and `vt` are returned in transposed form. 
+ +## Diagnostics + +Each SVD computation returns detailed diagnostics: ```rust -pub struct Diagnostics { - pub non_zero: usize, // Number of non-zeros in the input matrix - pub dimensions: usize, // Number of dimensions attempted - pub iterations: usize, // Number of iterations attempted - pub transposed: bool, // True if the matrix was transposed internally - pub lanczos_steps: usize, // Number of Lanczos steps - pub ritz_values_stabilized: usize, // Number of ritz values - pub significant_values: usize, // Number of significant values - pub singular_values: usize, // Number of singular values - pub end_interval: [f64; 2], // Interval for unwanted eigenvalues - pub kappa: f64, // Relative accuracy threshold - pub random_seed: u32, // Random seed used -} +let svd = laczos::svd(&matrix)?; +println!("Non-zero elements: {}", svd.diagnostics.non_zero); +println!("Transposed during computation: {}", svd.diagnostics.transposed); +println!("Lanczos steps: {}", svd.diagnostics.lanczos_steps); +println!("Significant values found: {}", svd.diagnostics.significant_values); ``` -## License +## Performance Tips -This library is provided under the BSD License, as per the original SVDLIBC implementation. +1. **Choose the right algorithm**: + - For matrices up to ~10,000 x 10,000 with moderate sparsity, use the Lanczos algorithm + - For larger matrices or very high sparsity (>99%), use randomized SVD + +2. **Matrix format matters**: + - Convert COO matrices to CSR or CSC for computation + - CSR typically performs better for row-oriented operations + +3. **Adjust parameters for very sparse matrices**: + - Increase power iterations in randomized SVD (e.g., 5-7) + - Use a higher `kappa` value in Lanczos for very sparse matrices + +4. **Consider column masking** for operations that only need a subset of the data + +## License -## Acknowledgments +This crate is licensed under the BSD License, the same as the original SVDLIBC implementation. 
See the `SVDLIBC-LICENSE.txt` file for details. -- Dave Farnham for the original Rust port -- Doug Rohde for the original SVDLIBC implementation -- University of Tennessee Research Foundation for the underlying mathematical library +## Credits -[Latest Version]: https://img.shields.io/crates/v/single-svdlib.svg -[crates.io]: https://crates.io/crates/single-svdlib \ No newline at end of file +- Original SVDLIBC implementation by Doug Rohde +- Rust port maintainer of SVDLIBC: Dave Farnham +- Extensions and modifications of the original algorithm: Ian F. Diks diff --git a/src/masked.rs b/src/lanczos/masked.rs similarity index 97% rename from src/masked.rs rename to src/lanczos/masked.rs index d10006f..6f991e4 100644 --- a/src/masked.rs +++ b/src/lanczos/masked.rs @@ -76,7 +76,7 @@ impl<'a, T: Float + AddAssign + Sync + Send> SMat for MaskedCSRMatrix<'a, T> for i in 0..self.matrix.nrows() { for j in major_offsets[i]..major_offsets[i + 1] { - let col = minor_indices[j]; + let col = minor_indices[j]; if self.column_mask[col] { count += 1; } @@ -213,7 +213,7 @@ impl<'a, T: Float + AddAssign + Sync + Send> SMat for MaskedCSRMatrix<'a, T> #[cfg(test)] mod tests { use super::*; - use crate::{svd, SMat}; + use crate::{SMat}; use nalgebra_sparse::{coo::CooMatrix, csr::CsrMatrix}; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; @@ -243,7 +243,7 @@ mod tests { assert_eq!(masked.nnz(), 6); // Only entries in the selected columns // Test SVD on the masked matrix - let svd_result = svd(&masked); + let svd_result = crate::lanczos::svd(&masked); assert!(svd_result.is_ok()); } @@ -304,8 +304,8 @@ mod tests { assert_eq!(masked_matrix.nnz(), physical_csr.nnz()); // Perform SVD on both - let svd_masked = svd(&masked_matrix).unwrap(); - let svd_physical = svd(&physical_csr).unwrap(); + let svd_masked = crate::lanczos::svd(&masked_matrix).unwrap(); + let svd_physical = crate::lanczos::svd(&physical_csr).unwrap(); // Compare SVD results - they should be very close but not exactly the 
same // due to potential differences in numerical computation diff --git a/src/new.rs b/src/lanczos/mod.rs similarity index 92% rename from src/new.rs rename to src/lanczos/mod.rs index faf5e53..99789b5 100644 --- a/src/new.rs +++ b/src/lanczos/mod.rs @@ -1,5 +1,8 @@ +pub mod masked; + use crate::error::SvdLibError; -use ndarray::{Array, Array1, Array2}; +use crate::{Diagnostics, SMat, SvdFloat, SvdRec}; +use ndarray::{Array, Array2}; use num_traits::real::Real; use num_traits::{Float, FromPrimitive, One, Zero}; use rand::rngs::StdRng; @@ -12,106 +15,7 @@ use std::iter::Sum; use std::mem; use std::ops::{AddAssign, MulAssign, Neg, SubAssign}; -pub trait SMat: Sync { - fn nrows(&self) -> usize; - fn ncols(&self) -> usize; - fn nnz(&self) -> usize; - fn svd_opa(&self, x: &[T], y: &mut [T], transposed: bool); // y = A*x -} - -/// Singular Value Decomposition Components -/// -/// # Fields -/// - d: Dimensionality (rank), the number of rows of both `ut`, `vt` and the length of `s` -/// - ut: Transpose of left singular vectors, the vectors are the rows of `ut` -/// - s: Singular values (length `d`) -/// - vt: Transpose of right singular vectors, the vectors are the rows of `vt` -/// - diagnostics: Computational diagnostics -#[derive(Debug, Clone, PartialEq)] -pub struct SvdRec { - pub d: usize, - pub ut: Array2, - pub s: Array1, - pub vt: Array2, - pub diagnostics: Diagnostics, -} - -/// Computational Diagnostics -/// -/// # Fields -/// - non_zero: Number of non-zeros in the matrix -/// - dimensions: Number of dimensions attempted (bounded by matrix shape) -/// - iterations: Number of iterations attempted (bounded by dimensions and matrix shape) -/// - transposed: True if the matrix was transposed internally -/// - lanczos_steps: Number of Lanczos steps performed -/// - ritz_values_stabilized: Number of ritz values -/// - significant_values: Number of significant values discovered -/// - singular_values: Number of singular values returned -/// - end_interval: left, right 
end of interval containing unwanted eigenvalues -/// - kappa: relative accuracy of ritz values acceptable as eigenvalues -/// - random_seed: Random seed provided or the seed generated -#[derive(Debug, Clone, PartialEq)] -pub struct Diagnostics { - pub non_zero: usize, - pub dimensions: usize, - pub iterations: usize, - pub transposed: bool, - pub lanczos_steps: usize, - pub ritz_values_stabilized: usize, - pub significant_values: usize, - pub singular_values: usize, - pub end_interval: [T; 2], - pub kappa: T, - pub random_seed: u32, -} - /// Trait for floating point types that can be used with the SVD algorithm -pub trait SvdFloat: - Float - + FromPrimitive - + Debug - + Send - + Sync - + Zero - + One - + AddAssign - + SubAssign - + MulAssign - + Neg - + Sum -{ - fn eps() -> Self; - fn eps34() -> Self; - fn compare(a: Self, b: Self) -> bool; -} - -impl SvdFloat for f32 { - fn eps() -> Self { - f32::EPSILON - } - - fn eps34() -> Self { - f32::EPSILON.powf(0.75) - } - - fn compare(a: Self, b: Self) -> bool { - (b - a).abs() < f32::EPSILON - } -} - -impl SvdFloat for f64 { - fn eps() -> Self { - f64::EPSILON - } - - fn eps34() -> Self { - f64::EPSILON.powf(0.75) - } - - fn compare(a: Self, b: Self) -> bool { - (b - a).abs() < f64::EPSILON - } -} /// SVD at full dimensionality, calls `svdLAS2` with the highlighted defaults /// @@ -1157,7 +1061,7 @@ fn ritvec( let sparsity = T::one() - (T::from_usize(A.nnz()).unwrap() - / (T::from_usize(A.nrows()).unwrap() * T::from_usize(A.ncols()).unwrap())); + / (T::from_usize(A.nrows()).unwrap() * T::from_usize(A.ncols()).unwrap())); let epsilon = T::epsilon(); let adaptive_eps = if sparsity > T::from_f64(0.99).unwrap() { @@ -1229,7 +1133,6 @@ fn ritvec( let significant_indices: Vec = (0..js) .into_par_iter() .filter(|&k| { - // Adaptive error bound check using relative tolerance let relative_bound = adaptive_kappa * wrk.ritz[k].abs().max(max_eigenvalue * adaptive_eps); wrk.bnd[k] <= relative_bound && k + 1 > js - neig @@ -1242,11 
+1145,10 @@ fn ritvec( .into_par_iter() .map(|k| { let mut vec = vec![T::zero(); wrk.ncols]; - let mut idx = (jsq - js) + k + 1; for i in 0..js { - idx -= js; - // Non-zero check with adaptive threshold + let idx = k * js + i; + if s[idx].abs() > adaptive_eps { for (j, item) in store_vectors[i].iter().enumerate().take(wrk.ncols) { vec[j] += s[idx] * *item; @@ -1254,7 +1156,6 @@ fn ritvec( } } - // Return with position index (for proper ordering) (k, vec) }) .collect(); @@ -1322,7 +1223,7 @@ fn ritvec( S[i] = sval; let ut_offset = i * Ut.cols; let mut ut_vec = a_products[i].clone(); - + if sval > adaptive_eps { svd_dscal(T::one() / sval, &mut ut_vec); } else { @@ -1541,7 +1442,7 @@ impl SMat for nalgebra_sparse::cs } #[rustfmt::skip] -impl SMat for nalgebra_sparse::csr::CsrMatrix { +impl SMat for nalgebra_sparse::csr::CsrMatrix { fn nrows(&self) -> usize { self.nrows() } fn ncols(&self) -> usize { self.ncols() } fn nnz(&self) -> usize { self.nnz() } @@ -1556,23 +1457,60 @@ impl SMat for nalgebra_sparse::cs let (major_offsets, minor_indices, values) = self.csr_data(); - for y_val in y.iter_mut() { - *y_val = T::zero(); - } + y.fill(T::zero()); if !transposed { - for (i, yval) in y.iter_mut().enumerate() { - for j in major_offsets[i]..major_offsets[i + 1] { - *yval += values[j] * x[minor_indices[j]]; - } + let nrows = self.nrows(); + let chunk_size = crate::utils::determine_chunk_size(nrows); + + // Create thread-local vectors with results + let results: Vec<(usize, T)> = (0..nrows) + .into_par_iter() + .map(|i| { + let mut sum = T::zero(); + for j in major_offsets[i]..major_offsets[i + 1] { + sum += values[j] * x[minor_indices[j]]; + } + (i, sum) + }) + .collect(); + + // Apply the results to y + for (i, val) in results { + y[i] = val; } } else { - for (i, xval) in x.iter().enumerate() { - for j in major_offsets[i]..major_offsets[i + 1] { - y[minor_indices[j]] += values[j] * *xval; + let nrows = self.nrows(); + let chunk_size = 
crate::utils::determine_chunk_size(nrows); + + // Process input in chunks and create partial results + let results: Vec> = (0..((nrows + chunk_size - 1) / chunk_size)) + .into_par_iter() + .map(|chunk_idx| { + let start = chunk_idx * chunk_size; + let end = (start + chunk_size).min(nrows); + + let mut local_y = vec![T::zero(); y.len()]; + for i in start..end { + let row_val = x[i]; + for j in major_offsets[i]..major_offsets[i + 1] { + let col = minor_indices[j]; + local_y[col] += values[j] * row_val; + } + } + local_y + }) + .collect(); + + // Combine partial results + for local_y in results { + for (idx, val) in local_y.iter().enumerate() { + if !val.is_zero() { + y[idx] += *val; } } } + } } } diff --git a/src/lib.rs b/src/lib.rs index 50e59da..bbba6c2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,13 @@ pub mod legacy; pub mod error; -mod new; -mod masked; pub(crate) mod utils; -pub use new::*; -pub use masked::*; +pub mod randomized; + +pub mod lanczos; + +pub use utils::*; + #[cfg(test)] mod simple_comparison_tests { @@ -15,6 +17,7 @@ mod simple_comparison_tests { use nalgebra_sparse::CsrMatrix; use rand::{Rng, SeedableRng}; use rand::rngs::StdRng; + use rayon::ThreadPoolBuilder; fn create_sparse_matrix(rows: usize, cols: usize, density: f64) -> nalgebra_sparse::coo::CooMatrix { use rand::{rngs::StdRng, Rng, SeedableRng}; @@ -72,7 +75,7 @@ mod simple_comparison_tests { // Run both implementations with the same seed for deterministic behavior let seed = 42; - let current_result = svd_dim_seed(&test_matrix, 0, seed).unwrap(); + let current_result = lanczos::svd_dim_seed(&test_matrix, 0, seed).unwrap(); let legacy_result = legacy::svd_dim_seed(&test_matrix, 0, seed).unwrap(); // Compare dimensions @@ -124,12 +127,12 @@ mod simple_comparison_tests { let csr = CsrMatrix::from(&coo); // Calculate SVD using original method - let legacy_svd = svd_dim_seed(&csr, 0, seed as u32).unwrap(); + let legacy_svd = lanczos::svd_dim_seed(&csr, 0, seed as u32).unwrap(); // 
Calculate SVD using our masked method (using all columns) let mask = vec![true; ncols]; - let masked_matrix = MaskedCSRMatrix::new(&csr, mask); - let current_svd = svd_dim_seed(&masked_matrix, 0, seed as u32).unwrap(); + let masked_matrix = lanczos::masked::MaskedCSRMatrix::new(&csr, mask); + let current_svd = lanczos::svd_dim_seed(&masked_matrix, 0, seed as u32).unwrap(); // Compare with relative tolerance let rel_tol = 1e-3; // 0.1% relative tolerance @@ -150,127 +153,126 @@ mod simple_comparison_tests { } } - - #[test] fn test_real_sparse_matrix() { // Create a matrix with similar sparsity to your real one (99.02%) let test_matrix = create_sparse_matrix(100, 100, 0.0098); // 0.98% non-zeros // Should no longer fail with convergence error - let result = svd_dim_seed(&test_matrix, 50, 42); + let result = lanczos::svd_dim_seed(&test_matrix, 50, 42); assert!(result.is_ok(), "{}", format!("SVD failed on 99.02% sparse matrix, {:?}", result.err().unwrap())); } #[test] - fn test_real_sparse_matrix_very_big() { - // Create a matrix with similar sparsity to your real one (99.02%) - let test_matrix = create_sparse_matrix(10000, 1000, 0.01); // 0.98% non-zeros + fn test_random_svd_computation() { + + // Create a matrix with high sparsity (99%) + let test_matrix = create_sparse_matrix(1000, 250, 0.01); // 1% non-zeros + + // Convert to CSR for processing + let csr = CsrMatrix::from(&test_matrix); + + // Run randomized SVD with reasonable defaults for a sparse matrix + let result = randomized::randomized_svd( + &csr, + 50, // target rank + 10, // oversampling parameter + 3, // power iterations + randomized::PowerIterationNormalizer::QR, // use QR normalization + Some(42), // random seed + ); + + // Verify the computation succeeds on a highly sparse matrix + assert!( + result.is_ok(), + "Randomized SVD failed on 99% sparse matrix: {:?}", + result.err().unwrap() + ); + + // Additional checks on the result if successful + if let Ok(svd_result) = result { + // Verify dimensions 
match expectations + assert_eq!(svd_result.d, 50, "Expected rank of 50"); + + // Verify singular values are positive and in descending order + for i in 0..svd_result.s.len() { + assert!(svd_result.s[i] > 0.0, "Singular values should be positive"); + if i > 0 { + assert!( + svd_result.s[i-1] >= svd_result.s[i], + "Singular values should be in descending order" + ); + } + } - // Should no longer fail with convergence error - let thread_pool = rayon::ThreadPoolBuilder::new().num_threads(3).build().unwrap(); - let result = thread_pool.install(|| { - svd_dim_seed(&test_matrix, 50, 42) - }); - assert!(result.is_ok(), "{}", format!("SVD failed on 99% sparse matrix, {:?}", result.err().unwrap())); + // Verify basics of U and V dimensions + assert_eq!(svd_result.ut.nrows(), 50, "U transpose should have 50 rows"); + assert_eq!(svd_result.ut.ncols(), 1000, "U transpose should have 1000 columns"); + assert_eq!(svd_result.vt.nrows(), 50, "V transpose should have 50 rows"); + assert_eq!(svd_result.vt.ncols(), 250, "V transpose should have 250 columns"); + + } } #[test] - fn test_real_sparse_matrix_very_very_big() { - // Create a matrix with similar sparsity to your real one (99.02%) - let test_matrix = create_sparse_matrix(100000, 2500, 0.01); // 0.98% non-zeros + fn test_randomized_svd_very_large_sparse_matrix() { + + // Create a very large matrix with high sparsity (99%) + let test_matrix = create_sparse_matrix(100000, 2500, 0.01); // 1% non-zeros + + // Convert to CSR for processing + let csr = CsrMatrix::from(&test_matrix); + + // Run randomized SVD with reasonable defaults for a sparse matrix + let threadpool = ThreadPoolBuilder::new().num_threads(10).build().unwrap(); + let result = threadpool.install(|| { + randomized::randomized_svd( + &csr, + 50, // target rank + 10, // oversampling parameter + 7, // power iterations + randomized::PowerIterationNormalizer::QR, // use QR normalization + Some(42), // random seed + ) + }); - // Should no longer fail with convergence error 
- let result = svd(&test_matrix); - assert!(result.is_ok(), "{}", format!("SVD failed on 99% sparse matrix, {:?}", result.err().unwrap())); + // Simply verify that the computation succeeds on a highly sparse matrix + assert!( + result.is_ok(), + "Randomized SVD failed on 99% sparse matrix: {:?}", + result.err().unwrap() + ); } - //#[test] - fn f32_precision_test() { - let seed = 12345; - let (nrows, ncols) = (40, 20); - let mut rng = StdRng::seed_from_u64(seed); - - // Create random sparse matrix with f32 values - let mut coo_f32 = CooMatrix::::new(nrows, ncols); - // And the same matrix with f64 values - let mut coo_f64 = CooMatrix::::new(nrows, ncols); - - // Insert the same random non-zero elements in both matrices - for _ in 0..(nrows * ncols / 4) { // ~25% density - let i = rng.gen_range(0..nrows); - let j = rng.gen_range(0..ncols); - let value = rng.gen_range(-10.0..10.0); - coo_f32.push(i, j, value as f32); - coo_f64.push(i, j, value); - } - - let csr_f32 = CsrMatrix::from(&coo_f32); - let csr_f64 = CsrMatrix::from(&coo_f64); - - // Calculate SVD for both types - let svd_f32 = svd_dim_seed(&csr_f32, 0, seed as u32).unwrap(); - let svd_f64 = svd_dim_seed(&csr_f64, 0, seed as u32).unwrap(); - - // Adaptive tolerance - increases for smaller singular values - // and values further down the list (which are more affected by accumulated errors) - fn calculate_tolerance(index: usize, magnitude: f64) -> f64 { - // Base tolerance is 0.2% - let base_tol = 0.002; - - // Scale up for smaller values (more affected by precision) - let magnitude_factor = if magnitude < 1.0 { - 2.0 // Double tolerance for very small values - } else if magnitude < 10.0 { - 1.5 // 1.5x tolerance for moderately small values - } else { - 1.0 // Normal tolerance for larger values - }; - - // Scale up for later indices (more affected by accumulated errors) - let index_factor = 1.0 + (index as f64 * 0.001); // Add 0.1% per index - - base_tol * magnitude_factor * index_factor - } - - println!("f32 
rank: {}, f64 rank: {}", svd_f32.d, svd_f64.d); - - // Compare singular values up to the minimum rank - let min_rank = svd_f32.d.min(svd_f64.d); - for i in 0..min_rank { - let f32_val = svd_f32.s[i]; - let f64_val = svd_f64.s[i]; - let abs_diff = (f64_val - f32_val as f64).abs(); - let rel_diff = abs_diff / f64_val.max(f32_val as f64); - - // Calculate appropriate tolerance for this value - let tol = calculate_tolerance(i, f64_val); - - println!("Singular value {}: f32 = {}, f64 = {}, rel_diff = {:.6}%, tol = {:.6}%", - i, f32_val, f64_val, rel_diff * 100.0, tol * 100.0); - - assert!( - rel_diff <= tol, - "Singular value {} differs too much: f64 = {}, f32 = {}. Relative diff: {} > tolerance: {}", - i, f64_val, f32_val, rel_diff, tol - ); - } - - // Optional: Also check that overall behavior is reasonable - // For example, check that both implementations find similar condition numbers - if min_rank >= 2 { - let condition_f32 = svd_f32.s[0] / svd_f32.s[min_rank - 1]; - let condition_f64 = svd_f64.s[0] / svd_f64.s[min_rank - 1]; - let condition_rel_diff = ((condition_f64 - condition_f32 as f64) / condition_f64).abs(); + #[test] + fn test_randomized_svd_small_sparse_matrix() { + + // Create a small matrix with high sparsity (99%) + let test_matrix = create_sparse_matrix(1000, 250, 0.01); // 1% non-zeros + + // Convert to CSR for processing + let csr = CsrMatrix::from(&test_matrix); + + // Run randomized SVD with reasonable defaults for a sparse matrix + let threadpool = ThreadPoolBuilder::new().num_threads(10).build().unwrap(); + let result = threadpool.install(|| { + randomized::randomized_svd( + &csr, + 50, // target rank + 10, // oversampling parameter + 2, // power iterations + randomized::PowerIterationNormalizer::QR, // use QR normalization + Some(42), // random seed + ) + }); - println!("Condition number: f32 = {}, f64 = {}, rel_diff = {:.6}%", - condition_f32, condition_f64, condition_rel_diff * 100.0); - // Condition number can vary more, use 5% tolerance 
- assert!(condition_rel_diff <= 0.05, - "Condition numbers differ too much: f32 = {}, f64 = {}", - condition_f32, condition_f64); - } + // Simply verify that the computation succeeds on a highly sparse matrix + assert!( + result.is_ok(), + "Randomized SVD failed on 99% sparse matrix: {:?}", + result.err().unwrap() + ); } } \ No newline at end of file diff --git a/src/randomized/mod.rs b/src/randomized/mod.rs new file mode 100644 index 0000000..a477bcc --- /dev/null +++ b/src/randomized/mod.rs @@ -0,0 +1,364 @@ +use crate::error::SvdLibError; +use crate::{Diagnostics, SMat, SvdFloat, SvdRec}; +use nalgebra_sparse::na::{ComplexField, DMatrix, DVector, RealField}; +use ndarray::Array1; +use nshare::IntoNdarray2; +use rand::prelude::{Distribution, StdRng}; +use rand::SeedableRng; +use rand_distr::Normal; +use rayon::iter::ParallelIterator; +use rayon::prelude::{IndexedParallelIterator, IntoParallelIterator}; +use std::ops::Mul; +use crate::utils::determine_chunk_size; + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum PowerIterationNormalizer { + QR, + LU, + None, +} + +const PARALLEL_THRESHOLD_ROWS: usize = 5000; +const PARALLEL_THRESHOLD_COLS: usize = 1000; +const PARALLEL_THRESHOLD_ELEMENTS: usize = 100_000; + +pub fn randomized_svd( + m: &M, + target_rank: usize, + n_oversamples: usize, + n_power_iters: usize, + power_iteration_normalizer: PowerIterationNormalizer, + seed: Option, +) -> anyhow::Result> +where + T: SvdFloat + RealField, + M: SMat, + T: ComplexField, +{ + let start = std::time::Instant::now(); // only for debugging + let m_rows = m.nrows(); + let m_cols = m.ncols(); + + let rank = target_rank.min(m_rows.min(m_cols)); + let l = rank + n_oversamples; + println!("Basic statistics: {:?}", start.elapsed()); + + let omega = generate_random_matrix(m_cols, l, seed); + println!("Generated Random Matrix here: {:?}", start.elapsed()); + + let mut y = DMatrix::::zeros(m_rows, l); + multiply_matrix(m, &omega, &mut y, false); + println!( + "First multiplication 
took: {:?}, Continuing for power iterations:", + start.elapsed() + ); + + if n_power_iters > 0 { + let mut z = DMatrix::::zeros(m_cols, l); + + for w in 0..n_power_iters { + multiply_matrix(m, &y, &mut z, true); + println!("{}-nd power-iteration forward: {:?}", w, start.elapsed()); + match power_iteration_normalizer { + PowerIterationNormalizer::QR => { + let qr = z.qr(); + z = qr.q(); + } + PowerIterationNormalizer::LU => { + normalize_columns(&mut z); + } + PowerIterationNormalizer::None => {} + } + println!( + "{}-nd power-iteration forward, normalization: {:?}", + w, + start.elapsed() + ); + + multiply_matrix(m, &z, &mut y, false); + println!("{}-nd power-iteration backward: {:?}", w, start.elapsed()); + match power_iteration_normalizer { + PowerIterationNormalizer::QR => { + let qr = y.qr(); + y = qr.q(); + } + PowerIterationNormalizer::LU => normalize_columns(&mut y), + PowerIterationNormalizer::None => {} + } + println!( + "{}-nd power-iteration backward, normalization: {:?}", + w, + start.elapsed() + ); + } + } + println!( + "Finished power-iteration, continuing QR: {:?}", + start.elapsed() + ); + let qr = y.qr(); + println!("QR finished: {:?}", start.elapsed()); + let q = qr.q(); + + let mut b = DMatrix::::zeros(q.ncols(), m_cols); + multiply_transposed_by_matrix(&q, m, &mut b); + println!( + "QMB matrix multiplication transposed: {:?}", + start.elapsed() + ); + + let svd = b.svd(true, true); + println!("SVD decomposition took: {:?}", start.elapsed()); + let u_b = svd + .u + .ok_or_else(|| SvdLibError::Las2Error("SVD U computation failed".to_string()))?; + let singular_values = svd.singular_values; + let vt = svd + .v_t + .ok_or_else(|| SvdLibError::Las2Error("SVD V_t computation failed".to_string()))?; + + let actual_rank = target_rank.min(singular_values.len()); + + let u_b_subset = u_b.columns(0, actual_rank); + let u = q.mul(&u_b_subset); + + let vt_subset = vt.rows(0, actual_rank).into_owned(); + + // Convert to the format required by SvdRec + let d = 
actual_rank; + println!("SVD Result Cropping: {:?}", start.elapsed()); + + let ut = u.transpose().into_ndarray2(); + let s = convert_singular_values( + >::from(singular_values.rows(0, actual_rank)), + actual_rank, + ); + let vt = vt_subset.into_ndarray2(); + println!("Translation to ndarray: {:?}", start.elapsed()); + + Ok(SvdRec { + d, + ut, + s, + vt, + diagnostics: create_diagnostics(m, d, target_rank, n_power_iters, seed.unwrap_or(0) as u32), + }) +} + +fn convert_singular_values( + values: DVector, + size: usize, +) -> Array1 { + let mut array = Array1::zeros(size); + + for i in 0..size { + // Convert from RealField to T using f64 as intermediate + array[i] = T::from_real(values[i].clone()); + } + + array +} + +fn create_diagnostics>( + a: &M, + d: usize, + target_rank: usize, + power_iterations: usize, + seed: u32, +) -> Diagnostics +where + T: SvdFloat, +{ + Diagnostics { + non_zero: a.nnz(), + dimensions: target_rank, + iterations: power_iterations, + transposed: false, + lanczos_steps: 0, // we dont do that + ritz_values_stabilized: d, + significant_values: d, + singular_values: d, + end_interval: [T::from(-1e-30).unwrap(), T::from(1e-30).unwrap()], + kappa: T::from(1e-6).unwrap(), + random_seed: seed, + } +} + +fn normalize_columns(matrix: &mut DMatrix) { + let rows = matrix.nrows(); + let cols = matrix.ncols(); + + // Use sequential processing for small matrices + if rows < PARALLEL_THRESHOLD_ROWS && cols < PARALLEL_THRESHOLD_COLS { + for j in 0..cols { + let mut norm = T::zero(); + + // Calculate column norm + for i in 0..rows { + norm += ComplexField::powi(matrix[(i, j)], 2); + } + norm = ComplexField::sqrt(norm); + + // Normalize the column if the norm is not too small + if norm > T::from_f64(1e-10).unwrap() { + let scale = T::one() / norm; + for i in 0..rows { + matrix[(i, j)] *= scale; + } + } + } + return; + } + + let norms: Vec = (0..cols) + .into_par_iter() + .map(|j| { + let mut norm = T::zero(); + for i in 0..rows { + let val = unsafe { 
*matrix.get_unchecked((i, j)) }; + norm += ComplexField::powi(val, 2); + } + ComplexField::sqrt(norm) + }) + .collect(); + + // Now create a vector of (column_index, scale) pairs + let scales: Vec<(usize, T)> = norms + .into_iter() + .enumerate() + .filter_map(|(j, norm)| { + if norm > T::from_f64(1e-10).unwrap() { + Some((j, T::one() / norm)) + } else { + None // Skip columns with too small norms + } + }) + .collect(); + + // Apply normalization + scales.iter().for_each(|(j, scale)| { + for i in 0..rows { + let value = matrix.get_mut((i, *j)).unwrap(); + *value = value.clone() * scale.clone(); + } + }); +} + +// ---------------------------------------- +// Utils Functions +// ---------------------------------------- + +fn generate_random_matrix( + rows: usize, + cols: usize, + seed: Option, +) -> DMatrix { + let mut rng = match seed { + Some(s) => StdRng::seed_from_u64(s), + None => StdRng::seed_from_u64(0), + }; + + let normal = Normal::new(0.0, 1.0).unwrap(); + DMatrix::from_fn(rows, cols, |_, _| { + T::from_f64(normal.sample(&mut rng)).unwrap() + }) +} + +fn multiply_matrix>( + sparse: &M, + dense: &DMatrix, + result: &mut DMatrix, + transpose_sparse: bool, +) { + let cols = dense.ncols(); + + let results: Vec<(usize, Vec)> = (0..cols) + .into_par_iter() + .map(|j| { + let mut col_vec = vec![T::zero(); dense.nrows()]; + let mut result_vec = vec![T::zero(); result.nrows()]; + + for i in 0..dense.nrows() { + col_vec[i] = dense[(i, j)]; + } + + sparse.svd_opa(&col_vec, &mut result_vec, transpose_sparse); + + (j, result_vec) + }) + .collect(); + + for (j, col_result) in results { + for i in 0..result.nrows() { + result[(i, j)] = col_result[i]; + } + } +} + +fn multiply_transposed_by_matrix>( + q: &DMatrix, + sparse: &M, + result: &mut DMatrix, +) { + let q_rows = q.nrows(); + let q_cols = q.ncols(); + let sparse_rows = sparse.nrows(); + let sparse_cols = sparse.ncols(); + + eprintln!("Q dimensions: {} x {}", q_rows, q_cols); + eprintln!("Sparse dimensions: {} x 
{}", sparse_rows, sparse_cols); + eprintln!("Result dimensions: {} x {}", result.nrows(), result.ncols()); + + assert_eq!( + q_rows, sparse_rows, + "Dimension mismatch: Q has {} rows but sparse has {} rows", + q_rows, sparse_rows + ); + + assert_eq!( + result.nrows(), + q_cols, + "Result matrix has incorrect row count: expected {}, got {}", + q_cols, + result.nrows() + ); + assert_eq!( + result.ncols(), + sparse_cols, + "Result matrix has incorrect column count: expected {}, got {}", + sparse_cols, + result.ncols() + ); + + let chunk_size = determine_chunk_size(q_cols); + + let chunk_results: Vec)>> = (0..q_cols) + .into_par_iter() + .chunks(chunk_size) + .map(|chunk| { + let mut chunk_results = Vec::with_capacity(chunk.len()); + + for &col_idx in &chunk { + let mut q_col = vec![T::zero(); q_rows]; + for i in 0..q_rows { + q_col[i] = q[(i, col_idx)]; + } + + let mut result_row = vec![T::zero(); sparse_cols]; + + sparse.svd_opa(&q_col, &mut result_row, true); + + chunk_results.push((col_idx, result_row)); + } + chunk_results + }) + .collect(); + + for chunk_result in chunk_results { + for (row_idx, row_values) in chunk_result { + for j in 0..sparse_cols { + result[(row_idx, j)] = row_values[j]; + } + } + } +} diff --git a/src/utils.rs b/src/utils.rs index 5c7aae0..e6b8027 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,3 +1,9 @@ +use ndarray::{Array1, Array2}; +use num_traits::{Float, FromPrimitive, One, Zero}; +use std::fmt::Debug; +use std::iter::Sum; +use std::ops::{AddAssign, MulAssign, Neg, SubAssign}; + pub fn determine_chunk_size(nrows: usize) -> usize { let num_threads = rayon::current_num_threads(); @@ -9,3 +15,103 @@ pub fn determine_chunk_size(nrows: usize) -> usize { chunk_size.max(min_rows_per_thread) } + +pub trait SMat: Sync { + fn nrows(&self) -> usize; + fn ncols(&self) -> usize; + fn nnz(&self) -> usize; + fn svd_opa(&self, x: &[T], y: &mut [T], transposed: bool); // y = A*x +} + +/// Singular Value Decomposition Components +/// +/// # Fields 
+/// - d: Dimensionality (rank), the number of rows of both `ut`, `vt` and the length of `s` +/// - ut: Transpose of left singular vectors, the vectors are the rows of `ut` +/// - s: Singular values (length `d`) +/// - vt: Transpose of right singular vectors, the vectors are the rows of `vt` +/// - diagnostics: Computational diagnostics +#[derive(Debug, Clone, PartialEq)] +pub struct SvdRec { + pub d: usize, + pub ut: Array2, + pub s: Array1, + pub vt: Array2, + pub diagnostics: Diagnostics, +} + +/// Computational Diagnostics +/// +/// # Fields +/// - non_zero: Number of non-zeros in the matrix +/// - dimensions: Number of dimensions attempted (bounded by matrix shape) +/// - iterations: Number of iterations attempted (bounded by dimensions and matrix shape) +/// - transposed: True if the matrix was transposed internally +/// - lanczos_steps: Number of Lanczos steps performed +/// - ritz_values_stabilized: Number of ritz values +/// - significant_values: Number of significant values discovered +/// - singular_values: Number of singular values returned +/// - end_interval: left, right end of interval containing unwanted eigenvalues +/// - kappa: relative accuracy of ritz values acceptable as eigenvalues +/// - random_seed: Random seed provided or the seed generated +#[derive(Debug, Clone, PartialEq)] +pub struct Diagnostics { + pub non_zero: usize, + pub dimensions: usize, + pub iterations: usize, + pub transposed: bool, + pub lanczos_steps: usize, + pub ritz_values_stabilized: usize, + pub significant_values: usize, + pub singular_values: usize, + pub end_interval: [T; 2], + pub kappa: T, + pub random_seed: u32, +} + +pub trait SvdFloat: + Float + + FromPrimitive + + Debug + + Send + + Sync + + Zero + + One + + AddAssign + + SubAssign + + MulAssign + + Neg + + Sum +{ + fn eps() -> Self; + fn eps34() -> Self; + fn compare(a: Self, b: Self) -> bool; +} + +impl SvdFloat for f32 { + fn eps() -> Self { + f32::EPSILON + } + + fn eps34() -> Self { + 
f32::EPSILON.powf(0.75) + } + + fn compare(a: Self, b: Self) -> bool { + (b - a).abs() < f32::EPSILON + } +} + +impl SvdFloat for f64 { + fn eps() -> Self { + f64::EPSILON + } + + fn eps34() -> Self { + f64::EPSILON.powf(0.75) + } + + fn compare(a: Self, b: Self) -> bool { + (b - a).abs() < f64::EPSILON + } +}