From 9437fd3081f9c7dd3dd527a0695923e1c59457de Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sat, 15 Oct 2016 10:50:26 -0700
Subject: [PATCH 01/21] Use const for convergence epsilon

---
 src/learning/gmm.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 4c88e9ce..03976f6a 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -37,6 +37,8 @@ use learning::{LearningResult, UnSupModel};
 use learning::toolkit::rand_utils;
 use learning::error::{Error, ErrorKind};
 
+const CONVERGENCE_EPS: f64 = 1.0e-15;
+
 /// Covariance options for GMMs.
 ///
 /// - Full : The full covariance structure.
@@ -92,7 +94,7 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
 
             let (weights, log_lik_1) = try!(self.membership_weights(inputs));
 
-            if (log_lik_1 - log_lik_0).abs() < 1e-15 {
+            if (log_lik_1 - log_lik_0).abs() < CONVERGENCE_EPS {
                 break;
             }
 

From 67553dc414c79801bcd8e26e20885ee67997375d Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sat, 15 Oct 2016 11:19:13 -0700
Subject: [PATCH 02/21] Make new use with_weights

---
 src/learning/gmm.rs | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 03976f6a..7118e0fd 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -130,15 +130,7 @@ impl GaussianMixtureModel {
     /// let gmm = GaussianMixtureModel::new(3);
     /// ```
     pub fn new(k: usize) -> GaussianMixtureModel {
-        GaussianMixtureModel {
-            comp_count: k,
-            mix_weights: Vector::ones(k) / (k as f64),
-            model_means: None,
-            model_covars: None,
-            log_lik: 0f64,
-            max_iters: 100,
-            cov_option: CovOption::Full,
-        }
+        Self::with_weights(k, Vector::ones(k) / k as f64).unwrap()
     }
 
     /// Constructs a new GMM with the specified prior mixture weights.
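Side note: PATCH 01 only gives the EM stopping threshold a name. A minimal, self-contained sketch of the convergence pattern that constant controls (the `run_em`/`step` names are illustrative and not part of the patch set):

    const CONVERGENCE_EPS: f64 = 1.0e-15;

    /// Runs one EM iteration per call of `step` (which returns the new
    /// log-likelihood) and stops once the improvement falls below the epsilon.
    fn run_em<F: FnMut() -> f64>(max_iters: usize, mut step: F) -> f64 {
        let mut log_lik = 0.0;
        for _ in 0..max_iters {
            let new_log_lik = step();
            if (new_log_lik - log_lik).abs() < CONVERGENCE_EPS {
                break;
            }
            log_lik = new_log_lik;
        }
        log_lik
    }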
From 3fe49dddc910708c9511def1fa85034f5d03517d Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sun, 16 Oct 2016 17:59:23 -0700
Subject: [PATCH 03/21] bump deps

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 2ef44396..bc352753 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,4 +16,4 @@ stats = []
 [dependencies]
 num = { version = "0.1.35", default-features = false }
 rand = "0.3.14"
-rulinalg = "0.3.3"
+rulinalg = "0.3.4"

From 92af69832498dfaf3c59404ec6594eb82626ec19 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sun, 16 Oct 2016 17:59:55 -0700
Subject: [PATCH 04/21] Add an integration test for GMM

---
 tests/learning/gmm.rs | 55 +++++++++++++++++++++++++++++++++++++++++++
 tests/lib.rs          |  3 ++-
 2 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 tests/learning/gmm.rs

diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
new file mode 100644
index 00000000..47d80559
--- /dev/null
+++ b/tests/learning/gmm.rs
@@ -0,0 +1,55 @@
+extern crate rand;
+
+use rm::linalg::{Matrix, BaseMatrix};
+use rm::learning::gmm::GaussianMixtureModel;
+use rm::learning::UnSupModel;
+
+use self::rand::thread_rng;
+use self::rand::distributions::IndependentSample;
+use self::rand::distributions::normal::Normal;
+
+fn generate_data(centroids: &Matrix<f64>, points_per_centroid: usize, noise: f64) -> Matrix<f64> {
+    assert!(centroids.cols() > 0, "Centroids cannot be empty.");
+    assert!(centroids.rows() > 0, "Centroids cannot be empty.");
+    assert!(noise >= 0f64, "Noise must be non-negative.");
+    let mut raw_cluster_data = Vec::with_capacity(centroids.rows() * points_per_centroid *
+                                                  centroids.cols());
+
+    let mut rng = thread_rng();
+    let normal_rv = Normal::new(0f64, noise);
+
+    for _ in 0..points_per_centroid {
+        // Generate points from each centroid
+        for centroid in centroids.iter_rows() {
+            // Generate a point randomly around the centroid
+            let mut point = Vec::with_capacity(centroids.cols());
+            for feature in centroid {
+                point.push(feature + normal_rv.ind_sample(&mut rng));
+            }
+
+            // Push point to raw_cluster_data
+            raw_cluster_data.extend(point);
+        }
+    }
+
+    Matrix::new(centroids.rows() * points_per_centroid,
+                centroids.cols(),
+                raw_cluster_data)
+}
+
+#[test]
+fn gmm_train() {
+
+    const SAMPLES_PER_CENTROID: usize = 500;
+    // Choose three cluster centers, at (-0.5, -0.5), (0, 0.5), (0.5, 0.0).
+    let centroids = Matrix::new(3, 2, vec![-0.5, -0.5, 0.0, 0.5, 0.5, 0.0]);
+
+    // Generate some data randomly around the centroids
+    let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.4);
+    let mut model = GaussianMixtureModel::new(3);
+    model.set_max_iters(100);
+    match model.train(&samples) {
+        Ok(_) => println!("means: \n{:.4}", model.means().unwrap()),
+        Err(e) => println!("error: {}", e)
+    }
+}

diff --git a/tests/lib.rs b/tests/lib.rs
index f1261809..c536cd82 100644
--- a/tests/lib.rs
+++ b/tests/lib.rs
@@ -6,8 +6,9 @@ pub mod learning {
     mod lin_reg;
     mod k_means;
     mod gp;
+    mod gmm;
 
     pub mod optim {
         mod grad_desc;
     }
-}
\ No newline at end of file
+}

From 3e564f8e9f0e5243ca210a1e35933def0284329f Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sun, 16 Oct 2016 18:00:27 -0700
Subject: [PATCH 05/21] Commit current status of GaussianMixtureModel rewrite

---
 src/learning/gmm.rs | 321 ++++++++++++++++++++++++++++++++------------
 1 file changed, 238 insertions(+), 83 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 7118e0fd..782c810e 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -30,6 +30,8 @@
 //! // Probabilities that each point comes from each Gaussian.
 //! println!("{:?}", post_probs.data());
 //! ```
+extern crate rand;
+
 use linalg::{Matrix, MatrixSlice, Vector, BaseMatrix, BaseMatrixMut, Axes};
 use rulinalg::utils;
 
@@ -37,6 +39,10 @@ use learning::{LearningResult, UnSupModel};
 use learning::toolkit::rand_utils;
 use learning::error::{Error, ErrorKind};
 
+use std::f64::consts::PI;
+use std::f64::EPSILON;
+use std::mem;
+
 const CONVERGENCE_EPS: f64 = 1.0e-15;
 
 /// Covariance options for GMMs.
@@ -59,10 +65,15 @@ pub enum CovOption {
 #[derive(Debug)]
 pub struct GaussianMixtureModel {
     comp_count: usize,
+    // [n_components]
     mix_weights: Vector<f64>,
+    // [n_components, n_features]
     model_means: Option<Matrix<f64>>,
+    // n_components elements: [n_features, n_features]
     model_covars: Option<Vec<Matrix<f64>>>,
-    log_lik: f64,
+    // n_components elements: [n_features, n_features]
+    precisions_cholesky: Option<Vec<Matrix<f64>>>,
+    log_lik: Vector<f64>,
     max_iters: usize,
     /// The covariance options for the GMM.
     pub cov_option: CovOption,
@@ -79,28 +90,66 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
         // Initialization:
         let k = self.comp_count;
 
+        // println!("\n");
         self.model_covars = {
             let cov_mat = try!(self.initialize_covariances(inputs, reg_value));
             Some(vec![cov_mat; k])
         };
 
-        let random_rows: Vec<usize> =
-            rand_utils::reservoir_sample(&(0..inputs.rows()).collect::<Vec<usize>>(), k);
-        self.model_means = Some(inputs.select_rows(&random_rows));
+        {
+            use rand::distributions::{IndependentSample, Range};
+            let between = Range::new(0f64, 1.);
+            let mut rng = rand::thread_rng();
+
+            self.model_means = Some(Matrix::new(inputs.cols(), self.comp_count,
+                                                vec![0.; inputs.cols() * self.comp_count]));
+            let random_numbers: Vec<f64> =
+                (0..(inputs.rows()*k)).map(|_| between.ind_sample(&mut rng).exp()).collect();
+            let mut resp = Matrix::new(inputs.rows(), k, random_numbers);
+            let sum = resp.sum_cols();
+            for row in resp.iter_rows_mut() {
+                for (v, s) in row.iter_mut().zip(sum.iter()) {
+                    *v /= *s;
+                }
+            }
+            self.update_gaussian_parameters(inputs, resp);
+        }
 
-        for _ in 0..self.max_iters {
-            let log_lik_0 = self.log_lik;
+        for c in self.model_covars.as_ref().unwrap().iter() {
+            // println!("covs: \n{:.4}", &c);
+        }
 
-            let (weights, log_lik_1) = try!(self.membership_weights(inputs));
+        self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
+        for c in self.precisions_cholesky.as_ref().unwrap().iter() {
+            // println!("prec: \n{:.4}", &c);
+        }
 
-            if (log_lik_1 - log_lik_0).abs() < CONVERGENCE_EPS {
+        self.log_lik = Vector::new(vec![0.0; inputs.rows()]);
+        for iter in 0..self.max_iters {
+            let log_lik_0 = self.log_lik.clone();
+            // println!("\nIteration {}", iter);
+            // println!("cov:\n{:.4}", self.covariances().unwrap());
+
+            // e_step
+            let (log_prob_norm, mut resp) = self.estimate_log_prob_resp(inputs);
+            // Return to normal space
+            for v in resp.iter_mut() { *v = v.exp(); }
+
+            println!("resp: \n{:.4}", &resp.select_rows(&[0, 1, 2, 3, 4]));
+            println!("mix_weights: {}", &self.mix_weights);
+            // m_step
+            self.update_gaussian_parameters(inputs, resp);
+            self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
+            // end of m_step
+
+            let log_lik_1 = log_prob_norm;
+
+            if (log_lik_0 - &log_lik_1).iter().all(|v| v.abs() < CONVERGENCE_EPS) {
                 break;
             }
 
             self.log_lik = log_lik_1;
-
-            self.update_params(inputs, weights);
         }
 
         Ok(())
     }
 
     /// Predict output from inputs.
     fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
         if let (&Some(_), &Some(_)) = (&self.model_means, &self.model_covars) {
-            Ok(try!(self.membership_weights(inputs)).0)
+            Ok(self.estimate_weighted_log_prob(inputs))
         } else {
             Err(Error::new_untrained())
         }
     }
@@ -169,7 +218,8 @@ impl GaussianMixtureModel {
             mix_weights: normalized_weights,
             model_means: None,
             model_covars: None,
-            log_lik: 0f64,
+            precisions_cholesky: None,
+            log_lik: Vector::new(vec![]),
             max_iters: 100,
             cov_option: CovOption::Full,
         })
     }
@@ -223,7 +273,6 @@ impl GaussianMixtureModel {
                 let variance = try!(inputs.variance(Axes::Row));
                 Ok(Matrix::from_diag(&variance.data()) * reg_value.sqrt())
             }
-
             CovOption::Full | CovOption::Regularized(_) => {
                 let means = inputs.mean(Axes::Row);
                 let mut cov_mat = Matrix::zeros(inputs.cols(), inputs.cols());
@@ -243,108 +292,214 @@ impl GaussianMixtureModel {
         }
     }
 
-    fn membership_weights(&self, inputs: &Matrix<f64>) -> LearningResult<(Matrix<f64>, f64)> {
-        let n = inputs.rows();
-
-        let mut member_weights_data = Vec::with_capacity(n * self.comp_count);
-
-        // We compute the determinants and inverses now
-        let mut cov_sqrt_dets = Vec::with_capacity(self.comp_count);
-        let mut cov_invs = Vec::with_capacity(self.comp_count);
-
-        if let Some(ref covars) = self.model_covars {
-            for cov in covars {
-                // TODO: combine these. We compute det to get the inverse.
-                let covar_det = cov.det();
-                let covar_inv = try!(cov.inverse().map_err(Error::from));
+    fn compute_precision_cholesky(&self) -> LearningResult<Vec<Matrix<f64>>> {
+        let &GaussianMixtureModel {
+            model_covars: ref covariances,
+            cov_option: ref covariance_type,
+            comp_count: n_components,
+            ..
+        } = self;
 
-                cov_sqrt_dets.push(covar_det.sqrt());
-                cov_invs.push(covar_inv);
+        let covariances = covariances.as_ref().unwrap();
+        match *covariance_type {
+            CovOption::Full | CovOption::Regularized(_) => {
+                let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
+                for covariance in covariances {
+                    let mut cov_chol: Matrix<f64> = try!(covariance.cholesky());
+                    // cholesky is correct
+                    let half = (cov_chol.rows() as f64 / 2.0).floor() as usize;
+                    let lower_rows = cov_chol.rows() - half - 1;
+                    let n_col = cov_chol.cols() - 1;
+                    // println!("cov: \n{:.4}", &covariance);
+                    // println!("cov_chol: \n{:.4}", &cov_chol);
+
+                    // solve_l_triangular doesn't work with a matrix, so we have to do it
+                    // the hard way.
+                    let det: f64 = cov_chol.det();
+                    cov_chol /= det;
+
+                    for idx in 0..half {
+                        // Mirror along the diagonal
+                        let (mut upper, mut lower) = cov_chol.split_at_mut(half, Axes::Row);
+                        {
+                            let swap_row = lower_rows - idx;
+                            let swap_col = n_col - idx;
+                            mem::swap(&mut upper[[idx, idx]], &mut lower[[swap_row, swap_col]]);
+                        }
+
+                        // Transpose and invert all other values
+                        for j in (idx+1)..(n_col+1) {
+                            let swap_row = lower_rows - idx;
+                            let swap_col = n_col - j;
+                            mem::swap(&mut upper[[idx, j]], &mut lower[[swap_row, swap_col]]);
+                            upper[[idx, j]] *= -1.0;
+                        }
+                    }
+                    precisions_chol.push(cov_chol);
+                }
+                Ok(precisions_chol)
+            },
+            CovOption::Diagonal => {
+                let n_features = covariances[0].cols();
+                for covariance in covariances {
+                    for i in 0..covariance.cols() {
+                        if covariance[[i, i]] <= 0.0 {
+                            return Err(Error::new(
+                                ErrorKind::InvalidState,
+                                "Mixture model had zeros along the diagonals."));
+                        }
+                    }
+                }
+                let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
+                for covariance in covariances {
+                    let v: Vec<f64> = covariance.iter().map(|v| 1.0 / v.sqrt()).collect();
+                    precisions_chol.push(Matrix::<f64>::new(n_features, n_features, v));
+                }
+                Ok(precisions_chol)
             }
         }
+    }
 
-        let mut log_lik = 0f64;
-
-        // Now we compute the membership weights
-        if let Some(ref means) = self.model_means {
-            for i in 0..n {
-                let mut pdfs = Vec::with_capacity(self.comp_count);
-                let x_i = MatrixSlice::from_matrix(inputs, [i, 0], 1, inputs.cols());
-
-                for j in 0..self.comp_count {
-                    let mu_j = MatrixSlice::from_matrix(means, [j, 0], 1, means.cols());
-                    let diff = x_i - mu_j;
-
-                    let pdf = (&diff * &cov_invs[j] * diff.transpose() * -0.5).into_vec()[0]
-                        .exp() / cov_sqrt_dets[j];
-                    pdfs.push(pdf);
-                }
-
-                let weighted_pdf_sum = utils::dot(&pdfs, self.mix_weights.data());
+    // == Parameters
+    // inputs : [n_samples, n_features]
+    //
+    // == Returns
+    // weighted_log_prob : [n_samples, n_components]
+    //
+    fn estimate_weighted_log_prob(&self, inputs: &Matrix<f64>) -> Matrix<f64> {
+        let mut log_prob = self.estimate_log_prob(&inputs);
+        // println!("log_prob: \n{:.4}", log_prob.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
+        // println!("mix_weights: \n{:?}", &self.mix_weights);
+        let log_weights: Vec<f64> = self.mix_weights.iter().map(|w| w.ln()).collect();
+        // println!("log_weights: \n{:?}", &log_weights);
+        for (lp, lw) in log_prob.iter_mut().zip(log_weights.iter()) {
+            *lp *= *lw;
+        }
+        log_prob
+    }
 
-                for (idx, pdf) in pdfs.iter().enumerate() {
-                    member_weights_data.push(self.mix_weights[idx] * pdf / (weighted_pdf_sum));
+    // called estimate_log_gaussian_prob in scikit-learn
+    //
+    // == Parameters
+    // inputs : [n_samples, n_features]
+    //
+    //
+    fn estimate_log_prob(&self, inputs: &Matrix<f64>) -> Matrix<f64> {
+        let precisions_cholesky: &Vec<Matrix<f64>> = self.precisions_cholesky.as_ref().unwrap();
+        let ref model_means = self.model_means.as_ref().unwrap();
+        // The log of the determinant for each precision matrix
+        let log_det = precisions_cholesky.iter().map(|m| m.det().ln());
+        // println!("log_det: {:?}", log_det);
+
+        let mut log_prob = Matrix::zeros(inputs.rows(), self.comp_count);
+        for k in 0..self.comp_count {
+            let prec = &precisions_cholesky[k];
+            // y is a matrix of shape [n_samples, n_features]
+            let mut y = inputs * prec;
+            // println!("y: \n{:.4}", &y.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
+            // Matrix of shape [1, n_features]
+            let z: Matrix<f64> = model_means.select_rows(&[k]) * prec;
+            println!("z: \n{:.4}", &z);
+
+            // Subtract the mean of each column from the matrix y
+            for col in 0..y.cols() {
+                for row in 0..y.rows() {
+                    y[[row, col]] -= z[[0, col]];
+                }
+            }
+            // println!("y: \n{:.4}", &y.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
 
-                log_lik += weighted_pdf_sum.ln();
+            for (i, row) in y.iter_rows().enumerate() {
+                let sum_of_squares = row.iter().map(|v| v.powi(2)).sum();
+                log_prob[[i, k]] = sum_of_squares;
             }
         }
 
-        Ok((Matrix::new(n, self.comp_count, member_weights_data), log_lik))
+        log_prob = (log_prob + inputs.cols() as f64 * (2.0 * PI).ln()) * -0.5;
+        for (row, det) in log_prob.iter_rows_mut().zip(log_det) {
+            for v in row {
+                *v += det;
+            }
+        }
+        log_prob
     }
 
-    fn update_params(&mut self, inputs: &Matrix<f64>, membership_weights: Matrix<f64>) {
-        let n = membership_weights.rows();
-        let d = inputs.cols();
+    fn estimate_log_prob_resp(&self, inputs: &Matrix<f64>) -> (Vector<f64>, Matrix<f64>) {
+        let mut weighted_log_prob: Matrix<f64> = self.estimate_weighted_log_prob(inputs);
+        // length of n_samples
+        let log_prob_norm: Vector<f64> =
+            Vector::new(weighted_log_prob.iter_rows().map(|row: &[f64]| {
+                let a: f64 = row.iter().map(|v| v.exp()).sum();
+                a.ln()
+            }).collect::<Vec<f64>>());
+
+        println!("log_prob_norm: \n{:.4}", &log_prob_norm.select(&[0, 1, 2, 3, 4, 5, 6]));
+        for row in 0..log_prob_norm.size() {
+            for col in 0..weighted_log_prob.cols() {
+                weighted_log_prob[[row, col]] -= log_prob_norm[row];
+            }
+        }
+        println!("log_prob: \n{:.4}", weighted_log_prob.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
+        (log_prob_norm, weighted_log_prob)
+    }
 
-        let sum_weights = membership_weights.sum_rows();
+    fn update_gaussian_parameters(&mut self, inputs: &Matrix<f64>, mut resp: Matrix<f64>) {
+        let mut model_means = self.model_means.as_mut().unwrap();
 
-        self.mix_weights = &sum_weights / (n as f64);
+        // println!("resp: {}", resp);
+        self.mix_weights = resp.iter_rows()
+            .fold(Vector::new(vec![0.0; resp.cols()]), |mut acc, row| {
+                for (a, r) in acc.iter_mut().zip(row.iter()) {
+                    *a += *r;
+                }
+                acc
+            })
+            + 10.0 * EPSILON;
 
-        let mut new_means = membership_weights.transpose() * inputs;
+        // println!("nk: \n{:.4}", &self.mix_weights);
+        *model_means = resp.transpose() * inputs;
+        for col in 0..model_means.cols() {
+            let mm_rows = model_means.rows();
+            let mut mm_col = model_means.sub_slice_mut([0, col], mm_rows, 1);
 
-        for (mean, w) in new_means.iter_rows_mut().zip(sum_weights.data().iter()) {
-            for m in mean.iter_mut() {
-                *m /= *w;
-            }
+            let ref mix_weights = self.mix_weights;
+            let div_mix_weights = |v| { v / mix_weights[col] };
+            mm_col = mm_col.apply(&div_mix_weights);
         }
 
-        let mut new_covs = Vec::with_capacity(self.comp_count);
+        let mut model_covars = self.model_covars.as_mut().unwrap();
 
-        for k in 0..self.comp_count {
-            let mut cov_mat = Matrix::zeros(d, d);
-            let new_means_k = MatrixSlice::from_matrix(&new_means, [k, 0], 1, d);
-
-            for i in 0..n {
-                let inputs_i = MatrixSlice::from_matrix(inputs, [i, 0], 1, d);
-                let diff = inputs_i - new_means_k;
-                cov_mat += self.compute_cov(diff, membership_weights[[i, k]]);
+        // println!("means:\n{:.4}", &model_means);
+        // Iterate through each component in the model covariances
+        for (k, covariance) in model_covars.iter_mut().enumerate() {
+            let mut diff: Matrix<f64> = inputs.clone();
+            for (_, mut row) in diff.iter_rows_mut().enumerate() {
+                for (i, v) in row.iter_mut().enumerate() {
+                    *v = *v - model_means[[k, i]];
+                }
             }
 
-            if let CovOption::Regularized(eps) = self.cov_option {
-                cov_mat += Matrix::<f64>::identity(cov_mat.cols()) * eps;
-            }
+            let mut diff_transpose = diff.transpose();
 
-            new_covs.push(cov_mat / sum_weights[k]);
+            for row in 0..diff_transpose.rows() {
+                let mut dt = diff_transpose.sub_slice_mut([row, 0], 1, diff.rows());
+                dt *= resp[[row, k]];
+            }
 
+            *covariance = (diff_transpose.as_mut_slice() * diff) / self.mix_weights[k];
         }
 
-        self.model_means = Some(new_means);
-        self.model_covars = Some(new_covs);
-    }
+        self.mix_weights /= inputs.rows() as f64;
+        // println!("mix_weights: {}", &self.mix_weights);
 
-    fn compute_cov(&self, diff: Matrix<f64>, weight: f64) -> Matrix<f64> {
-        match self.cov_option {
-            CovOption::Full | CovOption::Regularized(_) => (diff.transpose() * diff) * weight,
-            CovOption::Diagonal => Matrix::from_diag(&diff.elemul(&diff).into_vec()) * weight,
-        }
     }
 }
 
 #[cfg(test)]
 mod tests {
     use super::GaussianMixtureModel;
-    use linalg::Vector;
+    use learning::UnSupModel;
+    use linalg::{Vector, Matrix};
 
     #[test]
     fn test_means_none() {

From d40b0bd56b634375850394642bd45d48998c3cda Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sun, 16 Oct 2016 18:56:45 -0700
Subject: [PATCH 06/21] Use mean of log_prob_norm for convergence

---
 src/learning/gmm.rs | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 782c810e..c50ca55d 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -73,7 +73,7 @@ pub struct GaussianMixtureModel {
     model_covars: Option<Vec<Matrix<f64>>>,
     // n_components elements: [n_features, n_features]
     precisions_cholesky: Option<Vec<Matrix<f64>>>,
-    log_lik: Vector<f64>,
+    log_lik: f64,
     max_iters: usize,
     /// The covariance options for the GMM.
     pub cov_option: CovOption,
@@ -125,9 +125,10 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
             // println!("prec: \n{:.4}", &c);
         }
 
-        self.log_lik = Vector::new(vec![0.0; inputs.rows()]);
+        self.log_lik = 0.;
+
         for iter in 0..self.max_iters {
-            let log_lik_0 = self.log_lik.clone();
+            let log_lik_0 = self.log_lik;
             // println!("\nIteration {}", iter);
             // println!("cov:\n{:.4}", self.covariances().unwrap());
 
@@ -143,9 +144,9 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
             self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
             // end of m_step
 
-            let log_lik_1 = log_prob_norm;
+            let log_lik_1 = log_prob_norm.mean();
 
-            if (log_lik_0 - &log_lik_1).iter().all(|v| v.abs() < CONVERGENCE_EPS) {
+            if (log_lik_0 - log_lik_1).abs() < CONVERGENCE_EPS {
                 break;
             }
 
@@ -219,7 +220,7 @@ impl GaussianMixtureModel {
             model_means: None,
             model_covars: None,
             precisions_cholesky: None,
-            log_lik: Vector::new(vec![]),
+            log_lik: 0.,
             max_iters: 100,
             cov_option: CovOption::Full,
         })

From 06ace3bba0921d36d2cdea173061fd0e9a98fd1c Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sun, 16 Oct 2016 19:59:52 -0700
Subject: [PATCH 07/21] Add cov regularization value to stabilize cholesky

---
 src/learning/gmm.rs   | 34 +++++++++++++++++++++++-----------
 tests/learning/gmm.rs | 12 +++++++++---
 2 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index c50ca55d..0176515d 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -99,7 +99,7 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
 
         {
             use rand::distributions::{IndependentSample, Range};
-            let between = Range::new(0f64, 1.);
+            let between = Range::new(0.0f64, 1.);
             let mut rng = rand::thread_rng();
 
             self.model_means = Some(Matrix::new(inputs.cols(), self.comp_count,
                                                 vec![0.; inputs.cols() * self.comp_count]));
@@ -137,8 +137,8 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
             // Return to normal space
             for v in resp.iter_mut() { *v = v.exp(); }
 
-            println!("resp: \n{:.4}", &resp.select_rows(&[0, 1, 2, 3, 4]));
-            println!("mix_weights: {}", &self.mix_weights);
+            // println!("resp: \n{:.4}", &resp.select_rows(&[0, 1, 2, 3, 4]));
+            // println!("mix_weights: {}", &self.mix_weights);
             // m_step
             self.update_gaussian_parameters(inputs, resp);
             self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
             // end of m_step
@@ -254,6 +254,13 @@ impl GaussianMixtureModel {
         &self.mix_weights
     }
 
+    /// The model mean log likelihood
+    ///
+    /// Returns the mean log likelihood of the model
+    pub fn log_lik(&self) -> f64 {
+        self.log_lik
+    }
+
     /// Sets the max number of iterations for the EM algorithm.
     ///
     /// # Examples
@@ -306,12 +313,12 @@ impl GaussianMixtureModel {
             CovOption::Full | CovOption::Regularized(_) => {
                 let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
                 for covariance in covariances {
+                    // println!("loop-cov: \n{:.4}", &covariance);
                     let mut cov_chol: Matrix<f64> = try!(covariance.cholesky());
                     // cholesky is correct
                     let half = (cov_chol.rows() as f64 / 2.0).floor() as usize;
                     let lower_rows = cov_chol.rows() - half - 1;
                     let n_col = cov_chol.cols() - 1;
-                    // println!("cov: \n{:.4}", &covariance);
                     // println!("cov_chol: \n{:.4}", &cov_chol);
 
                     // solve_l_triangular doesn't work with a matrix, so we have to do it
@@ -371,10 +378,9 @@ impl GaussianMixtureModel {
         let mut log_prob = self.estimate_log_prob(&inputs);
         // println!("log_prob: \n{:.4}", log_prob.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
         // println!("mix_weights: \n{:?}", &self.mix_weights);
-        let log_weights: Vec<f64> = self.mix_weights.iter().map(|w| w.ln()).collect();
-        // println!("log_weights: \n{:?}", &log_weights);
-        for (lp, lw) in log_prob.iter_mut().zip(log_weights.iter()) {
-            *lp *= *lw;
+        let log_weights = self.mix_weights.iter().map(|w| w.ln());
+        for (lp, lw) in log_prob.iter_mut().zip(log_weights) {
+            *lp *= lw;
         }
         log_prob
     }
@@ -400,7 +406,7 @@ impl GaussianMixtureModel {
             // println!("y: \n{:.4}", &y.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
             // Matrix of shape [1, n_features]
             let z: Matrix<f64> = model_means.select_rows(&[k]) * prec;
-            println!("z: \n{:.4}", &z);
+            // println!("z: \n{:.4}", &z);
 
             // Subtract the mean of each column from the matrix y
             for col in 0..y.cols() {
                 for row in 0..y.rows() {
@@ -434,13 +440,13 @@ impl GaussianMixtureModel {
                 a.ln()
             }).collect::<Vec<f64>>());
 
-        println!("log_prob_norm: \n{:.4}", &log_prob_norm.select(&[0, 1, 2, 3, 4, 5, 6]));
+        // println!("log_prob_norm: \n{:.4}", &log_prob_norm.select(&[0, 1, 2, 3, 4, 5, 6]));
         for row in 0..log_prob_norm.size() {
             for col in 0..weighted_log_prob.cols() {
                 weighted_log_prob[[row, col]] -= log_prob_norm[row];
             }
         }
-        println!("log_prob: \n{:.4}", weighted_log_prob.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
+        // println!("log_prob: \n{:.4}", weighted_log_prob.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
         (log_prob_norm, weighted_log_prob)
     }
 
@@ -488,6 +494,12 @@ impl GaussianMixtureModel {
             }
 
             *covariance = (diff_transpose.as_mut_slice() * diff) / self.mix_weights[k];
+
+            // Add the regularization value
+            for idx in 0..covariance.rows() {
+                covariance[[idx, idx]] += 0.03;
+            }
+
         }
 
         self.mix_weights /= inputs.rows() as f64;

diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
index 47d80559..957ce658 100644
--- a/tests/learning/gmm.rs
+++ b/tests/learning/gmm.rs
@@ -40,16 +40,22 @@ fn gmm_train() {
 
-    const SAMPLES_PER_CENTROID: usize = 500;
+    const SAMPLES_PER_CENTROID: usize = 2000;
     // Choose three cluster centers, at (-0.5, -0.5), (0, 0.5), (0.5, 0.0).
     let centroids = Matrix::new(3, 2, vec![-0.5, -0.5, 0.0, 0.5, 0.5, 0.0]);
 
     // Generate some data randomly around the centroids
-    let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.4);
+    let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.2);
     let mut model = GaussianMixtureModel::new(3);
     model.set_max_iters(100);
     match model.train(&samples) {
-        Ok(_) => println!("means: \n{:.4}", model.means().unwrap()),
+        Ok(_) => {
+            println!("means: \n{:.4}", model.means().unwrap());
+            println!("log_lik: \n{:.4}", model.log_lik());
+            for cov in model.covariances().as_ref().unwrap().iter() {
+                println!("cov: \n{:.4}", cov);
+            }
+        },
         Err(e) => println!("error: {}", e)
     }
 }

From 10bafa7bcef4240685e7b8e114fab44d35254760 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sun, 16 Oct 2016 20:57:14 -0700
Subject: [PATCH 08/21] Allow customization of initialization methods

---
 src/learning/gmm.rs   | 85 ++++++++++++++++++++++++++++++-------------
 tests/learning/gmm.rs |  6 +--
 2 files changed, 63 insertions(+), 28 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 0176515d..6ea930a0 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -42,6 +42,7 @@ use learning::error::{Error, ErrorKind};
 use std::f64::consts::PI;
 use std::f64::EPSILON;
 use std::mem;
+use std::marker::PhantomData;
 
 const CONVERGENCE_EPS: f64 = 1.0e-15;
 
@@ -60,10 +61,9 @@ pub enum CovOption {
     Diagonal,
 }
 
-
 /// A Gaussian Mixture Model
 #[derive(Debug)]
-pub struct GaussianMixtureModel {
+pub struct GaussianMixtureModel<T: Initializer> {
     comp_count: usize,
     // [n_components]
     mix_weights: Vector<f64>,
@@ -75,11 +75,12 @@ pub struct GaussianMixtureModel {
     precisions_cholesky: Option<Vec<Matrix<f64>>>,
     log_lik: f64,
     max_iters: usize,
+    phantom: PhantomData<T>,
     /// The covariance options for the GMM.
     pub cov_option: CovOption,
 }
 
-impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
+impl<T: Initializer> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel<T> {
     /// Train the model using inputs.
     fn train(&mut self, inputs: &Matrix<f64>) -> LearningResult<()> {
         let reg_value = if inputs.rows() > 1 {
@@ -98,21 +99,9 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
         };
 
         {
-            use rand::distributions::{IndependentSample, Range};
-            let between = Range::new(0.0f64, 1.);
-            let mut rng = rand::thread_rng();
-
             self.model_means = Some(Matrix::new(inputs.cols(), self.comp_count,
                                                 vec![0.; inputs.cols() * self.comp_count]));
-            let random_numbers: Vec<f64> =
-                (0..(inputs.rows()*k)).map(|_| between.ind_sample(&mut rng).exp()).collect();
-            let mut resp = Matrix::new(inputs.rows(), k, random_numbers);
-            let sum = resp.sum_cols();
-            for row in resp.iter_rows_mut() {
-                for (v, s) in row.iter_mut().zip(sum.iter()) {
-                    *v /= *s;
-                }
-            }
+            let resp = try!(T::init_resp(k, inputs));
             self.update_gaussian_parameters(inputs, resp);
         }
 
-impl GaussianMixtureModel {
+impl<T: Initializer> GaussianMixtureModel<T> {
     /// Constructs a new Gaussian Mixture Model
     ///
     /// Defaults to 100 maximum iterations and
     ///
     /// let gmm = GaussianMixtureModel::new(3);
     /// ```
-    pub fn new(k: usize) -> GaussianMixtureModel {
+    pub fn new(k: usize) -> Self {
         Self::with_weights(k, Vector::ones(k) / k as f64).unwrap()
     }
 
     /// Constructs a new GMM with the specified prior mixture weights.
 
     /// # Failures
     ///
     /// - Mixture weights do not have length k.
     /// - Mixture weights have a negative entry.
-    pub fn with_weights(k: usize, mixture_weights: Vector<f64>) -> LearningResult<GaussianMixtureModel> {
+    pub fn with_weights(k: usize, mixture_weights: Vector<f64>) -> LearningResult<Self> {
         if mixture_weights.size() != k {
             Err(Error::new(ErrorKind::InvalidParameters, "Mixture weights must have length k."))
         } else if mixture_weights.data().iter().any(|&x| x < 0f64) {
 
             log_lik: 0.,
             max_iters: 100,
             cov_option: CovOption::Full,
+            phantom: PhantomData,
         })
     }
 }
 
             // Add the regularization value
             for idx in 0..covariance.rows() {
-                covariance[[idx, idx]] += 0.03;
+                covariance[[idx, idx]] += 0.08;
             }
 
         }
 
         self.mix_weights /= inputs.rows() as f64;
 
     }
 }
 
+/// Trait for possible methods of initializing the responsibilities matrix
+pub trait Initializer {
+    /// Should return the responsibilities matrix:
+    /// shape is [samples, components]
+    fn init_resp(k: usize, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>>;
+}
+
+/// Initialize the responsibilities matrix using randomly generated values
+pub struct Random;
+
+impl Initializer for Random {
+    fn init_resp(k: usize, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
+        use rand::distributions::{IndependentSample, Range};
+        let between = Range::new(0.0f64, 1.);
+        let mut rng = rand::thread_rng();
+        let random_numbers: Vec<f64> =
+            (0..(inputs.rows()*k)).map(|_| between.ind_sample(&mut rng).exp()).collect();
+        let mut resp = Matrix::new(inputs.rows(), k, random_numbers);
+        let sum = resp.sum_cols();
+        for row in resp.iter_rows_mut() {
+            for (v, s) in row.iter_mut().zip(sum.iter()) {
+                *v /= *s;
+            }
+        }
+        Ok(resp)
+    }
+}
+
+/// Initialize the responsibilities matrix using k-means clustering
+pub struct KMeans;
+
+impl Initializer for KMeans {
+    fn init_resp(k: usize, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
+        use learning::k_means::KMeansClassifier;
+        let mut model = KMeansClassifier::new(k);
+        try!(model.train(inputs));
+        let classes = try!(model.predict(inputs));
+        let mut resp: Matrix<f64> = Matrix::zeros(inputs.rows(), k);
+        for (row, col) in classes.iter().enumerate() {
+            resp[[row, *col]] = 1.;
+        }
+        Ok(resp)
+    }
+}
+
 #[cfg(test)]
 mod tests {
-    use super::GaussianMixtureModel;
+    use super::{GaussianMixtureModel, Random};
     use learning::UnSupModel;
     use linalg::{Vector, Matrix};
 
     #[test]
     fn test_means_none() {
-        let model = GaussianMixtureModel::new(5);
+        let model = GaussianMixtureModel::<Random>::new(5);
 
         assert_eq!(model.means(), None);
     }
 
     #[test]
     fn test_covars_none() {
-        let model = GaussianMixtureModel::new(5);
+        let model = GaussianMixtureModel::<Random>::new(5);
 
         assert_eq!(model.covariances(), None);
     }
 
     #[test]
     fn test_negative_mixtures() {
         let mix_weights = Vector::new(vec![-0.25, 0.75, 0.5]);
-        let gmm_res = GaussianMixtureModel::with_weights(3, mix_weights);
+        let gmm_res = GaussianMixtureModel::<Random>::with_weights(3, mix_weights);
         assert!(gmm_res.is_err());
     }
 
     #[test]
     fn test_wrong_length_mixtures() {
         let mix_weights = Vector::new(vec![0.1, 0.25, 0.75, 0.5]);
-        let gmm_res = GaussianMixtureModel::with_weights(3, mix_weights);
+        let gmm_res = GaussianMixtureModel::<Random>::with_weights(3, mix_weights);
         assert!(gmm_res.is_err());
     }
 }

diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
index 957ce658..2e7ec754 100644
--- a/tests/learning/gmm.rs
+++ b/tests/learning/gmm.rs
@@ -1,7 +1,7 @@
 extern crate rand;
 
 use rm::linalg::{Matrix, BaseMatrix};
-use rm::learning::gmm::GaussianMixtureModel;
+use rm::learning::gmm::{GaussianMixtureModel, Random, KMeans};
 use rm::learning::UnSupModel;
@@ -40,13 +40,13 @@ fn gmm_train() {
 
-    const SAMPLES_PER_CENTROID: usize = 2000;
+    const SAMPLES_PER_CENTROID: usize = 500;
     // Choose three cluster centers, at (-0.5, -0.5), (0, 0.5), (0.5, 0.0).
     let centroids = Matrix::new(3, 2, vec![-0.5, -0.5, 0.0, 0.5, 0.5, 0.0]);
 
     // Generate some data randomly around the centroids
     let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.2);
-    let mut model = GaussianMixtureModel::new(3);
+    let mut model = GaussianMixtureModel::<KMeans>::new(3);
     model.set_max_iters(100);
     match model.train(&samples) {
         Ok(_) => {

From 7db6f0cd38cbb54f152ab5f6e0c8655762b61527 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sun, 16 Oct 2016 20:57:34 -0700
Subject: [PATCH 09/21] Investigate the initialized means

---
 src/learning/gmm.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 6ea930a0..3377674f 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -105,6 +105,8 @@ impl<T: Initializer> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureMod
             self.update_gaussian_parameters(inputs, resp);
         }
 
+        println!("initialized means: \n{:.4}", self.model_means.as_ref().unwrap());
+
         for c in self.model_covars.as_ref().unwrap().iter() {
             // println!("covs: \n{:.4}", &c);
         }

From 9fd6d36ac35e77cf3e62ac5ad9482095001902a2 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 08:56:20 -0700
Subject: [PATCH 10/21] Add logs, not multiply

---
 src/learning/gmm.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 3377674f..4ed03601 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -372,7 +372,7 @@ impl<T: Initializer> GaussianMixtureModel<T> {
         // println!("mix_weights: \n{:?}", &self.mix_weights);
         let log_weights = self.mix_weights.iter().map(|w| w.ln());
         for (lp, lw) in log_prob.iter_mut().zip(log_weights) {
-            *lp *= lw;
+            *lp += lw;
         }
         log_prob
     }

From b3c136b16a830e7f3684047d40b045042b8a3827 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 08:57:37 -0700
Subject: [PATCH 11/21] Test for lower than epsilon rather than le(0)

---
 src/learning/gmm.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 4ed03601..1a5cfe31 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -343,7 +343,7 @@ impl<T: Initializer> GaussianMixtureModel<T> {
                 let n_features = covariances[0].cols();
                 for covariance in covariances {
                     for i in 0..covariance.cols() {
-                        if covariance[[i, i]] <= 0.0 {
+                        if covariance[[i, i]] < EPSILON {
                             return Err(Error::new(
                                 ErrorKind::InvalidState,
                                 "Mixture model had zeros along the diagonals."));

From f79c2f1fef92782a3c5bee033fddc84ab95a0bb2 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 15:36:28 -0700
Subject: [PATCH 12/21] Fix covariance calculation in update_gaussian_parameters

---
 src/learning/gmm.rs | 36 ++++++++----------------------------
 1 file changed, 8 insertions(+), 28 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 1a5cfe31..80b0ca56 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -91,7 +91,6 @@ impl<T: Initializer> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureMod
         // Initialization:
         let k = self.comp_count;
 
-        // println!("\n");
         self.model_covars = {
             let cov_mat = try!(self.initialize_covariances(inputs, reg_value));
             Some(vec![cov_mat; k])
         };
@@ -105,31 +104,17 @@ impl<T: Initializer> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureMod
             self.update_gaussian_parameters(inputs, resp);
         }
 
-        println!("initialized means: \n{:.4}", self.model_means.as_ref().unwrap());
-
-        for c in self.model_covars.as_ref().unwrap().iter() {
-            // println!("covs: \n{:.4}", &c);
-        }
-
         self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
-        for c in self.precisions_cholesky.as_ref().unwrap().iter() {
-            // println!("prec: \n{:.4}", &c);
-        }
-
         self.log_lik = 0.;
 
         for iter in 0..self.max_iters {
             let log_lik_0 = self.log_lik;
-            // println!("\nIteration {}", iter);
-            // println!("cov:\n{:.4}", self.covariances().unwrap());
 
             // e_step
             let (log_prob_norm, mut resp) = self.estimate_log_prob_resp(inputs);
             // Return to normal space
             for v in resp.iter_mut() { *v = v.exp(); }
 
-            // println!("resp: \n{:.4}", &resp.select_rows(&[0, 1, 2, 3, 4]));
-            // println!("mix_weights: {}", &self.mix_weights);
             // m_step
             self.update_gaussian_parameters(inputs, resp);
             self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
             // end of m_step
@@ -445,7 +430,6 @@ impl<T: Initializer> GaussianMixtureModel<T> {
     fn update_gaussian_parameters(&mut self, inputs: &Matrix<f64>, mut resp: Matrix<f64>) {
         let mut model_means = self.model_means.as_mut().unwrap();
 
-        // println!("resp: {}", resp);
         self.mix_weights = resp.iter_rows()
             .fold(Vector::new(vec![0.0; resp.cols()]), |mut acc, row| {
                 for (a, r) in acc.iter_mut().zip(row.iter()) {
@@ -455,12 +439,10 @@ impl<T: Initializer> GaussianMixtureModel<T> {
             })
             + 10.0 * EPSILON;
 
-        // println!("nk: \n{:.4}", &self.mix_weights);
         *model_means = resp.transpose() * inputs;
         for col in 0..model_means.cols() {
             let mm_rows = model_means.rows();
             let mut mm_col = model_means.sub_slice_mut([0, col], mm_rows, 1);
-
             let ref mix_weights = self.mix_weights;
             let div_mix_weights = |v| { v / mix_weights[col] };
             mm_col = mm_col.apply(&div_mix_weights);
@@ -468,35 +450,33 @@ impl<T: Initializer> GaussianMixtureModel<T> {
 
         let mut model_covars = self.model_covars.as_mut().unwrap();
 
-        // println!("means:\n{:.4}", &model_means);
        // Iterate through each component in the model covariances
         for (k, covariance) in model_covars.iter_mut().enumerate() {
             let mut diff: Matrix<f64> = inputs.clone();
             for (_, mut row) in diff.iter_rows_mut().enumerate() {
                 for (i, v) in row.iter_mut().enumerate() {
-                    *v = *v - model_means[[k, i]];
+                    *v -= model_means[[k, i]];
                 }
             }
 
             let mut diff_transpose = diff.transpose();
-
-            for row in 0..diff_transpose.rows() {
-                let mut dt = diff_transpose.sub_slice_mut([row, 0], 1, diff.rows());
-                dt *= resp[[row, k]];
+            let resp_transpose_row = resp.select_cols(&[k]);
+            for row in diff_transpose.iter_rows_mut() {
+                for (v, x) in row.iter_mut().zip(resp_transpose_row.iter()) {
+                    *v *= *x;
+                }
             }
 
-            *covariance = (diff_transpose.as_mut_slice() * diff) / self.mix_weights[k];
+            *covariance = (diff_transpose * diff) / self.mix_weights[k];
 
             // Add the regularization value
             for idx in 0..covariance.rows() {
-                covariance[[idx, idx]] += 0.08;
+                covariance[[idx, idx]] += 0.0;
             }
 
         }
 
         self.mix_weights /= inputs.rows() as f64;
-        // println!("mix_weights: {}", &self.mix_weights);
 
     }
 }

From 918a74038936b6115b6c4363fdb332d30b0bf477 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 15:42:31 -0700
Subject: [PATCH 13/21] Fix lints, fix doctests

---
 src/learning/gmm.rs   | 33 ++++++++++++++++-----------------
 tests/learning/gmm.rs |  2 +-
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 80b0ca56..e06852f8 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -6,14 +6,14 @@
 //!
 //! ```
 //! use rusty_machine::linalg::Matrix;
-//! use rusty_machine::learning::gmm::{CovOption, GaussianMixtureModel};
+//! use rusty_machine::learning::gmm::{CovOption, GaussianMixtureModel, Random};
 //! use rusty_machine::learning::UnSupModel;
 //!
 //! let inputs = Matrix::new(4, 2, vec![1.0, 2.0, -3.0, -3.0, 0.1, 1.5, -5.0, -2.5]);
 //! let test_inputs = Matrix::new(3, 2, vec![1.0, 2.0, 3.0, 2.9, -4.4, -2.5]);
 //!
 //! // Create gmm with k(=2) classes.
-//! let mut model = GaussianMixtureModel::new(2);
+//! let mut model: GaussianMixtureModel<Random> = GaussianMixtureModel::new(2);
 //! model.set_max_iters(10);
 //! model.cov_option = CovOption::Diagonal;
 //!
@@ -32,11 +32,9 @@
 //! ```
 extern crate rand;
 
-use linalg::{Matrix, MatrixSlice, Vector, BaseMatrix, BaseMatrixMut, Axes};
-use rulinalg::utils;
+use linalg::{Matrix, Vector, BaseMatrix, BaseMatrixMut, Axes};
 
 use learning::{LearningResult, UnSupModel};
-use learning::toolkit::rand_utils;
 use learning::error::{Error, ErrorKind};
 
@@ -107,7 +105,7 @@ impl<T: Initializer> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureMod
         self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
         self.log_lik = 0.;
 
-        for iter in 0..self.max_iters {
+        for _ in 0..self.max_iters {
             let log_lik_0 = self.log_lik;
 
@@ -151,9 +149,9 @@ impl<T: Initializer> GaussianMixtureModel<T> {
     ///
     /// # Examples
     /// ```
-    /// use rusty_machine::learning::gmm::GaussianMixtureModel;
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random};
     ///
-    /// let gmm = GaussianMixtureModel::new(3);
+    /// let gmm: GaussianMixtureModel<Random> = GaussianMixtureModel::new(3);
     /// ```
     pub fn new(k: usize) -> Self {
         Self::with_weights(k, Vector::ones(k) / k as f64).unwrap()
@@ -167,12 +165,12 @@ impl<T: Initializer> GaussianMixtureModel<T> {
     /// # Examples
     ///
     /// ```
-    /// use rusty_machine::learning::gmm::GaussianMixtureModel;
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random};
     /// use rusty_machine::linalg::Vector;
     ///
    /// let mix_weights = Vector::new(vec![0.25, 0.25, 0.5]);
     ///
-    /// let gmm = GaussianMixtureModel::with_weights(3, mix_weights).unwrap();
+    /// let gmm: GaussianMixtureModel<Random> = GaussianMixtureModel::with_weights(3, mix_weights).unwrap();
     /// ```
     ///
     /// # Failures
@@ -243,9 +241,9 @@ impl<T: Initializer> GaussianMixtureModel<T> {
     /// # Examples
     ///
     /// ```
-    /// use rusty_machine::learning::gmm::GaussianMixtureModel;
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random};
     ///
-    /// let mut gmm = GaussianMixtureModel::new(2);
+    /// let mut gmm: GaussianMixtureModel<Random> = GaussianMixtureModel::new(2);
     /// gmm.set_max_iters(5);
     /// ```
     pub fn set_max_iters(&mut self, iters: usize) {
@@ -427,7 +425,7 @@ impl<T: Initializer> GaussianMixtureModel<T> {
     }
 
-    fn update_gaussian_parameters(&mut self, inputs: &Matrix<f64>, mut resp: Matrix<f64>) {
+    fn update_gaussian_parameters(&mut self, inputs: &Matrix<f64>, resp: Matrix<f64>) {
         let mut model_means = self.model_means.as_mut().unwrap();
 
@@ -442,10 +440,10 @@ impl<T: Initializer> GaussianMixtureModel<T> {
         *model_means = resp.transpose() * inputs;
         for col in 0..model_means.cols() {
             let mm_rows = model_means.rows();
-            let mut mm_col = model_means.sub_slice_mut([0, col], mm_rows, 1);
+            let mm_col = model_means.sub_slice_mut([0, col], mm_rows, 1);
             let ref mix_weights = self.mix_weights;
             let div_mix_weights = |v| { v / mix_weights[col] };
-            mm_col = mm_col.apply(&div_mix_weights);
+            mm_col.apply(&div_mix_weights);
         }
 
@@ -488,6 +486,7 @@ impl<T: Initializer> GaussianMixtureModel<T> {
 
 /// Initialize the responsibilities matrix using randomly generated values
+#[derive(Debug)]
 pub struct Random;
 
 impl Initializer for Random {
@@ -509,6 +508,7 @@ impl Initializer for Random {
 }
 
 /// Initialize the responsibilities matrix using k-means clustering
+#[derive(Debug)]
 pub struct KMeans;
 
 impl Initializer for KMeans {
@@ -528,8 +528,7 @@ impl Initializer for KMeans {
 #[cfg(test)]
 mod tests {
     use super::{GaussianMixtureModel, Random};
-    use learning::UnSupModel;
-    use linalg::{Vector, Matrix};
+    use linalg::Vector;
 
     #[test]
     fn test_means_none() {

diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
index 2e7ec754..5b4bf01f 100644
--- a/tests/learning/gmm.rs
+++ b/tests/learning/gmm.rs
@@ -1,7 +1,7 @@
 extern crate rand;
 
 use rm::linalg::{Matrix, BaseMatrix};
-use rm::learning::gmm::{GaussianMixtureModel, Random, KMeans};
+use rm::learning::gmm::{GaussianMixtureModel, KMeans};
 use rm::learning::UnSupModel;
 
 use self::rand::thread_rng;

From 917656014d2bee2d8dc8e66889f4b46a0424a9af Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 17:12:40 -0700
Subject: [PATCH 14/21] Reorganize and add the cov regularization variable
 again

---
 src/learning/gmm.rs | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index e06852f8..fea5b910 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -426,8 +426,6 @@ impl<T: Initializer> GaussianMixtureModel<T> {
     }
 
     fn update_gaussian_parameters(&mut self, inputs: &Matrix<f64>, resp: Matrix<f64>) {
-        let mut model_means = self.model_means.as_mut().unwrap();
-
         self.mix_weights = resp.iter_rows()
             .fold(Vector::new(vec![0.0; resp.cols()]), |mut acc, row| {
                 for (a, r) in acc.iter_mut().zip(row.iter()) {
@@ -436,17 +434,16 @@ impl<T: Initializer> GaussianMixtureModel<T> {
                 acc
             })
             + 10.0 * EPSILON;
-
+
+        let mut model_covars = self.model_covars.as_mut().unwrap();
+        let mut model_means = self.model_means.as_mut().unwrap();
         *model_means = resp.transpose() * inputs;
-        for col in 0..model_means.cols() {
-            let mm_rows = model_means.rows();
-            let mm_col = model_means.sub_slice_mut([0, col], mm_rows, 1);
-            let ref mix_weights = self.mix_weights;
-            let div_mix_weights = |v| { v / mix_weights[col] };
-            mm_col.apply(&div_mix_weights);
-        }
 
-        let mut model_covars = self.model_covars.as_mut().unwrap();
+        for (nk, row) in self.mix_weights.iter().zip(model_means.iter_rows_mut()) {
+            for v in row {
+                *v /= *nk;
+            }
+        }
 
         // Iterate through each component in the model covariances
         for (k, covariance) in model_covars.iter_mut().enumerate() {
 
             // Add the regularization value
             for idx in 0..covariance.rows() {
-                covariance[[idx, idx]] += 0.0;
+                covariance[[idx, idx]] += 0.05;
             }

From 7adee145c6ee296f616ca684d4cac79238f92519 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 17:13:11 -0700
Subject: [PATCH 15/21] Testing higher-dimensional data is more likely to
 fail, so we should do it

---
 tests/learning/gmm.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
index 5b4bf01f..eb50c3f7 100644
--- a/tests/learning/gmm.rs
+++ b/tests/learning/gmm.rs
@@ -42,7 +42,7 @@ fn gmm_train() {
 
     const SAMPLES_PER_CENTROID: usize = 500;
     // Choose three cluster centers, at (-0.5, -0.5), (0, 0.5), (0.5, 0.0).
-    let centroids = Matrix::new(3, 2, vec![-0.5, -0.5, 0.0, 0.5, 0.5, 0.0]);
+    let centroids = Matrix::new(2, 3, vec![-0.4, -0.6, 0.1, 0.6, 0.7, -0.3]);
 
     // Generate some data randomly around the centroids
     let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.2);

From 2f20142ac685c187ed8cdae218d0fb348d736ab6 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 19:29:30 -0700
Subject: [PATCH 16/21] Remove comments, lower covariance

---
 src/learning/gmm.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index fea5b910..baac4c6d 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -415,13 +415,12 @@ impl<T: Initializer> GaussianMixtureModel<T> {
                 a.ln()
             }).collect::<Vec<f64>>());
 
-        // println!("log_prob_norm: \n{:.4}", &log_prob_norm.select(&[0, 1, 2, 3, 4, 5, 6]));
         for row in 0..log_prob_norm.size() {
             for col in 0..weighted_log_prob.cols() {
                 weighted_log_prob[[row, col]] -= log_prob_norm[row];
             }
         }
-        // println!("log_prob: \n{:.4}", weighted_log_prob.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
+
         (log_prob_norm, weighted_log_prob)
     }
 
             // Add the regularization value
             for idx in 0..covariance.rows() {
-                covariance[[idx, idx]] += 0.05;
+                covariance[[idx, idx]] += 0.01;
             }

From 0b7f6d20a8ed02672c7d4b7a2fef8542bde92cc0 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 19:29:58 -0700
Subject: [PATCH 17/21] Test more clusters with GMM

---
 tests/learning/gmm.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
index eb50c3f7..c2f8d7eb 100644
--- a/tests/learning/gmm.rs
+++ b/tests/learning/gmm.rs
@@ -46,7 +46,7 @@ fn gmm_train() {
 
     // Generate some data randomly around the centroids
     let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.2);
-    let mut model = GaussianMixtureModel::<KMeans>::new(3);
+    let mut model = GaussianMixtureModel::<KMeans>::new(10);
     model.set_max_iters(100);
     match model.train(&samples) {
         Ok(_) => {

From 31b20eae156b58656a9f847fba0ba253751df264 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 19:44:38 -0700
Subject: [PATCH 18/21] Clean up train method

---
 src/learning/gmm.rs | 34 +++++++++++++---------------------
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index baac4c6d..72251ebb 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -81,28 +81,20 @@ pub struct GaussianMixtureModel<T: Initializer> {
 
 impl<T: Initializer> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel<T> {
     /// Train the model using inputs.
     fn train(&mut self, inputs: &Matrix<f64>) -> LearningResult<()> {
-        let reg_value = if inputs.rows() > 1 {
-            1f64 / (inputs.rows() - 1) as f64
-        } else {
-            return Err(Error::new(ErrorKind::InvalidData, "Only one row of data provided."));
-        };
-
-        // Initialization:
+       // Move k to stack to appease the borrow checker
         let k = self.comp_count;
 
-        self.model_covars = {
-            let cov_mat = try!(self.initialize_covariances(inputs, reg_value));
-            Some(vec![cov_mat; k])
-        };
+        // Initialize empty matrices for covariances and means
+        self.model_covars = Some(
+            vec![Matrix::zeros(inputs.cols(), inputs.cols()); k]);
+        self.model_means = Some(Matrix::zeros(inputs.cols(), k));
 
-        {
-            self.model_means = Some(Matrix::new(inputs.cols(), self.comp_count,
-                                                vec![0.; inputs.cols() * self.comp_count]));
-            let resp = try!(T::init_resp(k, inputs));
-            self.update_gaussian_parameters(inputs, resp);
-        }
+        // Initialize responsibilities and calculate parameters
+        self.update_gaussian_parameters(
+            inputs, try!(T::init_resp(k, inputs)));
+        self.precisions_cholesky =
+            Some(try!(self.compute_precision_cholesky()));
 
-        self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
         self.log_lik = 0.;
 
         for _ in 0..self.max_iters {
             let log_lik_0 = self.log_lik;
 
             // e_step
             let (log_prob_norm, mut resp) = self.estimate_log_prob_resp(inputs);
             // Return to normal space
             for v in resp.iter_mut() { *v = v.exp(); }
 
             // m_step
             self.update_gaussian_parameters(inputs, resp);
-            self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
-            // end of m_step
+            self.precisions_cholesky =
+                Some(try!(self.compute_precision_cholesky()));
 
+            // check for convergence
             let log_lik_1 = log_prob_norm.mean();
-
             if (log_lik_0 - log_lik_1).abs() < CONVERGENCE_EPS {
                 break;
             }

From e54cc04f91ca3b43ddf7ba70fa44c47637beb449 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 19:46:24 -0700
Subject: [PATCH 19/21] Remove vestigial method initialize_covariances

---
 src/learning/gmm.rs | 25 -------------------------
 1 file changed, 25 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 72251ebb..ecee527f 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -242,31 +242,6 @@ impl<T: Initializer> GaussianMixtureModel<T> {
         self.max_iters = iters;
     }
 
-    fn initialize_covariances(&self, inputs: &Matrix<f64>, reg_value: f64) -> LearningResult<Matrix<f64>> {
-        match self.cov_option {
-            CovOption::Diagonal => {
-                let variance = try!(inputs.variance(Axes::Row));
-                Ok(Matrix::from_diag(&variance.data()) * reg_value.sqrt())
-            }
-            CovOption::Full | CovOption::Regularized(_) => {
-                let means = inputs.mean(Axes::Row);
-                let mut cov_mat = Matrix::zeros(inputs.cols(), inputs.cols());
-                for (j, row) in cov_mat.iter_rows_mut().enumerate() {
-                    for (k, elem) in row.iter_mut().enumerate() {
-                        *elem = inputs.iter_rows().map(|r| {
-                            (r[j] - means[j]) * (r[k] - means[k])
-                        }).sum::<f64>();
-                    }
-                }
-                cov_mat *= reg_value;
-                if let CovOption::Regularized(eps) = self.cov_option {
-                    cov_mat += Matrix::<f64>::identity(cov_mat.cols()) * eps;
-                }
-                Ok(cov_mat)
-            }
-        }
-    }
-
     fn compute_precision_cholesky(&self) -> LearningResult<Vec<Matrix<f64>>> {

From 566c3ed6065f2f51d115fc9e45890339615dfb49 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 23:47:57 -0700
Subject: [PATCH 20/21] Move CovOption into a trait/struct design

---
 src/learning/gmm.rs   | 332 +++++++++++++++++++++++++++---------------
 tests/learning/gmm.rs |   5 +++--
 2 files changed, 215 insertions(+), 122 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index ecee527f..60587f11 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -6,16 +6,15 @@
 //!
 //! ```
 //! use rusty_machine::linalg::Matrix;
-//! use rusty_machine::learning::gmm::{CovOption, GaussianMixtureModel, Random};
+//! use rusty_machine::learning::gmm::{CovOption, GaussianMixtureModel, Random, CholeskyFull, Diagonal};
 //! use rusty_machine::learning::UnSupModel;
 //!
 //! let inputs = Matrix::new(4, 2, vec![1.0, 2.0, -3.0, -3.0, 0.1, 1.5, -5.0, -2.5]);
 //! let test_inputs = Matrix::new(3, 2, vec![1.0, 2.0, 3.0, 2.9, -4.4, -2.5]);
 //!
 //! // Create gmm with k(=2) classes.
-//! let mut model: GaussianMixtureModel<Random> = GaussianMixtureModel::new(2);
+//! let mut model: GaussianMixtureModel<Random, Diagonal> = GaussianMixtureModel::new(2);
 //! model.set_max_iters(10);
-//! model.cov_option = CovOption::Diagonal;
 //!
 //! // Where inputs is a Matrix with features in columns.
 //! model.train(&inputs).unwrap();
@@ -44,24 +43,9 @@ use std::marker::PhantomData;
 
 const CONVERGENCE_EPS: f64 = 1.0e-15;
 
-/// Covariance options for GMMs.
-///
-/// - Full : The full covariance structure.
-/// - Regularized : Adds a regularization constant to the covariance diagonal.
-/// - Diagonal : Only the diagonal covariance structure.
-#[derive(Clone, Copy, Debug)]
-pub enum CovOption {
-    /// The full covariance structure.
-    Full,
-    /// Adds a regularization constant to the covariance diagonal.
-    Regularized(f64),
-    /// Only the diagonal covariance structure.
-    Diagonal,
-}
-
 /// A Gaussian Mixture Model
 #[derive(Debug)]
-pub struct GaussianMixtureModel<T: Initializer> {
+pub struct GaussianMixtureModel<T: Initializer, C: CovOption<f64>> {
     comp_count: usize,
     // [n_components]
     mix_weights: Vector<f64>,
@@ -70,18 +54,21 @@ pub struct GaussianMixtureModel {
     // n_components elements: [n_features, n_features]
     model_covars: Option<Vec<Matrix<f64>>>,
     // n_components elements: [n_features, n_features]
-    precisions_cholesky: Option<Vec<Matrix<f64>>>,
+    precisions: Option<Vec<Matrix<f64>>>,
     log_lik: f64,
     max_iters: usize,
     phantom: PhantomData<T>,
-    /// The covariance options for the GMM.
-    pub cov_option: CovOption,
+    /// Struct that calculates covariance and gaussian parameters
+    pub cov_option: C,
 }
 
-impl<T: Initializer> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel<T> {
+impl<T, C> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel<T, C>
+    where T: Initializer,
+          C: CovOption<f64> + Default
+{
     /// Train the model using inputs.
     fn train(&mut self, inputs: &Matrix<f64>) -> LearningResult<()> {
-       // Move k to stack to appease the borrow checker
+        // Move k to stack to appease the borrow checker
         let k = self.comp_count;
 
         // Initialize empty matrices for covariances and means
         self.model_covars = Some(
             vec![Matrix::zeros(inputs.cols(), inputs.cols()); k]);
         self.model_means = Some(Matrix::zeros(inputs.cols(), k));
 
         // Initialize responsibilities and calculate parameters
-        self.update_gaussian_parameters(
-            inputs, try!(T::init_resp(k, inputs)));
-        self.precisions_cholesky =
-            Some(try!(self.compute_precision_cholesky()));
+        self.update_gaussian_parameters(inputs, try!(T::init_resp(k, inputs)));
+        self.precisions =
+            Some(try!(self.cov_option.compute_precision(
+                self.model_covars.as_ref().unwrap())));
 
         self.log_lik = 0.;
 
         for _ in 0..self.max_iters {
             let log_lik_0 = self.log_lik;
 
             // e_step
             let (log_prob_norm, mut resp) = self.estimate_log_prob_resp(inputs);
             // Return to normal space
             for v in resp.iter_mut() { *v = v.exp(); }
 
             // m_step
             self.update_gaussian_parameters(inputs, resp);
-            self.precisions_cholesky =
-                Some(try!(self.compute_precision_cholesky()));
+            self.precisions =
+                Some(try!(self.cov_option.compute_precision(
+                    self.model_covars.as_ref().unwrap())));
 
             // check for convergence
             let log_lik_1 = log_prob_norm.mean();
 
-impl<T: Initializer> GaussianMixtureModel<T> {
+impl<T, C> GaussianMixtureModel<T, C>
+    where T: Initializer,
+          C: CovOption<f64> + Default
+{
     /// Constructs a new Gaussian Mixture Model
     ///
     /// Defaults to 100 maximum iterations and
     ///
     /// # Examples
     /// ```
-    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random};
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random, CholeskyFull};
     ///
-    /// let gmm: GaussianMixtureModel<Random> = GaussianMixtureModel::new(3);
+    /// let gmm: GaussianMixtureModel<Random, CholeskyFull> = GaussianMixtureModel::new(3);
     /// ```
     pub fn new(k: usize) -> Self {
         Self::with_weights(k, Vector::ones(k) / k as f64).unwrap()
 
     /// # Examples
     ///
     /// ```
-    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random};
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random, CholeskyFull};
     /// use rusty_machine::linalg::Vector;
     ///
     /// let mix_weights = Vector::new(vec![0.25, 0.25, 0.5]);
     ///
-    /// let gmm: GaussianMixtureModel<Random> = GaussianMixtureModel::with_weights(3, mix_weights).unwrap();
+    /// let gmm: GaussianMixtureModel<Random, CholeskyFull> = GaussianMixtureModel::with_weights(3, mix_weights).unwrap();
     /// ```
     ///
     /// # Failures
 
             mix_weights: normalized_weights,
             model_means: None,
             model_covars: None,
-            precisions_cholesky: None,
+            precisions: None,
             log_lik: 0.,
             max_iters: 100,
-            cov_option: CovOption::Full,
+            cov_option: C::default(),
             phantom: PhantomData,
         })
     }
 
     /// # Examples
     ///
     /// ```
-    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random};
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random, CholeskyFull};
     ///
-    /// let mut gmm: GaussianMixtureModel<Random> = GaussianMixtureModel::new(2);
+    /// let mut gmm: GaussianMixtureModel<Random, CholeskyFull> = GaussianMixtureModel::new(2);
     /// gmm.set_max_iters(5);
     /// ```
     pub fn set_max_iters(&mut self, iters: usize) {
         self.max_iters = iters;
     }
 
-    fn compute_precision_cholesky(&self) -> LearningResult<Vec<Matrix<f64>>> {
-        let &GaussianMixtureModel {
-            model_covars: ref covariances,
-            cov_option: ref covariance_type,
-            comp_count: n_components,
-            ..
-        } = self;
-
-        let covariances = covariances.as_ref().unwrap();
-        match *covariance_type {
-            CovOption::Full | CovOption::Regularized(_) => {
-                let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
-                for covariance in covariances {
-                    // println!("loop-cov: \n{:.4}", &covariance);
-                    let mut cov_chol: Matrix<f64> = try!(covariance.cholesky());
-                    // cholesky is correct
-                    let half = (cov_chol.rows() as f64 / 2.0).floor() as usize;
-                    let lower_rows = cov_chol.rows() - half - 1;
-                    let n_col = cov_chol.cols() - 1;
-                    // println!("cov_chol: \n{:.4}", &cov_chol);
-
-                    // solve_l_triangular doesn't work with a matrix, so we have to do it
-                    // the hard way.
-                    let det: f64 = cov_chol.det();
-                    cov_chol /= det;
-
-                    for idx in 0..half {
-                        // Mirror along the diagonal
-                        let (mut upper, mut lower) = cov_chol.split_at_mut(half, Axes::Row);
-                        {
-                            let swap_row = lower_rows - idx;
-                            let swap_col = n_col - idx;
-                            mem::swap(&mut upper[[idx, idx]], &mut lower[[swap_row, swap_col]]);
-                        }
-
-                        // Transpose and invert all other values
-                        for j in (idx+1)..(n_col+1) {
-                            let swap_row = lower_rows - idx;
-                            let swap_col = n_col - j;
-                            mem::swap(&mut upper[[idx, j]], &mut lower[[swap_row, swap_col]]);
-                            upper[[idx, j]] *= -1.0;
-                        }
-                    }
-                    precisions_chol.push(cov_chol);
-                }
-                Ok(precisions_chol)
-            },
-            CovOption::Diagonal => {
-                let n_features = covariances[0].cols();
-                for covariance in covariances {
-                    for i in 0..covariance.cols() {
-                        if covariance[[i, i]] < EPSILON {
-                            return Err(Error::new(
-                                ErrorKind::InvalidState,
-                                "Mixture model had zeros along the diagonals."));
-                        }
-                    }
-                }
-                let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
-                for covariance in covariances {
-                    let v: Vec<f64> = covariance.iter().map(|v| 1.0 / v.sqrt()).collect();
-                    precisions_chol.push(Matrix::<f64>::new(n_features, n_features, v));
-                }
-                Ok(precisions_chol)
-            }
-        }
-    }
-
     // == Parameters
     // inputs : [n_samples, n_features]
     //
@@ -334,15 +257,15 @@ impl<T: Initializer> GaussianMixtureModel<T> {
     //
     //
     fn estimate_log_prob(&self, inputs: &Matrix<f64>) -> Matrix<f64> {
-        let precisions_cholesky: &Vec<Matrix<f64>> = self.precisions_cholesky.as_ref().unwrap();
+        let precisions: &Vec<Matrix<f64>> = self.precisions.as_ref().unwrap();
         let ref model_means = self.model_means.as_ref().unwrap();
         // The log of the determinant for each precision matrix
-        let log_det = precisions_cholesky.iter().map(|m| m.det().ln());
+        let log_det = precisions.iter().map(|m| m.det().ln());
         // println!("log_det: {:?}", log_det);
         let mut log_prob = Matrix::zeros(inputs.rows(), self.comp_count);
         for k in 0..self.comp_count {
-            let prec = &precisions_cholesky[k];
+            let prec = &precisions[k];
             // y is a matrix of shape [n_samples, n_features]
             let mut y = inputs * prec;
             // println!("y: \n{:.4}", &y.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
@@ -401,7 +324,6 @@ impl<T: Initializer> GaussianMixtureModel<T> {
         }) + 10.0 * EPSILON;
 
-        let mut model_covars = self.model_covars.as_mut().unwrap();
         let mut model_means = self.model_means.as_mut().unwrap();
 
         *model_means = resp.transpose() * inputs;
@@ -411,7 +333,109 @@ impl<T: Initializer> GaussianMixtureModel<T> {
         }
     }
 
-        // Iterate through each component in the model covariances
+        // Delegate updating the covariance matrices to the cov_option
+        let mut model_covars = self.model_covars.as_mut().unwrap();
+        self.cov_option.update_covars(model_covars, resp, inputs,
+                                      model_means, &self.mix_weights);
+
+        self.mix_weights /= inputs.rows() as f64;
+    }
+}
+
+/// Contains various methods for calculating covariances
+pub trait CovOption {
+    /// Calculate a precision matrix from a covariance matrix
+    fn compute_precision(&self, covariances: &Vec<Matrix<f64>>) ->
+        LearningResult<Vec<Matrix<f64>>>;
+
+    /// Update covariance matrices
+    fn update_covars(&self, model_covars: &mut Vec<Matrix<f64>>, resp: Matrix<f64>,
+                     inputs: &Matrix<f64>, model_means: &Matrix<f64>,
+                     mix_weights: &Vector<f64>);
+
+    /// Regularization constant added along the diagonal
+    fn reg_covar(&self) -> f64 {
+        0.0
+    }
+}
+
+/// Performs calculations on a full covariance matrix using Cholesky factorization,
+/// adding a constant regularization factor along the diagonal to ensure stability.
+#[derive(Clone, Copy, Debug)]
+pub struct CholeskyFull {
+    /// Regularization constant
+    pub reg_covar: f64
+}
+
+/// Only calculates the diagonal (the variance). This assumes that all parameters
+/// are uncorrelated. Adds a constant regularization factor (generally unnecessary).
+#[derive(Clone, Copy, Debug)]
+pub struct Diagonal {
+    /// Regularization constant
+    pub reg_covar: f64
+}
+
+impl CholeskyFull {
+    /// Initialize with a covariance regularization constant
+    pub fn new(reg_covar: f64) -> Self {
+        CholeskyFull {
+            reg_covar: reg_covar
+        }
+    }
+
+    /// Solves a system given the Cholesky decomposition of the lower triangular matrix
+    pub fn solve_cholesky_decomposition(mut cov_chol: Matrix<f64>) -> Matrix<f64> {
+        let half = (cov_chol.rows() as f64 / 2.0).floor() as usize;
+        let lower_rows = cov_chol.rows() - half - 1;
+        let n_col = cov_chol.cols() - 1;
+
+        let det: f64 = cov_chol.det();
+        cov_chol /= det;
+
+        for idx in 0..half {
+            // Mirror along the diagonal
+            let (mut upper, mut lower) = cov_chol.split_at_mut(half, Axes::Row);
+            {
+                let swap_row = lower_rows - idx;
+                let swap_col = n_col - idx;
+                mem::swap(&mut upper[[idx, idx]], &mut lower[[swap_row, swap_col]]);
+            }
+
+            // Transpose and invert all other values
+            for j in (idx+1)..(n_col+1) {
+                let swap_row = lower_rows - idx;
+                let swap_col = n_col - j;
+                mem::swap(&mut upper[[idx, j]], &mut lower[[swap_row, swap_col]]);
+                upper[[idx, j]] *= -1.0;
+            }
+        }
+        cov_chol
+    }
+}
+
+impl Default for CholeskyFull {
+    /// Initialize with a covariance regularization constant of 0.0
+    fn default() -> Self {
+        Self::new(0.0)
+    }
+}
+
+impl CovOption for CholeskyFull {
+    fn compute_precision(&self, covariances: &Vec<Matrix<f64>>) -> LearningResult<Vec<Matrix<f64>>> {
+        let n_components = covariances.len();
+        let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
+
+        for covariance in covariances {
+            let cov_chol: Matrix<f64> = try!(covariance.cholesky());
+            precisions_chol.push(Self::solve_cholesky_decomposition(cov_chol));
+        }
+
+        Ok(precisions_chol)
+    }
+
+    fn update_covars(&self, model_covars: &mut Vec<Matrix<f64>>, resp: Matrix<f64>,
+                     inputs: &Matrix<f64>, model_means: &Matrix<f64>,
+                     mix_weights: &Vector<f64>) {
         for (k, covariance) in model_covars.iter_mut().enumerate() {
             let mut diff: Matrix<f64> = inputs.clone();
             for (_, mut row) in diff.iter_rows_mut().enumerate() {
                 for (i, v) in row.iter_mut().enumerate() {
                     *v -= model_means[[k, i]];
                 }
             }
 
-            *covariance = (diff_transpose * diff) / self.mix_weights[k];
+            *covariance = (diff_transpose * diff) / mix_weights[k];
 
             // Add the regularization value
             for idx in 0..covariance.rows() {
-                covariance[[idx, idx]] += 0.01;
+                covariance[[idx, idx]] += self.reg_covar;
             }
         }
+    }
 
-        self.mix_weights /= inputs.rows() as f64;
+    fn reg_covar(&self) -> f64 {
+        self.reg_covar
+    }
+}
+
+impl Default for Diagonal {
+    /// Initialize with a covariance regularization constant of 0.0
+    fn default() -> Self {
+        Diagonal {
+            reg_covar: 0.0
+        }
+    }
+}
+
+
+impl CovOption for Diagonal {
+    fn compute_precision(&self, covariances: &Vec<Matrix<f64>>) -> LearningResult<Vec<Matrix<f64>>> {
+        let n_components = covariances.len();
+        let n_features = covariances[0].cols();
+        for covariance in covariances {
+            for i in 0..covariance.cols() {
+                if covariance[[i, i]] < EPSILON {
+                    return Err(Error::new(
+                        ErrorKind::InvalidState,
+                        "Mixture model had zeros along the diagonals."));
+                }
+
+                if covariance[[i, i]].is_nan() {
+                    return Err(Error::new(
+                        ErrorKind::InvalidState,
+                        "Mixture model had NaN along the diagonals."));
+                }
+            }
+        }
+
+        let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
+        for covariance in covariances {
+            let v: Vec<f64> = covariance.iter().map(|v| {
+                if *v != 0.0 {
+                    1.0 / v.sqrt()
+                } else {
+                    0.0
+                }
+            }).collect();
+            precisions_chol.push(Matrix::<f64>::new(n_features, n_features, v));
+        }
+
+        Ok(precisions_chol)
+    }
+
+    fn update_covars(&self, model_covars: &mut Vec<Matrix<f64>>, _resp: Matrix<f64>,
+                     inputs: &Matrix<f64>, model_means: &Matrix<f64>,
+                     mix_weights: &Vector<f64>) {
+        for (k, covariance) in model_covars.iter_mut().enumerate() {
+            let mut diff: Matrix<f64> = inputs.clone();
+            for (_, mut row) in diff.iter_rows_mut().enumerate() {
+                for (i, v) in row.iter_mut().enumerate() {
+                    *v -= model_means[[k, i]];
+                }
+            }
+
+            *covariance = Matrix::from_diag(
+                &((diff.variance(Axes::Row).unwrap() / mix_weights[k]) +
+                  self.reg_covar).data()[..]);
+        }
+    }
+
+    fn reg_covar(&self) -> f64 {
+        self.reg_covar
+    }
+}
 
@@ -490,19 +582,19 @@ impl Initializer for KMeans {
 
 #[cfg(test)]
 mod tests {
-    use super::{GaussianMixtureModel, Random};
+    use super::{GaussianMixtureModel, Random, CholeskyFull};
     use linalg::Vector;
 
     #[test]
     fn test_means_none() {
-        let model = GaussianMixtureModel::<Random>::new(5);
+        let model = GaussianMixtureModel::<Random, CholeskyFull>::new(5);
 
         assert_eq!(model.means(), None);
     }
 
     #[test]
     fn test_covars_none() {
-        let model = GaussianMixtureModel::<Random>::new(5);
+        let model = GaussianMixtureModel::<Random, CholeskyFull>::new(5);
 
         assert_eq!(model.covariances(), None);
     }
 
     #[test]
     fn test_negative_mixtures() {
         let mix_weights = Vector::new(vec![-0.25, 0.75, 0.5]);
-        let gmm_res = GaussianMixtureModel::<Random>::with_weights(3, mix_weights);
+        let gmm_res = GaussianMixtureModel::<Random, CholeskyFull>::with_weights(3, mix_weights);
 
         assert!(gmm_res.is_err());
     }
 
     #[test]
     fn test_wrong_length_mixtures() {
         let mix_weights = Vector::new(vec![0.1, 0.25, 0.75, 0.5]);
-        let gmm_res = GaussianMixtureModel::<Random>::with_weights(3, mix_weights);
+        let gmm_res = GaussianMixtureModel::<Random, CholeskyFull>::with_weights(3, mix_weights);
 
         assert!(gmm_res.is_err());
     }
 }
diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
index c2f8d7eb..8a5eeaba 100644
--- a/tests/learning/gmm.rs
+++ b/tests/learning/gmm.rs
@@ -1,7 +1,7 @@
 extern crate rand;
 
 use rm::linalg::{Matrix, BaseMatrix};
-use rm::learning::gmm::{GaussianMixtureModel, KMeans};
+use rm::learning::gmm::{GaussianMixtureModel, KMeans, CholeskyFull};
 use rm::learning::UnSupModel;
 
 use self::rand::thread_rng;
@@ -46,7 +46,8 @@ fn gmm_train() {
 
     // Generate some data randomly around the centroids
     let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.2);
 
-    let mut model = GaussianMixtureModel::<KMeans>::new(10);
+    let mut model = GaussianMixtureModel::<KMeans, CholeskyFull>::new(100);
+    model.cov_option.reg_covar = 0.0;
     model.set_max_iters(100);
     match model.train(&samples) {
         Ok(_) => {
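A note on the API introduced above: with the CovOption enum replaced by a trait, the covariance behaviour is now chosen through the model's second type parameter rather than by assigning a variant to the cov_option field, and the Regularized variant's constant becomes the reg_covar field on each option struct. A minimal usage sketch, assuming the trait and type definitions from this patch (the 1e-6 value is only illustrative):

    extern crate rusty_machine;

    use rusty_machine::learning::gmm::{GaussianMixtureModel, Random, Diagonal};
    use rusty_machine::learning::UnSupModel;
    use rusty_machine::linalg::Matrix;

    fn main() {
        // Diagonal covariances are selected at the type level;
        // Default::default() starts with reg_covar = 0.0.
        let mut model: GaussianMixtureModel<Random, Diagonal> =
            GaussianMixtureModel::new(2);
        // A small ridge on the diagonal guards against singular covariances.
        model.cov_option.reg_covar = 1e-6;
        model.set_max_iters(50);

        let inputs = Matrix::new(4, 2, vec![1.0, 2.0, -3.0, -3.0,
                                            0.1, 1.5, -5.0, -2.5]);
        model.train(&inputs).unwrap();
        println!("means: {:?}", model.means());
    }

Choosing the option at the type level means the compiler monomorphizes the E-M loop per covariance structure, so the per-iteration dispatch of compute_precision and update_covars costs nothing at runtime.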
From fe8fa5b45a9ec90d638ba4aad52653892239fecd Mon Sep 17 00:00:00 2001
From: James Lucas
Date: Wed, 19 Oct 2016 08:35:50 +0100
Subject: [PATCH 21/21] Adding basic gmm example for verification

---
 examples/gmm_simulation.rs | 81 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 examples/gmm_simulation.rs

diff --git a/examples/gmm_simulation.rs b/examples/gmm_simulation.rs
new file mode 100644
index 00000000..7e686663
--- /dev/null
+++ b/examples/gmm_simulation.rs
@@ -0,0 +1,81 @@
+extern crate rusty_machine;
+extern crate rand;
+
+use rand::distributions::{Normal, IndependentSample};
+use rand::{Rng, thread_rng};
+
+use rusty_machine::learning::gmm::{GaussianMixtureModel, Random, CholeskyFull};
+use rusty_machine::learning::UnSupModel;
+use rusty_machine::linalg::Matrix;
+
+// Simulate some data from Gaussian clusters
+fn simulate_gmm_1d_data(count: usize,
+                        means: Vec<f64>,
+                        vars: Vec<f64>,
+                        mixture_weights: Vec<f64>)
+                        -> Vec<f64> {
+    assert_eq!(means.len(), vars.len());
+    assert_eq!(means.len(), mixture_weights.len());
+
+    let gmm_count = means.len();
+
+    let mut gaussians = Vec::with_capacity(gmm_count);
+
+    for i in 0..gmm_count {
+        // Create a Gaussian with given mean and var
+        gaussians.push(Normal::new(means[i], vars[i].sqrt()));
+    }
+
+    let mut rng = thread_rng();
+    let mut out_samples = Vec::with_capacity(count);
+
+    for _ in 0..count {
+        // Pick a Gaussian from the mixture weights
+        let chosen_gaussian = gaussians[pick_gaussian(&mixture_weights, &mut rng)];
+
+        // Draw sample from it
+        let sample = chosen_gaussian.ind_sample(&mut rng);
+
+        // Add to data
+        out_samples.push(sample);
+    }
+
+    out_samples
+}
+
+// A utility function which chooses an index from some mixture weights
+fn pick_gaussian<R: Rng>(unnorm_dist: &[f64], rng: &mut R) -> usize {
+    assert!(unnorm_dist.len() > 0);
+    let sum = unnorm_dist.iter().fold(0f64, |acc, &x| acc + x);
+    let rand = rng.gen_range(0.0f64, sum);
+
+    let mut tempsum = 0.0;
+    for (i, p) in unnorm_dist.iter().enumerate() {
+        tempsum += *p;
+        if rand < tempsum {
+            return i;
+        }
+    }
+
+    panic!("No random value was sampled!");
+}
+
+
+fn main() {
+    let gmm_count = 3;
+    let count = 1000;
+
+    let means = vec![-3f64, 0., 3.];
+    let vars = vec![1f64, 0.5, 0.25];
+    let weights = vec![0.5, 0.25, 0.25];
+
+    let data = simulate_gmm_1d_data(count, means, vars, weights);
+
+    let mut gmm: GaussianMixtureModel<Random, CholeskyFull> = GaussianMixtureModel::new(gmm_count);
+    gmm.set_max_iters(300);
+    gmm.train(&Matrix::new(count, 1, data)).unwrap();
+
+    println!("Means = {:?}", gmm.means());
+    println!("Covs = {:?}", gmm.covariances());
+    println!("Mix Weights = {:?}", gmm.mixture_weights());
+}
\ No newline at end of file
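The example runs with: cargo run --example gmm_simulation. It only prints the fitted parameters, so verification is by eye; a rough automated check could be appended to the end of main() along the following lines. This is a hypothetical addition, not part of the patch, and it assumes means() exposes the fitted means as a 3x1 matrix whose data() method yields the raw values:

    // Hypothetical check appended to main(): with well-separated,
    // clearly weighted components, the sorted fitted means should
    // land near the true means -3.0, 0.0 and 3.0.
    if let Some(means) = gmm.means() {
        let mut fitted: Vec<f64> = means.data().clone();
        fitted.sort_by(|a, b| a.partial_cmp(b).unwrap());
        for (fit, truth) in fitted.iter().zip(&[-3.0f64, 0.0, 3.0]) {
            assert!((fit - truth).abs() < 0.5,
                    "fitted mean {} is too far from {}", fit, truth);
        }
    }

A tolerance of 0.5 is loose on purpose: with 1000 samples and random initialization the E-M fit varies from run to run, and component 2's variance of 0.25 keeps the clusters from overlapping enough to break the ordering.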