From 9437fd3081f9c7dd3dd527a0695923e1c59457de Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sat, 15 Oct 2016 10:50:26 -0700
Subject: [PATCH 01/21] Use const for convergence epsilon

---
 src/learning/gmm.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 4c88e9ce..03976f6a 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -37,6 +37,8 @@ use learning::{LearningResult, UnSupModel};
 use learning::toolkit::rand_utils;
 use learning::error::{Error, ErrorKind};
 
+const CONVERGENCE_EPS: f64 = 1.0e-15;
+
 /// Covariance options for GMMs.
 ///
 /// - Full : The full covariance structure.
@@ -92,7 +94,7 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
 
             let (weights, log_lik_1) = try!(self.membership_weights(inputs));
 
-            if (log_lik_1 - log_lik_0).abs() < 1e-15 {
+            if (log_lik_1 - log_lik_0).abs() < CONVERGENCE_EPS {
                 break;
             }
 

From 67553dc414c79801bcd8e26e20885ee67997375d Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sat, 15 Oct 2016 11:19:13 -0700
Subject: [PATCH 02/21] Make new use with_weights

---
 src/learning/gmm.rs | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 03976f6a..7118e0fd 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -130,15 +130,7 @@ impl GaussianMixtureModel {
     /// let gmm = GaussianMixtureModel::new(3);
     /// ```
     pub fn new(k: usize) -> GaussianMixtureModel {
-        GaussianMixtureModel {
-            comp_count: k,
-            mix_weights: Vector::ones(k) / (k as f64),
-            model_means: None,
-            model_covars: None,
-            log_lik: 0f64,
-            max_iters: 100,
-            cov_option: CovOption::Full,
-        }
+        Self::with_weights(k, Vector::ones(k) / k as f64).unwrap()
     }
 
     /// Constructs a new GMM with the specified prior mixture weights.
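Side note: PATCH 01 only gives the EM stopping threshold a name. A minimal, self-contained sketch of the convergence pattern that constant controls (the `run_em`/`step` names are illustrative and not part of the patch set):

    const CONVERGENCE_EPS: f64 = 1.0e-15;

    /// Runs one EM iteration per call of `step` (which returns the new
    /// log-likelihood) and stops once the improvement falls below the epsilon.
    fn run_em<F: FnMut() -> f64>(max_iters: usize, mut step: F) -> f64 {
        let mut log_lik = 0.0;
        for _ in 0..max_iters {
            let new_log_lik = step();
            if (new_log_lik - log_lik).abs() < CONVERGENCE_EPS {
                break;
            }
            log_lik = new_log_lik;
        }
        log_lik
    }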
From 3fe49dddc910708c9511def1fa85034f5d03517d Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sun, 16 Oct 2016 17:59:23 -0700
Subject: [PATCH 03/21] bump deps

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 2ef44396..bc352753 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,4 +16,4 @@ stats = []
 [dependencies]
 num = { version = "0.1.35", default-features = false }
 rand = "0.3.14"
-rulinalg = "0.3.3"
+rulinalg = "0.3.4"

From 92af69832498dfaf3c59404ec6594eb82626ec19 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sun, 16 Oct 2016 17:59:55 -0700
Subject: [PATCH 04/21] Add an integration test for GMM

---
 tests/learning/gmm.rs | 55 +++++++++++++++++++++++++++++++++++++++++++
 tests/lib.rs          |  3 ++-
 2 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 tests/learning/gmm.rs

diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
new file mode 100644
index 00000000..47d80559
--- /dev/null
+++ b/tests/learning/gmm.rs
@@ -0,0 +1,55 @@
+extern crate rand;
+
+use rm::linalg::{Matrix, BaseMatrix};
+use rm::learning::gmm::GaussianMixtureModel;
+use rm::learning::UnSupModel;
+
+use self::rand::thread_rng;
+use self::rand::distributions::IndependentSample;
+use self::rand::distributions::normal::Normal;
+
+fn generate_data(centroids: &Matrix<f64>, points_per_centroid: usize, noise: f64) -> Matrix<f64> {
+    assert!(centroids.cols() > 0, "Centroids cannot be empty.");
+    assert!(centroids.rows() > 0, "Centroids cannot be empty.");
+    assert!(noise >= 0f64, "Noise must be non-negative.");
+    let mut raw_cluster_data = Vec::with_capacity(centroids.rows() * points_per_centroid *
+                                                  centroids.cols());
+
+    let mut rng = thread_rng();
+    let normal_rv = Normal::new(0f64, noise);
+
+    for _ in 0..points_per_centroid {
+        // Generate points from each centroid
+        for centroid in centroids.iter_rows() {
+            // Generate a point randomly around the centroid
+            let mut point = Vec::with_capacity(centroids.cols());
+            for feature in centroid {
+                point.push(feature + normal_rv.ind_sample(&mut rng));
+            }
+
+            // Push point to raw_cluster_data
+            raw_cluster_data.extend(point);
+        }
+    }
+
+    Matrix::new(centroids.rows() * points_per_centroid,
+                centroids.cols(),
+                raw_cluster_data)
+}
+
+#[test]
+fn gmm_train() {
+
+    const SAMPLES_PER_CENTROID: usize = 500;
+    // Choose three cluster centers, at (-0.5, -0.5), (0, 0.5), (0.5, 0.0).
+    let centroids = Matrix::new(3, 2, vec![-0.5, -0.5, 0.0, 0.5, 0.5, 0.0]);
+
+    // Generate some data randomly around the centroids
+    let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.4);
+    let mut model = GaussianMixtureModel::new(3);
+    model.set_max_iters(100);
+    match model.train(&samples) {
+        Ok(_) => println!("means: \n{:.4}", model.means().unwrap()),
+        Err(e) => println!("error: {}", e)
+    }
+}

diff --git a/tests/lib.rs b/tests/lib.rs
index f1261809..c536cd82 100644
--- a/tests/lib.rs
+++ b/tests/lib.rs
@@ -6,8 +6,9 @@ pub mod learning {
     mod lin_reg;
     mod k_means;
     mod gp;
+    mod gmm;
 
     pub mod optim {
         mod grad_desc;
     }
-}
\ No newline at end of file
+}

From 3e564f8e9f0e5243ca210a1e35933def0284329f Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sun, 16 Oct 2016 18:00:27 -0700
Subject: [PATCH 05/21] Commit current status of GaussianMixtureModel rewrite

---
 src/learning/gmm.rs | 321 ++++++++++++++++++++++++++++++++------------
 1 file changed, 238 insertions(+), 83 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 7118e0fd..782c810e 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -30,6 +30,8 @@
 //! // Probabilities that each point comes from each Gaussian.
 //! println!("{:?}", post_probs.data());
 //! ```
+extern crate rand;
+
 use linalg::{Matrix, MatrixSlice, Vector, BaseMatrix, BaseMatrixMut, Axes};
 use rulinalg::utils;
 
@@ -37,6 +39,10 @@ use learning::{LearningResult, UnSupModel};
 use learning::toolkit::rand_utils;
 use learning::error::{Error, ErrorKind};
 
+use std::f64::consts::PI;
+use std::f64::EPSILON;
+use std::mem;
+
 const CONVERGENCE_EPS: f64 = 1.0e-15;
 
 /// Covariance options for GMMs.
@@ -59,10 +65,15 @@ pub enum CovOption {
 #[derive(Debug)]
 pub struct GaussianMixtureModel {
     comp_count: usize,
+    // [n_components]
     mix_weights: Vector<f64>,
+    // [n_components, n_features]
     model_means: Option<Matrix<f64>>,
+    // n_components elements: [n_features, n_features]
     model_covars: Option<Vec<Matrix<f64>>>,
-    log_lik: f64,
+    // n_components elements: [n_features, n_features]
+    precisions_cholesky: Option<Vec<Matrix<f64>>>,
+    log_lik: Vector<f64>,
     max_iters: usize,
     /// The covariance options for the GMM.
     pub cov_option: CovOption,
@@ -79,28 +90,66 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
         // Initialization:
         let k = self.comp_count;
 
+        // println!("\n");
         self.model_covars = {
             let cov_mat = try!(self.initialize_covariances(inputs, reg_value));
             Some(vec![cov_mat; k])
         };
 
-        let random_rows: Vec<usize> =
-            rand_utils::reservoir_sample(&(0..inputs.rows()).collect::<Vec<usize>>(), k);
-        self.model_means = Some(inputs.select_rows(&random_rows));
+        {
+            use rand::distributions::{IndependentSample, Range};
+            let between = Range::new(0f64, 1.);
+            let mut rng = rand::thread_rng();
+
+            self.model_means = Some(Matrix::new(inputs.cols(), self.comp_count,
+                                                vec![0.; inputs.cols() * self.comp_count]));
+            let random_numbers: Vec<f64> =
+                (0..(inputs.rows()*k)).map(|_| between.ind_sample(&mut rng).exp()).collect();
+            let mut resp = Matrix::new(inputs.rows(), k, random_numbers);
+            let sum = resp.sum_cols();
+            for row in resp.iter_rows_mut() {
+                for (v, s) in row.iter_mut().zip(sum.iter()) {
+                    *v /= *s;
+                }
+            }
+            self.update_gaussian_parameters(inputs, resp);
+        }
 
-        for _ in 0..self.max_iters {
-            let log_lik_0 = self.log_lik;
+        for c in self.model_covars.as_ref().unwrap().iter() {
+            // println!("covs: \n{:.4}", &c);
+        }
 
-            let (weights, log_lik_1) = try!(self.membership_weights(inputs));
+        self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
+        for c in self.precisions_cholesky.as_ref().unwrap().iter() {
+            // println!("prec: \n{:.4}", &c);
+        }
 
-            if (log_lik_1 - log_lik_0).abs() < CONVERGENCE_EPS {
+        self.log_lik = Vector::new(vec![0.0; inputs.rows()]);
+        for iter in 0..self.max_iters {
+            let log_lik_0 = self.log_lik.clone();
+            // println!("\nIteration {}", iter);
+            // println!("cov:\n{:.4}", self.covariances().unwrap());
+
+            // e_step
+            let (log_prob_norm, mut resp) = self.estimate_log_prob_resp(inputs);
+            // Return to normal space
+            for v in resp.iter_mut() { *v = v.exp(); }
+
+            println!("resp: \n{:.4}", &resp.select_rows(&[0, 1, 2, 3, 4]));
+            println!("mix_weights: {}", &self.mix_weights);
+            // m_step
+            self.update_gaussian_parameters(inputs, resp);
+            self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
+            // end of m_step
+
+            let log_lik_1 = log_prob_norm;
+
+            if (log_lik_0 - &log_lik_1).iter().all(|v| v.abs() < CONVERGENCE_EPS) {
                 break;
             }
 
             self.log_lik = log_lik_1;
-
-            self.update_params(inputs, weights);
         }
 
         Ok(())
     }
 
     /// Predict output from inputs.
     fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
         if let (&Some(_), &Some(_)) = (&self.model_means, &self.model_covars) {
-            Ok(try!(self.membership_weights(inputs)).0)
+            Ok(self.estimate_weighted_log_prob(inputs))
         } else {
             Err(Error::new_untrained())
         }
     }
@@ -169,7 +218,8 @@ impl GaussianMixtureModel {
             mix_weights: normalized_weights,
             model_means: None,
             model_covars: None,
-            log_lik: 0f64,
+            precisions_cholesky: None,
+            log_lik: Vector::new(vec![]),
             max_iters: 100,
             cov_option: CovOption::Full,
         })
     }
@@ -223,7 +273,6 @@ impl GaussianMixtureModel {
                 let variance = try!(inputs.variance(Axes::Row));
                 Ok(Matrix::from_diag(&variance.data()) * reg_value.sqrt())
             }
-
             CovOption::Full | CovOption::Regularized(_) => {
                 let means = inputs.mean(Axes::Row);
                 let mut cov_mat = Matrix::zeros(inputs.cols(), inputs.cols());
@@ -243,108 +292,214 @@ impl GaussianMixtureModel {
         }
     }
 
-    fn membership_weights(&self, inputs: &Matrix<f64>) -> LearningResult<(Matrix<f64>, f64)> {
-        let n = inputs.rows();
-
-        let mut member_weights_data = Vec::with_capacity(n * self.comp_count);
-
-        // We compute the determinants and inverses now
-        let mut cov_sqrt_dets = Vec::with_capacity(self.comp_count);
-        let mut cov_invs = Vec::with_capacity(self.comp_count);
-
-        if let Some(ref covars) = self.model_covars {
-            for cov in covars {
-                // TODO: combine these. We compute det to get the inverse.
-                let covar_det = cov.det();
-                let covar_inv = try!(cov.inverse().map_err(Error::from));
+    fn compute_precision_cholesky(&self) -> LearningResult<Vec<Matrix<f64>>> {
+        let &GaussianMixtureModel {
+            model_covars: ref covariances,
+            cov_option: ref covariance_type,
+            comp_count: n_components,
+            ..
+        } = self;
 
-                cov_sqrt_dets.push(covar_det.sqrt());
-                cov_invs.push(covar_inv);
+        let covariances = covariances.as_ref().unwrap();
+        match *covariance_type {
+            CovOption::Full | CovOption::Regularized(_) => {
+                let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
+                for covariance in covariances {
+                    let mut cov_chol: Matrix<f64> = try!(covariance.cholesky());
+                    // cholesky is correct
+                    let half = (cov_chol.rows() as f64 / 2.0).floor() as usize;
+                    let lower_rows = cov_chol.rows() - half - 1;
+                    let n_col = cov_chol.cols() - 1;
+                    // println!("cov: \n{:.4}", &covariance);
+                    // println!("cov_chol: \n{:.4}", &cov_chol);
+
+                    // solve_l_triangular doesn't work with a matrix, so we have to do it
+                    // the hard way.
+                    let det: f64 = cov_chol.det();
+                    cov_chol /= det;
+
+                    for idx in 0..half {
+                        // Mirror along the diagonal
+                        let (mut upper, mut lower) = cov_chol.split_at_mut(half, Axes::Row);
+                        {
+                            let swap_row = lower_rows - idx;
+                            let swap_col = n_col - idx;
+                            mem::swap(&mut upper[[idx, idx]], &mut lower[[swap_row, swap_col]]);
+                        }
+
+                        // Transpose and invert all other values
+                        for j in (idx+1)..(n_col+1) {
+                            let swap_row = lower_rows - idx;
+                            let swap_col = n_col - j;
+                            mem::swap(&mut upper[[idx, j]], &mut lower[[swap_row, swap_col]]);
+                            upper[[idx, j]] *= -1.0;
+                        }
+                    }
+                    precisions_chol.push(cov_chol);
+                }
+                Ok(precisions_chol)
+            },
+            CovOption::Diagonal => {
+                let n_features = covariances[0].cols();
+                for covariance in covariances {
+                    for i in 0..covariance.cols() {
+                        if covariance[[i, i]] <= 0.0 {
+                            return Err(Error::new(
+                                ErrorKind::InvalidState,
+                                "Mixture model had zeros along the diagonals."));
+                        }
+                    }
+                }
+                let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
+                for covariance in covariances {
+                    let v: Vec<f64> = covariance.iter().map(|v| 1.0 / v.sqrt()).collect();
+                    precisions_chol.push(Matrix::<f64>::new(n_features, n_features, v));
+                }
+                Ok(precisions_chol)
             }
         }
+    }
 
-        let mut log_lik = 0f64;
-
-        // Now we compute the membership weights
-        if let Some(ref means) = self.model_means {
-            for i in 0..n {
-                let mut pdfs = Vec::with_capacity(self.comp_count);
-                let x_i = MatrixSlice::from_matrix(inputs, [i, 0], 1, inputs.cols());
-
-                for j in 0..self.comp_count {
-                    let mu_j = MatrixSlice::from_matrix(means, [j, 0], 1, means.cols());
-                    let diff = x_i - mu_j;
-
-                    let pdf = (&diff * &cov_invs[j] * diff.transpose() * -0.5).into_vec()[0]
-                        .exp() / cov_sqrt_dets[j];
-                    pdfs.push(pdf);
-                }
-
-                let weighted_pdf_sum = utils::dot(&pdfs, self.mix_weights.data());
+    // == Parameters
+    // inputs : [n_samples, n_features]
+    //
+    // == Returns
+    // weighted_log_prob : [n_samples, n_components]
+    //
+    fn estimate_weighted_log_prob(&self, inputs: &Matrix<f64>) -> Matrix<f64> {
+        let mut log_prob = self.estimate_log_prob(&inputs);
+        // println!("log_prob: \n{:.4}", log_prob.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
+        // println!("mix_weights: \n{:?}", &self.mix_weights);
+        let log_weights: Vec<f64> = self.mix_weights.iter().map(|w| w.ln()).collect();
+        // println!("log_weights: \n{:?}", &log_weights);
+        for (lp, lw) in log_prob.iter_mut().zip(log_weights.iter()) {
+            *lp *= *lw;
+        }
+        log_prob
+    }
 
-                for (idx, pdf) in pdfs.iter().enumerate() {
-                    member_weights_data.push(self.mix_weights[idx] * pdf / (weighted_pdf_sum));
+    // called estimate_log_gaussian_prob in scikit-learn
+    //
+    // == Parameters
+    // inputs : [n_samples, n_features]
+    //
+    //
+    fn estimate_log_prob(&self, inputs: &Matrix<f64>) -> Matrix<f64> {
+        let precisions_cholesky: &Vec<Matrix<f64>> = self.precisions_cholesky.as_ref().unwrap();
+        let ref model_means = self.model_means.as_ref().unwrap();
+        // The log of the determinant for each precision matrix
+        let log_det = precisions_cholesky.iter().map(|m| m.det().ln());
+        // println!("log_det: {:?}", log_det);
+
+        let mut log_prob = Matrix::zeros(inputs.rows(), self.comp_count);
+        for k in 0..self.comp_count {
+            let prec = &precisions_cholesky[k];
+            // y is a matrix of shape [n_samples, n_features]
+            let mut y = inputs * prec;
+            // println!("y: \n{:.4}", &y.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
+            // Matrix of shape [1, n_features]
+            let z: Matrix<f64> = model_means.select_rows(&[k]) * prec;
+            println!("z: \n{:.4}", &z);
+
+            // Subtract the mean of each column from the matrix y
+            for col in 0..y.cols() {
+                for row in 0..y.rows() {
+                    y[[row, col]] -= z[[0, col]];
+                }
+            }
+            // println!("y: \n{:.4}", &y.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
 
-                log_lik += weighted_pdf_sum.ln();
+            for (i, row) in y.iter_rows().enumerate() {
+                let sum_of_squares = row.iter().map(|v| v.powi(2)).sum();
+                log_prob[[i, k]] = sum_of_squares;
             }
         }
 
-        Ok((Matrix::new(n, self.comp_count, member_weights_data), log_lik))
+        log_prob = (log_prob + inputs.cols() as f64 * (2.0 * PI).ln()) * -0.5;
+        for (row, det) in log_prob.iter_rows_mut().zip(log_det) {
+            for v in row {
+                *v += det;
+            }
+        }
+        log_prob
     }
 
-    fn update_params(&mut self, inputs: &Matrix<f64>, membership_weights: Matrix<f64>) {
-        let n = membership_weights.rows();
-        let d = inputs.cols();
+    fn estimate_log_prob_resp(&self, inputs: &Matrix<f64>) -> (Vector<f64>, Matrix<f64>) {
+        let mut weighted_log_prob: Matrix<f64> = self.estimate_weighted_log_prob(inputs);
+        // length of n_samples
+        let log_prob_norm: Vector<f64> =
+            Vector::new(weighted_log_prob.iter_rows().map(|row: &[f64]| {
+                let a: f64 = row.iter().map(|v| v.exp()).sum();
+                a.ln()
+            }).collect::<Vec<f64>>());
+
+        println!("log_prob_norm: \n{:.4}", &log_prob_norm.select(&[0, 1, 2, 3, 4, 5, 6]));
+        for row in 0..log_prob_norm.size() {
+            for col in 0..weighted_log_prob.cols() {
+                weighted_log_prob[[row, col]] -= log_prob_norm[row];
+            }
+        }
+        println!("log_prob: \n{:.4}", weighted_log_prob.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
+        (log_prob_norm, weighted_log_prob)
+    }
 
-        let sum_weights = membership_weights.sum_rows();
+    fn update_gaussian_parameters(&mut self, inputs: &Matrix<f64>, mut resp: Matrix<f64>) {
+        let mut model_means = self.model_means.as_mut().unwrap();
 
-        self.mix_weights = &sum_weights / (n as f64);
+        // println!("resp: {}", resp);
+        self.mix_weights = resp.iter_rows()
+            .fold(Vector::new(vec![0.0; resp.cols()]), |mut acc, row| {
+                for (a, r) in acc.iter_mut().zip(row.iter()) {
+                    *a += *r;
+                }
+                acc
+            })
+            + 10.0 * EPSILON;
 
-        let mut new_means = membership_weights.transpose() * inputs;
+        // println!("nk: \n{:.4}", &self.mix_weights);
+        *model_means = resp.transpose() * inputs;
+        for col in 0..model_means.cols() {
+            let mm_rows = model_means.rows();
+            let mut mm_col = model_means.sub_slice_mut([0, col], mm_rows, 1);
 
-        for (mean, w) in new_means.iter_rows_mut().zip(sum_weights.data().iter()) {
-            for m in mean.iter_mut() {
-                *m /= *w;
-            }
+            let ref mix_weights = self.mix_weights;
+            let div_mix_weights = |v| { v / mix_weights[col] };
+            mm_col = mm_col.apply(&div_mix_weights);
         }
 
-        let mut new_covs = Vec::with_capacity(self.comp_count);
+        let mut model_covars = self.model_covars.as_mut().unwrap();
 
-        for k in 0..self.comp_count {
-            let mut cov_mat = Matrix::zeros(d, d);
-            let new_means_k = MatrixSlice::from_matrix(&new_means, [k, 0], 1, d);
-
-            for i in 0..n {
-                let inputs_i = MatrixSlice::from_matrix(inputs, [i, 0], 1, d);
-                let diff = inputs_i - new_means_k;
-                cov_mat += self.compute_cov(diff, membership_weights[[i, k]]);
+        // println!("means:\n{:.4}", &model_means);
+        // Iterate through each component in the model covariances
+        for (k, covariance) in model_covars.iter_mut().enumerate() {
+            let mut diff: Matrix<f64> = inputs.clone();
+            for (_, mut row) in diff.iter_rows_mut().enumerate() {
+                for (i, v) in row.iter_mut().enumerate() {
+                    *v = *v - model_means[[k, i]];
+                }
             }
 
-            if let CovOption::Regularized(eps) = self.cov_option {
-                cov_mat += Matrix::<f64>::identity(cov_mat.cols()) * eps;
-            }
+            let mut diff_transpose = diff.transpose();
 
-            new_covs.push(cov_mat / sum_weights[k]);
+            for row in 0..diff_transpose.rows() {
+                let mut dt = diff_transpose.sub_slice_mut([row, 0], 1, diff.rows());
+                dt *= resp[[row, k]];
+            }
 
+            *covariance = (diff_transpose.as_mut_slice() * diff) / self.mix_weights[k];
         }
 
-        self.model_means = Some(new_means);
-        self.model_covars = Some(new_covs);
-    }
+        self.mix_weights /= inputs.rows() as f64;
+        // println!("mix_weights: {}", &self.mix_weights);
 
-    fn compute_cov(&self, diff: Matrix<f64>, weight: f64) -> Matrix<f64> {
-        match self.cov_option {
-            CovOption::Full | CovOption::Regularized(_) => (diff.transpose() * diff) * weight,
-            CovOption::Diagonal => Matrix::from_diag(&diff.elemul(&diff).into_vec()) * weight,
-        }
     }
 }
 
 #[cfg(test)]
 mod tests {
     use super::GaussianMixtureModel;
-    use linalg::Vector;
+    use learning::UnSupModel;
+    use linalg::{Vector, Matrix};
 
     #[test]
     fn test_means_none() {

From d40b0bd56b634375850394642bd45d48998c3cda Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sun, 16 Oct 2016 18:56:45 -0700
Subject: [PATCH 06/21] Use mean of log_prob_norm for convergence

---
 src/learning/gmm.rs | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 782c810e..c50ca55d 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -73,7 +73,7 @@ pub struct GaussianMixtureModel {
     model_covars: Option<Vec<Matrix<f64>>>,
     // n_components elements: [n_features, n_features]
     precisions_cholesky: Option<Vec<Matrix<f64>>>,
-    log_lik: Vector<f64>,
+    log_lik: f64,
     max_iters: usize,
     /// The covariance options for the GMM.
     pub cov_option: CovOption,
@@ -125,9 +125,10 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
             // println!("prec: \n{:.4}", &c);
         }
 
-        self.log_lik = Vector::new(vec![0.0; inputs.rows()]);
+        self.log_lik = 0.;
+
         for iter in 0..self.max_iters {
-            let log_lik_0 = self.log_lik.clone();
+            let log_lik_0 = self.log_lik;
             // println!("\nIteration {}", iter);
             // println!("cov:\n{:.4}", self.covariances().unwrap());
 
@@ -143,9 +144,9 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
             self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
             // end of m_step
 
-            let log_lik_1 = log_prob_norm;
+            let log_lik_1 = log_prob_norm.mean();
 
-            if (log_lik_0 - &log_lik_1).iter().all(|v| v.abs() < CONVERGENCE_EPS) {
+            if (log_lik_0 - log_lik_1).abs() < CONVERGENCE_EPS {
                 break;
             }
 
@@ -219,7 +220,7 @@ impl GaussianMixtureModel {
             model_means: None,
             model_covars: None,
             precisions_cholesky: None,
-            log_lik: Vector::new(vec![]),
+            log_lik: 0.,
             max_iters: 100,
             cov_option: CovOption::Full,
         })

From 06ace3bba0921d36d2cdea173061fd0e9a98fd1c Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sun, 16 Oct 2016 19:59:52 -0700
Subject: [PATCH 07/21] Add cov regularization value to stabilize cholesky

---
 src/learning/gmm.rs   | 34 +++++++++++++++++++++++-----------
 tests/learning/gmm.rs | 12 +++++++++---
 2 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index c50ca55d..0176515d 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -99,7 +99,7 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
 
         {
             use rand::distributions::{IndependentSample, Range};
-            let between = Range::new(0f64, 1.);
+            let between = Range::new(0.0f64, 1.);
             let mut rng = rand::thread_rng();
 
             self.model_means = Some(Matrix::new(inputs.cols(), self.comp_count,
                                                 vec![0.; inputs.cols() * self.comp_count]));
@@ -137,8 +137,8 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
             // Return to normal space
             for v in resp.iter_mut() { *v = v.exp(); }
 
-            println!("resp: \n{:.4}", &resp.select_rows(&[0, 1, 2, 3, 4]));
-            println!("mix_weights: {}", &self.mix_weights);
+            // println!("resp: \n{:.4}", &resp.select_rows(&[0, 1, 2, 3, 4]));
+            // println!("mix_weights: {}", &self.mix_weights);
             // m_step
             self.update_gaussian_parameters(inputs, resp);
             self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
             // end of m_step
@@ -254,6 +254,13 @@ impl GaussianMixtureModel {
         &self.mix_weights
     }
 
+    /// The model mean log likelihood
+    ///
+    /// Returns the mean log likelihood of the model
+    pub fn log_lik(&self) -> f64 {
+        self.log_lik
+    }
+
     /// Sets the max number of iterations for the EM algorithm.
     ///
     /// # Examples
@@ -306,12 +313,12 @@ impl GaussianMixtureModel {
             CovOption::Full | CovOption::Regularized(_) => {
                 let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
                 for covariance in covariances {
+                    // println!("loop-cov: \n{:.4}", &covariance);
                     let mut cov_chol: Matrix<f64> = try!(covariance.cholesky());
                     // cholesky is correct
                     let half = (cov_chol.rows() as f64 / 2.0).floor() as usize;
                     let lower_rows = cov_chol.rows() - half - 1;
                     let n_col = cov_chol.cols() - 1;
-                    // println!("cov: \n{:.4}", &covariance);
                     // println!("cov_chol: \n{:.4}", &cov_chol);
 
                     // solve_l_triangular doesn't work with a matrix, so we have to do it
@@ -371,10 +378,9 @@ impl GaussianMixtureModel {
         let mut log_prob = self.estimate_log_prob(&inputs);
         // println!("log_prob: \n{:.4}", log_prob.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
         // println!("mix_weights: \n{:?}", &self.mix_weights);
-        let log_weights: Vec<f64> = self.mix_weights.iter().map(|w| w.ln()).collect();
-        // println!("log_weights: \n{:?}", &log_weights);
-        for (lp, lw) in log_prob.iter_mut().zip(log_weights.iter()) {
-            *lp *= *lw;
+        let log_weights = self.mix_weights.iter().map(|w| w.ln());
+        for (lp, lw) in log_prob.iter_mut().zip(log_weights) {
+            *lp *= lw;
         }
         log_prob
     }
@@ -400,7 +406,7 @@ impl GaussianMixtureModel {
             // println!("y: \n{:.4}", &y.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
             // Matrix of shape [1, n_features]
             let z: Matrix<f64> = model_means.select_rows(&[k]) * prec;
-            println!("z: \n{:.4}", &z);
+            // println!("z: \n{:.4}", &z);
 
             // Subtract the mean of each column from the matrix y
             for col in 0..y.cols() {
                 for row in 0..y.rows() {
@@ -434,13 +440,13 @@ impl GaussianMixtureModel {
                 a.ln()
             }).collect::<Vec<f64>>());
 
-        println!("log_prob_norm: \n{:.4}", &log_prob_norm.select(&[0, 1, 2, 3, 4, 5, 6]));
+        // println!("log_prob_norm: \n{:.4}", &log_prob_norm.select(&[0, 1, 2, 3, 4, 5, 6]));
         for row in 0..log_prob_norm.size() {
             for col in 0..weighted_log_prob.cols() {
                 weighted_log_prob[[row, col]] -= log_prob_norm[row];
             }
         }
-        println!("log_prob: \n{:.4}", weighted_log_prob.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
+        // println!("log_prob: \n{:.4}", weighted_log_prob.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
         (log_prob_norm, weighted_log_prob)
     }
 
@@ -488,6 +494,12 @@ impl GaussianMixtureModel {
             }
 
             *covariance = (diff_transpose.as_mut_slice() * diff) / self.mix_weights[k];
+
+            // Add the regularization value
+            for idx in 0..covariance.rows() {
+                covariance[[idx, idx]] += 0.03;
+            }
+
         }
 
         self.mix_weights /= inputs.rows() as f64;

diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
index 47d80559..957ce658 100644
--- a/tests/learning/gmm.rs
+++ b/tests/learning/gmm.rs
@@ -40,16 +40,22 @@ fn gmm_train() {
 
-    const SAMPLES_PER_CENTROID: usize = 500;
+    const SAMPLES_PER_CENTROID: usize = 2000;
     // Choose three cluster centers, at (-0.5, -0.5), (0, 0.5), (0.5, 0.0).
     let centroids = Matrix::new(3, 2, vec![-0.5, -0.5, 0.0, 0.5, 0.5, 0.0]);
 
     // Generate some data randomly around the centroids
-    let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.4);
+    let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.2);
     let mut model = GaussianMixtureModel::new(3);
     model.set_max_iters(100);
     match model.train(&samples) {
-        Ok(_) => println!("means: \n{:.4}", model.means().unwrap()),
+        Ok(_) => {
+            println!("means: \n{:.4}", model.means().unwrap());
+            println!("log_lik: \n{:.4}", model.log_lik());
+            for cov in model.covariances().as_ref().unwrap().iter() {
+                println!("cov: \n{:.4}", cov);
+            }
+        },
         Err(e) => println!("error: {}", e)
     }
 }

From 10bafa7bcef4240685e7b8e114fab44d35254760 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sun, 16 Oct 2016 20:57:14 -0700
Subject: [PATCH 08/21] Allow customization of initialization methods

---
 src/learning/gmm.rs   | 85 ++++++++++++++++++++++++++++++-------------
 tests/learning/gmm.rs |  6 +--
 2 files changed, 63 insertions(+), 28 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 0176515d..6ea930a0 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -42,6 +42,7 @@ use learning::error::{Error, ErrorKind};
 use std::f64::consts::PI;
 use std::f64::EPSILON;
 use std::mem;
+use std::marker::PhantomData;
 
 const CONVERGENCE_EPS: f64 = 1.0e-15;
 
@@ -60,10 +61,9 @@ pub enum CovOption {
     Diagonal,
 }
 
-
 /// A Gaussian Mixture Model
 #[derive(Debug)]
-pub struct GaussianMixtureModel {
+pub struct GaussianMixtureModel<T: Initializer> {
     comp_count: usize,
     // [n_components]
     mix_weights: Vector<f64>,
@@ -75,11 +75,12 @@ pub struct GaussianMixtureModel {
     precisions_cholesky: Option<Vec<Matrix<f64>>>,
     log_lik: f64,
     max_iters: usize,
+    phantom: PhantomData<T>,
     /// The covariance options for the GMM.
     pub cov_option: CovOption,
 }
 
-impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
+impl<T: Initializer> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel<T> {
     /// Train the model using inputs.
     fn train(&mut self, inputs: &Matrix<f64>) -> LearningResult<()> {
         let reg_value = if inputs.rows() > 1 {
@@ -98,21 +99,9 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
         };
 
         {
-            use rand::distributions::{IndependentSample, Range};
-            let between = Range::new(0.0f64, 1.);
-            let mut rng = rand::thread_rng();
-
             self.model_means = Some(Matrix::new(inputs.cols(), self.comp_count,
                                                 vec![0.; inputs.cols() * self.comp_count]));
-            let random_numbers: Vec<f64> =
-                (0..(inputs.rows()*k)).map(|_| between.ind_sample(&mut rng).exp()).collect();
-            let mut resp = Matrix::new(inputs.rows(), k, random_numbers);
-            let sum = resp.sum_cols();
-            for row in resp.iter_rows_mut() {
-                for (v, s) in row.iter_mut().zip(sum.iter()) {
-                    *v /= *s;
-                }
-            }
+            let resp = try!(T::init_resp(k, inputs));
             self.update_gaussian_parameters(inputs, resp);
         }
 
-impl GaussianMixtureModel {
+impl<T: Initializer> GaussianMixtureModel<T> {
     /// Constructs a new Gaussian Mixture Model
     ///
     /// Defaults to 100 maximum iterations and
     ///
     /// let gmm = GaussianMixtureModel::new(3);
     /// ```
-    pub fn new(k: usize) -> GaussianMixtureModel {
+    pub fn new(k: usize) -> Self {
         Self::with_weights(k, Vector::ones(k) / k as f64).unwrap()
     }
 
     /// Constructs a new GMM with the specified prior mixture weights.
 
     /// # Failures
     ///
     /// - Mixture weights do not have length k.
     /// - Mixture weights have a negative entry.
-    pub fn with_weights(k: usize, mixture_weights: Vector<f64>) -> LearningResult<GaussianMixtureModel> {
+    pub fn with_weights(k: usize, mixture_weights: Vector<f64>) -> LearningResult<Self> {
         if mixture_weights.size() != k {
             Err(Error::new(ErrorKind::InvalidParameters, "Mixture weights must have length k."))
         } else if mixture_weights.data().iter().any(|&x| x < 0f64) {
 
             log_lik: 0.,
             max_iters: 100,
             cov_option: CovOption::Full,
+            phantom: PhantomData,
         })
     }
 }
 
             // Add the regularization value
             for idx in 0..covariance.rows() {
-                covariance[[idx, idx]] += 0.03;
+                covariance[[idx, idx]] += 0.08;
             }
 
         }
 
         self.mix_weights /= inputs.rows() as f64;
 
     }
 }
 
+/// Trait for possible methods of initializing the responsibilities matrix
+pub trait Initializer {
+    /// Should return the responsibilities matrix:
+    /// shape is [samples, components]
+    fn init_resp(k: usize, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>>;
+}
+
+/// Initialize the responsibilities matrix using randomly generated values
+pub struct Random;
+
+impl Initializer for Random {
+    fn init_resp(k: usize, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
+        use rand::distributions::{IndependentSample, Range};
+        let between = Range::new(0.0f64, 1.);
+        let mut rng = rand::thread_rng();
+        let random_numbers: Vec<f64> =
+            (0..(inputs.rows()*k)).map(|_| between.ind_sample(&mut rng).exp()).collect();
+        let mut resp = Matrix::new(inputs.rows(), k, random_numbers);
+        let sum = resp.sum_cols();
+        for row in resp.iter_rows_mut() {
+            for (v, s) in row.iter_mut().zip(sum.iter()) {
+                *v /= *s;
+            }
+        }
+        Ok(resp)
+    }
+}
+
+/// Initialize the responsibilities matrix using k-means clustering
+pub struct KMeans;
+
+impl Initializer for KMeans {
+    fn init_resp(k: usize, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
+        use learning::k_means::KMeansClassifier;
+        let mut model = KMeansClassifier::new(k);
+        try!(model.train(inputs));
+        let classes = try!(model.predict(inputs));
+        let mut resp: Matrix<f64> = Matrix::zeros(inputs.rows(), k);
+        for (row, col) in classes.iter().enumerate() {
+            resp[[row, *col]] = 1.;
+        }
+        Ok(resp)
+    }
+}
+
 #[cfg(test)]
 mod tests {
-    use super::GaussianMixtureModel;
+    use super::{GaussianMixtureModel, Random};
     use learning::UnSupModel;
     use linalg::{Vector, Matrix};
 
     #[test]
     fn test_means_none() {
-        let model = GaussianMixtureModel::new(5);
+        let model = GaussianMixtureModel::<Random>::new(5);
 
         assert_eq!(model.means(), None);
     }
 
     #[test]
     fn test_covars_none() {
-        let model = GaussianMixtureModel::new(5);
+        let model = GaussianMixtureModel::<Random>::new(5);
 
         assert_eq!(model.covariances(), None);
     }
 
     #[test]
     fn test_negative_mixtures() {
         let mix_weights = Vector::new(vec![-0.25, 0.75, 0.5]);
-        let gmm_res = GaussianMixtureModel::with_weights(3, mix_weights);
+        let gmm_res = GaussianMixtureModel::<Random>::with_weights(3, mix_weights);
         assert!(gmm_res.is_err());
     }
 
     #[test]
     fn test_wrong_length_mixtures() {
         let mix_weights = Vector::new(vec![0.1, 0.25, 0.75, 0.5]);
-        let gmm_res = GaussianMixtureModel::with_weights(3, mix_weights);
+        let gmm_res = GaussianMixtureModel::<Random>::with_weights(3, mix_weights);
         assert!(gmm_res.is_err());
     }
 }

diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
index 957ce658..2e7ec754 100644
--- a/tests/learning/gmm.rs
+++ b/tests/learning/gmm.rs
@@ -1,7 +1,7 @@
 extern crate rand;
 
 use rm::linalg::{Matrix, BaseMatrix};
-use rm::learning::gmm::GaussianMixtureModel;
+use rm::learning::gmm::{GaussianMixtureModel, Random, KMeans};
 use rm::learning::UnSupModel;
@@ -40,13 +40,13 @@ fn gmm_train() {
 
-    const SAMPLES_PER_CENTROID: usize = 2000;
+    const SAMPLES_PER_CENTROID: usize = 500;
     // Choose three cluster centers, at (-0.5, -0.5), (0, 0.5), (0.5, 0.0).
     let centroids = Matrix::new(3, 2, vec![-0.5, -0.5, 0.0, 0.5, 0.5, 0.0]);
 
     // Generate some data randomly around the centroids
     let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.2);
-    let mut model = GaussianMixtureModel::new(3);
+    let mut model = GaussianMixtureModel::<KMeans>::new(3);
     model.set_max_iters(100);
     match model.train(&samples) {
         Ok(_) => {

From 7db6f0cd38cbb54f152ab5f6e0c8655762b61527 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Sun, 16 Oct 2016 20:57:34 -0700
Subject: [PATCH 09/21] Investigate the initialized means

---
 src/learning/gmm.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 6ea930a0..3377674f 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -105,6 +105,8 @@ impl<T: Initializer> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureMod
             self.update_gaussian_parameters(inputs, resp);
         }
 
+        println!("initialized means: \n{:.4}", self.model_means.as_ref().unwrap());
+
         for c in self.model_covars.as_ref().unwrap().iter() {
             // println!("covs: \n{:.4}", &c);
         }

From 9fd6d36ac35e77cf3e62ac5ad9482095001902a2 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 08:56:20 -0700
Subject: [PATCH 10/21] Add logs, not multiply

---
 src/learning/gmm.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 3377674f..4ed03601 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -372,7 +372,7 @@ impl<T: Initializer> GaussianMixtureModel<T> {
         // println!("mix_weights: \n{:?}", &self.mix_weights);
         let log_weights = self.mix_weights.iter().map(|w| w.ln());
         for (lp, lw) in log_prob.iter_mut().zip(log_weights) {
-            *lp *= lw;
+            *lp += lw;
         }
         log_prob
     }

From b3c136b16a830e7f3684047d40b045042b8a3827 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 08:57:37 -0700
Subject: [PATCH 11/21] Test for lower than epsilon rather than le(0)

---
 src/learning/gmm.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 4ed03601..1a5cfe31 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -343,7 +343,7 @@ impl<T: Initializer> GaussianMixtureModel<T> {
                 let n_features = covariances[0].cols();
                 for covariance in covariances {
                     for i in 0..covariance.cols() {
-                        if covariance[[i, i]] <= 0.0 {
+                        if covariance[[i, i]] < EPSILON {
                             return Err(Error::new(
                                 ErrorKind::InvalidState,
                                 "Mixture model had zeros along the diagonals."));

From f79c2f1fef92782a3c5bee033fddc84ab95a0bb2 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 15:36:28 -0700
Subject: [PATCH 12/21] Fix covariance calculation in update_gaussian_parameters

---
 src/learning/gmm.rs | 36 ++++++++----------------------------
 1 file changed, 8 insertions(+), 28 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 1a5cfe31..80b0ca56 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -91,7 +91,6 @@ impl<T: Initializer> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureMod
         // Initialization:
         let k = self.comp_count;
 
-        // println!("\n");
         self.model_covars = {
             let cov_mat = try!(self.initialize_covariances(inputs, reg_value));
             Some(vec![cov_mat; k])
         };
@@ -105,31 +104,17 @@ impl<T: Initializer> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureMod
             self.update_gaussian_parameters(inputs, resp);
         }
 
-        println!("initialized means: \n{:.4}", self.model_means.as_ref().unwrap());
-
-        for c in self.model_covars.as_ref().unwrap().iter() {
-            // println!("covs: \n{:.4}", &c);
-        }
-
         self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
-        for c in self.precisions_cholesky.as_ref().unwrap().iter() {
-            // println!("prec: \n{:.4}", &c);
-        }
-
         self.log_lik = 0.;
 
         for iter in 0..self.max_iters {
             let log_lik_0 = self.log_lik;
-            // println!("\nIteration {}", iter);
-            // println!("cov:\n{:.4}", self.covariances().unwrap());
 
             // e_step
             let (log_prob_norm, mut resp) = self.estimate_log_prob_resp(inputs);
             // Return to normal space
             for v in resp.iter_mut() { *v = v.exp(); }
 
-            // println!("resp: \n{:.4}", &resp.select_rows(&[0, 1, 2, 3, 4]));
-            // println!("mix_weights: {}", &self.mix_weights);
             // m_step
             self.update_gaussian_parameters(inputs, resp);
             self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
             // end of m_step
@@ -445,7 +430,6 @@ impl<T: Initializer> GaussianMixtureModel<T> {
     fn update_gaussian_parameters(&mut self, inputs: &Matrix<f64>, mut resp: Matrix<f64>) {
         let mut model_means = self.model_means.as_mut().unwrap();
 
-        // println!("resp: {}", resp);
         self.mix_weights = resp.iter_rows()
             .fold(Vector::new(vec![0.0; resp.cols()]), |mut acc, row| {
                 for (a, r) in acc.iter_mut().zip(row.iter()) {
@@ -455,12 +439,10 @@ impl<T: Initializer> GaussianMixtureModel<T> {
             })
             + 10.0 * EPSILON;
 
-        // println!("nk: \n{:.4}", &self.mix_weights);
         *model_means = resp.transpose() * inputs;
         for col in 0..model_means.cols() {
             let mm_rows = model_means.rows();
             let mut mm_col = model_means.sub_slice_mut([0, col], mm_rows, 1);
-
             let ref mix_weights = self.mix_weights;
             let div_mix_weights = |v| { v / mix_weights[col] };
             mm_col = mm_col.apply(&div_mix_weights);
@@ -468,35 +450,33 @@ impl<T: Initializer> GaussianMixtureModel<T> {
 
         let mut model_covars = self.model_covars.as_mut().unwrap();
 
-        // println!("means:\n{:.4}", &model_means);
        // Iterate through each component in the model covariances
         for (k, covariance) in model_covars.iter_mut().enumerate() {
             let mut diff: Matrix<f64> = inputs.clone();
             for (_, mut row) in diff.iter_rows_mut().enumerate() {
                 for (i, v) in row.iter_mut().enumerate() {
-                    *v = *v - model_means[[k, i]];
+                    *v -= model_means[[k, i]];
                 }
             }
 
             let mut diff_transpose = diff.transpose();
-
-            for row in 0..diff_transpose.rows() {
-                let mut dt = diff_transpose.sub_slice_mut([row, 0], 1, diff.rows());
-                dt *= resp[[row, k]];
+            let resp_transpose_row = resp.select_cols(&[k]);
+            for row in diff_transpose.iter_rows_mut() {
+                for (v, x) in row.iter_mut().zip(resp_transpose_row.iter()) {
+                    *v *= *x;
+                }
             }
 
-            *covariance = (diff_transpose.as_mut_slice() * diff) / self.mix_weights[k];
+            *covariance = (diff_transpose * diff) / self.mix_weights[k];
 
             // Add the regularization value
             for idx in 0..covariance.rows() {
-                covariance[[idx, idx]] += 0.08;
+                covariance[[idx, idx]] += 0.0;
             }
 
         }
 
         self.mix_weights /= inputs.rows() as f64;
-        // println!("mix_weights: {}", &self.mix_weights);
 
     }
 }

From 918a74038936b6115b6c4363fdb332d30b0bf477 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 15:42:31 -0700
Subject: [PATCH 13/21] Fix lints, fix doctests

---
 src/learning/gmm.rs   | 33 ++++++++++++++++-----------------
 tests/learning/gmm.rs |  2 +-
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 80b0ca56..e06852f8 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -6,14 +6,14 @@
 //!
 //! ```
 //! use rusty_machine::linalg::Matrix;
-//! use rusty_machine::learning::gmm::{CovOption, GaussianMixtureModel};
+//! use rusty_machine::learning::gmm::{CovOption, GaussianMixtureModel, Random};
 //! use rusty_machine::learning::UnSupModel;
 //!
 //! let inputs = Matrix::new(4, 2, vec![1.0, 2.0, -3.0, -3.0, 0.1, 1.5, -5.0, -2.5]);
 //! let test_inputs = Matrix::new(3, 2, vec![1.0, 2.0, 3.0, 2.9, -4.4, -2.5]);
 //!
 //! // Create gmm with k(=2) classes.
-//! let mut model = GaussianMixtureModel::new(2);
+//! let mut model: GaussianMixtureModel<Random> = GaussianMixtureModel::new(2);
 //! model.set_max_iters(10);
 //! model.cov_option = CovOption::Diagonal;
 //!
@@ -32,11 +32,9 @@
 //! ```
 extern crate rand;
 
-use linalg::{Matrix, MatrixSlice, Vector, BaseMatrix, BaseMatrixMut, Axes};
-use rulinalg::utils;
+use linalg::{Matrix, Vector, BaseMatrix, BaseMatrixMut, Axes};
 
 use learning::{LearningResult, UnSupModel};
-use learning::toolkit::rand_utils;
 use learning::error::{Error, ErrorKind};
 
@@ -107,7 +105,7 @@ impl<T: Initializer> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureMod
         self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
         self.log_lik = 0.;
 
-        for iter in 0..self.max_iters {
+        for _ in 0..self.max_iters {
             let log_lik_0 = self.log_lik;
 
@@ -151,9 +149,9 @@ impl<T: Initializer> GaussianMixtureModel<T> {
     ///
     /// # Examples
     /// ```
-    /// use rusty_machine::learning::gmm::GaussianMixtureModel;
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random};
     ///
-    /// let gmm = GaussianMixtureModel::new(3);
+    /// let gmm: GaussianMixtureModel<Random> = GaussianMixtureModel::new(3);
     /// ```
     pub fn new(k: usize) -> Self {
         Self::with_weights(k, Vector::ones(k) / k as f64).unwrap()
@@ -167,12 +165,12 @@ impl<T: Initializer> GaussianMixtureModel<T> {
     /// # Examples
     ///
     /// ```
-    /// use rusty_machine::learning::gmm::GaussianMixtureModel;
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random};
     /// use rusty_machine::linalg::Vector;
     ///
    /// let mix_weights = Vector::new(vec![0.25, 0.25, 0.5]);
     ///
-    /// let gmm = GaussianMixtureModel::with_weights(3, mix_weights).unwrap();
+    /// let gmm: GaussianMixtureModel<Random> = GaussianMixtureModel::with_weights(3, mix_weights).unwrap();
     /// ```
     ///
     /// # Failures
@@ -243,9 +241,9 @@ impl<T: Initializer> GaussianMixtureModel<T> {
     /// # Examples
     ///
     /// ```
-    /// use rusty_machine::learning::gmm::GaussianMixtureModel;
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random};
     ///
-    /// let mut gmm = GaussianMixtureModel::new(2);
+    /// let mut gmm: GaussianMixtureModel<Random> = GaussianMixtureModel::new(2);
     /// gmm.set_max_iters(5);
     /// ```
     pub fn set_max_iters(&mut self, iters: usize) {
@@ -427,7 +425,7 @@ impl<T: Initializer> GaussianMixtureModel<T> {
     }
 
-    fn update_gaussian_parameters(&mut self, inputs: &Matrix<f64>, mut resp: Matrix<f64>) {
+    fn update_gaussian_parameters(&mut self, inputs: &Matrix<f64>, resp: Matrix<f64>) {
         let mut model_means = self.model_means.as_mut().unwrap();
 
@@ -442,10 +440,10 @@ impl<T: Initializer> GaussianMixtureModel<T> {
         *model_means = resp.transpose() * inputs;
         for col in 0..model_means.cols() {
             let mm_rows = model_means.rows();
-            let mut mm_col = model_means.sub_slice_mut([0, col], mm_rows, 1);
+            let mm_col = model_means.sub_slice_mut([0, col], mm_rows, 1);
             let ref mix_weights = self.mix_weights;
             let div_mix_weights = |v| { v / mix_weights[col] };
-            mm_col = mm_col.apply(&div_mix_weights);
+            mm_col.apply(&div_mix_weights);
         }
 
@@ -488,6 +486,7 @@ impl<T: Initializer> GaussianMixtureModel<T> {
 
 /// Initialize the responsibilities matrix using randomly generated values
+#[derive(Debug)]
 pub struct Random;
 
 impl Initializer for Random {
@@ -509,6 +508,7 @@ impl Initializer for Random {
 }
 
 /// Initialize the responsibilities matrix using k-means clustering
+#[derive(Debug)]
 pub struct KMeans;
 
 impl Initializer for KMeans {
@@ -528,8 +528,7 @@ impl Initializer for KMeans {
 #[cfg(test)]
 mod tests {
     use super::{GaussianMixtureModel, Random};
-    use learning::UnSupModel;
-    use linalg::{Vector, Matrix};
+    use linalg::Vector;
 
     #[test]
     fn test_means_none() {

diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
index 2e7ec754..5b4bf01f 100644
--- a/tests/learning/gmm.rs
+++ b/tests/learning/gmm.rs
@@ -1,7 +1,7 @@
 extern crate rand;
 
 use rm::linalg::{Matrix, BaseMatrix};
-use rm::learning::gmm::{GaussianMixtureModel, Random, KMeans};
+use rm::learning::gmm::{GaussianMixtureModel, KMeans};
 use rm::learning::UnSupModel;
 
 use self::rand::thread_rng;

From 917656014d2bee2d8dc8e66889f4b46a0424a9af Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 17:12:40 -0700
Subject: [PATCH 14/21] Reorganize and add the cov regularization variable
 again

---
 src/learning/gmm.rs | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index e06852f8..fea5b910 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -426,8 +426,6 @@ impl<T: Initializer> GaussianMixtureModel<T> {
     }
 
     fn update_gaussian_parameters(&mut self, inputs: &Matrix<f64>, resp: Matrix<f64>) {
-        let mut model_means = self.model_means.as_mut().unwrap();
-
         self.mix_weights = resp.iter_rows()
             .fold(Vector::new(vec![0.0; resp.cols()]), |mut acc, row| {
                 for (a, r) in acc.iter_mut().zip(row.iter()) {
@@ -436,17 +434,16 @@ impl<T: Initializer> GaussianMixtureModel<T> {
                 acc
             })
             + 10.0 * EPSILON;
-
+
+        let mut model_covars = self.model_covars.as_mut().unwrap();
+        let mut model_means = self.model_means.as_mut().unwrap();
         *model_means = resp.transpose() * inputs;
-        for col in 0..model_means.cols() {
-            let mm_rows = model_means.rows();
-            let mm_col = model_means.sub_slice_mut([0, col], mm_rows, 1);
-            let ref mix_weights = self.mix_weights;
-            let div_mix_weights = |v| { v / mix_weights[col] };
-            mm_col.apply(&div_mix_weights);
-        }
 
-        let mut model_covars = self.model_covars.as_mut().unwrap();
+        for (nk, row) in self.mix_weights.iter().zip(model_means.iter_rows_mut()) {
+            for v in row {
+                *v /= *nk;
+            }
+        }
 
         // Iterate through each component in the model covariances
         for (k, covariance) in model_covars.iter_mut().enumerate() {
 
             // Add the regularization value
             for idx in 0..covariance.rows() {
-                covariance[[idx, idx]] += 0.0;
+                covariance[[idx, idx]] += 0.05;
             }

From 7adee145c6ee296f616ca684d4cac79238f92519 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 17:13:11 -0700
Subject: [PATCH 15/21] Testing higher-dimensional data is more likely to
 fail, so we should do it

---
 tests/learning/gmm.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
index 5b4bf01f..eb50c3f7 100644
--- a/tests/learning/gmm.rs
+++ b/tests/learning/gmm.rs
@@ -42,7 +42,7 @@ fn gmm_train() {
 
     const SAMPLES_PER_CENTROID: usize = 500;
     // Choose three cluster centers, at (-0.5, -0.5), (0, 0.5), (0.5, 0.0).
-    let centroids = Matrix::new(3, 2, vec![-0.5, -0.5, 0.0, 0.5, 0.5, 0.0]);
+    let centroids = Matrix::new(2, 3, vec![-0.4, -0.6, 0.1, 0.6, 0.7, -0.3]);
 
     // Generate some data randomly around the centroids
     let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.2);

From 2f20142ac685c187ed8cdae218d0fb348d736ab6 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 19:29:30 -0700
Subject: [PATCH 16/21] Remove comments, lower covariance

---
 src/learning/gmm.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index fea5b910..baac4c6d 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -415,13 +415,12 @@ impl<T: Initializer> GaussianMixtureModel<T> {
                 a.ln()
             }).collect::<Vec<f64>>());
 
-        // println!("log_prob_norm: \n{:.4}", &log_prob_norm.select(&[0, 1, 2, 3, 4, 5, 6]));
         for row in 0..log_prob_norm.size() {
             for col in 0..weighted_log_prob.cols() {
                 weighted_log_prob[[row, col]] -= log_prob_norm[row];
             }
         }
-        // println!("log_prob: \n{:.4}", weighted_log_prob.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
+
         (log_prob_norm, weighted_log_prob)
     }
 
             // Add the regularization value
             for idx in 0..covariance.rows() {
-                covariance[[idx, idx]] += 0.05;
+                covariance[[idx, idx]] += 0.01;
             }

From 0b7f6d20a8ed02672c7d4b7a2fef8542bde92cc0 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 19:29:58 -0700
Subject: [PATCH 17/21] Test more clusters with GMM

---
 tests/learning/gmm.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
index eb50c3f7..c2f8d7eb 100644
--- a/tests/learning/gmm.rs
+++ b/tests/learning/gmm.rs
@@ -46,7 +46,7 @@ fn gmm_train() {
 
     // Generate some data randomly around the centroids
     let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.2);
-    let mut model = GaussianMixtureModel::<KMeans>::new(3);
+    let mut model = GaussianMixtureModel::<KMeans>::new(10);
     model.set_max_iters(100);
     match model.train(&samples) {
         Ok(_) => {

From 31b20eae156b58656a9f847fba0ba253751df264 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 19:44:38 -0700
Subject: [PATCH 18/21] Clean up train method

---
 src/learning/gmm.rs | 34 +++++++++++++---------------------
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index baac4c6d..72251ebb 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -81,28 +81,20 @@ pub struct GaussianMixtureModel<T: Initializer> {
 
 impl<T: Initializer> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel<T> {
     /// Train the model using inputs.
     fn train(&mut self, inputs: &Matrix<f64>) -> LearningResult<()> {
-        let reg_value = if inputs.rows() > 1 {
-            1f64 / (inputs.rows() - 1) as f64
-        } else {
-            return Err(Error::new(ErrorKind::InvalidData, "Only one row of data provided."));
-        };
-
-        // Initialization:
+       // Move k to stack to appease the borrow checker
         let k = self.comp_count;
 
-        self.model_covars = {
-            let cov_mat = try!(self.initialize_covariances(inputs, reg_value));
-            Some(vec![cov_mat; k])
-        };
+        // Initialize empty matrices for covariances and means
+        self.model_covars = Some(
+            vec![Matrix::zeros(inputs.cols(), inputs.cols()); k]);
+        self.model_means = Some(Matrix::zeros(inputs.cols(), k));
 
-        {
-            self.model_means = Some(Matrix::new(inputs.cols(), self.comp_count,
-                                                vec![0.; inputs.cols() * self.comp_count]));
-            let resp = try!(T::init_resp(k, inputs));
-            self.update_gaussian_parameters(inputs, resp);
-        }
+        // Initialize responsibilities and calculate parameters
+        self.update_gaussian_parameters(
+            inputs, try!(T::init_resp(k, inputs)));
+        self.precisions_cholesky =
+            Some(try!(self.compute_precision_cholesky()));
 
-        self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
         self.log_lik = 0.;
 
         for _ in 0..self.max_iters {
             let log_lik_0 = self.log_lik;
 
             // e_step
             let (log_prob_norm, mut resp) = self.estimate_log_prob_resp(inputs);
             // Return to normal space
             for v in resp.iter_mut() { *v = v.exp(); }
 
             // m_step
             self.update_gaussian_parameters(inputs, resp);
-            self.precisions_cholesky = Some(try!(self.compute_precision_cholesky()));
-            // end of m_step
+            self.precisions_cholesky =
+                Some(try!(self.compute_precision_cholesky()));
 
+            // check for convergence
             let log_lik_1 = log_prob_norm.mean();
-
             if (log_lik_0 - log_lik_1).abs() < CONVERGENCE_EPS {
                 break;
             }

From e54cc04f91ca3b43ddf7ba70fa44c47637beb449 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 19:46:24 -0700
Subject: [PATCH 19/21] Remove vestigial method initialize_covariances

---
 src/learning/gmm.rs | 25 -------------------------
 1 file changed, 25 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 72251ebb..ecee527f 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -242,31 +242,6 @@ impl<T: Initializer> GaussianMixtureModel<T> {
         self.max_iters = iters;
     }
 
-    fn initialize_covariances(&self, inputs: &Matrix<f64>, reg_value: f64) -> LearningResult<Matrix<f64>> {
-        match self.cov_option {
-            CovOption::Diagonal => {
-                let variance = try!(inputs.variance(Axes::Row));
-                Ok(Matrix::from_diag(&variance.data()) * reg_value.sqrt())
-            }
-            CovOption::Full | CovOption::Regularized(_) => {
-                let means = inputs.mean(Axes::Row);
-                let mut cov_mat = Matrix::zeros(inputs.cols(), inputs.cols());
-                for (j, row) in cov_mat.iter_rows_mut().enumerate() {
-                    for (k, elem) in row.iter_mut().enumerate() {
-                        *elem = inputs.iter_rows().map(|r| {
-                            (r[j] - means[j]) * (r[k] - means[k])
-                        }).sum::<f64>();
-                    }
-                }
-                cov_mat *= reg_value;
-                if let CovOption::Regularized(eps) = self.cov_option {
-                    cov_mat += Matrix::<f64>::identity(cov_mat.cols()) * eps;
-                }
-                Ok(cov_mat)
-            }
-        }
-    }
-
     fn compute_precision_cholesky(&self) -> LearningResult<Vec<Matrix<f64>>> {

From 566c3ed6065f2f51d115fc9e45890339615dfb49 Mon Sep 17 00:00:00 2001
From: Andrew Smith
Date: Mon, 17 Oct 2016 23:47:57 -0700
Subject: [PATCH 20/21] Move CovOption into a trait/struct design

---
 src/learning/gmm.rs   | 332 +++++++++++++++++++++++++++---------------
 tests/learning/gmm.rs |   5 +++--
 2 files changed, 215 insertions(+), 122 deletions(-)

diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index ecee527f..60587f11 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -6,16 +6,15 @@
 //!
 //! ```
 //! use rusty_machine::linalg::Matrix;
-//! use rusty_machine::learning::gmm::{CovOption, GaussianMixtureModel, Random};
+//! use rusty_machine::learning::gmm::{CovOption, GaussianMixtureModel, Random, CholeskyFull, Diagonal};
 //! use rusty_machine::learning::UnSupModel;
 //!
 //! let inputs = Matrix::new(4, 2, vec![1.0, 2.0, -3.0, -3.0, 0.1, 1.5, -5.0, -2.5]);
 //! let test_inputs = Matrix::new(3, 2, vec![1.0, 2.0, 3.0, 2.9, -4.4, -2.5]);
 //!
 //! // Create gmm with k(=2) classes.
-//! let mut model: GaussianMixtureModel<Random> = GaussianMixtureModel::new(2);
+//! let mut model: GaussianMixtureModel<Random, Diagonal> = GaussianMixtureModel::new(2);
 //! model.set_max_iters(10);
-//! model.cov_option = CovOption::Diagonal;
 //!
 //! // Where inputs is a Matrix with features in columns.
 //! model.train(&inputs).unwrap();
@@ -44,24 +43,9 @@ use std::marker::PhantomData;
 
 const CONVERGENCE_EPS: f64 = 1.0e-15;
 
-/// Covariance options for GMMs.
-///
-/// - Full : The full covariance structure.
-/// - Regularized : Adds a regularization constant to the covariance diagonal.
-/// - Diagonal : Only the diagonal covariance structure.
-#[derive(Clone, Copy, Debug)]
-pub enum CovOption {
-    /// The full covariance structure.
-    Full,
-    /// Adds a regularization constant to the covariance diagonal.
-    Regularized(f64),
-    /// Only the diagonal covariance structure.
-    Diagonal,
-}
-
 /// A Gaussian Mixture Model
 #[derive(Debug)]
-pub struct GaussianMixtureModel<T: Initializer> {
+pub struct GaussianMixtureModel<T: Initializer, C: CovOption<f64>> {
     comp_count: usize,
     // [n_components]
     mix_weights: Vector<f64>,
@@ -70,18 +54,21 @@ pub struct GaussianMixtureModel {
     // n_components elements: [n_features, n_features]
     model_covars: Option<Vec<Matrix<f64>>>,
     // n_components elements: [n_features, n_features]
-    precisions_cholesky: Option<Vec<Matrix<f64>>>,
+    precisions: Option<Vec<Matrix<f64>>>,
     log_lik: f64,
     max_iters: usize,
     phantom: PhantomData<T>,
-    /// The covariance options for the GMM.
-    pub cov_option: CovOption,
+    /// Struct that calculates covariance and gaussian parameters
+    pub cov_option: C,
 }
 
-impl<T: Initializer> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel<T> {
+impl<T, C> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel<T, C>
+    where T: Initializer,
+          C: CovOption<f64> + Default
+{
     /// Train the model using inputs.
     fn train(&mut self, inputs: &Matrix<f64>) -> LearningResult<()> {
-       // Move k to stack to appease the borrow checker
+        // Move k to stack to appease the borrow checker
         let k = self.comp_count;
 
         // Initialize empty matrices for covariances and means
         self.model_covars = Some(
             vec![Matrix::zeros(inputs.cols(), inputs.cols()); k]);
         self.model_means = Some(Matrix::zeros(inputs.cols(), k));
 
         // Initialize responsibilities and calculate parameters
-        self.update_gaussian_parameters(
-            inputs, try!(T::init_resp(k, inputs)));
-        self.precisions_cholesky =
-            Some(try!(self.compute_precision_cholesky()));
+        self.update_gaussian_parameters(inputs, try!(T::init_resp(k, inputs)));
+        self.precisions =
+            Some(try!(self.cov_option.compute_precision(
+                self.model_covars.as_ref().unwrap())));
 
         self.log_lik = 0.;
 
         for _ in 0..self.max_iters {
             let log_lik_0 = self.log_lik;
 
             // e_step
             let (log_prob_norm, mut resp) = self.estimate_log_prob_resp(inputs);
             // Return to normal space
             for v in resp.iter_mut() { *v = v.exp(); }
 
             // m_step
             self.update_gaussian_parameters(inputs, resp);
-            self.precisions_cholesky =
-                Some(try!(self.compute_precision_cholesky()));
+            self.precisions =
+                Some(try!(self.cov_option.compute_precision(
+                    self.model_covars.as_ref().unwrap())));
 
             // check for convergence
             let log_lik_1 = log_prob_norm.mean();
 
-impl<T: Initializer> GaussianMixtureModel<T> {
+impl<T, C> GaussianMixtureModel<T, C>
+    where T: Initializer,
+          C: CovOption<f64> + Default
+{
     /// Constructs a new Gaussian Mixture Model
     ///
     /// Defaults to 100 maximum iterations and
     ///
     /// # Examples
     /// ```
-    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random};
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random, CholeskyFull};
     ///
-    /// let gmm: GaussianMixtureModel<Random> = GaussianMixtureModel::new(3);
+    /// let gmm: GaussianMixtureModel<Random, CholeskyFull> = GaussianMixtureModel::new(3);
     /// ```
     pub fn new(k: usize) -> Self {
         Self::with_weights(k, Vector::ones(k) / k as f64).unwrap()
 
     /// # Examples
     ///
     /// ```
-    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random};
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random, CholeskyFull};
     /// use rusty_machine::linalg::Vector;
     ///
     /// let mix_weights = Vector::new(vec![0.25, 0.25, 0.5]);
     ///
-    /// let gmm: GaussianMixtureModel<Random> = GaussianMixtureModel::with_weights(3, mix_weights).unwrap();
+    /// let gmm: GaussianMixtureModel<Random, CholeskyFull> = GaussianMixtureModel::with_weights(3, mix_weights).unwrap();
     /// ```
     ///
     /// # Failures
 
             mix_weights: normalized_weights,
             model_means: None,
             model_covars: None,
-            precisions_cholesky: None,
+            precisions: None,
             log_lik: 0.,
             max_iters: 100,
-            cov_option: CovOption::Full,
+            cov_option: C::default(),
             phantom: PhantomData,
         })
     }
 
     /// # Examples
     ///
     /// ```
-    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random};
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random, CholeskyFull};
     ///
-    /// let mut gmm: GaussianMixtureModel<Random> = GaussianMixtureModel::new(2);
+    /// let mut gmm: GaussianMixtureModel<Random, CholeskyFull> = GaussianMixtureModel::new(2);
     /// gmm.set_max_iters(5);
     /// ```
     pub fn set_max_iters(&mut self, iters: usize) {
         self.max_iters = iters;
     }
 
-    fn compute_precision_cholesky(&self) -> LearningResult<Vec<Matrix<f64>>> {
-        let &GaussianMixtureModel {
-            model_covars: ref covariances,
-            cov_option: ref covariance_type,
-            comp_count: n_components,
-            ..
-        } = self;
-
-        let covariances = covariances.as_ref().unwrap();
-        match *covariance_type {
-            CovOption::Full | CovOption::Regularized(_) => {
-                let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
-                for covariance in covariances {
-                    // println!("loop-cov: \n{:.4}", &covariance);
-                    let mut cov_chol: Matrix<f64> = try!(covariance.cholesky());
-                    // cholesky is correct
-                    let half = (cov_chol.rows() as f64 / 2.0).floor() as usize;
-                    let lower_rows = cov_chol.rows() - half - 1;
-                    let n_col = cov_chol.cols() - 1;
-                    // println!("cov_chol: \n{:.4}", &cov_chol);
-
-                    // solve_l_triangular doesn't work with a matrix, so we have to do it
-                    // the hard way.
-                    let det: f64 = cov_chol.det();
-                    cov_chol /= det;
-
-                    for idx in 0..half {
-                        // Mirror along the diagonal
-                        let (mut upper, mut lower) = cov_chol.split_at_mut(half, Axes::Row);
-                        {
-                            let swap_row = lower_rows - idx;
-                            let swap_col = n_col - idx;
-                            mem::swap(&mut upper[[idx, idx]], &mut lower[[swap_row, swap_col]]);
-                        }
-
-                        // Transpose and invert all other values
-                        for j in (idx+1)..(n_col+1) {
-                            let swap_row = lower_rows - idx;
-                            let swap_col = n_col - j;
-                            mem::swap(&mut upper[[idx, j]], &mut lower[[swap_row, swap_col]]);
-                            upper[[idx, j]] *= -1.0;
-                        }
-                    }
-                    precisions_chol.push(cov_chol);
-                }
-                Ok(precisions_chol)
-            },
-            CovOption::Diagonal => {
-                let n_features = covariances[0].cols();
-                for covariance in covariances {
-                    for i in 0..covariance.cols() {
-                        if covariance[[i, i]] < EPSILON {
-                            return Err(Error::new(
-                                ErrorKind::InvalidState,
-                                "Mixture model had zeros along the diagonals."));
-                        }
-                    }
-                }
-                let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
-                for covariance in covariances {
-                    let v: Vec<f64> = covariance.iter().map(|v| 1.0 / v.sqrt()).collect();
-                    precisions_chol.push(Matrix::<f64>::new(n_features, n_features, v));
-                }
-                Ok(precisions_chol)
-            }
-        }
-    }
-
     // == Parameters
     // inputs : [n_samples, n_features]
     //
@@ -334,15 +257,15 @@ impl<T: Initializer> GaussianMixtureModel<T> {
     //
     //
     fn estimate_log_prob(&self, inputs: &Matrix<f64>) -> Matrix<f64> {
-        let precisions_cholesky: &Vec<Matrix<f64>> = self.precisions_cholesky.as_ref().unwrap();
+        let precisions: &Vec<Matrix<f64>> = self.precisions.as_ref().unwrap();
         let ref model_means = self.model_means.as_ref().unwrap();
         // The log of the determinant for each precision matrix
-        let log_det = precisions_cholesky.iter().map(|m| m.det().ln());
+        let log_det = precisions.iter().map(|m| m.det().ln());
         // println!("log_det: {:?}", log_det);
         let mut log_prob = Matrix::zeros(inputs.rows(), self.comp_count);
         for k in 0..self.comp_count {
-            let prec = &precisions_cholesky[k];
+            let prec = &precisions[k];
             // y is a matrix of shape [n_samples, n_features]
             let mut y = inputs * prec;
             // println!("y: \n{:.4}", &y.select_rows(&[0, 1, 2, 3, 4, 5, 6]));
@@ -401,7 +324,6 @@ impl<T: Initializer> GaussianMixtureModel<T> {
         }) + 10.0 * EPSILON;
 
-        let mut model_covars = self.model_covars.as_mut().unwrap();
         let mut model_means = self.model_means.as_mut().unwrap();
 
         *model_means = resp.transpose() * inputs;
@@ -411,7 +333,109 @@ impl<T: Initializer> GaussianMixtureModel<T> {
         }
     }
 
-        // Iterate through each component in the model covariances
+        // Delegate updating the covariance matrices to the cov_option
+        let mut model_covars = self.model_covars.as_mut().unwrap();
+        self.cov_option.update_covars(model_covars, resp, inputs,
+                                      model_means, &self.mix_weights);
+
+        self.mix_weights /= inputs.rows() as f64;
+    }
+}
+
+/// Contains various methods for calculating covariances
+pub trait CovOption {
+    /// Calculate a precision matrix from a covariance matrix
+    fn compute_precision(&self, covariances: &Vec<Matrix<f64>>) ->
+        LearningResult<Vec<Matrix<f64>>>;
+
+    /// Update covariance matrices
+    fn update_covars(&self, model_covars: &mut Vec<Matrix<f64>>, resp: Matrix<f64>,
+                     inputs: &Matrix<f64>, model_means: &Matrix<f64>,
+                     mix_weights: &Vector<f64>);
+
+    /// Regularization constant added along the diagonal
+    fn reg_covar(&self) -> f64 {
+        0.0
+    }
+}
+
+/// Performs calculations on a full covariance matrix using Cholesky factorization,
+/// adding a constant regularization factor along the diagonal to ensure stability.
+#[derive(Clone, Copy, Debug)]
+pub struct CholeskyFull {
+    /// Regularization constant
+    pub reg_covar: f64
+}
+
+/// Only calculates the diagonal (the variance). This assumes that all parameters
+/// are uncorrelated. Adds a constant regularization factor (generally unnecessary).
+#[derive(Clone, Copy, Debug)]
+pub struct Diagonal {
+    /// Regularization constant
+    pub reg_covar: f64
+}
+
+impl CholeskyFull {
+    /// Initialize with a covariance regularization constant
+    pub fn new(reg_covar: f64) -> Self {
+        CholeskyFull {
+            reg_covar: reg_covar
+        }
+    }
+
+    /// Solves a system given the Cholesky decomposition of the lower triangular matrix
+    pub fn solve_cholesky_decomposition(mut cov_chol: Matrix<f64>) -> Matrix<f64> {
+        let half = (cov_chol.rows() as f64 / 2.0).floor() as usize;
+        let lower_rows = cov_chol.rows() - half - 1;
+        let n_col = cov_chol.cols() - 1;
+
+        let det: f64 = cov_chol.det();
+        cov_chol /= det;
+
+        for idx in 0..half {
+            // Mirror along the diagonal
+            let (mut upper, mut lower) = cov_chol.split_at_mut(half, Axes::Row);
+            {
+                let swap_row = lower_rows - idx;
+                let swap_col = n_col - idx;
+                mem::swap(&mut upper[[idx, idx]], &mut lower[[swap_row, swap_col]]);
+            }
+
+            // Transpose and invert all other values
+            for j in (idx+1)..(n_col+1) {
+                let swap_row = lower_rows - idx;
+                let swap_col = n_col - j;
+                mem::swap(&mut upper[[idx, j]], &mut lower[[swap_row, swap_col]]);
+                upper[[idx, j]] *= -1.0;
+            }
+        }
+        cov_chol
+    }
+}
+
+impl Default for CholeskyFull {
+    /// Initialize with a covariance regularization constant of 0.0
+    fn default() -> Self {
+        Self::new(0.0)
+    }
+}
+
+impl CovOption for CholeskyFull {
+    fn compute_precision(&self, covariances: &Vec<Matrix<f64>>) -> LearningResult<Vec<Matrix<f64>>> {
+        let n_components = covariances.len();
+        let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
+
+        for covariance in covariances {
+            let cov_chol: Matrix<f64> = try!(covariance.cholesky());
+            precisions_chol.push(Self::solve_cholesky_decomposition(cov_chol));
+        }
+
+        Ok(precisions_chol)
+    }
+
+    fn update_covars(&self, model_covars: &mut Vec<Matrix<f64>>, resp: Matrix<f64>,
+                     inputs: &Matrix<f64>, model_means: &Matrix<f64>,
+                     mix_weights: &Vector<f64>) {
         for (k, covariance) in model_covars.iter_mut().enumerate() {
             let mut diff: Matrix<f64> = inputs.clone();
             for (_, mut row) in diff.iter_rows_mut().enumerate() {
                 for (i, v) in row.iter_mut().enumerate() {
                     *v -= model_means[[k, i]];
                 }
             }
 
-            *covariance = (diff_transpose * diff) / self.mix_weights[k];
+            *covariance = (diff_transpose * diff) / mix_weights[k];
 
             // Add the regularization value
             for idx in 0..covariance.rows() {
-                covariance[[idx, idx]] += 0.01;
+                covariance[[idx, idx]] += self.reg_covar;
             }
         }
+    }
 
-        self.mix_weights /= inputs.rows() as f64;
+    fn reg_covar(&self) -> f64 {
+        self.reg_covar
+    }
+}
+
+impl Default for Diagonal {
+    /// Initialize with a covariance regularization constant of 0.0
+    fn default() -> Self {
+        Diagonal {
+            reg_covar: 0.0
+        }
+    }
+}
+
+
+impl CovOption for Diagonal {
+    fn compute_precision(&self, covariances: &Vec<Matrix<f64>>) -> LearningResult<Vec<Matrix<f64>>> {
+        let n_components = covariances.len();
+        let n_features = covariances[0].cols();
+        for covariance in covariances {
+            for i in 0..covariance.cols() {
+                if covariance[[i, i]] < EPSILON {
+                    return Err(Error::new(
+                        ErrorKind::InvalidState,
+                        "Mixture model had zeros along the diagonals."));
+                }
+
+                if covariance[[i, i]].is_nan() {
+                    return Err(Error::new(
+                        ErrorKind::InvalidState,
+                        "Mixture model had NaN along the diagonals."));
+                }
+            }
+        }
+
+        let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
+        for covariance in covariances {
+            let v: Vec<f64> = covariance.iter().map(|v| {
+                if *v != 0.0 {
+                    1.0 / v.sqrt()
+                } else {
+                    0.0
+                }
+            }).collect();
+            precisions_chol.push(Matrix::<f64>::new(n_features, n_features, v));
+        }
+
+        Ok(precisions_chol)
+    }
+
+    fn update_covars(&self, model_covars: &mut Vec<Matrix<f64>>, _resp: Matrix<f64>,
+                     inputs: &Matrix<f64>, model_means: &Matrix<f64>,
+                     mix_weights: &Vector<f64>) {
+        for (k, covariance) in model_covars.iter_mut().enumerate() {
+            let mut diff: Matrix<f64> = inputs.clone();
+            for (_, mut row) in diff.iter_rows_mut().enumerate() {
+                for (i, v) in row.iter_mut().enumerate() {
+                    *v -= model_means[[k, i]];
+                }
+            }
+
+            *covariance = Matrix::from_diag(
+                &((diff.variance(Axes::Row).unwrap() / mix_weights[k]) +
+                  self.reg_covar).data()[..]);
+        }
+    }
+
+    fn reg_covar(&self) -> f64 {
+        self.reg_covar
+    }
+}
 
@@ -490,19 +582,19 @@ impl Initializer for KMeans {
 
 #[cfg(test)]
 mod tests {
-    use super::{GaussianMixtureModel, Random};
+    use super::{GaussianMixtureModel, Random, CholeskyFull};
     use linalg::Vector;
 
     #[test]
     fn test_means_none() {
-        let model = GaussianMixtureModel::<Random>::new(5);
+        let model = GaussianMixtureModel::<Random, CholeskyFull>::new(5);
 
         assert_eq!(model.means(), None);
     }
 
     #[test]
     fn test_covars_none() {
-        let model = GaussianMixtureModel::<Random>::new(5);
+        let model = GaussianMixtureModel::<Random, CholeskyFull>::new(5);
 
         assert_eq!(model.covariances(), None);
     }
 
     #[test]
     fn test_negative_mixtures() {
         let mix_weights = Vector::new(vec![-0.25, 0.75, 0.5]);
-        let gmm_res = GaussianMixtureModel::<Random>::with_weights(3, mix_weights);
+        let gmm_res = GaussianMixtureModel::<Random, CholeskyFull>::with_weights(3, mix_weights);
 
         assert!(gmm_res.is_err());
     }
 
     #[test]
     fn test_wrong_length_mixtures() {
         let mix_weights = Vector::new(vec![0.1, 0.25, 0.75, 0.5]);
-        let gmm_res = GaussianMixtureModel::<Random>::with_weights(3, mix_weights);
+        let gmm_res = GaussianMixtureModel::<Random, CholeskyFull>::with_weights(3, mix_weights);
 
         assert!(gmm_res.is_err());
     }
 }
diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
index c2f8d7eb..8a5eeaba 100644
--- a/tests/learning/gmm.rs
+++ b/tests/learning/gmm.rs
@@ -1,7 +1,7 @@
 extern crate rand;
 
 use rm::linalg::{Matrix, BaseMatrix};
-use rm::learning::gmm::{GaussianMixtureModel, KMeans};
+use rm::learning::gmm::{GaussianMixtureModel, KMeans, CholeskyFull};
 use rm::learning::UnSupModel;
 
 use self::rand::thread_rng;
@@ -46,7 +46,8 @@ fn gmm_train() {
 
     // Generate some data randomly around the centroids
     let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.2);
 
-    let mut model = GaussianMixtureModel::<KMeans>::new(10);
+    let mut model = GaussianMixtureModel::<KMeans, CholeskyFull>::new(100);
+    model.cov_option.reg_covar = 0.0;
     model.set_max_iters(100);
     match model.train(&samples) {
         Ok(_) => {
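A note on the API introduced above: with the CovOption enum replaced by a trait, the covariance behaviour is now chosen through the model's second type parameter rather than by assigning a variant to the cov_option field, and the Regularized variant's constant becomes the reg_covar field on each option struct. A minimal usage sketch, assuming the trait and type definitions from this patch (the 1e-6 value is only illustrative):

    extern crate rusty_machine;

    use rusty_machine::learning::gmm::{GaussianMixtureModel, Random, Diagonal};
    use rusty_machine::learning::UnSupModel;
    use rusty_machine::linalg::Matrix;

    fn main() {
        // Diagonal covariances are selected at the type level;
        // Default::default() starts with reg_covar = 0.0.
        let mut model: GaussianMixtureModel<Random, Diagonal> =
            GaussianMixtureModel::new(2);
        // A small ridge on the diagonal guards against singular covariances.
        model.cov_option.reg_covar = 1e-6;
        model.set_max_iters(50);

        let inputs = Matrix::new(4, 2, vec![1.0, 2.0, -3.0, -3.0,
                                            0.1, 1.5, -5.0, -2.5]);
        model.train(&inputs).unwrap();
        println!("means: {:?}", model.means());
    }

Choosing the option at the type level means the compiler monomorphizes the E-M loop per covariance structure, so the per-iteration dispatch of compute_precision and update_covars costs nothing at runtime.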
From fe8fa5b45a9ec90d638ba4aad52653892239fecd Mon Sep 17 00:00:00 2001
From: James Lucas
Date: Wed, 19 Oct 2016 08:35:50 +0100
Subject: [PATCH 21/21] Adding basic gmm example for verification

---
 examples/gmm_simulation.rs | 81 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 examples/gmm_simulation.rs

diff --git a/examples/gmm_simulation.rs b/examples/gmm_simulation.rs
new file mode 100644
index 00000000..7e686663
--- /dev/null
+++ b/examples/gmm_simulation.rs
@@ -0,0 +1,81 @@
+extern crate rusty_machine;
+extern crate rand;
+
+use rand::distributions::{Normal, IndependentSample};
+use rand::{Rng, thread_rng};
+
+use rusty_machine::learning::gmm::{GaussianMixtureModel, Random, CholeskyFull};
+use rusty_machine::learning::UnSupModel;
+use rusty_machine::linalg::Matrix;
+
+// Simulate some data from Gaussian clusters
+fn simulate_gmm_1d_data(count: usize,
+                        means: Vec<f64>,
+                        vars: Vec<f64>,
+                        mixture_weights: Vec<f64>)
+                        -> Vec<f64> {
+    assert_eq!(means.len(), vars.len());
+    assert_eq!(means.len(), mixture_weights.len());
+
+    let gmm_count = means.len();
+
+    let mut gaussians = Vec::with_capacity(gmm_count);
+
+    for i in 0..gmm_count {
+        // Create a Gaussian with given mean and var
+        gaussians.push(Normal::new(means[i], vars[i].sqrt()));
+    }
+
+    let mut rng = thread_rng();
+    let mut out_samples = Vec::with_capacity(count);
+
+    for _ in 0..count {
+        // Pick a Gaussian from the mixture weights
+        let chosen_gaussian = gaussians[pick_gaussian(&mixture_weights, &mut rng)];
+
+        // Draw sample from it
+        let sample = chosen_gaussian.ind_sample(&mut rng);
+
+        // Add to data
+        out_samples.push(sample);
+    }
+
+    out_samples
+}
+
+// A utility function which chooses an index from some mixture weights
+fn pick_gaussian<R: Rng>(unnorm_dist: &[f64], rng: &mut R) -> usize {
+    assert!(unnorm_dist.len() > 0);
+    let sum = unnorm_dist.iter().fold(0f64, |acc, &x| acc + x);
+    let rand = rng.gen_range(0.0f64, sum);
+
+    let mut tempsum = 0.0;
+    for (i, p) in unnorm_dist.iter().enumerate() {
+        tempsum += *p;
+        if rand < tempsum {
+            return i;
+        }
+    }
+
+    panic!("No random value was sampled!");
+}
+
+
+fn main() {
+    let gmm_count = 3;
+    let count = 1000;
+
+    let means = vec![-3f64, 0., 3.];
+    let vars = vec![1f64, 0.5, 0.25];
+    let weights = vec![0.5, 0.25, 0.25];
+
+    let data = simulate_gmm_1d_data(count, means, vars, weights);
+
+    let mut gmm: GaussianMixtureModel<Random, CholeskyFull> = GaussianMixtureModel::new(gmm_count);
+    gmm.set_max_iters(300);
+    gmm.train(&Matrix::new(count, 1, data)).unwrap();
+
+    println!("Means = {:?}", gmm.means());
+    println!("Covs = {:?}", gmm.covariances());
+    println!("Mix Weights = {:?}", gmm.mixture_weights());
+}
\ No newline at end of file
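The example runs with: cargo run --example gmm_simulation. It only prints the fitted parameters, so verification is by eye; a rough automated check could be appended to the end of main() along the following lines. This is a hypothetical addition, not part of the patch, and it assumes means() exposes the fitted means as a 3x1 matrix whose data() method yields the raw values:

    // Hypothetical check appended to main(): with well-separated,
    // clearly weighted components, the sorted fitted means should
    // land near the true means -3.0, 0.0 and 3.0.
    if let Some(means) = gmm.means() {
        let mut fitted: Vec<f64> = means.data().clone();
        fitted.sort_by(|a, b| a.partial_cmp(b).unwrap());
        for (fit, truth) in fitted.iter().zip(&[-3.0f64, 0.0, 3.0]) {
            assert!((fit - truth).abs() < 0.5,
                    "fitted mean {} is too far from {}", fit, truth);
        }
    }

A tolerance of 0.5 is loose on purpose: with 1000 samples and random initialization the E-M fit varies from run to run, and component 2's variance of 0.25 keeps the clusters from overlapping enough to break the ordering.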