diff --git a/pybmc/bmc.py b/pybmc/bmc.py index cd0314e..ff17879 100644 --- a/pybmc/bmc.py +++ b/pybmc/bmc.py @@ -119,6 +119,7 @@ def predict(self, property): - lower_df: DataFrame with columns domain_keys + ['Predicted_Lower'] - median_df: DataFrame with columns domain_keys + ['Predicted_Median'] - upper_df: DataFrame with columns domain_keys + ['Predicted_Upper'] + - weights: numpy.ndarray with posterior model weight samples """ if self.samples is None or self.Vt_hat is None: raise ValueError("Must call `orthogonalize()` and `train()` before predicting.") @@ -142,7 +143,8 @@ def predict(self, property): model_preds = df[available_models].values domain_df = df[domain_keys].reset_index(drop=True) - rndm_m, (lower, median, upper) = rndm_m_random_calculator(model_preds, self.samples, self.Vt_hat) + rndm_m, (lower, median, upper), weights = rndm_m_random_calculator( + model_preds, self.samples, self.Vt_hat, output_weights=True) # Build output DataFrames lower_df = domain_df.copy() @@ -155,7 +157,7 @@ def predict(self, property): upper_df = domain_df.copy() upper_df["Predicted_Upper"] = upper - return rndm_m, lower_df, median_df, upper_df + return rndm_m, lower_df, median_df, upper_df, weights def evaluate(self, domain_filter=None): """ diff --git a/pybmc/sampling_utils.py b/pybmc/sampling_utils.py index bf4955c..1df037a 100644 --- a/pybmc/sampling_utils.py +++ b/pybmc/sampling_utils.py @@ -37,7 +37,7 @@ def coverage(percentiles, rndm_m, models_output, truth_column): return coverage_results -def rndm_m_random_calculator(filtered_model_predictions, samples, Vt_hat): +def rndm_m_random_calculator(filtered_model_predictions, samples, Vt_hat, output_weights=False): """ Generates posterior predictive samples and credible intervals. @@ -50,6 +50,7 @@ def rndm_m_random_calculator(filtered_model_predictions, samples, Vt_hat): tuple[numpy.ndarray, list[numpy.ndarray]]: - `rndm_m` (numpy.ndarray): Posterior predictive samples. 
- `[lower, median, upper]` (list[numpy.ndarray]): Credible interval arrays. + - `model_weights_random` (numpy.ndarray): Posterior model weight samples; returned only when `output_weights` is True. """ np.random.seed(142858) rng = np.random.default_rng() @@ -80,5 +81,8 @@ def rndm_m_random_calculator(filtered_model_predictions, samples, Vt_hat): lower_radius = np.percentile(rndm_m, 2.5, axis=0) median_radius = np.percentile(rndm_m, 50, axis=0) upper_radius = np.percentile(rndm_m, 97.5, axis=0) - - return rndm_m, [lower_radius, median_radius, upper_radius] + if output_weights: + return rndm_m, [lower_radius, median_radius, upper_radius], model_weights_random + else: + return rndm_m, [lower_radius, median_radius, upper_radius] + diff --git a/tests/test_bmc.py b/tests/test_bmc.py index 60b1cd6..0839a05 100644 --- a/tests/test_bmc.py +++ b/tests/test_bmc.py @@ -126,7 +126,7 @@ def test_predict(self): self.bmc.train() # Use all rows for prediction input X = self.df[["x", "y", "model1", "model2", "model3"]].copy() - rndm_m, lower_df, median_df, upper_df = self.bmc.predict(self.property) + rndm_m, lower_df, median_df, upper_df, weights = self.bmc.predict(self.property) self.assertEqual(rndm_m.shape[1], len(X)) self.assertIn("Predicted_Lower", lower_df.columns) @@ -160,7 +160,7 @@ def test_bmc_predict(self): bmc.train() # Perform prediction using property name - rndm_m, lower_df, median_df, upper_df = bmc.predict("property") + rndm_m, lower_df, median_df, upper_df, weights = bmc.predict("property") self.assertIsNotNone(rndm_m) self.assertIsInstance(lower_df, pd.DataFrame) @@ -169,6 +169,7 @@ def test_bmc_predict(self): self.assertFalse(lower_df.empty) self.assertFalse(median_df.empty) self.assertFalse(upper_df.empty) + self.assertIsNotNone(weights) # Check domain columns are present for col in ["N", "Z"]: self.assertIn(col, lower_df.columns) diff --git a/tests/test_integration.py b/tests/test_integration.py index fb9f57f..01a2c0b 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -85,7 +85,7 @@ 
def mock_hdf_reader(file, key): self.assertIsNotNone(bmc.Vt_hat) # Step 5: Predict on all domain points (including those without truth) - rndm_m, lower_df, median_df, upper_df = bmc.predict("BE") + rndm_m, lower_df, median_df, upper_df, weights = bmc.predict("BE") # Verify predictions self.assertEqual(rndm_m.shape[1], 6, "Should have predictions for all 6 domain points") @@ -164,7 +164,7 @@ def test_bmc_workflow_with_smaller_truth_domain_csv(self, mock_read_csv, mock_ex bmc.train() # Predict on all points - rndm_m, lower_df, median_df, upper_df = bmc.predict("BE") + rndm_m, lower_df, median_df, upper_df, weights = bmc.predict("BE") # Verify predictions cover all domain points self.assertEqual(len(lower_df), 6)