Implementation for #126: supporting evaluation & forward feature selection for custom metrics (e.g. lift) #128
base: develop
Changes from all commits: 47327a3, 6e68737, 46001ee, 24442e2
```diff
@@ -14,10 +14,16 @@ class ForwardFeatureSelection:
     algorithm.
 
     Predictors are sequentially added to the model, starting with the one that
-    has the highest univariate predictive power, and then proceeding with those that
-    jointly lead to the best fit, optimizing for selection AUC or RMSE. Interaction
-    effects are not explicitly modeled, yet they are implicitly present given the
-    feature selection and the underlying feature correlation structure.
+    has the highest univariate predictive power, and then proceeding with those
+    that jointly lead to the best fit, optimizing (tuning) for model
+    performance on the selection set, measured with AUC (default for
+    classification), RMSE (default for regression) or a custom metric (when
+    passing the metric parameter and possibly also metric_args and
+    metric_kwargs).
+
+    Interaction effects are not explicitly modeled, yet they are implicitly
+    present given the feature selection and the underlying feature
+    correlation structure.
 
     Attributes
     ----------
```
```diff
@@ -33,12 +39,41 @@ class ForwardFeatureSelection:
         Whether or not the model coefficients should all be positive (no sign flips).
     self._fitted_models : list
         List of fitted models.
+    metric : Callable (function), optional
+        Function that evaluates the model's performance, by calculating a
+        certain evaluation metric.
+        For more details about the possibilities here, refer to the
+        documentation of the metric parameter in the evaluate() function of
+        either models.LogisticRegressionModel or models.LinearRegressionModel,
+        depending on which model you are going to use in this forward feature
+        selection.
+    metric_args : dict, optional
+        Arguments (for example: lift_at=0.05) to be passed to the metric
+        function when evaluating the model's performance.
+        Example metric function in which this is required:
+        ClassificationEvaluator._compute_lift(y_true=y_true,
+                                              y_score=y_score,
+                                              lift_at=0.05)
+    metric_kwargs : dict, optional
+        Keyword arguments (for example: normalize=True) to be passed to the
+        metric function when evaluating the model's performance.
+        Example metric function in which this is required (from
+        scikit-learn):
+        def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None)
+    higher_is_better : bool, optional
+        Whether the model is performing better if the chosen evaluation
+        metric results in a higher score (higher_is_better=True),
+        or worse (higher_is_better=False, meaning "lower is better").
     """
 
     def __init__(self,
                  model_type: str="classification",
                  max_predictors: int=50,
-                 pos_only: bool=True):
+                 pos_only: bool=True,
+                 metric: Optional[Callable] = None,
+                 metric_args: Optional[dict] = None,
+                 metric_kwargs: Optional[dict] = None,
+                 higher_is_better: Optional[bool] = None):
 
         self.model_type = model_type
         if model_type == "classification":
```
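To make the new constructor arguments concrete, here is a hedged usage sketch (not part of this diff). The import path and the choice of scikit-learn's accuracy_score as the custom metric are assumptions; accuracy_score is only quoted in the docstring above as an example signature.

```python
# Usage sketch -- import path and metric choice are assumptions, not part of the PR.
from sklearn.metrics import accuracy_score

from cobra.model_building import ForwardFeatureSelection  # import path assumed

forward_selection = ForwardFeatureSelection(
    model_type="classification",
    max_predictors=30,
    pos_only=True,
    metric=accuracy_score,              # custom evaluation metric
    metric_kwargs={"normalize": True},  # keyword arguments forwarded to the metric
    higher_is_better=True,              # accuracy: higher scores are better
)
```

For a lift-style metric, metric_args (for example {"lift_at": 0.05}) would be passed instead, as the attribute documentation above suggests.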
```diff
@@ -49,6 +84,37 @@ def __init__(self,
         self.max_predictors = max_predictors
         self.pos_only = pos_only
 
+        if higher_is_better is None:
+            if metric is None:
+                if self.MLModel == LogisticRegressionModel:
+                    # If no custom evaluation metric is chosen,
+                    # the LogisticRegressionModel uses AUC as default metric,
+                    # so "higher is better" evaluation logic is applied on the
+                    # evaluation scores.
+                    self.higher_is_better = True
+                elif self.MLModel == LinearRegressionModel:
+                    # If no custom evaluation metric is chosen,
+                    # the LinearRegressionModel uses RMSE as default metric,
+                    # so "lower is better" evaluation logic is applied on the
+                    # evaluation scores.
+                    self.higher_is_better = False
+                else:
+                    raise ValueError("The configured machine learning model is "
+                                     "not the standard logistic regression or "
+                                     "linear regression model. "
+                                     "Therefore, please fill the metric and "
+                                     "higher_is_better arguments.")
+            else:
+                raise ValueError("You chose a custom evaluation metric. "
+                                 "Please fill the higher_is_better argument.")
+        else:
+            self.higher_is_better = higher_is_better
+
+        self.metric = metric
+        self.metric_args = metric_args
+        self.metric_kwargs = metric_kwargs
+
         self._fitted_models = []
 
     def get_model_from_step(self, step: int):
```

Contributor comment on lines +87 to +100: I am not really convinced this is the best way to do this.

Contributor comment on lines +102 to +106: Which other types are supported? L79-L82 only states 2 types.
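A small sketch of how the guard above behaves, under the same import assumption as the previous example: with the default metrics, higher_is_better is inferred from the model type, while a custom metric without an explicit higher_is_better is rejected with the ValueError introduced in this hunk.

```python
from sklearn.metrics import accuracy_score

from cobra.model_building import ForwardFeatureSelection  # import path assumed

# Default metric (AUC for classification): higher_is_better is inferred as True.
ffs_default = ForwardFeatureSelection(model_type="classification")
print(ffs_default.higher_is_better)  # True

# Custom metric without higher_is_better: rejected by the new guard.
try:
    ForwardFeatureSelection(model_type="classification", metric=accuracy_score)
except ValueError as exc:
    print(exc)  # "You chose a custom evaluation metric. Please fill the higher_is_better argument."
```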
```diff
@@ -77,8 +143,7 @@ def get_model_from_step(self, step: int):
 
     def compute_model_performances(self, data: pd.DataFrame,
                                    target_column_name: str,
-                                   splits: list=["train", "selection", "validation"],
-                                   metric: Optional[Callable]=None,
+                                   splits: list=["train", "selection", "validation"]
                                    ) -> pd.DataFrame:
         """Compute for each model the performance for different sets (e.g.
         train-selection-validation) and return them along with a list of
```
```diff
@@ -94,13 +159,6 @@ def compute_model_performances(self, data: pd.DataFrame,
             Name of the target column.
         splits : list, optional
             List of splits to compute performance on.
-        metric: Callable (function), optional
-            Function that computes an evaluation metric to evaluate the model's
-            performances, instead of the default metric (AUC for
-            classification, RMSE for regression).
-            The function should require y_true and y_pred arguments.
-            Metric functions from sklearn can be used, for example, see
-            https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics.
 
         Returns
         -------
```
```diff
@@ -126,8 +184,9 @@ def compute_model_performances(self, data: pd.DataFrame,
                     data[data["split"] == split],
                     data[data["split"] == split][target_column_name],
                     split=split,  # parameter used for caching
-                    metric=metric
-                )
+                    metric=self.metric,
+                    metric_args=self.metric_args,
+                    metric_kwargs=self.metric_kwargs)
                 for split in splits
             })
```
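Since compute_model_performances() no longer takes a metric argument, the metric configured on the instance is reused for every split. A hedged call sketch follows; the basetable DataFrame with a "split" column, the target and predictor names, and the fit() signature are assumptions, not shown in this diff.

```python
# Sketch: evaluate every fitted step on each split with the instance-level metric.
# "basetable", "target" and "candidate_predictors" are hypothetical names.
forward_selection.fit(basetable,
                      target_column_name="target",
                      predictors=candidate_predictors)

performances = forward_selection.compute_model_performances(
    data=basetable,
    target_column_name="target",
    splits=["train", "selection", "validation"],
)
print(performances.head())
```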
```diff
@@ -290,14 +349,14 @@ def _find_next_best_model(self,
         """
         # placeholders
         best_model = None
-        if self.MLModel == LogisticRegressionModel:
-            best_performance = -1  # AUC metric is used
-        elif self.MLModel == LinearRegressionModel:
-            best_performance = float("inf")  # RMSE metric is used
+        # Set the performance initially with the worst possible value,
+        # depending on whether higher_is_better is true or false for the
+        # chosen evaluation metric.
+        if self.higher_is_better:
+            best_performance = -float("inf")
         else:
-            raise ValueError("No metric comparison method has been configured "
-                             "for the given model_type specified as "
-                             "ForwardFeatureSelection argument.")
+            best_performance = float("inf")
 
         fit_data = train_data[train_data["split"] == "train"]  # data to fit the models with
         sel_data = train_data[train_data["split"] == "selection"]  # data to compare the models with
```
```diff
@@ -311,19 +370,20 @@ def _find_next_best_model(self,
                 performance = (model
                                .evaluate(sel_data[current_predictors + [pred]],
                                          sel_data[target_column_name],
-                                         split="selection"))
+                                         split="selection",
+                                         metric=self.metric,
+                                         metric_args=self.metric_args,
+                                         metric_kwargs=self.metric_kwargs))
 
                 if self.pos_only and (not (model.get_coef() >= 0).all()):
                     continue
 
                 # Check if the model is better than the current best model
                 # and if it is, replace the current best.
-                if self.MLModel == LogisticRegressionModel \
-                        and performance > best_performance:  # AUC metric is used
+                if self.higher_is_better and performance > best_performance:
                     best_performance = performance
                     best_model = model
-                elif self.MLModel == LinearRegressionModel \
-                        and performance < best_performance:  # RMSE metric is used
+                elif not self.higher_is_better and performance < best_performance:
                     best_performance = performance
                     best_model = model
```
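The comparison logic above reduces to a single pattern: start from the worst possible score for the chosen direction and keep the candidate that improves on it. A self-contained sketch of that pattern (plain Python, independent of the classes in this diff):

```python
# Start from the worst possible value for the metric's direction, then keep the
# best candidate seen so far -- the same idea _find_next_best_model now applies.
def pick_best(scores, higher_is_better):
    best_score = -float("inf") if higher_is_better else float("inf")
    best_index = None
    for index, score in enumerate(scores):
        better = score > best_score if higher_is_better else score < best_score
        if better:
            best_score, best_index = score, index
    return best_index, best_score

print(pick_best([0.71, 0.83, 0.78], higher_is_better=True))   # AUC-like -> (1, 0.83)
print(pick_best([12.4, 9.8, 10.1], higher_is_better=False))   # RMSE-like -> (1, 9.8)
```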