Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions cobra/model_building/forward_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,21 @@ class ForwardFeatureSelection:
selection.
pos_only : bool
Whether or not the model coefficients should all be positive (no sign flips).
model_kwargs: dict, optional
An optional dictionary of hyperparameters and their values to
override the default hyperparameters that Cobra uses when
constructing the model during forward selection.
For more info, see the kwargs section in the documentation of the
model class that is used (e.g. LinearRegressionModel).
self._fitted_models : list
List of fitted models.
"""

def __init__(self,
model_type: str="classification",
max_predictors: int=50,
pos_only: bool=True):
pos_only: bool=True,
model_kwargs: Optional[dict]=None):

self.model_type = model_type
if model_type == "classification":
Expand All @@ -49,6 +56,8 @@ def __init__(self,
self.max_predictors = max_predictors
self.pos_only = pos_only

self.model_kwargs = model_kwargs

self._fitted_models = []

def get_model_from_step(self, step: int):
Expand Down Expand Up @@ -347,7 +356,10 @@ def _train_model(self, train_data: pd.DataFrame, target_column_name: str,
self.MLModel
Trained model.
"""
model = self.MLModel()
if self.model_kwargs is None:
model = self.MLModel()
else:
model = self.MLModel(**self.model_kwargs)

model.fit(train_data[predictors], train_data[target_column_name])

Expand Down
52 changes: 44 additions & 8 deletions cobra/model_building/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,29 @@ class LogisticRegressionModel:
scikit-learn logistic regression model.
predictors : list
List of predictors used in the model.
kwargs: dict, optional
Optional keyword arguments (or a dictionary unpacked with **) that
override Cobra's default choice of hyperparameter values for the
scikit-learn LogisticRegression model that is used behind the scenes.
Cobra's defaults are: fit_intercept=True, C=1e9, solver='liblinear',
random_state=42.
See scikit-learn's documentation of the possible hyperparameters and
values that can be set:
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
"""

def __init__(self):
self.logit = LogisticRegression(fit_intercept=True, C=1e9,
solver='liblinear', random_state=42)
def __init__(self, **kwargs):
# Initialize a scikit-learn logistic regression model,
# with custom arguments passed by the data scientist (if any),
# supplemented with Cobra's default arguments, if a custom value was
# not provided by the data scientist for overriding purposes:
model_kwargs = dict(fit_intercept=True, C=1e9, solver='liblinear',
random_state=42)
model_kwargs.update(kwargs)
self.logit = LogisticRegression(**model_kwargs)

self._is_fitted = False
# placeholder to keep track of a list of predictors
self.predictors = []
self.predictors = [] # placeholder to keep track of a list of predictors
self._eval_metrics_by_split = {}

def serialize(self) -> dict:
Expand Down Expand Up @@ -104,7 +119,12 @@ def get_intercept(self) -> float:
float
Intercept of the model.
"""
return self.logit.intercept_[0]
if self.logit.fit_intercept:
return self.logit.intercept_[0]
else:
raise ValueError("An intercept cannot be returned: this "
"LogisticRegressionModel was created with "
"the hyperparameter fit_intercept set to False.")

def get_coef_by_predictor(self) -> dict:
"""Returns a dictionary mapping predictor (key) to coefficient (value).
Expand Down Expand Up @@ -258,10 +278,26 @@ class LinearRegressionModel:
scikit-learn linear regression model.
predictors : list
List of predictors used in the model.
kwargs: dict, optional
Optional keyword arguments (or a dictionary unpacked with **) that
override Cobra's default choice of hyperparameter values for the
scikit-learn LinearRegression model that is used behind the scenes.
Cobra's only default setting is fit_intercept=True, but there are
other hyperparameters that can be set too.
See scikit-learn's documentation of the possible hyperparameters and
values that can be set:
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
"""

def __init__(self):
self.linear = LinearRegression(fit_intercept=True)
def __init__(self, **kwargs):
# Initialize a scikit-learn linear regression model,
# with custom arguments passed by the data scientist (if any),
# supplemented with Cobra's default arguments, if a custom value was
# not provided by the data scientist for overriding purposes:
model_kwargs = dict(fit_intercept=True)
model_kwargs.update(kwargs)
self.linear = LinearRegression(**model_kwargs)

self._is_fitted = False
self.predictors = [] # placeholder to keep track of a list of predictors
self._eval_metrics_by_split = {}
Expand Down