revert some changes
Reinier Koops committed Mar 28, 2024
1 parent 2cb8724 commit 22b4bc5
Showing 7 changed files with 308 additions and 32 deletions.
172 changes: 172 additions & 0 deletions docs/tutorials/nb_custom_scoring.ipynb

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions probatus/feature_elimination/feature_elimination.py
@@ -8,15 +8,14 @@
from sklearn.base import clone, is_classifier, is_regressor
from sklearn.model_selection import check_cv
from sklearn.model_selection._search import BaseSearchCV
from sklearn.metrics import get_scorer
from lightgbm import early_stopping, log_evaluation

from probatus.utils import (
BaseFitComputePlotClass,
assure_pandas_series,
calculate_shap_importance,
preprocess_data,
preprocess_labels,
get_single_scorer,
shap_calc,
)

@@ -145,7 +144,7 @@ def __init__(
[cv parameter](https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFECV.html).
If None, then cv of 5 is used.
scoring (string or sklearn Scorer, optional):
scoring (string or probatus.utils.Scorer, optional):
Metric for which the model performance is calculated. It can be either a metric name aligned with predefined
[classification scorer names in sklearn](https://scikit-learn.org/stable/modules/model_evaluation.html).
@@ -170,8 +169,7 @@ def __init__(
self.step = self.validate_step(step)
self.min_features_to_select = self.validate_min_features(min_features_to_select)
self.cv = cv
self.scoring = scoring # (str) name of the metric
self.scorer = get_scorer(scoring)
self.scorer = get_single_scorer(scoring)
self.n_jobs = n_jobs
self.verbose = verbose
self.random_state = random_state
@@ -380,8 +378,8 @@ def _get_feature_shap_values_per_fold(
model = model.fit(X_train, y_train)

# Score the model
score_train = self.scorer(model, X_train, y_train)
score_val = self.scorer(model, X_val, y_val)
score_train = self.scorer.score(model, X_train, y_train)
score_val = self.scorer.score(model, X_val, y_val)

# Compute SHAP values
shap_values = shap_calc(model, X_val, verbose=self.verbose, random_state=self.random_state, **shap_kwargs)
@@ -937,7 +935,7 @@ def plot(self, show=True, **figure_kwargs):
)

plt.xlabel("Number of features")
plt.ylabel(f"Performance {self.scoring}")
plt.ylabel(f"Performance {self.scorer.metric_name}")
plt.title("Backwards Feature Elimination using SHAP & CV")
plt.legend(loc="lower left")
fig.axes[0].invert_xaxis()
@@ -1176,6 +1174,8 @@ def _get_fit_params_lightGBM(
Returns:
dict: fit parameters
"""
from lightgbm import early_stopping, log_evaluation

fit_params = {
"X": X_train,
"y": y_train,
@@ -1476,8 +1476,8 @@ def _get_feature_shap_values_per_fold(
model = model.fit(**fit_params)

# Score the model
score_train = self.scorer(model, X_train, y_train)
score_val = self.scorer(model, X_val, y_val)
score_train = self.scorer.score(model, X_train, y_train)
score_val = self.scorer.score(model, X_val, y_val)

# Compute SHAP values
shap_values = shap_calc(model, X_val, verbose=self.verbose, random_state=self.random_state, **shap_kwargs)
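Taken together, these hunks route every train/validation score in `ShapRFECV` through the restored `probatus.utils.Scorer` wrapper instead of a raw sklearn scorer. Below is a minimal sketch of what the `scoring` argument can now accept, either a sklearn metric name or a custom `Scorer`; the exact constructor and `fit_compute` signatures are assumptions based on the hunks above.

```python
import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.datasets import make_classification
from sklearn.metrics import f1_score, make_scorer

from probatus.feature_elimination import ShapRFECV
from probatus.utils import Scorer

X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X = pd.DataFrame(X, columns=[f"f{i}" for i in range(10)])
model = LGBMClassifier(n_estimators=50, random_state=0)

# A plain string is wrapped into a Scorer internally via get_single_scorer.
rfe = ShapRFECV(model, step=0.2, cv=5, scoring="roc_auc", random_state=0)

# A probatus Scorer allows any custom metric built with sklearn's make_scorer.
f1_scorer = Scorer("f1", custom_scorer=make_scorer(f1_score))
rfe_custom = ShapRFECV(model, step=0.2, cv=5, scoring=f1_scorer, random_state=0)

report = rfe_custom.fit_compute(X, y)  # train/val columns are computed via Scorer.score
```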
13 changes: 6 additions & 7 deletions probatus/interpret/model_interpret.py
@@ -1,7 +1,6 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import get_scorer
from shap import summary_plot
from shap.plots._waterfall import waterfall_legacy

@@ -12,6 +11,7 @@
calculate_shap_importance,
preprocess_data,
preprocess_labels,
get_single_scorer,
shap_calc,
)

@@ -86,8 +86,7 @@ def __init__(self, model, scoring="roc_auc", verbose=0, random_state=None):
reproducible results set it to an integer.
"""
self.model = model
self.scoring = scoring # (str) name of the metric
self.scorer = get_scorer(scoring)
self.scorer = get_single_scorer(scoring)
self.verbose = verbose
self.random_state = random_state

@@ -145,12 +144,12 @@ def fit(
self.class_names = ["Negative Class", "Positive Class"]

# Calculate Metrics
self.train_score = self.scorer(self.model, self.X_train, self.y_train)
self.test_score = self.scorer(self.model, self.X_test, self.y_test)
self.train_score = self.scorer.score(self.model, self.X_train, self.y_train)
self.test_score = self.scorer.score(self.model, self.X_test, self.y_test)

self.results_text = (
f"Train {self.scoring}: {np.round(self.train_score, 3)},\n"
f"Test {self.scoring}: {np.round(self.test_score, 3)}."
f"Train {self.scorer.metric_name}: {np.round(self.train_score, 3)},\n"
f"Test {self.scorer.metric_name}: {np.round(self.test_score, 3)}."
)

(
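The same substitution in `ShapModelInterpreter`: the wrapped `Scorer` now both computes the train/test scores and supplies the metric name shown in `results_text`. A hedged sketch of the resulting usage, assuming `fit` accepts the train/test split as suggested by the hunk above:

```python
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from probatus.interpret import ShapModelInterpreter
from probatus.utils import Scorer

X, y = make_classification(n_samples=500, n_features=6, random_state=0)
X = pd.DataFrame(X, columns=[f"f{i}" for i in range(6)])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

model = RandomForestClassifier(max_depth=3, random_state=0).fit(X_train, y_train)

# scoring accepts a metric name or a probatus Scorer; Scorer.metric_name labels the report.
interpreter = ShapModelInterpreter(model, scoring=Scorer("roc_auc"))
interpreter.fit(X_train, X_test, y_train, y_test)
print(interpreter.results_text)  # e.g. "Train roc_auc: 1.0,\nTest roc_auc: 0.95."
```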
18 changes: 8 additions & 10 deletions probatus/sample_similarity/resemblance_model.py
@@ -7,9 +7,8 @@
from shap import summary_plot
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
from sklearn.metrics import get_scorer

from probatus.utils import BaseFitComputePlotClass, preprocess_data, preprocess_labels
from probatus.utils import BaseFitComputePlotClass, preprocess_data, preprocess_labels, get_single_scorer
from probatus.utils.shap_helpers import calculate_shap_importance, shap_calc


@@ -71,8 +70,7 @@ class is 'roc_auc'.
self.n_jobs = n_jobs
self.random_state = random_state
self.verbose = verbose
self.scoring = scoring # (str) name of the metric
self.scorer = get_scorer(scoring)
self.scorer = get_single_scorer(scoring)

def _init_output_variables(self):
"""
@@ -153,20 +151,20 @@ def fit(self, X1, X2, column_names=None, class_names=None):
)
self.model.fit(self.X_train, self.y_train)

self.train_score = np.round(self.scorer(self.model, self.X_train, self.y_train), 3)
self.test_score = np.round(self.scorer(self.model, self.X_test, self.y_test), 3)
self.train_score = np.round(self.scorer.score(self.model, self.X_train, self.y_train), 3)
self.test_score = np.round(self.scorer.score(self.model, self.X_test, self.y_test), 3)

self.results_text = (
f"Train {self.scoring}: {np.round(self.train_score, 3)},\n"
f"Test {self.scoring}: {np.round(self.test_score, 3)}."
f"Train {self.scorer.metric_name}: {np.round(self.train_score, 3)},\n"
f"Test {self.scorer.metric_name}: {np.round(self.test_score, 3)}."
)
if self.verbose > 1:
logger.info(f"Finished model training: \n{self.results_text}")

if self.verbose > 0:
if self.train_score > self.test_score:
warnings.warn(
f"Train {self.scoring} > Test {self.scoring}, which might indicate "
f"Train {self.scorer.metric_name} > Test {self.scorer.metric_name}, which might indicate "
f"an overfit. \n Strong overfit might lead to misleading conclusions when analysing "
f"feature importance. Consider retraining with more regularization applied to the model."
)
@@ -386,7 +384,7 @@ def fit(self, X1, X2, column_names=None, class_names=None):
self.model,
self.X_test,
self.y_test,
scoring=self.scorer,
scoring=self.scorer.scorer,
n_repeats=self.iterations,
n_jobs=self.n_jobs,
)
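One subtlety in this file is the final hunk: sklearn's `permutation_importance` expects a plain scorer callable (or metric name) rather than the probatus wrapper, hence the unwrapped `self.scorer.scorer`. A minimal sklearn-only sketch of that underlying call:

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.metrics import get_scorer

X, y = make_classification(n_samples=400, n_features=5, random_state=0)
model = RandomForestClassifier(random_state=0).fit(X, y)

# get_scorer returns the same kind of callable that Scorer stores in its .scorer attribute.
result = permutation_importance(
    model, X, y, scoring=get_scorer("roc_auc"), n_repeats=10, n_jobs=-1
)
print(result.importances_mean)
```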
13 changes: 8 additions & 5 deletions probatus/utils/__init__.py
@@ -5,20 +5,23 @@
preprocess_data,
preprocess_labels,
)
from .scoring import Scorer, get_single_scorer
from .shap_helpers import shap_calc, shap_to_df, calculate_shap_importance
from ._utils import assure_list_of_strings
from .base_class_interface import BaseFitComputeClass, BaseFitComputePlotClass

__all__ = [
"NotFittedError",
"assure_pandas_df",
"assure_list_of_strings",
"shap_calc",
"shap_to_df",
"calculate_shap_importance",
"assure_pandas_df",
"assure_pandas_series",
"preprocess_data",
"preprocess_labels",
"BaseFitComputeClass",
"BaseFitComputePlotClass",
"NotFittedError",
"get_single_scorer",
"Scorer",
"shap_calc",
"shap_to_df",
"calculate_shap_importance",
]
103 changes: 103 additions & 0 deletions probatus/utils/scoring.py
@@ -0,0 +1,103 @@
from sklearn.metrics import get_scorer


def get_single_scorer(scoring):
"""
Returns a Scorer object based on the provided scoring argument.
Args:
scoring (string or probatus.utils.Scorer, optional):
Metric for which the model performance is calculated. It can be either a metric name aligned with
predefined classification scorer names in sklearn
([link](https://scikit-learn.org/stable/modules/model_evaluation.html)).
Another option is using probatus.utils.Scorer to define a custom metric.
Returns:
(probatus.utils.Scorer):
Scorer that can be used for scoring models
"""
if isinstance(scoring, str):
return Scorer(scoring)
elif isinstance(scoring, Scorer):
return scoring
else:
raise ValueError("The scoring parameter should be either a string or a probatus.utils.Scorer instance.")


class Scorer:
"""
Scores a given machine learning model based on the provided metric name and optionally a custom scoring function.
Examples:
```python
from probatus.utils import Scorer
from sklearn.metrics import make_scorer
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
# Make ROC AUC scorer
scorer1 = Scorer('roc_auc')
# Make a custom scorer with the following function:
def custom_metric(y_true, y_pred):
return (y_true == y_pred).sum()
scorer2 = Scorer('custom_metric', custom_scorer=make_scorer(custom_metric))
# Prepare two samples
feature_names = ['f1', 'f2', 'f3', 'f4']
X, y = make_classification(n_samples=1000, n_features=4, random_state=0)
X = pd.DataFrame(X, columns=feature_names)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Prepare and fit the model. Remember to set class_weight="balanced" or an equivalent.
model = RandomForestClassifier(class_weight='balanced', n_estimators=100, max_depth=2, random_state=0)
model = model.fit(X_train, y_train)
# Score model
score_test_scorer1 = scorer1.score(model, X_test, y_test)
score_test_scorer2 = scorer2.score(model, X_test, y_test)
print(f'Test ROC AUC is {score_test_scorer1}, Test {scorer2.metric_name} is {score_test_scorer2}')
```
"""

def __init__(self, metric_name, custom_scorer=None):
"""
Initializes the class.
Args:
metric_name (str): Name of the metric used to evaluate the model.
If the custom_scorer is not passed, the
metric name needs to be aligned with the classification scorer names in sklearn
([link](https://scikit-learn.org/stable/modules/model_evaluation.html)).
custom_scorer (sklearn.metrics Scorer callable, optional): Callable
that can score samples.
"""
self.metric_name = metric_name
if custom_scorer is not None:
self.scorer = custom_scorer
else:
self.scorer = get_scorer(self.metric_name)

def score(self, model, X, y):
"""
Scores the model on the provided samples, based on the configured metric.
Args:
model (model object):
Model to be scored.
X (array-like of shape (n_samples,n_features)):
Samples on which the model is scored.
y (array-like of shape (n_samples,)):
Labels on which the model is scored.
Returns:
(float):
Score of the model on the provided samples.
"""
return self.scorer(model, X, y)
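The `Scorer` docstring above only exercises the class directly; below is a short sketch of how `get_single_scorer` normalizes the `scoring` argument that the classes in this commit receive.

```python
from sklearn.metrics import balanced_accuracy_score, make_scorer

from probatus.utils import Scorer, get_single_scorer

# A string is wrapped into a Scorer backed by sklearn.metrics.get_scorer.
scorer_a = get_single_scorer("roc_auc")
print(scorer_a.metric_name)  # "roc_auc"

# An existing Scorer passes through unchanged, custom callable and all.
custom = Scorer("balanced_acc", custom_scorer=make_scorer(balanced_accuracy_score))
assert get_single_scorer(custom) is custom

# Anything else raises a ValueError.
try:
    get_single_scorer(42)
except ValueError as exc:
    print(exc)
```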
1 change: 1 addition & 0 deletions tests/docs/test_docstring.py
@@ -21,6 +21,7 @@
probatus.interpret.DependencePlotter,
probatus.sample_similarity.SHAPImportanceResemblance,
probatus.sample_similarity.PermutationImportanceResemblance,
probatus.utils.Scorer,
]

CLASSES_TO_TEST_LGBM = [
