diff --git a/mne/decoding/base.py b/mne/decoding/base.py
index 247c6f89f2d..52c187198ff 100644
--- a/mne/decoding/base.py
+++ b/mne/decoding/base.py
@@ -10,7 +10,7 @@
 import datetime as dt
 import numbers

 from ..parallel import parallel_func
-from ..fixes import BaseEstimator, is_classifier, _get_check_scoring
+from ..fixes import BaseEstimator, _get_check_scoring
 from ..utils import warn, verbose

@@ -51,6 +51,16 @@ class LinearModel(BaseEstimator):
     .. footbibliography::
     """

+    _model_attr_wrap = (
+        "transform",
+        "predict",
+        "predict_proba",
+        "_estimator_type",
+        "decision_function",
+        "score",
+        "classes_",
+    )
+
     def __init__(self, model=None):  # noqa: D102
         if model is None:
             from sklearn.linear_model import LogisticRegression
@@ -58,7 +68,20 @@ def __init__(self, model=None):  # noqa: D102
             model = LogisticRegression(solver="liblinear")

         self.model = model
-        self._estimator_type = getattr(model, "_estimator_type", None)
+
+    def _more_tags(self):
+        return {"no_validation": True}
+
+    def __getattr__(self, attr):
+        """Delegate selected attributes to the wrapped model."""
+        if attr in LinearModel._model_attr_wrap:
+            return getattr(self.model, attr)
+        elif attr == "fit_transform" and hasattr(self.model, "fit_transform"):
+            return self._fit_transform
+        raise AttributeError(attr)  # keep hasattr() checks working

+    def _fit_transform(self, X, y):
+        return self.fit(X, y).transform(X)

     def fit(self, X, y, **fit_params):
         """Estimate the coefficients of the linear model.
@@ -120,110 +143,12 @@ def filters_(self):
             filters = filters[0]
         return filters

-    def transform(self, X):
-        """Transform the data using the linear model.
-
-        Parameters
-        ----------
-        X : array, shape (n_samples, n_features)
-            The data to transform.
-
-        Returns
-        -------
-        y_pred : array, shape (n_samples,)
-            The predicted targets.
-        """
-        return self.model.transform(X)
-
-    def fit_transform(self, X, y):
-        """Fit the data and transform it using the linear model.
-
-        Parameters
-        ----------
-        X : array, shape (n_samples, n_features)
-            The training input samples to estimate the linear coefficients.
-        y : array, shape (n_samples,)
-            The target values.
-
-        Returns
-        -------
-        y_pred : array, shape (n_samples,)
-            The predicted targets.
-        """
-        return self.fit(X, y).transform(X)
-
-    def predict(self, X):
-        """Compute predictions of y from X.
-
-        Parameters
-        ----------
-        X : array, shape (n_samples, n_features)
-            The data used to compute the predictions.
-
-        Returns
-        -------
-        y_pred : array, shape (n_samples,)
-            The predictions.
-        """
-        return self.model.predict(X)
-
-    def predict_proba(self, X):
-        """Compute probabilistic predictions of y from X.
-
-        Parameters
-        ----------
-        X : array, shape (n_samples, n_features)
-            The data used to compute the predictions.
-
-        Returns
-        -------
-        y_pred : array, shape (n_samples, n_classes)
-            The probabilities.
-        """
-        return self.model.predict_proba(X)
-
-    def decision_function(self, X):
-        """Compute distance from the decision function of y from X.
-
-        Parameters
-        ----------
-        X : array, shape (n_samples, n_features)
-            The data used to compute the predictions.
-
-        Returns
-        -------
-        y_pred : array, shape (n_samples, n_classes)
-            The distances.
-        """
-        return self.model.decision_function(X)
-
-    def score(self, X, y):
-        """Score the linear model computed on the given test data.
-
-        Parameters
-        ----------
-        X : array, shape (n_samples, n_features)
-            The data to transform.
-        y : array, shape (n_samples,)
-            The target values.
-
-        Returns
-        -------
-        score : float
-            Score of the linear model.
- """ - return self.model.score(X, y) - - # Needed for sklearn 1.3+ - @property - def classes_(self): - """The classes (pass-through to model).""" - return self.model.classes_ - def _set_cv(cv, estimator=None, X=None, y=None): """Set the default CV depending on whether clf is classifier/regressor.""" # Detect whether classification or regression + from sklearn.base import is_classifier + if estimator in ["classifier", "regressor"]: est_is_classifier = estimator == "classifier" else: @@ -440,8 +365,7 @@ def cross_val_multiscore( Array of scores of the estimator for each run of the cross validation. """ # This code is copied from sklearn - - from sklearn.base import clone + from sklearn.base import clone, is_classifier from sklearn.utils import indexable from sklearn.model_selection._split import check_cv diff --git a/mne/decoding/receptive_field.py b/mne/decoding/receptive_field.py index 6fa38a4f72f..63cf638a3f3 100644 --- a/mne/decoding/receptive_field.py +++ b/mne/decoding/receptive_field.py @@ -9,7 +9,6 @@ from .base import get_coef, BaseEstimator, _check_estimator from .time_delaying_ridge import TimeDelayingRidge -from ..fixes import is_regressor from ..utils import _validate_type, verbose, fill_doc @@ -128,6 +127,9 @@ def __init__( self.n_jobs = n_jobs self.edge_correction = edge_correction + def _more_tags(self): + return {"no_validation": True} + def __repr__(self): # noqa: D105 s = "tmin, tmax : (%.3f, %.3f), " % (self.tmin, self.tmax) estimator = self.estimator @@ -153,7 +155,11 @@ def _delay_and_reshape(self, X, y=None): if not isinstance(self.estimator_, TimeDelayingRidge): # X is now shape (n_times, n_epochs, n_feats, n_delays) X = _delay_time_series( - X, self.tmin, self.tmax, self.sfreq, fill_mean=self.fit_intercept + X, + self.tmin, + self.tmax, + self.sfreq, + fill_mean=self.fit_intercept_, ) X = _reshape_for_est(X) # Concat times + epochs @@ -183,7 +189,7 @@ def fit(self, X, y): "scoring must be one of %s, got" "%s " % (sorted(_SCORERS.keys()), self.scoring) ) - from sklearn.base import clone + from sklearn.base import clone, is_regressor X, y, _, self._y_dim = self._check_dimensions(X, y) @@ -199,13 +205,15 @@ def fit(self, X, y): if isinstance(self.estimator, numbers.Real): if self.fit_intercept is None: - self.fit_intercept = True + self.fit_intercept_ = True + else: + self.fit_intercept_ = self.fit_intercept estimator = TimeDelayingRidge( self.tmin, self.tmax, self.sfreq, alpha=self.estimator, - fit_intercept=self.fit_intercept, + fit_intercept=self.fit_intercept_, n_jobs=self.n_jobs, edge_correction=self.edge_correction, ) @@ -221,7 +229,7 @@ def fit(self, X, y): "same fit_intercept value or use fit_intercept=None" % (estimator.fit_intercept, self.fit_intercept) ) - self.fit_intercept = estimator.fit_intercept + self.fit_intercept_ = estimator.fit_intercept else: raise ValueError( "`estimator` must be a float or an instance" @@ -354,6 +362,8 @@ def score(self, X, y): return scores def _check_dimensions(self, X, y, predict=False): + _validate_type(X, "array-like", "X") + _validate_type(y, ("array-like", None), "y") X_dim = X.ndim y_dim = y.ndim if y is not None else 0 if X_dim == 2: diff --git a/mne/decoding/search_light.py b/mne/decoding/search_light.py index f2671b7ea11..a9d9af1520b 100644 --- a/mne/decoding/search_light.py +++ b/mne/decoding/search_light.py @@ -25,6 +25,8 @@ class SlidingEstimator(BaseEstimator, TransformerMixin): %(scoring)s %(n_jobs)s %(position)s + allow_2d : bool + If True, allow 2D data as input (i.e. n_samples, n_features). 
     %(verbose)s

     Attributes
     ----------
@@ -35,16 +37,30 @@ class SlidingEstimator(BaseEstimator, TransformerMixin):

     @verbose
     def __init__(
-        self, base_estimator, scoring=None, n_jobs=None, *, position=0, verbose=None
+        self,
+        base_estimator,
+        scoring=None,
+        n_jobs=None,
+        *,
+        position=0,
+        allow_2d=False,
+        verbose=None,
     ):  # noqa: D102
         _check_estimator(base_estimator)
-        self._estimator_type = getattr(base_estimator, "_estimator_type", None)
         self.base_estimator = base_estimator
         self.n_jobs = n_jobs
         self.scoring = scoring
         self.position = position
+        self.allow_2d = allow_2d
         self.verbose = verbose

+    def _more_tags(self):
+        return {"no_validation": True, "requires_fit": False}
+
+    @property
+    def _estimator_type(self):
+        return getattr(self.base_estimator, "_estimator_type", None)
+
     def __repr__(self):  # noqa: D105
         repr_str = "<" + super(SlidingEstimator, self).__repr__()
         if hasattr(self, "estimators_"):
@@ -72,12 +88,12 @@ def fit(self, X, y, **fit_params):
         self : object
             Return self.
         """
-        self._check_Xy(X, y)
+        X = self._check_Xy(X, y)
         parallel, p_func, n_jobs = parallel_func(
             _sl_fit, self.n_jobs, max_jobs=X.shape[-1], verbose=False
         )
         self.estimators_ = list()
-        self.fit_params = fit_params
+        self.fit_params_ = fit_params

         # For fitting, the parallelization is across estimators.
         context = _create_progressbar_context(self, X, "Fitting")
@@ -123,7 +139,7 @@ def fit_transform(self, X, y, **fit_params):

     def _transform(self, X, method):
         """Aux. function to make parallel predictions/transformation."""
-        self._check_Xy(X)
+        X = self._check_Xy(X)
         method = _check_method(self.base_estimator, method)
         if X.shape[-1] != len(self.estimators_):
             raise ValueError("The number of estimators does not match " "X.shape[-1]")
@@ -144,7 +160,7 @@ def _transform(self, X, method):
         )
         y_pred = np.concatenate(y_pred, axis=1)
-        return y_pred
+        return y_pred.astype(X.dtype)

     def transform(self, X):
         """Transform each data slice/task with a series of independent estimators.

@@ -237,11 +253,21 @@ def decision_function(self, X):

     def _check_Xy(self, X, y=None):
         """Aux. function to check input data."""
+        X = np.asarray(X)
         if y is not None:
+            y = np.asarray(y)
             if len(X) != len(y) or len(y) < 1:
                 raise ValueError("X and y must have the same length.")
         if X.ndim < 3:
-            raise ValueError("X must have at least 3 dimensions.")
+            err = None
+            if not self.allow_2d:
+                err = 3
+            elif X.ndim < 2:
+                err = 2
+            if err:
+                raise ValueError(f"X must have at least {err} dimensions.")
+            X = X[..., np.newaxis]
+        return X

     def score(self, X, y):
         """Score each estimator on each task.
@@ -268,7 +294,7 @@ def score(self, X, y):
         """  # noqa: E501
         check_scoring = _get_check_scoring()

-        self._check_Xy(X)
+        X = self._check_Xy(X, y)
         if X.shape[-1] != len(self.estimators_):
             raise ValueError("The number of estimators does not match " "X.shape[-1]")

@@ -446,7 +472,7 @@ def __repr__(self):  # noqa: D105

     def _transform(self, X, method):
         """Aux. function to make parallel predictions/transformation."""
-        self._check_Xy(X)
+        X = self._check_Xy(X)
         method = _check_method(self.base_estimator, method)

         parallel, p_func, n_jobs = parallel_func(
@@ -567,7 +593,7 @@ def score(self, X, y):
             Score for each estimator / data slice couple.
         """  # noqa: E501
         check_scoring = _get_check_scoring()
-        self._check_Xy(X)
+        X = self._check_Xy(X, y)
         # For predictions/transforms the parallelization is across the data and
         # not across the estimators to avoid memory load.
         parallel, p_func, n_jobs = parallel_func(
diff --git a/mne/decoding/tests/test_base.py b/mne/decoding/tests/test_base.py
index c7773a217d4..dde633feda5 100644
--- a/mne/decoding/tests/test_base.py
+++ b/mne/decoding/tests/test_base.py
@@ -14,7 +14,6 @@
 import pytest

 from mne import create_info, EpochsArray
-from mne.fixes import is_regressor, is_classifier
 from mne.utils import requires_sklearn
 from mne.decoding.base import (
     _get_inverse_funcs,
@@ -69,7 +68,12 @@ def _make_data(n_samples=1000, n_features=5, n_targets=3):
 @requires_sklearn
 def test_get_coef():
     """Test getting linear coefficients (filters/patterns) from estimators."""
-    from sklearn.base import TransformerMixin, BaseEstimator
+    from sklearn.base import (
+        TransformerMixin,
+        BaseEstimator,
+        is_classifier,
+        is_regressor,
+    )
     from sklearn.pipeline import make_pipeline
     from sklearn.preprocessing import StandardScaler
     from sklearn import svm
@@ -450,3 +454,21 @@ def test_cross_val_multiscore():
     manual = cross_val(reg, X, y, cv=KFold(2))
     auto = cross_val(reg, X, y, cv=2)
     assert_array_equal(manual, auto)
+
+
+@requires_sklearn
+def test_sklearn_compliance():
+    """Test LinearModel compliance with sklearn."""
+    from sklearn.utils.estimator_checks import check_estimator
+    from sklearn.linear_model import LogisticRegression
+
+    lm = LinearModel(LogisticRegression())
+    ignores = (
+        "check_estimator_sparse_data",  # we densify
+        "check_estimators_overwrite_params",  # self.model changes!
+        "check_parameters_default_constructible",
+    )
+    for est, check in check_estimator(lm, generate_only=True):
+        if any(ignore in str(check) for ignore in ignores):
+            continue
+        check(est)
diff --git a/mne/decoding/tests/test_receptive_field.py b/mne/decoding/tests/test_receptive_field.py
index 9a993b43669..1014a4aac2b 100644
--- a/mne/decoding/tests/test_receptive_field.py
+++ b/mne/decoding/tests/test_receptive_field.py
@@ -596,3 +596,42 @@ def test_linalg_warning():
         (RuntimeWarning, UserWarning), match="[Singular|scipy.linalg.solve]"
     ):
         rf.fit(y, X)
+
+
+@requires_sklearn
+def test_tdr_sklearn_compliance():
+    """Test sklearn estimator compliance."""
+    from sklearn.utils.estimator_checks import check_estimator
+
+    tdr = TimeDelayingRidge(0, 10, 1.0, 0.1, "laplacian", n_jobs=1)
+    # We don't actually comply with a bunch of the regressor specs :(
+    ignores = (
+        "check_supervised_y_no_nan",
+        "check_regressor",
+        "check_parameters_default_constructible",
+        "check_estimators_unfitted",
+        "_invariance",
+        "check_fit2d_1sample",
+    )
+    for est, check in check_estimator(tdr, generate_only=True):
+        if any(ignore in str(check) for ignore in ignores):
+            continue
+        check(est)
+
+
+@requires_sklearn
+def test_rf_sklearn_compliance():
+    """Test sklearn RF compliance."""
+    from sklearn.linear_model import Ridge
+    from sklearn.utils.estimator_checks import check_estimator
+
+    rf = ReceptiveField(-1, 2, 1.0, estimator=Ridge(), patterns=True)
+    ignores = (
+        "check_parameters_default_constructible",
+        "_invariance",
+        "check_fit2d_1sample",
+    )
+    for est, check in check_estimator(rf, generate_only=True):
+        if any(ignore in str(check) for ignore in ignores):
+            continue
+        check(est)
diff --git a/mne/decoding/tests/test_search_light.py b/mne/decoding/tests/test_search_light.py
index a531d7b668e..68aa2b534d9 100644
--- a/mne/decoding/tests/test_search_light.py
+++ b/mne/decoding/tests/test_search_light.py
@@ -57,7 +57,7 @@ def test_search_light():
     # transforms
     pytest.raises(ValueError, sl.predict, X[:, :, :2])
     y_pred = sl.predict(X)
-    assert y_pred.dtype == int
+    assert y_pred.dtype == float
     assert_array_equal(y_pred.shape, [n_epochs, n_time])
     y_proba = sl.predict_proba(X)
     assert y_proba.dtype == float
@@ -314,3 +314,26 @@ def predict_proba(self, X):

     estimator = SlidingEstimator(LinearDiscriminantAnalysis())
     cross_val_predict(estimator, X, y, method="predict_proba", cv=2)
+
+
+@pytest.mark.slowtest
+@requires_sklearn
+def test_sklearn_compliance():
+    """Test SlidingEstimator compliance with sklearn."""
+    from sklearn.utils.estimator_checks import check_estimator
+    from sklearn.linear_model import LogisticRegression
+
+    sl = SlidingEstimator(LogisticRegression(), allow_2d=True)
+
+    ignores = (
+        "check_estimator_sparse_data",  # we densify
+        "check_classifiers_one_label_sample_weights",  # don't handle singleton
+        "check_classifiers_classes",  # dim mismatch
+        "check_classifiers_train",
+        "check_decision_proba_consistency",
+        "check_parameters_default_constructible",
+    )
+    for est, check in check_estimator(sl, generate_only=True):
+        if any(ignore in str(check) for ignore in ignores):
+            continue
+        check(est)
diff --git a/mne/decoding/time_delaying_ridge.py b/mne/decoding/time_delaying_ridge.py
index 2299aa5d861..2853cc98b36 100644
--- a/mne/decoding/time_delaying_ridge.py
+++ b/mne/decoding/time_delaying_ridge.py
@@ -10,7 +10,7 @@
 from ..cuda import _setup_cuda_fft_multiply_repeated
 from ..filter import next_fast_len
 from ..fixes import jit
-from ..utils import warn, ProgressBar, logger
+from ..utils import warn, ProgressBar, logger, _validate_type, _check_option


 def _compute_corrs(
@@ -301,6 +301,9 @@ def __init__(
         self.edge_correction = edge_correction
         self.n_jobs = n_jobs

+    def _more_tags(self):
+        return {"no_validation": True}
+
     @property
     def _smin(self):
         return int(round(self.tmin * self.sfreq))
@@ -324,12 +327,21 @@ def fit(self, X, y):
         self : instance of TimeDelayingRidge
             Returns the modified instance.
         """
+        _validate_type(X, "array-like", "X")
+        _validate_type(y, "array-like", "y")
+        X = np.asarray(X, dtype=float)
+        y = np.asarray(y, dtype=float)
         if X.ndim == 3:
             assert y.ndim == 3
             assert X.shape[:2] == y.shape[:2]
         else:
-            assert X.ndim == 2 and y.ndim == 2
-            assert X.shape[0] == y.shape[0]
+            if X.ndim == 1:
+                X = X[:, np.newaxis]
+            if y.ndim == 1:
+                y = y[:, np.newaxis]
+            assert X.ndim == 2
+            assert y.ndim == 2
+            _check_option("y.shape[0]", y.shape[0], (X.shape[0],))
         # These are split into two functions because it's possible that we
         # might want to allow people to do them separately (e.g., to test
         # different regularization parameters).
diff --git a/mne/fixes.py b/mne/fixes.py
index c05dfaec344..b89e6cd0f84 100644
--- a/mne/fixes.py
+++ b/mne/fixes.py
@@ -176,38 +176,6 @@ def _read_volume_info(fobj):


 # adapted from scikit-learn
-def is_classifier(estimator):
-    """Returns True if the given estimator is (probably) a classifier.
-
-    Parameters
-    ----------
-    estimator : object
-        Estimator object to test.
-
-    Returns
-    -------
-    out : bool
-        True if estimator is a classifier and False otherwise.
-    """
-    return getattr(estimator, "_estimator_type", None) == "classifier"
-
-
-def is_regressor(estimator):
-    """Returns True if the given estimator is (probably) a regressor.
-
-    Parameters
-    ----------
-    estimator : object
-        Estimator object to test.
-
-    Returns
-    -------
-    out : bool
-        True if estimator is a regressor and False otherwise.
-    """
-    return getattr(estimator, "_estimator_type", None) == "regressor"
-
-
 _DEFAULT_TAGS = {
     "non_deterministic": False,
     "requires_positive_X": False,
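The `LinearModel` changes above replace six hand-written pass-through methods with a single `__getattr__` hook gated by the `_model_attr_wrap` tuple, and each touched test module gains a `check_estimator` compliance loop. The standalone sketch below shows why the delegation pattern works; `Wrapper` is a hypothetical stand-in for `LinearModel`, not code from this patch, and only scikit-learn is assumed:

```python
# Illustrative sketch of the delegation pattern used by LinearModel above.
# `Wrapper` is hypothetical, not part of this patch.
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression


class Wrapper(BaseEstimator):
    """Forward selected attributes to a wrapped estimator."""

    # Only whitelisted names are forwarded; everything else raises
    # AttributeError, so sklearn's hasattr()-based duck typing sees
    # exactly the capabilities of the wrapped model.
    _wrap = ("predict", "predict_proba", "decision_function", "score",
             "classes_", "_estimator_type")

    def __init__(self, model=None):
        self.model = model

    def fit(self, X, y):
        # Like LinearModel, fit the wrapped model in place (this is why
        # the tests above skip check_estimators_overwrite_params).
        self.model.fit(X, y)
        return self

    def __getattr__(self, attr):
        # Only invoked when normal attribute lookup fails, so fit(),
        # get_params(), etc. are never intercepted.
        if attr in Wrapper._wrap:
            return getattr(self.model, attr)
        raise AttributeError(attr)


clf = Wrapper(LogisticRegression()).fit([[0.0], [1.0], [2.0], [3.0]], [0, 0, 1, 1])
print(clf.predict([[2.5]]))       # forwarded to LogisticRegression
print(clf.classes_)               # forwarded fitted attribute: [0 1]
print(hasattr(clf, "transform"))  # False: not whitelisted
```

Because `__getattr__` raises `AttributeError` for non-whitelisted names, `hasattr(clf, "transform")` stays `False` even though attribute access is customized; this is the property the `check_estimator` loops in the new tests rely on when probing optional methods such as `decision_function`.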