scikit-learn · glemaitre · Feb 22, 2021 · Feb 1, 2021 · Feb 1, 2021 · Feb 3, 2021
diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py
@@ -16,7 +16,7 @@
 from ..base import ClassifierMixin, RegressorMixin
 from ..metrics import r2_score, accuracy_score
 from ..tree import DecisionTreeClassifier, DecisionTreeRegressor
-from ..utils import check_random_state, check_array, column_or_1d
+from ..utils import check_random_state, column_or_1d, deprecated
 from ..utils import indices_to_mask
 from ..utils.metaestimators import if_delegate_has_method
 from ..utils.multiclass import check_classification_targets
@@ -287,7 +287,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
            sample_weight = _check_sample_weight(sample_weight, X, dtype=None)

        # Remap output
-        n_samples, self.n_features_ = X.shape
+        n_samples = X.shape[0]
        self._n_samples = n_samples
        y = self._validate_y(y)

@@ -313,11 +313,11 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
        if isinstance(self.max_features, numbers.Integral):
            max_features = self.max_features
        elif isinstance(self.max_features, float):
-            max_features = self.max_features * self.n_features_
+            max_features = self.max_features * self.n_features_in_
        else:
            raise ValueError("max_features must be int or float")

-        if not (0 < max_features <= self.n_features_):
+        if not (0 < max_features <= self.n_features_in_):
            raise ValueError("max_features must be in (0, n_features]")

        max_features = max(1, int(max_features))
@@ -408,7 +408,7 @@ def _get_estimators_indices(self):
            # to those in `_parallel_build_estimators()`
            feature_indices, sample_indices = _generate_bagging_indices(
                seed, self.bootstrap_features, self.bootstrap,
-                self.n_features_, self._n_samples, self._max_features,
+                self.n_features_in_, self._n_samples, self._max_features,
                self._max_samples)

            yield feature_indices, sample_indices
@@ -429,6 +429,16 @@ def estimators_samples_(self):
        return [sample_indices
                for _, sample_indices in self._get_estimators_indices()]

+    # TODO: Remove in 1.2
+    # mypy error: Decorated property not supported
+    @deprecated(  # type: ignore
+        "Attribute n_features_ was deprecated in version 1.0 and will be "
+        "removed in 1.2. Use 'n_features_in_' instead."
+    )
+    @property
+    def n_features_(self):
+        return self.n_features_in_
+

 class BaggingClassifier(ClassifierMixin, BaseBagging):
    """A Bagging classifier.
@@ -523,6 +533,10 @@ class BaggingClassifier(ClassifierMixin, BaseBagging):
    n_features_ : int
        The number of features when :meth:`fit` is performed.

+        .. deprecated:: 1.0
+            Attribute `n_features_` was deprecated in version 1.0 and will be
+            removed in 1.2. Use `n_features_in_` instead.
+
    estimators_ : list of estimators
        The collection of fitted base estimators.

@@ -702,17 +716,11 @@ def predict_proba(self, X):
        """
        check_is_fitted(self)
        # Check data
-        X = check_array(
+        X = self._validate_data(
            X, accept_sparse=['csr', 'csc'], dtype=None,
-            force_all_finite=False
+            force_all_finite=False, reset=False
        )

-        if self.n_features_ != X.shape[1]:
-            raise ValueError("Number of features of the model must "
-                             "match the input. Model n_features is {0} and "
-                             "input n_features is {1}."
-                             "".format(self.n_features_, X.shape[1]))
-
        # Parallel loop
        n_jobs, n_estimators, starts = _partition_estimators(self.n_estimators,
                                                             self.n_jobs)
@@ -753,17 +761,11 @@ def predict_log_proba(self, X):
        check_is_fitted(self)
        if hasattr(self.base_estimator_, "predict_log_proba"):
            # Check data
-            X = check_array(
+            X = self._validate_data(
                X, accept_sparse=['csr', 'csc'], dtype=None,
-                force_all_finite=False
+                force_all_finite=False, reset=False
            )

-            if self.n_features_ != X.shape[1]:
-                raise ValueError("Number of features of the model must "
-                                 "match the input. Model n_features is {0} "
-                                 "and input n_features is {1} "
-                                 "".format(self.n_features_, X.shape[1]))
-
            # Parallel loop
            n_jobs, n_estimators, starts = _partition_estimators(
                self.n_estimators, self.n_jobs)
@@ -811,17 +813,11 @@ def decision_function(self, X):
        check_is_fitted(self)

        # Check data
-        X = check_array(
+        X = self._validate_data(
            X, accept_sparse=['csr', 'csc'], dtype=None,
-            force_all_finite=False
+            force_all_finite=False, reset=False
        )

-        if self.n_features_ != X.shape[1]:
-            raise ValueError("Number of features of the model must "
-                             "match the input. Model n_features is {0} and "
-                             "input n_features is {1} "
-                             "".format(self.n_features_, X.shape[1]))
-
        # Parallel loop
        n_jobs, n_estimators, starts = _partition_estimators(self.n_estimators,
                                                             self.n_jobs)
@@ -929,6 +925,10 @@ class BaggingRegressor(RegressorMixin, BaseBagging):
    n_features_ : int
        The number of features when :meth:`fit` is performed.

+        .. deprecated:: 1.0
+            Attribute `n_features_` was deprecated in version 1.0 and will be
+            removed in 1.2. Use `n_features_in_` instead.
+
    estimators_ : list of estimators
        The collection of fitted sub-estimators.

@@ -1024,9 +1024,9 @@ def predict(self, X):
        """
        check_is_fitted(self)
        # Check data
-        X = check_array(
+        X = self._validate_data(
            X, accept_sparse=['csr', 'csc'], dtype=None,
-            force_all_finite=False
+            force_all_finite=False, reset=False
        )

        # Parallel loop

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
@@ -57,7 +57,7 @@ class calls the ``fit`` method of each sub-estimator on random samples
 from ..tree import (DecisionTreeClassifier, DecisionTreeRegressor,
                    ExtraTreeClassifier, ExtraTreeRegressor)
 from ..tree._tree import DTYPE, DOUBLE
-from ..utils import check_random_state, check_array, compute_sample_weight
+from ..utils import check_random_state, compute_sample_weight, deprecated
 from ..exceptions import DataConversionWarning
 from ._base import BaseEnsemble, _partition_estimators
 from ..utils.fixes import delayed
@@ -312,9 +312,6 @@ def fit(self, X, y, sample_weight=None):
            # ensemble sorts the indices.
            X.sort_indices()

-        # Remap output
-        self.n_features_ = X.shape[1]
-
        y = np.atleast_1d(y)
        if y.ndim == 2 and y.shape[1] == 1:
            warn("A column-vector y was passed when a 1d array was"
@@ -446,7 +443,8 @@ def _compute_oob_predictions(self, X, y):
                (n_samples, 1, n_outputs)
            The OOB predictions.
      """
-        X = check_array(X, dtype=DTYPE, accept_sparse='csr')
+        X = self._validate_data(X, dtype=DTYPE, accept_sparse='csr',
+                                reset=False)

        n_samples = y.shape[0]
        n_outputs = self.n_outputs_
@@ -530,12 +528,22 @@ def feature_importances_(self):
            for tree in self.estimators_ if tree.tree_.node_count > 1)

        if not all_importances:
-            return np.zeros(self.n_features_, dtype=np.float64)
+            return np.zeros(self.n_features_in_, dtype=np.float64)

        all_importances = np.mean(all_importances,
                                  axis=0, dtype=np.float64)
        return all_importances / np.sum(all_importances)

+    # TODO: Remove in 1.2
+    # mypy error: Decorated property not supported
+    @deprecated(  # type: ignore
+        "Attribute n_features_ was deprecated in version 1.0 and will be "
+        "removed in 1.2. Use 'n_features_in_' instead."
+    )
+    @property
+    def n_features_(self):
+        return self.n_features_in_
+

 def _accumulate_prediction(predict, X, out, lock):
    """
@@ -1163,6 +1171,10 @@ class labels (multi-output problem).
    n_features_ : int
        The number of features when ``fit`` is performed.

+        .. deprecated:: 1.0
+            Attribute `n_features_` was deprecated in version 1.0 and will be
+            removed in 1.2. Use `n_features_in_` instead.
+
    n_outputs_ : int
        The number of outputs when ``fit`` is performed.

@@ -1463,6 +1475,10 @@ class RandomForestRegressor(ForestRegressor):
    n_features_ : int
        The number of features when ``fit`` is performed.

+        .. deprecated:: 1.0
+            Attribute `n_features_` was deprecated in version 1.0 and will be
+            removed in 1.2. Use `n_features_in_` instead.
+
    n_outputs_ : int
        The number of outputs when ``fit`` is performed.

@@ -1783,6 +1799,10 @@ class labels (multi-output problem).
    n_features_ : int
        The number of features when ``fit`` is performed.

+        .. deprecated:: 1.0
+            Attribute `n_features_` was deprecated in version 1.0 and will be
+            removed in 1.2. Use `n_features_in_` instead.
+
    n_outputs_ : int
        The number of outputs when ``fit`` is performed.

@@ -2068,6 +2088,10 @@ class ExtraTreesRegressor(ForestRegressor):
    n_features_ : int
        The number of features.

+        .. deprecated:: 1.0
+            Attribute `n_features_` was deprecated in version 1.0 and will be
+            removed in 1.2. Use `n_features_in_` instead.
+
    n_outputs_ : int
        The number of outputs.

@@ -2292,6 +2316,10 @@ class RandomTreesEmbedding(BaseForest):
    n_features_ : int
        The number of features when ``fit`` is performed.

+        .. deprecated:: 1.0
+            Attribute `n_features_` was deprecated in version 1.0 and will be
+            removed in 1.2. Use `n_features_in_` instead.
+
    n_outputs_ : int
        The number of outputs when ``fit`` is performed.

@@ -2421,7 +2449,7 @@ def fit_transform(self, X, y=None, sample_weight=None):
        X_transformed : sparse matrix of shape (n_samples, n_out)
            Transformed dataset.
        """
-        X = check_array(X, accept_sparse=['csc'])
+        X = self._validate_data(X, accept_sparse=['csc'])
        if issparse(X):
            # Pre-sort indices to avoid that each individual tree of the
            # ensemble sorts the indices.