Commit f0c80e8

MNT clean-up deprecations for 1.7: multi_class in LogisticRegression (scikit-learn#31241)
1 parent b985df0 commit f0c80e8

9 files changed: +112 −77 lines changed

‎doc/modules/model_evaluation.rst

+3 −3
@@ -1632,7 +1632,7 @@ Therefore, the `y_score` parameter is of size (n_samples,).
 >>> from sklearn.linear_model import LogisticRegression
 >>> from sklearn.metrics import roc_auc_score
 >>> X, y = load_breast_cancer(return_X_y=True)
->>> clf = LogisticRegression(solver="liblinear").fit(X, y)
+>>> clf = LogisticRegression().fit(X, y)
 >>> clf.classes_
 array([0, 1])

@@ -1728,11 +1728,11 @@ class with the greater label for each output.
 >>> from sklearn.datasets import make_multilabel_classification
 >>> from sklearn.multioutput import MultiOutputClassifier
 >>> X, y = make_multilabel_classification(random_state=0)
->>> inner_clf = LogisticRegression(solver="liblinear", random_state=0)
+>>> inner_clf = LogisticRegression(random_state=0)
 >>> clf = MultiOutputClassifier(inner_clf).fit(X, y)
 >>> y_score = np.transpose([y_pred[:, 1] for y_pred in clf.predict_proba(X)])
 >>> roc_auc_score(y, y_score, average=None)
-array([0.82..., 0.86..., 0.94..., 0.85... , 0.94...])
+array([0.82..., 0.85..., 0.93..., 0.86..., 0.94...])

 And the decision values do not require such processing.

‎(new file, path not shown) +7 −0

@@ -0,0 +1,7 @@
+- Using the `"liblinear"` solver for multiclass classification with a one-versus-rest
+  scheme in :class:`linear_model.LogisticRegression` and
+  :class:`linear_model.LogisticRegressionCV` is deprecated and will raise an error in
+  version 1.8. Either use a solver which supports the multinomial loss or wrap the
+  estimator in a :class:`sklearn.multiclass.OneVsRestClassifier` to keep applying a
+  one-versus-rest scheme.
+  By :user:`Jérémie du Boisberranger <jeremiedbb>`.
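
For orientation, here is a minimal sketch of the two migration paths suggested in this entry, on a synthetic three-class problem; the dataset and estimator settings below are illustrative and are not part of the commit:

 >>> from sklearn.datasets import make_classification
 >>> from sklearn.linear_model import LogisticRegression
 >>> from sklearn.multiclass import OneVsRestClassifier
 >>> X, y = make_classification(n_classes=3, n_informative=6, random_state=0)
 >>> # Option 1: switch to a solver that handles the multinomial loss directly
 >>> clf = LogisticRegression(solver="lbfgs").fit(X, y)
 >>> # Option 2: keep liblinear but make the one-versus-rest scheme explicit,
 >>> # so each inner estimator only ever sees a binary problem
 >>> ovr = OneVsRestClassifier(LogisticRegression(solver="liblinear")).fit(X, y)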

‎sklearn/ensemble/tests/test_voting.py

+5 −5
@@ -114,7 +114,7 @@ def test_notfitted():

 def test_majority_label_iris(global_random_seed):
     """Check classification by majority label on dataset iris."""
-    clf1 = LogisticRegression(solver="liblinear", random_state=global_random_seed)
+    clf1 = LogisticRegression(random_state=global_random_seed)
     clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
     clf3 = GaussianNB()
     eclf = VotingClassifier(

@@ -127,12 +127,12 @@ def test_majority_label_iris(global_random_seed):

 def test_tie_situation():
     """Check voting classifier selects smaller class label in tie situation."""
-    clf1 = LogisticRegression(random_state=123, solver="liblinear")
+    clf1 = LogisticRegression(random_state=123)
     clf2 = RandomForestClassifier(random_state=123)
     eclf = VotingClassifier(estimators=[("lr", clf1), ("rf", clf2)], voting="hard")
-    assert clf1.fit(X, y).predict(X)[73] == 2
-    assert clf2.fit(X, y).predict(X)[73] == 1
-    assert eclf.fit(X, y).predict(X)[73] == 1
+    assert clf1.fit(X, y).predict(X)[52] == 2
+    assert clf2.fit(X, y).predict(X)[52] == 1
+    assert eclf.fit(X, y).predict(X)[52] == 1


 def test_weights_iris(global_random_seed):

‎sklearn/linear_model/_logistic.py

+22 −4
@@ -501,6 +501,15 @@ def _logistic_regression_path(
             w0 = sol.solve(X=X, y=target, sample_weight=sample_weight)
             n_iter_i = sol.iteration
         elif solver == "liblinear":
+            if len(classes) > 2:
+                warnings.warn(
+                    "Using the 'liblinear' solver for multiclass classification is "
+                    "deprecated. An error will be raised in 1.8. Either use another "
+                    "solver which supports the multinomial loss or wrap the estimator "
+                    "in a OneVsRestClassifier to keep applying a one-versus-rest "
+                    "scheme.",
+                    FutureWarning,
+                )
             (
                 coef_,
                 intercept_,

@@ -931,7 +940,7 @@ class LogisticRegression(LinearClassifierMixin, SparseCoefMixin, BaseEstimator):
     'lbfgs'           'l2', None                     yes
     'liblinear'       'l1', 'l2'                     no
     'newton-cg'       'l2', None                     yes
-    'newton-cholesky' 'l2', None                     no
+    'newton-cholesky' 'l2', None                     yes
     'sag'             'l2', None                     yes
     'saga'            'elasticnet', 'l1', 'l2', None yes
     ================= ============================== ======================

@@ -1238,7 +1247,7 @@ def fit(self, X, y, sample_weight=None):
         check_classification_targets(y)
         self.classes_ = np.unique(y)

-        # TODO(1.7) remove multi_class
+        # TODO(1.8) remove multi_class
         multi_class = self.multi_class
         if self.multi_class == "multinomial" and len(self.classes_) == 2:
             warnings.warn(

@@ -1274,6 +1283,15 @@
         multi_class = _check_multi_class(multi_class, solver, len(self.classes_))

         if solver == "liblinear":
+            if len(self.classes_) > 2:
+                warnings.warn(
+                    "Using the 'liblinear' solver for multiclass classification is "
+                    "deprecated. An error will be raised in 1.8. Either use another "
+                    "solver which supports the multinomial loss or wrap the estimator "
+                    "in a OneVsRestClassifier to keep applying a one-versus-rest "
+                    "scheme.",
+                    FutureWarning,
+                )
             if effective_n_jobs(self.n_jobs) != 1:
                 warnings.warn(
                     "'n_jobs' > 1 does not have any effect when"

@@ -1568,7 +1586,7 @@ class LogisticRegressionCV(LogisticRegression, LinearClassifierMixin, BaseEstima
     'lbfgs'           'l2'                           yes
     'liblinear'       'l1', 'l2'                     no
     'newton-cg'       'l2'                           yes
-    'newton-cholesky' 'l2',                          no
+    'newton-cholesky' 'l2',                          yes
     'sag'             'l2',                          yes
     'saga'            'elasticnet', 'l1', 'l2'       yes
     ================= ============================== ======================

@@ -1900,7 +1918,7 @@ def fit(self, X, y, sample_weight=None, **params):
         classes = self.classes_ = label_encoder.classes_
         encoded_labels = label_encoder.transform(label_encoder.classes_)

-        # TODO(1.7) remove multi_class
+        # TODO(1.8) remove multi_class
         multi_class = self.multi_class
         if self.multi_class == "multinomial" and len(self.classes_) == 2:
             warnings.warn(

‎sklearn/linear_model/tests/test_logistic.py

+42 −19
@@ -129,8 +129,7 @@ def __call__(self, model, X, y, sample_weight=None):

 @skip_if_no_parallel
 def test_lr_liblinear_warning():
-    n_samples, n_features = iris.data.shape
-    target = iris.target_names[iris.target]
+    X, y = make_classification(random_state=0)

     lr = LogisticRegression(solver="liblinear", n_jobs=2)
     warning_message = (

@@ -139,7 +138,7 @@ def test_lr_liblinear_warning():
         " = 2."
     )
     with pytest.warns(UserWarning, match=warning_message):
-        lr.fit(iris.data, target)
+        lr.fit(X, y)


 @pytest.mark.parametrize("csr_container", CSR_CONTAINERS)

@@ -148,8 +147,11 @@ def test_predict_3_classes(csr_container):
     check_predictions(LogisticRegression(C=10), csr_container(X), Y2)


-# TODO(1.7): remove filterwarnings after the deprecation of multi_class
+# TODO(1.8): remove filterwarnings after the deprecation of multi_class
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
+@pytest.mark.filterwarnings(
+    "ignore:.*'liblinear' solver for multiclass classification is deprecated.*"
+)
 @pytest.mark.parametrize(
     "clf",
     [

@@ -197,7 +199,7 @@ def test_predict_iris(clf):
     assert np.mean(pred == target) > 0.95


-# TODO(1.7): remove filterwarnings after the deprecation of multi_class
+# TODO(1.8): remove filterwarnings after the deprecation of multi_class
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
 @pytest.mark.parametrize("LR", [LogisticRegression, LogisticRegressionCV])
 def test_check_solver_option(LR):

@@ -249,7 +251,7 @@ def test_elasticnet_l1_ratio_err_helpful(LR):
         model.fit(np.array([[1, 2], [3, 4]]), np.array([0, 1]))


-# TODO(1.7): remove whole test with deprecation of multi_class
+# TODO(1.8): remove whole test with deprecation of multi_class
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
 @pytest.mark.parametrize("solver", ["lbfgs", "newton-cg", "sag", "saga"])
 def test_multinomial_binary(solver):

@@ -274,7 +276,7 @@ def test_multinomial_binary(solver):
     assert np.mean(pred == target) > 0.9


-# TODO(1.7): remove filterwarnings after the deprecation of multi_class
+# TODO(1.8): remove filterwarnings after the deprecation of multi_class
 # Maybe even remove this whole test as correctness of multinomial loss is tested
 # elsewhere.
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")

@@ -614,7 +616,7 @@ def test_logistic_cv_sparse(csr_container):
     assert clfs.C_ == clf.C_


-# TODO(1.7): remove filterwarnings after the deprecation of multi_class
+# TODO(1.8): remove filterwarnings after the deprecation of multi_class
 # Best remove this whole test.
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
 def test_ovr_multinomial_iris():

@@ -700,7 +702,7 @@ def test_logistic_regression_solvers():
     )


-# TODO(1.7): remove filterwarnings after the deprecation of multi_class
+# TODO(1.8): remove filterwarnings after the deprecation of multi_class
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
 @pytest.mark.parametrize("fit_intercept", [False, True])
 def test_logistic_regression_solvers_multiclass(fit_intercept):

@@ -1301,7 +1303,7 @@ def test_logreg_predict_proba_multinomial():
     assert clf_wrong_loss > clf_multi_loss


-# TODO(1.7): remove filterwarnings after the deprecation of multi_class
+# TODO(1.8): remove filterwarnings after the deprecation of multi_class
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
 @pytest.mark.parametrize("max_iter", np.arange(1, 5))
 @pytest.mark.parametrize("multi_class", ["ovr", "multinomial"])

@@ -1345,8 +1347,11 @@ def test_max_iter(max_iter, multi_class, solver, message):
     assert lr.n_iter_[0] == max_iter


-# TODO(1.7): remove filterwarnings after the deprecation of multi_class
+# TODO(1.8): remove filterwarnings after the deprecation of multi_class
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
+@pytest.mark.filterwarnings(
+    "ignore:.*'liblinear' solver for multiclass classification is deprecated.*"
+)
 @pytest.mark.parametrize("solver", SOLVERS)
 def test_n_iter(solver):
     # Test that self.n_iter_ has the correct format.

@@ -1478,7 +1483,7 @@ def test_saga_vs_liblinear(csr_container):
     assert_array_almost_equal(saga.coef_, liblinear.coef_, 3)


-# TODO(1.7): remove filterwarnings after the deprecation of multi_class
+# TODO(1.8): remove filterwarnings after the deprecation of multi_class
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
 @pytest.mark.parametrize("multi_class", ["ovr", "multinomial"])
 @pytest.mark.parametrize(

@@ -1738,7 +1743,7 @@ def test_LogisticRegressionCV_GridSearchCV_elastic_net(n_classes):
     assert gs.best_params_["C"] == lrcv.C_[0]


-# TODO(1.7): remove filterwarnings after the deprecation of multi_class
+# TODO(1.8): remove filterwarnings after the deprecation of multi_class
 # Maybe remove whole test after removal of the deprecated multi_class.
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
 def test_LogisticRegressionCV_GridSearchCV_elastic_net_ovr():

@@ -1786,7 +1791,7 @@ def test_LogisticRegressionCV_GridSearchCV_elastic_net_ovr():
     assert (lrcv.predict(X_test) == gs.predict(X_test)).mean() >= 0.8


-# TODO(1.7): remove filterwarnings after the deprecation of multi_class
+# TODO(1.8): remove filterwarnings after the deprecation of multi_class
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
 @pytest.mark.parametrize("penalty", ("l2", "elasticnet"))
 @pytest.mark.parametrize("multi_class", ("ovr", "multinomial", "auto"))

@@ -1825,7 +1830,7 @@ def test_LogisticRegressionCV_no_refit(penalty, multi_class):
     assert lrcv.coef_.shape == (n_classes, n_features)


-# TODO(1.7): remove filterwarnings after the deprecation of multi_class
+# TODO(1.8): remove filterwarnings after the deprecation of multi_class
 # Remove multi_class an change first element of the expected n_iter_.shape from
 # n_classes to 1 (according to the docstring).
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")

@@ -1955,8 +1960,11 @@ def test_logistic_regression_path_coefs_multinomial():
     assert_array_almost_equal(coefs[1], coefs[2], decimal=1)


-# TODO(1.7): remove filterwarnings after the deprecation of multi_class
+# TODO(1.8): remove filterwarnings after the deprecation of multi_class
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
+@pytest.mark.filterwarnings(
+    "ignore:.*'liblinear' solver for multiclass classification is deprecated.*"
+)
 @pytest.mark.parametrize(
     "est",
     [

@@ -2126,7 +2134,7 @@ def test_scores_attribute_layout_elasticnet():
     assert avg_scores_lrcv[i, j] == pytest.approx(avg_score_lr)


-# TODO(1.7): remove filterwarnings after the deprecation of multi_class
+# TODO(1.8): remove filterwarnings after the deprecation of multi_class
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
 @pytest.mark.parametrize("solver", ["lbfgs", "newton-cg", "newton-cholesky"])
 @pytest.mark.parametrize("fit_intercept", [False, True])

@@ -2171,7 +2179,7 @@ def test_multinomial_identifiability_on_iris(solver, fit_intercept):
     assert clf.intercept_.sum(axis=0) == pytest.approx(0, abs=1e-11)


-# TODO(1.7): remove filterwarnings after the deprecation of multi_class
+# TODO(1.8): remove filterwarnings after the deprecation of multi_class
 @pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
 @pytest.mark.parametrize("multi_class", ["ovr", "multinomial", "auto"])
 @pytest.mark.parametrize("class_weight", [{0: 1.0, 1: 10.0, 2: 1.0}, "balanced"])

@@ -2349,7 +2357,7 @@ def test_passing_params_without_enabling_metadata_routing():
         lr_cv.score(X, y, **params)


-# TODO(1.7): remove
+# TODO(1.8): remove
 def test_multi_class_deprecated():
     """Check `multi_class` parameter deprecated."""
     X, y = make_classification(n_classes=3, n_samples=50, n_informative=6)

@@ -2414,3 +2422,18 @@ def test_newton_cholesky_fallback_to_lbfgs(global_random_seed):
     n_iter_nc_limited = lr_nc_limited.n_iter_[0]

     assert n_iter_nc_limited == lr_nc_limited.max_iter - 1
+
+
+# TODO(1.8): check for an error instead
+@pytest.mark.parametrize("Estimator", [LogisticRegression, LogisticRegressionCV])
+def test_liblinear_multiclass_warning(Estimator):
+    """Check that liblinear warns on multiclass problems."""
+    msg = (
+        "Using the 'liblinear' solver for multiclass classification is "
+        "deprecated. An error will be raised in 1.8. Either use another "
+        "solver which supports the multinomial loss or wrap the estimator "
+        "in a OneVsRestClassifier to keep applying a one-versus-rest "
+        "scheme."
+    )
+    with pytest.warns(FutureWarning, match=msg):
+        Estimator(solver="liblinear").fit(iris.data, iris.target)
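
At runtime, outside of pytest, the same deprecation surfaces as a FutureWarning at fit time. A small illustrative snippet, assuming a scikit-learn build that includes this change (the warning-capturing pattern here is for demonstration only):

 >>> import warnings
 >>> from sklearn.datasets import load_iris
 >>> from sklearn.linear_model import LogisticRegression
 >>> X, y = load_iris(return_X_y=True)
 >>> with warnings.catch_warnings(record=True) as caught:
 ...     warnings.simplefilter("always")
 ...     _ = LogisticRegression(solver="liblinear").fit(X, y)  # 3 classes -> warns
 >>> any(issubclass(w.category, FutureWarning) for w in caught)
 True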

‎sklearn/metrics/_ranking.py

+3 −3
@@ -622,7 +622,7 @@ class scores must correspond to the order of ``labels``,
     >>> from sklearn.linear_model import LogisticRegression
     >>> from sklearn.metrics import roc_auc_score
     >>> X, y = load_breast_cancer(return_X_y=True)
-    >>> clf = LogisticRegression(solver="liblinear", random_state=0).fit(X, y)
+    >>> clf = LogisticRegression(solver="newton-cholesky", random_state=0).fit(X, y)
     >>> roc_auc_score(y, clf.predict_proba(X)[:, 1])
     0.99...
     >>> roc_auc_score(y, clf.decision_function(X))

@@ -632,7 +632,7 @@ class scores must correspond to the order of ``labels``,

     >>> from sklearn.datasets import load_iris
     >>> X, y = load_iris(return_X_y=True)
-    >>> clf = LogisticRegression(solver="liblinear").fit(X, y)
+    >>> clf = LogisticRegression(solver="newton-cholesky").fit(X, y)
     >>> roc_auc_score(y, clf.predict_proba(X), multi_class='ovr')
     0.99...

@@ -649,7 +649,7 @@ class scores must correspond to the order of ``labels``,
     >>> # extract the positive columns for each output
     >>> y_score = np.transpose([score[:, 1] for score in y_score])
     >>> roc_auc_score(y, y_score, average=None)
-    array([0.82..., 0.86..., 0.94..., 0.85... , 0.94...])
+    array([0.82..., 0.85..., 0.93..., 0.86..., 0.94...])
     >>> from sklearn.linear_model import RidgeClassifierCV
     >>> clf = RidgeClassifierCV().fit(X, y)
     >>> roc_auc_score(y, clf.decision_function(X), average=None)

0 commit comments
