From 8d4b501072a966878f6c144273633b7559d10d42 Mon Sep 17 00:00:00 2001 From: "Mr. Snrub" <45150804+s-banach@users.noreply.github.com> Date: Sun, 3 Apr 2022 22:11:49 -0400 Subject: [PATCH 01/16] Update _alpha_grid to take sample_weight It seems like this single call to _preprocess_data suffices in all cases. --- sklearn/linear_model/_coordinate_descent.py | 64 +++++++++------------ 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py index b450790da5a07..bc338ebaa41db 100644 --- a/sklearn/linear_model/_coordinate_descent.py +++ b/sklearn/linear_model/_coordinate_descent.py @@ -92,6 +92,7 @@ def _alpha_grid( n_alphas=100, normalize=False, copy_X=True, + sample_weight=None, ): """Compute the grid of alpha values for elastic net parameter search @@ -138,6 +139,8 @@ def _alpha_grid( copy_X : bool, default=True If ``True``, X will be copied; else, it may be overwritten. + + sample_weight : ndarray of shape (n_samples,) """ if l1_ratio == 0: raise ValueError( @@ -146,47 +149,35 @@ def _alpha_grid( "your estimator with the appropriate `alphas=` " "argument." ) - n_samples = len(y) - - sparse_center = False - if Xy is None: - X_sparse = sparse.isspmatrix(X) - sparse_center = X_sparse and (fit_intercept or normalize) - X = check_array( - X, accept_sparse="csc", copy=(copy_X and fit_intercept and not X_sparse) + if Xy is not None and ((sample_weight is None) or (fit_intercept is False)): + # In this case, the precomputed Xy should be valid. + pass + else: + # Compute Xy. 
+ X, y, X_offset, y_offset, X_scale = _preprocess_data( + X, + y, + fit_intercept, + normalize=normalize, + copy=copy_X, + sample_weight=sample_weight, + check_input=False, ) - if not X_sparse: - # X can be touched inplace thanks to the above line - X, y, _, _, _ = _preprocess_data(X, y, fit_intercept, normalize, copy=False) - Xy = safe_sparse_dot(X.T, y, dense_output=True) - - if sparse_center: - # Workaround to find alpha_max for sparse matrices. - # since we should not destroy the sparsity of such matrices. - _, _, X_offset, _, X_scale = _preprocess_data( - X, y, fit_intercept, normalize - ) - mean_dot = X_offset * np.sum(y) + if sample_weight is not None: + yw = y * sample_weight / sample_weight.mean() + else: + yw = y + if sparse.issparse(X): + Xy = safe_sparse_dot(X.T, yw, dense_output=True) - np.sum(yw) * X_offset + else: + Xy = np.dot(X.T, yw) if Xy.ndim == 1: Xy = Xy[:, np.newaxis] - - if sparse_center: - if fit_intercept: - Xy -= mean_dot[:, np.newaxis] - if normalize: - Xy /= X_scale[:, np.newaxis] - - alpha_max = np.sqrt(np.sum(Xy**2, axis=1)).max() / (n_samples * l1_ratio) - + alpha_max = np.sqrt(np.sum(Xy**2, axis=1)).max() / (l1_ratio * X.shape[0]) if alpha_max <= np.finfo(float).resolution: - alphas = np.empty(n_alphas) - alphas.fill(np.finfo(float).resolution) - return alphas - - return np.logspace(np.log10(alpha_max * eps), np.log10(alpha_max), num=n_alphas)[ - ::-1 - ] + return np.full(n_alphas, np.finfo(float).resolution) + return np.logspace(np.log10(alpha_max), np.log10(alpha_max * eps), num=n_alphas) def lasso_path( @@ -1660,6 +1651,7 @@ def fit(self, X, y, sample_weight=None): n_alphas=self.n_alphas, normalize=_normalize, copy_X=self.copy_X, + sample_weight=sample_weight, ) for l1_ratio in l1_ratios ] From 2f494db40f647ca99f57807a3934943caecc7d58 Mon Sep 17 00:00:00 2001 From: "Mr. 
Snrub" <45150804+s-banach@users.noreply.github.com> Date: Sun, 3 Apr 2022 22:12:51 -0400 Subject: [PATCH 02/16] Add a simple test for alpha_max with sample_weight This tiny example was given in https://github.com/scikit-learn/scikit-learn/issues/22914. The test merely asserts that alpha_max is large enough to force the coefficient to 0. --- .../linear_model/tests/test_coordinate_descent.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index e5d7ba358c1f5..f38fd3933371b 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -1642,6 +1642,18 @@ def test_enet_cv_grid_search(sample_weight): assert reg.alpha_ == pytest.approx(gs.best_params_["alpha"]) +@pytest.mark.parametrize("sparseX", [False, True]) +def test_enet_alpha_max_sample_weight(sparseX): + X = np.array([[3, 1], [2, 5], [5, 3], [1, 4]]) + beta = np.array([1, 1]) + y = X @ beta + sample_weight = np.array([10, 1, 10, 1]) + if sparseX: + X = sparse.csc_matrix(X) + reg = ElasticNetCV(n_alphas=1, cv=2).fit(X, y, sample_weight=sample_weight) + assert_almost_equal(reg.coef_, 0) + + @pytest.mark.parametrize("fit_intercept", [True, False]) @pytest.mark.parametrize("l1_ratio", [0, 0.5, 1]) @pytest.mark.parametrize("precompute", [False, True]) From fa2c8215bf61e1f553dcfed3dae539a3769a3b00 Mon Sep 17 00:00:00 2001 From: "Mr. Snrub" <45150804+s-banach@users.noreply.github.com> Date: Mon, 4 Apr 2022 09:06:16 -0400 Subject: [PATCH 03/16] Update test As per reviewer's suggestions: (1) Clarify eps=1. (2) Parameterize `fit_intercept`. 
--- sklearn/linear_model/tests/test_coordinate_descent.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index f38fd3933371b..807429985dcac 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -1643,14 +1643,16 @@ def test_enet_cv_grid_search(sample_weight): @pytest.mark.parametrize("sparseX", [False, True]) -def test_enet_alpha_max_sample_weight(sparseX): +@pytest.mark.parametrize("fit_intercept", [False, True]) +def test_enet_alpha_max_sample_weight(sparseX, fit_intercept): X = np.array([[3, 1], [2, 5], [5, 3], [1, 4]]) beta = np.array([1, 1]) y = X @ beta sample_weight = np.array([10, 1, 10, 1]) if sparseX: X = sparse.csc_matrix(X) - reg = ElasticNetCV(n_alphas=1, cv=2).fit(X, y, sample_weight=sample_weight) + reg = ElasticNetCV(n_alphas=1, cv=2, eps=1, fit_intercept=fit_intercept) + reg.fit(X, y, sample_weight=sample_weight) assert_almost_equal(reg.coef_, 0) From 75e65842e9312af3296a96ccab7d38dfbb0b59e6 Mon Sep 17 00:00:00 2001 From: "Mr. Snrub" <45150804+s-banach@users.noreply.github.com> Date: Mon, 4 Apr 2022 09:10:45 -0400 Subject: [PATCH 04/16] Clarify _alpha_grid. (1) Give the name `n_samples` to the quantity `X.shape[0]`. (2) Clarify that `y_offset` and `X_scale` are not used, since these are already applied to the data by `_preprocess_data`. --- sklearn/linear_model/_coordinate_descent.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py index bc338ebaa41db..6c710af8b5137 100644 --- a/sklearn/linear_model/_coordinate_descent.py +++ b/sklearn/linear_model/_coordinate_descent.py @@ -154,7 +154,7 @@ def _alpha_grid( pass else: # Compute Xy. 
- X, y, X_offset, y_offset, X_scale = _preprocess_data( + X, y, X_offset, _, _ = _preprocess_data( X, y, fit_intercept, @@ -174,7 +174,9 @@ def _alpha_grid( if Xy.ndim == 1: Xy = Xy[:, np.newaxis] - alpha_max = np.sqrt(np.sum(Xy**2, axis=1)).max() / (l1_ratio * X.shape[0]) + n_samples = X.shape[0] + alpha_max = np.max(np.sqrt(np.sum(Xy**2, axis=1))) / (l1_ratio * n_samples) + if alpha_max <= np.finfo(float).resolution: return np.full(n_alphas, np.finfo(float).resolution) return np.logspace(np.log10(alpha_max), np.log10(alpha_max * eps), num=n_alphas) From 8b6cfc0bfb53faf56cd6e410e35b4b83ebc3f219 Mon Sep 17 00:00:00 2001 From: "Mr. Snrub" <45150804+s-banach@users.noreply.github.com> Date: Tue, 5 Apr 2022 18:16:49 -0400 Subject: [PATCH 05/16] Clarify notation --- sklearn/linear_model/_coordinate_descent.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py index 6c710af8b5137..3655bd667c2d0 100644 --- a/sklearn/linear_model/_coordinate_descent.py +++ b/sklearn/linear_model/_coordinate_descent.py @@ -150,10 +150,8 @@ def _alpha_grid( "argument." ) if Xy is not None and ((sample_weight is None) or (fit_intercept is False)): - # In this case, the precomputed Xy should be valid. - pass + Xyw = Xy else: - # Compute Xy. 
X, y, X_offset, _, _ = _preprocess_data( X, y, @@ -168,14 +166,14 @@ def _alpha_grid( else: yw = y if sparse.issparse(X): - Xy = safe_sparse_dot(X.T, yw, dense_output=True) - np.sum(yw) * X_offset + Xyw = safe_sparse_dot(X.T, yw, dense_output=True) - np.sum(yw) * X_offset else: - Xy = np.dot(X.T, yw) + Xyw = np.dot(X.T, yw) - if Xy.ndim == 1: - Xy = Xy[:, np.newaxis] + if Xyw.ndim == 1: + Xyw = Xyw[:, np.newaxis] n_samples = X.shape[0] - alpha_max = np.max(np.sqrt(np.sum(Xy**2, axis=1))) / (l1_ratio * n_samples) + alpha_max = np.max(np.sqrt(np.sum(Xyw**2, axis=1))) / (l1_ratio * n_samples) if alpha_max <= np.finfo(float).resolution: return np.full(n_alphas, np.finfo(float).resolution) From 2ba4c57acd36b7a6c92b7322ce0cb9fc18cd34ce Mon Sep 17 00:00:00 2001 From: "Mr. Snrub" <45150804+s-banach@users.noreply.github.com> Date: Sat, 2 Jul 2022 09:51:25 -0400 Subject: [PATCH 06/16] Use Xy if it is provided. --- sklearn/linear_model/_coordinate_descent.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py index 3655bd667c2d0..b1597bedce814 100644 --- a/sklearn/linear_model/_coordinate_descent.py +++ b/sklearn/linear_model/_coordinate_descent.py @@ -149,7 +149,7 @@ def _alpha_grid( "your estimator with the appropriate `alphas=` " "argument." 
) - if Xy is not None and ((sample_weight is None) or (fit_intercept is False)): + if Xy is not None: Xyw = Xy else: X, y, X_offset, _, _ = _preprocess_data( @@ -162,7 +162,7 @@ def _alpha_grid( check_input=False, ) if sample_weight is not None: - yw = y * sample_weight / sample_weight.mean() + yw = y * sample_weight else: yw = y if sparse.issparse(X): @@ -172,7 +172,10 @@ def _alpha_grid( if Xyw.ndim == 1: Xyw = Xyw[:, np.newaxis] - n_samples = X.shape[0] + if sample_weight is not None: + n_samples = sample_weight.sum() + else: + n_samples = X.shape[0] alpha_max = np.max(np.sqrt(np.sum(Xyw**2, axis=1))) / (l1_ratio * n_samples) if alpha_max <= np.finfo(float).resolution: From 5d1f5e7b09aa08cdf5b7d980ef114bcafb9f8c52 Mon Sep 17 00:00:00 2001 From: "Mr. Snrub" <45150804+s-banach@users.noreply.github.com> Date: Sat, 2 Jul 2022 10:08:08 -0400 Subject: [PATCH 07/16] Update test, check alpha_max is not too large --- sklearn/linear_model/tests/test_coordinate_descent.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 807429985dcac..8fdc9dbc43c96 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -26,6 +26,7 @@ from sklearn.utils._testing import assert_almost_equal from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_array_less from sklearn.utils._testing import ignore_warnings from sklearn.utils._testing import _convert_container @@ -1651,9 +1652,14 @@ def test_enet_alpha_max_sample_weight(sparseX, fit_intercept): sample_weight = np.array([10, 1, 10, 1]) if sparseX: X = sparse.csc_matrix(X) + # Test alpha_max makes coefs zero. 
reg = ElasticNetCV(n_alphas=1, cv=2, eps=1, fit_intercept=fit_intercept) reg.fit(X, y, sample_weight=sample_weight) assert_almost_equal(reg.coef_, 0) + # Test smaller alpha makes coefs nonzero. + reg = ElasticNetCV(n_alphas=2, cv=2, eps=0.99, fit_intercept=fit_intercept) + reg.fit(X, y, sample_weight=sample_weight) + assert_array_less(0, np.max(np.abs(reg.coef_))) @pytest.mark.parametrize("fit_intercept", [True, False]) From dce169c41c4613e50fbf61ac18883722edb47ffe Mon Sep 17 00:00:00 2001 From: "Mr. Snrub" <45150804+s-banach@users.noreply.github.com> Date: Sat, 2 Jul 2022 12:43:15 -0400 Subject: [PATCH 08/16] Fix test that alpha_max is not too large. --- sklearn/linear_model/tests/test_coordinate_descent.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 8fdc9dbc43c96..a414a14c71c06 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -1656,10 +1656,11 @@ def test_enet_alpha_max_sample_weight(sparseX, fit_intercept): reg = ElasticNetCV(n_alphas=1, cv=2, eps=1, fit_intercept=fit_intercept) reg.fit(X, y, sample_weight=sample_weight) assert_almost_equal(reg.coef_, 0) + alpha_max = reg.alpha_ # Test smaller alpha makes coefs nonzero. - reg = ElasticNetCV(n_alphas=2, cv=2, eps=0.99, fit_intercept=fit_intercept) + reg = ElasticNet(alpha=0.99 * alpha_max, fit_intercept=fit_intercept) reg.fit(X, y, sample_weight=sample_weight) - assert_array_less(0, np.max(np.abs(reg.coef_))) + assert_array_less(1e-3, np.max(np.abs(reg.coef_))) @pytest.mark.parametrize("fit_intercept", [True, False]) From 380c21f6f342b69021b0f5269e919c2cb82980c9 Mon Sep 17 00:00:00 2001 From: "Mr. Snrub" <45150804+s-banach@users.noreply.github.com> Date: Tue, 5 Jul 2022 22:39:17 -0400 Subject: [PATCH 09/16] Test alpha_max without sample_weight. 
--- sklearn/linear_model/tests/test_coordinate_descent.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index a414a14c71c06..c7c4a839b7170 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -1645,11 +1645,11 @@ def test_enet_cv_grid_search(sample_weight): @pytest.mark.parametrize("sparseX", [False, True]) @pytest.mark.parametrize("fit_intercept", [False, True]) -def test_enet_alpha_max_sample_weight(sparseX, fit_intercept): - X = np.array([[3, 1], [2, 5], [5, 3], [1, 4]]) +@pytest.mark.parametrize("sample_weight", [np.array([10, 1, 10, 1]), None]) +def test_enet_alpha_max_sample_weight(sparseX, fit_intercept, sample_weight): + X = np.array([[3.0, 1.0], [2.0, 5.0], [5.0, 3.0], [1.0, 4.0]]) beta = np.array([1, 1]) y = X @ beta - sample_weight = np.array([10, 1, 10, 1]) if sparseX: X = sparse.csc_matrix(X) # Test alpha_max makes coefs zero. 
From 40d8b304ffd615307638959e308a1e8ea53d3d89 Mon Sep 17 00:00:00 2001 From: snath-xoc <51656807+snath-xoc@users.noreply.github.com> Date: Thu, 20 Jun 2024 22:10:54 +0100 Subject: [PATCH 10/16] Update _preprocess_data inputs in _coordinate_descent.py Co-authored-by: Olivier Grisel --- sklearn/linear_model/_coordinate_descent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py index b794065d04512..39143002b2638 100644 --- a/sklearn/linear_model/_coordinate_descent.py +++ b/sklearn/linear_model/_coordinate_descent.py @@ -149,7 +149,7 @@ def _alpha_grid( X, y, X_offset, _, _ = _preprocess_data( X, y, - fit_intercept, + fit_intercept=fit_intercept, copy=copy_X, sample_weight=sample_weight, check_input=False, From 85062c0e37d52737f35dbbc090ec76b25cb22012 Mon Sep 17 00:00:00 2001 From: Shruti Nath Date: Fri, 21 Jun 2024 00:34:19 +0100 Subject: [PATCH 11/16] added tests for repeated vs weighted on cyclic ElasticNetCV and modified alpha_grid_ to accommodate for MultitaskCV y shape --- sklearn/linear_model/_coordinate_descent.py | 7 +++++- .../tests/test_coordinate_descent.py | 22 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py index 39143002b2638..fd44282f1aea3 100644 --- a/sklearn/linear_model/_coordinate_descent.py +++ b/sklearn/linear_model/_coordinate_descent.py @@ -155,7 +155,12 @@ def _alpha_grid( check_input=False, ) if sample_weight is not None: - yw = y * sample_weight + if y.ndim>1: + + yw = y * np.broadcast_to(sample_weight.reshape(-1,1),y.shape) + + else: + yw = y * sample_weight else: yw = y if sparse.issparse(X): diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index d294acf3141c5..1b41d02fb4e1a 100644 --- 
a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -1305,6 +1305,28 @@ def test_enet_sample_weight_consistency( assert_allclose(reg1.coef_, reg2.coef_, rtol=1e-6) +@pytest.mark.parametrize("fit_intercept", [True, False]) +@pytest.mark.parametrize("sparse_container", [None] + CSC_CONTAINERS) +def test_enet_cv_sample_weight(fit_intercept, sparse_container, global_random_seed): + rng = np.random.RandomState(global_random_seed) + rng = np.random.RandomState(0) + + X, y = make_regression( + n_samples=10000, n_features=5, random_state=10 + ) + + sample_weight = rng.randint(0, 5, size=X.shape[0]) + X_resampled_by_weights = np.repeat(X, sample_weight, axis=0) + y_resampled_by_weights = np.repeat(y, sample_weight, axis=0) + + est_weighted = ElasticNetCV(selection='cyclic').fit(X,y,sample_weight=sample_weight) + est_repeated = ElasticNetCV(selection='cyclic').fit(X_resampled_by_weights,y_resampled_by_weights) + + assert_allclose(est_weighted.alphas_, est_repeated.alphas_) + assert_allclose(est_weighted.coef_, est_repeated.coef_) + + + @pytest.mark.parametrize("fit_intercept", [True, False]) @pytest.mark.parametrize("sparse_container", [None] + CSC_CONTAINERS) def test_enet_cv_sample_weight_correctness(fit_intercept, sparse_container): From 41fcb5f76bcbc3c3f095f65377ed5d6f23a4dbea Mon Sep 17 00:00:00 2001 From: Shruti Nath Date: Thu, 27 Jun 2024 19:38:32 +0200 Subject: [PATCH 12/16] added to changelog and changed seeding in tests --- doc/whats_new/v1.6.rst | 6 ++++++ sklearn/cluster/_birch.py | 8 +++++--- sklearn/discriminant_analysis.py | 8 +++++--- sklearn/linear_model/_coordinate_descent.py | 13 ++++--------- sklearn/linear_model/_theil_sen.py | 8 +++++--- .../linear_model/tests/test_coordinate_descent.py | 14 +++++++------- sklearn/metrics/tests/test_common.py | 1 - 7 files changed, 32 insertions(+), 26 deletions(-) diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 
e1e7a1f01f2f8..8d0fca344d193 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -99,6 +99,12 @@ Changelog :pr:`123456` by :user:`Joe Bloggs `. where 123455 is the *pull request* number, not the issue number. +:mod:`sklearn.linear_model` +.......................... +- |Fix| :func:`_coordinate_descent._alpha_grid` adapted to account for sample weights. + :pr:`23045` by :user:`John Hopfensperger ` and :pr:`29308` by :user:`Shruti Nath `. + + :mod:`sklearn.base` ................... diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index 81c26d51bbb71..a69aa4711e221 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -531,9 +531,11 @@ def _fit(self, X, partial): if self.copy != "deprecated" and first_call: warnings.warn( - "`copy` was deprecated in 1.6 and will be removed in 1.8 since it " - "has no effect internally. Simply leave this parameter to its default " - "value to avoid this warning.", + ( + "`copy` was deprecated in 1.6 and will be removed in 1.8 since it" + " has no effect internally. Simply leave this parameter to its" + " default value to avoid this warning." + ), FutureWarning, ) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index caad2de01b135..2b728ea1dd499 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -943,9 +943,11 @@ def fit(self, X, y): rank = np.sum(S2 > self.tol) if rank < n_features: warnings.warn( - f"The covariance matrix of class {ind} is not full rank. " - "Increasing the value of parameter `reg_param` might help" - " reducing the collinearity.", + ( + f"The covariance matrix of class {ind} is not full rank. " + "Increasing the value of parameter `reg_param` might help" + " reducing the collinearity." 
+ ), linalg.LinAlgWarning, ) if self.store_covariance or store_covariance: diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py index a896614c7efcb..ca05f0bb8337c 100644 --- a/sklearn/linear_model/_coordinate_descent.py +++ b/sklearn/linear_model/_coordinate_descent.py @@ -155,10 +155,9 @@ def _alpha_grid( check_input=False, ) if sample_weight is not None: - if y.ndim>1: + if y.ndim > 1: + yw = y * np.broadcast_to(sample_weight.reshape(-1, 1), y.shape) - yw = y * np.broadcast_to(sample_weight.reshape(-1,1),y.shape) - else: yw = y * sample_weight else: @@ -176,8 +175,8 @@ def _alpha_grid( n_samples = X.shape[0] alpha_max = np.sqrt(np.sum(Xyw**2, axis=1)).max() / (n_samples * l1_ratio) - if alpha_max <= np.finfo(float).resolution: - return np.full(n_alphas, np.finfo(float).resolution) + if alpha_max <= np.finfo(np.float64).resolution: + return np.full(n_alphas, np.finfo(np.float64).resolution) return np.geomspace(alpha_max, alpha_max * eps, num=n_alphas) @@ -980,7 +979,6 @@ def fit(self, X, y, sample_weight=None, check_input=True): accept_sparse="csc", order="F", dtype=[np.float64, np.float32], - force_writeable=True, accept_large_sparse=False, copy=X_copied, multi_output=True, @@ -1609,7 +1607,6 @@ def fit(self, X, y, sample_weight=None, **params): check_X_params = dict( accept_sparse="csc", dtype=[np.float64, np.float32], - force_writeable=True, copy=False, accept_large_sparse=False, ) @@ -1635,7 +1632,6 @@ def fit(self, X, y, sample_weight=None, **params): accept_sparse="csc", dtype=[np.float64, np.float32], order="F", - force_writeable=True, copy=copy_X, ) X, y = self._validate_data( @@ -2513,7 +2509,6 @@ def fit(self, X, y): check_X_params = dict( dtype=[np.float64, np.float32], order="F", - force_writeable=True, copy=self.copy_X and self.fit_intercept, ) check_y_params = dict(ensure_2d=False, order="F") diff --git a/sklearn/linear_model/_theil_sen.py b/sklearn/linear_model/_theil_sen.py index 
61ae234d1d3fa..c85de38306ed8 100644 --- a/sklearn/linear_model/_theil_sen.py +++ b/sklearn/linear_model/_theil_sen.py @@ -416,9 +416,11 @@ def fit(self, X, y): """ if self.copy_X != "deprecated": warnings.warn( - "`copy_X` was deprecated in 1.6 and will be removed in 1.8 since it " - "has no effect internally. Simply leave this parameter to its default " - "value to avoid this warning.", + ( + "`copy_X` was deprecated in 1.6 and will be removed in 1.8 since it" + " has no effect internally. Simply leave this parameter to its" + " default value to avoid this warning." + ), FutureWarning, ) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 1b41d02fb4e1a..0cd4de25262b9 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -1309,24 +1309,24 @@ def test_enet_sample_weight_consistency( @pytest.mark.parametrize("sparse_container", [None] + CSC_CONTAINERS) def test_enet_cv_sample_weight(fit_intercept, sparse_container, global_random_seed): rng = np.random.RandomState(global_random_seed) - rng = np.random.RandomState(0) - X, y = make_regression( - n_samples=10000, n_features=5, random_state=10 - ) + X, y = make_regression(n_samples=10000, n_features=5, random_state=rng) sample_weight = rng.randint(0, 5, size=X.shape[0]) X_resampled_by_weights = np.repeat(X, sample_weight, axis=0) y_resampled_by_weights = np.repeat(y, sample_weight, axis=0) - est_weighted = ElasticNetCV(selection='cyclic').fit(X,y,sample_weight=sample_weight) - est_repeated = ElasticNetCV(selection='cyclic').fit(X_resampled_by_weights,y_resampled_by_weights) + est_weighted = ElasticNetCV(selection="cyclic", random_state=rng).fit( + X, y, sample_weight=sample_weight + ) + est_repeated = ElasticNetCV(selection="cyclic", random_state=rng).fit( + X_resampled_by_weights, y_resampled_by_weights + ) assert_allclose(est_weighted.alphas_, est_repeated.alphas_) 
assert_allclose(est_weighted.coef_, est_repeated.coef_) - @pytest.mark.parametrize("fit_intercept", [True, False]) @pytest.mark.parametrize("sparse_container", [None] + CSC_CONTAINERS) def test_enet_cv_sample_weight_correctness(fit_intercept, sparse_container): diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index c247e5f87f7d3..a85d9aebde23d 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1950,7 +1950,6 @@ def check_array_api_regression_metric_multioutput( def check_array_api_metric_pairwise(metric, array_namespace, device, dtype_name): - X_np = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=dtype_name) Y_np = np.array([[0.2, 0.3, 0.4], [0.5, 0.6, 0.7]], dtype=dtype_name) From 335137d102c578fbc65e9a8f24a111588b374e35 Mon Sep 17 00:00:00 2001 From: Shruti Nath Date: Thu, 27 Jun 2024 19:54:46 +0200 Subject: [PATCH 13/16] [all random seeds] test_enet_cv_sample_weight From fec4f74034026c772da3715d0cb00039e4b7660b Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Fri, 28 Jun 2024 11:32:36 +0200 Subject: [PATCH 14/16] Revert unrelated changes --- sklearn/cluster/_birch.py | 8 +++----- sklearn/discriminant_analysis.py | 8 +++----- sklearn/linear_model/_theil_sen.py | 8 +++----- sklearn/metrics/tests/test_common.py | 1 + 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index a69aa4711e221..81c26d51bbb71 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -531,11 +531,9 @@ def _fit(self, X, partial): if self.copy != "deprecated" and first_call: warnings.warn( - ( - "`copy` was deprecated in 1.6 and will be removed in 1.8 since it" - " has no effect internally. Simply leave this parameter to its" - " default value to avoid this warning." - ), + "`copy` was deprecated in 1.6 and will be removed in 1.8 since it " + "has no effect internally. 
Simply leave this parameter to its default " + "value to avoid this warning.", FutureWarning, ) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 2b728ea1dd499..caad2de01b135 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -943,11 +943,9 @@ def fit(self, X, y): rank = np.sum(S2 > self.tol) if rank < n_features: warnings.warn( - ( - f"The covariance matrix of class {ind} is not full rank. " - "Increasing the value of parameter `reg_param` might help" - " reducing the collinearity." - ), + f"The covariance matrix of class {ind} is not full rank. " + "Increasing the value of parameter `reg_param` might help" + " reducing the collinearity.", linalg.LinAlgWarning, ) if self.store_covariance or store_covariance: diff --git a/sklearn/linear_model/_theil_sen.py b/sklearn/linear_model/_theil_sen.py index c85de38306ed8..61ae234d1d3fa 100644 --- a/sklearn/linear_model/_theil_sen.py +++ b/sklearn/linear_model/_theil_sen.py @@ -416,11 +416,9 @@ def fit(self, X, y): """ if self.copy_X != "deprecated": warnings.warn( - ( - "`copy_X` was deprecated in 1.6 and will be removed in 1.8 since it" - " has no effect internally. Simply leave this parameter to its" - " default value to avoid this warning." - ), + "`copy_X` was deprecated in 1.6 and will be removed in 1.8 since it " + "has no effect internally. 
Simply leave this parameter to its default " + "value to avoid this warning.", FutureWarning, ) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index a85d9aebde23d..c247e5f87f7d3 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1950,6 +1950,7 @@ def check_array_api_regression_metric_multioutput( def check_array_api_metric_pairwise(metric, array_namespace, device, dtype_name): + X_np = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=dtype_name) Y_np = np.array([[0.2, 0.3, 0.4], [0.5, 0.6, 0.7]], dtype=dtype_name) From c41a8eeabdb42ffca30f36236d6d197362d340a7 Mon Sep 17 00:00:00 2001 From: Shruti Nath Date: Fri, 28 Jun 2024 18:10:54 +0200 Subject: [PATCH 15/16] merged test into test_enet_cv_sample_weight_correctness --- .../tests/test_coordinate_descent.py | 51 ++++--------------- 1 file changed, 11 insertions(+), 40 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 0cd4de25262b9..b60efd91e72e9 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -1305,28 +1305,6 @@ def test_enet_sample_weight_consistency( assert_allclose(reg1.coef_, reg2.coef_, rtol=1e-6) -@pytest.mark.parametrize("fit_intercept", [True, False]) -@pytest.mark.parametrize("sparse_container", [None] + CSC_CONTAINERS) -def test_enet_cv_sample_weight(fit_intercept, sparse_container, global_random_seed): - rng = np.random.RandomState(global_random_seed) - - X, y = make_regression(n_samples=10000, n_features=5, random_state=rng) - - sample_weight = rng.randint(0, 5, size=X.shape[0]) - X_resampled_by_weights = np.repeat(X, sample_weight, axis=0) - y_resampled_by_weights = np.repeat(y, sample_weight, axis=0) - - est_weighted = ElasticNetCV(selection="cyclic", random_state=rng).fit( - X, y, sample_weight=sample_weight - ) - est_repeated = 
ElasticNetCV(selection="cyclic", random_state=rng).fit( - X_resampled_by_weights, y_resampled_by_weights - ) - - assert_allclose(est_weighted.alphas_, est_repeated.alphas_) - assert_allclose(est_weighted.coef_, est_repeated.coef_) - - @pytest.mark.parametrize("fit_intercept", [True, False]) @pytest.mark.parametrize("sparse_container", [None] + CSC_CONTAINERS) def test_enet_cv_sample_weight_correctness(fit_intercept, sparse_container): @@ -1342,39 +1320,32 @@ def test_enet_cv_sample_weight_correctness(fit_intercept, sparse_container): X = sparse_container(X) params = dict(tol=1e-6) - # Set alphas, otherwise the two cv models might use different ones. - if fit_intercept: - alphas = np.linspace(0.001, 0.01, num=91) - else: - alphas = np.linspace(0.01, 0.1, num=91) - - # We weight the first fold 2 times more. - sw[:n_samples] = 2 + # We weight the first fold n times more. + sw[:n_samples] = rng.randint(0, 5, size=sw[:n_samples].shape[0]) groups_sw = np.r_[ np.full(n_samples, 0), np.full(n_samples, 1), np.full(n_samples, 2) ] splits_sw = list(LeaveOneGroupOut().split(X, groups=groups_sw)) - reg_sw = ElasticNetCV( - alphas=alphas, cv=splits_sw, fit_intercept=fit_intercept, **params - ) + reg_sw = ElasticNetCV(cv=splits_sw, fit_intercept=fit_intercept, **params) reg_sw.fit(X, y, sample_weight=sw) # We repeat the first fold 2 times and provide splits ourselves if sparse_container is not None: X = X.toarray() - X = np.r_[X[:n_samples], X] + X_rep = np.repeat(X[:n_samples], sw, axis=0) + ##Need to know number of repitions made in total + n_reps = X_rep.shape[0] - X.shape[0] + X = X_rep if sparse_container is not None: X = sparse_container(X) - y = np.r_[y[:n_samples], y] - groups = np.r_[ - np.full(2 * n_samples, 0), np.full(n_samples, 1), np.full(n_samples, 2) - ] + y = np.repeat(y[:n_samples], sw, axis=0) + groups = np.r_[np.full(n_reps, 0), np.full(n_samples, 1), np.full(n_samples, 2)] splits = list(LeaveOneGroupOut().split(X, groups=groups)) - reg = 
ElasticNetCV(alphas=alphas, cv=splits, fit_intercept=fit_intercept, **params) + reg = ElasticNetCV(cv=splits, fit_intercept=fit_intercept, **params) reg.fit(X, y) # ensure that we chose meaningful alphas, i.e. not boundaries - assert alphas[0] < reg.alpha_ < alphas[-1] + assert_allclose(reg_sw.alphas_, reg.alphas_) assert reg_sw.alpha_ == reg.alpha_ assert_allclose(reg_sw.coef_, reg.coef_) assert reg_sw.intercept_ == pytest.approx(reg.intercept_) From ac9f0908a79de774e75be4219c53c12819e6ef2a Mon Sep 17 00:00:00 2001 From: Shruti Nath Date: Sat, 29 Jun 2024 17:22:24 +0200 Subject: [PATCH 16/16] changed sample weight to be explicitly set as integers in sklearn/linear_model/tests/test_coordinate_descent/test_enet_cv_sample_weight_correctness --- sklearn/linear_model/tests/test_coordinate_descent.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index b60efd91e72e9..4e5a7c634c87a 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -1332,14 +1332,16 @@ def test_enet_cv_sample_weight_correctness(fit_intercept, sparse_container): # We repeat the first fold 2 times and provide splits ourselves if sparse_container is not None: X = X.toarray() - X_rep = np.repeat(X[:n_samples], sw, axis=0) + X_rep = np.repeat(X, sw.astype(int), axis=0) ##Need to know number of repitions made in total n_reps = X_rep.shape[0] - X.shape[0] X = X_rep if sparse_container is not None: X = sparse_container(X) - y = np.repeat(y[:n_samples], sw, axis=0) - groups = np.r_[np.full(n_reps, 0), np.full(n_samples, 1), np.full(n_samples, 2)] + y = np.repeat(y, sw.astype(int), axis=0) + groups = np.r_[ + np.full(n_reps + n_samples, 0), np.full(n_samples, 1), np.full(n_samples, 2) + ] splits = list(LeaveOneGroupOut().split(X, groups=groups)) reg = ElasticNetCV(cv=splits, 
fit_intercept=fit_intercept, **params) reg.fit(X, y)