scikit-learn · rth · Mar 15, 2021 · Feb 15, 2021 · Feb 15, 2021 · Feb 15, 2021
diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
@@ -216,6 +216,15 @@ Changelog
  for non-English characters. :pr:`18959` by :user:`Zero <Zeroto521>`
  and :user:`wstates <wstates>`.

+:mod:`sklearn.utils`
+....................
+
+- |Enhancement| Deprecated the default value `random_state=0` of
+  :func:`~sklearn.utils.extmath.randomized_svd`. Starting in 1.2,
+  the default value of `random_state` will be set to `None`.
+  :pr:`19459` by :user:`Cindy Bezuidenhout <cinbez>` and
+  :user:`Clifford Akai-Nettey <cliffordEmmanuel>`.
+
 :mod:`sklearn.calibration`
 ............................


diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
@@ -245,7 +245,7 @@ def randomized_range_finder(A, *, size, n_iter,
 @_deprecate_positional_args
 def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto',
                   power_iteration_normalizer='auto', transpose='auto',
-                   flip_sign=True, random_state=0):
+                   flip_sign=True, random_state='warn'):
    """Computes a truncated randomized SVD.

    Parameters
@@ -296,11 +296,17 @@ def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto',
        set to `True`, the sign ambiguity is resolved by making the largest
        loadings for each component in the left singular vectors positive.

-    random_state : int, RandomState instance or None, default=0
-        The seed of the pseudo random number generator to use when shuffling
-        the data, i.e. getting the random vectors to initialize the algorithm.
-        Pass an int for reproducible results across multiple function calls.
-        See :term:`Glossary <random_state>`.
+    random_state : int, RandomState instance or None, default='warn'
+        The seed of the pseudo random number generator to use when
+        shuffling the data, i.e. getting the random vectors to initialize
+        the algorithm. Pass an int for reproducible results across multiple
+        function calls. See :term:`Glossary <random_state>`.
+
+        .. versionchanged:: 1.2
+            The previous behavior (`random_state=0`) is deprecated, and
+            from v1.2 the default value will be `random_state=None`. Set
+            the value of `random_state` explicitly to suppress the deprecation
+            warning.

    Notes
    -----
@@ -326,10 +332,22 @@ def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto',
    """
    if isinstance(M, (sparse.lil_matrix, sparse.dok_matrix)):
        warnings.warn("Calculating SVD of a {} is expensive. "
-                      "csr_matrix is more efficient.".format(
-                          type(M).__name__),
+                      "csr_matrix is more efficient.".format(type(M).__name__),
                      sparse.SparseEfficiencyWarning)

+    if random_state == 'warn':
+        warnings.warn(
+            "If 'random_state' is not supplied, the current default "
+            "is to use 0 as a fixed seed. This will change to  "
+            "None in version 1.2 leading to non-deterministic results "
+            "that better reflect nature of the randomized_svd solver. "
+            "If you want to silence this warning, set 'random_state' "
+            "to an integer seed or to None explicitly depending "
+            "if you want your code to be deterministic or not.",
+            FutureWarning
+        )
+        random_state = 0
+
    random_state = check_random_state(random_state)
    n_random = n_components + n_oversamples
    n_samples, n_features = M.shape

diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py
@@ -249,15 +249,17 @@ def test_randomized_svd_infinite_rank():
        # compute the singular values of X using the fast approximate method
        # without the iterated power method
        _, sa, _ = randomized_svd(X, k, n_iter=0,
-                                  power_iteration_normalizer=normalizer)
+                                  power_iteration_normalizer=normalizer,
+                                  random_state=0)

        # the approximation does not tolerate the noise:
        assert np.abs(s[:k] - sa).max() > 0.1

        # compute the singular values of X using the fast approximate method
        # with iterated power method
        _, sap, _ = randomized_svd(X, k, n_iter=5,
-                                   power_iteration_normalizer=normalizer)
+                                   power_iteration_normalizer=normalizer,
+                                   random_state=0)

        # the iterated power method is still managing to get most of the
        # structure at the requested rank
@@ -307,11 +309,13 @@ def test_randomized_svd_power_iteration_normalizer():

    # Check that it diverges with many (non-normalized) power iterations
    U, s, Vt = randomized_svd(X, n_components, n_iter=2,
-                              power_iteration_normalizer='none')
+                              power_iteration_normalizer='none',
+                              random_state=0)
    A = X - U.dot(np.diag(s).dot(Vt))
    error_2 = linalg.norm(A, ord='fro')
    U, s, Vt = randomized_svd(X, n_components, n_iter=20,
-                              power_iteration_normalizer='none')
+                              power_iteration_normalizer='none',
+                              random_state=0)
    A = X - U.dot(np.diag(s).dot(Vt))
    error_20 = linalg.norm(A, ord='fro')
    assert np.abs(error_2 - error_20) > 100
@@ -401,14 +405,15 @@ def max_loading_is_positive(u, v):
    mat = np.arange(10 * 8).reshape(10, -1)

    # Without transpose
-    u_flipped, _, v_flipped = randomized_svd(mat, 3, flip_sign=True)
+    u_flipped, _, v_flipped = randomized_svd(mat, 3, flip_sign=True,
+                                             random_state=0)
    u_based, v_based = max_loading_is_positive(u_flipped, v_flipped)
    assert u_based
    assert not v_based

    # With transpose
    u_flipped_with_transpose, _, v_flipped_with_transpose = randomized_svd(
-        mat, 3, flip_sign=True, transpose=True)
+        mat, 3, flip_sign=True, transpose=True, random_state=0)
    u_based, v_based = max_loading_is_positive(
        u_flipped_with_transpose, v_flipped_with_transpose)
    assert u_based