glemaitre
diff --git a/‎doc/modules/random_projection.rst
Copy file name to clipboardExpand all lines: doc/modules/random_projection.rst
+39Lines changed: 39 additions & 0 deletions b/‎doc/modules/random_projection.rst
Copy file name to clipboardExpand all lines: doc/modules/random_projection.rst
+39Lines changed: 39 additions & 0 deletions
diff --git a/‎doc/whats_new/v1.1.rst
Copy file name to clipboardExpand all lines: doc/whats_new/v1.1.rst
+8Lines changed: 8 additions & 0 deletions b/‎doc/whats_new/v1.1.rst
Copy file name to clipboardExpand all lines: doc/whats_new/v1.1.rst
+8Lines changed: 8 additions & 0 deletions
diff --git a/‎sklearn/random_projection.py
Copy file name to clipboardExpand all lines: sklearn/random_projection.py
+86-6Lines changed: 86 additions & 6 deletions b/‎sklearn/random_projection.py
Copy file name to clipboardExpand all lines: sklearn/random_projection.py
+86-6Lines changed: 86 additions & 6 deletions
diff --git a/‎sklearn/tests/test_random_projection.py
Copy file name to clipboardExpand all lines: sklearn/tests/test_random_projection.py
+54-2Lines changed: 54 additions & 2 deletions b/‎sklearn/tests/test_random_projection.py
Copy file name to clipboardExpand all lines: sklearn/tests/test_random_projection.py
+54-2Lines changed: 54 additions & 2 deletions
@@ -160,3 +160,42 @@ projection transformer::
    In Proceedings of the 12th ACM SIGKDD international conference on
    Knowledge discovery and data mining (KDD '06). ACM, New York, NY, USA,
    287-296.
+
+
+.. _random_projection_inverse_transform:
+
+Inverse Transform
+=================
+The random projection transformers have a ``compute_inverse_components`` parameter. When
+set to True, after creating the random ``components_`` matrix during fitting,
+the transformer computes the pseudo-inverse of this matrix and stores it as
+``inverse_components_``. The ``inverse_components_`` matrix has shape
+:math:`n_{features} \times n_{components}`, and it is always a dense matrix,
+regardless of whether the components matrix is sparse or dense. So depending on
+the number of features and components, it may use a lot of memory.
+
+When the ``inverse_transform`` method is called, it computes the product of the
+input ``X`` and the transpose of the inverse components. If the inverse components have
+been computed during fit, they are reused at each call to ``inverse_transform``.
+Otherwise they are recomputed each time, which can be costly. The result is always
+dense, even if ``X`` is sparse.
+
+Here is a small code example which illustrates how to use the inverse transform
+feature::
+
+  >>> import numpy as np
+  >>> from sklearn.random_projection import SparseRandomProjection
+  >>> X = np.random.rand(100, 10000)
+  >>> transformer = SparseRandomProjection(
+  ...   compute_inverse_components=True
+  ... )
+  ...
+  >>> X_new = transformer.fit_transform(X)
+  >>> X_new.shape
+  (100, 3947)
+  >>> X_new_inversed = transformer.inverse_transform(X_new)
+  >>> X_new_inversed.shape
+  (100, 10000)
+  >>> X_new_again = transformer.transform(X_new_inversed)
+  >>> np.allclose(X_new, X_new_again)
+  True
@@ -800,6 +800,14 @@ Changelog
   :class:`random_projection.GaussianRandomProjection` preserves dtype for
   `numpy.float32`. :pr:`22114` by :user:`Takeshi Oura <takoika>`.
 
+- |Enhancement| Adds an :meth:`inverse_transform` method and a
+  `compute_inverse_components` parameter to all transformers in the
+  :mod:`~sklearn.random_projection` module:
+  :class:`~sklearn.random_projection.GaussianRandomProjection` and
+  :class:`~sklearn.random_projection.SparseRandomProjection`. When the parameter is set
+  to True, the pseudo-inverse of the components is computed during `fit` and stored as
+  `inverse_components_`. :pr:`21701` by :user:`Aurélien Geron <ageron>`.
+
 - |API| Adds :term:`get_feature_names_out` to all transformers in the
   :mod:`~sklearn.random_projection` module:
   :class:`~sklearn.random_projection.GaussianRandomProjection` and
 
@@ -31,6 +31,7 @@
 from abc import ABCMeta, abstractmethod
 
 import numpy as np
+from scipy import linalg
 import scipy.sparse as sp
 
 from .base import BaseEstimator, TransformerMixin
@@ -39,10 +40,9 @@
 from .utils import check_random_state
 from .utils.extmath import safe_sparse_dot
 from .utils.random import sample_without_replacement
-from .utils.validation import check_is_fitted
+from .utils.validation import check_array, check_is_fitted
 from .exceptions import DataDimensionalityWarning
 
-
 __all__ = [
     "SparseRandomProjection",
     "GaussianRandomProjection",
@@ -302,11 +302,18 @@ class BaseRandomProjection(
 
     @abstractmethod
     def __init__(
-        self, n_components="auto", *, eps=0.1, dense_output=False, random_state=None
+        self,
+        n_components="auto",
+        *,
+        eps=0.1,
+        dense_output=False,
+        compute_inverse_components=False,
+        random_state=None,
     ):
         self.n_components = n_components
         self.eps = eps
         self.dense_output = dense_output
+        self.compute_inverse_components = compute_inverse_components
         self.random_state = random_state
 
     @abstractmethod
@@ -323,12 +330,18 @@ def _make_random_matrix(self, n_components, n_features):
 
         Returns
         -------
-        components : {ndarray, sparse matrix} of shape \
-                (n_components, n_features)
+        components : {ndarray, sparse matrix} of shape (n_components, n_features)
             The generated random matrix. Sparse matrix will be of CSR format.
 
         """
 
+    def _compute_inverse_components(self):
+        """Compute the pseudo-inverse of the (densified) components."""
+        components = self.components_
+        if sp.issparse(components):
+            components = components.toarray()
+        return linalg.pinv(components, check_finite=False)
+
     def fit(self, X, y=None):
         """Generate a sparse random projection matrix.
 
@@ -399,6 +412,9 @@ def fit(self, X, y=None):
             " not the proper shape."
         )
 
+        if self.compute_inverse_components:
+            self.inverse_components_ = self._compute_inverse_components()
+
         return self
 
     def transform(self, X):
@@ -437,6 +453,35 @@ def _n_features_out(self):
         """
         return self.n_components
 
+    def inverse_transform(self, X):
+        """Project data back to its original space.
+
+        Returns an array X_original whose transform would be X. Note that even
+        if X is sparse, X_original is dense: this may use a lot of RAM.
+
+        If `compute_inverse_components` is False, the inverse of the components is
+        computed during each call to `inverse_transform` which can be costly.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_components)
+            Data to be transformed back.
+
+        Returns
+        -------
+        X_original : ndarray of shape (n_samples, n_features)
+            Reconstructed data.
+        """
+        check_is_fitted(self)
+
+        X = check_array(X, dtype=[np.float64, np.float32], accept_sparse=("csr", "csc"))
+
+        if self.compute_inverse_components:
+            return X @ self.inverse_components_.T
+
+        inverse_components = self._compute_inverse_components()
+        return X @ inverse_components.T
+
     def _more_tags(self):
         return {
             "preserves_dtype": [np.float64, np.float32],
@@ -474,6 +519,11 @@ class GaussianRandomProjection(BaseRandomProjection):
         Smaller values lead to better embedding and higher number of
         dimensions (n_components) in the target projection space.
 
+    compute_inverse_components : bool, default=False
+        Learn the inverse transform by computing the pseudo-inverse of the
+        components during fit. Note that computing the pseudo-inverse does not
+        scale well to large matrices.
+
     random_state : int, RandomState instance or None, default=None
         Controls the pseudo random number generator used to generate the
         projection matrix at fit time.
@@ -488,6 +538,12 @@ class GaussianRandomProjection(BaseRandomProjection):
     components_ : ndarray of shape (n_components, n_features)
         Random matrix used for the projection.
 
+    inverse_components_ : ndarray of shape (n_features, n_components)
+        Pseudo-inverse of the components, only computed if
+        `compute_inverse_components` is True.
+
+        .. versionadded:: 1.1
+
     n_features_in_ : int
         Number of features seen during :term:`fit`.
 
@@ -516,11 +572,19 @@ class GaussianRandomProjection(BaseRandomProjection):
     (25, 2759)
     """
 
-    def __init__(self, n_components="auto", *, eps=0.1, random_state=None):
+    def __init__(
+        self,
+        n_components="auto",
+        *,
+        eps=0.1,
+        compute_inverse_components=False,
+        random_state=None,
+    ):
         super().__init__(
             n_components=n_components,
             eps=eps,
             dense_output=True,
+            compute_inverse_components=compute_inverse_components,
             random_state=random_state,
         )
 
@@ -610,6 +674,14 @@ class SparseRandomProjection(BaseRandomProjection):
         If False, the projected data uses a sparse representation if
         the input is sparse.
 
+    compute_inverse_components : bool, default=False
+        Learn the inverse transform by computing the pseudo-inverse of the
+        components during fit. Note that the pseudo-inverse is always a dense
+        array, even if the training data was sparse. This means that it might be
+        necessary to call `inverse_transform` on a small batch of samples at a
+        time to avoid exhausting the available memory on the host. Moreover,
+        computing the pseudo-inverse does not scale well to large matrices.
+
     random_state : int, RandomState instance or None, default=None
         Controls the pseudo random number generator used to generate the
         projection matrix at fit time.
@@ -625,6 +697,12 @@ class SparseRandomProjection(BaseRandomProjection):
         Random matrix used for the projection. Sparse matrix will be of CSR
         format.
 
+    inverse_components_ : ndarray of shape (n_features, n_components)
+        Pseudo-inverse of the components, only computed if
+        `compute_inverse_components` is True.
+
+        .. versionadded:: 1.1
+
     density_ : float in range 0.0 - 1.0
         Concrete density computed from when density = "auto".
 
@@ -676,12 +754,14 @@ def __init__(
         density="auto",
         eps=0.1,
         dense_output=False,
+        compute_inverse_components=False,
         random_state=None,
     ):
         super().__init__(
             n_components=n_components,
             eps=eps,
             dense_output=dense_output,
+            compute_inverse_components=compute_inverse_components,
             random_state=random_state,
         )
 
 
@@ -1,5 +1,6 @@
 import functools
 from typing import List, Any
+import warnings
 
 import numpy as np
 import scipy.sparse as sp
@@ -31,8 +32,8 @@
 
 # Make some random data with uniformly located non zero entries with
 # Gaussian distributed values
-def make_sparse_random_data(n_samples, n_features, n_nonzeros):
-    rng = np.random.RandomState(0)
+def make_sparse_random_data(n_samples, n_features, n_nonzeros, random_state=0):
+    rng = np.random.RandomState(random_state)
     data_coo = sp.coo_matrix(
         (
             rng.randn(n_nonzeros),
@@ -377,6 +378,57 @@ def test_random_projection_feature_names_out(random_projection_cls):
     assert_array_equal(names_out, expected_names_out)
 
 
+@pytest.mark.parametrize("n_samples", (2, 9, 10, 11, 1000))
+@pytest.mark.parametrize("n_features", (2, 9, 10, 11, 1000))
+@pytest.mark.parametrize("random_projection_cls", all_RandomProjection)
+@pytest.mark.parametrize("compute_inverse_components", [True, False])
+def test_inverse_transform(
+    n_samples,
+    n_features,
+    random_projection_cls,
+    compute_inverse_components,
+    global_random_seed,
+):
+    n_components = 10
+
+    random_projection = random_projection_cls(
+        n_components=n_components,
+        compute_inverse_components=compute_inverse_components,
+        random_state=global_random_seed,
+    )
+
+    X_dense, X_csr = make_sparse_random_data(
+        n_samples,
+        n_features,
+        n_samples * n_features // 100 + 1,
+        random_state=global_random_seed,
+    )
+
+    for X in [X_dense, X_csr]:
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore",
+                message=(
+                    "The number of components is higher than the number of features"
+                ),
+                category=DataDimensionalityWarning,
+            )
+            projected = random_projection.fit_transform(X)
+
+        if compute_inverse_components:
+            assert hasattr(random_projection, "inverse_components_")
+            inv_components = random_projection.inverse_components_
+            assert inv_components.shape == (n_features, n_components)
+
+        projected_back = random_projection.inverse_transform(projected)
+        assert projected_back.shape == X.shape
+
+        projected_again = random_projection.transform(projected_back)
+        if hasattr(projected, "toarray"):
+            projected = projected.toarray()
+        assert_allclose(projected, projected_again, rtol=1e-7, atol=1e-10)
+
+
 @pytest.mark.parametrize("random_projection_cls", all_RandomProjection)
 @pytest.mark.parametrize(
     "input_dtype, expected_dtype",