31
31
from abc import ABCMeta , abstractmethod
32
32
33
33
import numpy as np
34
+ from scipy import linalg
34
35
import scipy .sparse as sp
35
36
36
37
from .base import BaseEstimator , TransformerMixin
39
40
from .utils import check_random_state
40
41
from .utils .extmath import safe_sparse_dot
41
42
from .utils .random import sample_without_replacement
42
- from .utils .validation import check_is_fitted
43
+ from .utils .validation import check_array , check_is_fitted
43
44
from .exceptions import DataDimensionalityWarning
44
45
45
-
46
46
__all__ = [
47
47
"SparseRandomProjection" ,
48
48
"GaussianRandomProjection" ,
@@ -302,11 +302,18 @@ class BaseRandomProjection(
302
302
303
303
@abstractmethod
def __init__(
    self,
    n_components="auto",
    *,
    eps=0.1,
    dense_output=False,
    compute_inverse_components=False,
    random_state=None,
):
    """Store the constructor arguments on the instance, unchanged.

    No validation or conversion is performed here; every argument is
    saved under an attribute of the same name.

    Parameters
    ----------
    n_components : int or "auto", default="auto"
        Requested dimensionality of the target projection space.

    eps : float, default=0.1
        Embedding quality parameter; smaller values lead to a better
        embedding and a higher number of dimensions.

    dense_output : bool, default=False
        Whether the projected data should be dense even for sparse input.

    compute_inverse_components : bool, default=False
        Whether the pseudo-inverse of the components is computed and
        stored during ``fit``.

    random_state : int, RandomState instance or None, default=None
        Controls the pseudo random number generator used to generate the
        projection matrix at fit time.
    """
    # Tuple assignments are evaluated left to right, so the attributes are
    # created in the same order as the parameters appear in the signature.
    self.n_components, self.eps = n_components, eps
    self.dense_output, self.compute_inverse_components = (
        dense_output,
        compute_inverse_components,
    )
    self.random_state = random_state
311
318
312
319
@abstractmethod
@@ -323,12 +330,18 @@ def _make_random_matrix(self, n_components, n_features):
323
330
324
331
Returns
325
332
-------
326
- components : {ndarray, sparse matrix} of shape \
327
- (n_components, n_features)
333
+ components : {ndarray, sparse matrix} of shape (n_components, n_features)
328
334
The generated random matrix. Sparse matrix will be of CSR format.
329
335
330
336
"""
331
337
338
def _compute_inverse_components(self):
    """Return the pseudo-inverse of the fitted projection matrix.

    Reads ``self.components_``. A sparse matrix is converted to a dense
    array first, so the result is always computed from dense data.

    Returns
    -------
    inverse_components : ndarray
        Pseudo-inverse of ``components_`` as returned by
        ``scipy.linalg.pinv``.
    """
    projection = self.components_
    dense_projection = projection.toarray() if sp.issparse(projection) else projection
    # check_finite=False skips scipy's scan of the input for NaN/inf values.
    return linalg.pinv(dense_projection, check_finite=False)
344
+
332
345
def fit (self , X , y = None ):
333
346
"""Generate a sparse random projection matrix.
334
347
@@ -399,6 +412,9 @@ def fit(self, X, y=None):
399
412
" not the proper shape."
400
413
)
401
414
415
+ if self .compute_inverse_components :
416
+ self .inverse_components_ = self ._compute_inverse_components ()
417
+
402
418
return self
403
419
404
420
def transform (self , X ):
@@ -437,6 +453,35 @@ def _n_features_out(self):
437
453
"""
438
454
return self .n_components
439
455
456
def inverse_transform(self, X):
    """Map projected data back to the original feature space.

    Returns an array X_original whose transform would be X. The result is
    dense even when X is sparse, which may use a lot of RAM.

    When `compute_inverse_components` is False, the pseudo-inverse of the
    components is recomputed on every call to `inverse_transform`, which
    can be costly; when it is True, the `inverse_components_` attribute is
    used instead.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_components)
        Data to be transformed back.

    Returns
    -------
    X_original : ndarray of shape (n_samples, n_features)
        Reconstructed data.
    """
    check_is_fitted(self)

    X = check_array(X, dtype=[np.float64, np.float32], accept_sparse=("csr", "csc"))

    # Use the stored pseudo-inverse when the estimator was configured to
    # keep one; otherwise derive it from the components for this call only.
    pseudo_inverse = (
        self.inverse_components_
        if self.compute_inverse_components
        else self._compute_inverse_components()
    )
    return X @ pseudo_inverse.T
484
+
440
485
def _more_tags (self ):
441
486
return {
442
487
"preserves_dtype" : [np .float64 , np .float32 ],
@@ -474,6 +519,11 @@ class GaussianRandomProjection(BaseRandomProjection):
474
519
Smaller values lead to better embedding and higher number of
475
520
dimensions (n_components) in the target projection space.
476
521
522
+ compute_inverse_components : bool, default=False
523
+ Learn the inverse transform by computing the pseudo-inverse of the
524
+ components during fit. Note that computing the pseudo-inverse does not
525
+ scale well to large matrices.
526
+
477
527
random_state : int, RandomState instance or None, default=None
478
528
Controls the pseudo random number generator used to generate the
479
529
projection matrix at fit time.
@@ -488,6 +538,12 @@ class GaussianRandomProjection(BaseRandomProjection):
488
538
components_ : ndarray of shape (n_components, n_features)
489
539
Random matrix used for the projection.
490
540
541
+ inverse_components_ : ndarray of shape (n_features, n_components)
542
+ Pseudo-inverse of the components, only computed if
543
+ `compute_inverse_components` is True.
544
+
545
+ .. versionadded:: 1.1
546
+
491
547
n_features_in_ : int
492
548
Number of features seen during :term:`fit`.
493
549
@@ -516,11 +572,19 @@ class GaussianRandomProjection(BaseRandomProjection):
516
572
(25, 2759)
517
573
"""
518
574
519
def __init__(
    self,
    n_components="auto",
    *,
    eps=0.1,
    compute_inverse_components=False,
    random_state=None,
):
    """Forward the constructor arguments to the base class initializer.

    This signature has no ``dense_output`` parameter; ``dense_output=True``
    is always passed to the parent.

    Parameters
    ----------
    n_components : int or "auto", default="auto"
        Requested dimensionality of the target projection space.

    eps : float, default=0.1
        Embedding quality parameter; smaller values lead to a better
        embedding and a higher number of dimensions.

    compute_inverse_components : bool, default=False
        Whether the pseudo-inverse of the components is computed during
        fit.

    random_state : int, RandomState instance or None, default=None
        Controls the pseudo random number generator used to generate the
        projection matrix at fit time.
    """
    super().__init__(
        dense_output=True,
        n_components=n_components,
        eps=eps,
        random_state=random_state,
        compute_inverse_components=compute_inverse_components,
    )
526
590
@@ -610,6 +674,14 @@ class SparseRandomProjection(BaseRandomProjection):
610
674
If False, the projected data uses a sparse representation if
611
675
the input is sparse.
612
676
677
+ compute_inverse_components : bool, default=False
678
+ Learn the inverse transform by computing the pseudo-inverse of the
679
+ components during fit. Note that the pseudo-inverse is always a dense
680
+ array, even if the training data was sparse. This means that it might be
681
+ necessary to call `inverse_transform` on a small batch of samples at a
682
+ time to avoid exhausting the available memory on the host. Moreover,
683
+ computing the pseudo-inverse does not scale well to large matrices.
684
+
613
685
random_state : int, RandomState instance or None, default=None
614
686
Controls the pseudo random number generator used to generate the
615
687
projection matrix at fit time.
@@ -625,6 +697,12 @@ class SparseRandomProjection(BaseRandomProjection):
625
697
Random matrix used for the projection. Sparse matrix will be of CSR
626
698
format.
627
699
700
+ inverse_components_ : ndarray of shape (n_features, n_components)
701
+ Pseudo-inverse of the components, only computed if
702
+ `compute_inverse_components` is True.
703
+
704
+ .. versionadded:: 1.1
705
+
628
706
density_ : float in range 0.0 - 1.0
629
707
Concrete density computed when density = "auto".
630
708
@@ -676,12 +754,14 @@ def __init__(
676
754
density = "auto" ,
677
755
eps = 0.1 ,
678
756
dense_output = False ,
757
+ compute_inverse_components = False ,
679
758
random_state = None ,
680
759
):
681
760
super ().__init__ (
682
761
n_components = n_components ,
683
762
eps = eps ,
684
763
dense_output = dense_output ,
764
+ compute_inverse_components = compute_inverse_components ,
685
765
random_state = random_state ,
686
766
)
687
767
0 commit comments