Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit dc13d25

Browse files
committed
added PCA helper class to mlab and deprecated prepca
svn path=/trunk/matplotlib/; revision=7926
1 parent ae377a2 commit dc13d25
Copy full SHA for dc13d25

File tree

Expand file treeCollapse file tree

1 file changed

+81
-0
lines changed
Filter options
Expand file treeCollapse file tree

1 file changed

+81
-0
lines changed

‎lib/matplotlib/mlab.py

Copy file name to clipboardExpand all lines: lib/matplotlib/mlab.py
+81Lines changed: 81 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -759,6 +759,9 @@ def longest_ones(x):
759759

760760
def prepca(P, frac=0):
761761
"""
762+
763+
WARNING: this function is deprecated -- please see class PCA instead
764+
762765
Compute the principal components of *P*. *P* is a (*numVars*,
763766
*numObs*) array. *frac* is the minimum fraction of variance that a
764767
component must contain to be included.
@@ -778,6 +781,7 @@ def prepca(P, frac=0):
778781
R13 Neural Network Toolbox but is not found in later versions;
779782
its successor seems to be called "processpcs".
780783
"""
784+
warnings.warn('This function is deprecated -- see class PCA instead')
781785
U,s,v = np.linalg.svd(P)
782786
varEach = s**2/P.shape[1]
783787
totVar = varEach.sum()
@@ -789,6 +793,83 @@ def prepca(P, frac=0):
789793
Pcomponents = np.dot(Trans,P)
790794
return Pcomponents, Trans, fracVar[ind]
791795

796+
797+
class PCA:
    """
    Principal component analysis of a data set, computed via the SVD.

    The training data are standardized column by column (mean removed,
    divided by the standard deviation) and decomposed with
    :func:`numpy.linalg.svd`.  Use :meth:`project` to map new points
    into the principal-component space and :meth:`center` to apply the
    same standardization to new data.
    """

    def __init__(self, a):
        """
        Compute the SVD of *a* and store data for PCA.  Use
        :meth:`project` to project data onto a reduced set of
        dimensions.

        Inputs:

          *a*: a numobservations x numdims array (anything
          :func:`numpy.asarray` accepts)

        Attrs:

          *a* : a centered unit sigma version of input *a*

          *numrows*, *numcols*: the dimensions of *a*

          *mu* : a numdims array of means of *a*

          *sigma* : a numdims array of standard deviations of *a*

          *fracs* : the proportion of variance of each of the principal
          components

          *Wt* : the weight matrix for projecting a numdims point or
          array into PCA space

          *Y* : *a* projected into PCA space

        Raises :exc:`RuntimeError` if *a* has fewer rows than columns.
        """
        # Accept nested sequences as well as arrays, as project() does.
        a = np.asarray(a)
        n, m = a.shape
        if n < m:
            raise RuntimeError(
                'we assume data in a is organized with numrows>numcols')

        self.numrows, self.numcols = n, m
        self.mu = a.mean(axis=0)
        self.sigma = a.std(axis=0)

        a = self.center(a)
        self.a = a

        # Only the singular values and right singular vectors are needed.
        _, s, Vh = np.linalg.svd(a, full_matrices=False)

        # The normalization constant cancels in the ratio below; it is
        # kept so the intermediate values match the original definition.
        variances = s**2 / float(len(s))
        self.fracs = variances / variances.sum()

        self.Wt = Vh
        # Rows of Vh are the principal axes, so a . Vh^T gives the scores.
        self.Y = np.dot(a, Vh.T)

    def project(self, x, minfrac=0.):
        """
        Project *x* onto the principal axes, dropping any axes where
        the fraction of variance is less than *minfrac*.

        *x* may be a single numdims point or an array of any rank whose
        last axis has length numdims; the result has the same leading
        shape with the last axis reduced to the retained components.

        Raises :exc:`ValueError` if the last axis of *x* does not have
        length *numcols*.
        """
        x = np.asarray(x)

        if x.shape[-1] != self.numcols:
            raise ValueError(
                'Expected an array with dims[-1]==%d' % self.numcols)

        # Contract over the last axis so inputs of any rank work.
        Y = np.dot(self.center(x), self.Wt.T)
        mask = self.fracs >= minfrac
        # The components always live on the last axis of Y.
        return Y[..., mask]

    def center(self, x):
        """
        Center the data using the mean and sigma from training set *a*.

        Columns whose training standard deviation is exactly zero are
        only mean-shifted (divided by 1), which avoids a division by
        zero for constant features.
        """
        sigma = np.where(self.sigma == 0, 1.0, self.sigma)
        return (x - self.mu) / sigma
872+
792873
def prctile(x, p = (0.0, 25.0, 50.0, 75.0, 100.0)):
793874
"""
794875
Return the percentiles of *x*. *p* can either be a sequence of

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.