Revision: 7926
http://matplotlib.svn.sourceforge.net/matplotlib/?rev=7926&view=rev
Author: jdh2358
Date: 2009-11-03 20:27:23 +0000 (Tue, 03 Nov 2009)
Log Message:
-----------
added PCA helper class to mlab and deprecated prepca
Modified Paths:
--------------
trunk/matplotlib/lib/matplotlib/mlab.py
Modified: trunk/matplotlib/lib/matplotlib/mlab.py
===================================================================
--- trunk/matplotlib/lib/matplotlib/mlab.py 2009-11-03 17:57:52 UTC (rev 7925)
+++ trunk/matplotlib/lib/matplotlib/mlab.py 2009-11-03 20:27:23 UTC (rev 7926)
@@ -759,6 +759,9 @@
def prepca(P, frac=0):
"""
+
+ WARNING: this function is deprecated -- please see class PCA instead
+
Compute the principal components of *P*. *P* is a (*numVars*,
*numObs*) array. *frac* is the minimum fraction of variance that a
component must contain to be included.
@@ -778,6 +781,7 @@
R13 Neural Network Toolbox but is not found in later versions;
its successor seems to be called "processpcs".
"""
+ warnings.warn('This function is deprecated -- see class PCA instead')
U,s,v = np.linalg.svd(P)
varEach = s**2/P.shape[1]
totVar = varEach.sum()
@@ -789,6 +793,83 @@
Pcomponents = np.dot(Trans,P)
return Pcomponents, Trans, fracVar[ind]
+
+class PCA:
+ def __init__(self, a):
+ """
+ compute the SVD of a and store data for PCA. Use project to
+ project the data onto a reduced set of dimensions
+
+ Inputs:
+
+ *a*: a numobservations x numdims array
+
+ Attrs:
+
+ *a* a centered unit sigma version of input a
+
+ *numrows*, *numcols*: the dimensions of a
+
+ *mu* : a numdims array of means of a
+
+ *sigma* : a numdims array of standard deviations of a
+
+ *fracs* : the proportion of variance of each of the principal components
+
+ *Wt* : the weight vector for projecting a numdims point or array into PCA space
+
+ *Y* : a projected into PCA space
+
+ """
+ n, m = a.shape
+ if n<m:
+ raise RuntimeError('we assume data in a is organized with numrows>numcols')
+
+ self.numrows, self.numcols = n, m
+ self.mu = a.mean(axis=0)
+ self.sigma = a.std(axis=0)
+
+ a = self.center(a)
+
+ self.a = a
+
+ U, s, Vh = np.linalg.svd(a, full_matrices=False)
+
+
+ Y = np.dot(Vh, a.T).T
+
+ vars = s**2/float(len(s))
+ self.fracs = vars/vars.sum()
+
+
+ self.Wt = Vh
+ self.Y = Y
+
+
+ def project(self, x, minfrac=0.):
+ 'project x onto the principal axes, dropping any axes where fraction of variance<minfrac'
+ x = np.asarray(x)
+
+ ndims = len(x.shape)
+
+ if (x.shape[-1]!=self.numcols):
+ raise ValueError('Expected an array with dims[-1]==%d'%self.numcols)
+
+
+ Y = np.dot(self.Wt, self.center(x).T).T
+ mask = self.fracs>=minfrac
+ if ndims==2:
+ Yreduced = Y[:,mask]
+ else:
+ Yreduced = Y[mask]
+ return Yreduced
+
+
+
+ def center(self, x):
+ 'center the data using the mean and sigma from training set a'
+ return (x - self.mu)/self.sigma
+
def prctile(x, p = (0.0, 25.0, 50.0, 75.0, 100.0)):
"""
Return the percentiles of *x*. *p* can either be a sequence of
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Come build with us! The BlackBerry(R) Developer Conference in SF, CA
is the only developer event you need to attend this year. Jumpstart your
developing skills, take BlackBerry mobile applications to market and stay
ahead of the curve. Join us from November 9 - 12, 2009. Register now!
http://p.sf.net/sfu/devconference
_______________________________________________
Matplotlib-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/matplotlib-checkins