Revision: 7926
          http://matplotlib.svn.sourceforge.net/matplotlib/?rev=7926&view=rev
Author:   jdh2358
Date:     2009-11-03 20:27:23 +0000 (Tue, 03 Nov 2009)

Log Message:
-----------
added PCA helper class to mlab and deprecated prepca

Modified Paths:
--------------
    trunk/matplotlib/lib/matplotlib/mlab.py

Modified: trunk/matplotlib/lib/matplotlib/mlab.py
===================================================================
--- trunk/matplotlib/lib/matplotlib/mlab.py     2009-11-03 17:57:52 UTC (rev 7925)
+++ trunk/matplotlib/lib/matplotlib/mlab.py     2009-11-03 20:27:23 UTC (rev 7926)
@@ -759,6 +759,9 @@
 
 def prepca(P, frac=0):
     """
+
+    WARNING: this function is deprecated -- please see class PCA instead
+    
     Compute the principal components of *P*.  *P* is a (*numVars*,
     *numObs*) array.  *frac* is the minimum fraction of variance that a
     component must contain to be included.
@@ -778,6 +781,7 @@
     R13 Neural Network Toolbox but is not found in later versions;
     its successor seems to be called "processpcs".
     """
+    warnings.warn('This function is deprecated -- see class PCA instead')
     U,s,v = np.linalg.svd(P)
     varEach = s**2/P.shape[1]
     totVar = varEach.sum()
@@ -789,6 +793,83 @@
     Pcomponents = np.dot(Trans,P)
     return Pcomponents, Trans, fracVar[ind]
 
+
+class PCA:
+    def __init__(self, a):
+        """
+        compute the SVD of a and store data for PCA.  Use project to
+        project the data onto a reduced set of dimensions
+
+        Inputs:
+
+          *a*: a numobservations x numdims array
+
+        Attrs:
+
+          *a* a centered unit sigma version of input a
+
+          *numrows*, *numcols*: the dimensions of a
+
+          *mu* : a numdims array of means of a
+
+          *sigma* : a numdims array of standard deviations of a
+
+          *fracs* : the proportion of variance of each of the principal components
+
+          *Wt* : the weight vector for projecting a numdims point or array into PCA space
+
+          *Y* : a projected into PCA space
+
+        """
+        n, m = a.shape
+        if n<m:
+            raise RuntimeError('we assume data in a is organized with 
numrows>numcols')
+
+        self.numrows, self.numcols = n, m
+        self.mu = a.mean(axis=0)
+        self.sigma = a.std(axis=0)
+
+        a = self.center(a)
+
+        self.a = a
+
+        U, s, Vh = np.linalg.svd(a, full_matrices=False)
+
+
+        Y = np.dot(Vh, a.T).T
+
+        vars = s**2/float(len(s))
+        self.fracs = vars/vars.sum()
+
+
+        self.Wt = Vh
+        self.Y = Y
+
+
+    def project(self, x, minfrac=0.):
+        'project x onto the principal axes, dropping any axes where fraction of variance<minfrac'
+        x = np.asarray(x)
+
+        ndims = len(x.shape)
+
+        if (x.shape[-1]!=self.numcols):
+            raise ValueError('Expected an array with 
dims[-1]==%d'%self.numcols)
+
+
+        Y = np.dot(self.Wt, self.center(x).T).T
+        mask = self.fracs>=minfrac
+        if ndims==2:
+            Yreduced = Y[:,mask]
+        else:
+            Yreduced = Y[mask]
+        return Yreduced
+
+
+
+    def center(self, x):
+        'center the data using the mean and sigma from training set a'
+        return (x - self.mu)/self.sigma
+
 def prctile(x, p = (0.0, 25.0, 50.0, 75.0, 100.0)):
     """
     Return the percentiles of *x*.  *p* can either be a sequence of


This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.

------------------------------------------------------------------------------
Come build with us! The BlackBerry(R) Developer Conference in SF, CA
is the only developer event you need to attend this year. Jumpstart your
developing skills, take BlackBerry mobile applications to market and stay 
ahead of the curve. Join us from November 9 - 12, 2009. Register now!
http://p.sf.net/sfu/devconference
_______________________________________________
Matplotlib-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/matplotlib-checkins

Reply via email to