Revision: 6764
http://matplotlib.svn.sourceforge.net/matplotlib/?rev=6764&view=rev
Author: jdh2358
Date: 2009-01-08 03:23:42 +0000 (Thu, 08 Jan 2009)
Log Message:
-----------
added Eric's simple loop optimizations
Modified Paths:
--------------
trunk/py4science/examples/pyrex/nnbf/nnbf.pyx
Added Paths:
-----------
trunk/py4science/examples/pyrex/nnbf/nnbf_v2.pyx
Modified: trunk/py4science/examples/pyrex/nnbf/nnbf.pyx
===================================================================
--- trunk/py4science/examples/pyrex/nnbf/nnbf.pyx	2009-01-08 02:50:18 UTC (rev 6763)
+++ trunk/py4science/examples/pyrex/nnbf/nnbf.pyx	2009-01-08 03:23:42 UTC (rev 6764)
@@ -36,7 +36,7 @@
"""
#cdef np.ndarray[double, ndim=2] inner_data
-
+
self.n = n
self.numrows = 100
# XXX how to create mepty as contiguous w/o copy?
@@ -78,30 +78,36 @@
         return a list of indices into data which are within radius
         from point
         """
-        cdef int i, neighbor
+        cdef int i, neighbor, n
         cdef double d2max
         cdef np.ndarray[double, ndim=1] pp
-        cdef np.ndarray[double, ndim=1] row
+        # avoid python array indexing in the inner loop
+        #cdef np.ndarray[double, ndim=1] row
+        cdef double * dataptr
+        dataptr = <double*> self.data.data
         if len(point)!=self.n:
             raise ValueError('Expected a length %d vector'%self.n)
-
+
         pp = np.asarray(point).astype(np.float)
         d2max = radius*radius
         neighbors = []
+        # don't do a python lookup inside the loop
+        n = self.n
         for i in range(self.numpoints):
             # XXX : is there a more efficient way to access the row
             # data?  Can/should we be using raw_data here?
-            row = self.data[i]
+            #row = self.data[i]
             neighbor = is_neighbor(
-                self.n,
-                <double*>row.data,
+                n,
+                #<double*>row.data,
+                dataptr + i*n,  # row i starts i*n doubles into the buffer
                 <double*>pp.data,
                 d2max)
-
+
             # if the number of points in the cluster is small, the
             # python list performance should not kill us
             if neighbor:
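
The diff above amounts to two standard Pyrex/Cython loop optimizations: hoist the
Python attribute lookups (self.n, self.data) into C locals before the loop, and
step through the contiguous data buffer with raw pointer arithmetic instead of
building a numpy row object on every iteration. A minimal sketch of the same
pattern on a standalone function (the name row_sums and its argument are
hypothetical, not part of this commit):

# sketch only: the same hoist-the-lookup / raw-pointer pattern as the diff
# above, applied to a hypothetical row_sums function
import numpy as np
cimport numpy as np

def row_sums(np.ndarray[double, ndim=2] arr):
    """return a python list with the sum of each row of a C-contiguous array"""
    cdef int i, j, nrows, ncols
    cdef double s
    cdef double* dataptr

    # hoist the shape/attribute lookups out of the loop into C locals
    nrows = arr.shape[0]
    ncols = arr.shape[1]
    dataptr = <double*> arr.data

    sums = []
    for i in range(nrows):
        s = 0.
        for j in range(ncols):
            # row i starts i*ncols doubles into the buffer
            s = s + dataptr[i*ncols + j]
        sums.append(s)
    return sums

Either change alone helps; together they keep the inner loop free of Python
C-API calls, which is the point of the "simple loop optimizations" in the log
message above.
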
Added: trunk/py4science/examples/pyrex/nnbf/nnbf_v2.pyx
===================================================================
--- trunk/py4science/examples/pyrex/nnbf/nnbf_v2.pyx	                        (rev 0)
+++ trunk/py4science/examples/pyrex/nnbf/nnbf_v2.pyx	2009-01-08 03:23:42 UTC (rev 6764)
@@ -0,0 +1,118 @@
+"""
+A brute force nearest neighbor routine with incremental add. The
+internal array data structure grows as you add points
+"""
+
+import numpy as np
+cimport numpy as np
+
+cdef extern from "math.h":
+    float sqrt(float)
+
+cdef inline int is_neighbor(int n, double*row, double*pp, double d2max):
+    """
+    return 1 if the sum-of-squares of n length array row[j]-pp[j] <= d2max
+    """
+    cdef int j
+    cdef double d, d2
+
+    d2 = 0.
+
+    for j in range(n):
+        d = row[j] - pp[j]
+        d2 += d*d
+        if d2>d2max:
+            return 0
+    return 1
+
+cdef class NNBF:
+    cdef readonly object data
+    #cdef double* raw_data
+    cdef readonly int n, numrows, numpoints
+
+    def __init__(self, n):
+        """
+        create a buffer to hold n dimensional points
+        """
+        #cdef np.ndarray[double, ndim=2] inner_data
+
+
+        self.n = n
+        self.numrows = 100
+        # XXX how to create empty as contiguous w/o copy?
+        self.data = np.empty((self.numrows, self.n), dtype=np.float)
+        #inner_data = self.data
+        #self.raw_data = <double*>inner_data.data
+        self.numpoints = 0
+
+
+    def add(NNBF self, object point):
+        """
+        add a point to the buffer, grow if necessary
+        """
+        #cdef np.ndarray[double, ndim=2] inner_data
+        cdef np.ndarray[double, ndim=1] pp
+        pp = np.asarray(point).astype(np.float)
+
+
+        self.data[self.numpoints] = pp
+        self.numpoints += 1
+        if self.numpoints==self.numrows:
+            ## XXX do I need to do memory management here, eg free
+            ## raw_data if I were using it?
+            self.numrows *= 2
+            newdata = np.empty((self.numrows, self.n), np.float)
+            newdata[:self.numpoints] = self.data
+            self.data = newdata
+            #self.raw_data = <double*>inner_data.data
+
+    def get_data(NNBF self):
+        """
+        return a copy of data added so far as a numpoints x n array
+        """
+        return self.data[:self.numpoints]
+
+
+    def find_neighbors(NNBF self, object point, double radius):
+        """
+        return a list of indices into data which are within radius
+        from point
+        """
+        cdef int i, neighbor, n
+        cdef double d2max
+        cdef np.ndarray[double, ndim=1] pp
+
+        # avoid python array indexing in the inner loop
+        #cdef np.ndarray[double, ndim=1] row
+        cdef double * dataptr
+        dataptr = <double*> self.data.data
+        if len(point)!=self.n:
+            raise ValueError('Expected a length %d vector'%self.n)
+
+        pp = np.asarray(point).astype(np.float)
+
+        d2max = radius*radius
+        neighbors = []
+
+        # don't do a python lookup inside the loop
+        n = self.n
+
+        for i in range(self.numpoints):
+            # XXX : is there a more efficient way to access the row
+            # data?  Can/should we be using raw_data here?
+            #row = self.data[i]
+            neighbor = is_neighbor(
+                n,
+                #<double*>row.data,
+                dataptr + i*n,  # row i starts i*n doubles into the buffer
+                <double*>pp.data,
+                d2max)
+
+            # if the number of points in the cluster is small, the
+            # python list performance should not kill us
+            if neighbor:
+                neighbors.append(i)
+
+        return neighbors
+
+
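
For completeness, this is roughly how nnbf_v2 might be exercised once the .pyx
has been compiled into an extension module; the build step and the random test
data below are assumptions, not part of the commit:

# hypothetical driver, assuming nnbf_v2.pyx has been built into an
# importable extension module (e.g. via pyrexc/cython + distutils)
import numpy as np
import nnbf_v2

nn = nnbf_v2.NNBF(6)               # buffer for 6-dimensional points
for i in range(2000):
    nn.add(np.random.rand(6))      # internal array doubles in size as needed

point = np.random.rand(6)
ind = nn.find_neighbors(point, 0.3)
print '%d of %d points within 0.3 of point' % (len(ind), nn.numpoints)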