Chris Colbert wrote:
> Normally, when I'm hitting a road block with numpy performance, I port
> that section to cython and typically see an order of magnitude
> increase in speed. (this is without disabling bounds checking) In my
> current case, I'm seeing a slowdown of 2x (with boundschecking
> disabled) and I'm pretty sure everything in my code is kosher. Would
> anyone here mind looking at it and see if there is any glaring
> ommision?
>   
Strange. I take it "cython -a" doesn't reveal anything? Could you just 
attach the HTML report so that I can have a (quicker) look?

Dag Sverre
> Here are the benchmarks:
>
> For the cython version:
>
> In [6]: superquadricfit._err_func_with_grad(guess, pts[:,0], pts[:,1],
> pts[:,2], 0.5, False)
> Out[6]: 200.0698654738344
>
> In [7]: %timeit superquadricfit._err_func_with_grad(guess, pts[:,0],
> pts[:,1], pts[:,2], 0.5, False)
> 1000 loops, best of 3: 882 µs per loop
>
>
> and the numpy version:
>
> In [11]: quad._err_func_with_grad(guess, pts[:,0], pts[:,1], pts[:,2],
> 0.5, False)
> Out[11]: 200.0698654738344
>
> In [12]: %timeit quad._err_func_with_grad(guess, pts[:,0], pts[:,1],
> pts[:,2], 0.5, False)
> 1000 loops, best of 3: 438 µs per loop
>
>
> I appreciate any help anyone can give
>
> Cheers!
>
> Chris
>
> Here is the code:
>
> #####   Cython Code #######
>
> from __future__ import division
> import numpy as np
> cimport numpy as np
>
> cdef extern from "math.h":
>     double cos(double)
>     double sin(double)
>     double sqrt(double)
>     double pow(double, double)
>
> DTYPE = np.float64
> ctypedef np.float64_t DTYPE_f64_t
>
> cimport cython
> @cython.boundscheck(False)
> def _err_func_with_grad(params, np.ndarray[DTYPE_f64_t, ndim=1] xw,
>                                 np.ndarray[DTYPE_f64_t, ndim=1] yw,
>                                 np.ndarray[DTYPE_f64_t, ndim=1] zw,
>                                 double weight,
>                                 bool calc_grad):
>
>     cdef double a1, a2, a3, e1, e2, phi, theta, psi, px, py, pz
>     a1, a2, a3, e1, e2, phi, theta, psi, px, py, pz = params
>
>     cdef double w1, w2, cphi, ctheta, cpsi, sphi, stheta, spsi, nx,
> ny, nz, ox, oy, oz, ax, ay, az
>     cdef double F, f1, f2, f3, err, insiders_err, temperr, sumerr
>
>     cdef int N
>     cdef int i
>
>     cdef double C1, C2, C3
>
>     # world point coordinates
>     if len(xw) != len(yw) or len(xw) != len(zw):
>         raise ValueError('Point vectors are not of equal length')
>
>     N = len(xw)
>
>     # the weights to place on the errors. w1 for all points
>     # w2 for points inside the superquadric
>     w2 = weight
>     w1 = 1 - w2
>
>     cphi = cos(phi)
>     ctheta = cos(theta)
>     cpsi = cos(psi)
>     sphi = sin(phi)
>     stheta = sin(theta)
>     spsi = sin(psi)
>
>     nx = cphi * ctheta * cpsi - sphi * spsi
>     ny = sphi * ctheta * cpsi + cphi * spsi
>     nz = -stheta * cpsi
>     ox = -cphi * ctheta * spsi - sphi * cpsi
>     oy = -sphi * ctheta * spsi + cphi * cpsi
>     oz = stheta * spsi
>     ax = cphi * stheta
>     ay = sphi * stheta
>     az = ctheta
>
>     temperr = 0.0
>     err = 0.0
>     insiders_err = 0.0
>     sumerr = 0.0
>
>     C1 = px * nx - py * ny - pz * nz
>     C2 = px * ox - py * oy - pz * oz
>     C3 = px * ax - py * ay - pz * az
>
>     for i in range(N):
>         f1 = ((nx * xw[i] + ny * yw[i] + nz * zw[i] - C1) / a1)
>         f2 = ((ox * xw[i] + oy * yw[i] + oz * zw[i] - C2) / a2)
>         f3 = ((ax * xw[i] + ay * yw[i] + az * zw[i] - C3) / a3)
>
>
>         F = ((f1**2)**(1/e2) + (f2**2)**(1/e2))**(e2/e1) + (f3**2)**(1/e1)
>
>         temperr = (sqrt(a1 * a2 * a3) * (F**(e1) - 1))**2
>
>         if F < 1.0:
>             insiders_err += temperr
>
>         err += temperr
>
>     sumerr += w1 * err + w2 * insiders_err
>
>     return sumerr
>
>
>
> ############ equivalent python code###############
>
> def _err_func_with_grad(self, params, xw, yw, zw, weight, calc_grad):
>
>
>     params = [float(arg) for arg in params]
>     a1, a2, a3, e1, e2, phi, theta, psi, px, py, pz = params
>
>     # world point coordinates
>     if len(xw) != len(yw) or len(xw) != len(zw):
>         raise ValueError('Point vectors are not of equal length')
>
>     # the weights to place on the errors. w1 for all points
>     # w2 for points inside the superquadric
>     w2 = weight
>     w1 = 1 - w2
>
>     cphi = math.cos(phi)
>     ctheta = math.cos(theta)
>     cpsi = math.cos(psi)
>     sphi = math.sin(phi)
>     stheta = math.sin(theta)
>     spsi = math.sin(psi)
>
>     nx = cphi * ctheta * cpsi - sphi * spsi
>     ny = sphi * ctheta * cpsi + cphi * spsi
>     nz = -stheta * cpsi
>     ox = -cphi * ctheta * spsi - sphi * cpsi
>     oy = -sphi * ctheta * spsi + cphi * cpsi
>     oz = stheta * spsi
>     ax = cphi * stheta
>     ay = sphi * stheta
>     az = ctheta
>
>     f1 = ((nx * xw + ny * yw + nz * zw - px * nx - py * ny - pz * nz) / a1)
>     f2 = ((ox * xw + oy * yw + oz * zw - px * ox - py * oy - pz * oz) / a2)
>     f3 = ((ax * xw + ay * yw + az * zw - px * ax - py * ay - pz * az) / a3)
>
>     F = ((f1**2)**(1/e2) + (f2**2)**(1/e2))**(e2/e1) + (f3**2)**(1/e1)
>
>     # get a mask for the points that lie inside the superquadric
>     insiders_mask = F < 1
>
>     # get the error associated only from the insiders
>     insiders = F[insiders_mask]
>     insiders_err = (math.sqrt(a1 * a2 * a3) * (insiders**(e1) - 1))**2
>
>     # the square error of every point
>     err = (math.sqrt(a1 * a2 * a3) * (F**(e1) - 1))**2
>
>     # this is the final quantity to be minimized
>     esum = err.sum()
>     isum = insiders_err.sum()
>
>     sumerr = w1*esum + w2*isum
>
>     return sumerr
> _______________________________________________
> Cython-dev mailing list
> [email protected]
> http://codespeak.net/mailman/listinfo/cython-dev
>   

_______________________________________________
Cython-dev mailing list
[email protected]
http://codespeak.net/mailman/listinfo/cython-dev

Reply via email to