Received from Evgeny Lazutkin on Tue, Feb 25, 2014 at 03:18:18AM EST: > Dear Lev, dear all, > > I have solved the problem with DataTypeError. I had not mentioned that > I was passing float64 instead of float32. > > In the attachment you will find the code; it runs... but... the GPU produces > the wrong solution. I print the results in the program, and they don't match. > Could you explain why?
This is because arrays in numpy are row-major by default while CULA assumes the data is column-major [1]. If you transpose the two input matrices, you should obtain the correct result (transposed). Corrected code attached. [1] http://www.culatools.com/cula_dense_programmers_guide/#column-major-ordering -- Lev Givon Bionet Group http://www.columbia.edu/~lev/ http://lebedov.github.io/
import pycuda.driver as drv import pycuda.tools import pycuda.autoinit import numpy import pycuda.gpuarray as gpuarray from scikits.cuda.cula import * from scikits import * from scipy import * A = array([[ 5.18649167 , 0. , 1.03279556 , 0. , -0.14549722, 0. , 0. , 0. , 0. , 0. , 0. , 0. ], [-5.0819889 , 0., 2.52459667, 0., 0.64549722 , 0. , 0. , 0., 0. , 0. , 0. , 0. ], [ 9.01848057, 0. , -8.13118224, 0. , 5.18649167, 0. , 0. , 0., 0. , 0. , 0. , 0. ], [-0.5 , 4.43649167, 0. , 1.03279556, 0. , -0.14549722, 0. , 0. , 0. , 0. , 0. , 0. ], [ 0. , -5.0819889 ,-0.5, 1.77459667 , 0. , 0.64549722, 0. , 0. , 0. , 0. , 0. , 0. ], [ 0. , 9.01848057 ,0., -8.13118224, -0.5 , 4.43649167, 0. , 0. , 0. , 0. , 0. , 0. ], [ 0. , 0. ,0., 0. , 0. , 0. , 5.18649167 , 0. , 1.03279556 , 0. , -0.14549722 , 0. ], [ 0. , 0. ,0., 0. , 0. , 0. , -5.0819889 , 0. , 2.52459667 , 0. , 0.64549722 , 0. ], [ 0. , 0. ,0., 0. , 0., 0. , 9.01848057 , 0. , -8.13118224 , 0. , 5.18649167 , 0. ], [ 0. , 0. , 0. , 0. , 0. , 0. , -0.5 , 4.43649167, 0. , 1.03279556 , 0. , -0.14549722] , [ 0. , 0. , 0. , 0. , 0. , 0. , 0. , -5.0819889 , -0.5 , 1.77459667 , 0. , 0.64549722] , [ 0. , 0. , 0. , 0. , 0. , 0. , 0. , 9.01848057 , 0. , -8.13118224, -0.5 , 4.43649167]], dtype = numpy.float32) B = array([[-5.32379001 , 0. , 0., 0. ], [ 2.661895 , 0. , 0., 0. ], [-5.32379001, 0. , 0. , 0. ], [ 0. , -5.32379001, 0. , 0. ], [ 0. , 2.661895 , 0. , 0. ], [ 0. , -5.32379001, 0. , 0. ], [ 0. , 0. , -5.32379001, 0. ], [ 0. , 0. , 2.661895 , 0. ], [ 0. , 0., -5.32379001, 0. ], [ 0. , 0., 0. , -5.32379001], [ 0. , 0., 0. , 2.661895 ], [ 0. , 0., 0. 
, -5.32379001]], dtype = numpy.float32) def cpu_solve(k_,y_): start=drv.Event() end=drv.Event() start.record() result = np.linalg.solve(k_, y_) end.record() end.synchronize() print "numpy array time: %fs" %(start.time_till(end)*1e-3) return result def gpu_solve(k_,y_): k_gpu=gpuarray.to_gpu(k_.T.copy()) y_gpu=gpuarray.to_gpu(y_.T.copy()) (m,n)=k_.shape lda=m nrhs=shape(y_)[1] ipiv=np.empty(n,dtype=np.int32) ipiv_gpu=gpuarray.to_gpu(ipiv) ldb=y_.shape[0] start=drv.Event() end=drv.Event() start.record() culaDeviceSgesv(n, nrhs, k_gpu.ptr, lda, ipiv_gpu.ptr, y_gpu.ptr, ldb) end.record() end.synchronize() print "GPU array time: %fs" %(start.time_till(end)*1e-3) return y_gpu.get().T.copy() culaInitialize() x_c = cpu_solve(A,B) # check: correct? ---> YES. x_g = gpu_solve(A,B) # check: corretc? ---> NO! print x_c print x_g culaShutdown()
_______________________________________________ PyCUDA mailing list [email protected] http://lists.tiker.net/listinfo/pycuda
