> Odd, I can't reproduce these issues on the Fermis I have access to. (a
> 480 and a couple C2050s) Something is odd, too--these
> NotImplementedErrors should be gone--did you get a partial git update
> somehow? (Ian implemented these a while ago IIRC)
Whoops, indeed, my bad: I last ran make install on 0.94rc during my
tests, not on the latest git. Corrected!
So now indeed test_gpuarray.py only fails 4 tests (29 success):
minmax, subset_minmax, sum, and dot. Attached is the output. Looks
like numerical asserts failing...
> Cool--if you end up with a nice application of PyCUDA, please consider
> adding it to the PyCUDA showcase:
Sure will! I may have to use Matlab (sigh) and MEX-files for this
very project, though, that my collaborators use more easily -- but I'm
not against pushing it to python later on ! pyCUDA seems actually
much, much more comfortable than using MEX files (not even getting
into python vs Matlab, nuff said).
Julien
cuInit
cuDeviceGetCount
cuDeviceGetCount
cuDeviceGet
cuCtxCreate
cuCtxGetDevice
==================================================== test session starts
====================================================
platform linux2 -- Python 2.6.5 -- pytest-1.3.2
test path 1: test_gpuarray.py
test_gpuarray.py ...F.....F.....F................F
========================================================= FAILURES
==========================================================
_________________________________________________ TestGPUArray.test_minmax
__________________________________________________
def f(*args, **kwargs):
import pycuda.driver
# appears to be idempotent, i.e. no harm in calling it more than once
pycuda.driver.init()
ctx = make_default_context()
try:
assert isinstance(ctx.get_device().name(), str)
assert isinstance(ctx.get_device().compute_capability(), tuple)
assert isinstance(ctx.get_device().get_attributes(), dict)
> inner_f(*args, **kwargs)
/usr/local/lib/python2.6/dist-packages/pycuda-0.94rc-py2.6-linux-i686.egg/pycuda/tools.py:503:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <test_gpuarray.TestGPUArray instance at 0x935fb0c>
@mark_cuda_test
def test_minmax(self):
from pycuda.curandom import rand as curand
if has_double_support():
dtypes = [numpy.float64, numpy.float32, numpy.int32]
else:
dtypes = [numpy.float32, numpy.int32]
for what in ["min", "max"]:
for dtype in dtypes:
a_gpu = curand((200000,), dtype)
a = a_gpu.get()
op_a = getattr(numpy, what)(a)
op_a_gpu = getattr(gpuarray, what)(a_gpu).get()
> assert op_a_gpu == op_a, (op_a_gpu, op_a, dtype, what)
E AssertionError: (array(0.00056963879615068436, dtype=float32),
6.0442835e-06, <type 'numpy.float32'>, 'min')
test_gpuarray.py:424: AssertionError
------------------------------------------------------ Captured stderr
------------------------------------------------------
cuInit
cuDeviceGetCount
cuDeviceGetCount
cuDeviceGet
cuCtxPopCurrent
cuCtxCreate
cuCtxGetDevice
cuDeviceGetName
cuCtxGetDevice
cuDeviceComputeCapability
cuCtxGetDevice
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuCtxGetDevice
cuMemAlloc
cuCtxGetDevice
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceComputeCapability
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuCtxGetDevice
cuDeviceComputeCapability
cuModuleLoadDataEx
cuModuleGetFunction
cuFuncSetBlockShape (md5_rng_float)
cuParamSetSize (md5_rng_float)
cuFuncSetBlockShape (md5_rng_float)
cuParamSetv (md5_rng_float)
cuLaunchGrid (md5_rng_float)
cuModuleUnload
cuMemcpyDtoH
cuCtxGetDevice
cuDeviceComputeCapability
cuModuleLoadDataEx
cuModuleGetFunction
cuFuncSetBlockShape (reduce_kernel_stage1)
cuParamSetSize (reduce_kernel_stage1)
cuCtxGetDevice
cuDeviceComputeCapability
cuModuleLoadDataEx
cuModuleGetFunction
cuFuncSetBlockShape (reduce_kernel_stage2)
cuParamSetSize (reduce_kernel_stage2)
cuMemAlloc
cuCtxGetDevice
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceComputeCapability
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuParamSetv (reduce_kernel_stage1)
cuLaunchGrid (reduce_kernel_stage1)
cuMemAlloc
cuCtxGetDevice
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceComputeCapability
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuParamSetv (reduce_kernel_stage2)
cuLaunchGrid (reduce_kernel_stage2)
cuMemFree
cuMemcpyDtoH
cuMemFree
cuCtxPopCurrent
cuCtxPushCurrent
cuCtxPushCurrent
cuCtxDetach
______________________________________________ TestGPUArray.test_subset_minmax
______________________________________________
def f(*args, **kwargs):
import pycuda.driver
# appears to be idempotent, i.e. no harm in calling it more than once
pycuda.driver.init()
ctx = make_default_context()
try:
assert isinstance(ctx.get_device().name(), str)
assert isinstance(ctx.get_device().compute_capability(), tuple)
assert isinstance(ctx.get_device().get_attributes(), dict)
> inner_f(*args, **kwargs)
/usr/local/lib/python2.6/dist-packages/pycuda-0.94rc-py2.6-linux-i686.egg/pycuda/tools.py:503:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <test_gpuarray.TestGPUArray instance at 0x937ad4c>
@mark_cuda_test
def test_subset_minmax(self):
from pycuda.curandom import rand as curand
l_a = 200000
gran = 5
l_m = l_a - l_a // gran + 1
if has_double_support():
dtypes = [numpy.float64, numpy.float32, numpy.int32]
else:
dtypes = [numpy.float32, numpy.int32]
for dtype in dtypes:
a_gpu = curand((l_a,), dtype)
a = a_gpu.get()
meaningful_indices_gpu = gpuarray.zeros(l_m, dtype=numpy.int32)
meaningful_indices = meaningful_indices_gpu.get()
j = 0
for i in range(len(meaningful_indices)):
meaningful_indices[i] = j
j = j + 1
if j % gran == 0:
j = j + 1
meaningful_indices_gpu = gpuarray.to_gpu(meaningful_indices)
b = a[meaningful_indices]
min_a = numpy.min(b)
min_a_gpu = gpuarray.subset_min(meaningful_indices_gpu, a_gpu).get()
> assert min_a_gpu == min_a
E assert array(4.2184256017208099e-05, dtype=float32) == 7.576542e-06
test_gpuarray.py:458: AssertionError
------------------------------------------------------ Captured stderr
------------------------------------------------------
cuInit
cuDeviceGetCount
cuDeviceGetCount
cuDeviceGet
cuCtxPopCurrent
cuCtxCreate
cuCtxGetDevice
cuDeviceGetName
cuCtxGetDevice
cuDeviceComputeCapability
cuCtxGetDevice
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuCtxGetDevice
cuMemAlloc
cuCtxGetDevice
cuCtxGetDevice
cuDeviceComputeCapability
cuModuleLoadDataEx
cuModuleGetFunction
cuFuncSetBlockShape (md5_rng_float)
cuParamSetSize (md5_rng_float)
cuFuncSetBlockShape (md5_rng_float)
cuParamSetv (md5_rng_float)
cuLaunchGrid (md5_rng_float)
cuModuleUnload
cuMemcpyDtoH
cuMemAlloc
cuCtxGetDevice
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceComputeCapability
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuCtxGetDevice
cuDeviceComputeCapability
cuModuleLoadDataEx
cuModuleGetFunction
cuFuncSetBlockShape (fill)
cuParamSetSize (fill)
cuFuncSetBlockShape (fill)
cuParamSetv (fill)
cuLaunchGrid (fill)
cuMemcpyDtoH
cuMemAlloc
cuCtxGetDevice
cuMemcpyHtoD
cuMemFree
cuCtxGetDevice
cuDeviceComputeCapability
cuModuleLoadDataEx
cuModuleGetFunction
cuFuncSetBlockShape (reduce_kernel_stage1)
cuParamSetSize (reduce_kernel_stage1)
cuCtxGetDevice
cuDeviceComputeCapability
cuModuleLoadDataEx
cuModuleGetFunction
cuFuncSetBlockShape (reduce_kernel_stage2)
cuParamSetSize (reduce_kernel_stage2)
cuMemAlloc
cuCtxGetDevice
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceComputeCapability
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuParamSetv (reduce_kernel_stage1)
cuLaunchGrid (reduce_kernel_stage1)
cuMemAlloc
cuCtxGetDevice
cuParamSetv (reduce_kernel_stage2)
cuLaunchGrid (reduce_kernel_stage2)
cuMemFree
cuMemcpyDtoH
cuMemFree
cuCtxPopCurrent
cuCtxPushCurrent
cuCtxPushCurrent
cuCtxDetach
___________________________________________________ TestGPUArray.test_sum
___________________________________________________
def f(*args, **kwargs):
import pycuda.driver
# appears to be idempotent, i.e. no harm in calling it more than once
pycuda.driver.init()
ctx = make_default_context()
try:
assert isinstance(ctx.get_device().name(), str)
assert isinstance(ctx.get_device().compute_capability(), tuple)
assert isinstance(ctx.get_device().get_attributes(), dict)
> inner_f(*args, **kwargs)
/usr/local/lib/python2.6/dist-packages/pycuda-0.94rc-py2.6-linux-i686.egg/pycuda/tools.py:503:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <test_gpuarray.TestGPUArray instance at 0x937a96c>
@mark_cuda_test
def test_sum(self):
from pycuda.curandom import rand as curand
a_gpu = curand((200000,))
a = a_gpu.get()
sum_a = numpy.sum(a)
from pycuda.reduction import get_sum_kernel
sum_a_gpu = gpuarray.sum(a_gpu).get()
> assert abs(sum_a_gpu-sum_a)/abs(sum_a) < 1e-4
E assert (abs((array(1568.4486083984375, dtype=float32) - 100057.79)) /
abs(100057.79)) < 0.0001
test_gpuarray.py:405: AssertionError
------------------------------------------------------ Captured stderr
------------------------------------------------------
cuInit
cuDeviceGetCount
cuDeviceGetCount
cuDeviceGet
cuCtxPopCurrent
cuCtxCreate
cuCtxGetDevice
cuDeviceGetName
cuCtxGetDevice
cuDeviceComputeCapability
cuCtxGetDevice
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuMemAlloc
cuCtxGetDevice
cuCtxGetDevice
cuDeviceComputeCapability
cuModuleLoadDataEx
cuModuleGetFunction
cuFuncSetBlockShape (md5_rng_float)
cuParamSetSize (md5_rng_float)
cuFuncSetBlockShape (md5_rng_float)
cuParamSetv (md5_rng_float)
cuLaunchGrid (md5_rng_float)
cuModuleUnload
cuMemcpyDtoH
cuCtxGetDevice
cuDeviceComputeCapability
cuModuleLoadDataEx
cuModuleGetFunction
cuFuncSetBlockShape (reduce_kernel_stage1)
cuParamSetSize (reduce_kernel_stage1)
cuCtxGetDevice
cuDeviceComputeCapability
cuModuleLoadDataEx
cuModuleGetFunction
cuFuncSetBlockShape (reduce_kernel_stage2)
cuParamSetSize (reduce_kernel_stage2)
cuMemAlloc
cuCtxGetDevice
cuParamSetv (reduce_kernel_stage1)
cuLaunchGrid (reduce_kernel_stage1)
cuMemAlloc
cuCtxGetDevice
cuParamSetv (reduce_kernel_stage2)
cuLaunchGrid (reduce_kernel_stage2)
cuMemFree
cuMemcpyDtoH
cuMemFree
cuCtxPopCurrent
cuCtxPushCurrent
cuCtxPushCurrent
cuCtxDetach
___________________________________________________ TestGPUArray.test_dot
___________________________________________________
def f(*args, **kwargs):
import pycuda.driver
# appears to be idempotent, i.e. no harm in calling it more than once
pycuda.driver.init()
ctx = make_default_context()
try:
assert isinstance(ctx.get_device().name(), str)
assert isinstance(ctx.get_device().compute_capability(), tuple)
assert isinstance(ctx.get_device().get_attributes(), dict)
> inner_f(*args, **kwargs)
/usr/local/lib/python2.6/dist-packages/pycuda-0.94rc-py2.6-linux-i686.egg/pycuda/tools.py:503:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <test_gpuarray.TestGPUArray instance at 0x942cc2c>
@mark_cuda_test
def test_dot(self):
from pycuda.curandom import rand as curand
a_gpu = curand((200000,))
a = a_gpu.get()
b_gpu = curand((200000,))
b = b_gpu.get()
dot_ab = numpy.dot(a, b)
dot_ab_gpu = gpuarray.dot(a_gpu, b_gpu).get()
> assert abs(dot_ab_gpu-dot_ab)/abs(dot_ab) < 1e-4
E assert (abs((array(793.16839599609375, dtype=float32) - 49974.109)) /
abs(49974.109)) < 0.0001
test_gpuarray.py:472: AssertionError
------------------------------------------------------ Captured stderr
------------------------------------------------------
cuInit
cuDeviceGetCount
cuDeviceGetCount
cuDeviceGet
cuCtxPopCurrent
cuCtxCreate
cuCtxGetDevice
cuDeviceGetName
cuCtxGetDevice
cuDeviceComputeCapability
cuCtxGetDevice
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuDeviceGetAttribute
cuMemAlloc
cuCtxGetDevice
cuCtxGetDevice
cuDeviceComputeCapability
cuModuleLoadDataEx
cuModuleGetFunction
cuFuncSetBlockShape (md5_rng_float)
cuParamSetSize (md5_rng_float)
cuFuncSetBlockShape (md5_rng_float)
cuParamSetv (md5_rng_float)
cuLaunchGrid (md5_rng_float)
cuModuleUnload
cuMemcpyDtoH
cuMemAlloc
cuCtxGetDevice
cuCtxGetDevice
cuDeviceComputeCapability
cuModuleLoadDataEx
cuModuleGetFunction
cuFuncSetBlockShape (md5_rng_float)
cuParamSetSize (md5_rng_float)
cuFuncSetBlockShape (md5_rng_float)
cuParamSetv (md5_rng_float)
cuLaunchGrid (md5_rng_float)
cuModuleUnload
cuMemcpyDtoH
cuCtxGetDevice
cuDeviceComputeCapability
cuModuleLoadDataEx
cuModuleGetFunction
cuFuncSetBlockShape (reduce_kernel_stage1)
cuParamSetSize (reduce_kernel_stage1)
cuCtxGetDevice
cuDeviceComputeCapability
cuModuleLoadDataEx
cuModuleGetFunction
cuFuncSetBlockShape (reduce_kernel_stage2)
cuParamSetSize (reduce_kernel_stage2)
cuMemAlloc
cuCtxGetDevice
cuParamSetv (reduce_kernel_stage1)
cuLaunchGrid (reduce_kernel_stage1)
cuMemAlloc
cuCtxGetDevice
cuParamSetv (reduce_kernel_stage2)
cuLaunchGrid (reduce_kernel_stage2)
cuMemFree
cuMemcpyDtoH
cuMemFree
cuCtxPopCurrent
cuCtxPushCurrent
cuCtxPushCurrent
cuCtxDetach
=========================================== 4 failed, 29 passed in 12.82
seconds ============================================
cuCtxPopCurrent
cuCtxPushCurrent
cuCtxDetach
_______________________________________________
PyCUDA mailing list
[email protected]
http://lists.tiker.net/listinfo/pycuda