Hi,
I'm trying to implement streams in pycuda. I created this simple program
to test, but I'm having trouble. When I try to copy data asynchronously I
get this error.
Traceback (most recent call last):
File "py_stream_test.py", line 40, in <module>
cuda.memcpy_htod(dev1,host1,stream1)
pycuda._driver.LogicError: cuMemcpyHtoDAsync failed: invalid value
code:
import pycuda.driver as cuda
import pycuda.autoinit
import numpy
size = 2000
#random numpy
arrays
rand1 = numpy.random.rand(size).astype(numpy.float32)
rand2 = numpy.random.rand(size).astype(numpy.float32)
#init page locked
memory
host1 = cuda.pagelocked_empty_like(rand1)
host2 = cuda.pagelocked_empty_like(rand2)
#copy data to page locked
mem
host1 = rand1
host2 = rand2
print host1
print host2
#allocate space on
device
dev1 = cuda.mem_alloc(rand1.size*rand1.dtype.itemsize)
dev2 = cuda.mem_alloc(rand2.size*rand2.dtype.itemsize)
mod =
cuda.SourceModule("""
__global__ void cuda_double(float
*A){
int idx = blockIdx.x*blockDim.x +
threadIdx.x;
A[idx] =
A[idx]*2;
}
"""
)
cuda_double = mod.get_function("cuda_double")
#create two
streams
stream1=cuda.Stream()
stream2=cuda.Stream()
#copy the
data
cuda.memcpy_htod(dev1,host1,stream1)
cuda.memcpy_htod(dev2,host2,stream2)
#run
kernel
num_blocks = (size/512)+1
cuda_double(dev1,block=(512,1,1),grid=(num_blocks,1),stream=stream1)
cuda_double(dev2,block=(512,1,1),grid=(num_blocks,1),stream=stream2)
#copy
back
cuda.memcpy_dtoh(host1,dev1,stream1)
cuda.memcpy_dtoh(host2,dev2,stream2)
print host1
print host2
If I remove the stream argument from the memcpy it runs fine.
Thanks,
James
_______________________________________________
PyCuda mailing list
[email protected]
http://tiker.net/mailman/listinfo/pycuda_tiker.net