Hi,
  I'm trying to implement streams in pycuda. I created this simple program
to test, but I'm having trouble. When I try to copy data asynchronously I
get this error.

Traceback (most recent call last):
  File "py_stream_test.py", line 40, in <module>
    cuda.memcpy_htod(dev1,host1,stream1)
pycuda._driver.LogicError: cuMemcpyHtoDAsync failed: invalid value

code:


import pycuda.driver as cuda
import pycuda.autoinit
import numpy

size = 2000

#random numpy
arrays

rand1 = numpy.random.rand(size).astype(numpy.float32)
rand2 = numpy.random.rand(size).astype(numpy.float32)

#init page locked
memory

host1 = cuda.pagelocked_empty_like(rand1)
host2 = cuda.pagelocked_empty_like(rand2)

#copy data to page locked
mem

host1 = rand1
host2 = rand2
print host1
print host2

#allocate space on
device

dev1 = cuda.mem_alloc(rand1.size*rand1.dtype.itemsize)
dev2 = cuda.mem_alloc(rand2.size*rand2.dtype.itemsize)


mod =
cuda.SourceModule("""

  __global__ void cuda_double(float
*A){
    int idx = blockIdx.x*blockDim.x +
threadIdx.x;
    A[idx] =
A[idx]*2;


}

 """
)

cuda_double = mod.get_function("cuda_double")

#create two
streams

stream1=cuda.Stream()
stream2=cuda.Stream()

#copy the
data

cuda.memcpy_htod(dev1,host1,stream1)
cuda.memcpy_htod(dev2,host2,stream2)

#run
kernel

num_blocks = (size/512)+1
cuda_double(dev1,block=(512,1,1),grid=(num_blocks,1),stream=stream1)
cuda_double(dev2,block=(512,1,1),grid=(num_blocks,1),stream=stream2)

#copy
back

cuda.memcpy_dtoh(host1,dev1,stream1)
cuda.memcpy_dtoh(host2,dev2,stream2)

print host1
print host2


If I remove the stream argument from the memcpy it runs fine.
Thanks,
James
_______________________________________________
PyCuda mailing list
[email protected]
http://tiker.net/mailman/listinfo/pycuda_tiker.net

Reply via email to