Hi

I'm having an issue with PyCUDA that at first glance seem like they might be similar to those of Thomas Unterthiner (messages from Jun 20 2014, "Weird bug when slicing arrays on Kepler cards"). I'm also using a Kepler card (GTX 670) and getting the same clean-up/dead context errors. However, unlike Thomas, I'm not using cublas. The simplest example I can show is below, which is a cuda kernel taken directly from here:
http://devblogs.nvidia.com/parallelforall/cuda-pro-tip-write-flexible-kernels-grid-stride-loops/

----------- code --------------
from numpy import random
from numpy import float32, float64, int32
import time
# CUDA
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule

def main(n=2**12):
    # CUDA grid
    block_size=(256,1,1)
    grid = (n/block_size[0],1)

    # CUDA source
    cusrc = SourceModule("""
    __global__ void saxpy(int n, double a, double *x, double *y)
    {
    for (int i = blockIdx.x * blockDim.x + threadIdx.x;
        i < n;
        i += blockDim.x * gridDim.x)
        {
            y[i] = a * x[i] + y[i];
        }
    }
    """)
    SAXPY = cusrc.get_function('saxpy')

    # data arrays
    w = 500 #arbitrary
x = random.uniform(0,w,n) #.astype(float32) << same error with either float or double
    y = random.uniform(0,w,n) #.astype(float32)
    y_o = y

    #init gpu (input) arrays
    a = float64(24.5)
    n = int32(n)
    a_gpu = cuda.mem_alloc(a.nbytes)
    cuda.memcpy_htod(a_gpu, a)
    n_gpu = cuda.mem_alloc(n.nbytes)
    cuda.memcpy_htod(n_gpu, n)
    X_gpu = cuda.mem_alloc(x.nbytes)
    cuda.memcpy_htod(X_gpu, x)
    Y_gpu = cuda.mem_alloc(y.nbytes)
    cuda.memcpy_htod(Y_gpu, y)

    SAXPY(n_gpu, a_gpu, X_gpu, Y_gpu, grid=grid, block=block_size)

    #retrieve outputs
    cuda.memcpy_dtoh(y,Y_gpu)

    #free gpu memory
    X_gpu.free(); Y_gpu.free()
    a_gpu.free(); n_gpu.free()

    print Yout
    print a*pos_x + pos_y_o
    print Yout - (a*pos_x + pos_y_o) ## compare the two

if __name__ == '__main__':
    main()

------- output from command line -----------
Traceback (most recent call last):
  File "as_cuda_loop.py", line 60, in <module>
    main()
  File "as_cuda_loop.py", line 49, in main
    cuda.memcpy_dtoh(y,Y_gpu)
pycuda._driver.LogicError: cuMemcpyDtoH failed: invalid/unknown error code
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuModuleUnload failed: invalid/unknown error code
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: invalid/unknown error code
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: invalid/unknown error code
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: invalid/unknown error code
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: invalid/unknown error code

-------------------------------

I'm running windows 8.1, PyCUDA 2013.1.1, CUDA 6.0. I have absolutely no idea what's going wrong here - can anyone help?

Thanks

James

_______________________________________________
PyCUDA mailing list
[email protected]
http://lists.tiker.net/listinfo/pycuda

Reply via email to