Hello,

Yet another stupid question. Most probably I missed something
obvious, but anyway — can someone explain why I get NaNs in the
output of the program listed below? Surprisingly, the bug disappears if
I pass '1' instead of '-1' as the third parameter to the function (or
remove the 'int' parameters completely and leave only the two
pointers). The same kernel in pure CUDA works fine. It looks like
memory corruption, but I can't figure out where it happens...

Output:
$ python test.py
6.65991e-37
nan

Code:

import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import pycuda.gpuarray as gpuarray
import numpy

# CUDA kernel source. Each of the 128 threads stages 16 zero-valued
# floats through shared memory and writes them back out to `out`;
# `in`, `dir` and `S` are accepted but never read (minimal repro).
#
# NOTE: comments inside this string are CUDA C, so they must use `//`.
# Python-style `#` lines would be seen by nvcc's preprocessor as
# invalid preprocessing directives and break compilation.
src = """
__global__ void test(float *in, float *out, int dir, int S)
{
        __shared__ float sMem[2048];

        size_t lMemLoad;
        float a[16] = {0,0,0,0,0,0,0,0,
                        0,0,0,0,0,0,0,0};
        int lId = threadIdx.x;

        // (0..15) * 128 + (0..7) + (0..15)*8 < 2048 always,
        // so the shared-memory accesses stay in bounds.
        lMemLoad = (lId % 16) * 128 + (lId / 16);

        for(int k = 0; k < 16; k++)
                sMem[lMemLoad + k * 8] = a[k];
        __syncthreads();

        for(int k = 0; k < 16; k++)
                a[k] = sMem[lMemLoad + k * 8];
        __syncthreads();

        for(int k = 0; k < 16; k++)
                out[lMemLoad + k * 8] = a[k];
}

"""

# Compile the kernel and fetch a handle to the device function.
mod = SourceModule(src)
func = mod.get_function('test')

# Host-side input buffer; the kernel accepts it but never reads it.
data = numpy.ones(2048).astype(numpy.float32)
a_gpu = gpuarray.to_gpu(data)
# NOTE(review): GPUArray() leaves device memory uninitialized; that is
# safe here only because the kernel overwrites every element of `out`.
b_gpu = gpuarray.GPUArray(data.shape, dtype=data.dtype)

# Argument format: two pointers and two ints, one 128-thread block.
func.prepare("PPii", block=(128,1,1))

# Expected result: the kernel writes zeros everywhere, so the sum is 0.
# (print(x) with a single argument is valid on both Python 2 and 3.)
func.prepared_call((1,1), a_gpu.gpudata, b_gpu.gpudata, 1, 1)
res = b_gpu.get()
print(numpy.sum(res))

# Unexpected result (NaN) when the third int argument is -1.
func.prepared_call((1,1), a_gpu.gpudata, b_gpu.gpudata, -1, 1)
res = b_gpu.get()
print(numpy.sum(res))

_______________________________________________
PyCUDA mailing list
pyc...@host304.hostmonster.com
http://host304.hostmonster.com/mailman/listinfo/pycuda_tiker.net

Reply via email to