Hi,
I have a small example that when run, crash with this error:
[...]
fct(*args, **d)
File
"/u/bastienf/repos/pycuda.git/build.fc9/lib.linux-x86_64-2.5/pycuda/driver.py",
line 187, in function_call
func.launch_grid(*grid)
LaunchError: cuLaunchGrid failed: launch out of resources
----------------------------------------------------------------------
The problem is that I use only 1 thread per block and only 1 block.
The code in the gpu function is very simple: "Z[0] = 0;". I think I
pass correctly the parameters when calling the gpu function. Do
someone have any idea what could be wrong? I join the example of the
crash.
thanks
Frédéric Bastien
import numpy
import pycuda.autoinit
import pycuda.gpuarray
from pycuda.compiler import SourceModule
def test_crash():
code = """
__global__ void kernel_reduce_sum_11_nodename(
const int d0,
const int d1,
const float *A,
const int sA0,
const int sA1,
float * Z
)
{
Z[0]=0;
}
"""
shape = (50, 5)
dtype = "float32"
dtype = getattr(numpy, dtype)()
cpu_val = (numpy.random.rand(*shape) * 10).astype(dtype)
gpu_val = pycuda.gpuarray.to_gpu(cpu_val)
fct_name = "kernel_reduce_sum_11_nodename"
mod = SourceModule(code)
fct = mod.get_function(fct_name)
shared_ = dtype.itemsize * 1
out = pycuda.gpuarray.empty((), dtype)
args = [numpy.intc(i) for i in shape]
args.append(gpu_val)
args += [numpy.intc(i) / dtype.itemsize for i in gpu_val.strides]
args.append(out)
pycuda._driver.Context.synchronize()
d = {"block": (1, 1, 1),
"shared": shared_,
"grid": (1, 1)}
print args
print d
fct(*args, **d)
_______________________________________________
PyCUDA mailing list
[email protected]
http://lists.tiker.net/listinfo/pycuda