Hello,
I am currently trying to implement Ranlux in one of my programs. My kernel
will be re-run several times with the same seeds, so I don't want to
include the Ranlux initialization in it as I only want to do this once
(right?). I also want to make sure to use the same memory between the runs.
So I figure I can solve this by having two kernels: one kernel that
initializes Ranlux (run once at the beginning), and my "main"
kernel. Both will be written in the same C source file.
Here is some of the code. At first I had some strange errors getting it to
work. Now I can get it to run, but it feels like it runs out of memory
quicker than it should. Am I approaching this the wrong way?
Host code:
# Set up the OpenCL context and a command queue with profiling enabled.
ctx = cl.create_some_context()
queueProperties = cl.command_queue_properties.PROFILING_ENABLE
queue = cl.CommandQueue(ctx, properties=queueProperties)
mf = cl.mem_flags

# Host array of 28 32-bit words per thread; used only to size the
# device-side generator-state buffer below.
dummyBuffer = np.zeros(nbrOfThreads * 28, dtype=np.uint32)

# One random 32-bit seed per thread, copied to the device.
ins = cl.array.to_device(
    queue,
    np.random.randint(0, high=2 ** 31 - 1,
                      size=nbrOfThreads).astype(np.uint32))

# Allocate the state buffer by explicit byte size. The original passed
# hostbuf without COPY_HOST_PTR/USE_HOST_PTR, so the host data was never
# copied and only served to determine the size; the init kernel fills the
# buffer anyway.
ranluxcltab = cl.Buffer(ctx, mf.READ_WRITE, size=dummyBuffer.nbytes)

# Build the kernel path with os.path.join: plain string concatenation drops
# the separator whenever dirname(__file__) is non-empty. The with-block
# closes the file handle once the source has been read.
kernelPath = os.path.join(os.path.dirname(__file__), 'ranlux_test_kernel.c')
with open(kernelPath, 'r') as kernelFile:
    kernelCode_r = kernelFile.read()
kernelCode = kernelCode_r % replacements
prg = cl.Program(ctx, kernelCode).build(options=programBuildOptions)

# Run the initialization kernel once and wait for it to finish.
kernel_init = prg.ranlux_init_kernel
kernelObj_init = kernel_init(queue, globalSize, localSize, ins.data,
                             ranluxcltab)
kernelObj_init.wait()

# Run the main kernel against the same state buffer.
kernel = prg.ranlux_test_kernel
kernelObj = kernel(queue, globalSize, localSize, ins.data, ranluxcltab)
kernelObj.wait()
Kernel Code:
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
// Configuration macros must be defined before the include below; a macro
// defined after it cannot affect the generator source that was already
// pulled in.
#define RANLUXCL_SUPPORT_DOUBLE
#define RANLUXCL_LUX 4
#include "pyopencl-ranluxcl.cl" // Ranlux source-code
// Initializes the generator state table. Intended to be launched once,
// before any kernel that downloads state from ranluxcltab.
//   ins         - seed values, indexed by the flattened 2D global id
//   ranluxcltab - generator state table written by the initialization
__kernel void ranlux_init_kernel(__global uint *ins, __global
ranluxcl_state_t *ranluxcltab)
{
    // Same flattened 2D index that ranlux_test_kernel computes.
    uint threadId = get_global_id(0) + get_global_id(1) *
        get_global_size(0);

    // ranluxcl_initialization takes the seed by value, so pass this
    // work-item's seed rather than the pointer to the whole seed array.
    // NOTE(review): assumes ins holds at least one entry per work-item, and
    // that the library accepts arbitrary 32-bit seeds here; confirm against
    // the RANLUXCL documentation.
    ranluxcl_initialization(ins[threadId], ranluxcltab);
}
// Main kernel: downloads the generator state from ranluxcltab, draws random
// numbers, and uploads the state again so the next launch continues the
// sequence.
//   ins         - seed buffer (not read by this kernel)
//   ranluxcltab - generator state table shared between launches
__kernel void ranlux_test_kernel(__global uint *ins, __global
ranluxcl_state_t *ranluxcltab)
{
    uint threadId = get_global_id(0) + get_global_id(1) *
        get_global_size(0);

    // ranluxclstate stores the state of the generator.
    ranluxcl_state_t ranluxclstate;

    // Download state into ranluxclstate struct.
    ranluxcl_download_seed(&ranluxclstate, ranluxcltab);

    // Store the generator result in a double4 and take the first lane as
    // the scalar used below.
    // NOTE(review): ranluxcl64 is believed to return a double4; confirm
    // against the RANLUXCL header. The remaining lanes are also usable.
    double4 randomnrs = ranluxcl64(&ranluxclstate);
    double randomnr = randomnrs.x;

    /* DO STUFF */

    // Upload state again so that we don't get the same
    // numbers over again the next time we use ranluxcl.
    ranluxcl_upload_seed(&ranluxclstate, ranluxcltab);
}
Cheers,
Calle
_______________________________________________
PyOpenCL mailing list
pyopencl@tiker.net
http://lists.tiker.net/listinfo/pyopencl