Hi all,
I modified the example
http://documen.tician.de/pycuda/tutorial.html#advanced-topics by removing
the '__padding' from the structure definition and got incorrect result.
The kernel is launched with 2 blocks and one thread in each block.
Each thread prints the 'len' field in structure, which should be 3 for
block 0 and 2 for block 1. However, the result I got is:
block 1: 2097664
block 0: 3
No such problem if I write the following program using C. Any help is
appreciated.
Yifei
#include <stdio.h>
struct Vec {
* int len;*
float* data;
};
__global__ void test(Vec *a) {
Vec v = a[blockIdx.x];
printf("block %d: %d\n", blockIdx.x, v.len);
}
-------------------------------------------------- end of kernel
---------------------------------------------------------------
import numpy
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
class DoubleOpStruct:
# mem_size = 8 + numpy.intp(0).nbytes
*mem_size = 4 + numpy.intp(0).nbytes*
def __init__(self, array, struct_arr_ptr):
data = cuda.to_device(array)
cuda.memcpy_htod(int(struct_arr_ptr), numpy.int32(array.size))
#cuda.memcpy_htod(int(struct_arr_ptr) + 8, numpy.intp(int(data)))
*cuda.memcpy_htod(int(struct_arr_ptr) + 4, numpy.intp(int(data)))*
struct_arr = cuda.mem_alloc(2 * DoubleOpStruct.mem_size)
do2_ptr = int(struct_arr) + DoubleOpStruct.mem_size
array1 = DoubleOpStruct(numpy.array([1, 2, 3], dtype=numpy.float32),
struct_arr)
array2 = DoubleOpStruct(numpy.array([0, 4], dtype=numpy.float32),
do2_ptr)
with open('test.cu', 'r') as f:
src = f.read()
mod = SourceModule(src)
func = mod.get_function("test")
func(struct_arr, block = (1, 1, 1), grid=(2, 1))
_______________________________________________
PyCUDA mailing list
[email protected]
http://lists.tiker.net/listinfo/pycuda