Dear Sir:
        I have run into a problem and need your help.

The block dimensions do not seem to have any effect, as in the code below:


import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy as np

# Set up the host arrays.  The launch below starts 2*2*2 blocks of 8 threads
# each (64 threads in total) and every thread writes one element of b_img, so
# b_img needs 64 elements; 4x4x4 also matches the shape of the outputs shown
# below.  np.empty is sufficient because the kernel overwrites every element.
b_img = np.empty((4, 4, 4), dtype=np.int32)
# label_img is handed to the kernel but the kernel below never reads it.
label_img = np.random.randint(0, 2, [2, 2, 2]).astype(np.int32)

# Kernel: every thread stores the linear index of its block into b_img.
mod = SourceModule("""
__global__ void localUF(int *b_img, int *label_img)
{
    // Linear index of this block within the grid (x varies fastest).
    int global_block_idx = gridDim.x * gridDim.y * blockIdx.z
                         + gridDim.x * blockIdx.y
                         + blockIdx.x;

    // Offset of this block's first element plus the thread's linear position
    // inside the block.  Each block therefore writes one contiguous run of
    // blockDim.x * blockDim.y * blockDim.z elements, whatever the block's
    // shape is.
    int global_thread_idx =
          global_block_idx * (blockDim.x * blockDim.y * blockDim.z)
        + threadIdx.z * (blockDim.x * blockDim.y)
        + threadIdx.y * blockDim.x
        + threadIdx.x;

    b_img[global_thread_idx] = global_block_idx;
}
""")

func = mod.get_function("localUF")
# InOut copies b_img to the device before the launch and back afterwards;
# In only copies label_img to the device.
func(cuda.InOut(b_img), cuda.In(label_img),
     block=(8, 1, 1), grid=(2, 2, 2))

With block=(8,1,1), the output is:

array([[[0, 0, 0, 0],
       [0, 0, 0, 0],
       [1, 1, 1, 1],
       [1, 1, 1, 1]],

      [[2, 2, 2, 2],
       [2, 2, 2, 2],
       [3, 3, 3, 3],
       [3, 3, 3, 3]],

      [[4, 4, 4, 4],
       [4, 4, 4, 4],
       [5, 5, 5, 5],
       [5, 5, 5, 5]],

      [[6, 6, 6, 6],
       [6, 6, 6, 6],
       [7, 7, 7, 7],
       [7, 7, 7, 7]]], dtype=int32)

If I set block=(2,2,2), the output is the same:

array([[[0, 0, 0, 0],
       [0, 0, 0, 0],
       [1, 1, 1, 1],
       [1, 1, 1, 1]],

      [[2, 2, 2, 2],
       [2, 2, 2, 2],
       [3, 3, 3, 3],
       [3, 3, 3, 3]],

      [[4, 4, 4, 4],
       [4, 4, 4, 4],
       [5, 5, 5, 5],
       [5, 5, 5, 5]],

      [[6, 6, 6, 6],
       [6, 6, 6, 6],
       [7, 7, 7, 7],
       [7, 7, 7, 7]]], dtype=int32)
The output I expected is:

array([[[0, 0, 1, 1],
       [0, 0, 1, 1],
       [2, 2, 3, 3],
       [2, 2, 3, 3]],

      [[0, 0, 1, 1],
       [0, 0, 1, 1],
       [2, 2, 3, 3],
       [2, 2, 3, 3]],

      [[4, 4, 5, 5],
       [4, 4, 5, 5],
       [6, 6, 7, 7],
       [6, 6, 7, 7]],

      [[4, 4, 5, 5],
       [4, 4, 5, 5],
       [6, 6, 7, 7],
       [6, 6, 7, 7]]], dtype=int32)

Is a 3D block useless? Does this mean that block=(2,2,2) is equivalent to block=(2*2*2,1,1)?

PyCUDA mailing list

Reply via email to