Hello members,

I adapted the SDK diagonal transpose example to transpose a 2776x2080
matrix, but I do not get the correct transposed matrix. The output is:

A
shape:  (2776, 2080)
[[   0    0    0 ...,    0    0    0]
 [   1    1    1 ...,    1    1    1]
 [   2    2    2 ...,    2    2    2]
 ...,
 [2773 2773 2773 ..., 2773 2773 2773]
 [2774 2774 2774 ..., 2774 2774 2774]
 [2775 2775 2775 ..., 2775 2775 2775]]
--------------------------------------------------------------------------------
A.T
shape:  (2080, 2776)
[[   0    1    2 ...,  924  926  927]
 [ 928  930  931 ..., 1853 1855 1856]
 [1857 1859 1860 ...,    6    8    9]
 ...,
 [2766 2767 2769 ...,  915  916  918]
 [ 919  920  922 ..., 1844 1845 1847]
 [1848 1849 1851 ..., 2773 2774 2775]]
--------------------------------------------------------------------------------

Did I miss something? What is wrong in my code?
If I transpose square 2776x2776 or 2080x2080 matrices, everything works fine.
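
For reference: since row i of A holds the constant value i, every row of
the correct transpose A.T should simply read 0, 1, 2, ..., 2775, i.e.

[[   0    1    2 ..., 2773 2774 2775]
 [   0    1    2 ..., 2773 2774 2775]
 ...]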

Best regards,
Giuseppe



from __future__ import division

import numpy
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule



def _get_transpose_kernel():
    mod = SourceModule("""
    #define TILE_DIM 8
    #define BLOCK_ROWS 1

    __global__ void transpose(unsigned short *A_t, unsigned short *A,
                              int a_width, int a_height)
    {
        // Tile staged through shared memory; the +1 padding avoids
        // shared-memory bank conflicts on the transposed read.
        __shared__ unsigned short block[TILE_DIM][TILE_DIM+1];

        int blockIdx_x, blockIdx_y;

        // Diagonal block reordering (as in the SDK transposeDiagonal
        // example) to avoid partition camping.
        if (a_width == a_height) {
            blockIdx_y = blockIdx.x;
            blockIdx_x = (blockIdx.x + blockIdx.y) % gridDim.x;
        } else {
            int bid = blockIdx.x + gridDim.x * blockIdx.y;
            blockIdx_y = bid % gridDim.y;
            blockIdx_x = ((bid / gridDim.y) + blockIdx_y) % gridDim.x;
        }

        // Element this thread reads from A (a_width = row pitch of A) ...
        int xIndex = blockIdx_x * TILE_DIM + threadIdx.x;
        int yIndex = blockIdx_y * TILE_DIM + threadIdx.y;
        int index_in = xIndex + a_width * yIndex;

        // ... and the swapped position it writes in A_t.
        xIndex = blockIdx_y * TILE_DIM + threadIdx.x;
        yIndex = blockIdx_x * TILE_DIM + threadIdx.y;
        int index_out = xIndex + a_height * yIndex;

        for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
            block[threadIdx.y + i][threadIdx.x] = A[index_in + i * a_width];
        }

        __syncthreads();

        for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
            A_t[index_out + i * a_height] = block[threadIdx.x][threadIdx.y + i];
        }
    }
    """)

    func = mod.get_function("transpose")
    return func
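
# Debugging aid (my addition, not part of the SDK example): a naive,
# bounds-checked transpose kernel to cross-check the diagonal version.
# Here a_width is the number of columns of A and a_height its number
# of rows; A and A_t are row-major.
def _get_naive_transpose_kernel():
    mod = SourceModule("""
    __global__ void transpose_naive(unsigned short *A_t, unsigned short *A,
                                    int a_width, int a_height)
    {
        int x = blockIdx.x * blockDim.x + threadIdx.x;  // column in A
        int y = blockIdx.y * blockDim.y + threadIdx.y;  // row in A
        if (x < a_width && y < a_height)
            A_t[x * a_height + y] = A[y * a_width + x];
    }
    """)
    return mod.get_function("transpose_naive")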

def transpose(src):
    w, h = src.shape  # note: a numpy shape is (rows, columns)
    result = gpuarray.empty((h, w), dtype=src.dtype, order='C')

    blocks = (8, 1, 1)  # TILE_DIM x BLOCK_ROWS threads per block

    gridx = int(w / 8)
    gridy = int(h / 8)

    transpose_matrix = _get_transpose_kernel()
    transpose_matrix(result.gpudata, src.gpudata,
                     numpy.int32(w), numpy.int32(h),
                     block=blocks, grid=(gridx, gridy))
    return result
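
# Debugging helper for the naive kernel above: the shape is unpacked as
# (rows, columns) and the grid rounded up, so any rectangular size is
# covered by the bounds check in the kernel.
def transpose_naive(src):
    h, w = src.shape  # (rows, columns)
    result = gpuarray.empty((w, h), dtype=src.dtype, order='C')
    kernel = _get_naive_transpose_kernel()
    kernel(result.gpudata, src.gpudata,
           numpy.int32(w), numpy.int32(h),
           block=(16, 16, 1),
           grid=((w + 15) // 16, (h + 15) // 16))
    return result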

if __name__ == '__main__':

    # row i of A holds the constant value i
    i_data = numpy.repeat(numpy.arange(0, 2776), 2080).reshape(2776, -1)
    i_data = numpy.array(i_data, dtype=numpy.uint16, order='C')
    input = gpuarray.to_gpu(i_data)
    result = transpose(input)

    print "A"
    print "shape: ", i_data.shape
    print i_data
    print "-" * 80
    print "A.T"
    print "shape: ", result.shape
    print result
    print "-" * 80
