Hello,

I have built a benchmark for my custom dot kernel, pasted below. It only
outperforms the CPU dot (np.dot) for large sizes, which is what I expected.
However, when ITERS goes up to 10000 the measured time per call grows
drastically. Is this function call overhead? Moreover, for large sizes some
of the outputs don't match the CPU result. Two sketches of checks I could
run are pasted after the listing, in case they help frame the questions.

Thanks in advance for some insights,
RL


#### the benchmark ####
from time import clock
import pycuda.autoinit
import pycuda.driver as drv
from pycuda import gpuarray
import numpy as np

from pycuda.compiler import SourceModule

SIZE = 512
ITERS = 1000
a = np.array(range(SIZE), dtype=np.float32)
b = np.array([range(SIZE+1) for i in range(SIZE)],
             dtype=np.float32, order='F')

BLOCK_SIZE = (SIZE,1,1)
GRID_SIZE = (SIZE+1,1)

mod = SourceModule("""
    #define BLOCK_SIZE %i

    /* dest[col] = dot(a, column col of b); b is stored in column-major
       (Fortran) order, so one block handles one column */
    __global__ void vecmatdot(float *dest, float *a, float *b)
    {
        __shared__ float partials[BLOCK_SIZE];
        int bx = blockIdx.x;
        int linear_thr_idx = bx * blockDim.x + threadIdx.x;

        /* each thread contributes one elementwise product */
        partials[threadIdx.x] = a[threadIdx.x] * b[linear_thr_idx];
        __syncthreads();

        /* thread 0 reduces the partial products serially */
        if (threadIdx.x == 0) {
            float sum = 0.0f;
            for (int i = 0; i < BLOCK_SIZE; i++)
                sum += partials[i];
            dest[bx] = sum;
        }
    }
    """ % (SIZE,))

vecmatdot = mod.get_function("vecmatdot")

sum_ = 0.0
for i in range(ITERS):
    s = clock()
    bl = np.dot(a,b)
    sum_ += (clock()-s)
print "%f seconds" % (sum_/ITERS,)

gpu_a = gpuarray.to_gpu(a)
gpu_b = gpuarray.to_gpu(b)
gpudest = gpuarray.to_gpu(np.zeros_like(b[0]))
sum_ = 0.0
for i in range(ITERS):
    s = clock()
    vecmatdot(
        gpudest, gpu_a, gpu_b,
        block=BLOCK_SIZE, grid=GRID_SIZE)
    sum_ += (clock()-s)
print "%f seconds" % (sum_/ITERS,)

print (gpudest.get() - bl)[::11]

#### the end ####
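
On the first question: as far as I understand, kernel launches are
asynchronous, so my loop above may only be timing the launch itself.
Below is a sketch (not what I measured above) of how the same loop could
be timed with PyCUDA's CUDA events, so that the measurement waits for
kernel completion. It reuses gpu_a, gpu_b, gpudest, vecmatdot,
BLOCK_SIZE, GRID_SIZE and ITERS from the benchmark.

#### sketch: event-based GPU timing ####
import pycuda.driver as drv

start = drv.Event()
end = drv.Event()

total_ms = 0.0
for i in range(ITERS):
    start.record()                       # mark the launch
    vecmatdot(
        gpudest, gpu_a, gpu_b,
        block=BLOCK_SIZE, grid=GRID_SIZE)
    end.record()                         # mark right after the launch
    end.synchronize()                    # block until the kernel has finished
    total_ms += start.time_till(end)     # elapsed GPU time in milliseconds
print "%f seconds" % (total_ms / ITERS / 1000.0,)
#### end sketch ####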
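
On the second question: since the kernel sums the products in a different
order than np.dot, and everything is float32, I suspect the mismatches
might just be rounding. Here is a sketch of the check I could run against
a float64 reference; the tolerance passed to allclose is an arbitrary
choice of mine.

#### sketch: compare against a float64 reference ####
import numpy as np

# higher-precision reference result for the same vector-matrix product
ref = np.dot(a.astype(np.float64), b.astype(np.float64))
gpu_result = gpudest.get()

print "max |cpu - ref|:", np.max(np.abs(bl - ref))
print "max |gpu - ref|:", np.max(np.abs(gpu_result - ref))
print "gpu ~= cpu:     ", np.allclose(gpu_result, bl, rtol=1e-4)
#### end sketch ####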

