masahi commented on pull request #8571:
URL: https://github.com/apache/tvm/pull/8571#issuecomment-888736472


   For the dynamic shared memory ("dyn shmem") matmul test, the generated kernel looks like this:
   
   ```
   extern "C" __global__ void default_function_kernel0(half* __restrict__ A, 
half* __restrict__ B, float* __restrict__ reduce) {
     extern __shared__ uchar buf_dyn_shmem[];
     ((float*)buf_dyn_shmem)[((((((int)threadIdx.y) * 16) + ((int)threadIdx.x)) 
+ 512))] = 0.000000e+00f;
     for (int i = 0; i < 64; ++i) {
       ((half*)buf_dyn_shmem)[((((((int)threadIdx.y) * 16) + 
((int)threadIdx.x)) + 512))] = A[(((((((int)blockIdx.y) * 16384) + 
(((int)threadIdx.y) * 1024)) + (i * 16)) + ((int)threadIdx.x)))];
       ((half*)buf_dyn_shmem)[(((((int)threadIdx.y) * 16) + 
((int)threadIdx.x)))] = B[(((((i * 16384) + (((int)threadIdx.y) * 1024)) + 
(((int)blockIdx.x) * 16)) + ((int)threadIdx.x)))];
       __syncthreads();
       for (int k = 0; k < 16; ++k) {
         ((float*)buf_dyn_shmem)[((((((int)threadIdx.y) * 16) + 
((int)threadIdx.x)) + 512))] = (((float*)buf_dyn_shmem)[((((((int)threadIdx.y) 
* 16) + ((int)threadIdx.x)) + 512))] + 
((float)(((half*)buf_dyn_shmem)[((((((int)threadIdx.y) * 16) + k) + 512))] * 
((half*)buf_dyn_shmem)[(((k * 16) + ((int)threadIdx.x)))])));
       }
       __syncthreads();
     }
     reduce[(((((((int)blockIdx.y) * 16384) + (((int)threadIdx.y) * 1024)) + 
(((int)blockIdx.x) * 16)) + ((int)threadIdx.x)))] = 
((float*)buf_dyn_shmem)[((((((int)threadIdx.y) * 16) + ((int)threadIdx.x)) + 
512))];
   }
   
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to