This is an automated email from the ASF dual-hosted git repository.
wuwei pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 08965f08ff [CUBLAS] Set fp32 compute and scale dtypes in fp16 matmul (#16892)
08965f08ff is described below
commit 08965f08ff0f6a8d34d45f8275c4aa78b04c90ee
Author: Ivan Sidorenko <[email protected]>
AuthorDate: Tue Apr 16 23:11:01 2024 +0300
[CUBLAS] Set fp32 compute and scale dtypes in fp16 matmul (#16892)
This commit replaces the fp16 compute dtype and scale dtype with fp32 in
the cuBLAS matmul.
---
src/runtime/contrib/cublas/cublas.cc | 6 ------
1 file changed, 6 deletions(-)
diff --git a/src/runtime/contrib/cublas/cublas.cc b/src/runtime/contrib/cublas/cublas.cc
index 49aa35a7e0..553d4014c0 100644
--- a/src/runtime/contrib/cublas/cublas.cc
+++ b/src/runtime/contrib/cublas/cublas.cc
@@ -150,8 +150,6 @@ void CallCublasLt(cublasLtHandle_t hdl, cudaStream_t stream,
cudaDataType_t c_type = CUDA_R_32F;
float one_fp32 = 1.0;
float zero_fp32 = 0.0;
- auto one_fp16 = __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t,
10>(1.0);
- auto zero_fp16 = __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t,
10>(0.0);
int32_t one_i32 = 1;
int32_t zero_i32 = 0;
void* alpha = &one_fp32;
@@ -168,10 +166,6 @@ void CallCublasLt(cublasLtHandle_t hdl, cudaStream_t stream,
if (TypeMatch(C->dtype, kDLFloat, 16)) {
c_type = CUDA_R_16F;
- compute_type = CUBLAS_COMPUTE_16F;
- scale_type = CUDA_R_16F;
- alpha = &one_fp16;
- beta = &zero_fp16;
} else if (TypeMatch(C->dtype, kDLInt, 32)) {
c_type = CUDA_R_32I;
compute_type = CUBLAS_COMPUTE_32I;