llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clangir Author: Ayokunle Amodu (ayokunle321) <details> <summary>Changes</summary> Adds codegen for the following AMDGPU tanh builtins: - __builtin_amdgcn_tanhf (float) - __builtin_amdgcn_tanhh (half) - __builtin_amdgcn_tanh_bf16 (bfloat16) These are lowered to the corresponding `llvm.amdgcn.tanh` intrinsic. --- Full diff: https://github.com/llvm/llvm-project/pull/197852.diff 4 Files Affected: - (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp (+4-4) - (added) clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip (+26) - (added) clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip (+65) - (modified) clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip (+11-3) ``````````diff diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp index 04ab1c29b0d63..8d17fcabcea6a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp @@ -350,10 +350,10 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId, case AMDGPU::BI__builtin_amdgcn_tanhf: case AMDGPU::BI__builtin_amdgcn_tanhh: case AMDGPU::BI__builtin_amdgcn_tanh_bf16: { - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented AMDGPU builtin call: ") + - getContext().BuiltinInfo.getName(builtinId)); - return mlir::Value{}; + mlir::Value src = emitScalarExpr(expr->getArg(0)); + return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()), + "amdgcn.tanh", src.getType(), + mlir::ValueRange{src}); } case AMDGPU::BI__builtin_amdgcn_uicmp: case AMDGPU::BI__builtin_amdgcn_uicmpl: diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip new file mode 100644 index 0000000000000..aca9cd45ee234 --- /dev/null +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip @@ -0,0 +1,26 @@ +#include "../CodeGenCUDA/Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1250 -target-feature +tanh-insts -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1250 -target-feature +tanh-insts -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1250 -target-feature +tanh-insts -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +// CIR-LABEL: @_Z14test_tanh_bf16PDF16bDF16b +// CIR: cir.call_llvm_intrinsic "amdgcn.tanh" {{.*}} : (!cir.bf16) -> !cir.bf16 +// LLVM: define{{.*}} void @_Z14test_tanh_bf16PDF16bDF16b +// LLVM: call{{.*}} bfloat @llvm.amdgcn.tanh.bf16(bfloat %{{.*}}) +__device__ void test_tanh_bf16(__bf16* out, __bf16 a) { + *out = __builtin_amdgcn_tanh_bf16(a); +} diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip new file mode 100644 index 0000000000000..8cbd3c48a58d5 --- /dev/null +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip @@ -0,0 +1,65 @@ +#include "../CodeGenCUDA/Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu tonga -target-feature +tanh-insts -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx900 -target-feature +tanh-insts -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1010 -target-feature +tanh-insts -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1012 -target-feature +tanh-insts -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu tonga -target-feature +tanh-insts -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx900 -target-feature +tanh-insts -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1010 -target-feature +tanh-insts -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1012 -target-feature +tanh-insts -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu tonga -target-feature +tanh-insts -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx900 -target-feature +tanh-insts -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1010 -target-feature +tanh-insts -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1012 -target-feature +tanh-insts -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +// CIR-LABEL: @_Z13test_tanh_f16PDF16_DF16_ +// CIR: cir.call_llvm_intrinsic "amdgcn.tanh" {{.*}} : (!cir.f16) -> !cir.f16 +// LLVM: define{{.*}} void @_Z13test_tanh_f16PDF16_DF16_ +// LLVM: call{{.*}} half @llvm.{{((amdgcn.){0,1})}}tanh.f16(half %{{.*}}) +__device__ void test_tanh_f16(_Float16* out, _Float16 a) { + *out = __builtin_amdgcn_tanhh(a); +} diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip index 4a61fde7aa90c..c64f32be2444a 100644 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip @@ -2,15 +2,15 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ -// RUN: -target-cpu tahiti -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: -target-cpu tahiti -target-feature +tanh-insts -fcuda-is-device -emit-cir %s -o %t.cir // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ -// RUN: -target-cpu tahiti -fcuda-is-device -emit-llvm %s -o %t-cir.ll +// RUN: -target-cpu tahiti --target-feature +tanh-insts fcuda-is-device -emit-llvm %s -o %t-cir.ll // RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ -// RUN: -target-cpu tahiti -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: -target-cpu tahiti -target-feature +tanh-insts -fcuda-is-device -emit-llvm %s -o %t.ll // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s //===----------------------------------------------------------------------===// @@ -71,3 +71,11 @@ __device__ void test_div_fmas_f64(double* out, double a, double b, double c, int __device__ void test_ds_swizzle(int* out, int a) { *out = __builtin_amdgcn_ds_swizzle(a, 32); } + +// CIR-LABEL: @_Z14test_tanhf_f32Pff +// CIR: cir.call_llvm_intrinsic "amdgcn.tanh" {{.*}} : (!cir.float) -> !cir.float +// LLVM: define{{.*}} void @_Z14test_tanhf_f32Pff +// LLVM: call{{.*}} float @llvm.amdgcn.tanh.f32(float %{{.*}}) +__device__ void test_tanhf_f32(float* out, float a) { + *out = __builtin_amdgcn_tanhf(a); +} `````````` </details> https://github.com/llvm/llvm-project/pull/197852 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
