https://github.com/ayokunle321 created https://github.com/llvm/llvm-project/pull/197447
Adds codegen support for the following AMDGPU reciprocal builtins: - __builtin_amdgcn_rcp (double) - __builtin_amdgcn_rcpf (float) - __builtin_amdgcn_rcph (half) - __builtin_amdgcn_rcp_bf16 (bfloat16) These are lowered to the corresponding `llvm.amdgcn.rcp` intrinsic. >From 41452bf3cfc4c5e9308119211c1a165707d8fe6e Mon Sep 17 00:00:00 2001 From: Ayokunle Amodu <[email protected]> Date: Wed, 13 May 2026 09:35:15 -0400 Subject: [PATCH] add amdgcn rcp builtins --- clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp | 7 +++-- .../CodeGenHIP/builtins-amdgcn-gfx1250.hip | 26 +++++++++++++++++++ clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip | 20 ++++++++++++++ 3 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp index 04ab1c29b0d63..9837140b22491 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp @@ -211,10 +211,9 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId, case AMDGPU::BI__builtin_amdgcn_rcpf: case AMDGPU::BI__builtin_amdgcn_rcph: case AMDGPU::BI__builtin_amdgcn_rcp_bf16: { - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented AMDGPU builtin call: ") + - getContext().BuiltinInfo.getName(builtinId)); - return mlir::Value{}; + mlir::Value src = emitScalarExpr(expr->getArg(0)); + return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()), "amdgcn.rcp", + src.getType(), mlir::ValueRange{src}); } case AMDGPU::BI__builtin_amdgcn_sqrt: case AMDGPU::BI__builtin_amdgcn_sqrtf: diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip new file mode 100644 index 0000000000000..3cb81826a9b01 --- /dev/null +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip @@ -0,0 +1,26 @@ +#include "../CodeGenCUDA/Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +// CIR-LABEL: @_Z13test_rcp_bf16PDF16bDF16b +// CIR: cir.call_llvm_intrinsic "amdgcn.rcp" {{.*}} : (!cir.bf16) -> !cir.bf16 +// LLVM: define{{.*}} void @_Z13test_rcp_bf16PDF16bDF16b +// LLVM: call{{.*}} bfloat @llvm.amdgcn.rcp.bf16(bfloat %{{.*}}) +__device__ void test_rcp_bf16(__bf16* out, __bf16 a) { + *out = __builtin_amdgcn_rcp_bf16(a); +} diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip index 4a61fde7aa90c..16155d4374c81 100644 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip @@ -71,3 +71,23 @@ __device__ void test_div_fmas_f64(double* out, double a, double b, double c, int __device__ void test_ds_swizzle(int* out, int a) { *out = __builtin_amdgcn_ds_swizzle(a, 32); } + +// CIR-LABEL: @_Z12test_rcp_f32Pff +// CIR: cir.call_llvm_intrinsic "amdgcn.rcp" {{.*}} : (!cir.float) -> !cir.float +// LLVM: define{{.*}} void @_Z12test_rcp_f32Pff +// LLVM: call{{.*}} float @llvm.amdgcn.rcp.f32(float %{{.*}}) +// OGCG: define{{.*}} void @_Z12test_rcp_f32Pff +// OGCG: call{{.*}} float @llvm.amdgcn.rcp.f32(float %{{.*}}) +__device__ void test_rcp_f32(float* out, float a) { + *out = __builtin_amdgcn_rcpf(a); +} + +// CIR-LABEL: @_Z12test_rcp_f64Pdd +// CIR: cir.call_llvm_intrinsic "amdgcn.rcp" {{.*}} : (!cir.double) -> !cir.double +// LLVM: define{{.*}} void @_Z12test_rcp_f64Pdd +// LLVM: call{{.*}} double @llvm.amdgcn.rcp.f64(double %{{.*}}) +// OGCG: define{{.*}} void @_Z12test_rcp_f64Pdd +// OGCG: call{{.*}} double @llvm.amdgcn.rcp.f64(double %{{.*}}) +__device__ void test_rcp_f64(double* out, double a) { + *out = __builtin_amdgcn_rcp(a); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
