https://github.com/ayokunle321 updated https://github.com/llvm/llvm-project/pull/197806
>From bf3e710faec048108c7de6ce0e9b74192010fe91 Mon Sep 17 00:00:00 2001 From: Ayokunle Amodu <[email protected]> Date: Thu, 14 May 2026 17:07:17 -0400 Subject: [PATCH 1/3] add amdgcn cos builtins --- clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp | 7 +- .../CodeGenHIP/builtins-amdgcn-gfx1250.hip | 26 ++++++++ .../CIR/CodeGenHIP/builtins-amdgcn-vi.hip | 65 +++++++++++++++++++ clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip | 8 +++ 4 files changed, 102 insertions(+), 4 deletions(-) create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp index 04ab1c29b0d63..9cf119b2f0414 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp @@ -252,10 +252,9 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId, case AMDGPU::BI__builtin_amdgcn_cosf: case AMDGPU::BI__builtin_amdgcn_cosh: case AMDGPU::BI__builtin_amdgcn_cos_bf16: { - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented AMDGPU builtin call: ") + - getContext().BuiltinInfo.getName(builtinId)); - return mlir::Value{}; + mlir::Value src = emitScalarExpr(expr->getArg(0)); + return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()), "amdgcn.cos", + src.getType(), mlir::ValueRange{src}); } case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: { cgm.errorNYI(expr->getSourceRange(), diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip new file mode 100644 index 0000000000000..6e7a48d32f203 --- /dev/null +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip @@ -0,0 +1,26 @@ +#include "../CodeGenCUDA/Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +// CIR-LABEL: @_Z13test_cos_bf16PDF16bDF16b +// CIR: cir.call_llvm_intrinsic "amdgcn.cos" {{.*}} : (!cir.bf16) -> !cir.bf16 +// LLVM: define{{.*}} void @_Z13test_cos_bf16PDF16bDF16b +// LLVM: call{{.*}} bfloat @llvm.amdgcn.cos.bf16(bfloat %{{.*}}) +__device__ void test_cos_bf16(__bf16* out, __bf16 a) { + *out = __builtin_amdgcn_cos_bf16(a); +} diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip new file mode 100644 index 0000000000000..112667bbdd38e --- /dev/null +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip @@ -0,0 +1,65 @@ +#include "../CodeGenCUDA/Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu tonga -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx900 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +// CIR-LABEL: @_Z12test_cos_f16PDF16_DF16_ +// CIR: cir.call_llvm_intrinsic "amdgcn.cos" {{.*}} : (!cir.f16) -> !cir.f16 +// LLVM: define{{.*}} void @_Z12test_cos_f16PDF16_DF16_ +// LLVM: call{{.*}} half @llvm.{{((amdgcn.){0,1})}}cos.f16(half %{{.*}}) +__device__ void test_cos_f16(_Float16* out, _Float16 a) { + *out = __builtin_amdgcn_cosh(a); +} diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip index 4a61fde7aa90c..970d02b9a8edd 100644 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip @@ -71,3 +71,11 @@ __device__ void test_div_fmas_f64(double* out, double a, double b, double c, int __device__ void test_ds_swizzle(int* out, int a) { *out = __builtin_amdgcn_ds_swizzle(a, 32); } + +// CIR-LABEL: @_Z13test_cosf_f32Pff +// CIR: cir.call_llvm_intrinsic "amdgcn.cos" {{.*}} : (!cir.float) -> !cir.float +// LLVM: define{{.*}} void @_Z13test_cosf_f32Pff +// LLVM: call{{.*}} float @llvm.amdgcn.cos.f32(float %{{.*}}) +__device__ void test_cosf_f32(float* out, float a) { + *out = __builtin_amdgcn_cosf(a); +} >From ff6010bd9ed72f7ce29a6af47876cbe5eb6354c9 Mon Sep 17 00:00:00 2001 From: Ayokunle Amodu <[email protected]> Date: Sat, 16 May 2026 17:33:28 -0400 Subject: [PATCH 2/3] remove regex matching in CHECK line --- clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip index 112667bbdd38e..5dc405fbc65bf 100644 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip @@ -59,7 +59,7 @@ // CIR-LABEL: @_Z12test_cos_f16PDF16_DF16_ // CIR: cir.call_llvm_intrinsic "amdgcn.cos" {{.*}} : (!cir.f16) -> !cir.f16 // LLVM: define{{.*}} void @_Z12test_cos_f16PDF16_DF16_ -// LLVM: call{{.*}} half @llvm.{{((amdgcn.){0,1})}}cos.f16(half %{{.*}}) +// LLVM: call{{.*}} half @llvm.amdgcn.cos.f16(half %{{.*}}) __device__ void test_cos_f16(_Float16* out, _Float16 a) { *out = __builtin_amdgcn_cosh(a); } >From 167c31eeab6e4fc0b4582b53d5f8220f6e76c15c Mon Sep 17 00:00:00 2001 From: Ayokunle Amodu <[email protected]> Date: Thu, 4 Jun 2026 12:25:56 -0400 Subject: [PATCH 3/3] switch header out for macro --- .../{builtins-amdgcn-vi.hip => builtins-amdgcn-vi-f16.hip} | 5 +++-- clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) rename clang/test/CIR/CodeGenHIP/{builtins-amdgcn-vi.hip => builtins-amdgcn-vi-f16.hip} (98%) diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi-f16.hip similarity index 98% rename from clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip rename to clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi-f16.hip index 5dc405fbc65bf..4f559a6eb0baa 100644 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi-f16.hip @@ -1,6 +1,5 @@ -#include "../CodeGenCUDA/Inputs/cuda.h" - // REQUIRES: amdgpu-registered-target + // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ // RUN: -target-cpu tonga -fcuda-is-device -emit-cir %s -o %t.cir // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s @@ -52,6 +51,8 @@ // RUN: -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s +#define __device__ __attribute__((device)) + //===----------------------------------------------------------------------===// // Test AMDGPU builtins //===----------------------------------------------------------------------===// diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip index 970d02b9a8edd..41374b6f7619a 100644 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip @@ -1,5 +1,3 @@ -#include "../CodeGenCUDA/Inputs/cuda.h" - // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ // RUN: -target-cpu tahiti -fcuda-is-device -emit-cir %s -o %t.cir @@ -13,6 +11,8 @@ // RUN: -target-cpu tahiti -fcuda-is-device -emit-llvm %s -o %t.ll // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s +#define __device__ __attribute__((device)) + //===----------------------------------------------------------------------===// // Test AMDGPU builtins //===----------------------------------------------------------------------===// _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
