https://github.com/ayokunle321 updated https://github.com/llvm/llvm-project/pull/197342
>From 46542d4ef68643bb2e488060d62615a7f8e9c161 Mon Sep 17 00:00:00 2001 From: Ayokunle Amodu <[email protected]> Date: Tue, 12 May 2026 20:59:51 -0400 Subject: [PATCH 1/8] add amdgcn sqrt builtin --- clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp | 8 ++++---- clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp index 04ab1c29b0d63..52b8c478fc0f4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp @@ -220,10 +220,10 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId, case AMDGPU::BI__builtin_amdgcn_sqrtf: case AMDGPU::BI__builtin_amdgcn_sqrth: case AMDGPU::BI__builtin_amdgcn_sqrt_bf16: { - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented AMDGPU builtin call: ") + - getContext().BuiltinInfo.getName(builtinId)); - return mlir::Value{}; + mlir::Value src = emitScalarExpr(expr->getArg(0)); + return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()), + "amdgcn.sqrt", src.getType(), + mlir::ValueRange{src}); } case AMDGPU::BI__builtin_amdgcn_rsq: case AMDGPU::BI__builtin_amdgcn_rsqf: diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip index 4a61fde7aa90c..77bcb72d8f702 100644 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip @@ -71,3 +71,19 @@ __device__ void test_div_fmas_f64(double* out, double a, double b, double c, int __device__ void test_ds_swizzle(int* out, int a) { *out = __builtin_amdgcn_ds_swizzle(a, 32); } + +// CIR-LABEL: @_Z13test_sqrt_f32Pff +// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.float) -> !cir.float +// LLVM: define{{.*}} void @_Z13test_sqrt_f32Pff +// LLVM: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}}) +__device__ void test_sqrt_f32(float* out, float a) { + *out = 
__builtin_amdgcn_sqrtf(a); +} + +// CIR-LABEL: @_Z13test_sqrt_f64Pdd +// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.double) -> !cir.double +// LLVM: define{{.*}} void @_Z13test_sqrt_f64Pdd +// LLVM: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}}) +__device__ void test_sqrt_f64(double* out, double a) { + *out = __builtin_amdgcn_sqrt(a); +} >From 0f21ec967d95faaabd516702bab7ca699f19d80b Mon Sep 17 00:00:00 2001 From: Ayokunle Amodu <[email protected]> Date: Tue, 12 May 2026 22:10:55 -0400 Subject: [PATCH 2/8] fix intrinsic call instruction --- clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip index 77bcb72d8f702..decc6fafcac07 100644 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip @@ -73,7 +73,7 @@ __device__ void test_ds_swizzle(int* out, int a) { } // CIR-LABEL: @_Z13test_sqrt_f32Pff -// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.float) -> !cir.float +// CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.float) -> !cir.float // LLVM: define{{.*}} void @_Z13test_sqrt_f32Pff // LLVM: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}}) __device__ void test_sqrt_f32(float* out, float a) { @@ -81,7 +81,7 @@ __device__ void test_sqrt_f32(float* out, float a) { } // CIR-LABEL: @_Z13test_sqrt_f64Pdd -// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.double) -> !cir.double +// CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.double) -> !cir.double // LLVM: define{{.*}} void @_Z13test_sqrt_f64Pdd // LLVM: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}}) __device__ void test_sqrt_f64(double* out, double a) { >From d51932381e6a3ee705ddb941a44cd553da9eda25 Mon Sep 17 00:00:00 2001 From: Ayokunle Amodu <[email protected]> Date: Wed, 13 May 2026 10:01:08 -0400 Subject: [PATCH 3/8] add test for bf16 type --- 
.../CodeGenHIP/builtins-amdgcn-gfx1250.hip | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip new file mode 100644 index 0000000000000..44bdb2c666204 --- /dev/null +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip @@ -0,0 +1,26 @@ +#include "../CodeGenCUDA/Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +// CIR-LABEL: @_Z14test_sqrt_bf16PDF16bDF16b +// CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.bf16) -> !cir.bf16 +// LLVM: define{{.*}} void @_Z14test_sqrt_bf16PDF16bDF16b +// LLVM: call{{.*}} bfloat @llvm.amdgcn.sqrt.bf16(bfloat %{{.*}}) +__device__ void test_sqrt_bf16(__bf16* out, __bf16 a) { + *out = __builtin_amdgcn_sqrt_bf16(a); +} >From a859751fba9515afac8a8ecf0d5834d6118b3a38 Mon Sep 17 00:00:00 2001 From: Ayokunle Amodu <[email protected]> Date: Wed, 13 May 2026 10:11:43 -0400 Subject: [PATCH 4/8] delete bf16 test --- .../CodeGenHIP/builtins-amdgcn-gfx1250.hip | 26 ------------------- 1 file changed, 26 deletions(-) delete mode 100644 
clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip deleted file mode 100644 index 44bdb2c666204..0000000000000 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip +++ /dev/null @@ -1,26 +0,0 @@ -#include "../CodeGenCUDA/Inputs/cuda.h" - -// REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ -// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-cir %s -o %t.cir -// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s - -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ -// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll -// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s - -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ -// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll -// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s - -//===----------------------------------------------------------------------===// -// Test AMDGPU builtins -//===----------------------------------------------------------------------===// - -// CIR-LABEL: @_Z14test_sqrt_bf16PDF16bDF16b -// CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.bf16) -> !cir.bf16 -// LLVM: define{{.*}} void @_Z14test_sqrt_bf16PDF16bDF16b -// LLVM: call{{.*}} bfloat @llvm.amdgcn.sqrt.bf16(bfloat %{{.*}}) -__device__ void test_sqrt_bf16(__bf16* out, __bf16 a) { - *out = __builtin_amdgcn_sqrt_bf16(a); -} >From 1857a15c81d4a4a21f319b0519d349609e0c9936 Mon Sep 17 00:00:00 2001 From: Ayokunle Amodu <[email protected]> Date: Wed, 13 May 2026 17:30:44 -0400 Subject: [PATCH 5/8] add tests for f16 and bf16 types --- .../CodeGenHIP/builtins-amdgcn-gfx1250.hip | 26 ++++++++ .../CIR/CodeGenHIP/builtins-amdgcn-vi.hip | 65 +++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 
clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip new file mode 100644 index 0000000000000..2e132ce291d32 --- /dev/null +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip @@ -0,0 +1,26 @@ +#include "../CodeGenCUDA/Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +// CIR-LABEL: @_Z14test_sqrt_bf16PDF16bDF16b +// CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.bf16) -> !cir.bf16 +// LLVM: define{{.*}} void @_Z14test_sqrt_bf16PDF16bDF16b +// LLVM: call{{.*}} bfloat @llvm.amdgcn.sqrt.bf16(bfloat %{{.*}}) +__device__ void test_sqrt_bf16(__bf16* out, __bf16 a) { + *out = __builtin_amdgcn_sqrt_bf16(a); +} diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip new file mode 100644 index 0000000000000..325ae2dd97237 --- /dev/null +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip @@ -0,0 +1,65 @@ +#include "../CodeGenCUDA/Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple 
amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu tonga -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx900 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix= --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip 
-std=c++11 \ +// RUN: -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +// CIR-LABEL: @_Z13test_sqrt_f16PDF16_DF16_ +// CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.f16) -> !cir.f16 +// LLVM: define{{.*}} void @_Z13test_sqrt_f16PDF16_DF16_ +// LLVM: call{{.*}} half @llvm.{{((amdgcn.){0,1})}}sqrt.f16(half %{{.*}}) +__device__ void test_sqrt_f16(_Float16* out, _Float16 a) { + *out = __builtin_amdgcn_sqrth(a); +} >From 7f8db68fa10bd4ca6464015502d55a8c7d5b3c96 Mon Sep 17 00:00:00 2001 From: Ayokunle Amodu <[email protected]> Date: Sat, 16 May 2026 15:27:06 -0400 Subject: [PATCH 6/8] add missing test prefix --- clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip index 325ae2dd97237..ac4da55c4105d 100644 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip @@ -38,7 +38,7 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ // RUN: -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll -// RUN: FileCheck --check-prefix= --input-file=%t.ll %s +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ // RUN: 
-target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll >From b8718fc54d0ec9eacc15cb56b1baf8109cc760c4 Mon Sep 17 00:00:00 2001 From: Ayokunle Amodu <[email protected]> Date: Sat, 16 May 2026 17:50:05 -0400 Subject: [PATCH 7/8] remove regex matching in CHECK line --- clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip index ac4da55c4105d..3c966d16272eb 100644 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip @@ -59,7 +59,7 @@ // CIR-LABEL: @_Z13test_sqrt_f16PDF16_DF16_ // CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.f16) -> !cir.f16 // LLVM: define{{.*}} void @_Z13test_sqrt_f16PDF16_DF16_ -// LLVM: call{{.*}} half @llvm.{{((amdgcn.){0,1})}}sqrt.f16(half %{{.*}}) +// LLVM: call{{.*}} half @llvm.amdgcn.sqrt.f16(half %{{.*}}) __device__ void test_sqrt_f16(_Float16* out, _Float16 a) { *out = __builtin_amdgcn_sqrth(a); } >From b8ed0ce23860d5e2fa037040b1f3b850552a6fe8 Mon Sep 17 00:00:00 2001 From: Ayokunle Amodu <[email protected]> Date: Thu, 11 Jun 2026 17:54:30 -0400 Subject: [PATCH 8/8] switch out header for macro (__device__ specifier) --- .../CodeGenHIP/builtins-amdgcn-gfx1250.hip | 4 +- ...dgcn-vi.hip => builtins-amdgcn-vi-f16.hip} | 12 ++++- clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip | 44 ++++++++++++++++++- 3 files changed, 54 insertions(+), 6 deletions(-) rename clang/test/CIR/CodeGenHIP/{builtins-amdgcn-vi.hip => builtins-amdgcn-vi-f16.hip} (86%) diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip index 2e132ce291d32..59b86fbf05f77 100644 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip @@ -1,5 +1,3 @@ -#include "../CodeGenCUDA/Inputs/cuda.h" - // REQUIRES: amdgpu-registered-target 
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ // RUN: -target-cpu gfx1250 -fcuda-is-device -emit-cir %s -o %t.cir @@ -13,6 +11,8 @@ // RUN: -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s +#define __device__ __attribute__((device)) + //===----------------------------------------------------------------------===// // Test AMDGPU builtins //===----------------------------------------------------------------------===// diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi-f16.hip similarity index 86% rename from clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip rename to clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi-f16.hip index 3c966d16272eb..96b8d26e6cf80 100644 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi-f16.hip @@ -1,5 +1,3 @@ -#include "../CodeGenCUDA/Inputs/cuda.h" - // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ // RUN: -target-cpu tonga -fcuda-is-device -emit-cir %s -o %t.cir @@ -52,10 +50,20 @@ // RUN: -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s +#define __device__ __attribute__((device)) + //===----------------------------------------------------------------------===// // Test AMDGPU builtins //===----------------------------------------------------------------------===// +// CIR-LABEL: @_Z18test_div_fixup_f16PDF16_DF16_DF16_DF16_ +// CIR: cir.call_llvm_intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.f16, !cir.f16, !cir.f16) -> !cir.f16 +// LLVM: define{{.*}} void @_Z18test_div_fixup_f16PDF16_DF16_DF16_DF16_ +// LLVM: call{{.*}} half @llvm.amdgcn.div.fixup.f16(half %{{.+}}, half %{{.+}}, half %{{.+}}) +__device__ void test_div_fixup_f16(_Float16* out, _Float16 a, _Float16 b, _Float16 c) { + *out = 
__builtin_amdgcn_div_fixuph(a, b, c); +} + // CIR-LABEL: @_Z13test_sqrt_f16PDF16_DF16_ // CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.f16) -> !cir.f16 // LLVM: define{{.*}} void @_Z13test_sqrt_f16PDF16_DF16_ diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip index decc6fafcac07..ff024cf5eead1 100644 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip @@ -1,5 +1,3 @@ -#include "../CodeGenCUDA/Inputs/cuda.h" - // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ // RUN: -target-cpu tahiti -fcuda-is-device -emit-cir %s -o %t.cir @@ -13,6 +11,8 @@ // RUN: -target-cpu tahiti -fcuda-is-device -emit-llvm %s -o %t.ll // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s +#define __device__ __attribute__((device)) + //===----------------------------------------------------------------------===// // Test AMDGPU builtins //===----------------------------------------------------------------------===// @@ -72,6 +72,46 @@ __device__ void test_ds_swizzle(int* out, int a) { *out = __builtin_amdgcn_ds_swizzle(a, 32); } +// CIR-LABEL: @_Z18test_div_fixup_f32Pffff +// CIR: cir.call_llvm_intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.float, !cir.float, !cir.float) -> !cir.float +// LLVM: define{{.*}} void @_Z18test_div_fixup_f32Pffff +// LLVM: call{{.*}} float @llvm.amdgcn.div.fixup.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}) +__device__ void test_div_fixup_f32(float* out, float a, float b, float c) { + *out = __builtin_amdgcn_div_fixupf(a, b, c); +} + +// CIR-LABEL: @_Z18test_div_fixup_f64Pdddd +// CIR: cir.call_llvm_intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.double, !cir.double, !cir.double) -> !cir.double +// LLVM: define{{.*}} void @_Z18test_div_fixup_f64Pdddd +// LLVM: call{{.*}} double @llvm.amdgcn.div.fixup.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}) +__device__ void 
test_div_fixup_f64(double* out, double a, double b, double c) { + *out = __builtin_amdgcn_div_fixup(a, b, c); +} + +// CIR-LABEL: @_Z13test_readlanePiii +// CIR: cir.call_llvm_intrinsic "amdgcn.readlane" {{.*}} : (!s32i, !s32i) -> !s32i +// LLVM: define{{.*}} void @_Z13test_readlanePiii +// LLVM: call{{.*}} i32 @llvm.amdgcn.readlane.i32(i32 %{{.*}}, i32 %{{.*}}) +__device__ void test_readlane(int* out, int a, int b) { + *out = __builtin_amdgcn_readlane(a, b); +} + +// CIR-LABEL: @_Z18test_readfirstlanePii +// CIR: cir.call_llvm_intrinsic "amdgcn.readfirstlane" {{.*}} : (!s32i) -> !s32i +// LLVM: define{{.*}} void @_Z18test_readfirstlanePii +// LLVM: call{{.*}} i32 @llvm.amdgcn.readfirstlane.i32(i32 %{{.*}}) +__device__ void test_readfirstlane(int* out, int a) { + *out = __builtin_amdgcn_readfirstlane(a); +} + +// CIR-LABEL: @_Z17test_dispatch_ptr +// CIR: %{{.*}} = cir.call_llvm_intrinsic "amdgcn.dispatch.ptr" : () -> !cir.ptr<!void, target_address_space(4)> +// LLVM-LABEL: @_Z17test_dispatch_ptr +// LLVM: call{{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +__device__ void test_dispatch_ptr(__attribute__((address_space(4))) void ** out) { + *out = (__attribute__((address_space(4))) void *)__builtin_amdgcn_dispatch_ptr(); +} + // CIR-LABEL: @_Z13test_sqrt_f32Pff // CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.float) -> !cir.float // LLVM: define{{.*}} void @_Z13test_sqrt_f32Pff _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
