[clang] [CIR][AMDGPU] Add support for AMDGCN sqrt builtins (PR #197342)

Ayokunle Amodu via cfe-commits Thu, 11 Jun 2026 14:54:59 -0700

https://github.com/ayokunle321 updated 
https://github.com/llvm/llvm-project/pull/197342


>From 46542d4ef68643bb2e488060d62615a7f8e9c161 Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Tue, 12 May 2026 20:59:51 -0400
Subject: [PATCH 1/8] add amdgcn sqrt builtin

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp |  8 ++++----
 clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
index 04ab1c29b0d63..52b8c478fc0f4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
@@ -220,10 +220,10 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
   case AMDGPU::BI__builtin_amdgcn_sqrtf:
   case AMDGPU::BI__builtin_amdgcn_sqrth:
   case AMDGPU::BI__builtin_amdgcn_sqrt_bf16: {
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented AMDGPU builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinId));
-    return mlir::Value{};
+    mlir::Value src = emitScalarExpr(expr->getArg(0));
+    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()),
+                                       "amdgcn.sqrt", src.getType(),
+                                       mlir::ValueRange{src});
   }
   case AMDGPU::BI__builtin_amdgcn_rsq:
   case AMDGPU::BI__builtin_amdgcn_rsqf:
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
index 4a61fde7aa90c..77bcb72d8f702 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
@@ -71,3 +71,19 @@ __device__ void test_div_fmas_f64(double* out, double a, 
double b, double c, int
 __device__ void test_ds_swizzle(int* out, int a) {
   *out = __builtin_amdgcn_ds_swizzle(a, 32);
 }
+
+// CIR-LABEL: @_Z13test_sqrt_f32Pff
+// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.float) -> !cir.float
+// LLVM: define{{.*}} void @_Z13test_sqrt_f32Pff
+// LLVM: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}})
+__device__ void test_sqrt_f32(float* out, float a) {
+  *out = __builtin_amdgcn_sqrtf(a);
+}
+
+// CIR-LABEL: @_Z13test_sqrt_f64Pdd
+// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.double) -> !cir.double
+// LLVM: define{{.*}} void @_Z13test_sqrt_f64Pdd
+// LLVM: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}})
+__device__ void test_sqrt_f64(double* out, double a) {
+  *out = __builtin_amdgcn_sqrt(a);
+}

>From 0f21ec967d95faaabd516702bab7ca699f19d80b Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Tue, 12 May 2026 22:10:55 -0400
Subject: [PATCH 2/8] fix intinsic call instruction

---
 clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
index 77bcb72d8f702..decc6fafcac07 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
@@ -73,7 +73,7 @@ __device__ void test_ds_swizzle(int* out, int a) {
 }
 
 // CIR-LABEL: @_Z13test_sqrt_f32Pff
-// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.float) -> !cir.float
+// CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.float) -> 
!cir.float
 // LLVM: define{{.*}} void @_Z13test_sqrt_f32Pff
 // LLVM: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}})
 __device__ void test_sqrt_f32(float* out, float a) {
@@ -81,7 +81,7 @@ __device__ void test_sqrt_f32(float* out, float a) {
 }
 
 // CIR-LABEL: @_Z13test_sqrt_f64Pdd
-// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.double) -> !cir.double
+// CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.double) -> 
!cir.double
 // LLVM: define{{.*}} void @_Z13test_sqrt_f64Pdd
 // LLVM: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}})
 __device__ void test_sqrt_f64(double* out, double a) {

>From d51932381e6a3ee705ddb941a44cd553da9eda25 Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Wed, 13 May 2026 10:01:08 -0400
Subject: [PATCH 3/8] add test for bf16 type

---
 .../CodeGenHIP/builtins-amdgcn-gfx1250.hip    | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip

diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
new file mode 100644
index 0000000000000..44bdb2c666204
--- /dev/null
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
@@ -0,0 +1,26 @@
+#include "../CodeGenCUDA/Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z14test_sqrt_bf16PDF16bDF16b
+// CIR:  cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.bf16) -> 
!cir.bf16
+// LLVM: define{{.*}} void @_Z14test_sqrt_bf16PDF16bDF16b
+// LLVM: call{{.*}} bfloat @llvm.amdgcn.sqrt.bf16(bfloat %{{.*}})
+__device__ void test_sqrt_bf16(__bf16* out, __bf16 a) {
+  *out = __builtin_amdgcn_sqrt_bf16(a);
+}

>From a859751fba9515afac8a8ecf0d5834d6118b3a38 Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Wed, 13 May 2026 10:11:43 -0400
Subject: [PATCH 4/8] delete bf16 test

---
 .../CodeGenHIP/builtins-amdgcn-gfx1250.hip    | 26 -------------------
 1 file changed, 26 deletions(-)
 delete mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip

diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
deleted file mode 100644
index 44bdb2c666204..0000000000000
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
+++ /dev/null
@@ -1,26 +0,0 @@
-#include "../CodeGenCUDA/Inputs/cuda.h"
-
-// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-cir %s -o %t.cir
-// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
-// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
-
-//===----------------------------------------------------------------------===//
-// Test AMDGPU builtins
-//===----------------------------------------------------------------------===//
-
-// CIR-LABEL: @_Z14test_sqrt_bf16PDF16bDF16b
-// CIR:  cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.bf16) -> 
!cir.bf16
-// LLVM: define{{.*}} void @_Z14test_sqrt_bf16PDF16bDF16b
-// LLVM: call{{.*}} bfloat @llvm.amdgcn.sqrt.bf16(bfloat %{{.*}})
-__device__ void test_sqrt_bf16(__bf16* out, __bf16 a) {
-  *out = __builtin_amdgcn_sqrt_bf16(a);
-}

>From 1857a15c81d4a4a21f319b0519d349609e0c9936 Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Wed, 13 May 2026 17:30:44 -0400
Subject: [PATCH 5/8] add tests for f16 and bf16 types

---
 .../CodeGenHIP/builtins-amdgcn-gfx1250.hip    | 26 ++++++++
 .../CIR/CodeGenHIP/builtins-amdgcn-vi.hip     | 65 +++++++++++++++++++
 2 files changed, 91 insertions(+)
 create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
 create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip

diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
new file mode 100644
index 0000000000000..2e132ce291d32
--- /dev/null
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
@@ -0,0 +1,26 @@
+#include "../CodeGenCUDA/Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z14test_sqrt_bf16PDF16bDF16b
+// CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.bf16) -> !cir.bf16
+// LLVM: define{{.*}} void @_Z14test_sqrt_bf16PDF16bDF16b
+// LLVM: call{{.*}} bfloat @llvm.amdgcn.sqrt.bf16(bfloat %{{.*}})
+__device__ void test_sqrt_bf16(__bf16* out, __bf16 a) {
+  *out = __builtin_amdgcn_sqrt_bf16(a);
+}
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
new file mode 100644
index 0000000000000..325ae2dd97237
--- /dev/null
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
@@ -0,0 +1,65 @@
+#include "../CodeGenCUDA/Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix= --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z13test_sqrt_f16PDF16_DF16_
+// CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @_Z13test_sqrt_f16PDF16_DF16_
+// LLVM: call{{.*}} half @llvm.{{((amdgcn.){0,1})}}sqrt.f16(half %{{.*}})
+__device__ void test_sqrt_f16(_Float16* out, _Float16 a) {
+  *out = __builtin_amdgcn_sqrth(a);
+}

>From 7f8db68fa10bd4ca6464015502d55a8c7d5b3c96 Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Sat, 16 May 2026 15:27:06 -0400
Subject: [PATCH 6/8] add missing test prefix

---
 clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
index 325ae2dd97237..ac4da55c4105d 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
@@ -38,7 +38,7 @@
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
 // RUN:            -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll
-// RUN: FileCheck --check-prefix= --input-file=%t.ll %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
 // RUN:            -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll

>From b8718fc54d0ec9eacc15cb56b1baf8109cc760c4 Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Sat, 16 May 2026 17:50:05 -0400
Subject: [PATCH 7/8] remove regex matching in CHECK line

---
 clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
index ac4da55c4105d..3c966d16272eb 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
@@ -59,7 +59,7 @@
 // CIR-LABEL: @_Z13test_sqrt_f16PDF16_DF16_
 // CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.f16) -> !cir.f16
 // LLVM: define{{.*}} void @_Z13test_sqrt_f16PDF16_DF16_
-// LLVM: call{{.*}} half @llvm.{{((amdgcn.){0,1})}}sqrt.f16(half %{{.*}})
+// LLVM: call{{.*}} half @llvm.amdgcn.sqrt.f16(half %{{.*}})
 __device__ void test_sqrt_f16(_Float16* out, _Float16 a) {
   *out = __builtin_amdgcn_sqrth(a);
 }

>From b8ed0ce23860d5e2fa037040b1f3b850552a6fe8 Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Thu, 11 Jun 2026 17:54:30 -0400
Subject: [PATCH 8/8] switch out header for macro (__device specifier__)

---
 .../CodeGenHIP/builtins-amdgcn-gfx1250.hip    |  4 +-
 ...dgcn-vi.hip => builtins-amdgcn-vi-f16.hip} | 12 ++++-
 clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip | 44 ++++++++++++++++++-
 3 files changed, 54 insertions(+), 6 deletions(-)
 rename clang/test/CIR/CodeGenHIP/{builtins-amdgcn-vi.hip => 
builtins-amdgcn-vi-f16.hip} (86%)

diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
index 2e132ce291d32..59b86fbf05f77 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
@@ -1,5 +1,3 @@
-#include "../CodeGenCUDA/Inputs/cuda.h"
-
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
 // RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-cir %s -o %t.cir
@@ -13,6 +11,8 @@
 // RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll
 // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
 
+#define __device__ __attribute__((device))
+
 
//===----------------------------------------------------------------------===//
 // Test AMDGPU builtins
 
//===----------------------------------------------------------------------===//
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi-f16.hip
similarity index 86%
rename from clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
rename to clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi-f16.hip
index 3c966d16272eb..96b8d26e6cf80 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi-f16.hip
@@ -1,5 +1,3 @@
-#include "../CodeGenCUDA/Inputs/cuda.h"
-
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
 // RUN:            -target-cpu tonga -fcuda-is-device -emit-cir %s -o %t.cir
@@ -52,10 +50,20 @@
 // RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll
 // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
 
+#define __device__ __attribute__((device))
+
 
//===----------------------------------------------------------------------===//
 // Test AMDGPU builtins
 
//===----------------------------------------------------------------------===//
 
+// CIR-LABEL: @_Z18test_div_fixup_f16PDF16_DF16_DF16_DF16_
+// CIR: ir.call_llvm_intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.f16, 
!cir.f16, !cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @_Z18test_div_fixup_f16PDF16_DF16_DF16_DF16_
+// LLVM: call{{.*}} half @llvm.amdgcn.div.fixup.f16(half %{{.+}}, half 
%{{.+}}, half %{{.+}})
+__device__ void test_div_fixup_f16(_Float16* out, _Float16 a, _Float16 b, 
_Float16 c) {
+  *out = __builtin_amdgcn_div_fixuph(a, b, c);
+}
+
 // CIR-LABEL: @_Z13test_sqrt_f16PDF16_DF16_
 // CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.f16) -> !cir.f16
 // LLVM: define{{.*}} void @_Z13test_sqrt_f16PDF16_DF16_
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
index decc6fafcac07..ff024cf5eead1 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
@@ -1,5 +1,3 @@
-#include "../CodeGenCUDA/Inputs/cuda.h"
-
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
 // RUN:            -target-cpu tahiti -fcuda-is-device -emit-cir %s -o %t.cir
@@ -13,6 +11,8 @@
 // RUN:            -target-cpu tahiti -fcuda-is-device -emit-llvm %s -o %t.ll
 // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
 
+#define __device__ __attribute__((device))
+
 
//===----------------------------------------------------------------------===//
 // Test AMDGPU builtins
 
//===----------------------------------------------------------------------===//
@@ -72,6 +72,46 @@ __device__ void test_ds_swizzle(int* out, int a) {
   *out = __builtin_amdgcn_ds_swizzle(a, 32);
 }
 
+// CIR-LABEL: @_Z18test_div_fixup_f32Pffff
+// CIR: cir.call_llvm_intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.float, 
!cir.float, !cir.float) -> !cir.float
+// LLVM: define{{.*}} void @_Z18test_div_fixup_f32Pffff
+// LLVM: call{{.*}} float @llvm.amdgcn.div.fixup.f32(float %{{.+}}, float 
%{{.+}}, float %{{.+}})
+__device__ void test_div_fixup_f32(float* out, float a, float b, float c) {
+  *out = __builtin_amdgcn_div_fixupf(a, b, c);
+}
+
+// CIR-LABEL: @_Z18test_div_fixup_f64Pdddd
+// CIR: cir.call_llvm_intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.double, 
!cir.double, !cir.double) -> !cir.double
+// LLVM: define{{.*}} void @_Z18test_div_fixup_f64Pdddd
+// LLVM: call{{.*}} double @llvm.amdgcn.div.fixup.f64(double %{{.+}}, double 
%{{.+}}, double %{{.+}})
+__device__ void test_div_fixup_f64(double* out, double a, double b, double c) {
+  *out = __builtin_amdgcn_div_fixup(a, b, c);
+}
+
+// CIR-LABEL: @_Z13test_readlanePiii
+// CIR: cir.call_llvm_intrinsic "amdgcn.readlane" {{.*}} : (!s32i, !s32i) -> 
!s32i
+// LLVM: define{{.*}} void @_Z13test_readlanePiii
+// LLVM: call{{.*}} i32 @llvm.amdgcn.readlane.i32(i32 %{{.*}}, i32 %{{.*}})
+__device__ void test_readlane(int* out, int a, int b) {
+  *out = __builtin_amdgcn_readlane(a, b);
+}
+
+// CIR-LABEL: @_Z18test_readfirstlanePii
+// CIR: cir.call_llvm_intrinsic "amdgcn.readfirstlane" {{.*}} : (!s32i) -> 
!s32i
+// LLVM: define{{.*}} void @_Z18test_readfirstlanePii
+// LLVM: call{{.*}} i32 @llvm.amdgcn.readfirstlane.i32(i32 %{{.*}})
+__device__ void test_readfirstlane(int* out, int a) {
+  *out = __builtin_amdgcn_readfirstlane(a);
+}
+
+// CIR-LABEL: @_Z17test_dispatch_ptr
+// CIR: %{{.*}} = cir.call_llvm_intrinsic "amdgcn.dispatch.ptr" : () -> 
!cir.ptr<!void, target_address_space(4)>
+// LLVM-LABEL: @_Z17test_dispatch_ptr
+// LLVM: call{{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+__device__ void test_dispatch_ptr(__attribute__((address_space(4))) void ** 
out) {
+  *out = (__attribute__((address_space(4))) void 
*)__builtin_amdgcn_dispatch_ptr();
+}
+
 // CIR-LABEL: @_Z13test_sqrt_f32Pff
 // CIR: cir.call_llvm_intrinsic "amdgcn.sqrt" {{.*}} : (!cir.float) -> 
!cir.float
 // LLVM: define{{.*}} void @_Z13test_sqrt_f32Pff

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR][AMDGPU] Add support for AMDGCN sqrt builtins (PR #197342)

Reply via email to