https://github.com/ayokunle321 updated 
https://github.com/llvm/llvm-project/pull/197806

>From bf3e710faec048108c7de6ce0e9b74192010fe91 Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Thu, 14 May 2026 17:07:17 -0400
Subject: [PATCH 1/4] add amdgcn cos builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp |  7 +-
 .../CodeGenHIP/builtins-amdgcn-gfx1250.hip    | 26 ++++++++
 .../CIR/CodeGenHIP/builtins-amdgcn-vi.hip     | 65 +++++++++++++++++++
 clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip |  8 +++
 4 files changed, 102 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
 create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
index 04ab1c29b0d63..9cf119b2f0414 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
@@ -252,10 +252,9 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
   case AMDGPU::BI__builtin_amdgcn_cosf:
   case AMDGPU::BI__builtin_amdgcn_cosh:
   case AMDGPU::BI__builtin_amdgcn_cos_bf16: {
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented AMDGPU builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinId));
-    return mlir::Value{};
+    mlir::Value src = emitScalarExpr(expr->getArg(0));
+    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()), 
"amdgcn.cos",
+                                       src.getType(), mlir::ValueRange{src});
   }
   case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: {
     cgm.errorNYI(expr->getSourceRange(),
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
new file mode 100644
index 0000000000000..6e7a48d32f203
--- /dev/null
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
@@ -0,0 +1,26 @@
+#include "../CodeGenCUDA/Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z13test_cos_bf16PDF16bDF16b
+// CIR: cir.call_llvm_intrinsic "amdgcn.cos" {{.*}} : (!cir.bf16) -> !cir.bf16
+// LLVM: define{{.*}} void @_Z13test_cos_bf16PDF16bDF16b
+// LLVM: call{{.*}} bfloat @llvm.amdgcn.cos.bf16(bfloat %{{.*}})
+__device__ void test_cos_bf16(__bf16* out, __bf16 a) {
+  *out = __builtin_amdgcn_cos_bf16(a);
+}
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
new file mode 100644
index 0000000000000..112667bbdd38e
--- /dev/null
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
@@ -0,0 +1,65 @@
+#include "../CodeGenCUDA/Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z12test_cos_f16PDF16_DF16_
+// CIR: cir.call_llvm_intrinsic "amdgcn.cos" {{.*}} : (!cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @_Z12test_cos_f16PDF16_DF16_
+// LLVM: call{{.*}} half @llvm.{{((amdgcn.){0,1})}}cos.f16(half %{{.*}})
+__device__ void test_cos_f16(_Float16* out, _Float16 a) {
+  *out = __builtin_amdgcn_cosh(a);
+}
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
index 4a61fde7aa90c..970d02b9a8edd 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
@@ -71,3 +71,11 @@ __device__ void test_div_fmas_f64(double* out, double a, 
double b, double c, int
 __device__ void test_ds_swizzle(int* out, int a) {
   *out = __builtin_amdgcn_ds_swizzle(a, 32);
 }
+
+// CIR-LABEL: @_Z13test_cosf_f32Pff
+// CIR: cir.call_llvm_intrinsic "amdgcn.cos" {{.*}} : (!cir.float) -> 
!cir.float
+// LLVM: define{{.*}} void @_Z13test_cosf_f32Pff
+// LLVM: call{{.*}} float @llvm.amdgcn.cos.f32(float %{{.*}})
+__device__ void test_cosf_f32(float* out, float a) {
+  *out = __builtin_amdgcn_cosf(a);
+}

>From ff6010bd9ed72f7ce29a6af47876cbe5eb6354c9 Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Sat, 16 May 2026 17:33:28 -0400
Subject: [PATCH 2/4] remove regex matching in CHECK line

---
 clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
index 112667bbdd38e..5dc405fbc65bf 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
@@ -59,7 +59,7 @@
 // CIR-LABEL: @_Z12test_cos_f16PDF16_DF16_
 // CIR: cir.call_llvm_intrinsic "amdgcn.cos" {{.*}} : (!cir.f16) -> !cir.f16
 // LLVM: define{{.*}} void @_Z12test_cos_f16PDF16_DF16_
-// LLVM: call{{.*}} half @llvm.{{((amdgcn.){0,1})}}cos.f16(half %{{.*}})
+// LLVM: call{{.*}} half @llvm.amdgcn.cos.f16(half %{{.*}})
 __device__ void test_cos_f16(_Float16* out, _Float16 a) {
   *out = __builtin_amdgcn_cosh(a);
 }

>From 167c31eeab6e4fc0b4582b53d5f8220f6e76c15c Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Thu, 4 Jun 2026 12:25:56 -0400
Subject: [PATCH 3/4] switch header out for macro

---
 .../{builtins-amdgcn-vi.hip => builtins-amdgcn-vi-f16.hip}   | 5 +++--
 clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip                | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)
 rename clang/test/CIR/CodeGenHIP/{builtins-amdgcn-vi.hip => 
builtins-amdgcn-vi-f16.hip} (98%)

diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi-f16.hip
similarity index 98%
rename from clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
rename to clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi-f16.hip
index 5dc405fbc65bf..4f559a6eb0baa 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi-f16.hip
@@ -1,6 +1,5 @@
-#include "../CodeGenCUDA/Inputs/cuda.h"
-
 // REQUIRES: amdgpu-registered-target
+
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
 // RUN:            -target-cpu tonga -fcuda-is-device -emit-cir %s -o %t.cir
 // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
@@ -52,6 +51,8 @@
 // RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll
 // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
 
+#define __device__ __attribute__((device))
+
 
//===----------------------------------------------------------------------===//
 // Test AMDGPU builtins
 
//===----------------------------------------------------------------------===//
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
index 970d02b9a8edd..41374b6f7619a 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
@@ -1,5 +1,3 @@
-#include "../CodeGenCUDA/Inputs/cuda.h"
-
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
 // RUN:            -target-cpu tahiti -fcuda-is-device -emit-cir %s -o %t.cir
@@ -13,6 +11,8 @@
 // RUN:            -target-cpu tahiti -fcuda-is-device -emit-llvm %s -o %t.ll
 // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
 
+#define __device__ __attribute__((device))
+
 
//===----------------------------------------------------------------------===//
 // Test AMDGPU builtins
 
//===----------------------------------------------------------------------===//

>From a05319470723dc515472bd148fa44390ec493c46 Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Thu, 4 Jun 2026 12:28:31 -0400
Subject: [PATCH 4/4] switch header out for macro

---
 clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
index 6e7a48d32f203..55bbb500d4f0a 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
@@ -1,5 +1,3 @@
-#include "../CodeGenCUDA/Inputs/cuda.h"
-
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
 // RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-cir %s -o %t.cir
@@ -13,6 +11,8 @@
 // RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll
 // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
 
+#define __device__ __attribute__((device))
+
 
//===----------------------------------------------------------------------===//
 // Test AMDGPU builtins
 
//===----------------------------------------------------------------------===//

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to