https://github.com/jmmartinez created https://github.com/llvm/llvm-project/pull/173480
Allows for type checking depending on the built-in signature. From 892659b38b9ed790e5baa89848c738319ea32804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Wed, 24 Dec 2025 11:43:15 +0100 Subject: [PATCH 1/2] Pre-commit test: [Clang] Remove 't' from __builtin_amdgcn_global_atomic_fadd_f32/f64 --- .../CodeGenHIP/amdgpu-global-atomic-fadd.hip | 65 +++++++++++++++++++ .../SemaHIP/amdgpu-global-atomic-fadd-err.hip | 39 +++++++++++ 2 files changed, 104 insertions(+) create mode 100644 clang/test/CodeGenHIP/amdgpu-global-atomic-fadd.hip create mode 100644 clang/test/SemaHIP/amdgpu-global-atomic-fadd-err.hip diff --git a/clang/test/CodeGenHIP/amdgpu-global-atomic-fadd.hip b/clang/test/CodeGenHIP/amdgpu-global-atomic-fadd.hip new file mode 100644 index 0000000000000..1fa3a20442064 --- /dev/null +++ b/clang/test/CodeGenHIP/amdgpu-global-atomic-fadd.hip @@ -0,0 +1,65 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx942 -emit-llvm -fcuda-is-device %s -o - | FileCheck %s + +#define __device__ __attribute__((device)) + +__device__ float global_float; +__device__ double global_double; + +// CHECK-LABEL: define dso_local void @_Z33test_global_atomic_fadd_f32_validPff( +// CHECK-SAME: ptr noundef [[PTR:%.*]], float noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[RESULT:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr +// CHECK-NEXT: [[RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT]] to ptr +// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store float [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(1) +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[VAL_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP1]], float [[TMP2]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]], !amdgpu.ignore.denormal.mode [[META4]] +// CHECK-NEXT: store float [[TMP3]], ptr [[RESULT_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[VAL_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) @global_float, float [[TMP4]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]] +// CHECK-NEXT: store float [[TMP5]], ptr [[RESULT_ASCAST]], align 4 +// CHECK-NEXT: ret void +// +__device__ void test_global_atomic_fadd_f32_valid(float *ptr, float val) { + float result; + result = __builtin_amdgcn_global_atomic_fadd_f32(ptr, val); + result = __builtin_amdgcn_global_atomic_fadd_f32(&global_float, val); +} + +// CHECK-LABEL: define dso_local void @_Z33test_global_atomic_fadd_f64_validPdd( +// CHECK-SAME: ptr noundef [[PTR:%.*]], double noundef [[VAL:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca double, align 8, addrspace(5) +// CHECK-NEXT: [[RESULT:%.*]] = alloca double, align 8, addrspace(5) +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr +// CHECK-NEXT: [[RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT]] to ptr +// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store double [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(1) +// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[VAL_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP1]], double [[TMP2]] syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// CHECK-NEXT: store double [[TMP3]], ptr [[RESULT_ASCAST]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[VAL_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) @global_double, double [[TMP4]] syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// CHECK-NEXT: store double [[TMP5]], ptr [[RESULT_ASCAST]], align 8 +// CHECK-NEXT: ret void +// +__device__ void test_global_atomic_fadd_f64_valid(double *ptr, double val) { + double result; + result = __builtin_amdgcn_global_atomic_fadd_f64(ptr, val); + result = __builtin_amdgcn_global_atomic_fadd_f64(&global_double, val); +} +//. +// CHECK: [[META4]] = !{} +//. diff --git a/clang/test/SemaHIP/amdgpu-global-atomic-fadd-err.hip b/clang/test/SemaHIP/amdgpu-global-atomic-fadd-err.hip new file mode 100644 index 0000000000000..9bf4a841e8dfd --- /dev/null +++ b/clang/test/SemaHIP/amdgpu-global-atomic-fadd-err.hip @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -fsyntax-only -triple amdgcn -target-cpu gfx942 -verify %s -fcuda-is-device +// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s +// expected-no-diagnostics + +#define __device__ __attribute__((device)) +#define __shared__ __attribute__((shared)) + +__device__ float global_float; +__device__ float global_double; + +__device__ void test_global_atomic_fadd_f32_valid(float *ptr, float val) { + float result; + result = __builtin_amdgcn_global_atomic_fadd_f32(ptr, val); + result = __builtin_amdgcn_global_atomic_fadd_f32(&global_float, val); +} + +__device__ void test_global_atomic_fadd_f32_errors(float *ptr, float val, + __shared__ float *lds_ptr, + double *ptr_d) { + float result; + result = __builtin_amdgcn_global_atomic_fadd_f32(ptr, val, 0); + result = __builtin_amdgcn_global_atomic_fadd_f32(lds_ptr, val); + result = __builtin_amdgcn_global_atomic_fadd_f32(ptr_d, val); +} + +__device__ void test_global_atomic_fadd_f64_valid(double *ptr, double val) { + double result; + result = __builtin_amdgcn_global_atomic_fadd_f64(ptr, val); + result = __builtin_amdgcn_global_atomic_fadd_f32(&global_double, val); +} + +__device__ void test_global_atomic_fadd_f64_errors(double *ptr, double val, + __shared__ double *lds_ptr, + float *ptr_f) { + double result; + result = __builtin_amdgcn_global_atomic_fadd_f64(ptr, val, 0); + result = __builtin_amdgcn_global_atomic_fadd_f64(lds_ptr, val); + result = __builtin_amdgcn_global_atomic_fadd_f64(ptr_f, val); +} From 619a5e0741ae15c1bc34ad2b85a6e9e87efa4c1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Wed, 24 Dec 2025 11:43:57 +0100 Subject: [PATCH 2/2] [Clang] Remove 't' from __builtin_amdgcn_global_atomic_fadd_f32/f64 --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 4 ++-- clang/test/SemaHIP/amdgpu-global-atomic-fadd-err.hip | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 2623bd476f08f..224aa2ea30bad 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -266,8 +266,8 @@ TARGET_BUILTIN(__builtin_amdgcn_perm, "UiUiUiUi", "nc", "gfx8-insts") TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts") -TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f64, "dd*1d", "t", "gfx90a-insts") -TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "atomic-fadd-rtn-insts") +TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f64, "dd*1d", "", "gfx90a-insts") +TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "", "atomic-fadd-rtn-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2f16, "V2hV2h*1V2h", "t", "atomic-buffer-global-pk-add-f16-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmin_f64, "dd*1d", "t", "gfx90a-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmax_f64, "dd*1d", "t", "gfx90a-insts") diff --git a/clang/test/SemaHIP/amdgpu-global-atomic-fadd-err.hip b/clang/test/SemaHIP/amdgpu-global-atomic-fadd-err.hip index 9bf4a841e8dfd..dc8697debc9bc 100644 --- a/clang/test/SemaHIP/amdgpu-global-atomic-fadd-err.hip +++ b/clang/test/SemaHIP/amdgpu-global-atomic-fadd-err.hip @@ -1,6 +1,5 @@ // RUN: %clang_cc1 -fsyntax-only -triple amdgcn -target-cpu gfx942 -verify %s -fcuda-is-device // RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s -// expected-no-diagnostics #define __device__ __attribute__((device)) #define __shared__ __attribute__((shared)) @@ -18,9 +17,9 @@ __device__ void test_global_atomic_fadd_f32_errors(float *ptr, float val, __shared__ float *lds_ptr, double *ptr_d) { float result; - result = __builtin_amdgcn_global_atomic_fadd_f32(ptr, val, 0); + result = __builtin_amdgcn_global_atomic_fadd_f32(ptr, val, 0); // expected-error{{too many arguments to function call, expected 2, have 3}} result = __builtin_amdgcn_global_atomic_fadd_f32(lds_ptr, val); - result = __builtin_amdgcn_global_atomic_fadd_f32(ptr_d, val); + result = __builtin_amdgcn_global_atomic_fadd_f32(ptr_d, val); // expected-error{{cannot initialize a parameter of type}} } __device__ void test_global_atomic_fadd_f64_valid(double *ptr, double val) { @@ -33,7 +32,7 @@ __device__ void test_global_atomic_fadd_f64_errors(double *ptr, double val, __shared__ double *lds_ptr, float *ptr_f) { double result; - result = __builtin_amdgcn_global_atomic_fadd_f64(ptr, val, 0); + result = __builtin_amdgcn_global_atomic_fadd_f64(ptr, val, 0); // expected-error{{too many arguments to function call, expected 2, have 3}} result = __builtin_amdgcn_global_atomic_fadd_f64(lds_ptr, val); - result = __builtin_amdgcn_global_atomic_fadd_f64(ptr_f, val); + result = __builtin_amdgcn_global_atomic_fadd_f64(ptr_f, val); // expected-error{{cannot initialize a parameter of type}} } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
