llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Justin Lebar (jlebar)
<details>
<summary>Changes</summary>
Previously clang was calling the same LLVM intrinsic for signed and
unsigned atomic min! (Also the same intrinsic for signed and unsigned
atomic max.)
Define new intrinsics and call them from clang.
CUDA reproducer:
__global__ void k(unsigned *p, unsigned v) { atomicMax_block(p, v); }
This generated `atom.global.cta.max.s32` before this patch, now
generates max.u32.
---
Full diff: https://github.com/llvm/llvm-project/pull/200735.diff
5 Files Affected:
- (modified) clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp (+16-12)
- (modified) clang/test/CodeGen/builtins-nvptx.c (+16-16)
- (modified) llvm/include/llvm/IR/IntrinsicsNVVM.td (+2)
- (modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (+8-6)
- (modified) llvm/test/CodeGen/NVPTX/atomics-with-scope.ll (+34)
``````````diff
diff --git a/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp
b/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp
index e2d494103a5c9..faf682b59d44b 100644
--- a/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp
@@ -614,33 +614,37 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned
BuiltinID,
case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
case NVPTX::BI__nvvm_atom_cta_max_gen_i:
- case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
case NVPTX::BI__nvvm_atom_cta_max_gen_l:
- case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
- case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
+ case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
+ case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
+ case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_umax_gen_i_cta, *this, E);
case NVPTX::BI__nvvm_atom_sys_max_gen_i:
- case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
case NVPTX::BI__nvvm_atom_sys_max_gen_l:
- case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
- case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
+ case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
+ case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
+ case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_umax_gen_i_sys, *this, E);
case NVPTX::BI__nvvm_atom_cta_min_gen_i:
- case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
case NVPTX::BI__nvvm_atom_cta_min_gen_l:
- case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
- case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
+ case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
+ case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
+ case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_umin_gen_i_cta, *this, E);
case NVPTX::BI__nvvm_atom_sys_min_gen_i:
- case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
case NVPTX::BI__nvvm_atom_sys_min_gen_l:
- case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
- case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
+ case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
+ case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
+ case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_umin_gen_i_sys, *this, E);
case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
diff --git a/clang/test/CodeGen/builtins-nvptx.c
b/clang/test/CodeGen/builtins-nvptx.c
index 86907f8452fa1..714cc3b61bc3a 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -461,84 +461,84 @@ __device__ void nvvm_atom(float *fp, float f, double
*dfp, double df,
// CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0
// expected-error@+1 {{'__nvvm_atom_cta_max_gen_i' needs target feature
sm_60}}
__nvvm_atom_cta_max_gen_i(ip, i);
- // CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0
+ // CHECK: call i32 @llvm.nvvm.atomic.umax.gen.i.cta.i32.p0
// expected-error@+1 {{'__nvvm_atom_cta_max_gen_ui' needs target feature
sm_60}}
__nvvm_atom_cta_max_gen_ui((unsigned int *)ip, i);
// LP32: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0
// LP64: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0
// expected-error@+1 {{'__nvvm_atom_cta_max_gen_l' needs target feature
sm_60}}
__nvvm_atom_cta_max_gen_l(&dl, l);
- // LP32: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0
- // LP64: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0
+ // LP32: call i32 @llvm.nvvm.atomic.umax.gen.i.cta.i32.p0
+ // LP64: call i64 @llvm.nvvm.atomic.umax.gen.i.cta.i64.p0
// expected-error@+1 {{'__nvvm_atom_cta_max_gen_ul' needs target feature
sm_60}}
__nvvm_atom_cta_max_gen_ul((unsigned long *)lp, l);
// CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0
// expected-error@+1 {{'__nvvm_atom_cta_max_gen_ll' needs target feature
sm_60}}
__nvvm_atom_cta_max_gen_ll(&sll, ll);
- // CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0
+ // CHECK: call i64 @llvm.nvvm.atomic.umax.gen.i.cta.i64.p0
// expected-error@+1 {{'__nvvm_atom_cta_max_gen_ull' needs target feature
sm_60}}
__nvvm_atom_cta_max_gen_ull((unsigned long long *)llp, ll);
// CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0
// expected-error@+1 {{'__nvvm_atom_sys_max_gen_i' needs target feature
sm_60}}
__nvvm_atom_sys_max_gen_i(ip, i);
- // CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0
+ // CHECK: call i32 @llvm.nvvm.atomic.umax.gen.i.sys.i32.p0
// expected-error@+1 {{'__nvvm_atom_sys_max_gen_ui' needs target feature
sm_60}}
__nvvm_atom_sys_max_gen_ui((unsigned int *)ip, i);
// LP32: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0
// LP64: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0
// expected-error@+1 {{'__nvvm_atom_sys_max_gen_l' needs target feature
sm_60}}
__nvvm_atom_sys_max_gen_l(&dl, l);
- // LP32: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0
- // LP64: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0
+ // LP32: call i32 @llvm.nvvm.atomic.umax.gen.i.sys.i32.p0
+ // LP64: call i64 @llvm.nvvm.atomic.umax.gen.i.sys.i64.p0
// expected-error@+1 {{'__nvvm_atom_sys_max_gen_ul' needs target feature
sm_60}}
__nvvm_atom_sys_max_gen_ul((unsigned long *)lp, l);
// CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0
// expected-error@+1 {{'__nvvm_atom_sys_max_gen_ll' needs target feature
sm_60}}
__nvvm_atom_sys_max_gen_ll(&sll, ll);
- // CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0
+ // CHECK: call i64 @llvm.nvvm.atomic.umax.gen.i.sys.i64.p0
// expected-error@+1 {{'__nvvm_atom_sys_max_gen_ull' needs target feature
sm_60}}
__nvvm_atom_sys_max_gen_ull((unsigned long long *)llp, ll);
// CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0
// expected-error@+1 {{'__nvvm_atom_cta_min_gen_i' needs target feature
sm_60}}
__nvvm_atom_cta_min_gen_i(ip, i);
- // CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0
+ // CHECK: call i32 @llvm.nvvm.atomic.umin.gen.i.cta.i32.p0
// expected-error@+1 {{'__nvvm_atom_cta_min_gen_ui' needs target feature
sm_60}}
__nvvm_atom_cta_min_gen_ui((unsigned int *)ip, i);
// LP32: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0
// LP64: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0
// expected-error@+1 {{'__nvvm_atom_cta_min_gen_l' needs target feature
sm_60}}
__nvvm_atom_cta_min_gen_l(&dl, l);
- // LP32: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0
- // LP64: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0
+ // LP32: call i32 @llvm.nvvm.atomic.umin.gen.i.cta.i32.p0
+ // LP64: call i64 @llvm.nvvm.atomic.umin.gen.i.cta.i64.p0
// expected-error@+1 {{'__nvvm_atom_cta_min_gen_ul' needs target feature
sm_60}}
__nvvm_atom_cta_min_gen_ul((unsigned long *)lp, l);
// CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0
// expected-error@+1 {{'__nvvm_atom_cta_min_gen_ll' needs target feature
sm_60}}
__nvvm_atom_cta_min_gen_ll(&sll, ll);
- // CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0
+ // CHECK: call i64 @llvm.nvvm.atomic.umin.gen.i.cta.i64.p0
// expected-error@+1 {{'__nvvm_atom_cta_min_gen_ull' needs target feature
sm_60}}
__nvvm_atom_cta_min_gen_ull((unsigned long long *)llp, ll);
// CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0
// expected-error@+1 {{'__nvvm_atom_sys_min_gen_i' needs target feature
sm_60}}
__nvvm_atom_sys_min_gen_i(ip, i);
- // CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0
+ // CHECK: call i32 @llvm.nvvm.atomic.umin.gen.i.sys.i32.p0
// expected-error@+1 {{'__nvvm_atom_sys_min_gen_ui' needs target feature
sm_60}}
__nvvm_atom_sys_min_gen_ui((unsigned int *)ip, i);
// LP32: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0
// LP64: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0
// expected-error@+1 {{'__nvvm_atom_sys_min_gen_l' needs target feature
sm_60}}
__nvvm_atom_sys_min_gen_l(&dl, l);
- // LP32: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0
- // LP64: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0
+ // LP32: call i32 @llvm.nvvm.atomic.umin.gen.i.sys.i32.p0
+ // LP64: call i64 @llvm.nvvm.atomic.umin.gen.i.sys.i64.p0
// expected-error@+1 {{'__nvvm_atom_sys_min_gen_ul' needs target feature
sm_60}}
__nvvm_atom_sys_min_gen_ul((unsigned long *)lp, l);
// CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0
// expected-error@+1 {{'__nvvm_atom_sys_min_gen_ll' needs target feature
sm_60}}
__nvvm_atom_sys_min_gen_ll(&sll, ll);
- // CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0
+ // CHECK: call i64 @llvm.nvvm.atomic.umin.gen.i.sys.i64.p0
// expected-error@+1 {{'__nvvm_atom_sys_min_gen_ull' needs target feature
sm_60}}
__nvvm_atom_sys_min_gen_ull((unsigned long long *)llp, ll);
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td
b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 647b65cf7714a..e11a6197488ab 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1917,6 +1917,8 @@ let TargetPrefix = "nvvm" in {
defm int_nvvm_atomic_xor_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
defm int_nvvm_atomic_max_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
defm int_nvvm_atomic_min_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
+ defm int_nvvm_atomic_umax_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
+ defm int_nvvm_atomic_umin_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
defm int_nvvm_atomic_or_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
defm int_nvvm_atomic_and_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
defm int_nvvm_atomic_cas_gen_i : PTXAtomicWithScope3<llvm_anyint_ty>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 1a3420ac6a7c7..fa9c225b90c94 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2650,13 +2650,14 @@ foreach t = [I16RT, I32RT, I64RT] in {
// Constructs intrinsic name and instruction asm strings.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
string ScopeStr, string SpaceStr,
- RegTyInfo t, list<Predicate> Preds> {
+ RegTyInfo t, list<Predicate> Preds,
+ string IntOpStr = OpStr> {
defm "" : F_ATOMIC_2_INTRINSIC<t,
as_str = !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr),
sem_str = !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr),
op_str = OpStr # "." # TypeStr,
op = !cast<Intrinsic>(
- "int_nvvm_atomic_" # OpStr
+ "int_nvvm_atomic_" # IntOpStr
# "_" # SpaceStr # "_" # IntTypeStr
# !if(!empty(ScopeStr), "", "_" # ScopeStr)),
preds = Preds>;
@@ -2664,14 +2665,15 @@ multiclass ATOM2N_impl<string OpStr, string IntTypeStr,
string TypeStr,
// Constructs variants for different scopes of atomic op.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
- RegTyInfo t, list<Predicate> Preds> {
+ RegTyInfo t, list<Predicate> Preds,
+ string IntOpStr = OpStr> {
// .gpu scope is default and is currently covered by existing
// atomics w/o explicitly specified scope.
foreach scope = ["cta", "sys"] in {
// For now we only need variants for generic space pointers.
foreach space = ["gen"] in {
defm _#scope#space : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, scope,
space,
- t, !listconcat(Preds, [hasAtomScope])>;
+ t, !listconcat(Preds, [hasAtomScope]), IntOpStr>;
}
}
}
@@ -2721,9 +2723,9 @@ multiclass ATOM2_exch_impl<string OpStr> {
// atom.{min,max}
multiclass ATOM2_minmax_impl<string OpStr> {
defm _s32 : ATOM2S_impl<OpStr, "i", "s32", I32RT, []>;
- defm _u32 : ATOM2S_impl<OpStr, "i", "u32", I32RT, []>;
defm _s64 : ATOM2S_impl<OpStr, "i", "s64", I64RT, [hasAtomMinMax64]>;
- defm _u64 : ATOM2S_impl<OpStr, "i", "u64", I64RT, [hasAtomMinMax64]>;
+ defm _u32 : ATOM2S_impl<OpStr, "i", "u32", I32RT, [], "u"#OpStr>;
+ defm _u64 : ATOM2S_impl<OpStr, "i", "u64", I64RT, [hasAtomMinMax64],
"u"#OpStr>;
}
// atom.{inc,dec}
diff --git a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
index d406f9c1e33f8..eb86e5132ef71 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
@@ -187,3 +187,37 @@ declare i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0(ptr
nocapture, i32, i32) #1
declare i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0(ptr nocapture, i64, i64) #1
attributes #1 = { argmemonly nounwind }
+
+declare i32 @llvm.nvvm.atomic.umax.gen.i.cta.i32.p0(ptr, i32)
+declare i64 @llvm.nvvm.atomic.umax.gen.i.cta.i64.p0(ptr, i64)
+declare i32 @llvm.nvvm.atomic.umax.gen.i.sys.i32.p0(ptr, i32)
+declare i64 @llvm.nvvm.atomic.umax.gen.i.sys.i64.p0(ptr, i64)
+declare i32 @llvm.nvvm.atomic.umin.gen.i.cta.i32.p0(ptr, i32)
+declare i64 @llvm.nvvm.atomic.umin.gen.i.cta.i64.p0(ptr, i64)
+declare i32 @llvm.nvvm.atomic.umin.gen.i.sys.i32.p0(ptr, i32)
+declare i64 @llvm.nvvm.atomic.umin.gen.i.sys.i64.p0(ptr, i64)
+
+; CHECK-LABEL: test_atomics_scope_unsigned_minmax
+; CHECK: atom.cta.max.s32
+; CHECK: atom.cta.min.s32
+; CHECK: atom.cta.max.u32
+; CHECK: atom.cta.max.u64
+; CHECK: atom.sys.max.u32
+; CHECK: atom.sys.max.u64
+; CHECK: atom.cta.min.u32
+; CHECK: atom.cta.min.u64
+; CHECK: atom.sys.min.u32
+; CHECK: atom.sys.min.u64
+define void @test_atomics_scope_unsigned_minmax(ptr %ip, i32 %i, ptr %llp, i64
%ll) {
+ %s = tail call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0(ptr %ip, i32 %i)
+ %t = tail call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0(ptr %ip, i32 %i)
+ %a = tail call i32 @llvm.nvvm.atomic.umax.gen.i.cta.i32.p0(ptr %ip, i32 %i)
+ %b = tail call i64 @llvm.nvvm.atomic.umax.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
+ %c = tail call i32 @llvm.nvvm.atomic.umax.gen.i.sys.i32.p0(ptr %ip, i32 %i)
+ %d = tail call i64 @llvm.nvvm.atomic.umax.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
+ %e = tail call i32 @llvm.nvvm.atomic.umin.gen.i.cta.i32.p0(ptr %ip, i32 %i)
+ %f = tail call i64 @llvm.nvvm.atomic.umin.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
+ %g = tail call i32 @llvm.nvvm.atomic.umin.gen.i.sys.i32.p0(ptr %ip, i32 %i)
+ %h = tail call i64 @llvm.nvvm.atomic.umin.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/200735
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits