delena updated this revision to Diff 146080.
delena added a comment.

Added more clarification about the memory model of the atomic operations.
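
For reference, a minimal usage sketch of the two new builtins as added by this patch (variable and function names here are illustrative, not taken from the patch):

  // Both builtins take a pointer to a 32-bit signed or unsigned integer, a
  // value, and an explicit memory order; they return the value that was held
  // in memory before the operation.
  static unsigned int shared_min = 0xffffffffu;
  static int shared_max = 0;

  void update(unsigned int u, int s) {
    unsigned int old_min = __atomic_fetch_min(&shared_min, u, __ATOMIC_RELAXED);
    int old_max = __atomic_fetch_max(&shared_max, s, __ATOMIC_SEQ_CST);
    (void)old_min;
    (void)old_max;
  }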


Repository:
  rC Clang

https://reviews.llvm.org/D46386

Files:
  docs/LanguageExtensions.rst
  include/clang/Basic/Builtins.def
  include/clang/Basic/DiagnosticSemaKinds.td
  lib/AST/Expr.cpp
  lib/CodeGen/CGAtomic.cpp
  lib/Sema/SemaChecking.cpp
  test/CodeGen/Atomics.c
  test/Sema/atomic-ops.c

Index: test/Sema/atomic-ops.c
===================================================================
--- test/Sema/atomic-ops.c
+++ test/Sema/atomic-ops.c
@@ -173,6 +173,9 @@
   __atomic_fetch_sub(P, 3, memory_order_seq_cst);
   __atomic_fetch_sub(D, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer or pointer}}
   __atomic_fetch_sub(s1, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer or pointer}}
+  __atomic_fetch_min(D, 3, memory_order_seq_cst); // expected-error {{must be a pointer to signed or unsigned 32-bit integer}}
+  __atomic_fetch_max(P, 3, memory_order_seq_cst); // expected-error {{must be a pointer to signed or unsigned 32-bit integer}}
+  __atomic_fetch_max(p, 3);                       // expected-error {{too few arguments to function call, expected 3, have 2}}
 
   __c11_atomic_fetch_and(i, 1, memory_order_seq_cst);
   __c11_atomic_fetch_and(p, 1, memory_order_seq_cst); // expected-error {{must be a pointer to atomic integer}}
@@ -456,6 +459,20 @@
   (void)__atomic_fetch_nand(p, val, memory_order_acq_rel);
   (void)__atomic_fetch_nand(p, val, memory_order_seq_cst);
 
+  (void)__atomic_fetch_min(p, val, memory_order_relaxed);
+  (void)__atomic_fetch_min(p, val, memory_order_acquire);
+  (void)__atomic_fetch_min(p, val, memory_order_consume);
+  (void)__atomic_fetch_min(p, val, memory_order_release);
+  (void)__atomic_fetch_min(p, val, memory_order_acq_rel);
+  (void)__atomic_fetch_min(p, val, memory_order_seq_cst);
+
+  (void)__atomic_fetch_max(p, val, memory_order_relaxed);
+  (void)__atomic_fetch_max(p, val, memory_order_acquire);
+  (void)__atomic_fetch_max(p, val, memory_order_consume);
+  (void)__atomic_fetch_max(p, val, memory_order_release);
+  (void)__atomic_fetch_max(p, val, memory_order_acq_rel);
+  (void)__atomic_fetch_max(p, val, memory_order_seq_cst);
+
   (void)__atomic_and_fetch(p, val, memory_order_relaxed);
   (void)__atomic_and_fetch(p, val, memory_order_acquire);
   (void)__atomic_and_fetch(p, val, memory_order_consume);
Index: test/CodeGen/Atomics.c
===================================================================
--- test/CodeGen/Atomics.c
+++ test/CodeGen/Atomics.c
@@ -291,3 +291,10 @@
   __sync_lock_release (&sll); // CHECK: store atomic {{.*}} release, align 8
   __sync_lock_release (&ull); // CHECK: store atomic {{.*}} release, align 8
 }
+
+void test_atomic(void) {
+  ui = __atomic_fetch_min(&ui, 5, __ATOMIC_RELAXED); // CHECK: atomicrmw umin {{.*}} monotonic
+  si = __atomic_fetch_min(&si, 5, __ATOMIC_SEQ_CST); // CHECK: atomicrmw min {{.*}} seq_cst
+  ui = __atomic_fetch_max(&ui, 5, __ATOMIC_ACQUIRE); // CHECK: atomicrmw umax {{.*}} acquire
+  si = __atomic_fetch_max(&si, 5, __ATOMIC_RELEASE); // CHECK: atomicrmw max {{.*}} release
+}
Index: lib/Sema/SemaChecking.cpp
===================================================================
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3037,6 +3037,7 @@
              Op == AtomicExpr::AO__atomic_exchange_n ||
              Op == AtomicExpr::AO__atomic_compare_exchange_n;
   bool IsAddSub = false;
+  bool IsMinMax = false;
 
   switch (Op) {
   case AtomicExpr::AO__c11_atomic_init:
@@ -3090,6 +3091,12 @@
     Form = Arithmetic;
     break;
 
+  case AtomicExpr::AO__atomic_fetch_min:
+  case AtomicExpr::AO__atomic_fetch_max:
+    IsMinMax = true;
+    Form = Arithmetic;
+    break;
+
   case AtomicExpr::AO__c11_atomic_exchange:
   case AtomicExpr::AO__opencl_atomic_exchange:
   case AtomicExpr::AO__atomic_exchange_n:
@@ -3172,12 +3179,21 @@
   // For an arithmetic operation, the implied arithmetic must be well-formed.
   if (Form == Arithmetic) {
     // gcc does not enforce these rules for GNU atomics, but we do so for sanity.
-    if (IsAddSub && !ValType->isIntegerType() && !ValType->isPointerType()) {
+    if (IsAddSub && !ValType->isIntegerType() &&
+        !ValType->isPointerType()) {
       Diag(DRE->getLocStart(), diag::err_atomic_op_needs_atomic_int_or_ptr)
         << IsC11 << Ptr->getType() << Ptr->getSourceRange();
       return ExprError();
     }
-    if (!IsAddSub && !ValType->isIntegerType()) {
+    if (IsMinMax) {
+      const BuiltinType *BT = ValType->getAs<BuiltinType>();
+      if (!BT || (BT->getKind() != BuiltinType::Int &&
+                  BT->getKind() != BuiltinType::UInt)) {
+        Diag(DRE->getLocStart(), diag::err_atomic_op_needs_int32_or_ptr);
+        return ExprError();
+      }
+    }
+    if (!IsAddSub && !IsMinMax && !ValType->isIntegerType()) {
       Diag(DRE->getLocStart(), diag::err_atomic_op_bitwise_needs_atomic_int)
         << IsC11 << Ptr->getType() << Ptr->getSourceRange();
       return ExprError();
Index: lib/CodeGen/CGAtomic.cpp
===================================================================
--- lib/CodeGen/CGAtomic.cpp
+++ lib/CodeGen/CGAtomic.cpp
@@ -591,11 +591,13 @@
     break;
 
   case AtomicExpr::AO__opencl_atomic_fetch_min:
+  case AtomicExpr::AO__atomic_fetch_min:
     Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min
                                                   : llvm::AtomicRMWInst::UMin;
     break;
 
   case AtomicExpr::AO__opencl_atomic_fetch_max:
+  case AtomicExpr::AO__atomic_fetch_max:
     Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max
                                                   : llvm::AtomicRMWInst::UMax;
     break;
@@ -859,6 +861,8 @@
   case AtomicExpr::AO__atomic_or_fetch:
   case AtomicExpr::AO__atomic_xor_fetch:
   case AtomicExpr::AO__atomic_nand_fetch:
+  case AtomicExpr::AO__atomic_fetch_min:
+  case AtomicExpr::AO__atomic_fetch_max:
     Val1 = EmitValToTemp(*this, E->getVal1());
     break;
   }
@@ -913,6 +917,8 @@
     case AtomicExpr::AO__atomic_or_fetch:
     case AtomicExpr::AO__atomic_sub_fetch:
     case AtomicExpr::AO__atomic_xor_fetch:
+    case AtomicExpr::AO__atomic_fetch_min:
+    case AtomicExpr::AO__atomic_fetch_max:
       // For these, only library calls for certain sizes exist.
       UseOptimizedLibcall = true;
       break;
@@ -1095,13 +1101,15 @@
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                         MemTy, E->getExprLoc(), sizeChars);
       break;
+    case AtomicExpr::AO__atomic_fetch_min:
     case AtomicExpr::AO__opencl_atomic_fetch_min:
       LibCallName = E->getValueType()->isSignedIntegerType()
                         ? "__atomic_fetch_min"
                         : "__atomic_fetch_umin";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                         LoweredMemTy, E->getExprLoc(), sizeChars);
       break;
+    case AtomicExpr::AO__atomic_fetch_max:
     case AtomicExpr::AO__opencl_atomic_fetch_max:
       LibCallName = E->getValueType()->isSignedIntegerType()
                         ? "__atomic_fetch_max"
Index: lib/AST/Expr.cpp
===================================================================
--- lib/AST/Expr.cpp
+++ lib/AST/Expr.cpp
@@ -4051,6 +4051,8 @@
   case AO__atomic_or_fetch:
   case AO__atomic_xor_fetch:
   case AO__atomic_nand_fetch:
+  case AO__atomic_fetch_min:
+  case AO__atomic_fetch_max:
     return 3;
 
   case AO__opencl_atomic_store:
Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -7114,6 +7114,8 @@
 def err_atomic_op_needs_atomic_int_or_ptr : Error<
   "address argument to atomic operation must be a pointer to %select{|atomic }0"
   "integer or pointer (%1 invalid)">;
+def err_atomic_op_needs_int32_or_ptr : Error<
+  "address argument to atomic operation must be a pointer to signed or unsigned 32-bit integer">;
 def err_atomic_op_bitwise_needs_atomic_int : Error<
   "address argument to bitwise atomic operation must be a pointer to "
   "%select{|atomic }0integer (%1 invalid)">;
Index: include/clang/Basic/Builtins.def
===================================================================
--- include/clang/Basic/Builtins.def
+++ include/clang/Basic/Builtins.def
@@ -721,6 +721,10 @@
 ATOMIC_BUILTIN(__opencl_atomic_fetch_min, "v.", "t")
 ATOMIC_BUILTIN(__opencl_atomic_fetch_max, "v.", "t")
 
+// GCC does not support these; they are a Clang extension.
+ATOMIC_BUILTIN(__atomic_fetch_min, "iiD*i.", "t")
+ATOMIC_BUILTIN(__atomic_fetch_max, "iiD*i.", "t")
+
 #undef ATOMIC_BUILTIN
 
 // Non-overloaded atomic builtins.
Index: docs/LanguageExtensions.rst
===================================================================
--- docs/LanguageExtensions.rst
+++ docs/LanguageExtensions.rst
@@ -1975,6 +1975,63 @@
 Support for constant expression evaluation for the above builtins can be detected
 with ``__has_feature(cxx_constexpr_string_builtins)``.
 
+Atomic Min/Max builtins with memory ordering
+--------------------------------------------
+
+There are two atomic builtins that compare the value in memory with the given
+operand and store the minimum or maximum of the two as a single atomic
+operation. Their syntax and semantics are similar to those of the
+GCC-compatible ``__atomic_*`` builtins.
+
+* ``__atomic_fetch_min``
+* ``__atomic_fetch_max``
+
+The builtins operate on signed and unsigned 32-bit integers and require the
+memory ordering to be specified as the last argument. The return value is the
+original value that was held in memory before the operation.
+
+Example:
+
+.. code-block:: c
+
+  unsigned int *pi;
+  unsigned int ui;
+  unsigned int val = __atomic_fetch_min(pi, ui, __ATOMIC_RELAXED);
+
+The third argument specifies the memory model. Six memory models can be
+specified, allowing a balance to be struck between performance and the
+required synchronization guarantees (see the example after the list below).
+
+* ``__ATOMIC_RELAXED``
+
+  The weakest level of atomicity. It provides no general synchronization; it
+  only guarantees that a single consistent ordering exists among all the
+  operations affecting a specific address. The optimizer may reorder non-atomic
+  and unordered loads around relaxed atomic operations, but it may not split an
+  atomic operation into multiple sub-operations.
+
+* ``__ATOMIC_CONSUME``
+
+  Imposes ordering only on data-dependent accesses: they synchronize with a
+  release (or stronger) operation in another thread, while non-dependent
+  shared variables may still be moved around the atomic-consume operation.
+
+* ``__ATOMIC_ACQUIRE``
+
+  Acts as a barrier against hoisting subsequent code above it and synchronizes
+  with release (or stronger) stores performed in another thread.
+
+* ``__ATOMIC_RELEASE``
+
+  Acts as a barrier against sinking preceding code below it and synchronizes
+  with acquire (or stronger) loads performed in another thread.
+
+* ``__ATOMIC_ACQ_REL``
+
+  Provides a full barrier in both directions and synchronizes with acquire
+  loads and release stores in another thread.
+
+* ``__ATOMIC_SEQ_CST``
+
+  Provides a full barrier in both directions and synchronizes with acquire
+  loads and release stores in all threads. It guarantees that a single total
+  ordering exists among all sequentially-consistent operations, so all threads
+  observe the same consistent state.
+
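+For example, relaxed ordering is typically sufficient when a builtin is only
+used to accumulate a statistic that is read after all threads have finished
+(the function and variable names below are illustrative):
+
+.. code-block:: c
+
+  void note_value(unsigned int *max_seen, unsigned int v) {
+    // Atomically record the largest value observed so far.
+    (void)__atomic_fetch_max(max_seen, v, __ATOMIC_RELAXED);
+  }
+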
 .. _langext-__c11_atomic:
 
 __c11_atomic builtins
@@ -2734,4 +2791,3 @@
 The ``#pragma comment(lib, ...)`` directive is supported on all ELF targets.
 The second parameter is the library name (without the traditional Unix prefix of
 ``lib``).  This allows you to provide an implicit link of dependent libraries.
-