yaxunl retitled this revision from "Support synchronisation scope in atomic 
builtin functions" to "Support synchronisation scope in Clang atomic builtin 
functions".
yaxunl updated the summary for this revision.
yaxunl updated this revision to Diff 84373.
yaxunl added a comment.

Fix typos and formatting issues.
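
For reference, a minimal usage sketch of the extended builtins (mirroring
test/CodeGenOpenCL/atomic-ops.cl below; the synch_scope_* enumerators are
defined in that test, not by the compiler):

  // The scope argument is optional; omitting it defaults to crossthread.
  int x = __c11_atomic_load(i, memory_order_seq_cst);
  // An explicit singlethread scope becomes LLVM's "singlethread" marker.
  int y = __c11_atomic_load(i, memory_order_seq_cst, synch_scope_single_thread);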


https://reviews.llvm.org/D28691

Files:
  include/clang/AST/Expr.h
  lib/AST/Expr.cpp
  lib/Basic/Targets.cpp
  lib/CodeGen/CGAtomic.cpp
  lib/Sema/SemaChecking.cpp
  test/CodeGenOpenCL/atomic-ops.cl
  test/Sema/atomic-ops.c

Index: test/Sema/atomic-ops.c
===================================================================
--- test/Sema/atomic-ops.c
+++ test/Sema/atomic-ops.c
@@ -92,7 +92,7 @@
   __c11_atomic_init(ci, 5); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const _Atomic(int) *' invalid)}}
 
   __c11_atomic_load(0); // expected-error {{too few arguments to function}}
-  __c11_atomic_load(0,0,0); // expected-error {{too many arguments to function}}
+  __c11_atomic_load(0,0,0,0); // expected-error {{too many arguments to function}}
   __c11_atomic_store(0,0,0); // expected-error {{address argument to atomic builtin must be a pointer}}
   __c11_atomic_store((int*)0,0,0); // expected-error {{address argument to atomic operation must be a pointer to _Atomic}}
   __c11_atomic_store(i, 0, memory_order_relaxed);
@@ -114,7 +114,7 @@
 
   __atomic_load(I, i, memory_order_relaxed); // expected-warning {{passing '_Atomic(int) *' to parameter of type 'int *'}}
   __atomic_load(I, *P, memory_order_relaxed);
-  __atomic_load(I, *P, memory_order_relaxed, 42); // expected-error {{too many arguments}}
+  __atomic_load(I, *P, memory_order_relaxed, 42, 0); // expected-error {{too many arguments}}
   (int)__atomic_load(I, I, memory_order_seq_cst); // expected-error {{operand of type 'void'}}
   __atomic_load(s1, s2, memory_order_acquire);
   __atomic_load(CI, I, memory_order_relaxed);
Index: test/CodeGenOpenCL/atomic-ops.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/atomic-ops.cl
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-opencl | FileCheck %s
+
+// Also test serialization of atomic operations here, to avoid duplicating the test.
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-opencl
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-opencl -emit-llvm -o - | FileCheck %s
+
+#ifndef ALREADY_INCLUDED
+#define ALREADY_INCLUDED
+
+typedef enum memory_order {
+  memory_order_relaxed = __ATOMIC_RELAXED,
+  memory_order_consume = __ATOMIC_CONSUME,
+  memory_order_acquire = __ATOMIC_ACQUIRE,
+  memory_order_release = __ATOMIC_RELEASE,
+  memory_order_acq_rel = __ATOMIC_ACQ_REL,
+  memory_order_seq_cst = __ATOMIC_SEQ_CST
+} memory_order;
+
+// TODO: Currently LLVM only supports the synchronization scopes singlethread
+// and crossthread (the default). Add tests for the OpenCL synchronization
+// scopes once they are supported by LLVM.
+typedef enum synch_scope
+{
+  synch_scope_single_thread,
+  synch_scope_cross_thread,
+  synch_scope_ocl_work_item,
+  synch_scope_ocl_work_group,
+  synch_scope_ocl_device,
+  synch_scope_ocl_all_svm_devices,
+  synch_scope_ocl_sub_group
+} synch_scope;
+
+void fi1(atomic_int *i, int *j, int cmp) {
+  int x;
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} seq_cst
+  x = __c11_atomic_load(i, memory_order_seq_cst);
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} singlethread seq_cst
+  x = __c11_atomic_load(i, memory_order_seq_cst, synch_scope_single_thread);
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} seq_cst
+  x = __c11_atomic_load(i, memory_order_seq_cst, synch_scope_cross_thread);
+  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} singlethread seq_cst
+  __c11_atomic_store(i, 1, memory_order_seq_cst, synch_scope_single_thread);
+  // CHECK: atomicrmw and i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} singlethread seq_cst
+  x = __c11_atomic_fetch_and(i, 1, memory_order_seq_cst, synch_scope_single_thread);
+  // CHECK: cmpxchg i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} singlethread acquire acquire
+  x = __c11_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, synch_scope_single_thread);
+}
+
+#endif
\ No newline at end of file
Index: lib/Sema/SemaChecking.cpp
===================================================================
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -2630,27 +2630,30 @@
   DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
 
   // All these operations take one of the following forms:
+  // The last argument is an optional synchronization scope; when omitted it
+  // defaults to 1 (crossthread).
   enum {
     // C    __c11_atomic_init(A *, C)
     Init,
-    // C    __c11_atomic_load(A *, int)
+    // C    __c11_atomic_load(A *, int, int = 1)
     Load,
-    // void __atomic_load(A *, CP, int)
+    // void __atomic_load(A *, CP, int, int = 1)
     LoadCopy,
-    // void __atomic_store(A *, CP, int)
+    // void __atomic_store(A *, CP, int, int = 1)
     Copy,
-    // C    __c11_atomic_add(A *, M, int)
+    // C    __c11_atomic_add(A *, M, int, int = 1)
     Arithmetic,
-    // C    __atomic_exchange_n(A *, CP, int)
+    // C    __atomic_exchange_n(A *, CP, int, int = 1)
     Xchg,
-    // void __atomic_exchange(A *, C *, CP, int)
+    // void __atomic_exchange(A *, C *, CP, int, int = 1)
     GNUXchg,
-    // bool __c11_atomic_compare_exchange_strong(A *, C *, CP, int, int)
+    // bool __c11_atomic_compare_exchange_strong(A *, C *, CP, int, int, int = 1)
     C11CmpXchg,
-    // bool __atomic_compare_exchange(A *, C *, CP, bool, int, int)
+    // bool __atomic_compare_exchange(A *, C *, CP, bool, int, int, int = 1)
     GNUCmpXchg
   } Form = Init;
-  const unsigned NumArgs[] = { 2, 2, 3, 3, 3, 3, 4, 5, 6 };
+  const unsigned NumArgsMin[] = { 2, 2, 3, 3, 3, 3, 4, 5, 6 };
+  const unsigned NumArgsMax[] = { 2, 3, 4, 4, 4, 4, 5, 6, 7 };
   const unsigned NumVals[] = { 1, 0, 1, 1, 1, 1, 2, 2, 3 };
   // where:
   //   C is an appropriate type,
@@ -2734,15 +2737,18 @@
   }
 
   // Check we have the right number of arguments.
-  if (TheCall->getNumArgs() < NumArgs[Form]) {
+  assert((NumArgsMin[Form] == NumArgsMax[Form] ||
+          NumArgsMin[Form] + 1 == NumArgsMax[Form]) &&
+         "At most one optional argument");
+  if (TheCall->getNumArgs() < NumArgsMin[Form]) {
     Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
-      << 0 << NumArgs[Form] << TheCall->getNumArgs()
+      << 0 << NumArgsMin[Form] << TheCall->getNumArgs()
       << TheCall->getCallee()->getSourceRange();
     return ExprError();
-  } else if (TheCall->getNumArgs() > NumArgs[Form]) {
-    Diag(TheCall->getArg(NumArgs[Form])->getLocStart(),
+  } else if (TheCall->getNumArgs() > NumArgsMax[Form]) {
+    Diag(TheCall->getArg(NumArgsMax[Form])->getLocStart(),
          diag::err_typecheck_call_too_many_args)
-      << 0 << NumArgs[Form] << TheCall->getNumArgs()
+      << 0 << NumArgsMax[Form] << TheCall->getNumArgs()
       << TheCall->getCallee()->getSourceRange();
     return ExprError();
   }
@@ -2855,7 +2861,7 @@
   // The first argument --- the pointer --- has a fixed type; we
   // deduce the types of the rest of the arguments accordingly.  Walk
   // the remaining arguments, converting them to the deduced value type.
-  for (unsigned i = 1; i != NumArgs[Form]; ++i) {
+  for (unsigned i = 1; i != TheCall->getNumArgs(); ++i) {
     QualType Ty;
     if (i < NumVals[Form] + 1) {
       switch (i) {
@@ -2910,6 +2916,15 @@
     TheCall->setArg(i, Arg.get());
   }
 
+  Expr *Scope;
+  if (TheCall->getNumArgs() < NumArgsMax[Form]) {
+    Scope = IntegerLiteral::Create(
+        Context, llvm::APInt(Context.getTypeSize(Context.IntTy), 1),
+        Context.IntTy, SourceLocation());
+  } else {
+    Scope = TheCall->getArg(TheCall->getNumArgs() - 1);
+  }
+
   // Permute the arguments into a 'consistent' order.
   SmallVector<Expr*, 5> SubExprs;
   SubExprs.push_back(Ptr);
@@ -2920,28 +2935,33 @@
     break;
   case Load:
     SubExprs.push_back(TheCall->getArg(1)); // Order
+    SubExprs.push_back(Scope);              // Scope
     break;
   case LoadCopy:
   case Copy:
   case Arithmetic:
   case Xchg:
     SubExprs.push_back(TheCall->getArg(2)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     break;
   case GNUXchg:
     // Note, AtomicExpr::getVal2() has a special case for this atomic.
     SubExprs.push_back(TheCall->getArg(3)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(2)); // Val2
     break;
   case C11CmpXchg:
     SubExprs.push_back(TheCall->getArg(3)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(4)); // OrderFail
     SubExprs.push_back(TheCall->getArg(2)); // Val2
     break;
   case GNUCmpXchg:
     SubExprs.push_back(TheCall->getArg(4)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(5)); // OrderFail
     SubExprs.push_back(TheCall->getArg(2)); // Val2
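
When the scope argument is omitted, Sema synthesizes an IntegerLiteral 1
(crossthread) as shown above, so as a sketch the two calls below are treated
identically; the explicit trailing argument is an illustration of the default,
not new syntax:

  __c11_atomic_store(i, 1, memory_order_release);
  __c11_atomic_store(i, 1, memory_order_release, 1 /* crossthread */);
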
Index: lib/CodeGen/CGAtomic.cpp
===================================================================
--- lib/CodeGen/CGAtomic.cpp
+++ lib/CodeGen/CGAtomic.cpp
@@ -359,13 +359,15 @@
                               Address Val1, Address Val2,
                               uint64_t Size,
                               llvm::AtomicOrdering SuccessOrder,
-                              llvm::AtomicOrdering FailureOrder) {
+                              llvm::AtomicOrdering FailureOrder,
+                              llvm::SynchronizationScope Scope) {
   // Note that cmpxchg doesn't support weak cmpxchg, at least at the moment.
   llvm::Value *Expected = CGF.Builder.CreateLoad(Val1);
   llvm::Value *Desired = CGF.Builder.CreateLoad(Val2);
 
   llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg(
-      Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder);
+      Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder,
+      Scope);
   Pair->setVolatile(E->isVolatile());
   Pair->setWeak(IsWeak);
 
@@ -407,7 +409,8 @@
                                         Address Val1, Address Val2,
                                         llvm::Value *FailureOrderVal,
                                         uint64_t Size,
-                                        llvm::AtomicOrdering SuccessOrder) {
+                                        llvm::AtomicOrdering SuccessOrder,
+                                        llvm::SynchronizationScope Scope) {
   llvm::AtomicOrdering FailureOrder;
   if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) {
     auto FOS = FO->getSExtValue();
@@ -435,7 +438,7 @@
           llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder);
     }
     emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
-                      FailureOrder);
+                      FailureOrder, Scope);
     return;
   }
 
@@ -460,13 +463,13 @@
   // doesn't fold to a constant for the ordering.
   CGF.Builder.SetInsertPoint(MonotonicBB);
   emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
-                    Size, SuccessOrder, llvm::AtomicOrdering::Monotonic);
+                    Size, SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope);
   CGF.Builder.CreateBr(ContBB);
 
   if (AcquireBB) {
     CGF.Builder.SetInsertPoint(AcquireBB);
     emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
-                      Size, SuccessOrder, llvm::AtomicOrdering::Acquire);
+                      Size, SuccessOrder, llvm::AtomicOrdering::Acquire, Scope);
     CGF.Builder.CreateBr(ContBB);
     SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
                 AcquireBB);
@@ -476,7 +479,7 @@
   if (SeqCstBB) {
     CGF.Builder.SetInsertPoint(SeqCstBB);
     emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
-                      llvm::AtomicOrdering::SequentiallyConsistent);
+                      llvm::AtomicOrdering::SequentiallyConsistent, Scope);
     CGF.Builder.CreateBr(ContBB);
     SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
                 SeqCstBB);
@@ -488,7 +491,8 @@
 static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
                          Address Ptr, Address Val1, Address Val2,
                          llvm::Value *IsWeak, llvm::Value *FailureOrder,
-                         uint64_t Size, llvm::AtomicOrdering Order) {
+                         uint64_t Size, llvm::AtomicOrdering Order,
+                         llvm::SynchronizationScope Scope) {
   llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
   llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
 
@@ -498,17 +502,17 @@
 
   case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
     emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
-                                FailureOrder, Size, Order);
+                                FailureOrder, Size, Order, Scope);
     return;
   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
     emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
-                                FailureOrder, Size, Order);
+                                FailureOrder, Size, Order, Scope);
     return;
   case AtomicExpr::AO__atomic_compare_exchange:
   case AtomicExpr::AO__atomic_compare_exchange_n: {
     if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
       emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
-                                  Val1, Val2, FailureOrder, Size, Order);
+                                  Val1, Val2, FailureOrder, Size, Order, Scope);
     } else {
       // Create all the relevant BB's
       llvm::BasicBlock *StrongBB =
@@ -522,12 +526,12 @@
 
       CGF.Builder.SetInsertPoint(StrongBB);
       emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
-                                  FailureOrder, Size, Order);
+                                  FailureOrder, Size, Order, Scope);
       CGF.Builder.CreateBr(ContBB);
 
       CGF.Builder.SetInsertPoint(WeakBB);
       emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
-                                  FailureOrder, Size, Order);
+                                  FailureOrder, Size, Order, Scope);
       CGF.Builder.CreateBr(ContBB);
 
       CGF.Builder.SetInsertPoint(ContBB);
@@ -538,7 +542,7 @@
   case AtomicExpr::AO__atomic_load_n:
   case AtomicExpr::AO__atomic_load: {
     llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
-    Load->setAtomic(Order);
+    Load->setAtomic(Order, Scope);
     Load->setVolatile(E->isVolatile());
     CGF.Builder.CreateStore(Load, Dest);
     return;
@@ -549,7 +553,7 @@
   case AtomicExpr::AO__atomic_store_n: {
     llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
     llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
-    Store->setAtomic(Order);
+    Store->setAtomic(Order, Scope);
     Store->setVolatile(E->isVolatile());
     return;
   }
@@ -610,7 +614,7 @@
 
   llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
   llvm::AtomicRMWInst *RMWI =
-      CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order);
+      CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order, Scope);
   RMWI->setVolatile(E->isVolatile());
 
   // For __atomic_*_fetch operations, perform the operation again to
@@ -684,6 +688,7 @@
   }
 
   llvm::Value *Order = EmitScalarExpr(E->getOrder());
+  llvm::Value *Scope = EmitScalarExpr(E->getScope());
 
   switch (E->getOp()) {
   case AtomicExpr::AO__c11_atomic_init:
@@ -716,7 +721,7 @@
     else
       Val2 = EmitValToTemp(*this, E->getVal2());
     OrderFail = EmitScalarExpr(E->getOrderFail());
-    if (E->getNumSubExprs() == 6)
+    if (E->getNumSubExprs() == 7)
       IsWeak = EmitScalarExpr(E->getWeak());
     break;
 
@@ -1024,38 +1029,43 @@
                 E->getOp() == AtomicExpr::AO__atomic_load ||
                 E->getOp() == AtomicExpr::AO__atomic_load_n;
 
+  assert(isa<llvm::ConstantInt>(Scope) &&
+         "Non-constant synchronization scope not supported");
+  auto sco = (llvm::SynchronizationScope)(
+      cast<llvm::ConstantInt>(Scope)->getZExtValue());
+
   if (isa<llvm::ConstantInt>(Order)) {
     auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
     // We should not ever get to a case where the ordering isn't a valid C ABI
     // value, but it's hard to enforce that in general.
     if (llvm::isValidAtomicOrderingCABI(ord))
       switch ((llvm::AtomicOrderingCABI)ord) {
       case llvm::AtomicOrderingCABI::relaxed:
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Monotonic);
+                     llvm::AtomicOrdering::Monotonic, sco);
         break;
       case llvm::AtomicOrderingCABI::consume:
       case llvm::AtomicOrderingCABI::acquire:
         if (IsStore)
           break; // Avoid crashing on code with undefined behavior
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Acquire);
+                     llvm::AtomicOrdering::Acquire, sco);
         break;
       case llvm::AtomicOrderingCABI::release:
         if (IsLoad)
           break; // Avoid crashing on code with undefined behavior
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Release);
+                     llvm::AtomicOrdering::Release, sco);
         break;
       case llvm::AtomicOrderingCABI::acq_rel:
         if (IsLoad || IsStore)
           break; // Avoid crashing on code with undefined behavior
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::AcquireRelease);
+                     llvm::AtomicOrdering::AcquireRelease, sco);
         break;
       case llvm::AtomicOrderingCABI::seq_cst:
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::SequentiallyConsistent);
+                     llvm::AtomicOrdering::SequentiallyConsistent, sco);
         break;
       }
     if (RValTy->isVoidType())
@@ -1092,12 +1102,12 @@
   // Emit all the different atomics
   Builder.SetInsertPoint(MonotonicBB);
   EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-               Size, llvm::AtomicOrdering::Monotonic);
+               Size, llvm::AtomicOrdering::Monotonic, sco);
   Builder.CreateBr(ContBB);
   if (!IsStore) {
     Builder.SetInsertPoint(AcquireBB);
     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::Acquire);
+                 Size, llvm::AtomicOrdering::Acquire, sco);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
                 AcquireBB);
@@ -1107,22 +1117,22 @@
   if (!IsLoad) {
     Builder.SetInsertPoint(ReleaseBB);
     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::Release);
+                 Size, llvm::AtomicOrdering::Release, sco);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
                 ReleaseBB);
   }
   if (!IsLoad && !IsStore) {
     Builder.SetInsertPoint(AcqRelBB);
     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::AcquireRelease);
+                 Size, llvm::AtomicOrdering::AcquireRelease, sco);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
                 AcqRelBB);
   }
   Builder.SetInsertPoint(SeqCstBB);
   EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-               Size, llvm::AtomicOrdering::SequentiallyConsistent);
+               Size, llvm::AtomicOrdering::SequentiallyConsistent, sco);
   Builder.CreateBr(ContBB);
   SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
               SeqCstBB);
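
EmitAtomicExpr casts the constant scope argument directly to
llvm::SynchronizationScope, so the builtins' integer values are assumed to
line up with that enum; a sketch of the assumed correspondence (the enumerator
names here are illustrative, not part of the patch):

  enum synch_scope_abi {
    synch_scope_abi_single_thread = 0, /* llvm::SingleThread */
    synch_scope_abi_cross_thread  = 1  /* llvm::CrossThread, the default */
  };
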
Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -2060,6 +2060,7 @@
 
     AddrSpaceMap = &AMDGPUAddrSpaceMap;
     UseAddrSpaceMapMangling = true;
+    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
   }
 
   uint64_t getPointerWidthV(unsigned AddrSpace) const override {
Index: lib/AST/Expr.cpp
===================================================================
--- lib/AST/Expr.cpp
+++ lib/AST/Expr.cpp
@@ -3914,9 +3914,10 @@
 unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
   switch (Op) {
   case AO__c11_atomic_init:
+    return 2;
   case AO__c11_atomic_load:
   case AO__atomic_load_n:
-    return 2;
+    return 3;
 
   case AO__c11_atomic_store:
   case AO__c11_atomic_exchange:
@@ -3941,18 +3942,18 @@
   case AO__atomic_or_fetch:
   case AO__atomic_xor_fetch:
   case AO__atomic_nand_fetch:
-    return 3;
+    return 4;
 
   case AO__atomic_exchange:
-    return 4;
+    return 5;
 
   case AO__c11_atomic_compare_exchange_strong:
   case AO__c11_atomic_compare_exchange_weak:
-    return 5;
+    return 6;
 
   case AO__atomic_compare_exchange:
   case AO__atomic_compare_exchange_n:
-    return 6;
+    return 7;
   }
   llvm_unreachable("unknown atomic op");
 }
Index: include/clang/AST/Expr.h
===================================================================
--- include/clang/AST/Expr.h
+++ include/clang/AST/Expr.h
@@ -4929,8 +4929,8 @@
 /// AtomicExpr - Variadic atomic builtins: __atomic_exchange, __atomic_fetch_*,
 /// __atomic_load, __atomic_store, and __atomic_compare_exchange_*, for the
 /// similarly-named C++11 instructions, and __c11 variants for <stdatomic.h>.
-/// All of these instructions take one primary pointer and at least one memory
-/// order.
+/// All of these instructions take one primary pointer, at least one memory
+/// order, and one synchronization scope.
 class AtomicExpr : public Expr {
 public:
   enum AtomicOp {
@@ -4942,7 +4942,7 @@
   };
 
 private:
-  enum { PTR, ORDER, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
+  enum { PTR, ORDER, SCOPE, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
   Stmt* SubExprs[END_EXPR];
   unsigned NumSubExprs;
   SourceLocation BuiltinLoc, RParenLoc;
@@ -4967,6 +4967,9 @@
   Expr *getOrder() const {
     return cast<Expr>(SubExprs[ORDER]);
   }
+  Expr *getScope() const {
+    return cast<Expr>(SubExprs[SCOPE]);
+  }
   Expr *getVal1() const {
     if (Op == AO__c11_atomic_init)
       return cast<Expr>(SubExprs[ORDER]);
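
Combining the new Expr.h layout with the Sema permutation above, a sketch of
how the sub-expressions line up for the widest C11 form,
__c11_atomic_compare_exchange_strong(A *, C *, CP, int, int, int = 1):

  /* SubExprs[PTR]        = arg 0 (A *, the atomic object)  */
  /* SubExprs[ORDER]      = arg 3 (success order)           */
  /* SubExprs[SCOPE]      = arg 5, or an implicit literal 1 */
  /* SubExprs[VAL1]       = arg 1 (expected value, C *)     */
  /* SubExprs[ORDER_FAIL] = arg 4 (failure order)           */
  /* SubExprs[VAL2]       = arg 2 (desired value, CP)       */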