On Sun, Mar 3, 2013 at 8:02 AM, David Chisnall <[email protected]> wrote: > Author: theraven > Date: Sun Mar 3 10:02:42 2013 > New Revision: 176420 > > URL: http://llvm.org/viewvc/llvm-project?rev=176420&view=rev > Log: > Improve C11 atomics support: > > - Generate atomicrmw operations in most of the cases when it's sensible to do > so.
It would be nice if the LLVM optimizers could get this instead of needing the frontend to do it. 'course, compile time might argue for having the frontend do it _also_. > - Don't crash in several common cases (and hopefully don't crash in more of > them). > - Add some better tests. > > We now generate significantly better code for things like: > _Atomic(int) x; > ... > x++; > > On MIPS, this now generates a 4-instruction ll/sc loop, where previously it > generated about 30 instructions in two nested loops. On x86-64, we generate a > single lock incl, instead of a lock cmpxchgl loop (one instruction instead of > ten). > > > Added: > cfe/trunk/test/CodeGen/c11atomics.c > Modified: > cfe/trunk/ (props changed) > cfe/trunk/lib/CodeGen/CGExprScalar.cpp > cfe/trunk/test/CodeGen/atomic_ops.c > > Propchange: cfe/trunk/ > ------------------------------------------------------------------------------ > --- svn:mergeinfo (original) > +++ svn:mergeinfo Sun Mar 3 10:02:42 2013 > @@ -1,2 +1,3 @@ > /cfe/branches/type-system-rewrite:134693-134817 > +/cfe/trunk/test:170344 > /cfe/trunk/test/SemaTemplate:126920 > > Modified: cfe/trunk/lib/CodeGen/CGExprScalar.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprScalar.cpp?rev=176420&r1=176419&r2=176420&view=diff > ============================================================================== > --- cfe/trunk/lib/CodeGen/CGExprScalar.cpp (original) > +++ cfe/trunk/lib/CodeGen/CGExprScalar.cpp Sun Mar 3 10:02:42 2013 > @@ -1444,21 +1444,60 @@ ScalarExprEmitter::EmitScalarPrePostIncD > bool isInc, bool isPre) { > > QualType type = E->getSubExpr()->getType(); > - llvm::Value *value = EmitLoadOfLValue(LV); > - llvm::Value *input = value; > llvm::PHINode *atomicPHI = 0; > + llvm::Value *value; > + llvm::Value *input; > > int amount = (isInc ? 1 : -1); > > if (const AtomicType *atomicTy = type->getAs<AtomicType>()) { > + type = atomicTy->getValueType(); > + if (isInc && type->isBooleanType()) { > + llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type); > + if (isPre) { > + Builder.Insert(new llvm::StoreInst(True, > + LV.getAddress(), LV.isVolatileQualified(), > + LV.getAlignment().getQuantity(), > + llvm::SequentiallyConsistent)); > + return Builder.getTrue(); > + } > + // For atomic bool increment, we just store true and return it for > + // preincrement, do an atomic swap with true for postincrement > + return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, > + LV.getAddress(), True, llvm::SequentiallyConsistent); > + } > + // Special case for atomic increment / decrement on integers, emit > + // atomicrmw instructions. We skip this if we want to be doing overflow > + // checking, and fall into the slow path with the atomic cmpxchg loop. > + if (!type->isBooleanType() && type->isIntegerType() && > + !(type->isUnsignedIntegerType() && > + CGF.SanOpts->UnsignedIntegerOverflow) && > + CGF.getLangOpts().getSignedOverflowBehavior() != > + LangOptions::SOB_Trapping) { I'm not sure this is wrong, but note that "For signed integer types, arithmetic is defined to use two’s complement representation with silent wrap-around on overflow; there are no undefined results." in C11 7.17.7.5. Likely you want to use SanOpts->UnsignedIntegerOverflow for both signed and unsigned atomics. I'm not terribly familiar with the code below here, but it looks like the cmpxchg loop is wrong for this too. > + llvm::AtomicRMWInst::BinOp aop = isInc ? llvm::AtomicRMWInst::Add : > + llvm::AtomicRMWInst::Sub; > + llvm::Instruction::BinaryOps op = isInc ? llvm::Instruction::Add : > + llvm::Instruction::Sub; > + llvm::Value *amt = CGF.EmitToMemory( > + llvm::ConstantInt::get(ConvertType(type), 1, true), type); > + llvm::Value *old = Builder.CreateAtomicRMW(aop, > + LV.getAddress(), amt, llvm::SequentiallyConsistent); > + return isPre ? Builder.CreateBinOp(op, old, amt) : old; > + } > + value = EmitLoadOfLValue(LV); > + input = value; > + // For every other atomic operation, we need to emit a load-op-cmpxchg > loop > llvm::BasicBlock *startBB = Builder.GetInsertBlock(); > llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn); > + value = CGF.EmitToMemory(value, type); > Builder.CreateBr(opBB); > Builder.SetInsertPoint(opBB); > atomicPHI = Builder.CreatePHI(value->getType(), 2); > atomicPHI->addIncoming(value, startBB); > - type = atomicTy->getValueType(); > value = atomicPHI; > + } else { > + value = EmitLoadOfLValue(LV); > + input = value; > } > > // Special case of integer increment that we have to check first: bool++. > @@ -1596,7 +1635,7 @@ ScalarExprEmitter::EmitScalarPrePostIncD > llvm::BasicBlock *opBB = Builder.GetInsertBlock(); > llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", > CGF.CurFn); > llvm::Value *old = Builder.CreateAtomicCmpXchg(LV.getAddress(), > atomicPHI, > - value, llvm::SequentiallyConsistent); > + CGF.EmitToMemory(value, type), llvm::SequentiallyConsistent); > atomicPHI->addIncoming(old, opBB); > llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI); > Builder.CreateCondBr(success, contBB, opBB); > @@ -1872,20 +1911,63 @@ LValue ScalarExprEmitter::EmitCompoundAs > OpInfo.E = E; > // Load/convert the LHS. > LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); > - OpInfo.LHS = EmitLoadOfLValue(LHSLV); > > llvm::PHINode *atomicPHI = 0; > - if (LHSTy->isAtomicType()) { > + if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) { > + QualType type = atomicTy->getValueType(); > + if (!type->isBooleanType() && type->isIntegerType() && > + !(type->isUnsignedIntegerType() && > + CGF.SanOpts->UnsignedIntegerOverflow) && > + CGF.getLangOpts().getSignedOverflowBehavior() != > + LangOptions::SOB_Trapping) { > + llvm::AtomicRMWInst::BinOp aop = llvm::AtomicRMWInst::BAD_BINOP; > + switch (OpInfo.Opcode) { > + // We don't have atomicrmw operands for *, %, /, <<, >> > + case BO_MulAssign: case BO_DivAssign: > + case BO_RemAssign: > + case BO_ShlAssign: > + case BO_ShrAssign: > + break; > + case BO_AddAssign: > + aop = llvm::AtomicRMWInst::Add; > + break; > + case BO_SubAssign: > + aop = llvm::AtomicRMWInst::Sub; > + break; > + case BO_AndAssign: > + aop = llvm::AtomicRMWInst::And; > + break; > + case BO_XorAssign: > + aop = llvm::AtomicRMWInst::Xor; > + break; > + case BO_OrAssign: > + aop = llvm::AtomicRMWInst::Or; > + break; > + default: > + llvm_unreachable("Invalid compound assignment type"); > + } > + if (aop != llvm::AtomicRMWInst::BAD_BINOP) { > + llvm::Value *amt = CGF.EmitToMemory(EmitScalarConversion(OpInfo.RHS, > + E->getRHS()->getType(), LHSTy), LHSTy); > + Builder.CreateAtomicRMW(aop, LHSLV.getAddress(), amt, > + llvm::SequentiallyConsistent); > + return LHSLV; > + } > + } > // FIXME: For floating point types, we should be saving and restoring the > // floating point environment in the loop. > llvm::BasicBlock *startBB = Builder.GetInsertBlock(); > llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn); > + OpInfo.LHS = EmitLoadOfLValue(LHSLV); > + OpInfo.LHS = CGF.EmitToMemory(OpInfo.LHS, type); > Builder.CreateBr(opBB); > Builder.SetInsertPoint(opBB); > atomicPHI = Builder.CreatePHI(OpInfo.LHS->getType(), 2); > atomicPHI->addIncoming(OpInfo.LHS, startBB); > OpInfo.LHS = atomicPHI; > } > + else > + OpInfo.LHS = EmitLoadOfLValue(LHSLV); > > OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy, > E->getComputationLHSType()); > @@ -1900,7 +1982,7 @@ LValue ScalarExprEmitter::EmitCompoundAs > llvm::BasicBlock *opBB = Builder.GetInsertBlock(); > llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", > CGF.CurFn); > llvm::Value *old = Builder.CreateAtomicCmpXchg(LHSLV.getAddress(), > atomicPHI, > - Result, llvm::SequentiallyConsistent); > + CGF.EmitToMemory(Result, LHSTy), llvm::SequentiallyConsistent); > atomicPHI->addIncoming(old, opBB); > llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI); > Builder.CreateCondBr(success, contBB, opBB); > > Modified: cfe/trunk/test/CodeGen/atomic_ops.c > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/atomic_ops.c?rev=176420&r1=176419&r2=176420&view=diff > ============================================================================== > --- cfe/trunk/test/CodeGen/atomic_ops.c (original) > +++ cfe/trunk/test/CodeGen/atomic_ops.c Sun Mar 3 10:02:42 2013 > @@ -15,9 +15,4 @@ void foo(int x) > // CHECK: sdiv i32 > // CHECK: cmpxchg i16* > > - // These should be emitting atomicrmw instructions, but they aren't yet > - i += 2; // CHECK: cmpxchg > - i -= 2; // CHECK: cmpxchg > - i++; // CHECK: cmpxchg > - i--; // CHECK: cmpxchg > } > > Added: cfe/trunk/test/CodeGen/c11atomics.c > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/c11atomics.c?rev=176420&view=auto > ============================================================================== > --- cfe/trunk/test/CodeGen/c11atomics.c (added) > +++ cfe/trunk/test/CodeGen/c11atomics.c Sun Mar 3 10:02:42 2013 > @@ -0,0 +1,139 @@ > +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-unknown-freebsd -std=c11 > | FileCheck %s > + > +// Test that we are generating atomicrmw instructions, rather than > +// compare-exchange loops for common atomic ops. This makes a big difference > +// on RISC platforms, where the compare-exchange loop becomes a ll/sc pair > for > +// the load and then another ll/sc in the loop, expanding to about 30 > +// instructions when it should be only 4. It has a smaller, but still > +// noticeable, impact on platforms like x86 and RISC-V, where there are > atomic > +// RMW instructions. > +// > +// We currently emit cmpxchg loops for most operations on _Bools, because > +// they're sufficiently rare that it's not worth making sure that the > semantics > +// are correct. > + > +typedef int __attribute__((vector_size(16))) vector; > + > +_Atomic(_Bool) b; > +_Atomic(int) i; > +_Atomic(long long) l; > +_Atomic(short) s; > +_Atomic(char*) p; > +_Atomic(float) f; > +_Atomic(vector) v; > + > +// CHECK-NOT: cmpxchg > + > +// CHECK: testinc > +void testinc(void) > +{ > + // Special case for suffix bool++, sets to true and returns the old value. > + // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst > + b++; > + // CHECK: atomicrmw add i32* @i, i32 1 seq_cst > + i++; > + // CHECK: atomicrmw add i64* @l, i64 1 seq_cst > + l++; > + // CHECK: atomicrmw add i16* @s, i16 1 seq_cst > + s++; > + // Prefix increment > + // Special case for bool: set to true and return true > + // CHECK: store atomic i8 1, i8* @b seq_cst, align 1 > + ++b; > + // Currently, we have no variant of atomicrmw that returns the new value, > so > + // we have to generate an atomic add, which returns the old value, and > then a > + // non-atomic add. > + // CHECK: atomicrmw add i32* @i, i32 1 seq_cst > + // CHECK: add i32 > + ++i; > + // CHECK: atomicrmw add i64* @l, i64 1 seq_cst > + // CHECK: add i64 > + ++l; > + // CHECK: atomicrmw add i16* @s, i16 1 seq_cst > + // CHECK: add i16 > + ++s; > +} > +// CHECK: testdec > +void testdec(void) > +{ > + // CHECK: cmpxchg i8* @b > + b--; > + // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst > + i--; > + // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst > + l--; > + // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst > + s--; > + // CHECK: cmpxchg i8* @b > + --b; > + // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst > + // CHECK: sub i32 > + --i; > + // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst > + // CHECK: sub i64 > + --l; > + // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst > + // CHECK: sub i16 > + --s; > +} > +// CHECK: testaddeq > +void testaddeq(void) > +{ > + // CHECK: cmpxchg i8* @b > + // CHECK: atomicrmw add i32* @i, i32 42 seq_cst > + // CHECK: atomicrmw add i64* @l, i64 42 seq_cst > + // CHECK: atomicrmw add i16* @s, i16 42 seq_cst > + b += 42; > + i += 42; > + l += 42; > + s += 42; > +} > +// CHECK: testsubeq > +void testsubeq(void) > +{ > + // CHECK: cmpxchg i8* @b > + // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst > + // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst > + // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst > + b -= 42; > + i -= 42; > + l -= 42; > + s -= 42; > +} > +// CHECK: testxoreq > +void testxoreq(void) > +{ > + // CHECK: cmpxchg i8* @b > + // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst > + // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst > + // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst > + b ^= 42; > + i ^= 42; > + l ^= 42; > + s ^= 42; > +} > +// CHECK: testoreq > +void testoreq(void) > +{ > + // CHECK: cmpxchg i8* @b > + // CHECK: atomicrmw or i32* @i, i32 42 seq_cst > + // CHECK: atomicrmw or i64* @l, i64 42 seq_cst > + // CHECK: atomicrmw or i16* @s, i16 42 seq_cst > + b |= 42; > + i |= 42; > + l |= 42; > + s |= 42; > +} > +// CHECK: testandeq > +void testandeq(void) > +{ > + // CHECK: cmpxchg i8* @b > + // CHECK: atomicrmw and i32* @i, i32 42 seq_cst > + // CHECK: atomicrmw and i64* @l, i64 42 seq_cst > + // CHECK: atomicrmw and i16* @s, i16 42 seq_cst > + b &= 42; > + i &= 42; > + l &= 42; > + s &= 42; > +} > + > > > _______________________________________________ > cfe-commits mailing list > [email protected] > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits _______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
