commit 03773037368c3a3b69a0af452cf90a52cd87a3e9
Author: Tim Northover <tnorthover@apple.com>
Date:   Thu Jul 11 17:43:19 2013 +0100

    ARM: implement ldrex, strex and clrex intrinsics
    
    Intrinsics already existed for the 64-bit variants, so these support operations
    of size at most 32-bits.

diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td
index 93b1ae1..6cbd1c4 100644
--- a/include/llvm/IR/IntrinsicsARM.td
+++ b/include/llvm/IR/IntrinsicsARM.td
@@ -34,12 +34,15 @@ def int_arm_usat : GCCBuiltin<"__builtin_arm_usat">,
     Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
 
 //===----------------------------------------------------------------------===//
-// Load and Store exclusive doubleword
+// Load, Store and Clear exclusive
+
+def int_arm_ldrex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>;
+def int_arm_strex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>;
+def int_arm_clrex : Intrinsic<[]>;
 
 def int_arm_strexd : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
-    llvm_ptr_ty], [IntrReadWriteArgMem]>;
-def int_arm_ldrexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty],
-    [IntrReadArgMem]>;
+    llvm_ptr_ty]>;
+def int_arm_ldrexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>;
 
 //===----------------------------------------------------------------------===//
 // VFP
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 4eda5dc..31ce38e 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -175,6 +175,7 @@ public:
                                  SDValue &OffImm);
   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                              SDValue &OffReg, SDValue &ShImm);
+  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 
   inline bool is_so_imm(unsigned Imm) const {
     return ARM_AM::getSOImmVal(Imm) != -1;
@@ -1417,6 +1418,34 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
   return true;
 }
 
+bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
+                                                SDValue &OffImm) {
+  // This *must* succeed since it's used for the irreplacable ldrex and strex
+  // instructions.
+  Base = N;
+  OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+
+  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
+    return true;
+
+  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+  if (!RHS)
+    return true;
+
+  uint32_t RHSC = (int)RHS->getZExtValue();
+  if (RHSC > 1020 || RHSC % 4 != 0)
+    return true;
+
+  Base = N.getOperand(0);
+  if (Base.getOpcode() == ISD::FrameIndex) {
+    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+    Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy());
+  }
+
+  OffImm = CurDAG->getTargetConstant(RHSC / 4, MVT::i32);
+  return true;
+}
+
 //===--------------------------------------------------------------------===//
 
 /// getAL - Returns a ARMCC::AL immediate node.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 8c4a3f1..8c8c667 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -8306,6 +8306,21 @@ static SDValue PerformANDCombine(SDNode *N,
       return Result;
   }
 
+  // fold (and (arm_ldrex_8 addr), 0xff) -> (arm_ldrex_8 addr)
+  // fold (and (arm_ldrex_16 addr), 0xffff) -> (arm_ldrex_16 addr)
+  SDValue LHS = N->getOperand(0);
+  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && RHSC) {
+    int IntNo = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue();
+    if (IntNo != Intrinsic::arm_ldrex)
+      return SDValue();
+
+    EVT MemVT = cast<MemIntrinsicSDNode>(LHS)->getMemoryVT();
+    if ((MemVT == MVT::i8 && RHSC->getZExtValue() == 0xff) ||
+        (MemVT == MVT::i16 && RHSC->getZExtValue() == 0xffff))
+      return LHS;
+  }
+
   return SDValue();
 }
 
@@ -9782,6 +9797,31 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
   return Res;
 }
 
+SDValue
+ARMTargetLowering::PerformSTREXCombine(SDNode *N, SelectionDAG &DAG) const {
+  SDValue StoreVal = N->getOperand(2);
+  if (StoreVal.getOpcode() != ISD::AND)
+    return SDValue();
+
+  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(StoreVal.getOperand(1));
+  if (!RHSC)
+    return SDValue();
+
+  MemIntrinsicSDNode *MN = cast<MemIntrinsicSDNode>(N);
+  EVT MemVT = MN->getMemoryVT();
+  if (!(MemVT == MVT::i8 && RHSC->getZExtValue() == 0xff) &&
+      !(MemVT == MVT::i16 && RHSC->getZExtValue() == 0xffff))
+    return SDValue();
+
+  SDValue Ops[] = { N->getOperand(0), // Chain
+                    N->getOperand(1), // arm_strex
+                    StoreVal.getOperand(0),
+                    N->getOperand(3) };
+  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, SDLoc(N),
+                                 N->getVTList(), Ops, array_lengthof(Ops),
+                                 MemVT, MN->getMemOperand());
+}
+
 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   switch (N->getOpcode()) {
@@ -9837,6 +9877,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
     case Intrinsic::arm_neon_vst3lane:
     case Intrinsic::arm_neon_vst4lane:
       return CombineBaseUpdate(N, DCI);
+    case Intrinsic::arm_strex:
+      return PerformSTREXCombine(N, DCI.DAG);
     default: break;
     }
     break;
@@ -10762,6 +10804,30 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.writeMem = true;
     return true;
   }
+  case Intrinsic::arm_ldrex: {
+    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+    Info.vol = true;
+    Info.readMem = true;
+    Info.writeMem = false;
+    return true;
+  }
+  case Intrinsic::arm_strex: {
+    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.ptrVal = I.getArgOperand(1);
+    Info.offset = 0;
+    Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+    Info.vol = true;
+    Info.readMem = false;
+    Info.writeMem = true;
+    return true;
+  }
   case Intrinsic::arm_strexd: {
     Info.opc = ISD::INTRINSIC_W_CHAIN;
     Info.memVT = MVT::i64;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 6593777..a0222f8 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -280,6 +280,7 @@ namespace llvm {
     AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const;
 
     SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
+    SDValue PerformSTREXCombine(SDNode *N, SelectionDAG &DAG) const;
     virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
     bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 9eba553..ff44e953 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -4383,14 +4383,29 @@ let usesCustomInserter = 1 in {
       [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
 }
 
+def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def ldrex_2 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def ldrex_4 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
 let mayLoad = 1 in {
 def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
                      NoItinerary,
-                    "ldrexb", "\t$Rt, $addr", []>;
+                     "ldrexb", "\t$Rt, $addr",
+                     [(set GPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>;
 def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
-                     NoItinerary, "ldrexh", "\t$Rt, $addr", []>;
+                     NoItinerary, "ldrexh", "\t$Rt, $addr",
+                     [(set GPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>;
 def LDREX  : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
-                     NoItinerary, "ldrex", "\t$Rt, $addr", []>;
+                     NoItinerary, "ldrex", "\t$Rt, $addr",
+                     [(set GPR:$Rt, (ldrex_4 addr_offset_none:$addr))]>;
 let hasExtraDefRegAllocReq = 1 in
 def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
                       NoItinerary, "ldrexd", "\t$Rt, $addr", []> {
@@ -4398,13 +4413,31 @@ def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
 }
 }
 
+def strex_1 : PatFrag<(ops node:$val, node:$ptr),
+                      (int_arm_strex node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def strex_2 : PatFrag<(ops node:$val, node:$ptr),
+                      (int_arm_strex node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def strex_4 : PatFrag<(ops node:$val, node:$ptr),
+                      (int_arm_strex node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
 let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
 def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
-                    NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>;
+                    NoItinerary, "strexb", "\t$Rd, $Rt, $addr",
+                    [(set GPR:$Rd, (strex_1 GPR:$Rt, addr_offset_none:$addr))]>;
 def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
-                    NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>;
+                    NoItinerary, "strexh", "\t$Rd, $Rt, $addr",
+                    [(set GPR:$Rd, (strex_2 GPR:$Rt, addr_offset_none:$addr))]>;
 def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
-                    NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
+                    NoItinerary, "strex", "\t$Rd, $Rt, $addr",
+                    [(set GPR:$Rd, (strex_4 GPR:$Rt, addr_offset_none:$addr))]>;
 let hasExtraSrcRegAllocReq = 1 in
 def STREXD : AIstrex<0b01, (outs GPR:$Rd),
                     (ins GPRPairOp:$Rt, addr_offset_none:$addr),
@@ -4414,7 +4447,8 @@ def STREXD : AIstrex<0b01, (outs GPR:$Rd),
 }
 
 
-def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>,
+def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
+                [(int_arm_clrex)]>,
             Requires<[IsARM, HasV7]>  {
   let Inst{31-0} = 0b11110101011111111111000000011111;
 }
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index ee9eaaa..4f26bac 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -251,7 +251,8 @@ def t2am_imm8s4_offset : Operand<i32> {
 def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass {
   let Name = "MemImm0_1020s4Offset";
 }
-def t2addrmode_imm0_1020s4 : Operand<i32> {
+def t2addrmode_imm0_1020s4 : Operand<i32>,
+                         ComplexPattern<i32, 2, "SelectT2AddrModeExclusive"> {
   let PrintMethod = "printT2AddrModeImm0_1020s4Operand";
   let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue";
   let DecoderMethod = "DecodeT2AddrModeImm0_1020s4";
@@ -3201,13 +3202,16 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
 let mayLoad = 1 in {
 def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
                          AddrModeNone, 4, NoItinerary,
-                         "ldrexb", "\t$Rt, $addr", "", []>;
+                         "ldrexb", "\t$Rt, $addr", "",
+                         [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>;
 def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
                          AddrModeNone, 4, NoItinerary,
-                         "ldrexh", "\t$Rt, $addr", "", []>;
+                         "ldrexh", "\t$Rt, $addr", "",
+                         [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>;
 def t2LDREX  : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
                        AddrModeNone, 4, NoItinerary,
-                       "ldrex", "\t$Rt, $addr", "", []> {
+                       "ldrex", "\t$Rt, $addr", "",
+                     [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]> {
   bits<4> Rt;
   bits<12> addr;
   let Inst{31-27} = 0b11101;
@@ -3232,16 +3236,22 @@ let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
 def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd),
                          (ins rGPR:$Rt, addr_offset_none:$addr),
                          AddrModeNone, 4, NoItinerary,
-                         "strexb", "\t$Rd, $Rt, $addr", "", []>;
+                         "strexb", "\t$Rd, $Rt, $addr", "",
+                         [(set rGPR:$Rd, (strex_1 rGPR:$Rt,
+                                                  addr_offset_none:$addr))]>;
 def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd),
                          (ins rGPR:$Rt, addr_offset_none:$addr),
                          AddrModeNone, 4, NoItinerary,
-                         "strexh", "\t$Rd, $Rt, $addr", "", []>;
+                         "strexh", "\t$Rd, $Rt, $addr", "",
+                         [(set rGPR:$Rd, (strex_2 rGPR:$Rt,
+                                                  addr_offset_none:$addr))]>;
+
 def t2STREX  : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
                              t2addrmode_imm0_1020s4:$addr),
                   AddrModeNone, 4, NoItinerary,
                   "strex", "\t$Rd, $Rt, $addr", "",
-                  []> {
+                  [(set rGPR:$Rd, (strex_4 rGPR:$Rt,
+                                           t2addrmode_imm0_1020s4:$addr))]> {
   bits<4> Rd;
   bits<4> Rt;
   bits<12> addr;
@@ -3263,7 +3273,7 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
 }
 }
 
-def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>,
+def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", [(int_arm_clrex)]>,
             Requires<[IsThumb2, HasV7]>  {
   let Inst{31-16} = 0xf3bf;
   let Inst{15-14} = 0b10;
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 0f792ec..1f37696 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -37,7 +37,7 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
 }
 
 char NVPTXAllocaHoisting::ID = 1;
-RegisterPass<NVPTXAllocaHoisting>
+static RegisterPass<NVPTXAllocaHoisting>
 X("alloca-hoisting", "Hoisting alloca instructions in non-entry "
                      "blocks to the entry block");
 
diff --git a/test/CodeGen/ARM/ldstrex.ll b/test/CodeGen/ARM/ldstrex.ll
new file mode 100644
index 0000000..deed80f
--- /dev/null
+++ b/test/CodeGen/ARM/ldstrex.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin   | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin > %t
+; RUN: FileCheck %s < %t
+; RUN: FileCheck %s < %t --check-prefix=CHECK-T2ADDRMODE
+
+%0 = type { i32, i32 }
+
+; CHECK: f0:
+; CHECK: ldrexd
+define i64 @f0(i8* %p) nounwind readonly {
+entry:
+  %ldrexd = tail call %0 @llvm.arm.ldrexd(i8* %p)
+  %0 = extractvalue %0 %ldrexd, 1
+  %1 = extractvalue %0 %ldrexd, 0
+  %2 = zext i32 %0 to i64
+  %3 = zext i32 %1 to i64
+  %shl = shl nuw i64 %2, 32
+  %4 = or i64 %shl, %3
+  ret i64 %4
+}
+
+; CHECK: f1:
+; CHECK: strexd
+define i32 @f1(i8* %ptr, i64 %val) nounwind {
+entry:
+  %tmp4 = trunc i64 %val to i32
+  %tmp6 = lshr i64 %val, 32
+  %tmp7 = trunc i64 %tmp6 to i32
+  %strexd = tail call i32 @llvm.arm.strexd(i32 %tmp4, i32 %tmp7, i8* %ptr)
+  ret i32 %strexd
+}
+
+declare %0 @llvm.arm.ldrexd(i8*) nounwind readonly
+declare i32 @llvm.arm.strexd(i32, i32, i8*) nounwind
+
+; CHECK: test_load_i8:
+; CHECK: ldrexb r0, [r0]
+; CHECK-NOT: uxtb
+define i32 @test_load_i8(i8* %addr) {
+  %val = call i32 @llvm.arm.ldrex.p0i8(i8* %addr)
+  ret i32 %val
+}
+
+; CHECK: test_load_i16:
+; CHECK: ldrexh r0, [r0]
+; CHECK-NOT: uxth
+define i32 @test_load_i16(i16* %addr) {
+  %val = call i32 @llvm.arm.ldrex.p0i16(i16* %addr)
+  ret i32 %val
+}
+
+; CHECK: test_load_i32:
+; CHECK: ldrex r0, [r0]
+define i32 @test_load_i32(i32* %addr) {
+  %val = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+  ret i32 %val
+}
+
+declare i32 @llvm.arm.ldrex.p0i8(i8*) nounwind readonly
+declare i32 @llvm.arm.ldrex.p0i16(i16*) nounwind readonly
+declare i32 @llvm.arm.ldrex.p0i32(i32*) nounwind readonly
+
+; CHECK: test_store_i8:
+; CHECK-NOT: uxtb
+; CHECK: strexb r0, r1, [r2]
+define i32 @test_store_i8(i32, i8 %val, i8* %addr) {
+  %extval = zext i8 %val to i32
+  %res = call i32 @llvm.arm.strex.p0i8(i32 %extval, i8* %addr)
+  ret i32 %res
+}
+
+; CHECK: test_store_i16:
+; CHECK-NOT: uxth
+; CHECK: strexh r0, r1, [r2]
+define i32 @test_store_i16(i32, i16 %val, i16* %addr) {
+  %extval = zext i16 %val to i32
+  %res = call i32 @llvm.arm.strex.p0i16(i32 %extval, i16* %addr)
+  ret i32 %res
+}
+
+; CHECK: test_store_i32:
+; CHECK: strex r0, r1, [r2]
+define i32 @test_store_i32(i32, i32 %val, i32* %addr) {
+  %res = call i32 @llvm.arm.strex.p0i32(i32 %val, i32* %addr)
+  ret i32 %res
+}
+
+declare i32 @llvm.arm.strex.p0i8(i32, i8*) nounwind
+declare i32 @llvm.arm.strex.p0i16(i32, i16*) nounwind
+declare i32 @llvm.arm.strex.p0i32(i32, i32*) nounwind
+
+; CHECK: test_clear:
+; CHECK: clrex
+define void @test_clear() {
+  call void @llvm.arm.clrex()
+  ret void
+}
+
+declare void @llvm.arm.clrex() nounwind
+
+@base = global i32* null
+
+define void @excl_addrmode() {
+; CHECK-T2ADDRMODE: excl_addrmode:
+  %base1020 = load i32** @base
+  %offset1020 = getelementptr i32* %base1020, i32 255
+  call i32 @llvm.arm.ldrex.p0i32(i32* %offset1020)
+  call i32 @llvm.arm.strex.p0i32(i32 0, i32* %offset1020)
+; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [{{r[0-9]+}}, #1020]
+; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [{{r[0-9]+}}, #1020]
+
+  %base1024 = load i32** @base
+  %offset1024 = getelementptr i32* %base1024, i32 256
+  call i32 @llvm.arm.ldrex.p0i32(i32* %offset1024)
+  call i32 @llvm.arm.strex.p0i32(i32 0, i32* %offset1024)
+; CHECK-T2ADDRMODE: add.w r[[ADDR:[0-9]+]], {{r[0-9]+}}, #1024
+; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]]
+; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
+
+  %base1 = load i32** @base
+  %addr8 = bitcast i32* %base1 to i8*
+  %offset1_8 = getelementptr i8* %addr8, i32 1
+  %offset1 = bitcast i8* %offset1_8 to i32*
+  call i32 @llvm.arm.ldrex.p0i32(i32* %offset1)
+  call i32 @llvm.arm.strex.p0i32(i32 0, i32* %offset1)
+; CHECK-T2ADDRMODE: adds r[[ADDR:[0-9]+]], #1
+; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]]
+; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
+
+  %local = alloca i8, i32 1024
+  %local32 = bitcast i8* %local to i32*
+  call i32 @llvm.arm.ldrex.p0i32(i32* %local32)
+  call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32)
+; CHECK-T2ADDRMODE: mov r[[ADDR:[0-9]+]], sp
+; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]]
+; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
+
+  ret void
+}
diff --git a/test/CodeGen/ARM/ldstrexd.ll b/test/CodeGen/ARM/ldstrexd.ll
deleted file mode 100644
index 0c0911a..0000000
--- a/test/CodeGen/ARM/ldstrexd.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin   | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
-
-%0 = type { i32, i32 }
-
-; CHECK: f0:
-; CHECK: ldrexd
-define i64 @f0(i8* %p) nounwind readonly {
-entry:
-  %ldrexd = tail call %0 @llvm.arm.ldrexd(i8* %p)
-  %0 = extractvalue %0 %ldrexd, 1
-  %1 = extractvalue %0 %ldrexd, 0
-  %2 = zext i32 %0 to i64
-  %3 = zext i32 %1 to i64
-  %shl = shl nuw i64 %2, 32
-  %4 = or i64 %shl, %3
-  ret i64 %4
-}
-
-; CHECK: f1:
-; CHECK: strexd
-define i32 @f1(i8* %ptr, i64 %val) nounwind {
-entry:
-  %tmp4 = trunc i64 %val to i32
-  %tmp6 = lshr i64 %val, 32
-  %tmp7 = trunc i64 %tmp6 to i32
-  %strexd = tail call i32 @llvm.arm.strexd(i32 %tmp4, i32 %tmp7, i8* %ptr)
-  ret i32 %strexd
-}
-
-declare %0 @llvm.arm.ldrexd(i8*) nounwind readonly
-declare i32 @llvm.arm.strexd(i32, i32, i8*) nounwind
-
