Re: [Mesa-dev] [PATCH] R600/SI: Custom select 64-bit ADD

2014-02-14 Thread Tom Stellard
On Thu, Feb 13, 2014 at 07:56:26AM -0800, Matt Arsenault wrote:
 
 On Feb 7, 2014, at 7:46 AM, Tom Stellard t...@stellard.net wrote:
 
  From: Tom Stellard thomas.stell...@amd.com
  
  ---
  lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 48 
  ++
  lib/Target/R600/SIISelLowering.cpp | 29 
  lib/Target/R600/SIISelLowering.h   |  1 -
  test/CodeGen/R600/add.ll   | 10 +++
  test/CodeGen/R600/add_i64.ll   | 23 +++-
  5 files changed, 75 insertions(+), 36 deletions(-)
  
  diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp 
  b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
  index a989135..fea875c 100644
  --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
  +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
  @@ -200,6 +200,54 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
switch (Opc) {
default: break;
  +  // We are selecting i64 ADD here instead of custom lower it during
  +  // DAG legalization, so we can fold some i64 ADDs used for address
  +  // calculation into the LOAD and STORE instructions.
  +  case ISD::ADD: {
  +const AMDGPUSubtarget ST = TM.getSubtargetAMDGPUSubtarget();
  +if (N-getValueType(0) != MVT::i64 ||
  +ST.getGeneration()  AMDGPUSubtarget::SOUTHERN_ISLANDS)
  +  break;
  +
  +SDLoc DL(N);
  +SDValue LHS = N-getOperand(0);
  +SDValue RHS = N-getOperand(1);
  +
  +SDValue Sub0 = CurDAG-getTargetConstant(AMDGPU::sub0, MVT::i32);
  +SDValue Sub1 = CurDAG-getTargetConstant(AMDGPU::sub1, MVT::i32);
  +
  +SDNode *Lo0 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
  + DL, MVT::i32, LHS, Sub0);
  +SDNode *Hi0 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
  + DL, MVT::i32, LHS, Sub1);
  +
  +SDNode *Lo1 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
  + DL, MVT::i32, RHS, Sub0);
  +SDNode *Hi1 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
  + DL, MVT::i32, RHS, Sub1);
  +
  +SDVTList VTList = CurDAG-getVTList(MVT::i32, MVT::Glue);
  +
  +SmallVectorSDValue, 8 AddLoArgs;
  +AddLoArgs.push_back(SDValue(Lo0, 0));
  +AddLoArgs.push_back(SDValue(Lo1, 0));
  +
  +SDNode *AddLo = CurDAG-getMachineNode(AMDGPU::S_ADD_I32, DL,
  +   VTList, AddLoArgs);
  +SDValue Carry = SDValue(AddLo, 1);
  +SDNode *AddHi = CurDAG-getMachineNode(AMDGPU::S_ADDC_U32, DL,
  +   MVT::i32, SDValue(Hi0, 0),
  +   SDValue(Hi1, 0), Carry);
  +
  +SDValue Args[5] = {
  +  CurDAG-getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
  +  SDValue(AddLo,0),
  +  Sub0,
  +  SDValue(AddHi,0),
  +  Sub1,
  +};
  +return CurDAG-SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args, 
  5);
  +  }
case ISD::BUILD_VECTOR: {
  unsigned RegClassID;
  const AMDGPUSubtarget ST = TM.getSubtargetAMDGPUSubtarget();
  diff --git a/lib/Target/R600/SIISelLowering.cpp 
  b/lib/Target/R600/SIISelLowering.cpp
  index 0a22d16..4d2f370 100644
  --- a/lib/Target/R600/SIISelLowering.cpp
  +++ b/lib/Target/R600/SIISelLowering.cpp
  @@ -76,7 +76,6 @@ SITargetLowering::SITargetLowering(TargetMachine TM) :
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
  
  -  setOperationAction(ISD::ADD, MVT::i64, Legal);
 
 Would it be better to mark this as custom lowered, and then just return 
 SDValue() for it? That way it won’t be incorrectly reported as legal for 
 anything that might be checking.


That's an interesting idea, but if we do that then the legalizer
will try to expand the nodes, which isn't much of a problem now since
there is no ExpandNode implementation for ISD:ADD, but someone may
want to add one in the future.

I think most places use isLegalOrCustom(), so it may not make
much of a difference anyway.

-Tom
 
setOperationAction(ISD::ADD, MVT::i32, Legal);
setOperationAction(ISD::ADDC, MVT::i32, Legal);
setOperationAction(ISD::ADDE, MVT::i32, Legal);
  @@ -475,7 +474,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, 
  SelectionDAG DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfoSIMachineFunctionInfo();
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  -  case ISD::ADD: return LowerADD(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::LOAD: {
  LoadSDNode *Load = dyn_castLoadSDNode(Op);
  @@ -613,33 +611,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, 
  SelectionDAG DAG) const {
return SDValue();
  }
  
  -SDValue SITargetLowering::LowerADD(SDValue Op,
  -   SelectionDAG DAG) const {
  -  if 

Re: [Mesa-dev] [PATCH] R600/SI: Custom select 64-bit ADD

2014-02-13 Thread Matt Arsenault

On Feb 7, 2014, at 7:46 AM, Tom Stellard t...@stellard.net wrote:

 From: Tom Stellard thomas.stell...@amd.com
 
 ---
 lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 48 ++
 lib/Target/R600/SIISelLowering.cpp | 29 
 lib/Target/R600/SIISelLowering.h   |  1 -
 test/CodeGen/R600/add.ll   | 10 +++
 test/CodeGen/R600/add_i64.ll   | 23 +++-
 5 files changed, 75 insertions(+), 36 deletions(-)
 
 diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp 
 b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
 index a989135..fea875c 100644
 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
 +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
 @@ -200,6 +200,54 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   }
   switch (Opc) {
   default: break;
 +  // We are selecting i64 ADD here instead of custom lower it during
 +  // DAG legalization, so we can fold some i64 ADDs used for address
 +  // calculation into the LOAD and STORE instructions.
 +  case ISD::ADD: {
 +const AMDGPUSubtarget ST = TM.getSubtargetAMDGPUSubtarget();
 +if (N-getValueType(0) != MVT::i64 ||
 +ST.getGeneration()  AMDGPUSubtarget::SOUTHERN_ISLANDS)
 +  break;
 +
 +SDLoc DL(N);
 +SDValue LHS = N-getOperand(0);
 +SDValue RHS = N-getOperand(1);
 +
 +SDValue Sub0 = CurDAG-getTargetConstant(AMDGPU::sub0, MVT::i32);
 +SDValue Sub1 = CurDAG-getTargetConstant(AMDGPU::sub1, MVT::i32);
 +
 +SDNode *Lo0 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 + DL, MVT::i32, LHS, Sub0);
 +SDNode *Hi0 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 + DL, MVT::i32, LHS, Sub1);
 +
 +SDNode *Lo1 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 + DL, MVT::i32, RHS, Sub0);
 +SDNode *Hi1 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 + DL, MVT::i32, RHS, Sub1);
 +
 +SDVTList VTList = CurDAG-getVTList(MVT::i32, MVT::Glue);
 +
 +SmallVectorSDValue, 8 AddLoArgs;
 +AddLoArgs.push_back(SDValue(Lo0, 0));
 +AddLoArgs.push_back(SDValue(Lo1, 0));
 +
 +SDNode *AddLo = CurDAG-getMachineNode(AMDGPU::S_ADD_I32, DL,
 +   VTList, AddLoArgs);
 +SDValue Carry = SDValue(AddLo, 1);
 +SDNode *AddHi = CurDAG-getMachineNode(AMDGPU::S_ADDC_U32, DL,
 +   MVT::i32, SDValue(Hi0, 0),
 +   SDValue(Hi1, 0), Carry);
 +
 +SDValue Args[5] = {
 +  CurDAG-getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
 +  SDValue(AddLo,0),
 +  Sub0,
 +  SDValue(AddHi,0),
 +  Sub1,
 +};
 +return CurDAG-SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args, 5);
 +  }
   case ISD::BUILD_VECTOR: {
 unsigned RegClassID;
 const AMDGPUSubtarget ST = TM.getSubtargetAMDGPUSubtarget();
 diff --git a/lib/Target/R600/SIISelLowering.cpp 
 b/lib/Target/R600/SIISelLowering.cpp
 index 0a22d16..4d2f370 100644
 --- a/lib/Target/R600/SIISelLowering.cpp
 +++ b/lib/Target/R600/SIISelLowering.cpp
 @@ -76,7 +76,6 @@ SITargetLowering::SITargetLowering(TargetMachine TM) :
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
 
 -  setOperationAction(ISD::ADD, MVT::i64, Legal);

Would it be better to mark this as custom lowered, and then just return 
SDValue() for it? That way it won’t be incorrectly reported as legal for 
anything that might be checking.

   setOperationAction(ISD::ADD, MVT::i32, Legal);
   setOperationAction(ISD::ADDC, MVT::i32, Legal);
   setOperationAction(ISD::ADDE, MVT::i32, Legal);
 @@ -475,7 +474,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, 
 SelectionDAG DAG) const {
   SIMachineFunctionInfo *MFI = MF.getInfoSIMachineFunctionInfo();
   switch (Op.getOpcode()) {
   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
 -  case ISD::ADD: return LowerADD(Op, DAG);
   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
   case ISD::LOAD: {
 LoadSDNode *Load = dyn_castLoadSDNode(Op);
 @@ -613,33 +611,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, 
 SelectionDAG DAG) const {
   return SDValue();
 }
 
 -SDValue SITargetLowering::LowerADD(SDValue Op,
 -   SelectionDAG DAG) const {
 -  if (Op.getValueType() != MVT::i64)
 -return SDValue();
 -
 -  SDLoc DL(Op);
 -  SDValue LHS = Op.getOperand(0);
 -  SDValue RHS = Op.getOperand(1);
 -
 -  SDValue Zero = DAG.getConstant(0, MVT::i32);
 -  SDValue One = DAG.getConstant(1, MVT::i32);
 -
 -  SDValue Lo0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, Zero);
 -  SDValue Hi0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, One);
 -
 -  SDValue Lo1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, Zero);
 -  SDValue Hi1 = 

Re: [Mesa-dev] [PATCH] R600/SI: Custom select 64-bit ADD

2014-02-08 Thread Matt Arsenault

I didn't think to try this. Where is the address folding happening?

On 02/07/2014 07:46 AM, Tom Stellard wrote:

From: Tom Stellard thomas.stell...@amd.com

---
  lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 48 ++
  lib/Target/R600/SIISelLowering.cpp | 29 
  lib/Target/R600/SIISelLowering.h   |  1 -
  test/CodeGen/R600/add.ll   | 10 +++
  test/CodeGen/R600/add_i64.ll   | 23 +++-
  5 files changed, 75 insertions(+), 36 deletions(-)

diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp 
b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index a989135..fea875c 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -200,6 +200,54 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
switch (Opc) {
default: break;
+  // We are selecting i64 ADD here instead of custom lower it during
+  // DAG legalization, so we can fold some i64 ADDs used for address
+  // calculation into the LOAD and STORE instructions.
+  case ISD::ADD: {
+const AMDGPUSubtarget ST = TM.getSubtargetAMDGPUSubtarget();
+if (N-getValueType(0) != MVT::i64 ||
+ST.getGeneration()  AMDGPUSubtarget::SOUTHERN_ISLANDS)
+  break;
+
+SDLoc DL(N);
+SDValue LHS = N-getOperand(0);
+SDValue RHS = N-getOperand(1);
+
+SDValue Sub0 = CurDAG-getTargetConstant(AMDGPU::sub0, MVT::i32);
+SDValue Sub1 = CurDAG-getTargetConstant(AMDGPU::sub1, MVT::i32);
+
+SDNode *Lo0 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, LHS, Sub0);
+SDNode *Hi0 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, LHS, Sub1);
+
+SDNode *Lo1 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, RHS, Sub0);
+SDNode *Hi1 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, RHS, Sub1);
+
+SDVTList VTList = CurDAG-getVTList(MVT::i32, MVT::Glue);
+
+SmallVectorSDValue, 8 AddLoArgs;
+AddLoArgs.push_back(SDValue(Lo0, 0));
+AddLoArgs.push_back(SDValue(Lo1, 0));
+
+SDNode *AddLo = CurDAG-getMachineNode(AMDGPU::S_ADD_I32, DL,
+   VTList, AddLoArgs);
+SDValue Carry = SDValue(AddLo, 1);
+SDNode *AddHi = CurDAG-getMachineNode(AMDGPU::S_ADDC_U32, DL,
+   MVT::i32, SDValue(Hi0, 0),
+   SDValue(Hi1, 0), Carry);
+
+SDValue Args[5] = {
+  CurDAG-getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+  SDValue(AddLo,0),
+  Sub0,
+  SDValue(AddHi,0),
+  Sub1,
+};
+return CurDAG-SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args, 5);
+  }
case ISD::BUILD_VECTOR: {
  unsigned RegClassID;
  const AMDGPUSubtarget ST = TM.getSubtargetAMDGPUSubtarget();
diff --git a/lib/Target/R600/SIISelLowering.cpp 
b/lib/Target/R600/SIISelLowering.cpp
index 0a22d16..4d2f370 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -76,7 +76,6 @@ SITargetLowering::SITargetLowering(TargetMachine TM) :
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
  
-  setOperationAction(ISD::ADD, MVT::i64, Legal);

setOperationAction(ISD::ADD, MVT::i32, Legal);
setOperationAction(ISD::ADDC, MVT::i32, Legal);
setOperationAction(ISD::ADDE, MVT::i32, Legal);
@@ -475,7 +474,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, 
SelectionDAG DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfoSIMachineFunctionInfo();
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
-  case ISD::ADD: return LowerADD(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::LOAD: {
  LoadSDNode *Load = dyn_castLoadSDNode(Op);
@@ -613,33 +611,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, 
SelectionDAG DAG) const {
return SDValue();
  }
  
-SDValue SITargetLowering::LowerADD(SDValue Op,

-   SelectionDAG DAG) const {
-  if (Op.getValueType() != MVT::i64)
-return SDValue();
-
-  SDLoc DL(Op);
-  SDValue LHS = Op.getOperand(0);
-  SDValue RHS = Op.getOperand(1);
-
-  SDValue Zero = DAG.getConstant(0, MVT::i32);
-  SDValue One = DAG.getConstant(1, MVT::i32);
-
-  SDValue Lo0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, Zero);
-  SDValue Hi0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, One);
-
-  SDValue Lo1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, Zero);
-  SDValue Hi1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, One);
-
-  SDVTList VTList = DAG.getVTList(MVT::i32, MVT::Glue);
-
-  SDValue AddLo = DAG.getNode(ISD::ADDC, DL, VTList, Lo0, Lo1);
-  SDValue Carry = 

Re: [Mesa-dev] [PATCH] R600/SI: Custom select 64-bit ADD

2014-02-07 Thread Tom Stellard
On Fri, Feb 07, 2014 at 10:31:20AM -0800, Matt Arsenault wrote:
 I didn't think to try this. Where is the address folding happening?


There are TableGen patterns that do the folding.  I recently added
several new ones: r200932-r200935.

-Tom 
 On 02/07/2014 07:46 AM, Tom Stellard wrote:
 From: Tom Stellard thomas.stell...@amd.com
 
 ---
   lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 48 
  ++
   lib/Target/R600/SIISelLowering.cpp | 29 
   lib/Target/R600/SIISelLowering.h   |  1 -
   test/CodeGen/R600/add.ll   | 10 +++
   test/CodeGen/R600/add_i64.ll   | 23 +++-
   5 files changed, 75 insertions(+), 36 deletions(-)
 
 diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp 
 b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
 index a989135..fea875c 100644
 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
 +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
 @@ -200,6 +200,54 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 }
 switch (Opc) {
 default: break;
 +  // We are selecting i64 ADD here instead of custom lower it during
 +  // DAG legalization, so we can fold some i64 ADDs used for address
 +  // calculation into the LOAD and STORE instructions.
 +  case ISD::ADD: {
 +const AMDGPUSubtarget ST = TM.getSubtargetAMDGPUSubtarget();
 +if (N-getValueType(0) != MVT::i64 ||
 +ST.getGeneration()  AMDGPUSubtarget::SOUTHERN_ISLANDS)
 +  break;
 +
 +SDLoc DL(N);
 +SDValue LHS = N-getOperand(0);
 +SDValue RHS = N-getOperand(1);
 +
 +SDValue Sub0 = CurDAG-getTargetConstant(AMDGPU::sub0, MVT::i32);
 +SDValue Sub1 = CurDAG-getTargetConstant(AMDGPU::sub1, MVT::i32);
 +
 +SDNode *Lo0 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 + DL, MVT::i32, LHS, Sub0);
 +SDNode *Hi0 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 + DL, MVT::i32, LHS, Sub1);
 +
 +SDNode *Lo1 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 + DL, MVT::i32, RHS, Sub0);
 +SDNode *Hi1 = CurDAG-getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 + DL, MVT::i32, RHS, Sub1);
 +
 +SDVTList VTList = CurDAG-getVTList(MVT::i32, MVT::Glue);
 +
 +SmallVectorSDValue, 8 AddLoArgs;
 +AddLoArgs.push_back(SDValue(Lo0, 0));
 +AddLoArgs.push_back(SDValue(Lo1, 0));
 +
 +SDNode *AddLo = CurDAG-getMachineNode(AMDGPU::S_ADD_I32, DL,
 +   VTList, AddLoArgs);
 +SDValue Carry = SDValue(AddLo, 1);
 +SDNode *AddHi = CurDAG-getMachineNode(AMDGPU::S_ADDC_U32, DL,
 +   MVT::i32, SDValue(Hi0, 0),
 +   SDValue(Hi1, 0), Carry);
 +
 +SDValue Args[5] = {
 +  CurDAG-getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
 +  SDValue(AddLo,0),
 +  Sub0,
 +  SDValue(AddHi,0),
 +  Sub1,
 +};
 +return CurDAG-SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args, 5);
 +  }
 case ISD::BUILD_VECTOR: {
   unsigned RegClassID;
   const AMDGPUSubtarget ST = TM.getSubtargetAMDGPUSubtarget();
 diff --git a/lib/Target/R600/SIISelLowering.cpp 
 b/lib/Target/R600/SIISelLowering.cpp
 index 0a22d16..4d2f370 100644
 --- a/lib/Target/R600/SIISelLowering.cpp
 +++ b/lib/Target/R600/SIISelLowering.cpp
 @@ -76,7 +76,6 @@ SITargetLowering::SITargetLowering(TargetMachine TM) :
 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
 -  setOperationAction(ISD::ADD, MVT::i64, Legal);
 setOperationAction(ISD::ADD, MVT::i32, Legal);
 setOperationAction(ISD::ADDC, MVT::i32, Legal);
 setOperationAction(ISD::ADDE, MVT::i32, Legal);
 @@ -475,7 +474,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, 
 SelectionDAG DAG) const {
 SIMachineFunctionInfo *MFI = MF.getInfoSIMachineFunctionInfo();
 switch (Op.getOpcode()) {
 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
 -  case ISD::ADD: return LowerADD(Op, DAG);
 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
 case ISD::LOAD: {
   LoadSDNode *Load = dyn_castLoadSDNode(Op);
 @@ -613,33 +611,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, 
 SelectionDAG DAG) const {
 return SDValue();
   }
 -SDValue SITargetLowering::LowerADD(SDValue Op,
 -   SelectionDAG DAG) const {
 -  if (Op.getValueType() != MVT::i64)
 -return SDValue();
 -
 -  SDLoc DL(Op);
 -  SDValue LHS = Op.getOperand(0);
 -  SDValue RHS = Op.getOperand(1);
 -
 -  SDValue Zero = DAG.getConstant(0, MVT::i32);
 -  SDValue One = DAG.getConstant(1, MVT::i32);
 -
 -  SDValue Lo0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, Zero);
 -  SDValue Hi0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, One);
 -
 -  SDValue Lo1 =