On Feb 10, 2008, at 8:19 PM, Nate Begeman wrote:
+
+ if (Subtarget->hasSSE41()) {
...
...
+if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+}
+ }
I don't see the corresponding patterns? This breaks X86/illegal-
insert.ll on SSE4 capable machines. I am going to change them from
'legal' to 'custom' for now. Please fix if that's not right.
Thanks,
Evan
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -3655,10 +3682,34 @@
}
SDOperand
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op,
+SelectionDAG &DAG) {
+ MVT::ValueType VT = Op.getValueType();
+ if (MVT::getSizeInBits(VT) == 8) {
+SDOperand Extract = DAG.getNode(X86ISD::PEXTRB, MVT::i32,
+Op.getOperand(0),
Op.getOperand(1));
+SDOperand Assert = DAG.getNode(ISD::AssertZext, MVT::i32,
Extract,
+DAG.getValueType(VT));
+return DAG.getNode(ISD::TRUNCATE, VT, Assert);
+ } else if (MVT::getSizeInBits(VT) == 16) {
+SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, MVT::i32,
+Op.getOperand(0),
Op.getOperand(1));
+SDOperand Assert = DAG.getNode(ISD::AssertZext, MVT::i32,
Extract,
+DAG.getValueType(VT));
+return DAG.getNode(ISD::TRUNCATE, VT, Assert);
+ }
+ return SDOperand();
+}
+
+
+SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op,
SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDOperand();
+ if (Subtarget->hasSSE41())
+return LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
+
MVT::ValueType VT = Op.getValueType();
// TODO: handle v16i8.
if (MVT::getSizeInBits(VT) == 16) {
@@ -3699,6 +3750,9 @@
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getIntPtrConstant(0));
} else if (MVT::getSizeInBits(VT) == 64) {
+// FIXME: .td only matches this for 2 x f64, not 2 x i64 on
32b
+// FIXME: seems like this should be unnecessary if mov{h,l}pd
were taught
+//to match extract_elt for f64.
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
@@ -3724,9 +3778,47 @@
}
SDOperand
+X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDOperand Op,
SelectionDAG &DAG){
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType EVT = MVT::getVectorElementType(VT);
+
+ SDOperand N0 = Op.getOperand(0);
+ SDOperand N1 = Op.getOperand(1);
+ SDOperand N2 = Op.getOperand(2);
+
+ if ((MVT::getSizeInBits(EVT) == 8) || (MVT::getSizeInBits(EVT) ==
16)) {
+unsigned Opc = (MVT::getSizeInBits(EVT) == 8) ? X86ISD::PINSRB
+ : X86ISD::PINSRW;
+// Transform it so it matches pinsr{b,w} which expects a GR32 as
its second
+// argument.
+if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
+if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue());
+return DAG.getNode(Opc, VT, N0, N1, N2);
+ } else if (EVT == MVT::f32) {
+// Bits [7:6] of the constant are the source select. This will
always be
+// zero here. The DAG Combiner may combine an extract_elt
index into these
+// bits. For example (insert (extract, 3), 2) could be
matched by putting
+// the '3' into bits [7:6] of X86ISD::INSERTPS.
+// Bits [5:4] of the constant are the destination select. This
is the
+// value of the incoming immediate.
+// Bits [3:0] of the constant are the zero mask. The DAG
Combiner may
+// combine either bitwise AND or insert of float 0.0 to set
these bits.
+N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue() << 4);
+return DAG.getNode(X86ISD::INSERTPS, VT, N0, N1, N2);
+ }
+ return SDOperand();
+}
+
+SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG
&DAG) {
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EVT = MVT::getVectorElementType(VT);
+
+ if (Subtarget->hasSSE41())
+return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
+
if (EVT == MVT::i8)
return SDOperand();
@@ -5273,7 +5365,10 @@
case X86ISD::GlobalBaseReg: return X86ISD::GlobalBaseReg;
case X86ISD::Wrapper:return X86ISD::Wrapper;
case X86ISD::S2VEC: return X86ISD::S2VEC;
+ case X86ISD::PEXTRB: return X86ISD::PEXTRB;
case X86ISD::PEXTRW: return X86ISD::PEXTRW;