Author: Eli Friedman Date: 2020-07-31T17:27:45+02:00 New Revision: 4fd4ec63813fd5b22d81adb6e201cb16ccf72b69
URL: https://github.com/llvm/llvm-project/commit/4fd4ec63813fd5b22d81adb6e201cb16ccf72b69 DIFF: https://github.com/llvm/llvm-project/commit/4fd4ec63813fd5b22d81adb6e201cb16ccf72b69.diff LOG: [AArch64][SVE] Add support for trunc to <vscale x N x i1>. This isn't a natively supported operation, so convert it to a mask+compare. In addition to the operation itself, fix up some surrounding stuff to make the testcase work: we need concat_vectors on i1 vectors, we need legalization of i1 vector truncates, and we need to fix up all the relevant uses of getVectorNumElements(). Differential Revision: https://reviews.llvm.org/D83811 (cherry picked from commit b8f765a1e17f8d212ab1cd8f630d35adc7495556) Added: Modified: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/test/CodeGen/AArch64/sve-trunc.ll Removed: ################################################################################ diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f14b3dba4f31..a026d3960026 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11372,9 +11372,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // Stop if more than one members are non-undef. if (NumDefs > 1) break; + VTs.push_back(EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - X.getValueType().getVectorNumElements())); + X.getValueType().getVectorElementCount())); } if (NumDefs == 0) @@ -18795,6 +18796,11 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); EVT OpVT = N->getOperand(0).getValueType(); + + // We currently can't generate an appropriate shuffle for a scalable vector. 
+ if (VT.isScalableVector()) + return SDValue(); + int NumElts = VT.getVectorNumElements(); int NumOpElts = OpVT.getVectorNumElements(); @@ -19055,11 +19061,14 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return V; // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR - // nodes often generate nop CONCAT_VECTOR nodes. - // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that - // place the incoming vectors at the exact same location. + // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR + // operands and look for a CONCAT operations that place the incoming vectors + // at the exact same location. + // + // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled. SDValue SingleSource = SDValue(); - unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); + unsigned PartNumElem = + N->getOperand(0).getValueType().getVectorMinNumElements(); for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDValue Op = N->getOperand(i); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index b2299931021c..1394f084c6dc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2151,7 +2151,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { EVT InVT = Lo.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), - InVT.getVectorNumElements()); + InVT.getVectorElementCount()); if (N->isStrictFPOpcode()) { Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, @@ -2559,13 +2559,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { SDValue InVec = N->getOperand(OpNo); EVT InVT = InVec->getValueType(0); EVT OutVT = N->getValueType(0); - unsigned NumElements = OutVT.getVectorNumElements(); + ElementCount NumElements = OutVT.getVectorElementCount(); bool IsFloat = 
OutVT.isFloatingPoint(); - // Widening should have already made sure this is a power-two vector - // if we're trying to split it at all. assert() that's true, just in case. - assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - unsigned InElementSize = InVT.getScalarSizeInBits(); unsigned OutElementSize = OutVT.getScalarSizeInBits(); @@ -2595,6 +2591,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { GetSplitVector(InVec, InLoVec, InHiVec); // Truncate them to 1/2 the element size. + // + // This assumes the number of elements is a power of two; any vector that + // isn't should be widened, not split. EVT HalfElementVT = IsFloat ? EVT::getFloatingPointVT(InElementSize/2) : EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 85db14ab66fe..d9951b7b8c5b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -932,8 +932,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); - if (VT.getScalarType() == MVT::i1) + if (VT.getScalarType() == MVT::i1) { setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::TRUNCATE, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); + } } } @@ -8858,6 +8861,16 @@ SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); + if (VT.getScalarType() == MVT::i1) { + // Lower i1 truncate to `(x & 1) != 0`. 
+ SDLoc dl(Op); + EVT OpVT = Op.getOperand(0).getValueType(); + SDValue Zero = DAG.getConstant(0, dl, OpVT); + SDValue One = DAG.getConstant(1, dl, OpVT); + SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One); + return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE); + } + if (!VT.isVector() || VT.isScalableVector()) return Op; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 28a54e6f7d79..3449a8bd16d2 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1109,6 +1109,28 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1>; defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>; + // Extract lo/hi halves of legal predicate types. + def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))), + (ZIP1_PPP_S PPR:$Ps, (PFALSE))>; + def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))), + (ZIP2_PPP_S PPR:$Ps, (PFALSE))>; + def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))), + (ZIP1_PPP_H PPR:$Ps, (PFALSE))>; + def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))), + (ZIP2_PPP_H PPR:$Ps, (PFALSE))>; + def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))), + (ZIP1_PPP_B PPR:$Ps, (PFALSE))>; + def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))), + (ZIP2_PPP_B PPR:$Ps, (PFALSE))>; + + // Concatenate two predicates. 
+ def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)), + (UZP1_PPP_S $p1, $p2)>; + def : Pat<(nxv8i1 (concat_vectors nxv4i1:$p1, nxv4i1:$p2)), + (UZP1_PPP_H $p1, $p2)>; + def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)), + (UZP1_PPP_B $p1, $p2)>; + defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>; defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>; defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>; diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll index 876003a3962c..3743301cfa9b 100644 --- a/llvm/test/CodeGen/AArch64/sve-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll @@ -59,3 +59,123 @@ entry: %out = trunc <vscale x 2 x i64> %in to <vscale x 2 x i32> ret <vscale x 2 x i32> %out } + +; Truncating to i1 requires convert it to a cmp + +define <vscale x 2 x i1> @trunc_i64toi1(<vscale x 2 x i64> %in) { +; CHECK-LABEL: trunc_i64toi1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and z0.d, z0.d, #0x1 +; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 +; CHECK-NEXT: ret +entry: + %out = trunc <vscale x 2 x i64> %in to <vscale x 2 x i1> + ret <vscale x 2 x i1> %out +} + +define <vscale x 4 x i1> @trunc_i64toi1_split(<vscale x 4 x i64> %in) { +; CHECK-LABEL: trunc_i64toi1_split: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: and z0.d, z0.d, #0x1 +; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0 +; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 +; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s +; CHECK-NEXT: ret +entry: + %out = trunc <vscale x 4 x i64> %in to <vscale x 4 x i1> + ret <vscale x 4 x i1> %out +} + +define <vscale x 8 x i1> @trunc_i64toi1_split2(<vscale x 8 x i64> %in) { +; CHECK-LABEL: trunc_i64toi1_split2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and z3.d, z3.d, #0x1 +; CHECK-NEXT: and z2.d, z2.d, #0x1 +; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: and z0.d, z0.d, #0x1 +; 
CHECK-NEXT: cmpne p1.d, p0/z, z3.d, #0 +; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0 +; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s +; CHECK-NEXT: cmpne p2.d, p0/z, z1.d, #0 +; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 +; CHECK-NEXT: uzp1 p0.s, p0.s, p2.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret +entry: + %out = trunc <vscale x 8 x i64> %in to <vscale x 8 x i1> + ret <vscale x 8 x i1> %out +} + +define <vscale x 16 x i1> @trunc_i64toi1_split3(<vscale x 16 x i64> %in) { +; CHECK-LABEL: trunc_i64toi1_split3: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and z7.d, z7.d, #0x1 +; CHECK-NEXT: and z6.d, z6.d, #0x1 +; CHECK-NEXT: and z5.d, z5.d, #0x1 +; CHECK-NEXT: and z4.d, z4.d, #0x1 +; CHECK-NEXT: and z3.d, z3.d, #0x1 +; CHECK-NEXT: and z2.d, z2.d, #0x1 +; CHECK-NEXT: cmpne p1.d, p0/z, z7.d, #0 +; CHECK-NEXT: cmpne p2.d, p0/z, z6.d, #0 +; CHECK-NEXT: cmpne p3.d, p0/z, z5.d, #0 +; CHECK-NEXT: cmpne p4.d, p0/z, z4.d, #0 +; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: and z0.d, z0.d, #0x1 +; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s +; CHECK-NEXT: cmpne p2.d, p0/z, z3.d, #0 +; CHECK-NEXT: uzp1 p3.s, p4.s, p3.s +; CHECK-NEXT: cmpne p4.d, p0/z, z2.d, #0 +; CHECK-NEXT: uzp1 p2.s, p4.s, p2.s +; CHECK-NEXT: cmpne p4.d, p0/z, z1.d, #0 +; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 +; CHECK-NEXT: uzp1 p0.s, p0.s, p4.s +; CHECK-NEXT: uzp1 p1.h, p3.h, p1.h +; CHECK-NEXT: uzp1 p0.h, p0.h, p2.h +; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b +; CHECK-NEXT: ret +entry: + %out = trunc <vscale x 16 x i64> %in to <vscale x 16 x i1> + ret <vscale x 16 x i1> %out +} + + +define <vscale x 4 x i1> @trunc_i32toi1(<vscale x 4 x i32> %in) { +; CHECK-LABEL: trunc_i32toi1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and z0.s, z0.s, #0x1 +; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 +; CHECK-NEXT: ret +entry: + %out = trunc <vscale x 4 x i32> %in to <vscale x 4 x i1> + ret <vscale x 4 x i1> %out +} + +define <vscale x 8 x i1> @trunc_i16toi1(<vscale x 8 x i16> %in) { +; 
CHECK-LABEL: trunc_i16toi1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: and z0.h, z0.h, #0x1 +; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 +; CHECK-NEXT: ret +entry: + %out = trunc <vscale x 8 x i16> %in to <vscale x 8 x i1> + ret <vscale x 8 x i1> %out +} + +define <vscale x 16 x i1> @trunc_i8toi1(<vscale x 16 x i8> %in) { +; CHECK-LABEL: trunc_i8toi1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: and z0.b, z0.b, #0x1 +; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ret +entry: + %out = trunc <vscale x 16 x i8> %in to <vscale x 16 x i1> + ret <vscale x 16 x i1> %out +} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
