================ @@ -489,6 +489,90 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); } +void AMDGPUDAGToDAGISel::SelectVectorShuffle(SDNode *N) { + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + + // TODO: Handle 16-bit element vectors with even aligned masks. + if (!Subtarget->hasPkMovB32() || !EltVT.bitsEq(MVT::i32) || + VT.getVectorNumElements() != 2) { + SelectCode(N); + return; + } + + auto *SVN = cast<ShuffleVectorSDNode>(N); + + SDValue Src0 = SVN->getOperand(0); + SDValue Src1 = SVN->getOperand(1); + ArrayRef<int> Mask = SVN->getMask(); + SDLoc DL(N); + + assert(Src0.getValueType().getVectorNumElements() == 2 && Mask.size() == 2 && + Mask[0] < 4 && Mask[1] < 4); + + SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1; + SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1; + unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0; + unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0; + + if (Mask[0] < 0) { + Src0SubReg = Src1SubReg; + MachineSDNode *ImpDef = + CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); + VSrc0 = SDValue(ImpDef, 0); + } + + if (Mask[1] < 0) { + Src1SubReg = Src0SubReg; + MachineSDNode *ImpDef = + CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); + VSrc1 = SDValue(ImpDef, 0); + } + + // SGPR case needs to lower to copies. + // + // Also use subregister extract when we can directly blend the registers with + // a simple subregister copy. + // + // TODO: Maybe we should fold this out earlier + if (N->isDivergent() && Src0SubReg == AMDGPU::sub1 && + Src1SubReg == AMDGPU::sub0) { + // The low element of the result always comes from src0. + // The high element of the result always comes from src1. + // op_sel selects the high half of src0. + // op_sel_hi selects the high half of src1. + + unsigned Src0OpSel = + Src0SubReg == AMDGPU::sub1 ? 
SISrcMods::OP_SEL_0 : SISrcMods::NONE; + unsigned Src1OpSel = + Src1SubReg == AMDGPU::sub1 ? SISrcMods::OP_SEL_0 : SISrcMods::NONE; ---------------- arsenm wrote:
I'm not sure this is correctly encoded. I'm confused by how op_sel and op_sel_hi are supposed to be represented. We set fields in the source modifiers. I guess this should probably be OP_SEL_1?

https://github.com/llvm/llvm-project/pull/123684
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits