================
@@ -25,52 +25,151 @@ using namespace llvm;
 
 namespace {
 
-struct FoldCandidate {
-  MachineInstr *UseMI;
+/// Track a value we may want to fold into downstream users, applying
+/// subregister extracts along the way.
+struct FoldableDef {
   union {
-    MachineOperand *OpToFold;
+    MachineOperand *OpToFold = nullptr;
     uint64_t ImmToFold;
     int FrameIndexToFold;
   };
-  int ShrinkOpcode;
-  unsigned UseOpNo;
+
+  /// Register class of the originally defined value.
+  const TargetRegisterClass *DefRC = nullptr;
+
+  /// Track the original defining instruction for the value.
+  const MachineInstr *DefMI = nullptr;
+
+  /// Subregister to apply to the value at the use point.
+  unsigned DefSubReg = AMDGPU::NoSubRegister;
+
+  /// Kind of value stored in the union.
   MachineOperand::MachineOperandType Kind;
-  bool Commuted;
 
-  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
-                bool Commuted_ = false,
-                int ShrinkOp = -1) :
-    UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
-    Kind(FoldOp->getType()),
-    Commuted(Commuted_) {
-    if (FoldOp->isImm()) {
-      ImmToFold = FoldOp->getImm();
-    } else if (FoldOp->isFI()) {
-      FrameIndexToFold = FoldOp->getIndex();
+  FoldableDef() = delete;
+  FoldableDef(MachineOperand &FoldOp, const TargetRegisterClass *DefRC,
+              unsigned DefSubReg = AMDGPU::NoSubRegister)
+      : DefRC(DefRC), DefSubReg(DefSubReg), Kind(FoldOp.getType()) {
+
+    if (FoldOp.isImm()) {
+      ImmToFold = FoldOp.getImm();
+    } else if (FoldOp.isFI()) {
+      FrameIndexToFold = FoldOp.getIndex();
     } else {
-      assert(FoldOp->isReg() || FoldOp->isGlobal());
-      OpToFold = FoldOp;
+      assert(FoldOp.isReg() || FoldOp.isGlobal());
+      OpToFold = &FoldOp;
     }
+
+    DefMI = FoldOp.getParent();
   }
 
-  FoldCandidate(MachineInstr *MI, unsigned OpNo, int64_t FoldImm,
-                bool Commuted_ = false, int ShrinkOp = -1)
-      : UseMI(MI), ImmToFold(FoldImm), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
-        Kind(MachineOperand::MO_Immediate), Commuted(Commuted_) {}
+  FoldableDef(int64_t FoldImm, const TargetRegisterClass *DefRC,
+              unsigned DefSubReg = AMDGPU::NoSubRegister)
+      : ImmToFold(FoldImm), DefRC(DefRC), DefSubReg(DefSubReg),
+        Kind(MachineOperand::MO_Immediate) {}
+
+  /// Copy the current def and apply \p SubReg to the value.
+  FoldableDef getWithSubReg(const SIRegisterInfo &TRI, unsigned SubReg) const {
+    FoldableDef Copy(*this);
+    Copy.DefSubReg = TRI.composeSubRegIndices(DefSubReg, SubReg);
+    return Copy;
+  }
+
+  bool isReg() const { return Kind == MachineOperand::MO_Register; }
+
+  Register getReg() const {
+    assert(isReg());
+    return OpToFold->getReg();
+  }
+
+  unsigned getSubReg() const {
+    assert(isReg());
+    return OpToFold->getSubReg();
+  }
+
+  bool isImm() const { return Kind == MachineOperand::MO_Immediate; }
 
   bool isFI() const { return Kind == MachineOperand::MO_FrameIndex; }
 
-  bool isImm() const {
-    return Kind == MachineOperand::MO_Immediate;
+  int getFI() const {
+    assert(isFI());
+    return FrameIndexToFold;
   }
 
-  bool isReg() const {
-    return Kind == MachineOperand::MO_Register;
+  bool isGlobal() const { return OpToFold->isGlobal(); }
+
+  /// Return the effective immediate value defined by this instruction, after
+  /// application of any subregister extracts which may exist between the use
+  /// and def instruction.
+  std::optional<int64_t> getEffectiveImmVal() const {
+    assert(isImm());
+    return SIInstrInfo::extractSubregFromImm(ImmToFold, DefSubReg);
   }
 
-  bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
+  /// Check if it is legal to fold this effective value into \p MI's \p OpNo
+  /// operand.
+  bool isOperandLegal(const SIInstrInfo &TII, const MachineInstr &MI,
+                      unsigned OpIdx) const {
+    switch (Kind) {
+    case MachineOperand::MO_Immediate: {
+      std::optional<int64_t> ImmToFold = getEffectiveImmVal();
+      if (!ImmToFold)
+        return false;
+
+      // TODO: Should verify the subregister index is supported by the class
+      // TODO: Avoid the temporary MachineOperand
+      MachineOperand TmpOp = MachineOperand::CreateImm(*ImmToFold);
+      return TII.isOperandLegal(MI, OpIdx, &TmpOp);
+    }
+    case MachineOperand::MO_FrameIndex: {
+      if (DefSubReg != AMDGPU::NoSubRegister)
+        return false;
+      MachineOperand TmpOp = MachineOperand::CreateFI(FrameIndexToFold);
+      return TII.isOperandLegal(MI, OpIdx, &TmpOp);
+    }
+    default:
+      // TODO: Try to apply DefSubReg, for global address we can extract
+      // low/high.
+      if (DefSubReg != AMDGPU::NoSubRegister)
+        return false;
+      return TII.isOperandLegal(MI, OpIdx, OpToFold);
+    }
+
+    llvm_unreachable("covered MachineOperand kind switch");
+  }
+};
+
+struct FoldCandidate {
+  MachineInstr *UseMI;
+  FoldableDef Def;
+  int ShrinkOpcode;
+  unsigned UseOpNo;
+  bool Commuted;
+
+  FoldCandidate(MachineInstr *MI, unsigned OpNo, FoldableDef Def,
+                bool Commuted_ = false, int ShrinkOp = -1)
+      : UseMI(MI), Def(Def), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
+        Commuted(Commuted_) {}
+
+  bool isFI() const { return Def.Kind == MachineOperand::MO_FrameIndex; }
+
+  int getFI() const {
+    assert(isFI());
+    return Def.FrameIndexToFold;
+  }
+
+  bool isImm() const { return Def.isImm(); }
+
+  bool isReg() const { return Def.isReg(); }
+
+  Register getReg() const {
+    assert(isReg());
+    return Def.OpToFold->getReg();
+  }
+
+  bool isGlobal() const { return Def.Kind == MachineOperand::MO_GlobalAddress; }

----------------
jayfoad wrote:
Why not defer to `Def.isGlobal`?

https://github.com/llvm/llvm-project/pull/140608
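For readers following along, a minimal sketch of what that suggestion might look like (hypothetical, not part of the posted patch): `FoldCandidate::isGlobal()` would forward to the `FoldableDef` helper rather than inspecting `Def.Kind` directly, the same way `isImm()` and `isReg()` above already delegate.

```cpp
// Hypothetical illustration of the review suggestion, not the committed code.
// As posted, FoldCandidate checks the kind tag itself:
//   bool isGlobal() const { return Def.Kind == MachineOperand::MO_GlobalAddress; }
// Deferring to FoldableDef would mirror how isImm()/isReg() already forward:
bool isGlobal() const { return Def.isGlobal(); }
```

Whether that is behavior-preserving for every `Kind` depends on `FoldableDef::isGlobal()`, which as posted reads `OpToFold` rather than checking `Kind`; that trade-off is a question for the patch author, not something asserted here.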