I fixed the coding style issue. The <iostream> include was a debug leftover line, it shouldn't be there.
----- Mail original ----- > De : Tom Stellard <[email protected]> > À : Vincent Lejeune <[email protected]> > Cc : [email protected]; [email protected] > Envoyé le : Mercredi 13 mars 2013 21h49 > Objet : Re: [PATCH] R600: Factorize code handling Const Read Port limitation > > On Wed, Mar 13, 2013 at 09:12:41PM +0100, Vincent Lejeune wrote: >> --- >> lib/Target/R600/AMDILISelDAGToDAG.cpp | 34 ++++++++++---- >> lib/Target/R600/R600InstrInfo.cpp | 54 ++++++++++++++++++++++ >> lib/Target/R600/R600InstrInfo.h | 3 ++ >> lib/Target/R600/R600MachineScheduler.cpp | 77 > ++++---------------------------- >> lib/Target/R600/R600MachineScheduler.h | 3 +- >> test/CodeGen/R600/kcache-fold-2.ll | 52 +++++++++++++++++++++ >> 6 files changed, 144 insertions(+), 79 deletions(-) >> create mode 100644 test/CodeGen/R600/kcache-fold-2.ll >> >> diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp > b/lib/Target/R600/AMDILISelDAGToDAG.cpp >> index 0c7880d..05a1ea7 100644 >> --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp >> +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp >> @@ -336,6 +336,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { >> return Result; >> } >> >> + > > Whitespace >> bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, >> const R600InstrInfo *TII, std::vector<SDValue> &Ops) { >> int OperandIdx[] = { >> @@ -365,17 +366,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned > Opcode, >> SDValue Operand = Ops[OperandIdx[i] - 1]; >> switch (Operand.getOpcode()) { >> case AMDGPUISD::CONST_ADDRESS: { >> - if (i == 2) >> - break; >> SDValue CstOffset; >> - if (!Operand.getValueType().isVector() && >> - SelectGlobalValueConstantOffset(Operand.getOperand(0), > CstOffset)) { >> - Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, > MVT::f32); >> - Ops[SelIdx[i] - 1] = CstOffset; >> - return true; >> + if (Operand.getValueType().isVector() || >> + !SelectGlobalValueConstantOffset(Operand.getOperand(0), > CstOffset)) >> + break; >> + >> + // Gather others constants values >> + std::vector<unsigned> Consts; >> + for (unsigned j = 0; j < 3; j++) { >> + int SrcIdx = OperandIdx[j]; >> + if (SrcIdx < 0) >> + break; >> + if (RegisterSDNode *Reg = > dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) { >> + if (Reg->getReg() == AMDGPU::ALU_CONST) { >> + ConstantSDNode *Cst = > dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]); >> + Consts.push_back(Cst->getZExtValue()); >> + } >> + } >> } >> + >> + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset); >> + Consts.push_back(Cst->getZExtValue()); >> + if (!TII->fitsConstReadLimitations(Consts)) >> + break; >> + >> + Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, > MVT::f32); >> + Ops[SelIdx[i] - 1] = CstOffset; >> + return true; >> } >> - break; >> case ISD::FNEG: >> if (NegIdx[i] < 0) >> break; >> diff --git a/lib/Target/R600/R600InstrInfo.cpp > b/lib/Target/R600/R600InstrInfo.cpp >> index be3318a..0865098 100644 >> --- a/lib/Target/R600/R600InstrInfo.cpp >> +++ b/lib/Target/R600/R600InstrInfo.cpp >> @@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const > { >> (TargetFlags & R600_InstFlag::OP3)); >> } >> >> +bool >> +R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> > &Consts) >> + const { >> + assert (Consts.size() <= 12 && "Too many operands in > instructions group"); >> + unsigned Pair1 = 0, Pair2 = 0; >> + for (unsigned i = 0, n = Consts.size(); i < n; ++i) { >> + unsigned ReadConstHalf = Consts[i] & 2; >> + unsigned ReadConstIndex = Consts[i] & (~3); >> + unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf; >> + if (!Pair1) { >> + Pair1 = ReadHalfConst; >> + continue; >> + } >> + if (Pair1 == ReadHalfConst) >> + continue; >> + if (!Pair2) { >> + Pair2 = ReadHalfConst; >> + continue; >> + } >> + if (Pair2 != ReadHalfConst) >> + return false; >> + } >> + return true; >> +} >> + >> +bool >> +R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) > const { >> + std::vector<unsigned> Consts; >> + for (unsigned i = 0, n = MIs.size(); i < n; i++) { >> + const MachineInstr *MI = MIs[i]; >> + >> + const R600Operands::Ops OpTable[3][2] = { >> + {R600Operands::SRC0, R600Operands::SRC0_SEL}, >> + {R600Operands::SRC1, R600Operands::SRC1_SEL}, >> + {R600Operands::SRC2, R600Operands::SRC2_SEL}, >> + }; >> + >> + if (!isALUInstr(MI->getOpcode())) >> + continue; >> + >> + for (unsigned j = 0; j < 3; j++) { >> + int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); >> + if (SrcIdx < 0) >> + break; >> + if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) { >> + unsigned Const = MI->getOperand( >> + getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); >> + Consts.push_back(Const); >> + } >> + } >> + } >> + return fitsConstReadLimitations(Consts); >> +} >> + >> DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const > TargetMachine *TM, >> const ScheduleDAG *DAG) const { >> const InstrItineraryData *II = TM->getInstrItineraryData(); >> diff --git a/lib/Target/R600/R600InstrInfo.h > b/lib/Target/R600/R600InstrInfo.h >> index efe721c..bf9569e 100644 >> --- a/lib/Target/R600/R600InstrInfo.h >> +++ b/lib/Target/R600/R600InstrInfo.h >> @@ -53,6 +53,9 @@ namespace llvm { >> /// \returns true if this \p Opcode represents an ALU > instruction. >> bool isALUInstr(unsigned Opcode) const; >> >> + bool fitsConstReadLimitations(const std::vector<unsigned>&) > const; >> + bool canBundle(const std::vector<MachineInstr *> &) const; >> + >> /// \breif Vector instructions are instructions that must fill all >> /// instruction slots within an instruction group. >> bool isVector(const MachineInstr &MI) const; >> diff --git a/lib/Target/R600/R600MachineScheduler.cpp > b/lib/Target/R600/R600MachineScheduler.cpp >> index ee8d672..b4d9779 100644 >> --- a/lib/Target/R600/R600MachineScheduler.cpp >> +++ b/lib/Target/R600/R600MachineScheduler.cpp >> @@ -24,7 +24,7 @@ >> #include "llvm/PassManager.h" >> #include "llvm/Support/raw_ostream.h" >> #include <set> >> - >> +#include <iostream> > > You should double check the LLVM coding style guide, but I think system > headers need to be sorted. > >> using namespace llvm; >> >> static >> @@ -222,7 +222,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) > { >> CurInstKind = IDOther; >> CurEmitted = 0; >> OccupedSlotsMask = 15; >> - memset(InstructionsGroupCandidate, 0, > sizeof(InstructionsGroupCandidate)); >> InstKindLimit[IDAlu] = 120; // 120 minus 8 for security >> InstKindLimit[IDOther] = 32; >> >> @@ -503,79 +502,19 @@ int R600SchedStrategy::getInstKind(SUnit* SU) { >> } >> } >> >> -class ConstPairs { >> -private: >> - unsigned XYPair; >> - unsigned ZWPair; >> -public: >> - ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) { >> - for (unsigned i = 0; i < 3; i++) { >> - unsigned ReadConstChan = ReadConst[i] & 3; >> - unsigned ReadConstIndex = ReadConst[i] & (~3); >> - if (ReadConstChan < 2) { >> - if (!XYPair) { >> - XYPair = ReadConstIndex; >> - } >> - } else { >> - if (!ZWPair) { >> - ZWPair = ReadConstIndex; >> - } >> - } >> - } >> - } >> - >> - bool isCompatibleWith(const ConstPairs& CP) const { >> - return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) && >> - (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair); >> - } >> -}; >> - >> -static >> -const ConstPairs getPairs(const R600InstrInfo *TII, const > MachineInstr& MI) { >> - unsigned ReadConsts[3] = {0, 0, 0}; >> - R600Operands::Ops OpTable[3][2] = { >> - {R600Operands::SRC0, R600Operands::SRC0_SEL}, >> - {R600Operands::SRC1, R600Operands::SRC1_SEL}, >> - {R600Operands::SRC2, R600Operands::SRC2_SEL}, >> - }; >> - >> - if (!TII->isALUInstr(MI.getOpcode())) >> - return ConstPairs(ReadConsts); >> - >> - for (unsigned i = 0; i < 3; i++) { >> - int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]); >> - if (SrcIdx < 0) >> - break; >> - if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) >> - ReadConsts[i] =MI.getOperand( >> - TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm(); >> - } >> - return ConstPairs(ReadConsts); >> -} >> - >> -bool >> -R600SchedStrategy::isBundleable(const MachineInstr& MI) { >> - const ConstPairs &MIPair = getPairs(TII, MI); >> - for (unsigned i = 0; i < 4; i++) { >> - if (!InstructionsGroupCandidate[i]) >> - continue; >> - const ConstPairs &IGPair = getPairs(TII, >> - *InstructionsGroupCandidate[i]->getInstr()); >> - if (!IGPair.isCompatibleWith(MIPair)) >> - return false; >> - } >> - return true; >> -} >> - >> SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) { >> if (Q.empty()) >> return NULL; >> for (std::vector<SUnit *>::iterator It = Q.begin(), E = Q.end(); >> It != E; ++It) { >> SUnit *SU = *It; >> - if (isBundleable(*SU->getInstr())) { >> + InstructionsGroupCandidate.push_back(SU->getInstr()); >> + if (TII->canBundle(InstructionsGroupCandidate)) { >> + InstructionsGroupCandidate.pop_back(); >> Q.erase(It); >> return SU; >> + } else { >> + InstructionsGroupCandidate.pop_back(); >> } >> } >> return NULL; >> @@ -596,7 +535,7 @@ void R600SchedStrategy::PrepareNextSlot() { >> DEBUG(dbgs() << "New Slot\n"); >> assert (OccupedSlotsMask && "Slot wasn't filled"); >> OccupedSlotsMask = 0; >> - memset(InstructionsGroupCandidate, 0, > sizeof(InstructionsGroupCandidate)); >> + InstructionsGroupCandidate.clear(); >> LoadAlu(); >> } >> >> @@ -666,7 +605,7 @@ SUnit* R600SchedStrategy::pickAlu() { >> SUnit *SU = AttemptFillSlot(Chan); >> if (SU) { >> OccupedSlotsMask |= (1 << Chan); >> - InstructionsGroupCandidate[Chan] = SU; >> + InstructionsGroupCandidate.push_back(SU->getInstr()); >> return SU; >> } >> } >> diff --git a/lib/Target/R600/R600MachineScheduler.h > b/lib/Target/R600/R600MachineScheduler.h >> index 4b1f6c3..47b1d56 100644 >> --- a/lib/Target/R600/R600MachineScheduler.h >> +++ b/lib/Target/R600/R600MachineScheduler.h >> @@ -100,7 +100,7 @@ public: >> virtual void releaseBottomNode(SUnit *SU); >> >> private: >> - SUnit *InstructionsGroupCandidate[4]; >> + std::vector<MachineInstr *> InstructionsGroupCandidate; >> >> int getInstKind(SUnit *SU); >> bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) > const; >> @@ -114,7 +114,6 @@ private: >> void AssignSlot(MachineInstr *MI, unsigned Slot); >> SUnit* pickAlu(); >> SUnit* pickOther(int QID); >> - bool isBundleable(const MachineInstr& MI); >> void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst); >> }; >> >> diff --git a/test/CodeGen/R600/kcache-fold-2.ll > b/test/CodeGen/R600/kcache-fold-2.ll >> new file mode 100644 >> index 0000000..77d0052 >> --- /dev/null >> +++ b/test/CodeGen/R600/kcache-fold-2.ll > > For performance reasons (I think), it's prefered to stick similar tests > into the same file, so this test should be moved to kcache-fold.ll. > You will also need to add a CHECK: line for each function name in the > new file. Take a look at some of the other tests to see how this is done. > > -Tom > >> @@ -0,0 +1,52 @@ >> +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s >> + >> +; CHECK-NOT: MOV >> + >> +define void @main() { >> +main_body: >> + %0 = load <4 x float> addrspace(8)* null >> + %1 = extractelement <4 x float> %0, i32 0 >> + %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x > float>] addrspace(8)* null, i64 0, i32 1) >> + %3 = extractelement <4 x float> %2, i32 0 >> + %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x > float>] addrspace(8)* null, i64 0, i32 1) >> + %5 = extractelement <4 x float> %4, i32 1 >> + %6 = fcmp ult float %1, 0.000000e+00 >> + %7 = select i1 %6, float %3, float %5 >> + %8 = load <4 x float> addrspace(8)* null >> + %9 = extractelement <4 x float> %8, i32 1 >> + %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 > x float>] addrspace(8)* null, i64 0, i32 2) >> + %11 = extractelement <4 x float> %10, i32 0 >> + %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 > x float>] addrspace(8)* null, i64 0, i32 2) >> + %13 = extractelement <4 x float> %12, i32 1 >> + %14 = fcmp ult float %9, 0.000000e+00 >> + %15 = select i1 %14, float %11, float %13 >> + %16 = load <4 x float> addrspace(8)* null >> + %17 = extractelement <4 x float> %16, i32 2 >> + %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 > x float>] addrspace(8)* null, i64 0, i32 1) >> + %19 = extractelement <4 x float> %18, i32 3 >> + %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 > x float>] addrspace(8)* null, i64 0, i32 1) >> + %21 = extractelement <4 x float> %20, i32 2 >> + %22 = fcmp ult float %17, 0.000000e+00 >> + %23 = select i1 %22, float %19, float %21 >> + %24 = load <4 x float> addrspace(8)* null >> + %25 = extractelement <4 x float> %24, i32 3 >> + %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 > x float>] addrspace(8)* null, i64 0, i32 2) >> + %27 = extractelement <4 x float> %26, i32 3 >> + %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 > x float>] addrspace(8)* null, i64 0, i32 2) >> + %29 = extractelement <4 x float> %28, i32 2 >> + %30 = fcmp ult float %25, 0.000000e+00 >> + %31 = select i1 %30, float %27, float %29 >> + %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float > 1.000000e+00) >> + %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float > 1.000000e+00) >> + %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float > 1.000000e+00) >> + %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float > 1.000000e+00) >> + %36 = insertelement <4 x float> undef, float %32, i32 0 >> + %37 = insertelement <4 x float> %36, float %33, i32 1 >> + %38 = insertelement <4 x float> %37, float %34, i32 2 >> + %39 = insertelement <4 x float> %38, float %35, i32 3 >> + call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0) >> + ret void >> +} >> + >> +declare float @llvm.AMDIL.clamp.(float, float, float) readnone >> +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) >> -- >> 1.8.1.4 >> >> _______________________________________________ >> llvm-commits mailing list >> [email protected] >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >
0001-R600-Factorize-code-handling-Const-Read-Port-limitat.patch
Description: Binary data
_______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
