On Wed, Mar 13, 2013 at 02:59:07PM -0700, Vincent Lejeune wrote: > I fixed the coding style issue. > The <iostream> include was a debug leftover line, it shouldn't be there. >
Reviewed-by: Tom Stellard <[email protected]> > > ----- Mail original ----- > > De : Tom Stellard <[email protected]> > > À : Vincent Lejeune <[email protected]> > > Cc : [email protected]; [email protected] > > Envoyé le : Mercredi 13 mars 2013 21h49 > > Objet : Re: [PATCH] R600: Factorize code handling Const Read Port limitation > > > > On Wed, Mar 13, 2013 at 09:12:41PM +0100, Vincent Lejeune wrote: > >> --- > >> lib/Target/R600/AMDILISelDAGToDAG.cpp | 34 ++++++++++---- > >> lib/Target/R600/R600InstrInfo.cpp | 54 ++++++++++++++++++++++ > >> lib/Target/R600/R600InstrInfo.h | 3 ++ > >> lib/Target/R600/R600MachineScheduler.cpp | 77 > > ++++---------------------------- > >> lib/Target/R600/R600MachineScheduler.h | 3 +- > >> test/CodeGen/R600/kcache-fold-2.ll | 52 +++++++++++++++++++++ > >> 6 files changed, 144 insertions(+), 79 deletions(-) > >> create mode 100644 test/CodeGen/R600/kcache-fold-2.ll > >> > >> diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp > > b/lib/Target/R600/AMDILISelDAGToDAG.cpp > >> index 0c7880d..05a1ea7 100644 > >> --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp > >> +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp > >> @@ -336,6 +336,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { > >> return Result; > >> } > >> > >> + > > > > Whitespace > >> bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, > >> const R600InstrInfo *TII, std::vector<SDValue> &Ops) { > >> int OperandIdx[] = { > >> @@ -365,17 +366,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned > > Opcode, > >> SDValue Operand = Ops[OperandIdx[i] - 1]; > >> switch (Operand.getOpcode()) { > >> case AMDGPUISD::CONST_ADDRESS: { > >> - if (i == 2) > >> - break; > >> SDValue CstOffset; > >> - if (!Operand.getValueType().isVector() && > >> - SelectGlobalValueConstantOffset(Operand.getOperand(0), > > CstOffset)) { > >> - Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, > > MVT::f32); > >> - Ops[SelIdx[i] - 1] = CstOffset; > >> - return true; > >> + if (Operand.getValueType().isVector() || > >> + !SelectGlobalValueConstantOffset(Operand.getOperand(0), > > CstOffset)) > >> + break; > >> + > >> + // Gather others constants values > >> + std::vector<unsigned> Consts; > >> + for (unsigned j = 0; j < 3; j++) { > >> + int SrcIdx = OperandIdx[j]; > >> + if (SrcIdx < 0) > >> + break; > >> + if (RegisterSDNode *Reg = > > dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) { > >> + if (Reg->getReg() == AMDGPU::ALU_CONST) { > >> + ConstantSDNode *Cst = > > dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]); > >> + Consts.push_back(Cst->getZExtValue()); > >> + } > >> + } > >> } > >> + > >> + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset); > >> + Consts.push_back(Cst->getZExtValue()); > >> + if (!TII->fitsConstReadLimitations(Consts)) > >> + break; > >> + > >> + Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, > > MVT::f32); > >> + Ops[SelIdx[i] - 1] = CstOffset; > >> + return true; > >> } > >> - break; > >> case ISD::FNEG: > >> if (NegIdx[i] < 0) > >> break; > >> diff --git a/lib/Target/R600/R600InstrInfo.cpp > > b/lib/Target/R600/R600InstrInfo.cpp > >> index be3318a..0865098 100644 > >> --- a/lib/Target/R600/R600InstrInfo.cpp > >> +++ b/lib/Target/R600/R600InstrInfo.cpp > >> @@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) > >> const > > { > >> (TargetFlags & R600_InstFlag::OP3)); > >> } > >> > >> +bool > >> +R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> > > &Consts) > >> + const { > >> + assert (Consts.size() <= 12 && "Too many operands in > > instructions group"); > >> + unsigned Pair1 = 0, Pair2 = 0; > >> + for (unsigned i = 0, n = Consts.size(); i < n; ++i) { > >> + unsigned ReadConstHalf = Consts[i] & 2; > >> + unsigned ReadConstIndex = Consts[i] & (~3); > >> + unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf; > >> + if (!Pair1) { > >> + Pair1 = ReadHalfConst; > >> + continue; > >> + } > >> + if (Pair1 == ReadHalfConst) > >> + continue; > >> + if (!Pair2) { > >> + Pair2 = ReadHalfConst; > >> + continue; > >> + } > >> + if (Pair2 != ReadHalfConst) > >> + return false; > >> + } > >> + return true; > >> +} > >> + > >> +bool > >> +R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) > > const { > >> + std::vector<unsigned> Consts; > >> + for (unsigned i = 0, n = MIs.size(); i < n; i++) { > >> + const MachineInstr *MI = MIs[i]; > >> + > >> + const R600Operands::Ops OpTable[3][2] = { > >> + {R600Operands::SRC0, R600Operands::SRC0_SEL}, > >> + {R600Operands::SRC1, R600Operands::SRC1_SEL}, > >> + {R600Operands::SRC2, R600Operands::SRC2_SEL}, > >> + }; > >> + > >> + if (!isALUInstr(MI->getOpcode())) > >> + continue; > >> + > >> + for (unsigned j = 0; j < 3; j++) { > >> + int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); > >> + if (SrcIdx < 0) > >> + break; > >> + if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) { > >> + unsigned Const = MI->getOperand( > >> + getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); > >> + Consts.push_back(Const); > >> + } > >> + } > >> + } > >> + return fitsConstReadLimitations(Consts); > >> +} > >> + > >> DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const > > TargetMachine *TM, > >> const ScheduleDAG *DAG) const { > >> const InstrItineraryData *II = TM->getInstrItineraryData(); > >> diff --git a/lib/Target/R600/R600InstrInfo.h > > b/lib/Target/R600/R600InstrInfo.h > >> index efe721c..bf9569e 100644 > >> --- a/lib/Target/R600/R600InstrInfo.h > >> +++ b/lib/Target/R600/R600InstrInfo.h > >> @@ -53,6 +53,9 @@ namespace llvm { > >> /// \returns true if this \p Opcode represents an ALU > > instruction. > >> bool isALUInstr(unsigned Opcode) const; > >> > >> + bool fitsConstReadLimitations(const std::vector<unsigned>&) > > const; > >> + bool canBundle(const std::vector<MachineInstr *> &) const; > >> + > >> /// \breif Vector instructions are instructions that must fill all > >> /// instruction slots within an instruction group. > >> bool isVector(const MachineInstr &MI) const; > >> diff --git a/lib/Target/R600/R600MachineScheduler.cpp > > b/lib/Target/R600/R600MachineScheduler.cpp > >> index ee8d672..b4d9779 100644 > >> --- a/lib/Target/R600/R600MachineScheduler.cpp > >> +++ b/lib/Target/R600/R600MachineScheduler.cpp > >> @@ -24,7 +24,7 @@ > >> #include "llvm/PassManager.h" > >> #include "llvm/Support/raw_ostream.h" > >> #include <set> > >> - > >> +#include <iostream> > > > > You should double check the LLVM coding style guide, but I think system > > headers need to be sorted. > > > >> using namespace llvm; > >> > >> static > >> @@ -222,7 +222,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI > >> *dag) > > { > >> CurInstKind = IDOther; > >> CurEmitted = 0; > >> OccupedSlotsMask = 15; > >> - memset(InstructionsGroupCandidate, 0, > > sizeof(InstructionsGroupCandidate)); > >> InstKindLimit[IDAlu] = 120; // 120 minus 8 for security > >> InstKindLimit[IDOther] = 32; > >> > >> @@ -503,79 +502,19 @@ int R600SchedStrategy::getInstKind(SUnit* SU) { > >> } > >> } > >> > >> -class ConstPairs { > >> -private: > >> - unsigned XYPair; > >> - unsigned ZWPair; > >> -public: > >> - ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) { > >> - for (unsigned i = 0; i < 3; i++) { > >> - unsigned ReadConstChan = ReadConst[i] & 3; > >> - unsigned ReadConstIndex = ReadConst[i] & (~3); > >> - if (ReadConstChan < 2) { > >> - if (!XYPair) { > >> - XYPair = ReadConstIndex; > >> - } > >> - } else { > >> - if (!ZWPair) { > >> - ZWPair = ReadConstIndex; > >> - } > >> - } > >> - } > >> - } > >> - > >> - bool isCompatibleWith(const ConstPairs& CP) const { > >> - return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) && > >> - (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair); > >> - } > >> -}; > >> - > >> -static > >> -const ConstPairs getPairs(const R600InstrInfo *TII, const > > MachineInstr& MI) { > >> - unsigned ReadConsts[3] = {0, 0, 0}; > >> - R600Operands::Ops OpTable[3][2] = { > >> - {R600Operands::SRC0, R600Operands::SRC0_SEL}, > >> - {R600Operands::SRC1, R600Operands::SRC1_SEL}, > >> - {R600Operands::SRC2, R600Operands::SRC2_SEL}, > >> - }; > >> - > >> - if (!TII->isALUInstr(MI.getOpcode())) > >> - return ConstPairs(ReadConsts); > >> - > >> - for (unsigned i = 0; i < 3; i++) { > >> - int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]); > >> - if (SrcIdx < 0) > >> - break; > >> - if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) > >> - ReadConsts[i] =MI.getOperand( > >> - TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm(); > >> - } > >> - return ConstPairs(ReadConsts); > >> -} > >> - > >> -bool > >> -R600SchedStrategy::isBundleable(const MachineInstr& MI) { > >> - const ConstPairs &MIPair = getPairs(TII, MI); > >> - for (unsigned i = 0; i < 4; i++) { > >> - if (!InstructionsGroupCandidate[i]) > >> - continue; > >> - const ConstPairs &IGPair = getPairs(TII, > >> - *InstructionsGroupCandidate[i]->getInstr()); > >> - if (!IGPair.isCompatibleWith(MIPair)) > >> - return false; > >> - } > >> - return true; > >> -} > >> - > >> SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) { > >> if (Q.empty()) > >> return NULL; > >> for (std::vector<SUnit *>::iterator It = Q.begin(), E = Q.end(); > >> It != E; ++It) { > >> SUnit *SU = *It; > >> - if (isBundleable(*SU->getInstr())) { > >> + InstructionsGroupCandidate.push_back(SU->getInstr()); > >> + if (TII->canBundle(InstructionsGroupCandidate)) { > >> + InstructionsGroupCandidate.pop_back(); > >> Q.erase(It); > >> return SU; > >> + } else { > >> + InstructionsGroupCandidate.pop_back(); > >> } > >> } > >> return NULL; > >> @@ -596,7 +535,7 @@ void R600SchedStrategy::PrepareNextSlot() { > >> DEBUG(dbgs() << "New Slot\n"); > >> assert (OccupedSlotsMask && "Slot wasn't filled"); > >> OccupedSlotsMask = 0; > >> - memset(InstructionsGroupCandidate, 0, > > sizeof(InstructionsGroupCandidate)); > >> + InstructionsGroupCandidate.clear(); > >> LoadAlu(); > >> } > >> > >> @@ -666,7 +605,7 @@ SUnit* R600SchedStrategy::pickAlu() { > >> SUnit *SU = AttemptFillSlot(Chan); > >> if (SU) { > >> OccupedSlotsMask |= (1 << Chan); > >> - InstructionsGroupCandidate[Chan] = SU; > >> + InstructionsGroupCandidate.push_back(SU->getInstr()); > >> return SU; > >> } > >> } > >> diff --git a/lib/Target/R600/R600MachineScheduler.h > > b/lib/Target/R600/R600MachineScheduler.h > >> index 4b1f6c3..47b1d56 100644 > >> --- a/lib/Target/R600/R600MachineScheduler.h > >> +++ b/lib/Target/R600/R600MachineScheduler.h > >> @@ -100,7 +100,7 @@ public: > >> virtual void releaseBottomNode(SUnit *SU); > >> > >> private: > >> - SUnit *InstructionsGroupCandidate[4]; > >> + std::vector<MachineInstr *> InstructionsGroupCandidate; > >> > >> int getInstKind(SUnit *SU); > >> bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) > > const; > >> @@ -114,7 +114,6 @@ private: > >> void AssignSlot(MachineInstr *MI, unsigned Slot); > >> SUnit* pickAlu(); > >> SUnit* pickOther(int QID); > >> - bool isBundleable(const MachineInstr& MI); > >> void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst); > >> }; > >> > >> diff --git a/test/CodeGen/R600/kcache-fold-2.ll > > b/test/CodeGen/R600/kcache-fold-2.ll > >> new file mode 100644 > >> index 0000000..77d0052 > >> --- /dev/null > >> +++ b/test/CodeGen/R600/kcache-fold-2.ll > > > > For performance reasons (I think), it's prefered to stick similar tests > > into the same file, so this test should be moved to kcache-fold.ll. > > You will also need to add a CHECK: line for each function name in the > > new file. Take a look at some of the other tests to see how this is done. > > > > -Tom > > > >> @@ -0,0 +1,52 @@ > >> +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s > >> + > >> +; CHECK-NOT: MOV > >> + > >> +define void @main() { > >> +main_body: > >> + %0 = load <4 x float> addrspace(8)* null > >> + %1 = extractelement <4 x float> %0, i32 0 > >> + %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x > > float>] addrspace(8)* null, i64 0, i32 1) > >> + %3 = extractelement <4 x float> %2, i32 0 > >> + %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x > > float>] addrspace(8)* null, i64 0, i32 1) > >> + %5 = extractelement <4 x float> %4, i32 1 > >> + %6 = fcmp ult float %1, 0.000000e+00 > >> + %7 = select i1 %6, float %3, float %5 > >> + %8 = load <4 x float> addrspace(8)* null > >> + %9 = extractelement <4 x float> %8, i32 1 > >> + %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 > > x float>] addrspace(8)* null, i64 0, i32 2) > >> + %11 = extractelement <4 x float> %10, i32 0 > >> + %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 > > x float>] addrspace(8)* null, i64 0, i32 2) > >> + %13 = extractelement <4 x float> %12, i32 1 > >> + %14 = fcmp ult float %9, 0.000000e+00 > >> + %15 = select i1 %14, float %11, float %13 > >> + %16 = load <4 x float> addrspace(8)* null > >> + %17 = extractelement <4 x float> %16, i32 2 > >> + %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 > > x float>] addrspace(8)* null, i64 0, i32 1) > >> + %19 = extractelement <4 x float> %18, i32 3 > >> + %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 > > x float>] addrspace(8)* null, i64 0, i32 1) > >> + %21 = extractelement <4 x float> %20, i32 2 > >> + %22 = fcmp ult float %17, 0.000000e+00 > >> + %23 = select i1 %22, float %19, float %21 > >> + %24 = load <4 x float> addrspace(8)* null > >> + %25 = extractelement <4 x float> %24, i32 3 > >> + %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 > > x float>] addrspace(8)* null, i64 0, i32 2) > >> + %27 = extractelement <4 x float> %26, i32 3 > >> + %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 > > x float>] addrspace(8)* null, i64 0, i32 2) > >> + %29 = extractelement <4 x float> %28, i32 2 > >> + %30 = fcmp ult float %25, 0.000000e+00 > >> + %31 = select i1 %30, float %27, float %29 > >> + %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, > >> float > > 1.000000e+00) > >> + %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, > >> float > > 1.000000e+00) > >> + %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, > >> float > > 1.000000e+00) > >> + %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, > >> float > > 1.000000e+00) > >> + %36 = insertelement <4 x float> undef, float %32, i32 0 > >> + %37 = insertelement <4 x float> %36, float %33, i32 1 > >> + %38 = insertelement <4 x float> %37, float %34, i32 2 > >> + %39 = insertelement <4 x float> %38, float %35, i32 3 > >> + call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0) > >> + ret void > >> +} > >> + > >> +declare float @llvm.AMDIL.clamp.(float, float, float) readnone > >> +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) > >> -- > >> 1.8.1.4 > >> > >> _______________________________________________ > >> llvm-commits mailing list > >> [email protected] > >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
