From: Vadim Girlin
This is a skeleton for a pre-RA MachineInstr scheduler strategy. Currently
it only tries to expose more parallelism for ALU instructions (this also
makes the distribution of GPR channels more uniform and increases the
chances of ALU instructions being packed together in a single VLIW group).
It also tries to reduce clause switching by grouping instructions of the
same kind (ALU/FETCH/CF) together.
Vincent Lejeune:
- Support for VLIW4 slot assignment
- Recomputation of ScheduleDAG to get more parallelism opportunities
---
lib/Target/R600/AMDGPUTargetMachine.cpp | 17 +-
lib/Target/R600/R600MachineScheduler.cpp | 452 +++
lib/Target/R600/R600MachineScheduler.h | 119
3 files changed, 587 insertions(+), 1 deletion(-)
create mode 100644 lib/Target/R600/R600MachineScheduler.cpp
create mode 100644 lib/Target/R600/R600MachineScheduler.h
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp
b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 821e864..e6070cd 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -17,6 +17,7 @@
#include "AMDGPU.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
+#include "R600MachineScheduler.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "llvm/Analysis/Passes.h"
@@ -39,6 +40,14 @@ extern "C" void LLVMInitializeR600Target() {
RegisterTargetMachine X(TheAMDGPUTarget);
}
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new R600SchedStrategy());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry("r600", "Run R600's custom scheduler",
+createR600MachineScheduler);
+
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
TargetOptions Options,
@@ -70,7 +79,13 @@ namespace {
class AMDGPUPassConfig : public TargetPassConfig {
public:
AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
-: TargetPassConfig(TM, PM) {}
+: TargetPassConfig(TM, PM) {
+const AMDGPUSubtarget &ST = TM->getSubtarget();
+if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ enablePass(&MachineSchedulerID);
+ MachineSchedRegistry::setDefault(createR600MachineScheduler);
+}
+ }
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
return getTM();
diff --git a/lib/Target/R600/R600MachineScheduler.cpp
b/lib/Target/R600/R600MachineScheduler.cpp
new file mode 100644
index 000..229374c
--- /dev/null
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -0,0 +1,452 @@
+//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
+//
+//===--===//
+
+#define DEBUG_TYPE "misched"
+
+#include "R600MachineScheduler.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include
+#include
+using namespace llvm;
+
+/// \brief Recompute Output and Anti dependencies of the incoming DAG.
+/// ScheduleDAGInstrs has a conservative policy about subregister
+/// dependencies.
+/// All subreg writes of the same superreg are chained by Output/Anti deps.
+/// These artificial deps delay the release of MIs and thus reduce parallelism
+/// opportunities. This function recomputes the ScheduleDAG to produce proper
+/// subreg-aware dependencies.
+// TODO: It should also recompute Data dependencies.
+static
+void RecomputeScheduleDAGMI(ScheduleDAGMI *dag) {
+
+ // Remove all Output/Anti deps
+ for (unsigned i = 0; i < dag->SUnits.size(); ++i) {
+SUnit &SU = dag->SUnits[i];
+for (SUnit::pred_iterator SUIt = SU.Preds.begin(), SUE = SU.Preds.end();
+SUIt != SUE; ++SUIt) {
+ SDep &SD = *SUIt;
+ SUnit *SUPred = SD.getSUnit();
+ if (SD.getKind() == SDep::Output) {
+SUPred->removePred(SD);
+ }
+}
+ }
+
+// Now recompute output/anti dependencies
+ for (unsigned i = 0; i < dag->SUnits.size(); ++i) {
+SUnit &SU = dag->SUnits[i];
+MachineOperand &DestMO = SU.getInstr()->getOperand(0);
+unsigned DestReg = SU.getInstr()->getOperand(0).getReg();
+DEBUG(dbgs() << "Recomputing deps for "; SU.dump(dag); dbgs() << "\n";);
+// Using LiveInterval should make things a lot more efficient, but we
+// can't access them inside a MachineSchedStrategy.
+// Scheduling occurs on a per MBB basis, so it is sufficient to get deps
+// inside a MBB.
+Machi