================
@@ -890,6 +890,81 @@ bool MFMASmallGemmOpt::applyIGLPStrategy(
return true;
}
+/// Returns true when \p MI falls under the \c SchedGroupMask::VALU
+/// classification (e.g. barrier mask \c 0x2).
+///
+/// \param MI  Instruction to classify.
+/// \param TII Instruction info used for the VALU / MFMA / WMMA / TRANS queries.
+/// \returns true only for a non-meta VALU instruction that is neither an
+/// MFMA/WMMA nor a TRANS instruction and that neither loads nor stores.
+static bool matchesSchedGroupValu(const MachineInstr &MI,
+                                  const SIInstrInfo *TII) {
+  // Meta instructions are never counted.
+  if (MI.isMetaInstruction())
+    return false;
+
+  // Must be flagged VALU, excluding MFMA/WMMA and TRANS instructions.
+  if (!TII->isVALU(MI))
+    return false;
+  if (TII->isMFMAorWMMA(MI) || TII->isTRANS(MI))
+    return false;
+
+  // Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS).
+  // For our purposes, these shall not be classified as VALU as this results
+  // in unexpected behavior.
+  return !MI.mayLoadOrStore();
+}
+
+/// IGLP strategy that interleaves MFMA/WMMA instructions with VALU slots.
+///
+/// Each repeating stage is one MFMA (or WMMA) followed by up to N VALU ops per
+/// gap, where N = floor(#VALU / #MFMA) within this schedule region and is at
+/// least 1. VALU ops are counted with the same predicate as
+/// \c matchesSchedGroupValu. Template length uses MFMACount * 3 for slack, like
+/// MFMASmallGemmOpt.
+///
+/// \c IsBottomUp is set to false so that SchedGroup pipeline order matches
+/// forward program order (MFMA before its VALU gap).
+class MFMAValuSpacingOpt final : public IGLPStrategy {
+public:
+  MFMAValuSpacingOpt(ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
+      : IGLPStrategy(DAG, TII) {
+    // Forward order: each MFMA precedes its VALU gap.
+    IsBottomUp = false;
+  }
+
+  /// The strategy applies only when the region iterated via \p DAG contains at
+  /// least one MFMA or WMMA instruction. \p Phase is not consulted.
+  bool shouldApplyStrategy(ScheduleDAGInstrs *DAG,
+                           AMDGPU::SchedulingPhase Phase) override {
+    for (const MachineInstr &Instr : *DAG) {
+      if (TII->isMFMAorWMMA(Instr))
+        return true;
+    }
+    return false;
+  }
+
+  /// Builds the MFMA/VALU interleaving pipeline; defined out of line.
+  bool applyIGLPStrategy(
+      DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
+      DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
+      AMDGPU::SchedulingPhase Phase) override;
+};
+
+bool MFMAValuSpacingOpt::applyIGLPStrategy(
+ DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
+ DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
+ AMDGPU::SchedulingPhase Phase) {
+ unsigned MFMACount = 0;
+ unsigned ValuCount = 0;
+ for (const MachineInstr &I : *DAG) {
+ if (TII->isMFMAorWMMA(I))
+ ++MFMACount;
+ else if (matchesSchedGroupValu(I, TII))
+ ++ValuCount;
+ }
+
+ unsigned ValuGap = 1;
+ if (MFMACount > 0) {
+ ValuGap = ValuCount / MFMACount;
----------------
arsenm wrote:
Should this round up?
https://github.com/llvm/llvm-project/pull/190916
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits