================
@@ -890,6 +890,81 @@ bool MFMASmallGemmOpt::applyIGLPStrategy(
   return true;
 }
 
+/// Whether \p MI matches \c SchedGroupMask::VALU classification (e.g. barrier
+/// mask \c 0x2)
+static bool matchesSchedGroupValu(const MachineInstr &MI,
+                                  const SIInstrInfo *TII) {
+  if (MI.isMetaInstruction())
+    return false;
+  // Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS).
+  // For our purposes, these shall not be classified as VALU as this results
+  // in unexpected behavior.
+  return TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI) &&
+         !MI.mayLoadOrStore();
+}
+
+/// Interleave MFMA/WMMA with VALU slots: each repeating stage is one MFMA (or
+/// WMMA), then up to N VALU ops per gap where N = floor(#VALU / #MFMA) in this
+/// schedule region (same predicate as \c matchesSchedGroupValu), at least 1.
+/// Template length uses MFMACount * 3 for slack, like MFMASmallGemmOpt.
+/// \p IsBottomUp is false so SchedGroup pipeline order matches forward program
+/// order (MFMA before its VALU gap).
+class MFMAValuSpacingOpt final : public IGLPStrategy {
+public:
+  bool applyIGLPStrategy(
+      DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
+      DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
+      AMDGPU::SchedulingPhase Phase) override;
+
+  bool shouldApplyStrategy(ScheduleDAGInstrs *DAG,
+                           AMDGPU::SchedulingPhase Phase) override {
+    for (const MachineInstr &I : *DAG)
+      if (TII->isMFMAorWMMA(I))
+        return true;
+    return false;
+  }
+
+  MFMAValuSpacingOpt(ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
+      : IGLPStrategy(DAG, TII) {
+    IsBottomUp = false;
+  }
+};
+
+bool MFMAValuSpacingOpt::applyIGLPStrategy(
+    DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
+    DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
+    AMDGPU::SchedulingPhase Phase) {
+  unsigned MFMACount = 0;
+  unsigned ValuCount = 0;
+  for (const MachineInstr &I : *DAG) {
+    if (TII->isMFMAorWMMA(I))
+      ++MFMACount;
+    else if (matchesSchedGroupValu(I, TII))
+      ++ValuCount;
+  }
+
+  unsigned ValuGap = 1;
+  if (MFMACount > 0) {
+    ValuGap = ValuCount / MFMACount;
----------------
arsenm wrote:

Should this round up?

https://github.com/llvm/llvm-project/pull/190916
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to