llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-hexagon Author: Kai Yan (kaiyan96) <details> <summary>Changes</summary> We have following bugfixes for window scheduler, do we need submit them by ourselves? * [Added a new restriction for II by pragma in window scheduler](https://github.com/llvm/llvm-project/pull/99448) * [Fixed a bug in stall cycle calculation for window scheduler](https://github.com/llvm/llvm-project/pull/99451) * [Added missing initialization failure information for window scheduler](https://github.com/llvm/llvm-project/pull/99449) * [Fixed max cycle calculation with zero-cost instructions for window scheduler ](https://github.com/llvm/llvm-project/pull/99454) * [Address the issue of multiple resource reservations In window scheduling](https://github.com/llvm/llvm-project/pull/101665) --- Full diff: https://github.com/llvm/llvm-project/pull/102881.diff 7 Files Affected: - (modified) llvm/lib/CodeGen/MachinePipeliner.cpp (+10-2) - (modified) llvm/lib/CodeGen/WindowScheduler.cpp (+18-11) - (modified) llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir (+1) - (added) llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir (+83) - (added) llvm/test/CodeGen/Hexagon/swp-ws-resource-reserve.mir (+100) - (added) llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir (+59) - (added) llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir (+45) ``````````diff diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 497e282bb97682..5c68711ff61938 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -528,8 +528,16 @@ bool MachinePipeliner::useSwingModuloScheduler() { } bool MachinePipeliner::useWindowScheduler(bool Changed) { - // WindowScheduler does not work when it is off or when SwingModuloScheduler - // is successfully scheduled. + // WindowScheduler does not work for following cases: + // 1. when it is off. + // 2. when SwingModuloScheduler is successfully scheduled. + // 3. when pragma II is enabled. + if (II_setByPragma) { + LLVM_DEBUG(dbgs() << "Window scheduling is disabled when " + "llvm.loop.pipeline.initiationinterval is set.\n"); + return false; + } + return WindowSchedulingOption == WindowSchedulingFlag::WS_Force || (WindowSchedulingOption == WindowSchedulingFlag::WS_On && !Changed); } diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp index 0777480499e55b..f1658e36ae1e92 100644 --- a/llvm/lib/CodeGen/WindowScheduler.cpp +++ b/llvm/lib/CodeGen/WindowScheduler.cpp @@ -232,8 +232,11 @@ bool WindowScheduler::initialize() { return false; } for (auto &Def : MI.all_defs()) - if (Def.isReg() && Def.getReg().isPhysical()) + if (Def.isReg() && Def.getReg().isPhysical()) { + LLVM_DEBUG(dbgs() << "Physical registers are not supported in " + "window scheduling!\n"); return false; + } } if (SchedInstrNum <= WindowRegionLimit) { LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n"); @@ -437,14 +440,17 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG, int PredCycle = getOriCycle(PredMI); ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency()); } - // ResourceManager can be used to detect resource conflicts between the - // current MI and the previously inserted MIs. - while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) { - ++CurCycle; - if (CurCycle == (int)WindowIILimit) - return CurCycle; + // Zero cost instructions do not need to check resource. + if (!TII->isZeroCost(MI.getOpcode())) { + // ResourceManager can be used to detect resource conflicts between the + // current MI and the previously inserted MIs. + while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) { + ++CurCycle; + if (CurCycle == (int)WindowIILimit) + return CurCycle; + } + RM.reserveResources(*SU, CurCycle); } - RM.reserveResources(*SU, CurCycle); OriToCycle[getOriMI(&MI)] = CurCycle; LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S." << getOriStage(getOriMI(&MI), Offset) << "]: " << MI); @@ -485,6 +491,7 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG, // ======================================== int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { int MaxStallCycle = 0; + int CurrentII = MaxCycle + 1; auto Range = getScheduleRange(Offset, SchedInstrNum); for (auto &MI : Range) { auto *SU = TripleDAG->getSUnit(&MI); @@ -492,8 +499,8 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { for (auto &Succ : SU->Succs) { if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU) continue; - // If the expected cycle does not exceed MaxCycle, no check is needed. - if (DefCycle + (int)Succ.getLatency() <= MaxCycle) + // If the expected cycle does not exceed CurrentII, no check is needed. + if (DefCycle + (int)Succ.getLatency() <= CurrentII) continue; // If the cycle of the scheduled MI A is less than that of the scheduled // MI B, the scheduling will fail because the lifetime of the @@ -503,7 +510,7 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { if (DefCycle < UseCycle) return WindowIILimit; // Get the stall cycle introduced by the register between two trips. - int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle; + int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle; MaxStallCycle = std::max(MaxStallCycle, StallCycle); } } diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir b/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir index 601b98dca8e20b..be75301b016ed9 100644 --- a/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir +++ b/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir @@ -3,6 +3,7 @@ # RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \ # RUN: | FileCheck %s +# CHECK: Physical registers are not supported in window scheduling! # CHECK: The WindowScheduler failed to initialize! --- diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir b/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir new file mode 100644 index 00000000000000..6e69a76290fb1d --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir @@ -0,0 +1,83 @@ +# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \ +# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \ +# RUN: | FileCheck %s +# REQUIRES: asserts + +# Test that checks no window scheduler is performed if the II set by pragma was +# enabled + +# CHECK: Window scheduling is disabled when llvm.loop.pipeline.initiationinterval is set. + +--- | + define void @test_pragma_ii_fail(ptr %a0, i32 %a1) { + b0: + %v0 = icmp sgt i32 %a1, 1 + br i1 %v0, label %b1, label %b4 + + b1: ; preds = %b0 + %v1 = load i32, ptr %a0, align 4 + %v2 = add i32 %v1, 10 + %v4 = add i32 %a1, -1 + %cgep = getelementptr i32, ptr %a0, i32 1 + br label %b2 + + b2: ; preds = %b2, %b1 + %v5 = phi i32 [ %v12, %b2 ], [ %v4, %b1 ] + %v6 = phi ptr [ %cgep2, %b2 ], [ %cgep, %b1 ] + %v7 = phi i32 [ %v10, %b2 ], [ %v2, %b1 ] + store i32 %v7, ptr %v6, align 4 + %v8 = add i32 %v7, 10 + %cgep1 = getelementptr i32, ptr %v6, i32 -1 + store i32 %v8, ptr %cgep1, align 4 + %v10 = add i32 %v7, 10 + %v12 = add i32 %v5, -1 + %v13 = icmp eq i32 %v12, 0 + %cgep2 = getelementptr i32, ptr %v6, i32 1 + br i1 %v13, label %b4, label %b2, !llvm.loop !0 + + b4: ; preds = %b2, %b0 + ret void + } + + !0 = distinct !{!0, !1} + !1 = !{!"llvm.loop.pipeline.initiationinterval", i32 2} +... +--- +name: test_pragma_ii_fail +tracksRegLiveness: true +body: | + bb.0.b0: + successors: %bb.1(0x40000000), %bb.3(0x40000000) + liveins: $r0, $r1 + + %0:intregs = COPY $r1 + %1:intregs = COPY $r0 + %2:predregs = C2_cmpgti %0, 1 + J2_jumpf %2, %bb.3, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.1.b1: + successors: %bb.2(0x80000000) + + %3:intregs, %4:intregs = L2_loadri_pi %1, 4 + %5:intregs = A2_addi killed %3, 10 + %6:intregs = A2_addi %0, -1 + %7:intregs = COPY %6 + J2_loop0r %bb.2, %7, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + + bb.2.b2 (machine-block-address-taken): + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + + %8:intregs = PHI %4, %bb.1, %9, %bb.2 + %10:intregs = PHI %5, %bb.1, %11, %bb.2 + S2_storeri_io %8, 0, %10 + %11:intregs = A2_addi %10, 10 + S2_storeri_io %8, -4, %11 + %9:intregs = A2_addi %8, 4 + ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.3, implicit-def dead $pc + + bb.3.b4: + PS_jmpret $r31, implicit-def dead $pc + +... diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-resource-reserve.mir b/llvm/test/CodeGen/Hexagon/swp-ws-resource-reserve.mir new file mode 100644 index 00000000000000..4a9a09c4148cb1 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-ws-resource-reserve.mir @@ -0,0 +1,100 @@ +# REQUIRES: asserts +# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \ +# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \ +# RUN: -window-search-ratio=100 -window-search-num=100 -window-diff-limit=1 \ +# RUN: | FileCheck %s + +# We want to verify that all three V6_vaddw instructions are emitted in the same cycle. +# CHECK-LABEL: Current window Offset is 2 +# CHECK: Cycle [[CycleNum:[0-9]+]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr +# CHECK: Cycle [[CycleNum]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr +# CHECK: Cycle [[CycleNum]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr +# CHECK-LABEL: Current window Offset is 3 + +--- | + define void @add_parallel(i32 %N, ptr noalias %x, ptr noalias %y) { + entry: + %isZeroLength = icmp eq i32 %N, 0 + br i1 %isZeroLength, label %loop.exit, label %loop.preheader + + loop.preheader: ; preds = %entry + %half_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1056964608) + %one_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1065353216) + %two_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1073741824) + br label %loop.body + + loop.exit: ; preds = %loop.body, %entry + ret void + + loop.body: ; preds = %loop.body, %loop.preheader + %lsr.iv1 = phi ptr [ %cgep2, %loop.body ], [ %x, %loop.preheader ] + %lsr.iv = phi ptr [ %cgep1, %loop.body ], [ %y, %loop.preheader ] + %index = phi i32 [ 0, %loop.preheader ], [ %index.next, %loop.body ] + %vec_x1 = load <32 x i32>, ptr %lsr.iv1, align 128 + %vec_add_1 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %one_splat, <32 x i32> %vec_x1) + %vec_add_2 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %half_splat, <32 x i32> %vec_x1) + %vec_add_3 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %two_splat, <32 x i32> %vec_x1) + %vec_add_4 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_1, <32 x i32> %vec_add_2) + %vec_add_5 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_1, <32 x i32> %vec_add_3) + %vec_add_6 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_5, <32 x i32> %vec_add_4) + store <32 x i32> %vec_add_6, ptr %lsr.iv, align 128 + %index.next = add nuw i32 %index, 32 + %continue = icmp ult i32 %index.next, %N + %cgep1 = getelementptr i8, ptr %lsr.iv, i32 128 + %cgep2 = getelementptr i8, ptr %lsr.iv1, i32 128 + br i1 %continue, label %loop.body, label %loop.exit + } + + declare <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32) + declare <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32>, <32 x i32>) +... +--- +name: add_parallel +tracksRegLiveness: true +body: | + bb.0.entry: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $r0, $r1, $r2 + + %0:intregs = COPY $r2 + %1:intregs = COPY $r1 + %2:intregs = COPY $r0 + %3:predregs = C2_cmpeqi %2, 0 + J2_jumpt killed %3, %bb.2, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.1.loop.preheader: + successors: %bb.3(0x80000000) + + %4:intregs = A2_tfrsi 1056964608 + %5:hvxvr = V6_lvsplatw killed %4 + %6:intregs = A2_tfrsi 1065353216 + %7:hvxvr = V6_lvsplatw killed %6 + %8:intregs = A2_tfrsi 1073741824 + %9:hvxvr = V6_lvsplatw killed %8 + %10:intregs = A2_addi %2, 31 + %11:intregs = S2_lsr_i_r %10, 5 + %12:intregs = COPY %11 + J2_loop0r %bb.3, %12, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + J2_jump %bb.3, implicit-def dead $pc + + bb.2.loop.exit: + PS_jmpret $r31, implicit-def dead $pc + + bb.3.loop.body (machine-block-address-taken): + successors: %bb.3(0x7c000000), %bb.2(0x04000000) + + %13:intregs = PHI %1, %bb.1, %14, %bb.3 + %15:intregs = PHI %0, %bb.1, %16, %bb.3 + %17:hvxvr, %14:intregs = V6_vL32b_pi %13, 128 :: (load (s1024) from %ir.lsr.iv1) + %18:hvxvr = V6_vaddw %7, %17 + %19:hvxvr = V6_vaddw %5, %17 + %20:hvxvr = V6_vaddw %9, %17 + %21:hvxvr = V6_vaddw %18, killed %19 + %22:hvxvr = V6_vaddw %18, killed %20 + %23:hvxvr = V6_vaddw killed %22, killed %21 + %16:intregs = V6_vS32b_pi %15, 128, killed %23 :: (store (s1024) into %ir.lsr.iv) + ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.2, implicit-def dead $pc + +... diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir b/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir new file mode 100644 index 00000000000000..ddba67d78eb58c --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir @@ -0,0 +1,59 @@ +# REQUIRES: asserts +# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \ +# RUN: -window-sched=force -filetype=null -verify-machineinstrs \ +# RUN: -window-region-limit=1 -window-search-ratio=100 -window-diff-limit=0 \ +# RUN: 2>&1 | FileCheck %s + +# CHECK-LABEL: Start analyzing II +# CHECK: MaxStallCycle is 0 +# CHECK-LABEL: Start analyzing II +# CHECK: MaxStallCycle is 0 +# CHECK-LABEL: Start analyzing II +# CHECK: MaxStallCycle is 0 + +--- +name: test_window_stall_cycle +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.3(0x40000000), %bb.1(0x40000000) + liveins: $r0, $r1 + + %0:intregs = COPY $r1 + %1:intregs = COPY $r0 + %2:intregs = nsw A2_add %0, %1 + %3:intregs = S2_lsr_i_r_acc %2, %2, 31 + %4:intregs = S2_asr_i_r killed %3, 1 + %5:predregs = C2_cmpgt %1, %4 + %6:intregs = A2_tfrsi 0 + J2_jumpt killed %5, %bb.3, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.1: + successors: %bb.2(0x80000000) + + %7:intregs = A2_addi %4, 2 + %8:intregs = A2_tfrsi 0 + %9:intregs = A2_sub %4, %1 + %10:intregs = A2_addi %9, 1 + %11:intregs = COPY %10 + J2_loop0r %bb.2, %11, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + + bb.2 (machine-block-address-taken): + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + + %12:intregs = PHI %7, %bb.1, %13, %bb.2 + %14:intregs = PHI %8, %bb.1, %15, %bb.2 + %16:intregs = PHI %8, %bb.1, %17, %bb.2 + %18:intregs, %13:intregs = L2_loadri_pi %12, -4 + %17:intregs = nsw A2_add killed %18, %16 + %15:intregs = A2_max %17, %14 + ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.3, implicit-def dead $pc + + bb.3: + %19:intregs = PHI %6, %bb.0, %15, %bb.2 + $r0 = COPY %19 + PS_jmpret $r31, implicit-def dead $pc, implicit $r0 + +... diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir b/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir new file mode 100644 index 00000000000000..ecf49a83c69e15 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir @@ -0,0 +1,45 @@ +# REQUIRES: asserts +# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \ +# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \ +# RUN: | FileCheck %s + +# CHECK-NOT: Can't find a valid II. Keep searching... +# CHECK: Start analyzing II +# CHECK: Start scheduling Phis +# CHECK: Current window Offset is {{[0-9]+}} and II is {{[0-9]+}} + +--- +name: relu +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $r0, $r1, $r2 + %0:intregs = COPY $r2 + %1:intregs = COPY $r1 + %2:intregs = COPY $r0 + %3:predregs = C2_cmpeqi %2, 0 + J2_jumpt killed %3, %bb.2, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + bb.1: + successors: %bb.3(0x80000000) + %4:hvxvr = V6_vd0 + %5:intregs = A2_addi %2, 31 + %6:intregs = S2_lsr_i_r %5, 5 + %7:intregs = COPY %6 + J2_loop0r %bb.3, %7, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + J2_jump %bb.3, implicit-def dead $pc + bb.2: + PS_jmpret $r31, implicit-def dead $pc + bb.3 (machine-block-address-taken): + successors: %bb.3(0x7c000000), %bb.2(0x04000000) + %8:intregs = PHI %1, %bb.1, %9, %bb.3 + %10:intregs = PHI %0, %bb.1, %14, %bb.3 + %11:hvxvr, %9:intregs = V6_vL32b_pi %8, 128 + %12:intregs = COPY %10 + %13:hvxvr = V6_vmaxw killed %11, %4 + %14:intregs = V6_vS32b_pi %12, 128, killed %13 + ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.2, implicit-def dead $pc +... + `````````` </details> https://github.com/llvm/llvm-project/pull/102881 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits