Author: aankit-ca Date: 2025-04-25T16:22:32-07:00 New Revision: 89adc2d4f93b0f9dea251286bbb6561c7d7e7b0b
URL: https://github.com/llvm/llvm-project/commit/89adc2d4f93b0f9dea251286bbb6561c7d7e7b0b DIFF: https://github.com/llvm/llvm-project/commit/89adc2d4f93b0f9dea251286bbb6561c7d7e7b0b.diff LOG: [HEXAGON] Fix corner cases for hwloops pass (#135439) Add check to make sure Dist > 0 or Dist < 0 for appropriate cmp cases to hexagon hardware loops pass. The change modifies the HexagonHardwareLoops pass to add runtime checks to make sure that end_value > initial_value for less than comparisons and end_value < initial_value for greater than comparisons. Fix for https://github.com/llvm/llvm-project/issues/133241 @androm3da @iajbar PTAL --------- Co-authored-by: aankit-quic <aan...@quicinc.com> (cherry picked from commit da8ce56c53fe6e34809ba0b310fa90257e230a89) Added: llvm/test/CodeGen/Hexagon/hwloop-dist-check.mir Modified: llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp llvm/test/CodeGen/Hexagon/swp-phi-start.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 9334746349240..dd4b240455126 100644 --- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -731,6 +731,11 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, Register IVReg, int64_t IVBump, Comparison::Kind Cmp) const { + LLVM_DEBUG(llvm::dbgs() << "Loop: " << *Loop << "\n"); + LLVM_DEBUG(llvm::dbgs() << "Initial Value: " << *Start << "\n"); + LLVM_DEBUG(llvm::dbgs() << "End Value: " << *End << "\n"); + LLVM_DEBUG(llvm::dbgs() << "Inc/Dec Value: " << IVBump << "\n"); + LLVM_DEBUG(llvm::dbgs() << "Comparison: " << Cmp << "\n"); // Cannot handle comparison EQ, i.e. while (A == B). if (Cmp == Comparison::EQ) return nullptr; @@ -846,6 +851,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, if (IVBump < 0) { std::swap(Start, End); IVBump = -IVBump; + std::swap(CmpLess, CmpGreater); } // Cmp may now have a wrong direction, e.g. LEs may now be GEs. // Signedness, and "including equality" are preserved. @@ -989,7 +995,45 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, CountSR = 0; } - return new CountValue(CountValue::CV_Register, CountR, CountSR); + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + Register MuxR = CountR; + unsigned MuxSR = CountSR; + // For the loop count to be valid unsigned number, CmpLess should imply + // Dist >= 0. Similarly, CmpGreater should imply Dist < 0. We can skip the + // check if the initial distance is zero and the comparison is LTu || LTEu. + if (!(Start->isImm() && StartV == 0 && Comparison::isUnsigned(Cmp) && + CmpLess) && + (CmpLess || CmpGreater)) { + // Generate: + // DistCheck = CMP_GT DistR, 0 --> CmpLess + // DistCheck = CMP_GT DistR, -1 --> CmpGreater + Register DistCheckR = MRI->createVirtualRegister(PredRC); + const MCInstrDesc &DistCheckD = TII->get(Hexagon::C2_cmpgti); + BuildMI(*PH, InsertPos, DL, DistCheckD, DistCheckR) + .addReg(DistR, 0, DistSR) + .addImm((CmpLess) ? 0 : -1); + + // Generate: + // MUXR = MUX DistCheck, CountR, 1 --> CmpLess + // MUXR = MUX DistCheck, 1, CountR --> CmpGreater + MuxR = MRI->createVirtualRegister(IntRC); + if (CmpLess) { + const MCInstrDesc &MuxD = TII->get(Hexagon::C2_muxir); + BuildMI(*PH, InsertPos, DL, MuxD, MuxR) + .addReg(DistCheckR) + .addReg(CountR, 0, CountSR) + .addImm(1); + } else { + const MCInstrDesc &MuxD = TII->get(Hexagon::C2_muxri); + BuildMI(*PH, InsertPos, DL, MuxD, MuxR) + .addReg(DistCheckR) + .addImm(1) + .addReg(CountR, 0, CountSR); + } + MuxSR = 0; + } + + return new CountValue(CountValue::CV_Register, MuxR, MuxSR); } /// Return true if the operation is invalid within hardware loop. diff --git a/llvm/test/CodeGen/Hexagon/hwloop-dist-check.mir b/llvm/test/CodeGen/Hexagon/hwloop-dist-check.mir new file mode 100644 index 0000000000000..9f8c14a314309 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/hwloop-dist-check.mir @@ -0,0 +1,277 @@ +# RUN: llc --mtriple=hexagon -run-pass=hwloops %s -o - | FileCheck %s + +# CHECK-LABEL: name: f +# CHECK: [[R1:%[0-9]+]]:predregs = C2_cmpgti [[R0:%[0-9]+]], 0 +# CHECK: [[R3:%[0-9]+]]:intregs = C2_muxir [[R1:%[0-9]+]], [[R2:%[0-9]+]], 1 +# CHECK-LABEL: name: g +# CHECK: [[R1:%[0-9]+]]:predregs = C2_cmpgti [[R0:%[0-9]+]], 0 +# CHECK: [[R3:%[0-9]+]]:intregs = C2_muxir [[R1:%[0-9]+]], [[R2:%[0-9]+]], 1 +--- | + @a = dso_local global [255 x ptr] zeroinitializer, align 8 + + ; Function Attrs: minsize nofree norecurse nosync nounwind optsize memory(write, argmem: none, inaccessiblemem: none) + define dso_local void @f(i32 noundef %m) local_unnamed_addr #0 { + entry: + %cond = tail call i32 @llvm.smax.i32(i32 %m, i32 2) + %0 = add nsw i32 %cond, -4 + %1 = shl i32 %cond, 3 + %cgep = getelementptr i8, ptr @a, i32 %1 + %cgep36 = bitcast ptr @a to ptr + br label %do.body + + do.body: ; preds = %do.body, %entry + %lsr.iv1 = phi ptr [ %cgep4, %do.body ], [ %cgep, %entry ] + %lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %0, %entry ] + %sh.0 = phi i32 [ 256, %entry ], [ %shr, %do.body ] + %shr = lshr i32 %sh.0, 1 + %cgep5 = getelementptr inbounds [255 x ptr], ptr %cgep36, i32 0, i32 %shr + store ptr %lsr.iv1, ptr %cgep5, align 4, !tbaa !5 + %lsr.iv.next = add nsw i32 %lsr.iv, 4 + %cmp1 = icmp samesign ult i32 %lsr.iv.next, 1073741836 + %cgep4 = getelementptr i8, ptr %lsr.iv1, i32 32 + br i1 %cmp1, label %do.body, label %do.end, !llvm.loop !9 + + do.end: ; preds = %do.body + ret void + } + + ; Function Attrs: minsize nofree norecurse nosync nounwind optsize memory(write, argmem: none, inaccessiblemem: none) + define dso_local void @g(i32 noundef %m) local_unnamed_addr #0 { + entry: + %0 = add i32 %m, -4 + %1 = shl i32 %m, 3 + %cgep = getelementptr i8, ptr @a, i32 %1 + %cgep36 = bitcast ptr @a to ptr + br label %do.body + + do.body: ; preds = %do.body, %entry + %lsr.iv1 = phi ptr [ %cgep4, %do.body ], [ %cgep, %entry ] + %lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %0, %entry ] + %sh.0 = phi i32 [ 256, %entry ], [ %shr, %do.body ] + %shr = lshr i32 %sh.0, 1 + %cgep5 = getelementptr inbounds [255 x ptr], ptr %cgep36, i32 0, i32 %shr + store ptr %lsr.iv1, ptr %cgep5, align 4, !tbaa !5 + %lsr.iv.next = add i32 %lsr.iv, 4 + %cmp = icmp slt i32 %lsr.iv.next, 1073741836 + %cgep4 = getelementptr i8, ptr %lsr.iv1, i32 32 + br i1 %cmp, label %do.body, label %do.end, !llvm.loop !11 + + do.end: ; preds = %do.body + ret void + } + + ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) + declare i32 @llvm.smax.i32(i32, i32) #1 + + !llvm.module.flags = !{!0, !1, !2, !3} + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 8, !"PIC Level", i32 2} + !2 = !{i32 7, !"PIE Level", i32 2} + !3 = !{i32 7, !"frame-pointer", i32 2} + !5 = !{!6, !6, i64 0} + !6 = !{!"any pointer", !7, i64 0} + !7 = !{!"omnipotent char", !8, i64 0} + !8 = !{!"Simple C/C++ TBAA"} + !9 = distinct !{!9, !10} + !10 = !{!"llvm.loop.mustprogress"} + !11 = distinct !{!11, !10} + +... +--- +name: f +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: intregs, preferred-register: '', flags: [ ] } + - { id: 1, class: intregs, preferred-register: '', flags: [ ] } + - { id: 2, class: intregs, preferred-register: '', flags: [ ] } + - { id: 3, class: intregs, preferred-register: '', flags: [ ] } + - { id: 4, class: intregs, preferred-register: '', flags: [ ] } + - { id: 5, class: intregs, preferred-register: '', flags: [ ] } + - { id: 6, class: intregs, preferred-register: '', flags: [ ] } + - { id: 7, class: intregs, preferred-register: '', flags: [ ] } + - { id: 8, class: intregs, preferred-register: '', flags: [ ] } + - { id: 9, class: intregs, preferred-register: '', flags: [ ] } + - { id: 10, class: intregs, preferred-register: '', flags: [ ] } + - { id: 11, class: intregs, preferred-register: '', flags: [ ] } + - { id: 12, class: intregs, preferred-register: '', flags: [ ] } + - { id: 13, class: predregs, preferred-register: '', flags: [ ] } + - { id: 14, class: predregs, preferred-register: '', flags: [ ] } + - { id: 15, class: intregs, preferred-register: '', flags: [ ] } +liveins: + - { reg: '$r0', virtual-reg: '%9' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0 + + %9:intregs = COPY $r0 + %11:intregs = A2_tfrsi 2 + %12:intregs = A2_max %9, %11 + %0:intregs = nsw A2_addi %12, -4 + %1:intregs = S4_addi_asl_ri @a, %12, 3 + %2:intregs = A2_tfrsi @a + %10:intregs = A2_tfrsi 256 + + bb.1.do.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + + %3:intregs = PHI %1, %bb.0, %8, %bb.1 + %4:intregs = PHI %0, %bb.0, %7, %bb.1 + %5:intregs = PHI %10, %bb.0, %15, %bb.1 + %15:intregs = S2_extractu %5, 8, 1 + S4_storeri_rr %2, %15, 2, %3 :: (store (s32) into %ir.cgep5, !tbaa !5) + %7:intregs = nsw A2_addi %4, 4 + %13:predregs = C2_cmpgtui %7, 1073741835 + %8:intregs = A2_addi %3, 32 + J2_jumpf %13, %bb.1, implicit-def dead $pc + J2_jump %bb.2, implicit-def dead $pc + + bb.2.do.end: + PS_jmpret $r31, implicit-def dead $pc + +... +--- +name: g +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: intregs, preferred-register: '', flags: [ ] } + - { id: 1, class: intregs, preferred-register: '', flags: [ ] } + - { id: 2, class: intregs, preferred-register: '', flags: [ ] } + - { id: 3, class: intregs, preferred-register: '', flags: [ ] } + - { id: 4, class: intregs, preferred-register: '', flags: [ ] } + - { id: 5, class: intregs, preferred-register: '', flags: [ ] } + - { id: 6, class: intregs, preferred-register: '', flags: [ ] } + - { id: 7, class: intregs, preferred-register: '', flags: [ ] } + - { id: 8, class: intregs, preferred-register: '', flags: [ ] } + - { id: 9, class: intregs, preferred-register: '', flags: [ ] } + - { id: 10, class: intregs, preferred-register: '', flags: [ ] } + - { id: 11, class: predregs, preferred-register: '', flags: [ ] } + - { id: 12, class: predregs, preferred-register: '', flags: [ ] } + - { id: 13, class: intregs, preferred-register: '', flags: [ ] } +liveins: + - { reg: '$r0', virtual-reg: '%9' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0 + + %9:intregs = COPY $r0 + %0:intregs = A2_addi %9, -4 + %1:intregs = S4_addi_asl_ri @a, %9, 3 + %2:intregs = A2_tfrsi @a + %10:intregs = A2_tfrsi 256 + + bb.1.do.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + + %3:intregs = PHI %1, %bb.0, %8, %bb.1 + %4:intregs = PHI %0, %bb.0, %7, %bb.1 + %5:intregs = PHI %10, %bb.0, %13, %bb.1 + %13:intregs = S2_extractu %5, 8, 1 + S4_storeri_rr %2, %13, 2, %3 :: (store (s32) into %ir.cgep5, !tbaa !5) + %7:intregs = A2_addi %4, 4 + %11:predregs = C2_cmpgti %7, 1073741835 + %8:intregs = A2_addi %3, 32 + J2_jumpf %11, %bb.1, implicit-def dead $pc + J2_jump %bb.2, implicit-def dead $pc + + bb.2.do.end: + PS_jmpret $r31, implicit-def dead $pc + +... diff --git a/llvm/test/CodeGen/Hexagon/swp-phi-start.ll b/llvm/test/CodeGen/Hexagon/swp-phi-start.ll index 52c258656ec22..6c2b08d83b1c7 100644 --- a/llvm/test/CodeGen/Hexagon/swp-phi-start.ll +++ b/llvm/test/CodeGen/Hexagon/swp-phi-start.ll @@ -5,8 +5,9 @@ ; the same stage. ; CHECK-DAG: [[REG3:(r[0-9]+)]] = add([[REG1:(r[0-9]+)]],#-1) -; CHECK-DAG: [[REG2:(r[0-9]+)]] = add([[REG1]],#-1) -; CHECK-DAG: loop0(.LBB0_[[LOOP:.]],[[REG3]]) +; CHECK-DAG: [[REG2:(r[0-9]+)]] = add([[REG4:(r[0-9]+)]],#-1) +; CHECK-DAG: loop0(.LBB0_[[LOOP:.]],[[REG2]]) +; CHECK-NOT: = [[REG3]] ; CHECK-NOT: = [[REG2]] ; CHECK: .LBB0_[[LOOP]]: ; CHECK: }{{[ \t]*}}:endloop _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits