https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/172962
>From 0c2f9b463e2013710ba90e9b8a19f8e9b9fd693f Mon Sep 17 00:00:00 2001 From: Peter Collingbourne <[email protected]> Date: Thu, 18 Dec 2025 23:37:39 -0800 Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?= =?UTF-8?q?itial=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6-beta.1 --- .../Target/AArch64/AArch64ISelLowering.cpp | 66 +++++++++++-------- .../AArch64/switch-cases-to-branch-and.ll | 40 +++++------ 2 files changed, 56 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 837393b0cbdcd..476a455f2e506 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11177,6 +11177,40 @@ std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) { return {Val, Val.getValueSizeInBits() - 1}; } +// Op is an SDValue that is being compared to 0. If the comparison is a bit +// test, optimize it to a TBZ or TBNZ. +static SDValue optimizeBitTest(SDValue Op, SDValue Chain, SDValue Dest, + unsigned Opcode, SelectionDAG &DAG) { + SDLoc DL(Op); + + if (Op.getOpcode() != ISD::AND) + return SDValue(); + + // See if we can use a TBZ to fold in an AND as well. + // TBZ has a smaller branch displacement than CBZ. If the offset is + // out of bounds, a late MI-layer pass rewrites branches. + // 403.gcc is an example that hits this case. + if (isa<ConstantSDNode>(Op.getOperand(1)) && + isPowerOf2_64(Op.getConstantOperandVal(1))) { + SDValue Test = Op.getOperand(0); + uint64_t Mask = Op.getConstantOperandVal(1); + return DAG.getNode(Opcode, DL, MVT::Other, Chain, Test, + DAG.getConstant(Log2_64(Mask), DL, MVT::i64), Dest); + } + + if (Op.getOperand(0).getOpcode() == ISD::SHL) { + auto Op00 = Op.getOperand(0).getOperand(0); + if (isa<ConstantSDNode>(Op00) && Op00->getAsZExtVal() == 1) { + auto Shr = DAG.getNode(ISD::SRL, DL, Op00.getValueType(), + Op.getOperand(1), Op.getOperand(0).getOperand(1)); + return DAG.getNode(Opcode, DL, MVT::Other, Chain, Shr, + DAG.getConstant(0, DL, MVT::i64), Dest); + } + } + + return SDValue(); +} + SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); @@ -11236,35 +11270,15 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS); if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) { if (CC == ISD::SETEQ) { - // See if we can use a TBZ to fold in an AND as well. - // TBZ has a smaller branch displacement than CBZ. If the offset is - // out of bounds, a late MI-layer pass rewrites branches. - // 403.gcc is an example that hits this case. - if (LHS.getOpcode() == ISD::AND && - isa<ConstantSDNode>(LHS.getOperand(1)) && - isPowerOf2_64(LHS.getConstantOperandVal(1))) { - SDValue Test = LHS.getOperand(0); - uint64_t Mask = LHS.getConstantOperandVal(1); - return DAG.getNode(AArch64ISD::TBZ, DL, MVT::Other, Chain, Test, - DAG.getConstant(Log2_64(Mask), DL, MVT::i64), - Dest); - } + if (SDValue Result = + optimizeBitTest(LHS, Chain, Dest, AArch64ISD::TBZ, DAG)) + return Result; return DAG.getNode(AArch64ISD::CBZ, DL, MVT::Other, Chain, LHS, Dest); } else if (CC == ISD::SETNE) { - // See if we can use a TBZ to fold in an AND as well. - // TBZ has a smaller branch displacement than CBZ. If the offset is - // out of bounds, a late MI-layer pass rewrites branches. - // 403.gcc is an example that hits this case. - if (LHS.getOpcode() == ISD::AND && - isa<ConstantSDNode>(LHS.getOperand(1)) && - isPowerOf2_64(LHS.getConstantOperandVal(1))) { - SDValue Test = LHS.getOperand(0); - uint64_t Mask = LHS.getConstantOperandVal(1); - return DAG.getNode(AArch64ISD::TBNZ, DL, MVT::Other, Chain, Test, - DAG.getConstant(Log2_64(Mask), DL, MVT::i64), - Dest); - } + if (SDValue Result = + optimizeBitTest(LHS, Chain, Dest, AArch64ISD::TBNZ, DAG)) + return Result; return DAG.getNode(AArch64ISD::CBNZ, DL, MVT::Other, Chain, LHS, Dest); } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) { diff --git a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll index 775ab3fe110e0..cb59a8d976eda 100644 --- a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll +++ b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll @@ -422,25 +422,23 @@ e2: define i32 @switch_in_loop_with_matching_dests_0_and_pow2_4_cases(ptr %start) { ; CHECK-LABEL: switch_in_loop_with_matching_dests_0_and_pow2_4_cases: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov x10, #32769 ; =0x8001 -; CHECK-NEXT: mov w8, #1 ; =0x1 +; CHECK-NEXT: mov x8, #32769 ; =0x8001 ; CHECK-NEXT: add x9, x0, #1 -; CHECK-NEXT: movk x10, #1, lsl #32 +; CHECK-NEXT: movk x8, #1, lsl #32 ; CHECK-NEXT: b LBB5_2 ; CHECK-NEXT: LBB5_1: ; %loop ; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1 -; CHECK-NEXT: cmp w11, #124 +; CHECK-NEXT: cmp w10, #124 ; CHECK-NEXT: b.eq LBB5_5 ; CHECK-NEXT: LBB5_2: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb w11, [x9], #1 -; CHECK-NEXT: cmp w11, #32 +; CHECK-NEXT: ldrb w10, [x9], #1 +; CHECK-NEXT: cmp w10, #32 ; CHECK-NEXT: b.hi LBB5_1 ; CHECK-NEXT: ; %bb.3: ; %loop ; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1 -; CHECK-NEXT: lsl x12, x8, x11 -; CHECK-NEXT: tst x12, x10 -; CHECK-NEXT: b.eq LBB5_1 +; CHECK-NEXT: lsr x11, x8, x10 +; CHECK-NEXT: tbz w11, #0, LBB5_1 ; CHECK-NEXT: ; %bb.4: ; %e1 ; CHECK-NEXT: mov w0, #-1 ; =0xffffffff ; CHECK-NEXT: ret @@ -608,10 +606,9 @@ exit: define i64 @consecutive_match_both(ptr %p, i32 %param) { ; CHECK-LABEL: consecutive_match_both: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w8, #1 ; =0x1 +; CHECK-NEXT: mov w8, #249 ; =0xf9 ; CHECK-NEXT: mov w9, #100 ; =0x64 -; CHECK-NEXT: mov w10, #249 ; =0xf9 -; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: lsr w8, w8, w1 ; CHECK-NEXT: b LBB8_2 ; CHECK-NEXT: LBB8_1: ; %loop.latch ; CHECK-NEXT: ; in Loop: Header=BB8_2 Depth=1 @@ -623,8 +620,7 @@ define i64 @consecutive_match_both(ptr %p, i32 %param) { ; CHECK-NEXT: b.hi LBB8_1 ; CHECK-NEXT: ; %bb.3: ; %loop.header ; CHECK-NEXT: ; in Loop: Header=BB8_2 Depth=1 -; CHECK-NEXT: tst w8, w10 -; CHECK-NEXT: b.eq LBB8_1 +; CHECK-NEXT: tbz w8, #0, LBB8_1 ; CHECK-NEXT: ; %bb.4: ; %e0 ; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: ret @@ -688,10 +684,9 @@ e1: define i64 @consecutive_match_before(ptr %p, i32 %param) { ; CHECK-LABEL: consecutive_match_before: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w8, #1 ; =0x1 +; CHECK-NEXT: mov w8, #25 ; =0x19 ; CHECK-NEXT: mov w9, #100 ; =0x64 -; CHECK-NEXT: mov w10, #25 ; =0x19 -; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: lsr w8, w8, w1 ; CHECK-NEXT: b LBB9_2 ; CHECK-NEXT: LBB9_1: ; %loop.latch ; CHECK-NEXT: ; in Loop: Header=BB9_2 Depth=1 @@ -703,8 +698,7 @@ define i64 @consecutive_match_before(ptr %p, i32 %param) { ; CHECK-NEXT: b.hi LBB9_1 ; CHECK-NEXT: ; %bb.3: ; %loop.header ; CHECK-NEXT: ; in Loop: Header=BB9_2 Depth=1 -; CHECK-NEXT: tst w8, w10 -; CHECK-NEXT: b.eq LBB9_1 +; CHECK-NEXT: tbz w8, #0, LBB9_1 ; CHECK-NEXT: ; %bb.4: ; %e0 ; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: ret @@ -765,10 +759,9 @@ e1: define i64 @consecutive_match_after(ptr %p, i32 %param) { ; CHECK-LABEL: consecutive_match_after: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w8, #1 ; =0x1 +; CHECK-NEXT: mov w8, #49 ; =0x31 ; CHECK-NEXT: mov w9, #100 ; =0x64 -; CHECK-NEXT: mov w10, #49 ; =0x31 -; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: lsr w8, w8, w1 ; CHECK-NEXT: b LBB10_2 ; CHECK-NEXT: LBB10_1: ; %loop.latch ; CHECK-NEXT: ; in Loop: Header=BB10_2 Depth=1 @@ -780,8 +773,7 @@ define i64 @consecutive_match_after(ptr %p, i32 %param) { ; CHECK-NEXT: b.hi LBB10_1 ; CHECK-NEXT: ; %bb.3: ; %loop.header ; CHECK-NEXT: ; in Loop: Header=BB10_2 Depth=1 -; CHECK-NEXT: tst w8, w10 -; CHECK-NEXT: b.eq LBB10_1 +; CHECK-NEXT: tbz w8, #0, LBB10_1 ; CHECK-NEXT: ; %bb.4: ; %e0 ; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: ret >From ec13b590217090f56d3f05c59d0408f635faf603 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne <[email protected]> Date: Mon, 12 Jan 2026 16:18:38 -0800 Subject: [PATCH 2/2] Address review comments Created using spr 1.3.6-beta.1 --- .../Target/AArch64/AArch64ISelLowering.cpp | 11 +++---- llvm/test/CodeGen/AArch64/bit-test.ll | 33 ++++++++++--------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 476a455f2e506..8919b9614e2e9 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11179,10 +11179,9 @@ std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) { // Op is an SDValue that is being compared to 0. If the comparison is a bit // test, optimize it to a TBZ or TBNZ. -static SDValue optimizeBitTest(SDValue Op, SDValue Chain, SDValue Dest, - unsigned Opcode, SelectionDAG &DAG) { - SDLoc DL(Op); - +static SDValue optimizeBitTest(SDLoc DL, SDValue Op, SDValue Chain, + SDValue Dest, unsigned Opcode, + SelectionDAG &DAG) { if (Op.getOpcode() != ISD::AND) return SDValue(); @@ -11271,13 +11270,13 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) { if (CC == ISD::SETEQ) { if (SDValue Result = - optimizeBitTest(LHS, Chain, Dest, AArch64ISD::TBZ, DAG)) + optimizeBitTest(DL, LHS, Chain, Dest, AArch64ISD::TBZ, DAG)) return Result; return DAG.getNode(AArch64ISD::CBZ, DL, MVT::Other, Chain, LHS, Dest); } else if (CC == ISD::SETNE) { if (SDValue Result = - optimizeBitTest(LHS, Chain, Dest, AArch64ISD::TBNZ, DAG)) + optimizeBitTest(DL, LHS, Chain, Dest, AArch64ISD::TBNZ, DAG)) return Result; return DAG.getNode(AArch64ISD::CBNZ, DL, MVT::Other, Chain, LHS, Dest); diff --git a/llvm/test/CodeGen/AArch64/bit-test.ll b/llvm/test/CodeGen/AArch64/bit-test.ll index aed7d6c5510d4..28e47c5f2946e 100644 --- a/llvm/test/CodeGen/AArch64/bit-test.ll +++ b/llvm/test/CodeGen/AArch64/bit-test.ll @@ -9,18 +9,19 @@ define void @bt(i64 %val) { ; CHECK-NEXT: mov w8, #123 // =0x7b ; CHECK-NEXT: lsr x8, x8, x0 ; CHECK-NEXT: tbz w8, #0, .LBB0_2 -; CHECK-NEXT: // %bb.1: // %t +; CHECK-NEXT: // %bb.1: // %common.ret +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %t ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: bl f ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .LBB0_2: // %common.ret ; CHECK-NEXT: ret - %shl = shl i64 1, %val - %and = and i64 123, %shl - %cmp = icmp ne i64 %and, 0 + %shl = shl nuw i64 1, %val + %and = and i64 %shl, 123 + %cmp = icmp eq i64 %and, 0 br i1 %cmp, label %t, label %f t: @@ -37,7 +38,9 @@ define void @bt_shl_use(i64 %val) { ; CHECK-NEXT: mov w8, #123 // =0x7b ; CHECK-NEXT: lsr x8, x8, x0 ; CHECK-NEXT: tbz w8, #0, .LBB1_2 -; CHECK-NEXT: // %bb.1: // %t +; CHECK-NEXT: // %bb.1: // %common.ret +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: // %t ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 @@ -45,11 +48,10 @@ define void @bt_shl_use(i64 %val) { ; CHECK-NEXT: lsl x0, x8, x0 ; CHECK-NEXT: bl f ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .LBB1_2: // %common.ret ; CHECK-NEXT: ret - %shl = shl i64 1, %val - %and = and i64 123, %shl - %cmp = icmp ne i64 %and, 0 + %shl = shl nuw i64 1, %val + %and = and i64 %shl, 123 + %cmp = icmp eq i64 %and, 0 br i1 %cmp, label %t, label %f t: @@ -66,7 +68,9 @@ define void @bt_and_use(i64 %val) { ; CHECK-NEXT: mov w8, #123 // =0x7b ; CHECK-NEXT: lsr x9, x8, x0 ; CHECK-NEXT: tbz w9, #0, .LBB2_2 -; CHECK-NEXT: // %bb.1: // %t +; CHECK-NEXT: // %bb.1: // %common.ret +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_2: // %t ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 @@ -75,11 +79,10 @@ define void @bt_and_use(i64 %val) { ; CHECK-NEXT: and x0, x9, x8 ; CHECK-NEXT: bl f ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .LBB2_2: // %common.ret ; CHECK-NEXT: ret - %shl = shl i64 1, %val - %and = and i64 123, %shl - %cmp = icmp ne i64 %and, 0 + %shl = shl nuw i64 1, %val + %and = and i64 %shl, 123 + %cmp = icmp eq i64 %and, 0 br i1 %cmp, label %t, label %f t: _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
