https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/181162
>From 9b8aebe720e10ee2a2575202f2266553fed1c49f Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell <[email protected]> Date: Thu, 12 Feb 2026 14:39:05 +0000 Subject: [PATCH 1/2] [AArch64] Fold MIN/MAX(Vec[0], Vec[1]) to VECREDUCE_MIN/MAX(Vec) If we have a lowering for `VECREDUCE_MIN/MAX` this is generally more efficient than the scalar expansion. --- .../Target/AArch64/AArch64ISelLowering.cpp | 58 +++++++++-- llvm/test/CodeGen/AArch64/v2i64-min-max.ll | 99 +++++++++++++++++++ 2 files changed, 147 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/v2i64-min-max.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e55a75127235c..5621d43201bb1 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1160,7 +1160,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, ISD::SIGN_EXTEND_INREG, ISD::CONCAT_VECTORS, ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR, ISD::STORE, ISD::BUILD_VECTOR}); - setTargetDAGCombine(ISD::SMIN); + setTargetDAGCombine({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}); setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::LOAD); @@ -22639,14 +22639,6 @@ static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::SIGN_EXTEND, DL, DestVT, SQDMULH); } -static SDValue performSMINCombine(SDNode *N, SelectionDAG &DAG) { - if (SDValue V = trySQDMULHCombine(N, DAG)) { - return V; - } - - return SDValue(); -} - static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { SDLoc DL(N); @@ -28825,6 +28817,49 @@ static SDValue performCTPOPCombine(SDNode *N, return DAG.getNegative(NegPopCount, DL, VT); } +static unsigned getReductionForOpcode(unsigned Op) { + switch (Op) { + case ISD::SMIN: + return ISD::VECREDUCE_SMIN; + case ISD::SMAX: + return ISD::VECREDUCE_SMAX; + case ISD::UMIN: + return ISD::VECREDUCE_UMIN; + case ISD::UMAX: + return ISD::VECREDUCE_UMAX; + default: + llvm_unreachable("unimplemented mapping"); + } +} + +static SDValue performMINMAXCombine(SDNode *N, SelectionDAG &DAG, + const AArch64Subtarget &Subtarget, + const AArch64TargetLowering &TLI) { + using namespace llvm::SDPatternMatch; + if (SDValue V = trySQDMULHCombine(N, DAG)) + return V; + + unsigned ReductionOpcode = getReductionForOpcode(N->getOpcode()); + if (!TLI.isOperationLegalOrCustom(ReductionOpcode, MVT::v2i64)) + return SDValue(); + + // Fold `min/max(vec[0], vec[1])` to `vecreduce_min/max(vec)` for v2i64. + + APInt Idx; + SDValue Vec; + if (!sd_match(N->getOperand(0), + m_OneUse(m_ExtractElt(m_SpecificVT(MVT::v2i64, m_Value(Vec)), + m_ConstInt(Idx))))) + return SDValue(); + + if (!sd_match( + N->getOperand(1), + m_OneUse(m_ExtractElt(m_Specific(Vec), m_SpecificInt(1 - Idx))))) + return SDValue(); + + return DAG.getNode(ReductionOpcode, SDLoc(N), MVT::i64, Vec); +} + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -28843,8 +28878,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performAddSubCombine(N, DCI); case ISD::BUILD_VECTOR: return performBuildVectorCombine(N, DCI, DAG); + case ISD::UMAX: + case ISD::UMIN: + case ISD::SMAX: case ISD::SMIN: - return performSMINCombine(N, DAG); + return performMINMAXCombine(N, DAG, *Subtarget, *this); case ISD::TRUNCATE: return performTruncateCombine(N, DAG, DCI); case AArch64ISD::ANDS: diff --git a/llvm/test/CodeGen/AArch64/v2i64-min-max.ll b/llvm/test/CodeGen/AArch64/v2i64-min-max.ll new file mode 100644 index 0000000000000..43cd59413ba21 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/v2i64-min-max.ll @@ -0,0 +1,99 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s -check-prefix=CHECK-SVE +; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s -check-prefix=CHECK-NEON + +define i64 @smax(<2 x i64> %0) { +; CHECK-SVE-LABEL: smax: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ptrue p0.d, vl2 +; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-SVE-NEXT: smaxv d0, p0, z0.d +; CHECK-SVE-NEXT: fmov x0, d0 +; CHECK-SVE-NEXT: ret +; +; CHECK-NEON-LABEL: smax: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: mov d1, v0.d[1] +; CHECK-NEON-NEXT: fmov x8, d0 +; CHECK-NEON-NEXT: fmov x9, d1 +; CHECK-NEON-NEXT: cmp x8, x9 +; CHECK-NEON-NEXT: fcsel d0, d0, d1, gt +; CHECK-NEON-NEXT: fmov x0, d0 +; CHECK-NEON-NEXT: ret + %2 = extractelement <2 x i64> %0, i64 0 + %3 = extractelement <2 x i64> %0, i64 1 + %4 = call i64 @llvm.smax.i64(i64 %2, i64 %3) + ret i64 %4 +} + +define i64 @umax(<2 x i64> %0) { +; CHECK-SVE-LABEL: umax: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ptrue p0.d, vl2 +; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-SVE-NEXT: umaxv d0, p0, z0.d +; CHECK-SVE-NEXT: fmov x0, d0 +; CHECK-SVE-NEXT: ret +; +; CHECK-NEON-LABEL: umax: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: mov d1, v0.d[1] +; CHECK-NEON-NEXT: fmov x8, d0 +; CHECK-NEON-NEXT: fmov x9, d1 +; CHECK-NEON-NEXT: cmp x8, x9 +; CHECK-NEON-NEXT: fcsel d0, d0, d1, hi +; CHECK-NEON-NEXT: fmov x0, d0 +; CHECK-NEON-NEXT: ret + %2 = extractelement <2 x i64> %0, i64 0 + %3 = extractelement <2 x i64> %0, i64 1 + %4 = call i64 @llvm.umax.i64(i64 %2, i64 %3) + ret i64 %4 +} + +define i64 @smin(<2 x i64> %0) { +; CHECK-SVE-LABEL: smin: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ptrue p0.d, vl2 +; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-SVE-NEXT: sminv d0, p0, z0.d +; CHECK-SVE-NEXT: fmov x0, d0 +; CHECK-SVE-NEXT: ret +; +; CHECK-NEON-LABEL: smin: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: mov d1, v0.d[1] +; CHECK-NEON-NEXT: fmov x8, d0 +; CHECK-NEON-NEXT: fmov x9, d1 +; CHECK-NEON-NEXT: cmp x8, x9 +; CHECK-NEON-NEXT: fcsel d0, d0, d1, lt +; CHECK-NEON-NEXT: fmov x0, d0 +; CHECK-NEON-NEXT: ret + %2 = extractelement <2 x i64> %0, i64 0 + %3 = extractelement <2 x i64> %0, i64 1 + %4 = call i64 @llvm.smin.i64(i64 %2, i64 %3) + ret i64 %4 +} + +define i64 @umin(<2 x i64> %0) { +; CHECK-SVE-LABEL: umin: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ptrue p0.d, vl2 +; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-SVE-NEXT: uminv d0, p0, z0.d +; CHECK-SVE-NEXT: fmov x0, d0 +; CHECK-SVE-NEXT: ret +; +; CHECK-NEON-LABEL: umin: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: mov d1, v0.d[1] +; CHECK-NEON-NEXT: fmov x8, d0 +; CHECK-NEON-NEXT: fmov x9, d1 +; CHECK-NEON-NEXT: cmp x8, x9 +; CHECK-NEON-NEXT: fcsel d0, d0, d1, lo +; CHECK-NEON-NEXT: fmov x0, d0 +; CHECK-NEON-NEXT: ret + %2 = extractelement <2 x i64> %0, i64 0 + %3 = extractelement <2 x i64> %0, i64 1 + %4 = call i64 @llvm.umin.i64(i64 %2, i64 %3) + ret i64 %4 +} >From 715539a273f3c4f79f00cbec58c7bfb87e3d344a Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell <[email protected]> Date: Thu, 12 Feb 2026 17:30:21 +0000 Subject: [PATCH 2/2] Fixups --- .../Target/AArch64/AArch64ISelLowering.cpp | 3 +- llvm/test/CodeGen/AArch64/v2i64-min-max.ll | 64 +++++++++++-------- 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 5621d43201bb1..772ce3a5b84b4 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -28833,7 +28833,6 @@ static unsigned getReductionForOpcode(unsigned Op) { } static SDValue performMINMAXCombine(SDNode *N, SelectionDAG &DAG, - const AArch64Subtarget &Subtarget, const AArch64TargetLowering &TLI) { using namespace llvm::SDPatternMatch; if (SDValue V = trySQDMULHCombine(N, DAG)) @@ -28882,7 +28881,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, case ISD::UMIN: case ISD::SMAX: case ISD::SMIN: - return performMINMAXCombine(N, DAG, *Subtarget, *this); + return performMINMAXCombine(N, DAG, *this); case ISD::TRUNCATE: return performTruncateCombine(N, DAG, DCI); case AArch64ISD::ANDS: diff --git a/llvm/test/CodeGen/AArch64/v2i64-min-max.ll b/llvm/test/CodeGen/AArch64/v2i64-min-max.ll index 43cd59413ba21..2dae80df944fa 100644 --- a/llvm/test/CodeGen/AArch64/v2i64-min-max.ll +++ b/llvm/test/CodeGen/AArch64/v2i64-min-max.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s -check-prefix=CHECK-SVE -; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s -check-prefix=CHECK-NEON +; RUN: llc < %s -mtriple=aarch64 | FileCheck %s -check-prefix=CHECK-NEON define i64 @smax(<2 x i64> %0) { ; CHECK-SVE-LABEL: smax: @@ -13,12 +13,10 @@ define i64 @smax(<2 x i64> %0) { ; ; CHECK-NEON-LABEL: smax: ; CHECK-NEON: // %bb.0: -; CHECK-NEON-NEXT: mov d1, v0.d[1] -; CHECK-NEON-NEXT: fmov x8, d0 -; CHECK-NEON-NEXT: fmov x9, d1 -; CHECK-NEON-NEXT: cmp x8, x9 -; CHECK-NEON-NEXT: fcsel d0, d0, d1, gt -; CHECK-NEON-NEXT: fmov x0, d0 +; CHECK-NEON-NEXT: mov x8, v0.d[1] +; CHECK-NEON-NEXT: fmov x9, d0 +; CHECK-NEON-NEXT: cmp x9, x8 +; CHECK-NEON-NEXT: csel x0, x9, x8, gt ; CHECK-NEON-NEXT: ret %2 = extractelement <2 x i64> %0, i64 0 %3 = extractelement <2 x i64> %0, i64 1 @@ -37,12 +35,10 @@ define i64 @umax(<2 x i64> %0) { ; ; CHECK-NEON-LABEL: umax: ; CHECK-NEON: // %bb.0: -; CHECK-NEON-NEXT: mov d1, v0.d[1] -; CHECK-NEON-NEXT: fmov x8, d0 -; CHECK-NEON-NEXT: fmov x9, d1 -; CHECK-NEON-NEXT: cmp x8, x9 -; CHECK-NEON-NEXT: fcsel d0, d0, d1, hi -; CHECK-NEON-NEXT: fmov x0, d0 +; CHECK-NEON-NEXT: mov x8, v0.d[1] +; CHECK-NEON-NEXT: fmov x9, d0 +; CHECK-NEON-NEXT: cmp x9, x8 +; CHECK-NEON-NEXT: csel x0, x9, x8, hi ; CHECK-NEON-NEXT: ret %2 = extractelement <2 x i64> %0, i64 0 %3 = extractelement <2 x i64> %0, i64 1 @@ -61,12 +57,10 @@ define i64 @smin(<2 x i64> %0) { ; ; CHECK-NEON-LABEL: smin: ; CHECK-NEON: // %bb.0: -; CHECK-NEON-NEXT: mov d1, v0.d[1] -; CHECK-NEON-NEXT: fmov x8, d0 -; CHECK-NEON-NEXT: fmov x9, d1 -; CHECK-NEON-NEXT: cmp x8, x9 -; CHECK-NEON-NEXT: fcsel d0, d0, d1, lt -; CHECK-NEON-NEXT: fmov x0, d0 +; CHECK-NEON-NEXT: mov x8, v0.d[1] +; CHECK-NEON-NEXT: fmov x9, d0 +; CHECK-NEON-NEXT: cmp x9, x8 +; CHECK-NEON-NEXT: csel x0, x9, x8, lt ; CHECK-NEON-NEXT: ret %2 = extractelement <2 x i64> %0, i64 0 %3 = extractelement <2 x i64> %0, i64 1 @@ -85,15 +79,35 @@ define i64 @umin(<2 x i64> %0) { ; ; CHECK-NEON-LABEL: umin: ; CHECK-NEON: // %bb.0: -; CHECK-NEON-NEXT: mov d1, v0.d[1] -; CHECK-NEON-NEXT: fmov x8, d0 -; CHECK-NEON-NEXT: fmov x9, d1 -; CHECK-NEON-NEXT: cmp x8, x9 -; CHECK-NEON-NEXT: fcsel d0, d0, d1, lo -; CHECK-NEON-NEXT: fmov x0, d0 +; CHECK-NEON-NEXT: mov x8, v0.d[1] +; CHECK-NEON-NEXT: fmov x9, d0 +; CHECK-NEON-NEXT: cmp x9, x8 +; CHECK-NEON-NEXT: csel x0, x9, x8, lo ; CHECK-NEON-NEXT: ret %2 = extractelement <2 x i64> %0, i64 0 %3 = extractelement <2 x i64> %0, i64 1 %4 = call i64 @llvm.umin.i64(i64 %2, i64 %3) ret i64 %4 } + +define i64 @umin_different_vectors(<2 x i64> %0, <2 x i64> %1) { +; CHECK-SVE-LABEL: umin_different_vectors: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: mov x8, v1.d[1] +; CHECK-SVE-NEXT: fmov x9, d0 +; CHECK-SVE-NEXT: cmp x9, x8 +; CHECK-SVE-NEXT: csel x0, x9, x8, lo +; CHECK-SVE-NEXT: ret +; +; CHECK-NEON-LABEL: umin_different_vectors: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: mov x8, v1.d[1] +; CHECK-NEON-NEXT: fmov x9, d0 +; CHECK-NEON-NEXT: cmp x9, x8 +; CHECK-NEON-NEXT: csel x0, x9, x8, lo +; CHECK-NEON-NEXT: ret + %3 = extractelement <2 x i64> %0, i64 0 + %4 = extractelement <2 x i64> %1, i64 1 + %5 = call i64 @llvm.umin.i64(i64 %3, i64 %4) + ret i64 %5 +} _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
