https://github.com/tru updated https://github.com/llvm/llvm-project/pull/150970
>From 81a3436485f262a2d8ed14b8bbf22dcfb9c6f110 Mon Sep 17 00:00:00 2001 From: Nikita Popov <npo...@redhat.com> Date: Mon, 28 Jul 2025 09:46:00 +0200 Subject: [PATCH] [CodeGen] More consistently expand float ops by default (#150597) These float operations were expanded for scalar f32/f64/f128, but not for f16 and more problematically, not for vectors. A small subset of them was separately set to expand for vectors. Change these to always expand by default, and adjust targets to mark these as legal where necessary instead. This is a much safer default, and avoids unnecessary legalization failures because a target failed to manually mark them as expand. Fixes https://github.com/llvm/llvm-project/issues/110753. Fixes https://github.com/llvm/llvm-project/issues/121390. (cherry picked from commit fe0dbe0f2950d95071be7140c7b4680f17a7ac4e) --- llvm/lib/CodeGen/TargetLoweringBase.cpp | 34 +++--- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 10 +- llvm/lib/Target/ARM/ARMISelLowering.cpp | 11 ++ .../PowerPC/froundeven-legalization.ll | 111 ++++++++++++++++++ 4 files changed, 145 insertions(+), 21 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/froundeven-legalization.ll diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 6feeb19bb8589..db2065f878727 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -806,7 +806,17 @@ void TargetLoweringBase::initActions() { ISD::SDIVFIX, ISD::SDIVFIXSAT, ISD::UDIVFIX, ISD::UDIVFIXSAT, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, - ISD::IS_FPCLASS}, + ISD::IS_FPCLASS, ISD::FCBRT, + ISD::FLOG, ISD::FLOG2, + ISD::FLOG10, ISD::FEXP, + ISD::FEXP2, ISD::FEXP10, + ISD::FFLOOR, ISD::FNEARBYINT, + ISD::FCEIL, ISD::FRINT, + ISD::FTRUNC, ISD::FROUNDEVEN, + ISD::FTAN, ISD::FACOS, + ISD::FASIN, ISD::FATAN, + ISD::FCOSH, ISD::FSINH, + ISD::FTANH, ISD::FATAN2}, VT, Expand); // Overflow operations default to expand @@ -852,13 +862,12 @@ void TargetLoweringBase::initActions() { // These operations default to expand for vector types. if (VT.isVector()) - setOperationAction( - {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG, - ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG, - ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND, - ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, - ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2}, - VT, Expand); + setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, + ISD::ANY_EXTEND_VECTOR_INREG, + ISD::SIGN_EXTEND_VECTOR_INREG, + ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR, + ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND}, + VT, Expand); // Constrained floating-point operations default to expand. #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ @@ -914,15 +923,6 @@ void TargetLoweringBase::initActions() { {MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128}, Expand); - // These library functions default to expand. - setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, - ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, - ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, - ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN, - ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH, - ISD::FATAN2}, - {MVT::f32, MVT::f64, MVT::f128}, Expand); - // Insert custom handling default for llvm.canonicalize.*. setOperationAction(ISD::FCANONICALIZE, {MVT::f16, MVT::f32, MVT::f64, MVT::f128}, Expand); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 3414fe758eff8..7b93382d1281f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -392,8 +392,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, // Library functions. These default to Expand, but we have instructions // for them. setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR, - ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM}, - MVT::f32, Legal); + ISD::FROUNDEVEN, ISD::FTRUNC}, + {MVT::f16, MVT::f32}, Legal); + setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, MVT::f32, Legal); setOperationAction(ISD::FLOG2, MVT::f32, Custom); setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom); @@ -413,9 +414,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom); - if (Subtarget->has16BitInsts()) + if (Subtarget->has16BitInsts()) { setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal); - else { + setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Legal); + } else { setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal); setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom); } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index fb72bab03e750..9593038ff2c9b 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -370,6 +370,11 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::FMINNUM, VT, Legal); setOperationAction(ISD::FMAXNUM, VT, Legal); setOperationAction(ISD::FROUND, VT, Legal); + setOperationAction(ISD::FROUNDEVEN, VT, Legal); + setOperationAction(ISD::FRINT, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom); setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); @@ -1507,6 +1512,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, setOperationAction(ISD::FLOG2, MVT::f16, Promote); setOperationAction(ISD::FROUND, MVT::f16, Legal); + setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal); + setOperationAction(ISD::FTRUNC, MVT::f16, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal); + setOperationAction(ISD::FRINT, MVT::f16, Legal); + setOperationAction(ISD::FFLOOR, MVT::f16, Legal); + setOperationAction(ISD::FCEIL, MVT::f16, Legal); } if (Subtarget->hasNEON()) { diff --git a/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll new file mode 100644 index 0000000000000..238e200bfc782 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=powerpc64le < %s | FileCheck %s + +define void @test(ptr %p1, ptr %p2) nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -224(1) +; CHECK-NEXT: li 5, 48 +; CHECK-NEXT: std 0, 240(1) +; CHECK-NEXT: std 27, 184(1) # 8-byte Folded Spill +; CHECK-NEXT: li 27, 16 +; CHECK-NEXT: std 28, 192(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 200(1) # 8-byte Folded Spill +; CHECK-NEXT: li 29, 32 +; CHECK-NEXT: li 28, 48 +; CHECK-NEXT: stxvd2x 56, 1, 5 # 16-byte Folded Spill +; CHECK-NEXT: li 5, 64 +; CHECK-NEXT: std 30, 208(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 30, 4 +; CHECK-NEXT: stxvd2x 57, 1, 5 # 16-byte Folded Spill +; CHECK-NEXT: li 5, 80 +; CHECK-NEXT: stxvd2x 58, 1, 5 # 16-byte Folded Spill +; CHECK-NEXT: li 5, 96 +; CHECK-NEXT: lxvd2x 58, 0, 3 +; CHECK-NEXT: stxvd2x 59, 1, 5 # 16-byte Folded Spill +; CHECK-NEXT: li 5, 112 +; CHECK-NEXT: lxvd2x 59, 3, 27 +; CHECK-NEXT: stxvd2x 60, 1, 5 # 16-byte Folded Spill +; CHECK-NEXT: li 5, 128 +; CHECK-NEXT: stxvd2x 61, 1, 5 # 16-byte Folded Spill +; CHECK-NEXT: li 5, 144 +; CHECK-NEXT: stxvd2x 62, 1, 5 # 16-byte Folded Spill +; CHECK-NEXT: li 5, 160 +; CHECK-NEXT: lxvd2x 62, 3, 28 +; CHECK-NEXT: stxvd2x 63, 1, 5 # 16-byte Folded Spill +; CHECK-NEXT: lxvd2x 63, 3, 29 +; CHECK-NEXT: xxswapd 57, 58 +; CHECK-NEXT: xxswapd 1, 59 +; CHECK-NEXT: xxswapd 60, 62 +; CHECK-NEXT: xxswapd 61, 63 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd 56, 1 +; CHECK-NEXT: xxlor 1, 59, 59 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd 0, 1 +; CHECK-NEXT: xxlor 1, 60, 60 +; CHECK-NEXT: xxmrgld 59, 0, 56 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd 60, 1 +; CHECK-NEXT: xxlor 1, 62, 62 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd 0, 1 +; CHECK-NEXT: xxlor 1, 61, 61 +; CHECK-NEXT: xxmrgld 62, 0, 60 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd 61, 1 +; CHECK-NEXT: xxlor 1, 63, 63 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd 0, 1 +; CHECK-NEXT: xxlor 1, 57, 57 +; CHECK-NEXT: xxmrgld 63, 0, 61 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd 61, 1 +; CHECK-NEXT: xxlor 1, 58, 58 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: nop +; CHECK-NEXT: li 3, 160 +; CHECK-NEXT: stxvd2x 63, 30, 29 +; CHECK-NEXT: xxswapd 0, 1 +; CHECK-NEXT: stxvd2x 62, 30, 28 +; CHECK-NEXT: stxvd2x 59, 30, 27 +; CHECK-NEXT: ld 29, 200(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 28, 192(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 27, 184(1) # 8-byte Folded Reload +; CHECK-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; CHECK-NEXT: li 3, 144 +; CHECK-NEXT: xxmrgld 0, 0, 61 +; CHECK-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload +; CHECK-NEXT: li 3, 128 +; CHECK-NEXT: stxvd2x 0, 0, 30 +; CHECK-NEXT: ld 30, 208(1) # 8-byte Folded Reload +; CHECK-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload +; CHECK-NEXT: li 3, 112 +; CHECK-NEXT: lxvd2x 60, 1, 3 # 16-byte Folded Reload +; CHECK-NEXT: li 3, 96 +; CHECK-NEXT: lxvd2x 59, 1, 3 # 16-byte Folded Reload +; CHECK-NEXT: li 3, 80 +; CHECK-NEXT: lxvd2x 58, 1, 3 # 16-byte Folded Reload +; CHECK-NEXT: li 3, 64 +; CHECK-NEXT: lxvd2x 57, 1, 3 # 16-byte Folded Reload +; CHECK-NEXT: li 3, 48 +; CHECK-NEXT: lxvd2x 56, 1, 3 # 16-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 224 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr + %v = load <8 x double>, ptr %p1, align 64 + %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %v) + store <8 x double> %res, ptr %p2, align 64 + ret void +} + +declare <8 x double> @llvm.roundeven.v8f64(<8 x double>) _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits