https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100378
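The series below teaches the generic is_fpclass expansion to fold the nan class into the floating-point compare itself. As a rough standalone C++ sketch of the IEEE-754 fact the first patch relies on (an illustration only, not LLVM code; the helper names are invented):

#include <cmath>
#include <cstdio>
#include <limits>

static const double Inf = std::numeric_limits<double>::infinity();

// An *ordered* compare is false whenever an operand is nan and an *unordered*
// compare is true, so comparing |x| against +inf can cover several classes
// with a single compare:
//   is_fpclass(x, inf)      <->  fcmp oeq (fabs(x), +inf)
//   is_fpclass(x, inf|nan)  <->  fcmp ueq (fabs(x), +inf)
//   is_fpclass(x, finite)   <->  fcmp one (fabs(x), +inf)   (the inverted test)

bool isInfOrNan(double X) {
  // "!(a < b)" is the unordered >= predicate: true for nan, and since
  // fabs(X) can never exceed +inf it acts as "unordered equal" here.
  return !(std::fabs(X) < Inf);
}

bool isFinite(double X) {
  // Ordered <: false for nan and for +/-inf, true for every finite value.
  return std::fabs(X) < Inf;
}

int main() {
  double Nan = std::numeric_limits<double>::quiet_NaN();
  std::printf("%d %d %d\n", isFinite(1.5), isFinite(Nan), isInfOrNan(-Inf));
  // Expected output: 1 0 1
}

The x86_64 CHECK-line updates in the first patch (andps with the sign-clearing mask followed by ucomiss against +inf, then setne) are the machine-level form of exactly this compare.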
>From 4edffb2750e8320c39109cd7c9c086c2ee86e9d4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <matthew.arsena...@amd.com>
Date: Tue, 7 Feb 2023 12:22:05 -0400
Subject: [PATCH 1/3] DAG: Handle lowering unordered compare with inf

Try to take advantage of the nan check behavior of fcmp.
x86_64 looks better, x86_32 looks worse.
---
 llvm/include/llvm/CodeGen/CodeGenCommonISel.h |  7 +-
 llvm/lib/CodeGen/CodeGenCommonISel.cpp        |  8 +-
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 53 +++++++------
 llvm/test/CodeGen/X86/is_fpclass.ll           | 78 +++++++++----------
 4 files changed, 83 insertions(+), 63 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
index 90ef890f22d1b1..e4b2e20babc07a 100644
--- a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
+++ b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
@@ -218,10 +218,15 @@ findSplitPointForStackProtector(MachineBasicBlock *BB,
 /// Evaluates if the specified FP class test is better performed as the inverse
 /// (i.e. fewer instructions should be required to lower it). An example is the
 /// test "inf|normal|subnormal|zero", which is an inversion of "nan".
+///
 /// \param Test The test as specified in 'is_fpclass' intrinsic invocation.
+///
+/// \param UseFCmp The intention is to perform the comparison using
+/// floating-point compare instructions which check for nan.
+///
 /// \returns The inverted test, or fcNone, if inversion does not produce a
 /// simpler test.
-FPClassTest invertFPClassTestIfSimpler(FPClassTest Test);
+FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp);
 
 /// Assuming the instruction \p MI is going to be deleted, attempt to salvage
 /// debug users of \p MI by writing the effect of \p MI in a DIExpression.
diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index fe144d3c182039..d985751e2be0be 100644
--- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -173,8 +173,9 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
   return SplitPoint;
 }
 
-FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
+FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp) {
   FPClassTest InvertedTest = ~Test;
+
   // Pick the direction with fewer tests
   // TODO: Handle more combinations of cases that can be handled together
   switch (static_cast<unsigned>(InvertedTest)) {
@@ -200,6 +201,11 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
   case fcSubnormal | fcZero:
   case fcSubnormal | fcZero | fcNan:
     return InvertedTest;
+  case fcInf | fcNan:
+    // If we're trying to use fcmp, we can take advantage of the nan check
+    // behavior of the compare (but this is more instructions in the integer
+    // expansion).
+    return UseFCmp ? InvertedTest : fcNone;
   default:
     return fcNone;
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4e796289cff0a1..1e3a0da0f3be5b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8672,7 +8672,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
   // Degenerated cases.
   if (Test == fcNone)
     return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
-  if ((Test & fcAllFlags) == fcAllFlags)
+  if (Test == fcAllFlags)
     return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
 
   // PPC double double is a pair of doubles, of which the higher part determines
@@ -8683,14 +8683,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
     OperandVT = MVT::f64;
   }
 
-  // Some checks may be represented as inversion of simpler check, for example
-  // "inf|normal|subnormal|zero" => !"nan".
-  bool IsInverted = false;
-  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
-    IsInverted = true;
-    Test = InvertedCheck;
-  }
-
   // Floating-point type properties.
   EVT ScalarFloatVT = OperandVT.getScalarType();
   const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
@@ -8702,9 +8694,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
   if (Flags.hasNoFPExcept() &&
       isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
     FPClassTest FPTestMask = Test;
+    bool IsInvertedFP = false;
+
+    if (FPClassTest InvertedFPCheck =
+            invertFPClassTestIfSimpler(FPTestMask, true)) {
+      FPTestMask = InvertedFPCheck;
+      IsInvertedFP = true;
+    }
 
-    ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
-    ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
+    ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
+    ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
 
     // See if we can fold an | fcNan into an unordered compare.
     FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
@@ -8717,7 +8716,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
     const bool IsOrdered = FPTestMask == OrderedFPTestMask;
 
     if (std::optional<bool> IsCmp0 =
-            isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
+            isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
         IsCmp0 && (isCondCodeLegalOrCustom(
                       *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
                       OperandVT.getScalarType().getSimpleVT()))) {
@@ -8729,15 +8728,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
                           *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
     }
 
-    if (Test == fcNan &&
-        isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
-                                OperandVT.getScalarType().getSimpleVT())) {
+    if (FPTestMask == fcNan &&
+        isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
+                                OperandVT.getScalarType().getSimpleVT()))
       return DAG.getSetCC(DL, ResultVT, Op, Op,
-                          IsInverted ? ISD::SETO : ISD::SETUO);
-    }
+                          IsInvertedFP ? ISD::SETO : ISD::SETUO);
 
-    if (Test == fcInf &&
-        isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
+    bool IsOrderedInf = FPTestMask == fcInf;
+    if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
+        isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
+                                             : UnorderedCmpOpcode,
                                 OperandVT.getScalarType().getSimpleVT()) &&
         isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
       // isinf(x) --> fabs(x) == inf
@@ -8745,15 +8745,15 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
       SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
       SDValue Inf =
           DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
       return DAG.getSetCC(DL, ResultVT, Abs, Inf,
-                          IsInverted ? ISD::SETUNE : ISD::SETOEQ);
+                          IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
     }
 
     if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
       // TODO: Could handle ordered case, but it produces worse code for
       // x86. Maybe handle ordered if fabs is free?
-      ISD::CondCode OrderedOp = IsInverted ? ISD::SETUGE : ISD::SETOLT;
-      ISD::CondCode UnorderedOp = IsInverted ? ISD::SETOGE : ISD::SETULT;
+      ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
+      ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
 
       if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
                                   OperandVT.getScalarType().getSimpleVT())) {
@@ -8770,6 +8770,15 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
     }
   }
 
+  // Some checks may be represented as inversion of simpler check, for example
+  // "inf|normal|subnormal|zero" => !"nan".
+  bool IsInverted = false;
+
+  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
+    Test = InvertedCheck;
+    IsInverted = true;
+  }
+
   // In the general case use integer operations.
   unsigned BitSize = OperandVT.getScalarSizeInBits();
   EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index cc4d4c4543a515..4e46abbdb01c34 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -240,18 +240,22 @@ entry:
 define i1 @isfinite_f(float %x) {
 ; X86-LABEL: isfinite_f:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT:    setl %al
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    fabs
+; X86-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT:    fxch %st(1)
+; X86-NEXT:    fucompp
+; X86-NEXT:    fnstsw %ax
+; X86-NEXT:    # kill: def $ah killed $ah killed $ax
+; X86-NEXT:    sahf
+; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: isfinite_f:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movd %xmm0, %eax
-; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT:    cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT:    setl %al
+; X64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
@@ -1150,31 +1154,23 @@ entry:
 define i1 @isfinite_d(double %x) {
 ; X86-LABEL: isfinite_d:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl $2146435072, %eax # imm = 0x7FF00000
-; X86-NEXT:    setl %al
+; X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-NEXT:    fabs
+; X86-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT:    fxch %st(1)
+; X86-NEXT:    fucompp
+; X86-NEXT:    fnstsw %ax
+; X86-NEXT:    # kill: def $ah killed $ah killed $ax
+; X86-NEXT:    sahf
+; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
 ;
-; X64-GENERIC-LABEL: isfinite_d:
-; X64-GENERIC:       # %bb.0: # %entry
-; X64-GENERIC-NEXT:    movq %xmm0, %rax
-; X64-GENERIC-NEXT:    movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-GENERIC-NEXT:    andq %rax, %rcx
-; X64-GENERIC-NEXT:    movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000
-; X64-GENERIC-NEXT:    cmpq %rax, %rcx
-; X64-GENERIC-NEXT:    setl %al
-; X64-GENERIC-NEXT:    retq
-;
-; X64-NDD-LABEL: isfinite_d:
-; X64-NDD:       # %bb.0: # %entry
-; X64-NDD-NEXT:    movq %xmm0, %rax
-; X64-NDD-NEXT:    movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-NDD-NEXT:    andq %rcx, %rax
-; X64-NDD-NEXT:    movabsq $9218868437227405312, %rcx # imm = 0x7FF0000000000000
-; X64-NDD-NEXT:    cmpq %rcx, %rax
-; X64-NDD-NEXT:    setl %al
-; X64-NDD-NEXT:    retq
+; X64-LABEL: isfinite_d:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT:    ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT:    setne %al
+; X64-NEXT:    retq
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 504) ; 0x1f8 = "finite"
   ret i1 %0
@@ -2053,18 +2049,22 @@ entry:
 define i1 @not_isinf_or_nan_f(float %x) {
 ; X86-LABEL: not_isinf_or_nan_f:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT:    setl %al
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    fabs
+; X86-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT:    fxch %st(1)
+; X86-NEXT:    fucompp
+; X86-NEXT:    fnstsw %ax
+; X86-NEXT:    # kill: def $ah killed $ah killed $ax
+; X86-NEXT:    sahf
+; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: not_isinf_or_nan_f:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movd %xmm0, %eax
-; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT:    cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT:    setl %al
+; X64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; ~(0x204|0x3) = "~(inf|nan)"

>From 5c3bed8aefde9f2d49182fb1704bcbb2a3c49cac Mon Sep 17 00:00:00 2001
From: Matt Arsenault <matthew.arsena...@amd.com>
Date: Wed, 21 Aug 2024 15:42:24 +0400
Subject: [PATCH 2/3] Check for immediate legality

This avoids the x86_32 regressions, at the expense of several other cases.

---
 .../CodeGen/SelectionDAG/TargetLowering.cpp |  5 +-
 llvm/test/CodeGen/AArch64/isinf.ll          | 12 +--
 llvm/test/CodeGen/PowerPC/fp-classify.ll    | 28 +++----
 llvm/test/CodeGen/X86/is_fpclass.ll         | 78 +++++++++----------
 4 files changed, 63 insertions(+), 60 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 1e3a0da0f3be5b..aa022480947a7d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8739,7 +8739,10 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
         isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
                                              : UnorderedCmpOpcode,
                                 OperandVT.getScalarType().getSimpleVT()) &&
-        isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
+        isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
+        (isOperationLegal(ISD::ConstantFP, OperandVT.getScalarType()) ||
+         (OperandVT.isVector() &&
+          isOperationLegalOrCustom(ISD::BUILD_VECTOR, OperandVT)))) {
       // isinf(x) --> fabs(x) == inf
       SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
       SDValue Inf =
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll
index 834417b98743a8..e68539bcf07d9c 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -26,10 +26,10 @@ define i32 @replace_isinf_call_f16(half %x) {
 define i32 @replace_isinf_call_f32(float %x) {
 ; CHECK-LABEL: replace_isinf_call_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fabs s0, s0
+; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #2139095040 // =0x7f800000
-; CHECK-NEXT:    fmov s1, w8
-; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    and w9, w9, #0x7fffffff
+; CHECK-NEXT:    cmp w9, w8
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %abs = tail call float @llvm.fabs.f32(float %x)
@@ -42,10 +42,10 @@ define i32 @replace_isinf_call_f32(float %x) {
 define i32 @replace_isinf_call_f64(double %x) {
 ; CHECK-LABEL: replace_isinf_call_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fabs d0, d0
+; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    mov x8, #9218868437227405312 // =0x7ff0000000000000
-; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    and x9, x9, #0x7fffffffffffffff
+; CHECK-NEXT:    cmp x9, x8
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %abs = tail call double @llvm.fabs.f64(double %x)
diff --git a/llvm/test/CodeGen/PowerPC/fp-classify.ll b/llvm/test/CodeGen/PowerPC/fp-classify.ll
index f527b3c48040e7..dc9853ff2e3014 100644
--- a/llvm/test/CodeGen/PowerPC/fp-classify.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-classify.ll
@@ -7,13 +7,13 @@
 define zeroext i1 @abs_isinff(float %x) {
 ; P8-LABEL: abs_isinff:
 ; P8:       # %bb.0: # %entry
-; P8-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; P8-NEXT:    xsabsdp 0, 1
-; P8-NEXT:    li 4, 1
-; P8-NEXT:    lfs 1, .LCPI0_0@toc@l(3)
-; P8-NEXT:    li 3, 0
-; P8-NEXT:    fcmpu 0, 0, 1
-; P8-NEXT:    iseleq 3, 4, 3
+; P8-NEXT:    xscvdpspn 0, 1
+; P8-NEXT:    lis 4, 32640
+; P8-NEXT:    mffprwz 3, 0
+; P8-NEXT:    clrlwi 3, 3, 1
+; P8-NEXT:    xor 3, 3, 4
+; P8-NEXT:    cntlzw 3, 3
+; P8-NEXT:    srwi 3, 3, 5
 ; P8-NEXT:    blr
 ;
 ; P9-LABEL: abs_isinff:
@@ -32,13 +32,13 @@ entry:
 define zeroext i1 @abs_isinf(double %x) {
 ; P8-LABEL: abs_isinf:
 ; P8:       # %bb.0: # %entry
-; P8-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
-; P8-NEXT:    xsabsdp 0, 1
-; P8-NEXT:    li 4, 1
-; P8-NEXT:    lfs 1, .LCPI1_0@toc@l(3)
-; P8-NEXT:    li 3, 0
-; P8-NEXT:    fcmpu 0, 0, 1
-; P8-NEXT:    iseleq 3, 4, 3
+; P8-NEXT:    mffprd 3, 1
+; P8-NEXT:    li 4, 2047
+; P8-NEXT:    rldic 4, 4, 52, 1
+; P8-NEXT:    clrldi 3, 3, 1
+; P8-NEXT:    xor 3, 3, 4
+; P8-NEXT:    cntlzd 3, 3
+; P8-NEXT:    rldicl 3, 3, 58, 63
 ; P8-NEXT:    blr
 ;
 ; P9-LABEL: abs_isinf:
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index 4e46abbdb01c34..cc4d4c4543a515 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -240,22 +240,18 @@ entry:
 define i1 @isfinite_f(float %x) {
 ; X86-LABEL: isfinite_f:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    fabs
-; X86-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
-; X86-NEXT:    fxch %st(1)
-; X86-NEXT:    fucompp
-; X86-NEXT:    fnstsw %ax
-; X86-NEXT:    # kill: def $ah killed $ah killed $ax
-; X86-NEXT:    sahf
-; X86-NEXT:    setne %al
+; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-NEXT:    setl %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: isfinite_f:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT:    setne %al
+; X64-NEXT:    movd %xmm0, %eax
+; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT:    cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT:    setl %al
 ; X64-NEXT:    retq
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
@@ -1154,23 +1150,31 @@ entry:
 define i1 @isfinite_d(double %x) {
 ; X86-LABEL: isfinite_d:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-NEXT:    fabs
-; X86-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
-; X86-NEXT:    fxch %st(1)
-; X86-NEXT:    fucompp
-; X86-NEXT:    fnstsw %ax
-; X86-NEXT:    # kill: def $ah killed $ah killed $ax
-; X86-NEXT:    sahf
-; X86-NEXT:    setne %al
+; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmpl $2146435072, %eax # imm = 0x7FF00000
+; X86-NEXT:    setl %al
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: isfinite_d:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT:    ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT:    setne %al
-; X64-NEXT:    retq
+; X64-GENERIC-LABEL: isfinite_d:
+; X64-GENERIC:       # %bb.0: # %entry
+; X64-GENERIC-NEXT:    movq %xmm0, %rax
+; X64-GENERIC-NEXT:    movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; X64-GENERIC-NEXT:    andq %rax, %rcx
+; X64-GENERIC-NEXT:    movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000
+; X64-GENERIC-NEXT:    cmpq %rax, %rcx
+; X64-GENERIC-NEXT:    setl %al
+; X64-GENERIC-NEXT:    retq
+;
+; X64-NDD-LABEL: isfinite_d:
+; X64-NDD:       # %bb.0: # %entry
+; X64-NDD-NEXT:    movq %xmm0, %rax
+; X64-NDD-NEXT:    movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    movabsq $9218868437227405312, %rcx # imm = 0x7FF0000000000000
+; X64-NDD-NEXT:    cmpq %rcx, %rax
+; X64-NDD-NEXT:    setl %al
+; X64-NDD-NEXT:    retq
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 504) ; 0x1f8 = "finite"
   ret i1 %0
@@ -2049,22 +2053,18 @@ entry:
 define i1 @not_isinf_or_nan_f(float %x) {
 ; X86-LABEL: not_isinf_or_nan_f:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    fabs
-; X86-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
-; X86-NEXT:    fxch %st(1)
-; X86-NEXT:    fucompp
-; X86-NEXT:    fnstsw %ax
-; X86-NEXT:    # kill: def $ah killed $ah killed $ax
-; X86-NEXT:    sahf
-; X86-NEXT:    setne %al
+; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-NEXT:    setl %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: not_isinf_or_nan_f:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT:    setne %al
+; X64-NEXT:    movd %xmm0, %eax
+; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT:    cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT:    setl %al
 ; X64-NEXT:    retq
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; ~(0x204|0x3) = "~(inf|nan)"

>From 238f697ae0fd38a8a6bfb0c011c416d5a11d71b6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <matthew.arsena...@amd.com>
Date: Thu, 29 Aug 2024 08:17:49 +0400
Subject: [PATCH 3/3] Remove doxygen line

---
 llvm/include/llvm/CodeGen/CodeGenCommonISel.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
index e4b2e20babc07a..4c22be94507866 100644
--- a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
+++ b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
@@ -220,7 +220,6 @@ findSplitPointForStackProtector(MachineBasicBlock *BB,
 /// test "inf|normal|subnormal|zero", which is an inversion of "nan".
 ///
 /// \param Test The test as specified in 'is_fpclass' intrinsic invocation.
-///
 /// \param UseFCmp The intention is to perform the comparison using
 /// floating-point compare instructions which check for nan.
 ///
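Read together, the three patches amount to the decision sketched below, restated as simplified standalone C++ (the FPClassTest bit values match the masks used in the tests above, e.g. 0x1f8 = "finite"; the function names and boolean parameters are illustrative, not the actual SelectionDAG interfaces):

#include <cstdio>

enum : unsigned {
  fcNan = 0x003,      // snan | qnan
  fcInf = 0x204,      // -inf | +inf
  fcAllFlags = 0x3ff,
  fcNone = 0x000
};

// Patch 1: inverting a test into "inf|nan" only pays off when the result is
// lowered with an fcmp, whose unordered predicate checks for nan for free;
// the integer expansion would need extra instructions for the nan bits.
unsigned invertIfSimplerSketch(unsigned Test, bool UseFCmp) {
  unsigned Inverted = ~Test & fcAllFlags;
  if (Inverted == (fcInf | fcNan))
    return UseFCmp ? Inverted : fcNone;
  // ... the pre-existing cases are omitted from this sketch ...
  return fcNone;
}

// Patch 2: only prefer the "fabs(x) == +inf" compare when the target can
// also materialize the +inf constant legally; otherwise keep the integer
// bit tests (this is what avoids the x86_32 regressions).
bool useFCmpAgainstInf(bool CondCodeLegal, bool FAbsLegal, bool FPConstLegal) {
  return CondCodeLegal && FAbsLegal && FPConstLegal;
}

int main() {
  // "finite" (0x1f8) inverts to "inf|nan" (0x207) only when an fcmp is used.
  std::printf("%x %x\n", invertIfSimplerSketch(0x1f8, true),
              invertIfSimplerSketch(0x1f8, false)); // prints: 207 0
}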