https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/131270
Backport d642eec78fc94ef3c5266dc0b10b8c51ea046e7a Requested by: @androm3da >From aacfe069eaacb4c1b562308fe6be7808fc50ba8b Mon Sep 17 00:00:00 2001 From: aankit-ca <quic_aan...@quicinc.com> Date: Thu, 13 Mar 2025 12:48:31 -0700 Subject: [PATCH] [HEXAGON] Fix semantics of ordered FP compares (#131089) For the ordered FP compare bitcode instructions, the Hexagon backend was assuming that no operand could be a NaN. This assumption is flawed. This patch fixes the code-generation to produce fpcmp.uo and appropriate bit comparison operators to account for the case when an operand to a FP compare is a NaN. Fix for https://github.com/llvm/llvm-project/issues/129391 Co-authored-by: aankit-quic <aan...@quicinc.com> (cherry picked from commit d642eec78fc94ef3c5266dc0b10b8c51ea046e7a) --- llvm/lib/Target/Hexagon/HexagonPatterns.td | 38 +++-- llvm/test/CodeGen/Hexagon/fcmp-nan.ll | 189 +++++++++++++++++++++ 2 files changed, 213 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/fcmp-nan.ll diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index cba5ff1ab0d9b..244f204539c89 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -721,11 +721,6 @@ def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>; def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>; def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>; def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>; -def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>; -def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>; -def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>; -def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>; -def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>; def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>; def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>; def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>; @@ -733,11 +728,6 @@ def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>; def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>; def: 
OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>; def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>; -def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>; -def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>; -def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>; -def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>; -def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>; def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>; def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>; def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>; @@ -900,15 +890,35 @@ def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>; def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>; def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>; -def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>; -def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>; +class T4<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3, InstHexagon MI4> + : OutPatFrag<(ops node:$Rs, node:$Rt), + (MI1 (MI2 (MI3 $Rs, $Rt), (MI4 $Rs, $Rt)))>; -def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>; -def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>; +class Cmpof<InstHexagon MI>: T3<C2_andn, MI, F2_sfcmpuo>; +class Cmpod<InstHexagon MI>: T3<C2_andn, MI, F2_dfcmpuo>; + +class Cmpofn<InstHexagon MI>: T4<C2_not, C2_or, MI, F2_sfcmpuo>; +class Cmpodn<InstHexagon MI>: T4<C2_not, C2_or, MI, F2_dfcmpuo>; + +def: OpmR_RR_pat<Cmpof<F2_sfcmpeq>, setoeq, i1, F32>; +def: OpmR_RR_pat<Cmpof<F2_sfcmpge>, setoge, i1, F32>; +def: OpmR_RR_pat<Cmpof<F2_sfcmpgt>, setogt, i1, F32>; +def: OpmR_RR_pat<Cmpof<F2_sfcmpge>, RevCmp<setole>, i1, F32>; +def: OpmR_RR_pat<Cmpof<F2_sfcmpgt>, RevCmp<setolt>, i1, F32>; +def: OpmR_RR_pat<Cmpofn<F2_sfcmpeq>, setone, i1, F32>; + +def: OpmR_RR_pat<Cmpod<F2_dfcmpeq>, setoeq, i1, F64>; +def: OpmR_RR_pat<Cmpod<F2_dfcmpge>, setoge, i1, F64>; +def: OpmR_RR_pat<Cmpod<F2_dfcmpgt>, setogt, i1, F64>; +def: OpmR_RR_pat<Cmpod<F2_dfcmpge>, RevCmp<setole>, i1, F64>; +def: OpmR_RR_pat<Cmpod<F2_dfcmpgt>, RevCmp<setolt>, i1, F64>; +def: OpmR_RR_pat<Cmpodn<F2_dfcmpeq>, 
setone, i1, F64>; def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>; def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>; +def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>; +def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>; // --(6) Select ---------------------------------------------------------- // diff --git a/llvm/test/CodeGen/Hexagon/fcmp-nan.ll b/llvm/test/CodeGen/Hexagon/fcmp-nan.ll new file mode 100644 index 0000000000000..1469402911601 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/fcmp-nan.ll @@ -0,0 +1,189 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; +; Test that all FP ordered compare instructions generate the correct +; post-processing to accommodate NaNs. +; +; Specifically for ordered FP compares, we have to check if one of +; the operands was a NaN to conform to the semantics of the ordered +; fcmp bitcode instruction +; +target triple = "hexagon" + +; +; Functions for float: +; + +; +; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.eq(r0,r1) +; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_oeq_f(float %val, float %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp oeq float %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.eq(r0,r1) +; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1) +; CHECK: [[REG2:p([0-3])]] = or([[REG0]],[[REG1]]) +; CHECK: r0 = mux([[REG2]],#0,#1) +; +define i32 @compare_one_f(float %val, float %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp one float %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.gt(r0,r1) +; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_ogt_f(float %val, float %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp ogt float %val, %val2 + %0 = zext i1 %cmpinf to 
i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.ge(r1,r0) +; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r1,r0) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_ole_f(float %val, float %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp ole float %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.ge(r0,r1) +; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_oge_f(float %val, float %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp oge float %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.gt(r1,r0) +; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r1,r0) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_olt_f(float %val, float %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp olt float %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + + +; +; Functions for double: +; + +; +; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.eq(r1:0,r3:2) +; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_oeq_d(double %val, double %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp oeq double %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.eq(r1:0,r3:2) +; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2) +; CHECK: [[REG2:p([0-3])]] = or([[REG0]],[[REG1]]) +; CHECK: r0 = mux([[REG2]],#0,#1) +; +define i32 @compare_one_d(double %val, double %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp one double %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.gt(r1:0,r3:2) +; CHECK-DAG: 
[[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_ogt_d(double %val, double %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp ogt double %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.ge(r3:2,r1:0) +; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r3:2,r1:0) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_ole_d(double %val, double %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp ole double %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.ge(r1:0,r3:2) +; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_oge_d(double %val, double %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp oge double %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.gt(r3:2,r1:0) +; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r3:2,r1:0) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_olt_d(double %val, double %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp olt double %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits