On Fri, Dec 15, 2017 at 11:57:46AM +0000, Sudi Das wrote: > Hi > > This patch fixes the inconsistent behavior observed at -O3 for the > unordered comparisons. According to the online docs > (https://gcc.gnu.org/onlinedocs/gcc-7.2.0/gccint/Unary-and-Binary-Expressions.html), > > all of the following should not raise an FP exception: > - UNGE_EXPR > - UNGT_EXPR > - UNLE_EXPR > - UNLT_EXPR > - UNEQ_EXPR > Also ORDERED_EXPR and UNORDERED_EXPR should only return zero or one. > > The aarch64-simd.md handling of these were generating exception raising > instructions such as fcmgt. This patch changes the instructions that are > emitted to in order to not give out the exceptions. We first check each > operand for NaNs and force any elements containing NaN to zero before > using them in the compare. > > Example: UN<cc> (a, b) -> UNORDERED (a, b) | (cm<cc> (isnan (a) ? 0.0 : > a, isnan (b) ? 0.0 : b)) > > > The ORDERED_EXPR is now handled as (cmeq (a, a) & cmeq (b, b)) and > UNORDERED_EXPR as ~ORDERED_EXPR and UNEQ as (~ORDERED_EXPR | cmeq (a,b)). > > Testing done: Checked for regressions on bootstrapped > aarch64-none-linux-gnu and added a new test case. > > Is this ok for trunk? This will probably need a back-port to > gcc-7-branch as well.
OK. Let it soak on trunk for a while before the backport. Thanks, James > ChangeLog Entries: > > *** gcc/ChangeLog *** > > 2017-12-15 Sudakshina Das <sudi....@arm.com> > > PR target/81647 > * config/aarch64/aarch64-simd.md (vec_cmp<mode><v_int_equiv>): Modify > instructions for > UNLT, UNLE, UNGT, UNGE, UNEQ, UNORDERED and ORDERED. > > *** gcc/testsuite/ChangeLog *** > > 2017-12-15 Sudakshina Das <sudi....@arm.com> > > PR target/81647 > * gcc.target/aarch64/pr81647.c: New. > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index > f90f74fe7fd5990a97b9f4eb68f5735b7d4fb9aa..acff06c753b3e3aaa5775632929909afa4d3294b > 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -2731,10 +2731,10 @@ > break; > } > /* Fall through. */ > - case UNGE: > + case UNLT: > std::swap (operands[2], operands[3]); > /* Fall through. */ > - case UNLE: > + case UNGT: > case GT: > comparison = gen_aarch64_cmgt<mode>; > break; > @@ -2745,10 +2745,10 @@ > break; > } > /* Fall through. */ > - case UNGT: > + case UNLE: > std::swap (operands[2], operands[3]); > /* Fall through. */ > - case UNLT: > + case UNGE: > case GE: > comparison = gen_aarch64_cmge<mode>; > break; > @@ -2771,21 +2771,35 @@ > case UNGT: > case UNLE: > case UNLT: > - case NE: > - /* FCM returns false for lanes which are unordered, so if we use > - the inverse of the comparison we actually want to emit, then > - invert the result, we will end up with the correct result. > - Note that a NE NaN and NaN NE b are true for all a, b. > - > - Our transformations are: > - a UNGE b -> !(b GT a) > - a UNGT b -> !(b GE a) > - a UNLE b -> !(a GT b) > - a UNLT b -> !(a GE b) > - a NE b -> !(a EQ b) */ > - gcc_assert (comparison != NULL); > - emit_insn (comparison (operands[0], operands[2], operands[3])); > - emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); > + { > + /* All of the above must not raise any FP exceptions. Thus we first > + check each operand for NaNs and force any elements containing NaN to > + zero before using them in the compare. > + Example: UN<cc> (a, b) -> UNORDERED (a, b) | > + (cm<cc> (isnan (a) ? 0.0 : a, > + isnan (b) ? 0.0 : b)) > + We use the following transformations for doing the comparisions: > + a UNGE b -> a GE b > + a UNGT b -> a GT b > + a UNLE b -> b GE a > + a UNLT b -> b GT a. */ > + > + rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode); > + rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode); > + rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode); > + emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2])); > + emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3])); > + emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1)); > + emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0, > + lowpart_subreg (<V_INT_EQUIV>mode, operands[2], > <MODE>mode))); > + emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1, > + lowpart_subreg (<V_INT_EQUIV>mode, operands[3], > <MODE>mode))); > + gcc_assert (comparison != NULL); > + emit_insn (comparison (operands[0], > + lowpart_subreg (<MODE>mode, tmp0, > <V_INT_EQUIV>mode), > + lowpart_subreg (<MODE>mode, tmp1, > <V_INT_EQUIV>mode))); > + emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0])); > + } > break; > > case LT: > @@ -2793,25 +2807,19 @@ > case GT: > case GE: > case EQ: > + case NE: > /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ. > As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are: > a GE b -> a GE b > a GT b -> a GT b > a LE b -> b GE a > a LT b -> b GT a > - a EQ b -> a EQ b */ > + a EQ b -> a EQ b > + a NE b -> ~(a EQ b) */ > gcc_assert (comparison != NULL); > emit_insn (comparison (operands[0], operands[2], operands[3])); > - break; > - > - case UNEQ: > - /* We first check (a > b || b > a) which is !UNEQ, inverting > - this result will then give us (a == b || a UNORDERED b). */ > - emit_insn (gen_aarch64_cmgt<mode> (operands[0], > - operands[2], operands[3])); > - emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2])); > - emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp)); > - emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); > + if (code == NE) > + emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); > break; > > case LTGT: > @@ -2823,21 +2831,22 @@ > emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp)); > break; > > - case UNORDERED: > - /* Operands are ORDERED iff (a > b || b >= a), so we can compute > - UNORDERED as !ORDERED. */ > - emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3])); > - emit_insn (gen_aarch64_cmge<mode> (operands[0], > - operands[3], operands[2])); > - emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp)); > - emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); > - break; > - > case ORDERED: > - emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3])); > - emit_insn (gen_aarch64_cmge<mode> (operands[0], > - operands[3], operands[2])); > - emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp)); > + case UNORDERED: > + case UNEQ: > + /* cmeq (a, a) & cmeq (b, b). */ > + emit_insn (gen_aarch64_cmeq<mode> (operands[0], > + operands[2], operands[2])); > + emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3])); > + emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp)); > + > + if (code == UNORDERED) > + emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); > + else if (code == UNEQ) > + { > + emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3])); > + emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp)); > + } > break; > > default: > diff --git a/gcc/testsuite/gcc.target/aarch64/pr81647.c > b/gcc/testsuite/gcc.target/aarch64/pr81647.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..f60dfba49d538e3b2164b11392ab8dbfdba6546e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/pr81647.c > @@ -0,0 +1,44 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -fdump-tree-ssa" } */ > + > +#include <fenv.h> > + > +double x[28], y[28]; > +int res[28]; > + > +int > +main (void) > +{ > + int i; > + for (i = 0; i < 28; ++i) > + { > + x[i] = __builtin_nan (""); > + y[i] = i; > + } > + __asm__ volatile ("" ::: "memory"); > + feclearexcept (FE_ALL_EXCEPT); > + for (i = 0; i < 4; ++i) > + res[i] = __builtin_isgreater (x[i], y[i]); > + for (i = 4; i < 8; ++i) > + res[i] = __builtin_isgreaterequal (x[i], y[i]); > + for (i = 8; i < 12; ++i) > + res[i] = __builtin_isless (x[i], y[i]); > + for (i = 12; i < 16; ++i) > + res[i] = __builtin_islessequal (x[i], y[i]); > + for (i = 16; i < 20; ++i) > + res[i] = __builtin_islessgreater (x[i], y[i]); > + for (i = 20; i < 24; ++i) > + res[i] = __builtin_isunordered (x[i], y[i]); > + for (i = 24; i < 28; ++i) > + res[i] = !(__builtin_isunordered (x[i], y[i])); > + __asm__ volatile ("" ::: "memory"); > + return fetestexcept (FE_ALL_EXCEPT) != 0; > +} > + > +/* { dg-final { scan-tree-dump " u> " "ssa" } } */ > +/* { dg-final { scan-tree-dump " u>= " "ssa" } } */ > +/* { dg-final { scan-tree-dump " u< " "ssa" } } */ > +/* { dg-final { scan-tree-dump " u<= " "ssa" } } */ > +/* { dg-final { scan-tree-dump " u== " "ssa" } } */ > +/* { dg-final { scan-tree-dump " unord " "ssa" } } */ > +/* { dg-final { scan-tree-dump " ord " "ssa" } } */