On Fri, Dec 15, 2017 at 11:57:46AM +0000, Sudi Das wrote:
> Hi
> 
> This patch fixes the inconsistent behavior observed at -O3 for the 
> unordered comparisons. According to the online docs 
> (https://gcc.gnu.org/onlinedocs/gcc-7.2.0/gccint/Unary-and-Binary-Expressions.html),
>  
> all of the following should not raise an FP exception:
> - UNGE_EXPR
> - UNGT_EXPR
> - UNLE_EXPR
> - UNLT_EXPR
> - UNEQ_EXPR
> Also ORDERED_EXPR and UNORDERED_EXPR should only return zero or one.
> 
> The aarch64-simd.md handling of these were generating exception raising 
> instructions such as fcmgt. This patch changes the instructions that are 
> emitted to in order to not give out the exceptions. We first check each 
> operand for NaNs and force any elements containing NaN to zero before 
> using them in the compare.
> 
> Example: UN<cc> (a, b) -> UNORDERED (a, b) | (cm<cc> (isnan (a) ? 0.0 : 
> a, isnan (b) ? 0.0 : b))
> 
> 
> The ORDERED_EXPR is now handled as (cmeq (a, a) & cmeq (b, b)) and 
> UNORDERED_EXPR as ~ORDERED_EXPR and UNEQ as (~ORDERED_EXPR | cmeq (a,b)).
> 
> Testing done: Checked for regressions on bootstrapped 
> aarch64-none-linux-gnu and added a new test case.
> 
> Is this ok for trunk? This will probably need a back-port to 
> gcc-7-branch as well.

OK.

Let it soak on trunk for a while before the backport.

Thanks,
James

> ChangeLog Entries:
> 
> *** gcc/ChangeLog ***
> 
> 2017-12-15  Sudakshina Das  <sudi....@arm.com>
> 
>       PR target/81647
>       * config/aarch64/aarch64-simd.md (vec_cmp<mode><v_int_equiv>): Modify 
> instructions for
>       UNLT, UNLE, UNGT, UNGE, UNEQ, UNORDERED and ORDERED.
> 
> *** gcc/testsuite/ChangeLog ***
> 
> 2017-12-15  Sudakshina Das  <sudi....@arm.com>
> 
>       PR target/81647
>       * gcc.target/aarch64/pr81647.c: New.

> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 
> f90f74fe7fd5990a97b9f4eb68f5735b7d4fb9aa..acff06c753b3e3aaa5775632929909afa4d3294b
>  100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -2731,10 +2731,10 @@
>         break;
>       }
>        /* Fall through.  */
> -    case UNGE:
> +    case UNLT:
>        std::swap (operands[2], operands[3]);
>        /* Fall through.  */
> -    case UNLE:
> +    case UNGT:
>      case GT:
>        comparison = gen_aarch64_cmgt<mode>;
>        break;
> @@ -2745,10 +2745,10 @@
>         break;
>       }
>        /* Fall through.  */
> -    case UNGT:
> +    case UNLE:
>        std::swap (operands[2], operands[3]);
>        /* Fall through.  */
> -    case UNLT:
> +    case UNGE:
>      case GE:
>        comparison = gen_aarch64_cmge<mode>;
>        break;
> @@ -2771,21 +2771,35 @@
>      case UNGT:
>      case UNLE:
>      case UNLT:
> -    case NE:
> -      /* FCM returns false for lanes which are unordered, so if we use
> -      the inverse of the comparison we actually want to emit, then
> -      invert the result, we will end up with the correct result.
> -      Note that a NE NaN and NaN NE b are true for all a, b.
> -
> -      Our transformations are:
> -      a UNGE b -> !(b GT a)
> -      a UNGT b -> !(b GE a)
> -      a UNLE b -> !(a GT b)
> -      a UNLT b -> !(a GE b)
> -      a   NE b -> !(a EQ b)  */
> -      gcc_assert (comparison != NULL);
> -      emit_insn (comparison (operands[0], operands[2], operands[3]));
> -      emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
> +      {
> +     /* All of the above must not raise any FP exceptions.  Thus we first
> +        check each operand for NaNs and force any elements containing NaN to
> +        zero before using them in the compare.
> +        Example: UN<cc> (a, b) -> UNORDERED (a, b) |
> +                                  (cm<cc> (isnan (a) ? 0.0 : a,
> +                                           isnan (b) ? 0.0 : b))
> +        We use the following transformations for doing the comparisions:
> +        a UNGE b -> a GE b
> +        a UNGT b -> a GT b
> +        a UNLE b -> b GE a
> +        a UNLT b -> b GT a.  */
> +
> +     rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
> +     rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
> +     rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
> +     emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
> +     emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
> +     emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
> +     emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
> +                     lowpart_subreg (<V_INT_EQUIV>mode, operands[2], 
> <MODE>mode)));
> +     emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
> +                     lowpart_subreg (<V_INT_EQUIV>mode, operands[3], 
> <MODE>mode)));
> +     gcc_assert (comparison != NULL);
> +     emit_insn (comparison (operands[0],
> +                            lowpart_subreg (<MODE>mode, tmp0, 
> <V_INT_EQUIV>mode),
> +                            lowpart_subreg (<MODE>mode, tmp1, 
> <V_INT_EQUIV>mode)));
> +     emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
> +      }
>        break;
>  
>      case LT:
> @@ -2793,25 +2807,19 @@
>      case GT:
>      case GE:
>      case EQ:
> +    case NE:
>        /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
>        As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
>        a GE b -> a GE b
>        a GT b -> a GT b
>        a LE b -> b GE a
>        a LT b -> b GT a
> -      a EQ b -> a EQ b  */
> +      a EQ b -> a EQ b
> +      a NE b -> ~(a EQ b)  */
>        gcc_assert (comparison != NULL);
>        emit_insn (comparison (operands[0], operands[2], operands[3]));
> -      break;
> -
> -    case UNEQ:
> -      /* We first check (a > b ||  b > a) which is !UNEQ, inverting
> -      this result will then give us (a == b || a UNORDERED b).  */
> -      emit_insn (gen_aarch64_cmgt<mode> (operands[0],
> -                                      operands[2], operands[3]));
> -      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
> -      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
> -      emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
> +      if (code == NE)
> +     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
>        break;
>  
>      case LTGT:
> @@ -2823,21 +2831,22 @@
>        emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
>        break;
>  
> -    case UNORDERED:
> -      /* Operands are ORDERED iff (a > b || b >= a), so we can compute
> -      UNORDERED as !ORDERED.  */
> -      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
> -      emit_insn (gen_aarch64_cmge<mode> (operands[0],
> -                                      operands[3], operands[2]));
> -      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
> -      emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
> -      break;
> -
>      case ORDERED:
> -      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
> -      emit_insn (gen_aarch64_cmge<mode> (operands[0],
> -                                      operands[3], operands[2]));
> -      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
> +    case UNORDERED:
> +    case UNEQ:
> +      /* cmeq (a, a) & cmeq (b, b).  */
> +      emit_insn (gen_aarch64_cmeq<mode> (operands[0],
> +                                      operands[2], operands[2]));
> +      emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
> +      emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
> +
> +      if (code == UNORDERED)
> +     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
> +      else if (code == UNEQ)
> +     {
> +       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
> +       emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
> +     }
>        break;
>  
>      default:
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr81647.c 
> b/gcc/testsuite/gcc.target/aarch64/pr81647.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..f60dfba49d538e3b2164b11392ab8dbfdba6546e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr81647.c
> @@ -0,0 +1,44 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -fdump-tree-ssa" } */
> +
> +#include <fenv.h>
> +
> +double x[28], y[28];
> +int res[28];
> +
> +int
> +main (void)
> +{
> +  int i;
> +  for (i = 0; i < 28; ++i)
> +    {
> +      x[i] = __builtin_nan ("");
> +      y[i] = i;
> +    }
> +  __asm__ volatile ("" ::: "memory");
> +  feclearexcept (FE_ALL_EXCEPT);
> +  for (i = 0; i < 4; ++i)
> +    res[i] = __builtin_isgreater (x[i], y[i]);
> +  for (i = 4; i < 8; ++i)
> +    res[i] = __builtin_isgreaterequal (x[i], y[i]);
> +  for (i = 8; i < 12; ++i)
> +    res[i] = __builtin_isless (x[i], y[i]);
> +  for (i = 12; i < 16; ++i)
> +    res[i] = __builtin_islessequal (x[i], y[i]);
> +  for (i = 16; i < 20; ++i)
> +    res[i] = __builtin_islessgreater (x[i], y[i]);
> +  for (i = 20; i < 24; ++i)
> +    res[i] = __builtin_isunordered (x[i], y[i]);
> +  for (i = 24; i < 28; ++i)
> +    res[i] = !(__builtin_isunordered (x[i], y[i]));
> +  __asm__ volatile ("" ::: "memory");
> +  return fetestexcept (FE_ALL_EXCEPT) != 0;
> +}
> +
> +/* { dg-final { scan-tree-dump " u> " "ssa" } } */
> +/* { dg-final { scan-tree-dump " u>= " "ssa" } } */
> +/* { dg-final { scan-tree-dump " u< " "ssa" } } */
> +/* { dg-final { scan-tree-dump " u<= " "ssa" } } */
> +/* { dg-final { scan-tree-dump " u== " "ssa" } } */
> +/* { dg-final { scan-tree-dump " unord " "ssa" } } */
> +/* { dg-final { scan-tree-dump " ord " "ssa" } } */

Reply via email to