On 6/18/20 11:02 AM, Martin Liška wrote:
Now I've got it.

I've just reduced that to:

$ cat pr50310.c
/* Reduced testcase: main fills s1 and s2, computes s3[1..3] from an
   element-wise __builtin_isgreater comparison of s1 and s2, prints s3[1]
   and aborts unless s3[1] equals -1.0.  */
double s1[4], s2[4], s3[64];

int
main ()
{
  s1[0] = 5.0;
  s1[1] = 6.0;
  s1[2] = 5.0;
  /* Quiet NaN operand: __builtin_isgreater yields false for unordered
     operands, so the s3[3] result is expected to be 0.0.  */
  s1[3] = __builtin_nan ("");
  s2[0] = 6.0;
  s2[1] = 5.0;
  s2[2] = 5.0;
  s2[3] = 5.0;

  /* Empty asm with a "memory" clobber: a compiler-level barrier.
     Presumably here so the stores above are not folded into the loop
     at compile time and the comparison is really executed.  */
  asm volatile ("" : : : "memory");
  /* The loop starts at index 1, so s3[0] is never written.  */
  for (int i = 1; i < 4; i++)
    s3[i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0;
  /* Second barrier; presumably forces the s3 stores to be performed
     before s3[1] is read back below.  */
  asm volatile ("" : : : "memory");

  __builtin_printf ("val: %f\n", s3[1]);
  /* s1[1] (6.0) is greater than s2[1] (5.0), so s3[1] must be -1.0.  */
  if (s3[1] != -1.0)
    __builtin_abort ();
  return 0;
}

The optimized dump differs in the expected manner:

   <bb 2> [local count: 805306369]:
   MEM <vector(2) double> [(double *)&s1] = { 5.0e+0, 6.0e+0 };
   MEM <vector(2) double> [(double *)&s1 + 16B] = { 5.0e+0,  Nan };
   MEM <vector(2) double> [(double *)&s2] = { 6.0e+0, 5.0e+0 };
   MEM <vector(2) double> [(double *)&s2 + 16B] = { 5.0e+0, 5.0e+0 };
   __asm__ __volatile__("" :  :  : "memory");
   vect__1.13_51 = MEM <vector(2) double> [(double *)&s1 + 8B];
   vect__2.16_55 = MEM <vector(2) double> [(double *)&s2 + 8B];
-  vect_iftmp.17_58 = VEC_COND_EXPR <vect__1.13_51 u<= vect__2.16_55, { 0.0, 0.0 }, { -1.0e+0, -1.0e+0 }>;
-  MEM <vector(2) double> [(double *)&s3 + 8B] = vect_iftmp.17_58;
+  _58 = vect__1.13_51 u<= vect__2.16_55;
+  vect_iftmp.17_59 = .VCOND (vect__1.13_51, vect__2.16_55, { 0.0, 0.0 }, { -1.0e+0, -1.0e+0 }, 117);
+  MEM <vector(2) double> [(double *)&s3 + 8B] = vect_iftmp.17_59;
   _41 = s1[3];
   _42 = s2[3];
   if (_41 u<= _42)
     goto <bb 3>; [50.00%]
   else
     goto <bb 4>; [50.00%]
   <bb 3> [local count: 402653185]:

   <bb 4> [local count: 805306369]:
   # iftmp.0_43 = PHI <-1.0e+0(2), 0.0(3)>
   s3[3] = iftmp.0_43;
   __asm__ __volatile__("" :  :  : "memory");

but we fail with:

$ gcc pr50310.c -m32 -O3 -ffloat-store && ./a.out
val: -nan
Aborted (core dumped)

I'm digging deeper.
Martin

Reply via email to