On Tue, Dec 8, 2015 at 7:10 PM, Uros Bizjak <ubiz...@gmail.com> wrote:
> Hello!
>
>> this patch fixes the asm statements in the gcc.target/i386/sse4_1-round* 
>> test cases.
>>
>> They do lots of things that are just absolutely forbidden, like clobber 
>> registers
>> that are not mentioned in the clobber list, and create a hidden data flow.
>>
>> The test cases work just by chance, and You can see the asm statements
>> ripped completely apart by the loop optimizer if you try to do the assembler
>> part in a loop:
>>
>>   for (i = 0; i < 10; i++) {
>>   __asm__ ("fld" ASM_SUFFIX " %0" : : "m" (*&f));
>>
>>   __asm__ ("fstcw %0" : "=m" (*&saved_cw));
>>   new_cw = saved_cw & clr_mask;
>>   new_cw |= type;
>>   __asm__ ("fldcw %0" : : "m" (*&new_cw));
>>
>>   __asm__ ("frndint\n"
>>            "fstp" ASM_SUFFIX " %0\n" : "=m" (*&ret));
>>   __asm__ ("fldcw %0" : : "m" (*&saved_cw));
>>   }
>>   return ret;
>>
>> So this patch avoids the hidden data flow, and
>> adds "st" to the clobber list.
>>
>> Boot-strapped and reg-tested on x86_64-pc-linux-gnu
>> OK for trunk?
>
> Uh, no.
>
> x87 is a strange beast, and even your patch is wrong. The assembly
> should be written in this way:
>
>   __asm__ ("fnstcw %0" : "=m" (saved_cw));
>
>   new_cw = saved_cw & clr_mask;
>   new_cw |= type;
>
>   __asm__ ("fldcw %2\n\t"
>        "frndint\n\t"
>        "fldcw %3" : "=t" (ret)
>               : "0" (f), "m" (new_cw), "m" (saved_cw));
>
> I'm testing the attached patch.

Committed slightly updated patch to mainline SVN with the following ChangeLog:

2015-12-08  Uros Bizjak  <ubiz...@gmail.com>

    * gcc.target/i386/sse4_1-round.h (do_round): Fix inline asm statements.
    * gcc.target/i386/sse4_1-roundsd-4.c (do_round): Ditto.
    * gcc.target/i386/sse4_1-roundss-4.c (do_round): Ditto.

Tested on x86_64-linux-gnu {,-m32}, committed to mainline SVN, will be
backported to all active branches.

Uros.
Index: gcc.target/i386/sse4_1-round.h
===================================================================
--- gcc.target/i386/sse4_1-round.h      (revision 231413)
+++ gcc.target/i386/sse4_1-round.h      (working copy)
@@ -28,7 +28,7 @@ init_round (FP_T *src)
 static FP_T
 do_round (FP_T f, int type)
 {
-  short saved_cw, new_cw, clr_mask;
+  unsigned short saved_cw, new_cw, clr_mask;
   FP_T ret;
 
   if ((type & 4))
@@ -42,16 +42,15 @@ do_round (FP_T f, int type)
       clr_mask = ~0x0C3F;
     }
 
-  __asm__ ("fld" ASM_SUFFIX " %0" : : "m" (*&f));
+  __asm__ ("fnstcw %0" : "=m" (saved_cw));
 
-  __asm__ ("fstcw %0" : "=m" (*&saved_cw));
   new_cw = saved_cw & clr_mask;
   new_cw |= type;
-  __asm__ ("fldcw %0" : : "m" (*&new_cw));
 
-  __asm__ ("frndint\n"
-          "fstp" ASM_SUFFIX " %0\n" : "=m" (*&ret));
-  __asm__ ("fldcw %0" : : "m" (*&saved_cw));
+  __asm__ ("fldcw %2\n\t"
+          "frndint\n\t"
+          "fldcw %3" : "=t" (ret)
+                     : "0" (f), "m" (new_cw), "m" (saved_cw));
   return ret;
 }
 
Index: gcc.target/i386/sse4_1-roundsd-4.c
===================================================================
--- gcc.target/i386/sse4_1-roundsd-4.c  (revision 231413)
+++ gcc.target/i386/sse4_1-roundsd-4.c  (working copy)
@@ -36,7 +36,7 @@ init_round (double *src)
 static double
 do_round (double f, int type)
 {
-  short saved_cw, new_cw, clr_mask;
+  unsigned short saved_cw, new_cw, clr_mask;
   double ret;
 
   if ((type & 4))
@@ -50,16 +50,15 @@ do_round (double f, int type)
       clr_mask = ~0x0C3F;
     }
 
-  __asm__ ("fldl %0" : : "m" (*&f));
+  __asm__ ("fnstcw %0" : "=m" (saved_cw));
 
-  __asm__ ("fstcw %0" : "=m" (*&saved_cw));
   new_cw = saved_cw & clr_mask;
   new_cw |= type;
-  __asm__ ("fldcw %0" : : "m" (*&new_cw));
 
-  __asm__ ("frndint\n"
-          "fstpl %0\n" : "=m" (*&ret));
-  __asm__ ("fldcw %0" : : "m" (*&saved_cw));
+  __asm__ ("fldcw %2\n\t"
+          "frndint\n\t"
+          "fldcw %3" : "=t" (ret)
+                     : "0" (f), "m" (new_cw), "m" (saved_cw));
   return ret;
 }
 
Index: gcc.target/i386/sse4_1-roundss-4.c
===================================================================
--- gcc.target/i386/sse4_1-roundss-4.c  (revision 231413)
+++ gcc.target/i386/sse4_1-roundss-4.c  (working copy)
@@ -36,7 +36,7 @@ init_round (float *src)
 static float
 do_round (float f, int type)
 {
-  short saved_cw, new_cw, clr_mask;
+  unsigned short saved_cw, new_cw, clr_mask;
   float ret;
 
   if ((type & 4))
@@ -50,16 +50,15 @@ do_round (float f, int type)
       clr_mask = ~0x0C3F;
     }
 
-  __asm__ ("flds %0" : : "m" (*&f));
+  __asm__ ("fnstcw %0" : "=m" (saved_cw));
 
-  __asm__ ("fstcw %0" : "=m" (*&saved_cw));
   new_cw = saved_cw & clr_mask;
   new_cw |= type;
-  __asm__ ("fldcw %0" : : "m" (*&new_cw));
 
-  __asm__ ("frndint\n"
-          "fstps %0\n" : "=m" (*&ret));
-  __asm__ ("fldcw %0" : : "m" (*&saved_cw));
+  __asm__ ("fldcw %2\n\t"
+          "frndint\n\t"
+          "fldcw %3" : "=t" (ret)
+                     : "0" (f), "m" (new_cw), "m" (saved_cw));
   return ret;
 }
 

Reply via email to