On Tue, Dec 8, 2015 at 7:10 PM, Uros Bizjak <ubiz...@gmail.com> wrote: > Hello! > >> this patch fixes the asm statements in the gcc.target/i386/sse4_1-round* >> test cases. >> >> They do lots of things that are just absolutely forbidden, like clobber >> registers >> that are not mentioned in the clobber list, and create a hidden data flow. >> >> The test cases work just by chance, and You can see the asm statements >> ripped completely apart by the loop optimizer if you try to do the assembler >> part in a loop: >> >> for (i = 0; i < 10; i++) { >> __asm__ ("fld" ASM_SUFFIX " %0" : : "m" (*&f)); >> >> __asm__ ("fstcw %0" : "=m" (*&saved_cw)); >> new_cw = saved_cw & clr_mask; >> new_cw |= type; >> __asm__ ("fldcw %0" : : "m" (*&new_cw)); >> >> __asm__ ("frndint\n" >> "fstp" ASM_SUFFIX " %0\n" : "=m" (*&ret)); >> __asm__ ("fldcw %0" : : "m" (*&saved_cw)); >> } >> return ret; >> >> So this patch avoids the hidden data flow, and >> adds "st" to the clobber list. >> >> Boot-strapped and reg-tested on x86_64-pc-linux-gnu >> OK for trunk? > > Uh, no. > > x87 is a strange beast, and even your patch is wrong. The assembly > should be written in this way: > > __asm__ ("fnstcw %0" : "=m" (saved_cw)); > > new_cw = saved_cw & clr_mask; > new_cw |= type; > > __asm__ ("fldcw %2\n\t" > "frndint\n\t" > "fldcw %3" : "=t" (ret) > : "0" (f), "m" (new_cw), "m" (saved_cw)); > > I'm testing the attached patch.
Committed slightly updated patch to mainline SVN with the following ChangeLog: 2015-12-08 Uros Bizjak <ubiz...@gmail.com> * gcc.target/i386/sse4_1-round.h (do_round): Fix inline asm statements. * gcc.target/i386/sse4_1-roundsd-4.c (do_round): Ditto. * gcc.target/i386/sse4_1-roundss-4.c (do_round): Ditto. Tested on x86_64-linux-gnu {,-m32}, committed to mainline SVN, will be backported to all active branches. Uros.
Index: gcc.target/i386/sse4_1-round.h =================================================================== --- gcc.target/i386/sse4_1-round.h (revision 231413) +++ gcc.target/i386/sse4_1-round.h (working copy) @@ -28,7 +28,7 @@ init_round (FP_T *src) static FP_T do_round (FP_T f, int type) { - short saved_cw, new_cw, clr_mask; + unsigned short saved_cw, new_cw, clr_mask; FP_T ret; if ((type & 4)) @@ -42,16 +42,15 @@ do_round (FP_T f, int type) clr_mask = ~0x0C3F; } - __asm__ ("fld" ASM_SUFFIX " %0" : : "m" (*&f)); + __asm__ ("fnstcw %0" : "=m" (saved_cw)); - __asm__ ("fstcw %0" : "=m" (*&saved_cw)); new_cw = saved_cw & clr_mask; new_cw |= type; - __asm__ ("fldcw %0" : : "m" (*&new_cw)); - __asm__ ("frndint\n" - "fstp" ASM_SUFFIX " %0\n" : "=m" (*&ret)); - __asm__ ("fldcw %0" : : "m" (*&saved_cw)); + __asm__ ("fldcw %2\n\t" + "frndint\n\t" + "fldcw %3" : "=t" (ret) + : "0" (f), "m" (new_cw), "m" (saved_cw)); return ret; } Index: gcc.target/i386/sse4_1-roundsd-4.c =================================================================== --- gcc.target/i386/sse4_1-roundsd-4.c (revision 231413) +++ gcc.target/i386/sse4_1-roundsd-4.c (working copy) @@ -36,7 +36,7 @@ init_round (double *src) static double do_round (double f, int type) { - short saved_cw, new_cw, clr_mask; + unsigned short saved_cw, new_cw, clr_mask; double ret; if ((type & 4)) @@ -50,16 +50,15 @@ do_round (double f, int type) clr_mask = ~0x0C3F; } - __asm__ ("fldl %0" : : "m" (*&f)); + __asm__ ("fnstcw %0" : "=m" (saved_cw)); - __asm__ ("fstcw %0" : "=m" (*&saved_cw)); new_cw = saved_cw & clr_mask; new_cw |= type; - __asm__ ("fldcw %0" : : "m" (*&new_cw)); - __asm__ ("frndint\n" - "fstpl %0\n" : "=m" (*&ret)); - __asm__ ("fldcw %0" : : "m" (*&saved_cw)); + __asm__ ("fldcw %2\n\t" + "frndint\n\t" + "fldcw %3" : "=t" (ret) + : "0" (f), "m" (new_cw), "m" (saved_cw)); return ret; } Index: gcc.target/i386/sse4_1-roundss-4.c =================================================================== --- gcc.target/i386/sse4_1-roundss-4.c (revision 231413) +++ gcc.target/i386/sse4_1-roundss-4.c (working copy) @@ -36,7 +36,7 @@ init_round (float *src) static float do_round (float f, int type) { - short saved_cw, new_cw, clr_mask; + unsigned short saved_cw, new_cw, clr_mask; float ret; if ((type & 4)) @@ -50,16 +50,15 @@ do_round (float f, int type) clr_mask = ~0x0C3F; } - __asm__ ("flds %0" : : "m" (*&f)); + __asm__ ("fnstcw %0" : "=m" (saved_cw)); - __asm__ ("fstcw %0" : "=m" (*&saved_cw)); new_cw = saved_cw & clr_mask; new_cw |= type; - __asm__ ("fldcw %0" : : "m" (*&new_cw)); - __asm__ ("frndint\n" - "fstps %0\n" : "=m" (*&ret)); - __asm__ ("fldcw %0" : : "m" (*&saved_cw)); + __asm__ ("fldcw %2\n\t" + "frndint\n\t" + "fldcw %3" : "=t" (ret) + : "0" (f), "m" (new_cw), "m" (saved_cw)); return ret; }