[PATCH], Add support for __builtin_{sqrt,fma}f128 on PowerPC ISA 3.0

Michael Meissner Wed, 13 Sep 2017 14:46:41 -0700

This patch adds support on PowerPC ISA 3.0 for the built-in function
__builtin_sqrtf128 generating the XSSQRTQP hardware square root instruction and
the built-in function __builtin_fmaf128 generating XSMADDQP, XSMSUBQP,
XSNMADDQP, and XSNMSUBQP fused multiply-add instructions.


While I was at it, I changed the documentation so that it no longer documents
the 'q' built-in functions (to mirror libquadmath) but instead just documents
the 'f128' functions that match glibc 2.26 and the technical report that
added the _Float128 data type.

I changed the tests that used __fabsq to use __fabsf128 instead.

I also added && lp64 to float128-5.c so that it doesn't cause errors when doing
the test for a 32-bit target.  This is due to the fact that if you enable
hardware IEEE 128-bit floating point, you eventually will need TImode
supported, and that is not supported on 32-bit targets.

I did a bootstrap and make check with subversion id 252033 on a little endian
power8 system.  The subversion id 252033 is one of the last svn ids that
bootstraps without additional patches on the PowerPC.  There were no regressions
with this patch, and I verified the 4 new tests were run.  Can I check this patch
into the trunk?

[gcc]
2017-09-13  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * config/rs6000/rs6000-builtin.def (BU_FLOAT128_1_HW): New macros
        to support float128 built-in functions that require the ISA 3.0
        hardware.
        (BU_FLOAT128_3_HW): Likewise.
        (SQRTF128): Add support for the IEEE 128-bit square root and fma
        built-in functions.
        (FMAF128): Likewise.
        (FMAQ): Likewise.
        * config/rs6000/rs6000.c (rs6000_builtin_mask_calculate): Add
        support for built-in functions that need the ISA 3.0 IEEE 128-bit
        floating point instructions.
        (rs6000_invalid_builtin): Likewise.
        (rs6000_builtin_mask_names): Likewise.
        * config/rs6000/rs6000.h (MASK_FLOAT128_HW): Likewise.
        (RS6000_BTM_FLOAT128_HW): Likewise.
        (RS6000_BTM_COMMON): Likewise.
        * config/rs6000/rs6000.md (fma<mode>4_hw): Add a generator
        function.
        * doc/extend.texi (RS/6000 built-in functions): Document the
        'f128' IEEE 128-bit floating point built-in functions.  Don't
        document the older 'q' versions of the functions. Document the
        built-in IEEE 128-bit floating point square root and fused
        multiply-add built-ins.

[gcc/testsuite]
2017-09-13  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * gcc.target/powerpc/abs128-1.c: Use __builtin_fabsf128 instead of
        __builtin_fabsq.
        * gcc.target/powerpc/float128-5.c: Use __builtin_fabsf128 instead
        of __builtin_fabsq.  Prevent the test from running on 32-bit.
        * gcc.target/powerpc/float128-fma1.c: New test.
        * gcc.target/powerpc/float128-fma2.c: Likewise.
        * gcc.target/powerpc/float128-sqrt1.c: Likewise.
        * gcc.target/powerpc/float128-sqrt2.c: Likewise.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797

Index: gcc/config/rs6000/rs6000-builtin.def
===================================================================
--- gcc/config/rs6000/rs6000-builtin.def        (revision 252730)
+++ gcc/config/rs6000/rs6000-builtin.def        (working copy)
@@ -667,6 +667,23 @@
                     | RS6000_BTC_UNARY),                               \
                    CODE_FOR_ ## ICODE)                 /* ICODE */
 
+/* IEEE 128-bit floating-point builtins that need the ISA 3.0 hardware.  */
+#define BU_FLOAT128_1_HW(ENUM, NAME, ATTR, ICODE)                       \
+  RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM,              /* ENUM */      \
+                   "__builtin_" NAME,                  /* NAME */      \
+                   RS6000_BTM_FLOAT128_HW,             /* MASK */      \
+                   (RS6000_BTC_ ## ATTR                /* ATTR */      \
+                    | RS6000_BTC_UNARY),                               \
+                   CODE_FOR_ ## ICODE)                 /* ICODE */
+
+#define BU_FLOAT128_3_HW(ENUM, NAME, ATTR, ICODE)                       \
+  RS6000_BUILTIN_3 (MISC_BUILTIN_ ## ENUM,              /* ENUM */      \
+                   "__builtin_" NAME,                  /* NAME */      \
+                   RS6000_BTM_FLOAT128_HW,             /* MASK */      \
+                   (RS6000_BTC_ ## ATTR                /* ATTR */      \
+                    | RS6000_BTC_TERNARY),                             \
+                   CODE_FOR_ ## ICODE)                 /* ICODE */
+
 /* Miscellaneous builtins for instructions added in ISA 3.0.  These
    instructions don't require either the DFP or VSX options, just the basic
    ISA 3.0 enablement since they operate on general purpose registers.  */
@@ -2328,6 +2345,16 @@ BU_FLOAT128_1 (FABSQ,            "fabsq",       CO
 
 /* 2 argument IEEE 128-bit floating-point functions.  */
 BU_FLOAT128_2 (COPYSIGNQ,      "copysignq",   CONST, copysignkf3)
+
+/* 1 argument IEEE 128-bit floating point functions that require ISA 3.0
+   hardware.  Only the 'f128' name (matching glibc 2.26) is defined here.  We
+   didn't need this for FABS/COPYSIGN, since the machine independent built-in
+   support already defines the F128 versions.  */
+BU_FLOAT128_1_HW (SQRTF128,    "sqrtf128",     CONST, sqrtkf2)
+
+/* 3 argument IEEE 128-bit floating point functions that require ISA 3.0
+   hardware.  */
+BU_FLOAT128_3_HW (FMAF128,     "fmaf128",      CONST, fmakf4_hw)
 
 /* 1 argument crypto functions.  */
 BU_CRYPTO_1 (VSBOX,            "vsbox",          CONST, crypto_vsbox)
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  (revision 252730)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -3903,7 +3903,8 @@ rs6000_builtin_mask_calculate (void)
          | ((TARGET_DFP)                   ? RS6000_BTM_DFP       : 0)
          | ((TARGET_HARD_FLOAT)            ? RS6000_BTM_HARD_FLOAT : 0)
          | ((TARGET_LONG_DOUBLE_128)       ? RS6000_BTM_LDBL128   : 0)
-         | ((TARGET_FLOAT128_TYPE)         ? RS6000_BTM_FLOAT128  : 0));
+         | ((TARGET_FLOAT128_TYPE)         ? RS6000_BTM_FLOAT128  : 0)
+         | ((TARGET_FLOAT128_HW)           ? RS6000_BTM_FLOAT128_HW : 0));
 }
 
 /* Implement TARGET_MD_ASM_ADJUST.  All asm statements are considered
@@ -16107,6 +16108,9 @@ rs6000_invalid_builtin (enum rs6000_buil
   else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
     error ("builtin function %qs requires the %qs option", name,
           "-mhard-float");
+  else if ((fnmask & RS6000_BTM_FLOAT128_HW) != 0)
+    error ("builtin function %qs requires ISA 3.0 IEEE 128-bit floating point",
+          name);
   else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
     error ("builtin function %qs requires the %qs option", name, "-mfloat128");
   else
@@ -36227,6 +36231,7 @@ static struct rs6000_opt_mask const rs60
   { "hard-float",       RS6000_BTM_HARD_FLOAT, false, false },
   { "long-double-128",  RS6000_BTM_LDBL128,    false, false },
   { "float128",                 RS6000_BTM_FLOAT128,   false, false },
+  { "float128-hw",      RS6000_BTM_FLOAT128_HW,false, false },
 };
 
 /* Option variables that we want to support inside attribute((target)) and
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h  (revision 252730)
+++ gcc/config/rs6000/rs6000.h  (working copy)
@@ -640,6 +640,7 @@ extern int rs6000_vector_align[];
 #define MASK_DLMZB                     OPTION_MASK_DLMZB
 #define MASK_EABI                      OPTION_MASK_EABI
 #define MASK_FLOAT128_KEYWORD          OPTION_MASK_FLOAT128_KEYWORD
+#define MASK_FLOAT128_HW               OPTION_MASK_FLOAT128_HW
 #define MASK_FPRND                     OPTION_MASK_FPRND
 #define MASK_P8_FUSION                 OPTION_MASK_P8_FUSION
 #define MASK_HARD_FLOAT                        OPTION_MASK_HARD_FLOAT
@@ -2499,6 +2500,7 @@ extern int frame_pointer_needed;
 #define RS6000_BTM_LDBL128     MASK_MULTIPLE   /* 128-bit long double.  */
 #define RS6000_BTM_64BIT       MASK_64BIT      /* 64-bit addressing.  */
 #define RS6000_BTM_FLOAT128    MASK_FLOAT128_KEYWORD /* IEEE 128-bit float.  */
+#define RS6000_BTM_FLOAT128_HW MASK_FLOAT128_HW /* IEEE 128-bit float h/w.  */
 
 #define RS6000_BTM_COMMON      (RS6000_BTM_ALTIVEC                     \
                                 | RS6000_BTM_VSX                       \
@@ -2517,7 +2519,8 @@ extern int frame_pointer_needed;
                                 | RS6000_BTM_DFP                       \
                                 | RS6000_BTM_HARD_FLOAT                \
                                 | RS6000_BTM_LDBL128                   \
-                                | RS6000_BTM_FLOAT128)
+                                | RS6000_BTM_FLOAT128                  \
+                                | RS6000_BTM_FLOAT128_HW)
 
 /* Define builtin enum index.  */
 
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (revision 252730)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -14316,7 +14316,7 @@ (define_insn "*nabs<mode>2_hw"
    (set_attr "size" "128")])
 
 ;; Initially don't worry about doing fusion
-(define_insn "*fma<mode>4_hw"
+(define_insn "fma<mode>4_hw"
   [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
        (fma:IEEE128
         (match_operand:IEEE128 1 "altivec_register_operand" "%v")
Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi (revision 252730)
+++ gcc/doc/extend.texi (working copy)
@@ -15309,34 +15309,50 @@ Additional built-in functions are availa
 family of processors, for efficient use of 128-bit floating point
 (@code{__float128}) values.
 
-The following floating-point built-in functions are available with
-@code{-mfloat128} and Altivec support.  All of them implement the
-function that is part of the name.
-
-@smallexample
-__float128 __builtin_fabsq (__float128)
-__float128 __builtin_copysignq (__float128, __float128)
-@end smallexample
-
-The following built-in functions are available with @code{-mfloat128}
-and Altivec support.
+The following built-in functions are available on Linux 64-bit systems
+with @code{-mfloat128} and Altivec support.
 
 @table @code
-@item __float128 __builtin_infq (void)
+@item __float128 __builtin_fabsf128 (__float128)
+Similar to @code{__builtin_fabs}, except the return and input types
+are @code{__float128}.
+@findex __builtin_fabsf128
+
+@item __float128 __builtin_copysignf128 (__float128, __float128)
+Similar to @code{__builtin_copysign}, except the return and input
+types are @code{__float128}.
+@findex __builtin_copysignf128
+
+@item __float128 __builtin_inff128 (void)
 Similar to @code{__builtin_inf}, except the return type is @code{__float128}.
-@findex __builtin_infq
+@findex __builtin_inff128
 
-@item __float128 __builtin_huge_valq (void)
+@item __float128 __builtin_huge_valf128 (void)
 Similar to @code{__builtin_huge_val}, except the return type is @code{__float128}.
-@findex __builtin_huge_valq
+@findex __builtin_huge_valf128
 
-@item __float128 __builtin_nanq (void)
+@item __float128 __builtin_nanf128 (const char *)
 Similar to @code{__builtin_nan}, except the return type is @code{__float128}.
-@findex __builtin_nanq
+@findex __builtin_nanf128
 
-@item __float128 __builtin_nansq (void)
+@item __float128 __builtin_nansf128 (const char *)
 Similar to @code{__builtin_nans}, except the return type is @code{__float128}.
-@findex __builtin_nansq
+@findex __builtin_nansf128
+@end table
+
+The following built-in functions are available on Linux 64-bit systems
+that use the ISA 3.0 instruction set.
+
+@table @code
+@item __float128 __builtin_sqrtf128 (__float128)
+Similar to @code{__builtin_sqrt}, except the return and input types
+are @code{__float128}.
+@findex __builtin_sqrtf128
+
+@item __float128 __builtin_fmaf128 (__float128, __float128, __float128)
+Similar to @code{__builtin_fma}, except the return and input types are
+@code{__float128}.
+@findex __builtin_fmaf128
 @end table
 
 The following built-in functions are available for the PowerPC family
Index: gcc/testsuite/gcc.target/powerpc/abs128-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/abs128-1.c (revision 252730)
+++ gcc/testsuite/gcc.target/powerpc/abs128-1.c (working copy)
@@ -39,7 +39,7 @@ main (int argc, int *argv[])
   x.nan.mant_high = 0x1234;
   x.nan.mant_low = 0xabcdef;
 
-  z.value = __builtin_fabsq (x.value);
+  z.value = __builtin_fabsf128 (x.value);
 
   if (z.nan.negative != 0
       || z.nan.exponent != 0x22
@@ -48,7 +48,7 @@ main (int argc, int *argv[])
       || z.nan.mant_low != 0xabcdef)
     abort ();
 
-  z.value = __builtin_fabsq (z.value);
+  z.value = __builtin_fabsf128 (z.value);
 
   if (z.nan.negative != 0
       || z.nan.exponent != 0x22
Index: gcc/testsuite/gcc.target/powerpc/float128-5.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/float128-5.c       (revision 252730)
+++ gcc/testsuite/gcc.target/powerpc/float128-5.c       (working copy)
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-O2 -mpower9-vector -mno-float128" } */
 
@@ -18,7 +18,7 @@
 __float128
 qabs (__float128 a)
 {
-  return __builtin_fabsq (a);
+  return __builtin_fabsf128 (a);
 }
 
 /* { dg-final { scan-assembler "xsabsqp"  } } */
Index: gcc/testsuite/gcc.target/powerpc/float128-fma1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/float128-fma1.c    (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/float128-fma1.c    (working copy)
@@ -0,0 +1,32 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -O2" } */
+
+__float128
+xfma (__float128 a, __float128 b, __float128 c)
+{
+  return __builtin_fmaf128 (a, b, c);
+}
+
+__float128
+xfms (__float128 a, __float128 b, __float128 c)
+{
+  return __builtin_fmaf128 (a, b, -c);
+}
+
+__float128
+xfnma (__float128 a, __float128 b, __float128 c)
+{
+  return -__builtin_fmaf128 (a, b, c);
+}
+
+__float128
+xfnms (__float128 a, __float128 b, __float128 c)
+{
+  return -__builtin_fmaf128 (a, b, -c);
+}
+
+/* { dg-final { scan-assembler "xsmaddqp"  } } */
+/* { dg-final { scan-assembler "xsmsubqp"  } } */
+/* { dg-final { scan-assembler "xsnmaddqp" } } */
+/* { dg-final { scan-assembler "xsnmsubqp" } } */
Index: gcc/testsuite/gcc.target/powerpc/float128-fma2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/float128-fma2.c    (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/float128-fma2.c    (working copy)
@@ -0,0 +1,9 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -mno-float128-hardware -O2" } */
+
+__float128
+xfma (__float128 a, __float128 b, __float128 c)
+{
+  return __builtin_fmaf128 (a, b, c); /* { dg-error "ISA 3.0 IEEE 128-bit" } */
+}
Index: gcc/testsuite/gcc.target/powerpc/float128-sqrt1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/float128-sqrt1.c   (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/float128-sqrt1.c   (working copy)
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -O2" } */
+
+__float128
+xsqrt (__float128 a)
+{
+  return __builtin_sqrtf128 (a);
+}
+
+/* { dg-final { scan-assembler "xssqrtqp"  } } */
Index: gcc/testsuite/gcc.target/powerpc/float128-sqrt2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/float128-sqrt2.c   (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/float128-sqrt2.c   (working copy)
@@ -0,0 +1,9 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -mno-float128-hardware -O2" } */
+
+__float128
+xsqrt (__float128 a)
+{
+  return __builtin_sqrtf128 (a); /* { dg-error "ISA 3.0 IEEE 128-bit" } */
+}

[PATCH], Add support for __builtin_{sqrt,fma}f128 on PowerPC ISA 3.0

Reply via email to