This patch adds built-in functions on PowerPC ISA 3.0 (power9) that allow the user to access the round to odd IEEE 128-bit floating point instructions.
I have checked it on a little endian power8 system doing a bootstrap and make check. There were no regressions in the testsuite. I verified that the new test (float128-odd.c) did run successfully. Can I check this patch into the trunk? [gcc] 2017-09-28 Michael Meissner <meiss...@linux.vnet.ibm.com> * config/rs6000/rs6000-builtin.def (BU_FLOAT128_2_HW): Define new helper macro for IEEE float128 hardware built-in functions. (SQRTF128_ODD): Add built-in functions with the round-to-odd semantics. (TRUNCF128_ODD): Likewise. (ADDF128_ODD): Likewise. (SUBF128_ODD): Likewise. (MULF128_ODD): Likewise. (DIVF128_ODD): Likewise. (FMAF128_ODD): Likewise. * config/rs6000/rs6000.md (trunc<mode>sf2_hw): Change the truncate with round to odd expansion to use float_truncate:DF inside of the UNSPEC to better document what the insn does. (add<mode>3_odd): Add insns for IEEE 128-bit floating point round to odd hardware instructions. (sub<mode>3_odd): Likewise. (mul<mode>3_odd): Likewise. (div<mode>3_odd): Likewise. (sqrt<mode>2_odd): Likewise. (fma<mode>4_odd): Likewise. (fms<mode>4_odd): Likewise. (nfma<mode>4_odd): Likewise. (nfms<mode>4_odd): Likewise. (trunc<mode>df2_odd): Change insn format to make it more readable, and add a generator function. * doc/extend.texi (PowerPC built-in functions): Update documentation for existing IEEE float128-bit built-in functions. Add built-in functions that generate the IEEE 128-bit floating point round to odd instructions. [gcc/testsuite] 2017-09-28 Michael Meissner <meiss...@linux.vnet.ibm.com> * gcc.target/powerpc/float128-odd.c: New test. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000-builtin.def =================================================================== --- gcc/config/rs6000/rs6000-builtin.def (revision 253267) +++ gcc/config/rs6000/rs6000-builtin.def (working copy) @@ -686,6 +686,14 @@ | RS6000_BTC_UNARY), \ CODE_FOR_ ## ICODE) /* ICODE */ +#define BU_FLOAT128_2_HW(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_FLOAT128_HW, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + #define BU_FLOAT128_3_HW(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_3 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ "__builtin_" NAME, /* NAME */ \ @@ -2365,11 +2373,19 @@ BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set") BU_FLOAT128_1 (FABSQ, "fabsq", CONST, abskf2) BU_FLOAT128_2 (COPYSIGNQ, "copysignq", CONST, copysignkf3) -/* 1 and 3 argument IEEE 128-bit floating point functions that require ISA 3.0 - hardware. These functions use the new 'f128' suffix. Eventually these - should be folded into the common built-in function handling. */ -BU_FLOAT128_1_HW (SQRTF128, "sqrtf128", CONST, sqrtkf2) -BU_FLOAT128_3_HW (FMAF128, "fmaf128", CONST, fmakf4_hw) +/* 1, 2, and 3 argument IEEE 128-bit floating point functions that require ISA + 3.0 hardware. These functions use the new 'f128' suffix. Eventually the + standard functions should be folded into the common built-in function + handling. 
*/ +BU_FLOAT128_1_HW (SQRTF128, "sqrtf128", CONST, sqrtkf2) +BU_FLOAT128_1_HW (SQRTF128_ODD, "sqrtf128_round_to_odd", CONST, sqrtkf2_odd) +BU_FLOAT128_1_HW (TRUNCF128_ODD, "truncf128_round_to_odd", CONST, trunckfdf2_odd) +BU_FLOAT128_2_HW (ADDF128_ODD, "addf128_round_to_odd", CONST, addkf3_odd) +BU_FLOAT128_2_HW (SUBF128_ODD, "subf128_round_to_odd", CONST, subkf3_odd) +BU_FLOAT128_2_HW (MULF128_ODD, "mulf128_round_to_odd", CONST, mulkf3_odd) +BU_FLOAT128_2_HW (DIVF128_ODD, "divf128_round_to_odd", CONST, divkf3_odd) +BU_FLOAT128_3_HW (FMAF128, "fmaf128", CONST, fmakf4_hw) +BU_FLOAT128_3_HW (FMAF128_ODD, "fmaf128_round_to_odd", CONST, fmakf4_odd) /* 1 argument crypto functions. */ BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox) Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (revision 253267) +++ gcc/config/rs6000/rs6000.md (working copy) @@ -14505,7 +14505,9 @@ (define_insn_and_split "trunc<mode>sf2_h "#" "&& 1" [(set (match_dup 2) - (unspec:DF [(match_dup 1)] UNSPEC_ROUND_TO_ODD)) + (unspec:DF [(float_truncate:DF + (match_dup 1))] + UNSPEC_ROUND_TO_ODD)) (set (match_dup 0) (float_truncate:SF (match_dup 2)))] { @@ -14682,9 +14684,125 @@ (define_insn_and_split "floatuns<QHI:mod (set_attr "size" "128")]) ;; IEEE 128-bit instructions with round to odd semantics -(define_insn "*trunc<mode>df2_odd" +(define_insn "add<mode>3_odd" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (unspec:IEEE128 + [(plus:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v"))] + UNSPEC_ROUND_TO_ODD))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsaddqpo %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "sub<mode>3_odd" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (unspec:IEEE128 + [(minus:IEEE128 + (match_operand:IEEE128 1 
"altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v"))] + UNSPEC_ROUND_TO_ODD))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xssubqpo %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "mul<mode>3_odd" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (unspec:IEEE128 + [(mult:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v"))] + UNSPEC_ROUND_TO_ODD))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsmulqpo %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "div<mode>3_odd" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (unspec:IEEE128 + [(div:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v"))] + UNSPEC_ROUND_TO_ODD))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsdivqpo %0,%1,%2" + [(set_attr "type" "vecdiv") + (set_attr "size" "128")]) + +(define_insn "sqrt<mode>2_odd" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (unspec:IEEE128 + [(sqrt:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v"))] + UNSPEC_ROUND_TO_ODD))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xssqrtqpo %0,%1" + [(set_attr "type" "vecdiv") + (set_attr "size" "128")]) + +(define_insn "fma<mode>4_odd" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (unspec:IEEE128 + [(fma:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "%v") + (match_operand:IEEE128 2 "altivec_register_operand" "v") + (match_operand:IEEE128 3 "altivec_register_operand" "0"))] + UNSPEC_ROUND_TO_ODD))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsmaddqpo %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "*fms<mode>4_odd" + [(set (match_operand:IEEE128 0 
"altivec_register_operand" "=v") + (unspec:IEEE128 + [(fma:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "%v") + (match_operand:IEEE128 2 "altivec_register_operand" "v") + (neg:IEEE128 + (match_operand:IEEE128 3 "altivec_register_operand" "0")))] + UNSPEC_ROUND_TO_ODD))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsmsubqpo %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "*nfma<mode>4_odd" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (neg:IEEE128 + (unspec:IEEE128 + [(fma:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "%v") + (match_operand:IEEE128 2 "altivec_register_operand" "v") + (match_operand:IEEE128 3 "altivec_register_operand" "0"))] + UNSPEC_ROUND_TO_ODD)))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsnmaddqpo %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "*nfms<mode>4_odd" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (neg:IEEE128 + (unspec:IEEE128 + [(fma:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "%v") + (match_operand:IEEE128 2 "altivec_register_operand" "v") + (neg:IEEE128 + (match_operand:IEEE128 3 "altivec_register_operand" "0")))] + UNSPEC_ROUND_TO_ODD)))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsnmsubqpo %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "trunc<mode>df2_odd" [(set (match_operand:DF 0 "vsx_register_operand" "=v") - (unspec:DF [(match_operand:IEEE128 1 "altivec_register_operand" "v")] + (unspec:DF [(float_truncate:DF + (match_operand:IEEE128 1 "altivec_register_operand" "v"))] UNSPEC_ROUND_TO_ODD))] "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" "xscvqpdpo %0,%1" Index: gcc/doc/extend.texi =================================================================== --- gcc/doc/extend.texi (revision 253267) +++ gcc/doc/extend.texi (working copy) @@ -15348,14 +15348,47 @@ that use the 
ISA 3.0 instruction set. @table @code @item __float128 __builtin_sqrtf128 (__float128) -Similar to @code{__builtin_sqrtf}, except the return and input types -are @code{__float128}. +Perform a 128-bit IEEE floating point square root operation. @findex __builtin_sqrtf128 @item __float128 __builtin_fmaf128 (__float128, __float128, __float128) -Similar to @code{__builtin_fma}, except the return and input types are -@code{__float128}. +Perform a 128-bit IEEE floating point fused multiply and add operation. @findex __builtin_fmaf128 + +@item __float128 __builtin_addf128_round_to_odd (__float128, __float128) +Perform a 128-bit IEEE floating point add using round to odd as the +rounding mode. +@findex __builtin_addf128_round_to_odd + +@item __float128 __builtin_subf128_round_to_odd (__float128, __float128) +Perform a 128-bit IEEE floating point subtract using round to odd as +the rounding mode. +@findex __builtin_subf128_round_to_odd + +@item __float128 __builtin_mulf128_round_to_odd (__float128, __float128) +Perform a 128-bit IEEE floating point multiply using round to odd as +the rounding mode. +@findex __builtin_mulf128_round_to_odd + +@item __float128 __builtin_divf128_round_to_odd (__float128, __float128) +Perform a 128-bit IEEE floating point divide using round to odd as +the rounding mode. +@findex __builtin_divf128_round_to_odd + +@item __float128 __builtin_sqrtf128_round_to_odd (__float128) +Perform a 128-bit IEEE floating point square root using round to odd +as the rounding mode. +@findex __builtin_sqrtf128_round_to_odd + +@item __float128 __builtin_fmaf128_round_to_odd (__float128, __float128, __float128) +Perform a 128-bit IEEE floating point fused multiply and add operation +using round to odd as the rounding mode. +@findex __builtin_fmaf128_round_to_odd + +@item double __builtin_truncf128_round_to_odd (__float128) +Convert a 128-bit IEEE floating point value to @code{double} using +round to odd as the rounding mode. 
+@findex __builtin_truncf128_round_to_odd @end table The following built-in functions are available for the PowerPC family Index: gcc/testsuite/gcc.target/powerpc/float128-odd.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/float128-odd.c (nonexistent) +++ gcc/testsuite/gcc.target/powerpc/float128-odd.c (working copy) @@ -0,0 +1,75 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-mpower9-vector -O2" } */ + +/* Test the generation of the round to odd instructions. */ +__float128 +f128_add(__float128 a, __float128 b) +{ + return __builtin_addf128_round_to_odd (a, b); +} + +__float128 +f128_sub (__float128 a, __float128 b) +{ + return __builtin_subf128_round_to_odd (a, b); +} + +__float128 +f128_mul (__float128 a, __float128 b) +{ + return __builtin_mulf128_round_to_odd (a, b); +} + +__float128 +f128_div (__float128 a, __float128 b) +{ + return __builtin_divf128_round_to_odd (a, b); +} + +__float128 +f128_sqrt (__float128 a) +{ + return __builtin_sqrtf128_round_to_odd (a); +} + +double +f128_trunc (__float128 a) +{ + return __builtin_truncf128_round_to_odd (a); +} + +__float128 +f128_fma (__float128 a, __float128 b, __float128 c) +{ + return __builtin_fmaf128_round_to_odd (a, b, c); +} + +__float128 +f128_fms (__float128 a, __float128 b, __float128 c) +{ + return __builtin_fmaf128_round_to_odd (a, b, -c); +} + +__float128 +f128_nfma (__float128 a, __float128 b, __float128 c) +{ + return - __builtin_fmaf128_round_to_odd (a, b, c); +} + +__float128 +f128_nfms (__float128 a, __float128 b, __float128 c) +{ + return - __builtin_fmaf128_round_to_odd (a, b, -c); +} + +/* { dg-final { scan-assembler {\mxsaddqpo\M} } } */ +/* { dg-final { scan-assembler {\mxssubqpo\M} } } */ +/* { dg-final { scan-assembler {\mxsmulqpo\M} } } */ +/* { dg-final { scan-assembler {\mxsdivqpo\M} } } */ +/* { dg-final { scan-assembler {\mxssqrtqpo\M} 
} } */ +/* { dg-final { scan-assembler {\mxscvqpdpo\M} } } */ +/* { dg-final { scan-assembler {\mxsmaddqpo\M} } } */ +/* { dg-final { scan-assembler {\mxsmsubqpo\M} } } */ +/* { dg-final { scan-assembler {\mxsnmaddqpo\M} } } */ +/* { dg-final { scan-assembler {\mxsnmsubqpo\M} } } */