Re: [PATCH v2] powerpc: Optimized conversion of IBM long double to int128/int64
On 10/25/2018 09:40 PM, Joseph Myers wrote: On Thu, 25 Oct 2018, Rajalakshmi Srinivasaraghavan wrote: + if (__builtin_unpack_longdouble (a, 0) < TWO53) +{ + /* In this case the integer portion is completely contained + within the high double. So use the hardware convert to + integer doubleword, and then extend to int. */ + l1 = __builtin_unpack_longdouble (a, 0); + result = l1; But if the high double is a positive integer, and the low double is strictly negative (not -0), you need to subtract 1 to get a result that's correctly truncated towards zero. Are you suggesting something like this? if (__builtin_unpack_longdouble (a, 0) < TWO53) { l1 = __builtin_unpack_longdouble (a, 0); if (__builtin_unpack_longdouble (a, 1) < 0.0) result = l1 - 1; else result = l1; } + if (__builtin_unpack_longdouble (a, 0) < TWO53) +{ + /* In this case the integer portion is completely contained + within the high double. So use the hardware convert to + integer doubleword, and then extend to __int128. */ + l1 = __builtin_unpack_longdouble (a, 0); + result = l1; Likewise here. -- Thanks Rajalakshmi S
Re: [PATCH] powerpc: Optimized conversion of IBM long double to int128/int64
On 10/23/2018 10:07 PM, Segher Boessenkool wrote: On Tue, Oct 23, 2018 at 09:01:26PM +0530, Rajalakshmi Srinivasaraghavan wrote: This new implementation of fixunstfdi and fixunstfti gives 16X performance improvement. :-) * libgcc/config/rs6000/t-ppc64-fp (LIB2ADD): Add $(srcdir)/config/rs6000/fixunstfti.c. And fixunstfdi.c? Added now. * libgcc/config/rs6000/ppc64-fp.c (__fixunstfdi): Remove definition. * libgcc/config/rs6000/fixunstfti.c: New file. * libgcc/config/rs6000/fixunstfdi.c: Likewise. * libgcc/config/rs6000/ibm-ldouble.h: Likewise. libgcc/ has its own changelog; the path names in the changelog should be relative to that (so should start with config/). Fixed in the v2 patch. Does -m32 still work after this? (Did it before?) __int128 is not supported on powerpc m32 build. I tried to build gcc on powerpc 32bit machine to make sure there are no build failures. Okay for trunk with the changelog fixed up (unless it broke -m32 ;-) ) Segher -- Thanks Rajalakshmi S
[PATCH v2] powerpc: Optimized conversion of IBM long double to int128/int64
This new implementation of fixunstfdi and fixunstfti gives a 16X performance improvement. The design is focused on: - Making sure the end result is a pure leaf function that only needs builtins or inline functions. - Assuming power8 direct register transfer and accessing the IBM long double as an int bit field structure. - Understanding the quirks of IBM long double and decomposing the code into a set of optimized sub-cases. Tested on powerpc64le. libgcc ChangeLog: 2018-10-25 Steven Munroe Rajalakshmi Srinivasaraghavan * config/rs6000/t-ppc64-fp (LIB2ADD): Add $(srcdir)/config/rs6000/fixunstfti.c and $(srcdir)/config/rs6000/fixunstfdi.c. * config/rs6000/ppc64-fp.c (__fixunstfdi): Remove definition. * config/rs6000/fixunstfti.c: New file. * config/rs6000/fixunstfdi.c: Likewise. * config/rs6000/ibm-ldouble.h: Likewise. --- libgcc/config/rs6000/fixunstfdi.c | 124 libgcc/config/rs6000/fixunstfti.c | 125 + libgcc/config/rs6000/ibm-ldouble.h | 121 libgcc/config/rs6000/ppc64-fp.c| 24 -- libgcc/config/rs6000/t-ppc64-fp| 5 +- 5 files changed, 374 insertions(+), 25 deletions(-) create mode 100755 libgcc/config/rs6000/fixunstfdi.c create mode 100755 libgcc/config/rs6000/fixunstfti.c create mode 100755 libgcc/config/rs6000/ibm-ldouble.h diff --git a/libgcc/config/rs6000/fixunstfdi.c b/libgcc/config/rs6000/fixunstfdi.c new file mode 100755 index 000..1b1a4f280bd --- /dev/null +++ b/libgcc/config/rs6000/fixunstfdi.c @@ -0,0 +1,124 @@ +/* Convert IBM long double to 64bit unsigned integer. + + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file into + combinations with other programs, and to distribute those + combinations without any restriction coming from the use of this + file. (The Lesser General Public License restrictions do apply in + other respects; for example, they cover modification of the file, + and distribution when not linked into a combine executable.) + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined(__powerpc64__) || defined (__64BIT__) || defined(__ppc64__) +#include <stdint.h> +#include "ibm-ldouble.h" + +typedef unsigned int UDItype __attribute__ ((mode (DI))); +typedef float TFtype __attribute__ ((mode (TF))); +extern UDItype __fixunstfdi (TFtype); + +#define TWO53 9007199254740992.0L +#define TWO64 18446744073709551616.0L + +UDItype +__fixunstfdi (TFtype a) +{ + unsigned long result; + unsigned long qi0, qi1; + union ibm_extended_long_double ld; + uint64_t l0, l1; + long exp0, exp1; + const uint64_t two52 = 0x10000000000000; + if (__builtin_unpack_longdouble (a, 0) < TWO53) +{ + /* In this case the integer portion is completely contained + within the high double. So use the hardware convert to + integer doubleword, and then extend to int. 
*/ + l1 = __builtin_unpack_longdouble (a, 0); + result = l1; +} + else +{ + if (a < TWO64) + { + ld.ld = a; + l0 = two52 | ((uint64_t)ld.d[0].ieee.mantissa0 << 32) + | ld.d[0].ieee.mantissa1; + l1 = two52 | ((uint64_t)ld.d[1].ieee.mantissa0 << 32) + | ld.d[1].ieee.mantissa1; + exp0 = ld.d[0].ieee.exponent - IEEE754_DOUBLE_BIAS; + exp1 = ld.d[1].ieee.exponent - IEEE754_DOUBLE_BIAS; + /* The high double shift is non-negative because in this +case we know the value is greater than 2^53 - 1. */ + qi0 = l0; + qi0 = qi0 << (exp0 - 52); + /* The low double is tricky because it could be +zero/denormal and have a large negative exponent. */ + if ( exp1 > -1022) + { + /* Need to right justify the integer portion of the +low double. This may be a left or right shift. */ + exp1 = exp1 - 52; + if (exp1 < 0) + { + /* Negative exponent, shift right to tru
[PATCH] powerpc: Optimized conversion of IBM long double to int128/int64
This new implementation of fixunstfdi and fixunstfti gives a 16X performance improvement. The design is focused on: - Making sure the end result is a pure leaf function that only needs builtins or inline functions. - Assuming power8 direct register transfer and accessing the IBM long double as an int bit field structure. - Understanding the quirks of IBM long double and decomposing the code into a set of optimized sub-cases. Tested on powerpc64le. 2018-10-20 Steven Munroe Rajalakshmi Srinivasaraghavan * libgcc/config/rs6000/t-ppc64-fp (LIB2ADD): Add $(srcdir)/config/rs6000/fixunstfti.c. * libgcc/config/rs6000/ppc64-fp.c (__fixunstfdi): Remove definition. * libgcc/config/rs6000/fixunstfti.c: New file. * libgcc/config/rs6000/fixunstfdi.c: Likewise. * libgcc/config/rs6000/ibm-ldouble.h: Likewise. --- libgcc/config/rs6000/fixunstfdi.c | 124 libgcc/config/rs6000/fixunstfti.c | 125 + libgcc/config/rs6000/ibm-ldouble.h | 121 libgcc/config/rs6000/ppc64-fp.c| 24 -- libgcc/config/rs6000/t-ppc64-fp| 5 +- 5 files changed, 374 insertions(+), 25 deletions(-) create mode 100755 libgcc/config/rs6000/fixunstfdi.c create mode 100755 libgcc/config/rs6000/fixunstfti.c create mode 100755 libgcc/config/rs6000/ibm-ldouble.h diff --git a/libgcc/config/rs6000/fixunstfdi.c b/libgcc/config/rs6000/fixunstfdi.c new file mode 100755 index 000..1b1a4f280bd --- /dev/null +++ b/libgcc/config/rs6000/fixunstfdi.c @@ -0,0 +1,124 @@ +/* Convert IBM long double to 64bit unsigned integer. + + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file into + combinations with other programs, and to distribute those + combinations without any restriction coming from the use of this + file. (The Lesser General Public License restrictions do apply in + other respects; for example, they cover modification of the file, + and distribution when not linked into a combine executable.) + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined(__powerpc64__) || defined (__64BIT__) || defined(__ppc64__) +#include <stdint.h> +#include "ibm-ldouble.h" + +typedef unsigned int UDItype __attribute__ ((mode (DI))); +typedef float TFtype __attribute__ ((mode (TF))); +extern UDItype __fixunstfdi (TFtype); + +#define TWO53 9007199254740992.0L +#define TWO64 18446744073709551616.0L + +UDItype +__fixunstfdi (TFtype a) +{ + unsigned long result; + unsigned long qi0, qi1; + union ibm_extended_long_double ld; + uint64_t l0, l1; + long exp0, exp1; + const uint64_t two52 = 0x10000000000000; + if (__builtin_unpack_longdouble (a, 0) < TWO53) +{ + /* In this case the integer portion is completely contained + within the high double. So use the hardware convert to + integer doubleword, and then extend to int. 
*/ + l1 = __builtin_unpack_longdouble (a, 0); + result = l1; +} + else +{ + if (a < TWO64) + { + ld.ld = a; + l0 = two52 | ((uint64_t)ld.d[0].ieee.mantissa0 << 32) + | ld.d[0].ieee.mantissa1; + l1 = two52 | ((uint64_t)ld.d[1].ieee.mantissa0 << 32) + | ld.d[1].ieee.mantissa1; + exp0 = ld.d[0].ieee.exponent - IEEE754_DOUBLE_BIAS; + exp1 = ld.d[1].ieee.exponent - IEEE754_DOUBLE_BIAS; + /* The high double shift is non-negative because in this +case we know the value is greater than 2^53 - 1. */ + qi0 = l0; + qi0 = qi0 << (exp0 - 52); + /* The low double is tricky because it could be +zero/denormal and have a large negative exponent. */ + if ( exp1 > -1022) + { + /* Need to right justify the integer portion of the +low double. This may be a left or right shift. */ + exp1 = exp1 - 52; + if (exp1 < 0) + { + /* Negative exponent, shift right to truncate. */ +