Re: [PATCH v9] Practical improvement to libgcc complex divide

Bernhard Reutner-Fischer via Gcc-patches Wed, 31 Mar 2021 10:03:19 -0700

On Fri, 26 Mar 2021 23:14:41 +0000
Patrick McGehearty via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:


> diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
> index 9f993c4..c0d9e57 100644
> --- a/gcc/c-family/c-cppbuiltin.c
> +++ b/gcc/c-family/c-cppbuiltin.c
> @@ -1277,8 +1277,10 @@ c_cpp_builtins (cpp_reader *pfile)
>       {
>         scalar_float_mode mode = mode_iter.require ();
>         const char *name = GET_MODE_NAME (mode);
> +       const int name_len = strlen (name);

strlen returns a size_t

Funny that we do not have a pre-seeded mode_name_len array but call
strlen on modes over and over again.

> +       char float_h_prefix[16] = "";
>         char *macro_name
> -         = (char *) alloca (strlen (name)
> +         = (char *) alloca (name_len
>                              + sizeof ("__LIBGCC__MANT_DIG__"));
>         sprintf (macro_name, "__LIBGCC_%s_MANT_DIG__", name);
>         builtin_define_with_int_value (macro_name,
> @@ -1286,20 +1288,29 @@ c_cpp_builtins (cpp_reader *pfile)
>         if (!targetm.scalar_mode_supported_p (mode)
>             || !targetm.libgcc_floating_mode_supported_p (mode))
>           continue;
> -       macro_name = (char *) alloca (strlen (name)
> +       macro_name = (char *) alloca (name_len
>                                       + sizeof ("__LIBGCC_HAS__MODE__"));
>         sprintf (macro_name, "__LIBGCC_HAS_%s_MODE__", name);
>         cpp_define (pfile, macro_name);
> -       macro_name = (char *) alloca (strlen (name)
> +       macro_name = (char *) alloca (name_len
>                                       + sizeof ("__LIBGCC__FUNC_EXT__"));
>         sprintf (macro_name, "__LIBGCC_%s_FUNC_EXT__", name);

The above should have been split out as separate independent patchlet
to get it out of the way.

As noted already the use of alloca is a pre-existing (coding-style) bug
and we should use XALLOCAVEC or appropriately sized fixed buffers to
begin with. The amount of alloca in defining all these is amazing.

> diff --git a/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkd.c 
> b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkd.c
> new file mode 100644
> index 0000000..409123f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkd.c
> @@ -0,0 +1,126 @@
> +/*
> +  Program to test complex divide for correct results on selected values.
> +  Checking known failure points.
> +*/
> +
> +#include <float.h>
> +
> +extern void abort ();
> +extern void exit ();

As Joseph pointed out in the v8 review already, please use prototyped
function declarations in all new tests.

> diff --git a/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkld.c 
> b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkld.c
> new file mode 100644
> index 0000000..28d707d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkld.c
> @@ -0,0 +1,167 @@
> +/*
> +  Program to test complex divide for correct results on selected values.
> +  Checking known failure points.
> +*/
> +
> +#include <float.h>
> +
> +extern void abort ();
> +extern void exit ();

Likewise.

> +#if (LDBL_MAX_EXP < 2048)
> +  /*
> +    Test values when mantissa is 11 or fewer bits.  Either LDBL is
> +    using DBL on this platform or we are using IBM extended double
> +    precision Test values will be automatically trucated the available
> +    precision.

s/trucated/truncated/g; # with an 'n'
I have trouble parsing the sentence nevertheless. There might be a
missing "which" somewhere and maybe a "to"?

> +#else
> +  /*
> +    Test values intended for either IEEE128 or Intel80 formats.  In
> +    either case, 15 bits of exponent are available.  Test values will
> +    be automatically trucated the available precision.
> +  */

again, truNcated and a couple of words missing?

> diff --git a/libgcc/config/rs6000/_divkc3.c b/libgcc/config/rs6000/_divkc3.c
> index d261f40..f57e14f 100644
> --- a/libgcc/config/rs6000/_divkc3.c
> +++ b/libgcc/config/rs6000/_divkc3.c
> @@ -37,31 +37,118 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
>  If not, see
>  #define __divkc3 __divkc3_sw
>  #endif
>  
> +#define RBIG   (__LIBGCC_TF_MAX__ / 2)
> +#define RMIN   (__LIBGCC_TF_MIN__)
> +#define RMIN2  (__LIBGCC_TF_EPSILON__)
> +#define RMINSCAL (1 / __LIBGCC_TF_EPSILON__)
> +#define RMAX2  (RBIG * RMIN2)
> +
> +
>  TCtype
>  __divkc3 (TFtype a, TFtype b, TFtype c, TFtype d)
>  {
>    TFtype denom, ratio, x, y;
>    TCtype res;
>  
> -  /* ??? We can get better behavior from logarithmic scaling instead of
> -     the division.  But that would mean starting to link libgcc against
> -     libm.  We could implement something akin to ldexp/frexp as gcc builtins
> -     fairly easily...  */
> +  /* long double has significant potential underflow/overflow errors than
> +     can be greatly reduced with a limited number of tests and adjustments.
> +  */

"than" doesn't parse. that?

>    else
>      {
> +      /* Prevent underflow when denominator is near max representable.  */
> +      if (FABS (c) >= RBIG)
> +     {
> +       a = a / 2;
> +       b = b / 2;
> +       c = c / 2;
> +       d = d / 2;
> +     }
> +      /* Avoid overflow/underflow issues when both c and d are small.
> +      Scaling up helps avoid some underflows.
> +      No new overflow possible since both c&d are less than RMIN2.  */
> +      if (FABS (c) < RMIN2)
> +     {
> +       a = a * RMINSCAL;
> +       b = b * RMINSCAL;
> +       c = c * RMINSCAL;
> +       d = d * RMINSCAL;
> +     }
> +      else
> +     {
> +       if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (c) < RMAX2))
> +           || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
> +               && (FABS (c) < RMAX2)))
> +         {
> +           a = a * RMINSCAL;
> +           b = b * RMINSCAL;
> +           c = c * RMINSCAL;
> +           d = d * RMINSCAL;
> +         }
> +     }
>        ratio = d / c;
>        denom = (d * ratio) + c;
> -      x = ((b * ratio) + a) / denom;
> -      y = (b - (a * ratio)) / denom;
> +      /* Choose alternate order of computation if ratio is subnormal.  */
> +      if (FABS (ratio) > RMIN)
> +     {
> +       x = ((b * ratio) + a) / denom;
> +       y = (b - (a * ratio)) / denom;
> +     }
> +      else
> +     {
> +       x = (a + (d * (b / c))) / denom;
> +       y = (b - (d * (a / c))) / denom;
> +     }
>      }
>  
> +

spurious whitespace change above.

>    /* Recover infinities and zeros that computed as NaN+iNaN; the only cases
>       are nonzero/zero, infinite/finite, and finite/infinite.  */
>    if (isnan (x) && isnan (y))
> diff --git a/libgcc/libgcc2.c b/libgcc/libgcc2.c
> index 17de0a7..42fd07a 100644
> --- a/libgcc/libgcc2.c
> +++ b/libgcc/libgcc2.c
> @@ -1860,33 +1860,57 @@ NAME (TYPE x, int m)
>  #if defined(L_mulhc3) || defined(L_divhc3)
>  # define MTYPE       HFtype
>  # define CTYPE       HCtype
> +# define XMTYPE SFtype
> +# define XCTYPE SCtype

X sounds pretty extended while it's "just" SF. Doesn't matter much
though.

>  # define MODE        hc
>  # define CEXT        __LIBGCC_HF_FUNC_EXT__
>  # define NOTRUNC (!__LIBGCC_HF_EXCESS_PRECISION__)
>  #elif defined(L_mulsc3) || defined(L_divsc3)
>  # define MTYPE       SFtype
>  # define CTYPE       SCtype
> +# define XMTYPE DFtype
> +# define XCTYPE DCtype

likewise.

> @@ -1994,30 +2018,136 @@ CONCAT3(__mul,MODE,3) (MTYPE a, MTYPE b, MTYPE c, 
> MTYPE d)
>  CTYPE
>  CONCAT3(__div,MODE,3) (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
>  {
> +#if defined(L_divhc3)                                                \
> +  || (defined(L_divsc3) && defined(__LIBGCC_HAVE_HWDBL__) )
> +
> +  /* Half precision is handled with float precision.
> +     float is handled with double precision when double precision
> +     hardware is available.
> +     Due to the additional precision, the simple complex divide
> +     method (without Smith's method) is sufficient to get accurate
> +     answers and runs slightly faster than Smith's method.  */
> +
> +  XMTYPE aa, bb, cc, dd;
> +  XMTYPE denom;
> +  MTYPE x, y;
> +  CTYPE res;
> +  aa = a;
> +  bb = b;
> +  cc = c;
> +  dd = d;
> +
> +  denom = (cc * cc) + (dd * dd);
> +  x = ((aa * cc) + (bb * dd)) / denom;
> +  y = ((bb * cc) - (aa * dd)) / denom;
> +
> +#else
>    MTYPE denom, ratio, x, y;
>    CTYPE res;
>  
> -  /* ??? We can get better behavior from logarithmic scaling instead of
> -     the division.  But that would mean starting to link libgcc against
> -     libm.  We could implement something akin to ldexp/frexp as gcc builtins
> -     fairly easily...  */
> +  /* double, extended, long double have significant potential
> +     underflow/overflow errors than can be greatly reduced with

"than" ?

thanks,

Re: [PATCH v9] Practical improvement to libgcc complex divide

Reply via email to