On Fri, Aug 23, 2019 at 3:40 AM luoxhu <luo...@linux.ibm.com> wrote: > > Hi Richard, > > On 2019/8/13 17:10, Richard Biener wrote: > > On Tue, Aug 13, 2019 at 4:22 AM luoxhu <luo...@linux.ibm.com> wrote: > >> > >> Hi Richard, > >> > >> On 2019/8/12 16:51, Richard Biener wrote: > >>> On Mon, Aug 12, 2019 at 8:50 AM luoxhu <luo...@linux.ibm.com> wrote: > >>>> > >>>> Hi Richard, > >>>> Thanks for your comments, updated the v2 patch as below: > >>>> 1. Define and use builtin_with_linkage_p. > >>>> 2. Add comments. > >>>> 3. Add a testcase. > >>>> > >>>> In LTO mode, if static library and dynamic library contains same > >>>> function and both libraries are passed as arguments, linker will link > >>>> the function in dynamic library no matter the sequence. This patch > >>>> will output LTO symbol node as UNDEF if BUILT_IN_NORMAL function FNDECL > >>>> is a math function, then the function in static library will be linked > >>>> first if its sequence is ahead of the dynamic library. > >>> > >>> Comments below > >>> > >>>> gcc/ChangeLog > >>>> > >>>> 2019-08-12 Xiong Hu Luo <luo...@linux.ibm.com> > >>>> > >>>> PR lto/91287 > >>>> * builtins.c (builtin_with_linkage_p): New function. > >>>> * builtins.h (builtin_with_linkage_p): New function. > >>>> * symtab.c (write_symbol): Use builtin_with_linkage_p. > >>>> * lto-streamer-out.c > >>>> (symtab_node::output_to_lto_symbol_table_p): > >>>> Likewise. > >>>> > >>>> gcc/testsuite/ChangeLog > >>>> > >>>> 2019-08-12 Xiong Hu Luo <luo...@linux.ibm.com> > >>>> > >>>> PR lto/91287 > >>>> * gcc.dg/pr91287.c: New testcase. > >>>> --- > >>>> gcc/builtins.c | 89 ++++++++++++++++++++++++++++++++++ > >>>> gcc/builtins.h | 2 + > >>>> gcc/lto-streamer-out.c | 4 +- > >>>> gcc/symtab.c | 13 ++++- > >>>> gcc/testsuite/gcc.dg/pr91287.c | 40 +++++++++++++++ > >>>> 5 files changed, 145 insertions(+), 3 deletions(-) > >>>> create mode 100644 gcc/testsuite/gcc.dg/pr91287.c > >>>> > >>>> diff --git a/gcc/builtins.c b/gcc/builtins.c > >>>> index 695a9d191af..f4dea941a27 100644 > >>>> --- a/gcc/builtins.c > >>>> +++ b/gcc/builtins.c > >>>> @@ -11244,3 +11244,92 @@ target_char_cst_p (tree t, char *p) > >>>> *p = (char)tree_to_uhwi (t); > >>>> return true; > >>>> } > >>>> + > >>>> +/* Return true if DECL is a specified builtin math function. These > >>>> functions > >>>> + should have symbol in symbol table to provide linkage with faster > >>>> version of > >>>> + libraries. */ > >>> > >>> The comment should read like > >>> > >>> /* Return true if the builtin DECL is implemented in a standard > >>> library. Otherwise > >>> returns false which doesn't guarantee it is not (thus the list of > >>> handled builtins > >>> below may be incomplete). */ > >>> > >>>> +bool > >>>> +builtin_with_linkage_p (tree decl) > >>>> +{ > >>>> + if (!decl) > >>>> + return false; > >>> > >>> Omit this check please. > >>> > >>>> + if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) > >>>> + switch (DECL_FUNCTION_CODE (decl)) > >>>> + { > >>>> + CASE_FLT_FN (BUILT_IN_ACOS): > >>>> + CASE_FLT_FN (BUILT_IN_ACOSH): > >>>> + CASE_FLT_FN (BUILT_IN_ASIN): > >>>> + CASE_FLT_FN (BUILT_IN_ASINH): > >>>> + CASE_FLT_FN (BUILT_IN_ATAN): > >>>> + CASE_FLT_FN (BUILT_IN_ATANH): > >>>> + CASE_FLT_FN (BUILT_IN_ATAN2): > >>>> + CASE_FLT_FN (BUILT_IN_CBRT): > >>>> + CASE_FLT_FN (BUILT_IN_CEIL): > >>>> + CASE_FLT_FN_FLOATN_NX (BUILT_IN_CEIL): > >>>> + CASE_FLT_FN (BUILT_IN_COPYSIGN): > >>>> + CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN): > >>>> + CASE_FLT_FN (BUILT_IN_COS): > >>>> + CASE_FLT_FN (BUILT_IN_COSH): > >>>> + CASE_FLT_FN (BUILT_IN_ERF): > >>>> + CASE_FLT_FN (BUILT_IN_ERFC): > >>>> + CASE_FLT_FN (BUILT_IN_EXP): > >>>> + CASE_FLT_FN (BUILT_IN_EXP2): > >>>> + CASE_FLT_FN (BUILT_IN_EXPM1): > >>>> + CASE_FLT_FN (BUILT_IN_FABS): > >>>> + CASE_FLT_FN_FLOATN_NX (BUILT_IN_FABS): > >>>> + CASE_FLT_FN (BUILT_IN_FDIM): > >>>> + CASE_FLT_FN (BUILT_IN_FLOOR): > >>>> + CASE_FLT_FN_FLOATN_NX (BUILT_IN_FLOOR): > >>>> + CASE_FLT_FN (BUILT_IN_FMA): > >>>> + CASE_FLT_FN_FLOATN_NX (BUILT_IN_FMA): > >>>> + CASE_FLT_FN (BUILT_IN_FMAX): > >>>> + CASE_FLT_FN_FLOATN_NX (BUILT_IN_FMAX): > >>>> + CASE_FLT_FN (BUILT_IN_FMIN): > >>>> + CASE_FLT_FN_FLOATN_NX (BUILT_IN_FMIN): > >>>> + CASE_FLT_FN (BUILT_IN_FMOD): > >>>> + CASE_FLT_FN (BUILT_IN_FREXP): > >>>> + CASE_FLT_FN (BUILT_IN_HYPOT): > >>>> + CASE_FLT_FN (BUILT_IN_ILOGB): > >>>> + CASE_FLT_FN (BUILT_IN_LDEXP): > >>>> + CASE_FLT_FN (BUILT_IN_LGAMMA): > >>>> + CASE_FLT_FN (BUILT_IN_LLRINT): > >>>> + CASE_FLT_FN (BUILT_IN_LLROUND): > >>>> + CASE_FLT_FN (BUILT_IN_LOG): > >>>> + CASE_FLT_FN (BUILT_IN_LOG10): > >>>> + CASE_FLT_FN (BUILT_IN_LOG1P): > >>>> + CASE_FLT_FN (BUILT_IN_LOG2): > >>>> + CASE_FLT_FN (BUILT_IN_LOGB): > >>>> + CASE_FLT_FN (BUILT_IN_LRINT): > >>>> + CASE_FLT_FN (BUILT_IN_LROUND): > >>>> + CASE_FLT_FN (BUILT_IN_MODF): > >>>> + CASE_FLT_FN (BUILT_IN_NAN): > >>>> + CASE_FLT_FN (BUILT_IN_NEARBYINT): > >>>> + CASE_FLT_FN_FLOATN_NX (BUILT_IN_NEARBYINT): > >>>> + CASE_FLT_FN (BUILT_IN_NEXTAFTER): > >>>> + CASE_FLT_FN (BUILT_IN_NEXTTOWARD): > >>>> + CASE_FLT_FN (BUILT_IN_POW): > >>>> + CASE_FLT_FN (BUILT_IN_REMAINDER): > >>>> + CASE_FLT_FN (BUILT_IN_REMQUO): > >>>> + CASE_FLT_FN (BUILT_IN_RINT): > >>>> + CASE_FLT_FN_FLOATN_NX (BUILT_IN_RINT): > >>>> + CASE_FLT_FN (BUILT_IN_ROUND): > >>>> + CASE_FLT_FN_FLOATN_NX (BUILT_IN_ROUND): > >>>> + CASE_FLT_FN (BUILT_IN_SCALBLN): > >>>> + CASE_FLT_FN (BUILT_IN_SCALBN): > >>>> + CASE_FLT_FN (BUILT_IN_SIN): > >>>> + CASE_FLT_FN (BUILT_IN_SINH): > >>>> + CASE_FLT_FN (BUILT_IN_SINCOS): > >>>> + CASE_FLT_FN (BUILT_IN_SQRT): > >>>> + CASE_FLT_FN_FLOATN_NX (BUILT_IN_SQRT): > >>>> + CASE_FLT_FN (BUILT_IN_TAN): > >>>> + CASE_FLT_FN (BUILT_IN_TANH): > >>>> + CASE_FLT_FN (BUILT_IN_TGAMMA): > >>>> + CASE_FLT_FN (BUILT_IN_TRUNC): > >>>> + CASE_FLT_FN_FLOATN_NX (BUILT_IN_TRUNC): > >>>> + return true; > >>>> + default: > >>>> + break; > >>>> + } > >>>> + return false; > >>>> +} > >>>> diff --git a/gcc/builtins.h b/gcc/builtins.h > >>>> index 1ffb491d785..91cbd81be48 100644 > >>>> --- a/gcc/builtins.h > >>>> +++ b/gcc/builtins.h > >>>> @@ -151,4 +151,6 @@ extern internal_fn replacement_internal_fn (gcall *); > >>>> extern void warn_string_no_nul (location_t, const char *, tree, tree); > >>>> extern tree unterminated_array (tree, tree * = NULL, bool * = NULL); > >>>> > >>>> +extern bool builtin_with_linkage_p (tree); > >>> > >>> no vertical space above this line. > >>> > >>>> + > >>>> #endif /* GCC_BUILTINS_H */ > >>>> diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c > >>>> index 47a9143ae26..73bf45810f0 100644 > >>>> --- a/gcc/lto-streamer-out.c > >>>> +++ b/gcc/lto-streamer-out.c > >>>> @@ -2644,7 +2644,9 @@ write_symbol (struct streamer_tree_cache_d *cache, > >>>> > >>>> gcc_checking_assert (TREE_PUBLIC (t) > >>>> && (TREE_CODE (t) != FUNCTION_DECL > >>>> - || !fndecl_built_in_p (t)) > >>>> + || cgraph_node::get (t)->definition > >>>> + || !fndecl_built_in_p (t, BUILT_IN_NORMAL) > >>>> + || builtin_with_linkage_p (t)) > >>>> && !DECL_ABSTRACT_P (t) > >>>> && (!VAR_P (t) || !DECL_HARD_REGISTER (t))); > >>> > >>> Please remove the whole assert - both callers are guarded by > >>> symtab_node::output_to_lto_symbol_table_p which means it is redundant > >>> and fragile to keep in-sync. > >>> > >>>> diff --git a/gcc/symtab.c b/gcc/symtab.c > >>>> index 63e2820eb93..e806afdede4 100644 > >>>> --- a/gcc/symtab.c > >>>> +++ b/gcc/symtab.c > >>>> @@ -2377,8 +2377,17 @@ symtab_node::output_to_lto_symbol_table_p (void) > >>>> return false; > >>>> /* FIXME: Builtins corresponding to real functions probably should > >>>> have > >>>> symbol table entries. */ > >>> > >>> Remove this comment > >>> > >>>> - if (TREE_CODE (decl) == FUNCTION_DECL && fndecl_built_in_p (decl)) > >>>> - return false; > >>>> + if (TREE_CODE (decl) == FUNCTION_DECL && !definition > >>>> + && fndecl_built_in_p (decl)) > >>>> + { > >>>> + /* Math functions may have faster version in static library, > >>>> output the > >>>> + symbol for linkage. Functions have implementation in libgcc > >>>> will be > >>>> + excluded as the symbol name may mismatch. */ > >>> > >>> /* Builtins like those for most math functions have actual > >>> implementations in > >>> libraries so make sure to output references into the symbol table > >>> to make > >>> those libraries referenced. Note this is incomplete handling for > >>> now and > >>> only covers math functions. */ > >>> > >>>> + if (builtin_with_linkage_p (decl)) > >>>> + return true; > >>>> + else > >>>> + return false; > >>>> + } > >>>> > >>>> /* We have real symbol that should be in symbol table. However try > >>>> to trim > >>>> down the refernces to libraries bit more because linker will > >>>> otherwise > >>>> diff --git a/gcc/testsuite/gcc.dg/pr91287.c > >>>> b/gcc/testsuite/gcc.dg/pr91287.c > >>>> new file mode 100644 > >>>> index 00000000000..c816e0537aa > >>>> --- /dev/null > >>>> +++ b/gcc/testsuite/gcc.dg/pr91287.c > >>>> @@ -0,0 +1,40 @@ > >>>> +/* { dg-do assemble } */ > >>>> +/* { dg-options "-O2" } */ > >>> > >>> You don't use -flto here so the testcase doesn't exercise any of the > >>> patched > >>> code. Does it work when you add -flto here? That is, do > >>> scan-symbol[-not] properly use gcc-nm or the linker plugin? > >> > >> -flto is needed here to check this patch correctness, my mistake here, > >> thanks for catching. atan2 will exists in pr91287.o even without lto as > >> pr91287.o has the instruction "bl atan2". > > > > Sure, that's expected. It is expected that without the patch but with -flto > > the symbol doesn't appear. > > > > Note that to fail we rely on -fno-fat-lto-objects and linker plugin > > availability. > > So you should add > > > > /* { dg-require-linker-plugin "" } */ > > > > after /* { dg-do assemble } */ > > > > and use -O2 -flto -fno-fat-lto-objects -fuse-linker-plugin > > > > the other issue is that scan-symbol invokes 'nm' without any plugin > > arguments so we rely on plugin auto-loading and a system-installed > > plugin. We could use built gcc-nm here if present but would need to > > pass an appropriate -B argument, see how we find xg++ in g++.exp > > and scan-symbol-common in gcc-dg.exp. It might be better / easier > > to move the testcase to gcc.dg/lto/ but lto.exp might not support > > scan-symbol ... > > > > So... I think the patch is ok as-is if you omit the testcase which needs > > more work to be useful (and not sure if worth all the trouble). > > Committed the patch as r274411. > Since this patch could leverage the IBM MASS library and provides about > GEOMEAN 9% improvement for fprate(no change to intrate), especially for > 521.wrf_r (+48%),527.cam4_r (+30%) and 554.roms_r(+15%), backport it to > gcc-9-branch, gcc-8-branch and even gcc-7-branch?
I don't think it's a regression, we've always behaved that way. I'm OK with putting it on the gcc-9-branch though but please not 7 or 8. Thanks, Richard. > 503.bwaves_r 214 214 0.00% > 508.namd_r 287 287 0.00% > 510.parest_r 1121 1138 -1.52% > 511.povray_r 1049 1049 0.00% > 519.lbm_r 166 166 0.00% > 521.wrf_r 1095 564 48.49% > 526.blender_r 606 603 0.50% > 527.cam4_r 487 339 30.39% > 538.imagick_r 507 531 -4.73% > 544.nab_r 451 451 0.00% > 549.fotonik3d_r 332 332 0.00% > 554.roms_r 370 313 15.41% > specrate 497.21 471.04 5.26% > intrate 539.88 539.94 -0.01% > fprate 467.44 425.19 9.04% > > Thanks > Xionghu > > > > > Thanks, > > Richard. > > > >> After adding -flto the case also works as symbol is written to pr91287.o. > >> PS: update other changes in patch attached. > > > > > > > >> Xionghu > >> > >>> > >>>> +#include <stdio.h> > >>>> +#include <string.h> > >>>> +#include <math.h> > >>>> +#include <stdlib.h> > >>>> + > >>>> +double __attribute__((noinline)) zowie (double x, double y, double z) > >>>> +{ > >>>> + x = __builtin_clz(x); > >>>> + return atan2 (x * y, z); > >>>> +} > >>>> + > >>>> +double __attribute__((noinline)) rand_finite_double (void) > >>>> +{ > >>>> + union { > >>>> + double d; > >>>> + unsigned char uc[sizeof(double)]; > >>>> + } u; > >>>> + do { > >>>> + for (unsigned i = 0; i < sizeof u.uc; i++) { > >>>> + u.uc[i] = (unsigned char) rand(); > >>>> + } > >>>> + } while (!isfinite(u.d)); > >>>> + return u.d; > >>>> +} > >>>> + > >>>> + > >>>> +int main () > >>>> +{ > >>>> + char str[100]; > >>>> + strcpy(str, "test\n"); > >>>> + double a = rand_finite_double (); > >>>> + printf ("%lf, %s\n", zowie (a, 4.5, 2.2), str); > >>>> + return 0; > >>>> +} > >>>> + > >>>> +/* { dg-final { scan-symbol "atan2" } } */ > >>>> +/* { dg-final { scan-symbol-not "__builtin_clz" } } */ > >>>> -- > >>>> 2.21.0.777.g83232e3864 > >>>> > >>> > > >