On 2017-03-25 20:59:27 -0700, Andres Freund wrote: > On 2017-03-25 23:51:45 -0400, Tom Lane wrote: > > Andres Freund <and...@anarazel.de> writes: > > > On March 25, 2017 4:56:11 PM PDT, Ants Aasma <ants.aa...@eesti.ee> wrote: > > >> I haven't had the time to research this properly, but initial tests > > >> show that with GCC 6.2 adding > > >> > > >> #pragma GCC optimize ("no-crossjumping") > > >> > > >> fixes merging of the op tail jumps. > > >> > > >> Some quick and dirty benchmarking suggests that the benefit for the > > >> interpreter is about 15% (5% speedup on a workload that spends 1/3 in > > >> ExecInterpExpr). My idea of prefetching op->resnull/resvalue to local > > >> vars before the indirect jump is somewhere between a tiny benefit and > > >> no effect, certainly not worth introducing extra complexity. Clang 3.8 > > >> does the correct thing out of the box and is a couple of percent > > >> faster than GCC with the pragma. > > > > > That's large enough to be worth doing (although I recall you seeing all > > > jumps commonalized). We should probably do this on a per function basis > > > however (either using pragma push option, or function attributes). > > > > Seems like it would be fine to do it on a per-file basis. > > I personally find per-function annotation ala > __attribute__((optimize("no-crossjumping"))) > cleaner anyway. I tested that, and it seems to work. > > Obviously we'd have to hide that behind a configure test. Could also do > tests based on __GNUC__ / __GNUC_MINOR__, but that seems uglier.
Checking for this isn't entirely pretty - see my attached attempt at doing so. I considered hiding __attribute__((optimize("no-crossjumping"))) in execInterpExpr.c behind a macro (like PG_DISABLE_CROSSJUMPING), but I don't really think that makes things better. Comments? Greetings, Andres Freund
>From ce42086871ebfcbb7ae52266e9c4549b5958e80b Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Sun, 26 Mar 2017 22:38:21 -0700 Subject: [PATCH] Disable gcc's crossjumping optimization for ExecInterpExpr(). GCC merges code in ExecInterpExpr() too aggressively, reducing branch prediction accuracy. As this is performance critical code, go through the trouble of disabling the relevant optimization for the function. To do so, have to detect whether the compiler support __attribute__((optimize("no-crossjumping"))) as a function annotation. Do so in configure. Discussion: https://postgr.es/m/20170326035927.5mubkfdtaqlrg...@alap3.anarazel.de --- config/c-compiler.m4 | 35 ++++++++++++++++++++++++++++++ configure | 41 +++++++++++++++++++++++++++++++++++ configure.in | 1 + src/backend/executor/execExprInterp.c | 9 ++++++++ src/include/pg_config.h.in | 4 ++++ src/include/pg_config.h.win32 | 4 ++++ 6 files changed, 94 insertions(+) diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index 3321f226f3..ac72539dec 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -297,6 +297,41 @@ fi])# PGAC_C_COMPUTED_GOTO +# PGAC_C_FUNCATTR_NO_CROSSJUMPING +# ----------------------- +# Check if the C compiler understands the +# __attribute__((optimize("no-crossjumping"))) function attribute. +# Define HAVE_FUNCATTR_NO_CROSSJUMPING if so. +# +# At some later point it might make sense to generalize this so we can +# check for other optimization flags, but so far there's no need for +# that. +# +# Have to enable Werror, as some compilers (e.g. clang) would +# otherwise just warn about an unknown type of attribute. +AC_DEFUN([PGAC_C_FUNCATTR_NO_CROSSJUMPING], +[AC_CACHE_CHECK([for function attribute disabling crossjumping optimizations], pgac_cv_funcattr_no_crossjumping, +[ac_save_c_werror_flag=$ac_c_werror_flag +ac_c_werror_flag=yes +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ +extern int testme(void); +int +__attribute__((optimize("no-crossjumping"))) +testme(void) +{ + return 0; +} +])], +[pgac_cv_funcattr_no_crossjumping=yes], +[pgac_cv_funcattr_no_crossjumping=no]) +ac_c_werror_flag=$ac_save_c_werror_flag]) +if test x"$pgac_cv_funcattr_no_crossjumping" = xyes ; then +AC_DEFINE(HAVE_FUNCATTR_NO_CROSSJUMPING, 1, + [Define to 1 if your compiler understands __attribute__((optimize("no-crossjumping"))).]) +fi])# PGAC_C_FUNCATTR_NO_CROSSJUMPING + + + # PGAC_C_VA_ARGS # -------------- # Check if the C compiler understands C99-style variadic macros, diff --git a/configure b/configure index 4b8229e959..87c4799be3 100755 --- a/configure +++ b/configure @@ -11835,6 +11835,47 @@ if test x"$pgac_cv_computed_goto" = xyes ; then $as_echo "#define HAVE_COMPUTED_GOTO 1" >>confdefs.h fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for function attribute disabling crossjumping optimizations" >&5 +$as_echo_n "checking for function attribute disabling crossjumping optimizations... " >&6; } +if ${pgac_cv_funcattr_no_crossjumping+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag +ac_c_werror_flag=yes +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +extern int testme(void); +int +__attribute__((optimize("no-crossjumping"))) +testme(void) +{ + return 0; +} + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv_funcattr_no_crossjumping=yes +else + pgac_cv_funcattr_no_crossjumping=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_funcattr_no_crossjumping" >&5 +$as_echo "$pgac_cv_funcattr_no_crossjumping" >&6; } +if test x"$pgac_cv_funcattr_no_crossjumping" = xyes ; then + +$as_echo "#define HAVE_FUNCATTR_NO_CROSSJUMPING 1" >>confdefs.h + +fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __VA_ARGS__" >&5 $as_echo_n "checking for __VA_ARGS__... " >&6; } if ${pgac_cv__va_args+:} false; then : diff --git a/configure.in b/configure.in index 6c74214171..dc677a5dae 100644 --- a/configure.in +++ b/configure.in @@ -1336,6 +1336,7 @@ PGAC_C_BUILTIN_BSWAP64 PGAC_C_BUILTIN_CONSTANT_P PGAC_C_BUILTIN_UNREACHABLE PGAC_C_COMPUTED_GOTO +PGAC_C_FUNCATTR_NO_CROSSJUMPING PGAC_C_VA_ARGS PGAC_STRUCT_TIMEZONE PGAC_UNION_SEMUN diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 982d16c6c8..3a4e8f759b 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -269,8 +269,17 @@ ExecReadyInterpretedExpr(ExprState *state) * As a special case, return the dispatch table's address if state is NULL. * This is used by ExecInitInterpreter to set up the dispatch_table global. * (Only applies when EEO_USE_COMPUTED_GOTO is defined.) + * + * + * In various versions GCC merges parts of the various branches below - that's + * harmful because it reduces branch prediction accuracy. As this is + * performance critical, go to the trouble of disabling the relevant + * optimization if necessary (only gcc has that option). */ static Datum +#ifdef HAVE_FUNCATTR_NO_CROSSJUMPING +__attribute__((optimize("no-crossjumping"))) +#endif ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) { ExprEvalStep *op; diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index e1c1c9e9b4..93883780f4 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -188,6 +188,10 @@ /* Define to 1 if fseeko (and presumably ftello) exists and is declared. */ #undef HAVE_FSEEKO +/* Define to 1 if your compiler understands + __attribute__((optimize("no-crossjumping"))). */ +#undef HAVE_FUNCATTR_NO_CROSSJUMPING + /* Define to 1 if your compiler understands __func__. */ #undef HAVE_FUNCNAME__FUNC diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32 index 5af8369202..8c560883dc 100644 --- a/src/include/pg_config.h.win32 +++ b/src/include/pg_config.h.win32 @@ -139,6 +139,10 @@ /* Define to 1 if fseeko (and presumably ftello) exists and is declared. */ #define HAVE_FSEEKO 1 +/* Define to 1 if your compiler understands + __attribute__((optimize("no-crossjumping"))). */ +#undef HAVE_FUNCATTR_NO_CROSSJUMPING + /* Define to 1 if your compiler understands __func__. */ //#define HAVE_FUNCNAME__FUNC 1 -- 2.12.0.264.gd6db3f2165.dirty
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers