Hi Florian,

This is a report from Linaro’s automatic benchmarking system, which tracks 
upstream components and automatically bisects performance regressions down to a 
single commit.  We have recently enabled these emails to be sent to patch 
authors automatically.

It appears that after your patch SPEC CPU2006’s 410.bwaves slowed down by 4% on 
aarch64-linux-gnu when built with -O2 -flto.  Any theories on what could’ve 
caused it?

This is one of the first reports, and we welcome feedback on improving 
reporting template.

Regards,

--
Maxim Kuvyrkov
https://www.linaro.org

> On 26 Jun 2021, at 12:12, ci_not...@linaro.org wrote:
> 
> Successfully identified regression in *glibc* in CI configuration 
> tcwg_bmk_gnu_tx1/gnu-master-aarch64-spec2k6-O2_LTO.  So far, this commit has 
> regressed CI configurations:
> - tcwg_bmk_gnu_tx1/gnu-master-aarch64-spec2k6-O2_LTO
> 
> Culprit:
> <cut>
> commit f79f2065817e080f65f3c3a2fee966f5a97f1746
> Author: Florian Weimer <fwei...@redhat.com>
> Date:   Wed Apr 21 19:49:50 2021 +0200
> 
>    nptl: Move legacy unwinding implementation into libc
> 
>    It is still used internally.  Since unwinding is now available
>    unconditionally, avoid indirect calls through function pointers loaded
>    from the stack by inlining the non-cancellation cleanup code.  This
>    avoids a regression in security hardening.
> 
>    The out-of-line  __libc_cleanup_routine implementation is no longer
>    needed because the inline definition is now static __always_inline.
> 
>    Reviewed-by: Adhemerval Zanella  <adhemerval.zane...@linaro.org>
> </cut>
> 
> Results regressed to (for first_bad == 
> f79f2065817e080f65f3c3a2fee966f5a97f1746)
> # reset_artifacts:
> -10
> # build_abe binutils:
> -9
> # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
> -8
> # build_abe linux:
> -7
> # build_abe glibc:
> -6
> # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
> -5
> # true:
> 0
> # benchmark -O2_LTO -- 
> artifacts/build-f79f2065817e080f65f3c3a2fee966f5a97f1746/results_id:
> 1
> # 410.bwaves,bwaves_base.default                                regressed by 
> 104
> 
> from (for last_good == 5715c29e91076800418833f2196f2082f439da75)
> # reset_artifacts:
> -10
> # build_abe binutils:
> -9
> # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
> -8
> # build_abe linux:
> -7
> # build_abe glibc:
> -6
> # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
> -5
> # true:
> 0
> # benchmark -O2_LTO -- 
> artifacts/build-5715c29e91076800418833f2196f2082f439da75/results_id:
> 1
> 
> Artifacts of last_good build: 
> https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aarch64-spec2k6-O2_LTO/18/artifact/artifacts/build-5715c29e91076800418833f2196f2082f439da75/
> Results ID of last_good: 
> tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-master-aarch64-spec2k6-O2_LTO/461
> Artifacts of first_bad build: 
> https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aarch64-spec2k6-O2_LTO/18/artifact/artifacts/build-f79f2065817e080f65f3c3a2fee966f5a97f1746/
> Results ID of first_bad: 
> tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-master-aarch64-spec2k6-O2_LTO/470
> Build top page/logs: 
> https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aarch64-spec2k6-O2_LTO/18/
> 
> Configuration details:
> 
> 
> Reproduce builds:
> <cut>
> mkdir investigate-glibc-f79f2065817e080f65f3c3a2fee966f5a97f1746
> cd investigate-glibc-f79f2065817e080f65f3c3a2fee966f5a97f1746
> 
> git clone https://git.linaro.org/toolchain/jenkins-scripts
> 
> mkdir -p artifacts/manifests
> curl -o artifacts/manifests/build-baseline.sh 
> https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aarch64-spec2k6-O2_LTO/18/artifact/artifacts/manifests/build-baseline.sh
>  --fail
> curl -o artifacts/manifests/build-parameters.sh 
> https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aarch64-spec2k6-O2_LTO/18/artifact/artifacts/manifests/build-parameters.sh
>  --fail
> curl -o artifacts/test.sh 
> https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aarch64-spec2k6-O2_LTO/18/artifact/artifacts/test.sh
>  --fail
> chmod +x artifacts/test.sh
> 
> # Reproduce the baseline build (build all pre-requisites)
> ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh
> 
> cd glibc
> 
> # Reproduce first_bad build
> git checkout --detach f79f2065817e080f65f3c3a2fee966f5a97f1746
> ../artifacts/test.sh
> 
> # Reproduce last_good build
> git checkout --detach 5715c29e91076800418833f2196f2082f439da75
> ../artifacts/test.sh
> 
> cd ..
> </cut>
> 
> History of pending regressions and results: 
> https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/ci/tcwg_bmk_gnu_tx1/gnu-master-aarch64-spec2k6-O2_LTO
> 
> Artifacts: 
> https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aarch64-spec2k6-O2_LTO/18/artifact/artifacts/
> Build log: 
> https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aarch64-spec2k6-O2_LTO/18/consoleText
> 
> Full commit (up to 1000 lines):
> <cut>
> commit f79f2065817e080f65f3c3a2fee966f5a97f1746
> Author: Florian Weimer <fwei...@redhat.com>
> Date:   Wed Apr 21 19:49:50 2021 +0200
> 
>    nptl: Move legacy unwinding implementation into libc
> 
>    It is still used internally.  Since unwinding is now available
>    unconditionally, avoid indirect calls through function pointers loaded
>    from the stack by inlining the non-cancellation cleanup code.  This
>    avoids a regression in security hardening.
> 
>    The out-of-line  __libc_cleanup_routine implementation is no longer
>    needed because the inline definition is now static __always_inline.
> 
>    Reviewed-by: Adhemerval Zanella  <adhemerval.zane...@linaro.org>
> ---
> nptl/Versions                    |  2 ++
> nptl/cleanup_defer_compat.c      | 56 ++---------------------------------
> nptl/libc-cleanup.c              | 64 ++++++++++++++++++++++++++++++++++++++--
> nptl/nptl-init.c                 |  2 --
> sysdeps/nptl/libc-lock.h         | 59 ++++++++++++++++++------------------
> sysdeps/nptl/libc-lockP.h        | 26 +---------------
> sysdeps/nptl/pthread-functions.h |  4 ---
> 7 files changed, 97 insertions(+), 116 deletions(-)
> 
> diff --git a/nptl/Versions b/nptl/Versions
> index 60202b4969..2e5a964b11 100644
> --- a/nptl/Versions
> +++ b/nptl/Versions
> @@ -87,6 +87,8 @@ libc {
>     __futex_abstimed_wait64;
>     __futex_abstimed_wait_cancelable64;
>     __libc_alloca_cutoff;
> +    __libc_cleanup_pop_restore;
> +    __libc_cleanup_push_defer;
>     __libc_dl_error_tsd;
>     __libc_pthread_init;
>     __lll_clocklock_elision;
> diff --git a/nptl/cleanup_defer_compat.c b/nptl/cleanup_defer_compat.c
> index 49ef53ea60..1957318208 100644
> --- a/nptl/cleanup_defer_compat.c
> +++ b/nptl/cleanup_defer_compat.c
> @@ -17,41 +17,15 @@
>    <https://www.gnu.org/licenses/>.  */
> 
> #include "pthreadP.h"
> -
> +#include <libc-lock.h>
> 
> void
> _pthread_cleanup_push_defer (struct _pthread_cleanup_buffer *buffer,
>                            void (*routine) (void *), void *arg)
> {
> -  struct pthread *self = THREAD_SELF;
> -
>   buffer->__routine = routine;
>   buffer->__arg = arg;
> -  buffer->__prev = THREAD_GETMEM (self, cleanup);
> -
> -  int cancelhandling = THREAD_GETMEM (self, cancelhandling);
> -
> -  /* Disable asynchronous cancellation for now.  */
> -  if (__glibc_unlikely (cancelhandling & CANCELTYPE_BITMASK))
> -    while (1)
> -      {
> -     int curval = THREAD_ATOMIC_CMPXCHG_VAL (self, cancelhandling,
> -                                             cancelhandling
> -                                             & ~CANCELTYPE_BITMASK,
> -                                             cancelhandling);
> -     if (__glibc_likely (curval == cancelhandling))
> -       /* Successfully replaced the value.  */
> -       break;
> -
> -     /* Prepare for the next round.  */
> -     cancelhandling = curval;
> -      }
> -
> -  buffer->__canceltype = (cancelhandling & CANCELTYPE_BITMASK
> -                       ? PTHREAD_CANCEL_ASYNCHRONOUS
> -                       : PTHREAD_CANCEL_DEFERRED);
> -
> -  THREAD_SETMEM (self, cleanup, buffer);
> +  __libc_cleanup_push_defer (buffer);
> }
> strong_alias (_pthread_cleanup_push_defer, __pthread_cleanup_push_defer)
> 
> @@ -60,31 +34,7 @@ void
> _pthread_cleanup_pop_restore (struct _pthread_cleanup_buffer *buffer,
>                             int execute)
> {
> -  struct pthread *self = THREAD_SELF;
> -
> -  THREAD_SETMEM (self, cleanup, buffer->__prev);
> -
> -  int cancelhandling;
> -  if (__builtin_expect (buffer->__canceltype != PTHREAD_CANCEL_DEFERRED, 0)
> -      && ((cancelhandling = THREAD_GETMEM (self, cancelhandling))
> -       & CANCELTYPE_BITMASK) == 0)
> -    {
> -      while (1)
> -     {
> -       int curval = THREAD_ATOMIC_CMPXCHG_VAL (self, cancelhandling,
> -                                               cancelhandling
> -                                               | CANCELTYPE_BITMASK,
> -                                               cancelhandling);
> -       if (__glibc_likely (curval == cancelhandling))
> -         /* Successfully replaced the value.  */
> -         break;
> -
> -       /* Prepare for the next round.  */
> -       cancelhandling = curval;
> -     }
> -
> -      CANCELLATION_P (self);
> -    }
> +  __libc_cleanup_pop_restore (buffer);
> 
>   /* If necessary call the cleanup routine after we removed the
>      current cleanup block from the list.  */
> diff --git a/nptl/libc-cleanup.c b/nptl/libc-cleanup.c
> index 61f97eceda..14ccfe9285 100644
> --- a/nptl/libc-cleanup.c
> +++ b/nptl/libc-cleanup.c
> @@ -17,11 +17,69 @@
>    <https://www.gnu.org/licenses/>.  */
> 
> #include "pthreadP.h"
> +#include <tls.h>
> +#include <libc-lock.h>
> 
> +void
> +__libc_cleanup_push_defer (struct _pthread_cleanup_buffer *buffer)
> +{
> +  struct pthread *self = THREAD_SELF;
> +
> +  buffer->__prev = THREAD_GETMEM (self, cleanup);
> +
> +  int cancelhandling = THREAD_GETMEM (self, cancelhandling);
> +
> +  /* Disable asynchronous cancellation for now.  */
> +  if (__glibc_unlikely (cancelhandling & CANCELTYPE_BITMASK))
> +    while (1)
> +      {
> +     int curval = THREAD_ATOMIC_CMPXCHG_VAL (self, cancelhandling,
> +                                             cancelhandling
> +                                             & ~CANCELTYPE_BITMASK,
> +                                             cancelhandling);
> +     if (__glibc_likely (curval == cancelhandling))
> +       /* Successfully replaced the value.  */
> +       break;
> +
> +     /* Prepare for the next round.  */
> +     cancelhandling = curval;
> +      }
> +
> +  buffer->__canceltype = (cancelhandling & CANCELTYPE_BITMASK
> +                       ? PTHREAD_CANCEL_ASYNCHRONOUS
> +                       : PTHREAD_CANCEL_DEFERRED);
> +
> +  THREAD_SETMEM (self, cleanup, buffer);
> +}
> +libc_hidden_def (__libc_cleanup_push_defer)
> 
> void
> -__libc_cleanup_routine (struct __pthread_cleanup_frame *f)
> +__libc_cleanup_pop_restore (struct _pthread_cleanup_buffer *buffer)
> {
> -  if (f->__do_it)
> -    f->__cancel_routine (f->__cancel_arg);
> +  struct pthread *self = THREAD_SELF;
> +
> +  THREAD_SETMEM (self, cleanup, buffer->__prev);
> +
> +  int cancelhandling;
> +  if (__builtin_expect (buffer->__canceltype != PTHREAD_CANCEL_DEFERRED, 0)
> +      && ((cancelhandling = THREAD_GETMEM (self, cancelhandling))
> +       & CANCELTYPE_BITMASK) == 0)
> +    {
> +      while (1)
> +     {
> +       int curval = THREAD_ATOMIC_CMPXCHG_VAL (self, cancelhandling,
> +                                               cancelhandling
> +                                               | CANCELTYPE_BITMASK,
> +                                               cancelhandling);
> +       if (__glibc_likely (curval == cancelhandling))
> +         /* Successfully replaced the value.  */
> +         break;
> +
> +       /* Prepare for the next round.  */
> +       cancelhandling = curval;
> +     }
> +
> +      CANCELLATION_P (self);
> +    }
> }
> +libc_hidden_def (__libc_cleanup_pop_restore)
> diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c
> index 2c7e2222d4..43e2564e59 100644
> --- a/nptl/nptl-init.c
> +++ b/nptl/nptl-init.c
> @@ -95,8 +95,6 @@ static const struct pthread_functions pthread_functions =
>     .ptr___pthread_key_create = __pthread_key_create,
>     .ptr___pthread_getspecific = __pthread_getspecific,
>     .ptr___pthread_setspecific = __pthread_setspecific,
> -    .ptr__pthread_cleanup_push_defer = __pthread_cleanup_push_defer,
> -    .ptr__pthread_cleanup_pop_restore = __pthread_cleanup_pop_restore,
>     .ptr_nthreads = &__nptl_nthreads,
>     .ptr___pthread_unwind = &__pthread_unwind,
>     .ptr__nptl_deallocate_tsd = __nptl_deallocate_tsd,
> diff --git a/sysdeps/nptl/libc-lock.h b/sysdeps/nptl/libc-lock.h
> index dea96121b3..e8a5e68a12 100644
> --- a/sysdeps/nptl/libc-lock.h
> +++ b/sysdeps/nptl/libc-lock.h
> @@ -143,39 +143,40 @@ typedef struct __libc_lock_recursive_opaque__ 
> __libc_lock_recursive_t;
>   __libc_maybe_call (__pthread_mutex_unlock, (&(NAME).mutex), 0)
> #endif
> 
> -/* Note that for I/O cleanup handling we are using the old-style
> -   cancel handling.  It does not have to be integrated with C++ since
> -   no C++ code is called in the middle.  The old-style handling is
> -   faster and the support is not going away.  */
> -extern void _pthread_cleanup_push_defer (struct _pthread_cleanup_buffer 
> *buffer,
> -                                      void (*routine) (void *), void *arg);
> -extern void _pthread_cleanup_pop_restore (struct _pthread_cleanup_buffer 
> *buffer,
> -                                       int execute);
> +/* Put the unwind buffer BUFFER on the per-thread callback stack.  The
> +   caller must fill BUFFER->__routine and BUFFER->__arg before calling
> +   this function.  */
> +void __libc_cleanup_push_defer (struct _pthread_cleanup_buffer *buffer);
> +libc_hidden_proto (__libc_cleanup_push_defer)
> +/* Remove BUFFER from the unwind callback stack.  The caller must invoke
> +   the callback if desired.  */
> +void __libc_cleanup_pop_restore (struct _pthread_cleanup_buffer *buffer);
> +libc_hidden_proto (__libc_cleanup_pop_restore)
> 
> /* Start critical region with cleanup.  */
> -#define __libc_cleanup_region_start(DOIT, FCT, ARG) \
> -  { struct _pthread_cleanup_buffer _buffer;                                \
> -    int _avail;                                                              
>       \
> -    if (DOIT) {                                                              
>       \
> -      _avail = PTFAVAIL (_pthread_cleanup_push_defer);                       
>       \
> -      if (_avail) {                                                        \
> -     __libc_ptf_call_always (_pthread_cleanup_push_defer, (&_buffer, FCT,  \
> -                                                           ARG));          \
> -      } else {                                                               
>       \
> -     _buffer.__routine = (FCT);                                            \
> -     _buffer.__arg = (ARG);                                                \
> -      }                                                                      
>       \
> -    } else {                                                               \
> -      _avail = 0;                                                          \
> -    }
> +#define __libc_cleanup_region_start(DOIT, FCT, ARG)                  \
> +  {   bool _cleanup_start_doit;                                              
> \
> +  struct _pthread_cleanup_buffer _buffer;                            \
> +  /* Non-addressable copy of FCT, so that we avoid indirect calls on \
> +     the non-unwinding path.  */                                     \
> +  void (*_cleanup_routine) (void *) = (FCT);                         \
> +  _buffer.__arg = (ARG);                                             \
> +  if (DOIT)                                                          \
> +    {                                                                        
> \
> +      _cleanup_start_doit = true;                                    \
> +      _buffer.__routine = _cleanup_routine;                          \
> +      __libc_cleanup_push_defer (&_buffer);                          \
> +    }                                                                        
> \
> +  else                                                                       
> \
> +      _cleanup_start_doit = false;
> 
> /* End critical region with cleanup.  */
> -#define __libc_cleanup_region_end(DOIT) \
> -    if (_avail) {                                                          \
> -      __libc_ptf_call_always (_pthread_cleanup_pop_restore, (&_buffer, 
> DOIT));\
> -    } else if (DOIT)                                                       \
> -      _buffer.__routine (_buffer.__arg);                                   \
> -  }
> +#define __libc_cleanup_region_end(DOIT)              \
> +  if (_cleanup_start_doit)                   \
> +    __libc_cleanup_pop_restore (&_buffer);   \
> +  if (DOIT)                                  \
> +    _cleanup_routine (_buffer.__arg);                \
> +  } /* matches __libc_cleanup_region_start */
> 
> 
> /* Hide the definitions which are only supposed to be used inside libc in
> diff --git a/sysdeps/nptl/libc-lockP.h b/sysdeps/nptl/libc-lockP.h
> index 63b605dee2..2c928b7a76 100644
> --- a/sysdeps/nptl/libc-lockP.h
> +++ b/sysdeps/nptl/libc-lockP.h
> @@ -251,32 +251,12 @@ _Static_assert (LLL_LOCK_INITIALIZER == 0, 
> "LLL_LOCK_INITIALIZER != 0");
> /* Get once control variable.  */
> #define __libc_once_get(ONCE_CONTROL) ((ONCE_CONTROL) != PTHREAD_ONCE_INIT)
> 
> -/* Note that for I/O cleanup handling we are using the old-style
> -   cancel handling.  It does not have to be integrated with C++ snce
> -   no C++ code is called in the middle.  The old-style handling is
> -   faster and the support is not going away.  */
> -extern void _pthread_cleanup_push (struct _pthread_cleanup_buffer *buffer,
> -                                void (*routine) (void *), void *arg);
> -extern void _pthread_cleanup_pop (struct _pthread_cleanup_buffer *buffer,
> -                               int execute);
> -extern void _pthread_cleanup_push_defer (struct _pthread_cleanup_buffer 
> *buffer,
> -                                      void (*routine) (void *), void *arg);
> -extern void _pthread_cleanup_pop_restore (struct _pthread_cleanup_buffer 
> *buffer,
> -                                       int execute);
> -
> -/* Sometimes we have to exit the block in the middle.  */
> -#define __libc_cleanup_end(DOIT) \
> -    if (_avail) {                                                          \
> -      __libc_ptf_call_always (_pthread_cleanup_pop_restore, (&_buffer, 
> DOIT));\
> -    } else if (DOIT)                                                       \
> -      _buffer.__routine (_buffer.__arg)
> -
> /* __libc_cleanup_push and __libc_cleanup_pop depend on exception
>    handling and stack unwinding.  */
> #ifdef __EXCEPTIONS
> 
> /* Normal cleanup handling, based on C cleanup attribute.  */
> -__extern_inline void
> +static __always_inline void
> __libc_cleanup_routine (struct __pthread_cleanup_frame *f)
> {
>   if (f->__do_it)
> @@ -388,8 +368,6 @@ weak_extern (__pthread_once)
> weak_extern (__pthread_initialize)
> weak_extern (__pthread_atfork)
> weak_extern (__pthread_setcancelstate)
> -weak_extern (_pthread_cleanup_push_defer)
> -weak_extern (_pthread_cleanup_pop_restore)
> # else
> #  pragma weak __pthread_mutex_init
> #  pragma weak __pthread_mutex_destroy
> @@ -412,8 +390,6 @@ weak_extern (_pthread_cleanup_pop_restore)
> #  pragma weak __pthread_initialize
> #  pragma weak __pthread_atfork
> #  pragma weak __pthread_setcancelstate
> -#  pragma weak _pthread_cleanup_push_defer
> -#  pragma weak _pthread_cleanup_pop_restore
> # endif
> #endif
> 
> diff --git a/sysdeps/nptl/pthread-functions.h 
> b/sysdeps/nptl/pthread-functions.h
> index 97a5c48939..4268084b66 100644
> --- a/sysdeps/nptl/pthread-functions.h
> +++ b/sysdeps/nptl/pthread-functions.h
> @@ -57,10 +57,6 @@ struct pthread_functions
>   int (*ptr___pthread_key_create) (pthread_key_t *, void (*) (void *));
>   void *(*ptr___pthread_getspecific) (pthread_key_t);
>   int (*ptr___pthread_setspecific) (pthread_key_t, const void *);
> -  void (*ptr__pthread_cleanup_push_defer) (struct _pthread_cleanup_buffer *,
> -                                        void (*) (void *), void *);
> -  void (*ptr__pthread_cleanup_pop_restore) (struct _pthread_cleanup_buffer *,
> -                                         int);
> #define HAVE_PTR_NTHREADS
>   unsigned int *ptr_nthreads;
>   void (*ptr___pthread_unwind) (__pthread_unwind_buf_t *)
> </cut>

_______________________________________________
linaro-toolchain mailing list
linaro-toolchain@lists.linaro.org
https://lists.linaro.org/mailman/listinfo/linaro-toolchain

Reply via email to