Atomic reference counting currently generates pessimistic code on platforms where the builtin atomics could instead feed the following branch directly from a subtract instruction. To allow better code generation, an addend that is a compile-time constant can be checked for a negative value; those cases can then be lowered to the __atomic_fetch_sub builtin. The extra branch that checks whether the application has any threads also generates a lot of extra instructions, while the builtin atomics themselves often compile to only a few instructions.
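
To make the intended code generation concrete, here is a minimal sketch (not part of the patch; release_ref is a hypothetical helper) of the reference-count decrement this targets:

#include <ext/atomicity.h>

// Drop one reference and report whether the object is now dead.  The
// addend -1 is a compile-time constant, so with this patch
// __exchange_and_add can route it to __atomic_fetch_sub.  On x86_64 the
// compiler can then emit a `lock sub` whose zero flag feeds the branch
// (old value 1 is equivalent to new value 0) instead of materializing
// the old value and comparing it.
bool
release_ref(_Atomic_word* count)
{ return __gnu_cxx::__exchange_and_add_dispatch(count, -1) == 1; }

With the dispatch change below, targets where int atomics are always lock-free (__GCC_ATOMIC_INT_LOCK_FREE == 2) additionally skip the __gthread_active_p() test, so no branch is emitted around the single atomic instruction.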
Can I assume that __builtin_constant_p is available in all compilers where _GLIBCXX_ATOMIC_BUILTINS is defined? bits/stl_list.h already assumes __builtin_constant_p is available inside _GLIBCXX_USE_CXX11_ABI.

x86_64 test results:

FAIL: 22_locale/numpunct/members/char/3.cc execution test
FAIL: 22_locale/time_get/get_date/wchar_t/4.cc execution test

# of expected passes		11941
# of unexpected failures	2
# of expected failures		65
# of unsupported tests		244

---

2016-11-09  Pauli Nieminen  <suok...@gmail.com>

	* include/ext/atomicity.h (__exchange_and_add): Optimize negative
	add with builtin __atomic_fetch_sub.
	(__atomic_add): Call __exchange_and_add to reuse the same code.
	(__exchange_and_add_dispatch)
	(__atomic_add_dispatch): Optimize threading check if builtins
	generate inline instructions.
---
 libstdc++-v3/include/ext/atomicity.h | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/ext/atomicity.h b/libstdc++-v3/include/ext/atomicity.h
index 0fcb390..ecf5a9d 100644
--- a/libstdc++-v3/include/ext/atomicity.h
+++ b/libstdc++-v3/include/ext/atomicity.h
@@ -35,6 +35,14 @@
 #include <bits/gthr.h>
 #include <bits/atomic_word.h>
 
+#if defined(__GCC_ATOMIC_INT_LOCK_FREE)
+#define _GLIBCXX_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE
+#else
+#define _GLIBCXX_INT_LOCK_FREE 2
+#endif
+#define _GLIBCXX_ATOMIC_INLINE \
+  (_GLIBCXX_ATOMIC_BUILTINS && _GLIBCXX_INT_LOCK_FREE == 2)
+
 namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -46,11 +54,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #ifdef _GLIBCXX_ATOMIC_BUILTINS
   static inline _Atomic_word
   __exchange_and_add(volatile _Atomic_word* __mem, int __val)
-  { return __atomic_fetch_add(__mem, __val, __ATOMIC_ACQ_REL); }
+  {
+    if (__builtin_constant_p(__val) && __val < 0)
+      return __atomic_fetch_sub(__mem, 0 - __val, __ATOMIC_ACQ_REL);
+    else
+      return __atomic_fetch_add(__mem, __val, __ATOMIC_ACQ_REL);
+  }
 
   static inline void
   __atomic_add(volatile _Atomic_word* __mem, int __val)
-  { __atomic_fetch_add(__mem, __val, __ATOMIC_ACQ_REL); }
+  { __exchange_and_add(__mem, __val); }
 #else
   _Atomic_word
   __attribute__ ((__unused__))
@@ -78,7 +91,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __exchange_and_add_dispatch(_Atomic_word* __mem, int __val)
   {
 #ifdef __GTHREADS
-    if (__gthread_active_p())
+    if (_GLIBCXX_ATOMIC_INLINE || __gthread_active_p())
       return __exchange_and_add(__mem, __val);
     else
       return __exchange_and_add_single(__mem, __val);
@@ -92,7 +105,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __atomic_add_dispatch(_Atomic_word* __mem, int __val)
   {
 #ifdef __GTHREADS
-    if (__gthread_active_p())
+    if (_GLIBCXX_ATOMIC_INLINE || __gthread_active_p())
       __atomic_add(__mem, __val);
     else
       __atomic_add_single(__mem, __val);
@@ -104,6 +117,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace
 
+#undef _GLIBCXX_INT_LOCK_FREE
+#undef _GLIBCXX_ATOMIC_INLINE
+
 // Even if the CPU doesn't need a memory barrier, we need to ensure
 // that the compiler doesn't reorder memory accesses across the
 // barriers.
-- 
2.9.3