The __spin_until_impl function was presumably intended to just spin for
a short time, then give up and let the caller wait on a futex or
condvar. However, __spin_until_impl will never stop spinning unless
either the value changes or the timeout is reached. This means that when
__spin_until_impl returns, the caller should immediately return (because
either the value we were waiting for has changed, or the timeout
happened). So __wait_until_impl should never block on a futex or
condvar. However, the check for the return value of __spin_until_impl
would only return if the value changed (i.e. !__res._M_timeout). So if
the timeout occurred, it would fall through and block on the
futex/condvar, even though the timeout has already been reached.
This was causing a major performance regression in the timed waiting
functions of std::counting_semaphore.
The simplest fix is to replace __spin_until_impl entirely, just calling
__spin_impl to spin a small, finite number of times, and then return
immediately if either the value changed or the timeout happened. This
ensures that we don't block on the futex/condvar unnecessarily.
Removing __spin_until_impl also has the advantage that we no longer keep
calling steady_clock::now() on every iteration to check for a timeout.
That was also adding significant overhead to the timed waiting
functions.
libstdc++-v3/ChangeLog:
PR libstdc++/122878
* src/c++20/atomic.cc (__spin_until_impl): Remove.
(__wait_until_impl): Use __spin_impl instead of
__spin_until_impl and return if timeout is reached after
spinning.
---
Tested x86_64-linux.
libstdc++-v3/src/c++20/atomic.cc | 47 ++------------------------------
1 file changed, 3 insertions(+), 44 deletions(-)
diff --git a/libstdc++-v3/src/c++20/atomic.cc b/libstdc++-v3/src/c++20/atomic.cc
index fdd67d834768..16fd91b7d7ab 100644
--- a/libstdc++-v3/src/c++20/atomic.cc
+++ b/libstdc++-v3/src/c++20/atomic.cc
@@ -455,49 +455,6 @@ __cond_wait_until(__condvar& __cv, mutex& __mx,
return __wait_clock_t::now() < __atime;
}
#endif // ! HAVE_PLATFORM_WAIT
-
-// Unlike __spin_impl, does not always return _M_has_val == true.
-// If the deadline has already passed then no fresh value is loaded.
-__wait_result_type
-__spin_until_impl(const __platform_wait_t* __addr,
- const __wait_args_base& __args,
- const __wait_clock_t::time_point& __deadline)
-{
- using namespace literals::chrono_literals;
-
- __wait_result_type __res{};
- auto __t0 = __wait_clock_t::now();
- auto __now = __t0;
- for (; __now < __deadline; __now = __wait_clock_t::now())
- {
- auto __elapsed = __now - __t0;
-#ifndef _GLIBCXX_NO_SLEEP
- if (__elapsed > 128ms)
- this_thread::sleep_for(64ms);
- else if (__elapsed > 64us)
- this_thread::sleep_for(__elapsed / 2);
- else
-#endif
- if (__elapsed > 4us)
- __thread_yield();
- else
- {
- __res = __detail::__spin_impl(__addr, __args);
- if (!__res._M_timeout)
- return __res;
- }
-
- __res._M_val = __atomic_load_n(__addr, __args._M_order);
- __res._M_has_val = true;
- if (__res._M_val != __args._M_old)
- {
- __res._M_timeout = false;
- return __res;
- }
- }
- __res._M_timeout = true;
- return __res;
-}
} // namespace
__wait_result_type
@@ -509,11 +466,13 @@ __wait_until_impl([[maybe_unused]] const void* __addr,
__wait_args_base& __args,
if (__args & __wait_flags::__do_spin)
{
- auto __res = __detail::__spin_until_impl(__wait_addr, __args, __atime);
+ auto __res = __detail::__spin_impl(__wait_addr, __args);
if (!__res._M_timeout)
return __res;
if (__args & __wait_flags::__spin_only)
return __res;
+ if (__wait_clock_t::now() >= __atime)
+ return __res;
}
#ifdef _GLIBCXX_HAVE_PLATFORM_WAIT
--
2.51.1