https://gcc.gnu.org/g:5f02ba92580040c2fb1fea24998b727bd0ba40c5

commit r16-6658-g5f02ba92580040c2fb1fea24998b727bd0ba40c5
Author: Jonathan Wakely <[email protected]>
Date:   Thu Jan 8 14:03:01 2026 +0000

    libstdc++: Fix proxy wait detection in atomic wait
    
    A failed assertion was observed with std::atomic<bool>::wait when the
    loop in __atomic_wait_address is entered and calls _M_setup_wait a
    second time, after waking from __wait_impl. When the first call to
    _M_setup_wait makes a call to _M_setup_proxy_wait that function decides
    that a proxy wait is needed for an object of type bool, and it updates
    the _M_obj and _M_obj_size members to refer to the futex in the proxy
    state, instead of referring to the bool object itself. The next time
    _M_setup_wait is called it calls _M_setup_proxy_wait again but now it
    sees _M_obj_size == sizeof(futex) and so this time decides a proxy wait
    is *not* needed, and then fails the __glibcxx_assert(_M_obj == addr)
    check.
    
    The problem is that _M_setup_proxy_wait wasn't correctly handling the
    case where it's called a second time, after the decision to use a proxy
    wait has already been made. That can be fixed in _M_setup_proxy_wait by
    checking if _M_obj != addr, which implies that a proxy wait has already
    been set up by a previous call. In that case, _M_setup_proxy_wait should
    only update _M_old to the latest value of the proxy _M_ver.
    
    This change means that _M_setup_proxy_wait is safe to call repeatedly
    for a proxy wait, and will only update _M_wait_state, _M_obj, and
    _M_obj_size on the first call. On the second and subsequent calls, those
    variables are already correctly set for the proxy wait so don't need to
    be set again.
    
    For non-proxy waits, calling _M_setup_proxy_wait more than once is safe,
    but pessimizes performance. The caller shouldn't make a second call to
    _M_setup_proxy_wait because we don't need to check again if a proxy wait
    should be used (the answer won't change) and we don't need to load a
    value from the proxy _M_ver.
    
    However, it was difficult to detect the case of a non-proxy wait,
    because _M_setup_wait doesn't know if it's being called the first time
    (when _M_setup_proxy_wait is called to make the initial decision) or a
    subsequent time (in which case _M_obj == addr implies a non-proxy wait
    was already decided on). As a result, _M_setup_proxy_wait was being used
    every time to see if it's a proxy wait. We can resolve this by splitting
    the _M_setup_wait function into _M_setup_wait and _M_on_wake, where the
    former is only called once to do the initial setup and the latter is
    called after __wait_impl returns, to prepare to check the predicate and
    possibly wait again.  The new _M_on_wake function can avoid unnecessary
    calls to _M_setup_proxy_wait by checking _M_obj == addr to identify a
    non-proxy wait.
    
    The three callers of _M_setup_wait are updated to use _M_on_wake instead
    of _M_setup_wait after waking from a waiting function. This change
    revealed a latent performance bug in __atomic_wait_address_for which was
    not passing __res to _M_setup_wait, so a new value was always loaded
    even when __res._M_has_val was true. By splitting _M_on_wake out of
    _M_setup_wait this problem became more obvious, because we no longer
    have _M_setup_wait doing two different jobs, depending on whether it was
    passed the optional third argument or not.
    
    libstdc++-v3/ChangeLog:
    
            * include/bits/atomic_timed_wait.h (__atomic_wait_address_until):
            Use _M_on_wake instead of _M_setup_wait after waking.
            (__atomic_wait_address_for): Likewise.
            * include/bits/atomic_wait.h (__atomic_wait_address): Likewise.
            (__wait_args::_M_setup_wait): Remove third parameter and move
            code to update _M_old to ...
            (__wait_args::_M_on_wake): New member function to update _M_old
            after waking, only calling _M_setup_proxy_wait if needed.
            (__wait_args::_M_store): New member function to update _M_old
            from a value, for non-proxy waits.
            * src/c++20/atomic.cc (__wait_args::_M_setup_proxy_wait): If
            _M_obj is not addr, only load a new value and return true.
    
    Reviewed-by: Tomasz KamiƄski <[email protected]>

Diff:
---
 libstdc++-v3/include/bits/atomic_timed_wait.h |  4 +-
 libstdc++-v3/include/bits/atomic_wait.h       | 82 ++++++++++++++++++---------
 libstdc++-v3/src/c++20/atomic.cc              | 34 ++++++-----
 3 files changed, 77 insertions(+), 43 deletions(-)

diff --git a/libstdc++-v3/include/bits/atomic_timed_wait.h 
b/libstdc++-v3/include/bits/atomic_timed_wait.h
index bca43e3222dd..c195cefc6b85 100644
--- a/libstdc++-v3/include/bits/atomic_timed_wait.h
+++ b/libstdc++-v3/include/bits/atomic_timed_wait.h
@@ -140,7 +140,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          auto __res = __detail::__wait_until(__addr, __args, __atime);
          if (__res._M_timeout)
            return false; // C++26 will also return last observed __val
-         __val = __args._M_setup_wait(__addr, __vfn, __res);
+         __val = __args._M_on_wake(__addr, __vfn, __res);
        }
       return true; // C++26 will also return last observed __val
     }
@@ -192,7 +192,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          auto __res = __detail::__wait_for(__addr, __args, __rtime);
          if (__res._M_timeout)
            return false; // C++26 will also return last observed __val
-         __val = __args._M_setup_wait(__addr, __vfn);
+         __val = __args._M_on_wake(__addr, __vfn, __res);
        }
       return true; // C++26 will also return last observed __val
     }
diff --git a/libstdc++-v3/include/bits/atomic_wait.h 
b/libstdc++-v3/include/bits/atomic_wait.h
index b6240a95370f..8511e003abca 100644
--- a/libstdc++-v3/include/bits/atomic_wait.h
+++ b/libstdc++-v3/include/bits/atomic_wait.h
@@ -205,43 +205,72 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
       template<typename _Tp, typename _ValFn>
        _Tp
-       _M_setup_wait(const _Tp* __addr, _ValFn __vfn,
-                     __wait_result_type __res = {})
+       _M_setup_wait(const _Tp* __addr, _ValFn __vfn)
        {
          static_assert(is_same_v<_Tp, decay_t<decltype(__vfn())>>);
 
-         if (__res._M_has_val) // A previous wait loaded a recent value.
-           {
-             _M_old = __res._M_val;
-             if constexpr (!__platform_wait_uses_type<_Tp>)
-               {
-                 // __res._M_val might be the value of a proxy wait object,
-                 // not the value of *__addr. Call __vfn() to get new value.
-                 return __vfn();
-               }
-             // Not a proxy wait, so the value in __res._M_val was loaded
-             // from *__addr and we don't need to call __vfn().
-             else if constexpr (sizeof(_Tp) == sizeof(__UINT32_TYPE__))
-               return __builtin_bit_cast(_Tp, (__UINT32_TYPE__)_M_old);
-             else if constexpr (sizeof(_Tp) == sizeof(__UINT64_TYPE__))
-               return __builtin_bit_cast(_Tp, (__UINT64_TYPE__)_M_old);
-             else
-               static_assert(false); // Unsupported size
-           }
-
          if constexpr (!__platform_wait_uses_type<_Tp>)
            if (_M_setup_proxy_wait(__addr))
              {
                // We will use a proxy wait for this object.
-               // The library has set _M_obj and _M_obj_size and _M_old.
+               // The library has set _M_wait_state, _M_obj, _M_obj_size,
+               // and _M_old.
                // Call __vfn to load the current value from *__addr
                // (which must happen after the call to _M_setup_proxy_wait).
                return __vfn();
              }
 
          // We will use a futex-like operation to wait on this object,
-         // and so can just load the value and store it into _M_old.
-         auto __val = __vfn();
+         // so just load the value, store it into _M_old, and return it.
+         return _M_store(__vfn());
+       }
+
+      // Called after a wait returns, to prepare to wait again.
+      template<typename _Tp, typename _ValFn>
+       _Tp
+       _M_on_wake(const _Tp* __addr, _ValFn __vfn, __wait_result_type __res)
+       {
+         if constexpr (!__platform_wait_uses_type<_Tp>) // maybe a proxy wait
+           if (_M_obj != __addr) // definitely a proxy wait
+             {
+               if (__res._M_has_val)
+                 // Previous wait loaded a recent value from the proxy.
+                 _M_old = __res._M_val;
+               else // Load a new value from the proxy and store in _M_old.
+                 (void) _M_setup_proxy_wait(nullptr);
+               // Read the current value of *__addr
+               return __vfn();
+             }
+
+         if (__res._M_has_val) // The previous wait loaded a recent value.
+           {
+             _M_old = __res._M_val;
+
+             // Not a proxy wait, so the value in __res._M_val was loaded
+             // from *__addr and we don't need to call __vfn().
+             if constexpr (sizeof(_Tp) == sizeof(__UINT64_TYPE__))
+               return __builtin_bit_cast(_Tp, (__UINT64_TYPE__)_M_old);
+             else if constexpr (sizeof(_Tp) == sizeof(__UINT32_TYPE__))
+               return __builtin_bit_cast(_Tp, (__UINT32_TYPE__)_M_old);
+             else if constexpr (sizeof(_Tp) == sizeof(__UINT16_TYPE__))
+               return __builtin_bit_cast(_Tp, (__UINT16_TYPE__)_M_old);
+             else if constexpr (sizeof(_Tp) == sizeof(__UINT8_TYPE__))
+               return __builtin_bit_cast(_Tp, (__UINT8_TYPE__)_M_old);
+             else // Should be a proxy wait for this size!
+               __glibcxx_assert(false);
+           }
+         else
+           return _M_store(__vfn());
+       }
+
+    private:
+      // Store __val in _M_old.
+      // pre: This must be a non-proxy wait.
+      template<typename _Tp>
+       [[__gnu__::__always_inline__]]
+       _Tp
+       _M_store(_Tp __val)
+       {
          // We have to consider various sizes, because a future libstdc++.so
          // might enable non-proxy waits for additional sizes.
          if constexpr (sizeof(_Tp) == sizeof(__UINT64_TYPE__))
@@ -252,12 +281,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
            _M_old = __builtin_bit_cast(__UINT16_TYPE__, __val);
          else if constexpr (sizeof(_Tp) == sizeof(__UINT8_TYPE__))
            _M_old = __builtin_bit_cast(__UINT8_TYPE__, __val);
-         else // _M_setup_proxy_wait should have returned true for this type!
+         else // Should be a proxy wait for this size!
            __glibcxx_assert(false);
          return __val;
        }
 
-    private:
       // Returns true if a proxy wait will be used for __addr, false otherwise.
       // If true, _M_wait_state, _M_obj, _M_obj_size, and _M_old are set.
       // If false, data members are unchanged.
@@ -297,7 +325,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       while (!__pred(__val))
        {
          auto __res = __detail::__wait_impl(__addr, __args);
-         __val = __args._M_setup_wait(__addr, __vfn, __res);
+         __val = __args._M_on_wake(__addr, __vfn, __res);
        }
       // C++26 will return __val
     }
diff --git a/libstdc++-v3/src/c++20/atomic.cc b/libstdc++-v3/src/c++20/atomic.cc
index 2d5842df30c3..cbff5e9ba1a6 100644
--- a/libstdc++-v3/src/c++20/atomic.cc
+++ b/libstdc++-v3/src/c++20/atomic.cc
@@ -311,26 +311,32 @@ namespace
 
 // Return false (and don't change any data members) if we can do a non-proxy
 // wait for the object of size `_M_obj_size` at address `addr`.
-// Otherwise, the object at addr needs to use a proxy wait. Set _M_wait_state,
-// set _M_obj and _M_obj_size to refer to the _M_wait_state->_M_ver proxy,
-// load the current value from _M_obj and store it in _M_old, then return true.
+// Otherwise, we will use a proxy wait. If addr == _M_obj this is the initial
+// setup of the proxy wait, so set _M_wait_state to the proxy state for addr,
+// and set _M_obj and _M_obj_size to refer to the _M_wait_state->_M_ver proxy.
+// For both the initial setup of a proxy wait and for subsequent calls to this
+// function for proxy waits, we load the current value from _M_obj (the proxy)
+// and store it in _M_old, then return true.
 bool
 __wait_args::_M_setup_proxy_wait(const void* addr)
 {
-  if (!use_proxy_wait(*this, addr)) // We can wait on this address directly.
+  __waitable_state* state = nullptr;
+
+  if (addr == _M_obj)
     {
-      // Ensure the caller set _M_obj correctly, as that's what we'll wait on:
-      __glibcxx_assert(_M_obj == addr);
-      return false;
-    }
+      if (!use_proxy_wait(*this, addr)) // We can wait on this address 
directly.
+       return false;
 
-  // This will be a proxy wait, so get a waitable state.
-  auto state = set_wait_state(addr, *this);
+      // This will be a proxy wait, so get a waitable state.
+      state = set_wait_state(addr, *this);
 
-  // The address we will wait on is the version count of the waitable state:
-  _M_obj = &state->_M_ver;
-  // __wait_impl and __wait_until_impl need to know this size:
-  _M_obj_size = sizeof(state->_M_ver);
+      // The address we will wait on is the version count of the waitable 
state:
+      _M_obj = &state->_M_ver;
+      // __wait_impl and __wait_until_impl need to know this size:
+      _M_obj_size = sizeof(state->_M_ver);
+    }
+  else // This is not the first call to this function, already a proxy wait.
+    state = static_cast<__waitable_state*>(_M_wait_state);
 
   // Read the value of the _M_ver counter.
   _M_old = __atomic_load_n(&state->_M_ver, __ATOMIC_ACQUIRE);

Reply via email to