http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52839

--- Comment #19 from Alan Modra <amodra at gmail dot com> 2012-04-10 15:13:24 UTC ---
I think I was on the right track when I questioned whether the problem might be
mixing atomics and mutex-protected ops, but I was looking in the wrong place.  I
should have looked at default_weaktoshared.o rather than libstdc++.so.
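
For orientation, the dispatch that the interleaved source below keeps quoting has
roughly this shape (a paraphrase of <ext/atomicity.h>; dispatch_sketch is a
made-up stand-in name for __exchange_and_add_dispatch):

  #include <ext/atomicity.h>

  // When libpthread is active, the counter update goes out of line to
  // __gnu_cxx::__exchange_and_add, which in the configuration at hand is the
  // mutex-protected fallback rather than an lwarx/stwcx. sequence.
  _Atomic_word
  dispatch_sketch(_Atomic_word* mem, int val)
  {
  #ifdef __GTHREADS
    if (__gthread_active_p())
      return __gnu_cxx::__exchange_and_add(mem, val);       // out of line, mutex here
    return __gnu_cxx::__exchange_and_add_single(mem, val);  // single-threaded path
  #else
    return __gnu_cxx::__exchange_and_add_single(mem, val);
  #endif
  }

The pthread_cancel relocations and the cmpwi cr4 at the top of thread_hammer are
that __gthread_active_p() test: it just checks whether the weak pthread_cancel
symbol resolved to something non-NULL.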

objdump -drS default_weaktoshared.o shows
[snip]
void* thread_hammer(void* opaque_weak)
{
   0:   94 21 ff d0     stwu    r1,-48(r1)
  static inline _Atomic_word
  __attribute__ ((__unused__))
  __exchange_and_add_dispatch(_Atomic_word* __mem, int __val)
  {
#ifdef __GTHREADS
    if (__gthread_active_p())
   4:   3d 20 00 00     lis     r9,0
                        6: R_PPC_ADDR16_HA      pthread_cancel
   8:   7d 80 00 26     mfcr    r12
   c:   7c 08 02 a6     mflr    r0
  10:   39 29 00 00     addi    r9,r9,0
                        12: R_PPC_ADDR16_LO     pthread_cancel
  14:   2e 09 00 00     cmpwi   cr4,r9,0
  18:   93 c1 00 28     stw     r30,40(r1)
      typedef typename __traits_type::pointer           pointer;

      _GLIBCXX_CONSTEXPR __normal_iterator() : _M_current(_Iterator()) { }

      explicit
      __normal_iterator(const _Iterator& __i) : _M_current(__i) { }
  1c:   3f c0 00 01     lis     r30,1
  20:   90 01 00 34     stw     r0,52(r1)
  24:   93 81 00 20     stw     r28,32(r1)
  28:   93 a1 00 24     stw     r29,36(r1)
  2c:   93 e1 00 2c     stw     r31,44(r1)
  30:   91 81 00 1c     stw     r12,28(r1)
  34:   7c 7c 1b 78     mr      r28,r3
  38:   63 de 86 a0     ori     r30,r30,34464
  3c:   83 a3 00 00     lwz     r29,0(r3)
  40:   48 00 00 18     b       58 <_Z13thread_hammerPv+0x58>
  44:   60 00 00 00     nop
  48:   60 00 00 00     nop
  4c:   60 00 00 00     nop
    0x9908b0dful, 11, 7,
    0x9d2c5680ul, 15,
    0xefc60000ul, 18> rng;
  wp_vector_t::iterator cur_weak = weak_pool.begin();

  for (unsigned int i = 0; i < HAMMER_REPEAT; ++i)
  50:   37 de ff ff     addic.  r30,r30,-1
  54:   41 82 00 ac     beq-    100 <_Z13thread_hammerPv+0x100>
  // now that __weak_count is defined we can define this constructor:
  template<_Lock_policy _Lp>
    inline
    __shared_count<_Lp>::
    __shared_count(const __weak_count<_Lp>& __r)
    : _M_pi(__r._M_pi)
  58:   83 fd 00 04     lwz     r31,4(r29)
    {
      if (_M_pi != 0)
  5c:   2f 9f 00 00     cmpwi   cr7,r31,0
  60:   41 9e 00 cc     beq-    cr7,12c <_Z13thread_hammerPv+0x12c>
    inline void
    _Sp_counted_base<_S_atomic>::
    _M_add_ref_lock()
    {
      // Perform lock-free add-if-not-zero operation.
      _Atomic_word __count = _M_use_count;
  64:   81 3f 00 04     lwz     r9,4(r31)
  68:   91 21 00 08     stw     r9,8(r1)
      do
        {
          if (__count == 0)
  6c:   2f 89 00 00     cmpwi   cr7,r9,0
  70:   41 9e 00 d8     beq-    cr7,148 <_Z13thread_hammerPv+0x148>
          // Replace the current counter value with the old value + 1, as
          // long as it's not changed meanwhile.
        }
      while (!__atomic_compare_exchange_n(&_M_use_count, &__count, __count + 1,
                                          true, __ATOMIC_ACQ_REL,
                                          __ATOMIC_RELAXED));
  74:   81 01 00 08     lwz     r8,8(r1)
  78:   39 29 00 01     addi    r9,r9,1
  7c:   38 7f 00 04     addi    r3,r31,4
  80:   7c 20 04 ac     lwsync
  84:   7d 40 18 28     lwarx   r10,0,r3
  88:   7c 0a 40 00     cmpw    r10,r8
  8c:   40 82 00 0c     bne-    98 <_Z13thread_hammerPv+0x98>
  90:   7d 20 19 2d     stwcx.  r9,0,r3
  94:   4c 00 01 2c     isync
  98:   91 41 00 08     stw     r10,8(r1)
    _Sp_counted_base<_S_atomic>::
    _M_add_ref_lock()
    {
      // Perform lock-free add-if-not-zero operation.
      _Atomic_word __count = _M_use_count;
      do
  9c:   40 82 00 a4     bne-    140 <_Z13thread_hammerPv+0x140>
  a0:   41 92 00 ac     beq-    cr4,14c <_Z13thread_hammerPv+0x14c>
      return __exchange_and_add(__mem, __val);
  a4:   38 80 ff ff     li      r4,-1
  a8:   48 00 00 01     bl      a8 <_Z13thread_hammerPv+0xa8>
                        a8: R_PPC_REL24 _ZN9__gnu_cxx18__exchange_and_addEPVii
      void
      _M_release() // nothrow
      {
        // Be race-detector-friendly.  For more info see bits/c++config.
        _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(&_M_use_count);
        if (__gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, -1) == 1)
  ac:   2f 83 00 01     cmpwi   cr7,r3,1
  b0:   40 9e ff a0     bne+    cr7,50 <_Z13thread_hammerPv+0x50>
          {
            _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_use_count);
            _M_dispose();
  b4:   81 3f 00 00     lwz     r9,0(r31)
  b8:   7f e3 fb 78     mr      r3,r31
  bc:   81 29 00 08     lwz     r9,8(r9)
  c0:   7d 29 03 a6     mtctr   r9
  c4:   4e 80 04 21     bctrl
                _GLIBCXX_WRITE_MEM_BARRIER;

That certainly looks like _M_use_count is updated both by the lock-free atomics
and by the pthread-mutex-based __exchange_and_add.
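
To make the hazard concrete, here is a minimal sketch (made-up names, not the
libstdc++ sources) of those two paths hitting one counter: a lock-free
add-if-not-zero like _M_add_ref_lock() racing a mutex-protected plain
read-modify-write like the __exchange_and_add fallback:

  #include <mutex>

  static int count = 2;          // stands in for _M_use_count
  static std::mutex count_lock;  // stands in for the pthread mutex

  // Mimics _M_add_ref_lock(): CAS-based add-if-not-zero, no lock taken.
  bool cas_increment()
  {
    int expected = __atomic_load_n(&count, __ATOMIC_RELAXED);
    do
      {
        if (expected == 0)
          return false;                 // count already hit zero
      }
    while (!__atomic_compare_exchange_n(&count, &expected, expected + 1,
                                        true, __ATOMIC_ACQ_REL,
                                        __ATOMIC_RELAXED));
    return true;
  }

  // Mimics a mutex-based __exchange_and_add: plain load and store under a
  // lock the CAS path never takes.
  int mutex_exchange_and_add(int val)
  {
    std::lock_guard<std::mutex> guard(count_lock);
    int old = count;
    count = old + val;
    return old;
  }

The mutex serializes only the callers that take it, so a successful CAS can land
between the locked path's load and store and then be overwritten: with the count
at 2, the locked decrement loads 2, cas_increment() bumps 2 to 3, the locked path
stores 1, and the reference the CAS just took is gone.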
