http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52839
--- Comment #19 from Alan Modra <amodra at gmail dot com> 2012-04-10 15:13:24 UTC --- I think I was on the right track when I questioned whether the problem might be mixing atomics and mutex protected ops, but was looking in the wrong place. I should have looked at default_weaktoshared.o rather than libstdc++.so. objdump -drS default_weaktoshared.o shows [snip] void* thread_hammer(void* opaque_weak) { 0: 94 21 ff d0 stwu r1,-48(r1) static inline _Atomic_word __attribute__ ((__unused__)) __exchange_and_add_dispatch(_Atomic_word* __mem, int __val) { #ifdef __GTHREADS if (__gthread_active_p()) 4: 3d 20 00 00 lis r9,0 6: R_PPC_ADDR16_HA pthread_cancel 8: 7d 80 00 26 mfcr r12 c: 7c 08 02 a6 mflr r0 10: 39 29 00 00 addi r9,r9,0 12: R_PPC_ADDR16_LO pthread_cancel 14: 2e 09 00 00 cmpwi cr4,r9,0 18: 93 c1 00 28 stw r30,40(r1) typedef typename __traits_type::pointer pointer; _GLIBCXX_CONSTEXPR __normal_iterator() : _M_current(_Iterator()) { } explicit __normal_iterator(const _Iterator& __i) : _M_current(__i) { } 1c: 3f c0 00 01 lis r30,1 20: 90 01 00 34 stw r0,52(r1) 24: 93 81 00 20 stw r28,32(r1) 28: 93 a1 00 24 stw r29,36(r1) 2c: 93 e1 00 2c stw r31,44(r1) 30: 91 81 00 1c stw r12,28(r1) 34: 7c 7c 1b 78 mr r28,r3 38: 63 de 86 a0 ori r30,r30,34464 3c: 83 a3 00 00 lwz r29,0(r3) 40: 48 00 00 18 b 58 <_Z13thread_hammerPv+0x58> 44: 60 00 00 00 nop 48: 60 00 00 00 nop 4c: 60 00 00 00 nop 0x9908b0dful, 11, 7, 0x9d2c5680ul, 15, 0xefc60000ul, 18> rng; wp_vector_t::iterator cur_weak = weak_pool.begin(); for (unsigned int i = 0; i < HAMMER_REPEAT; ++i) 50: 37 de ff ff addic. 
r30,r30,-1 54: 41 82 00 ac beq- 100 <_Z13thread_hammerPv+0x100> // now that __weak_count is defined we can define this constructor: template<_Lock_policy _Lp> inline __shared_count<_Lp>:: __shared_count(const __weak_count<_Lp>& __r) : _M_pi(__r._M_pi) 58: 83 fd 00 04 lwz r31,4(r29) { if (_M_pi != 0) 5c: 2f 9f 00 00 cmpwi cr7,r31,0 60: 41 9e 00 cc beq- cr7,12c <_Z13thread_hammerPv+0x12c> inline void _Sp_counted_base<_S_atomic>:: _M_add_ref_lock() { // Perform lock-free add-if-not-zero operation. _Atomic_word __count = _M_use_count; 64: 81 3f 00 04 lwz r9,4(r31) 68: 91 21 00 08 stw r9,8(r1) do { if (__count == 0) 6c: 2f 89 00 00 cmpwi cr7,r9,0 70: 41 9e 00 d8 beq- cr7,148 <_Z13thread_hammerPv+0x148> // Replace the current counter value with the old value + 1, as // long as it's not changed meanwhile. } while (!__atomic_compare_exchange_n(&_M_use_count, &__count, __count + 1, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)); 74: 81 01 00 08 lwz r8,8(r1) 78: 39 29 00 01 addi r9,r9,1 7c: 38 7f 00 04 addi r3,r31,4 80: 7c 20 04 ac lwsync 84: 7d 40 18 28 lwarx r10,0,r3 88: 7c 0a 40 00 cmpw r10,r8 8c: 40 82 00 0c bne- 98 <_Z13thread_hammerPv+0x98> 90: 7d 20 19 2d stwcx. r9,0,r3 94: 4c 00 01 2c isync 98: 91 41 00 08 stw r10,8(r1) _Sp_counted_base<_S_atomic>:: _M_add_ref_lock() { // Perform lock-free add-if-not-zero operation. _Atomic_word __count = _M_use_count; do 9c: 40 82 00 a4 bne- 140 <_Z13thread_hammerPv+0x140> a0: 41 92 00 ac beq- cr4,14c <_Z13thread_hammerPv+0x14c> return __exchange_and_add(__mem, __val); a4: 38 80 ff ff li r4,-1 a8: 48 00 00 01 bl a8 <_Z13thread_hammerPv+0xa8> a8: R_PPC_REL24 _ZN9__gnu_cxx18__exchange_and_addEPVii void _M_release() // nothrow { // Be race-detector-friendly. For more info see bits/c++config. 
_GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(&_M_use_count); if (__gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, -1) == 1) ac: 2f 83 00 01 cmpwi cr7,r3,1 b0: 40 9e ff a0 bne+ cr7,50 <_Z13thread_hammerPv+0x50> { _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_use_count); _M_dispose(); b4: 81 3f 00 00 lwz r9,0(r31) b8: 7f e3 fb 78 mr r3,r31 bc: 81 29 00 08 lwz r9,8(r9) c0: 7d 29 03 a6 mtctr r9 c4: 4e 80 04 21 bctrl _GLIBCXX_WRITE_MEM_BARRIER; That certainly looks like _M_use_count is modified both by atomics and by __exchange_and_add under a pthread mutex.