Dear Qpid-Users,

I recently ran into this bug using the cpp-client 0.28 with Proton 0.8
under Solaris Sparc 5.10. The client communicates with a qpid-cpp broker,
also version 0.28. Daemon and client run on the same machine, so network
latency should not be an issue. The capacity of the
qpid::messaging::Receiver is unchanged.
We have the same codebase set up on Linux; we never encountered the error
(or an error like it) there, but then the error does not occur very often
so this may not mean anything.

I cannot actively reproduce the bug; it showed up twice within the last
three weeks (the client is restarted daily), both times when receiving a
large number of small messages (~5000). This amount of traffic happens
regularly, and in general it works.
I tried to reproduce the error by sending ~50000 similar messages, without
success; resending the exact same messages did not reproduce it either.

Please find stack traces below... I looked through the map creation code in
proton where the actual stack overflow happens but could not see an error
there.

Best regards,

Daniel

pstack core yields:
---
-----------------  lwp# 1 / thread# 1  --------------------
 ffffffff794d8ba0 __lwp_park (ffffffff77700280, 0, 100ed2850, 0, 100ed2848,
1) + 14
 ffffffff7e5a12e8 _ZN4qpid3sys5Mutex4lockEv (100ed2848, ffffffff7fffe2e8,
ffffffff7fffe2f0, 0, 100fde4c0, 3b9aca00) + 20
 ffffffff7e5a3508 _ZN4qpid3sys10ScopedLockINS0_7MonitorEEC1ERS2_
(ffffffff7fffe3c8, 100ed2848, ffffffff7964a300, ffffffff796455d4,
ffffffff77700280, 7fffffffffffffff) + 24
 ffffffff7e592ff8
_ZN4qpid9messaging4amqp17ConnectionContext12nextReceiverEN5boost10shared_ptrINS1_14SessionContextEEENS0_8DurationE
(ffffffff7fffe4b8, 100ed2650, ffffffff7fffe4c8, 3e8, 11a0, fffc00) + 40
 ffffffff7e5c96a4
_ZN4qpid9messaging4amqp13SessionHandle12nextReceiverERNS0_8ReceiverENS0_8DurationE
(100deb0f0, ffffffff7fffe890, 3e8, 30, ffffffff7963e000, 100ed2650) + 60
 ffffffff7e5c9958
_ZN4qpid9messaging4amqp13SessionHandle12nextReceiverENS0_8DurationE
(ffffffff7fffe890, 100deb0f0, 3e8, 0, 0, 1) + 50
 ffffffff7e63a05c _ZN4qpid9messaging7Session12nextReceiverENS0_8DurationE
(ffffffff7fffe890, ffffffff7e37a790, 3e8, ffffffff796455d4,
ffffffff77700280, 0) + 40
 ffffffff7d2a38a0
_ZN4comm14QpidWrpSession12nextReceiverENS_15QpidWrpDurationE
(ffffffff7fffeb00, ffffffff7e37a788, ffffffff7fffeb10, ffffffff796455d4,
ffffffff77700280, ffffffff7e37a790) + 78
 ffffffff7defdcd0 _ZN4comm10QpidReader4waitENS_15QpidWrpDurationE
(ffffffff7fffede8, ffffffff7fffebd8, 4d, 0, ffffffff7e37a788,
ffffffff7fffee18) + 240
 ffffffff7defda74 _ZN4comm10QpidReader4waitEm (ffffffff7fffede8, 3e8, 1, 0,
100fde4c0, 101149660) + 2c
 000000010002edf8 _ZN6merger8mainLoopEv (ffffffff7ffff307, 0,
ffffffff7964a300, ffffffff796455d4, ffffffff77700280, 1) + 428
 000000010002e370 _ZN6merger5startEiPPc (3, ffffffff7ffff4a8, 0, 0, 0,
ffffffff7ffff198) + 428
 000000010002f4f0 main (3, ffffffff7ffff4a8, ffffffff7ffff4c8, 1002cdb48,
100000000, ffffffff77700280) + 40
 000000010002d464 _start (0, 0, 0, 0, 0, 0) + 7c
-----------------  lwp# 2 / thread# 2  --------------------
 ffffffff77816854 pni_map_load (100f3d700, 0, 0, 0, 0, fffc00) + 4
---

Thread 1 (the one under our control; our application runs in a single
thread) is currently parked; Thread 2 is more informative when viewed with dbx:

---
=>[1] pni_map_load(0x100f3d700, 0x240, 0xffffffff7964a300,
0xffffffff796455d4, 0xffffffff77700a80, 0x0), at 0xffffffff77816854
  [2] pni_map_entry(0x100f3d700, 0xd08, 0x0, 0x1, 0xffffffff7963e000,
0x2000), at 0xffffffff77816d38
  [3] pn_map_put(0x100f3d700, 0xd08, 0x1018c5f80, 0x0, 0x0, 0x2), at
0xffffffff77816f54
  [4] pni_map_ensure(0x100f3d700, 0x295, 0xffffffff7964a300,
0xffffffff796455d4, 0xffffffff77700a80, 0x0), at 0xffffffff77816ae4
  [5] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80, 0x0),
at 0xffffffff77816d38
  [6] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80, 0x0),
at 0xffffffff77816d6c
  [7] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80, 0x0),
at 0xffffffff77816d6c
  [8] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80, 0x0),
at 0xffffffff77816d6c
  [9] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80, 0x0),
at 0xffffffff77816d6c
  [10] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [11] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [12] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [13] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [14] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [15] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [16] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [17] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [18] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c

(... thousands more of these...)

  [9232] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9233] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9234] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9235] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9236] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9237] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9238] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9239] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9240] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9241] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9242] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9243] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9244] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9245] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9246] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9247] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9248] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9249] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9250] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff77700a80,
0x0), at 0xffffffff77816d6c
  [9251] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff7963e000,
0x2000), at 0xffffffff77816d6c
  [9252] pni_map_entry(0x100f3d700, 0xecd, 0x0, 0x1, 0xffffffff7963e000,
0x2000), at 0xffffffff77816d6c
  [9253] pn_map_put(0x100f3d700, 0xecd, 0x101bcb050, 0x101fdfff4, 0xcbb34c,
0x10), at 0xffffffff77816f54
  [9254] pn_hash_put(0x100f3d700, 0xecd, 0x101bcb050, 0x0, 0x100ed2848,
0xffffffff7964a300), at 0xffffffff7781765c
  [9255] pn_delivery_map_push(0x100f3c6d0, 0x101bcb050, 0x100ed3770,
0xffffffff69cfa1af, 0xffffffff69cfa1a8,
0xffffffff69cfa1b0), at 0xffffffff77832f4c
  [9256] pn_do_transfer(0x100ed2ab0, 0x0, 0x0, 0x100e728e0, 0x100edcfd5,
0x165), at 0xffffffff77835434
  [9257] pn_dispatch_frame(0x100ed2ab0, 0xffffffff69cfa3b0, 0x18a,
0xffffffff69cfb4c8, 0x100ee6fe0, 0xffffffff69cfa3b0), at 0xffffffff7782a614
  [9258] pn_dispatcher_input(0x100ed2ab0, 0x100edcfb0, 0x18a, 0x0, 0x0,
0x0), at 0xffffffff7782a724
  [9259] pn_input_read_amqp(0x100ed29c8, 0x100edcfb0, 0x18a, 0x0, 0x0,
0x0), at 0xffffffff77836bcc
  [9260] pn_io_layer_input_passthru(0x100ed2990, 0x100edcfb0, 0x18a,
0x100f27660, 0x0, 0x0), at 0xffffffff7783a42c
  [9261] pn_io_layer_input_passthru(0x100ed2958, 0x100edcfb0, 0x18a,
0x1cc4, 0x1c00, 0x100ed25d0), at 0xffffffff7783a42c
  [9262] transport_consume(0x100ed28c0, 0x6, 0x1, 0x0, 0x100ed2848,
0xffffffff7964a300), at 0xffffffff778366f4
  [9263] pn_transport_process(0x100ed28c0, 0x18a, 0x0, 0x30, 0x4, 0x6), at
0xffffffff7783a8fc
  [9264] pn_transport_input(0x100ed28c0, 0x100f0760e, 0x18a, 0x10,
0xffffffff77700a80, 0x0), at 0xffffffff77836610
  [9265] _ZN4qpid9messaging4amqp17ConnectionContext11decodePlainEPKcm(0x100ed2650,
0x100f0760e, 0x18a, 0xffffffff69cfb4c8, 0x100ee6fe0, 0x29fb09), at
0xffffffff7e5992c0
  [9266] _ZN4qpid9messaging4amqp17ConnectionContext6decodeEPKcm(0x100ed2650,
0x100f0760e, 0x18a, 0x0, 0x0, 0x0), at 0xffffffff7e59b200
  [9267] _ZN4qpid9messaging4amqp12TcpTransport4readERNS_3sys8AsynchIOEPNS3_18AsynchIOBufferBaseE(0x100ed25d0,
0x100ee6cf0, 0x100f27660, 0x0, 0x0, 0x0), at 0xffffffff7e5cb364
  [9268] _ZNK5boost4_mfi3mf2IvN4qpid9messaging4amqp12TcpTransportERNS2_3sys8AsynchIOEPNS6_18AsynchIOBufferBaseEEclEPS5_S8_SA_(0x100ee6e40,
0x100ed25d0, 0x100ee6cf0, 0x100f27660, 0x0, 0x0), at 0xffffffff7e5cfd0c
  [9269] _ZN5boost3_bi5list3INS0_5valueIPN4qpid9messaging4amqp12TcpTransportEEENS_3argILi1EEENS9_ILi2EEEEclINS_4_mfi3mf2IvS6_RNS3_3sys8AsynchIOEPNSG_18AsynchIOBufferBaseEEENS0_5list2ISI_RSK_EEEEvNS0_4typeIvEERT_RT0_i(0x100ee6e50,
0x0, 0x100ee6e40, 0x100ee6e40, 0x100ee6cf0, 0x100ed25d0), at
0xffffffff7e5cf064
  [9270] _ZN5boost3_bi6bind_tIvNS_4_mfi3mf2IvN4qpid9messaging4amqp12TcpTransportERNS4_3sys8AsynchIOEPNS8_18AsynchIOBufferBaseEEENS0_5list3INS0_5valueIPS7_EENS_3argILi1EEENSI_ILi2EEEEEEclIS9_SC_EEvRT_RT0_(0x100ee6e40,
0x100ee6cf0, 0xffffffff69cfb2e0, 0x0, 0x0, 0x0), at 0xffffffff7e5ce7a0
  [9271] _ZN5boost6detail8function26void_function_obj_invoker2INS_3_bi6bind_tIvNS_4_mfi3mf2IvN4qpid9messaging4amqp12TcpTransportERNS7_3sys8AsynchIOEPNSB_18AsynchIOBufferBaseEEENS3_5list3INS3_5valueIPSA_EENS_3argILi1EEENSL_ILi2EEEEEEEvSD_SF_E6invokeERNS1_15function_bufferESD_SF_(0x100ee6e40,
0x100ee6cf0, 0x100f27660, 0x0, 0x7fffffffffffffff, 0x558a16fa), at
0xffffffff7e5cdf94
  [9272] _ZNK5boost9function2IvRN4qpid3sys8AsynchIOEPNS2_18AsynchIOBufferBaseEEclES4_S6_(0x100ee6e38,
0x100ee6cf0, 0x100f27660, 0x10, 0xffffffff77700a80, 0x0), at
0xffffffff7ea7d81c
  [9273] qpid::sys::posix::AsynchIO::readable(0x100ee6cf0, 0x100ee6cf8,
0xff000000, 0xffffffff69cfb4c8, 0x100ee6fe0, 0x29fb09), at
0xffffffff7ea79ce4
  [9274] _ZNK5boost4_mfi3mf1IvN4qpid3sys5posix8AsynchIOERNS3_14DispatchHandleEEclEPS5_S7_(0x100ee6d10,
0x100ee6cf0, 0x100ee6cf8, 0x100ee6d68, 0x101bcb040, 0xfffc00), at
0xffffffff7ea842c4
  [9275] _ZN5boost3_bi5list2INS0_5valueIPN4qpid3sys5posix8AsynchIOEEENS_3argILi1EEEEclINS_4_mfi3mf1IvS6_RNS4_14DispatchHandleEEENS0_5list1ISG_EEEEvNS0_4typeIvEERT_RT0_i(0x100ee6d20,
0x600000000000000, 0x100ee6d10, 0xffffffff69cfb790, 0x100ee6d10,
0x100ee6cf0), at 0xffffffff7ea8321c
  [9276] _ZN5boost3_bi6bind_tIvNS_4_mfi3mf1IvN4qpid3sys5posix8AsynchIOERNS5_14DispatchHandleEEENS0_5list2INS0_5valueIPS7_EENS_3argILi1EEEEEEclIS8_EEvRT_(0x100ee6d10,
0x100ee6cf8, 0xffffffff7964a300, 0x100ee6e1c, 0xffffffff77700a80, 0x6), at
0xffffffff7ea8202c
  [9277] _ZN5boost6detail8function26void_function_obj_invoker1INS_3_bi6bind_tIvNS_4_mfi3mf1IvN4qpid3sys5posix8AsynchIOERNS8_14DispatchHandleEEENS3_5list2INS3_5valueIPSA_EENS_3argILi1EEEEEEEvSC_E6invokeERNS1_15function_bufferESC_(0x100ee6d10,
0x100ee6cf8, 0x1, 0x0, 0x0, 0x0), at 0xffffffff7ea80af0
  [9278] _ZNK5boost9function1IvRN4qpid3sys14DispatchHandleEEclES4_(0x100ee6d08,
0x100ee6cf8, 0x0, 0x0, 0x0, 0x0), at 0xffffffff7eb46f30
  [9279] qpid::sys::DispatchHandle::processEvent(0x100ee6cf8, 0x1,
0xffffffff77700a50, 0x10, 0x7fffffffffffffff, 0x2), at 0xffffffff7eb460c8
  [9280] _ZN4qpid3sys6Poller5Event7processEv(0xffffffff69cfbd08,
0x100000000, 0x0, 0x0, 0xffffffff77700a80, 0x100deab00), at
0xffffffff7eab1c94
  [9281] qpid::sys::Poller::run(0x100d80ae0, 0x1, 0xffffffff7964a300, 0x10,
0xffffffff7964bf98, 0xffffffff77700a80), at 0xffffffff7eab10bc
  [9282] qpid::sys::_GLOBAL__N_1::runRunnable(0x100d80ae0, 0x1fc000, 0x0,
0x0, 0xffffffff7eaa1fa8, 0x1), at 0xffffffff7eaa1fc4
---

Reply via email to