[ 
https://issues.apache.org/jira/browse/AMQCPP-534?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Jeremy Leung updated AMQCPP-534:
--------------------------------
    Attachment:     (was: AMQCPP-534.patch.txt)

> Closing connection with setTransportListener to NULL may cause core dump
> ------------------------------------------------------------------------
>
>                 Key: AMQCPP-534
>                 URL: https://issues.apache.org/jira/browse/AMQCPP-534
>             Project: ActiveMQ C++ Client
>          Issue Type: Bug
>          Components: Transports
>    Affects Versions: 3.8.2
>            Reporter: Jeremy Leung
>            Assignee: Timothy Bish
>
> This happens at random and causes a core dump under Solaris 10.
> {panel:title=Core dump happened on thread #22 which is a thread for IO for 
> activemq|borderStyle=dashed|borderColor=#ccc|titleBGColor=#F7D6C1|bgColor=#FFFFCE}
> {noformat}
> t@22 (l@22) terminated by signal SEGV (no mapping at the fault address)
> 0xfcbef02c: onCommand+0x0370:   movl     (%edi),%eax
> (dbx) where -h
> current thread: t@22
> =>[1] 
> activemq::transport::failover::FailoverTransportListener::onCommand(0x1c20d280,
>  0xfa567ce0), at 0xfcbef02c
>   [2] activemq::transport::TransportFilter::onCommand(0x116c3700, 
> 0xfa567d20), at 0xfcb7ae15
>   [3] 
> activemq::wireformat::openwire::OpenWireFormatNegotiator::onCommand(0x116c3700,
>  0xfa567d70), at 0xfcc5ff05
>   [4] activemq::transport::TransportFilter::onCommand(0x10bdaaa0, 
> 0xfa567de0), at 0xfcb7ae15
>   [5] 
> activemq::transport::inactivity::InactivityMonitor::onCommand(0x10bdaaa0, 
> 0xfa567e40), at 0xfcbf69f7
>   [6] activemq::transport::TransportFilter::onCommand(0x1027cbc0, 
> 0xfa567e90), at 0xfcb7ae15
>   [7] activemq::transport::IOTransport::fire(0x1bbc4ab8, 0xfa567ef0), at 
> 0xfcb78a49
>   [8] activemq::transport::IOTransport::run(0x1bbc4ab8), at 0xfcb79827
>   [9] decaf::lang::Thread::run(0x1c2080a0), at 0xfcd58512
>   [10] __unnamed_syh4P47F8S2uD::runCallback(0x1c21c138), at 0xfcd1cc2f
>   [11] __unnamed_syh4P47F8S2uD::threadEntryMethod(0x1c21c138), at 0xfcd1cbc6
>   [12] _thr_setup(0xfac2a200), at 0xfc0771c0
>   [13] _lwp_start(), at 0xfc0774b0
> {noformat}
> {panel}
> {panel:title=Stack trace for the main 
> thread|borderStyle=dashed|borderColor=#ccc|titleBGColor=#F7D6C1|bgColor=#FFFFCE}
> {noformat}
> -----------------  lwp# 1 / thread# 1  --------------------
>  fc07750b __lwp_park (163995d8, 1bbc1e10, 8046358) + b
>  fc071d07 cond_wait_queue (163995d8, 1bbc1e10, 8046358) + 5e
>  fc072084 cond_wait_common (163995d8, 1bbc1e10, 8046358) + 1db
>  fc0722b6 _cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 51
>  fc072321 cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 24
>  fc07235d pthread_cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 1e
>  fcd21550 bool 
> decaf::internal::util::concurrent::PlatformThread::interruptibleWaitOnCondition(_pthread_cond*,_pthread_mutex*,long
>  long,int,decaf::internal::util::concurrent::CompletionCondition&) (163995d8, 
> 1bbc1e10, 493e0, 0, 0, 8046434) + 110
>  fcd1f051 bool 
> decaf::internal::util::concurrent::Threading::join(decaf::internal::util::concurrent::ThreadHandle*,long
>  long,int) (1bbe5660, 493e0, 0, 0, fcd5877a) + 13d
>  fcd587a0 void decaf::lang::Thread::join(long long) (1c20d270, 493e0, 0) + 38
>  fcb61945 void activemq::threads::CompositeTaskRunner::shutdown(long long) 
> (f3b1730, 493e0, 0) + e5
>  fcbd26d3 void activemq::transport::failover::FailoverTransport::close() 
> (efde3f8) + 327
>  fcb7b799 void activemq::transport::TransportFilter::close() (118b49e0) + 191
>  fc940003 void activemq::core::ActiveMQConnection::disconnect(long long) 
> (eef0fb8, 0, 0) + 52f
>  fc93c3f3 void activemq::core::ActiveMQConnection::close() (eef0fb8) + b47
>  0888e6ec main     (7, 80475dc, 80475fc) + 149
>  0888dd6a _start   (7, 9e7dafc, 804775d, 8047760, 8047769, 804777f) + 7a
> {noformat}
> {panel}
> This happens only during disconnect, so I suspect there is a concurrency 
> issue between ActiveMQConnection, TransportFilter, and 
> FailoverTransportListener.
> Digging into the code, I see that during disconnection the transport 
> listener is set to NULL at the very first stage. However, another thread 
> may still be using the listener pointer at that moment, because the NULL 
> check is not thread-safe: the listener can be cleared between the check 
> and the subsequent call.
> {noformat}
> void ActiveMQConnection::disconnect(long long lastDeliveredSequenceId) {
>     try {
>         // Clear the listener, we don't care about async errors at this point.
>         this->config->transport->setTransportListener(NULL);
>         [...]
> {noformat}
> {noformat}
> void FailoverTransportListener::onCommand(const Pointer<Command> command) {
>     if (command == NULL) {
>         return;
>     }
>     if (command->isResponse()) {
>         Pointer<Response> response = command.dynamicCast<Response>();
>         parent->processResponse(response);
>     }
>     if (!parent->isInitialized()) {
>         parent->setInitialized(true);
>     }
>     if (command->isConnectionControl()) {
>         parent->handleConnectionControl(command);
>     }
>     if (parent->getTransportListener() != NULL) {
>         parent->getTransportListener()->onCommand(command);
>     }
> }
> {noformat}
> {noformat}
> void TransportFilter::onCommand(const Pointer<Command> command) {
>     if (!this->impl->started.get() || this->impl->closed.get()) {
>         return;
>     }
>     try {
>         if (this->listener != NULL) {
>             this->listener->onCommand(command);
>         }
>     } catch (...) {
>     }
> }
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to