[
https://issues.apache.org/jira/browse/AMQCPP-534?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Jeremy Leung updated AMQCPP-534:
--------------------------------
Attachment: AMQCPP-534.patch.txt
> Closing connection with setTransportListener to NULL may cause core dump
> ------------------------------------------------------------------------
>
> Key: AMQCPP-534
> URL: https://issues.apache.org/jira/browse/AMQCPP-534
> Project: ActiveMQ C++ Client
> Issue Type: Bug
> Components: Transports
> Affects Versions: 3.8.2
> Reporter: Jeremy Leung
> Assignee: Timothy Bish
> Fix For: 3.8.4, 3.9.0
>
> Attachments: AMQCPP-534.patch.txt
>
>
> This happens at random and causes a core dump under Solaris 10.
> {panel:title=Core dump happened on thread #22 which is a thread for IO for
> activemq|borderStyle=dashed|borderColor=#ccc|titleBGColor=#F7D6C1|bgColor=#FFFFCE}
> {noformat}
> t@22 (l@22) terminated by signal SEGV (no mapping at the fault address)
> 0xfcbef02c: onCommand+0x0370: movl (%edi),%eax
> (dbx) where -h
> current thread: t@22
> =>[1]
> activemq::transport::failover::FailoverTransportListener::onCommand(0x1c20d280,
> 0xfa567ce0), at 0xfcbef02c
> [2] activemq::transport::TransportFilter::onCommand(0x116c3700,
> 0xfa567d20), at 0xfcb7ae15
> [3]
> activemq::wireformat::openwire::OpenWireFormatNegotiator::onCommand(0x116c3700,
> 0xfa567d70), at 0xfcc5ff05
> [4] activemq::transport::TransportFilter::onCommand(0x10bdaaa0,
> 0xfa567de0), at 0xfcb7ae15
> [5]
> activemq::transport::inactivity::InactivityMonitor::onCommand(0x10bdaaa0,
> 0xfa567e40), at 0xfcbf69f7
> [6] activemq::transport::TransportFilter::onCommand(0x1027cbc0,
> 0xfa567e90), at 0xfcb7ae15
> [7] activemq::transport::IOTransport::fire(0x1bbc4ab8, 0xfa567ef0), at
> 0xfcb78a49
> [8] activemq::transport::IOTransport::run(0x1bbc4ab8), at 0xfcb79827
> [9] decaf::lang::Thread::run(0x1c2080a0), at 0xfcd58512
> [10] __unnamed_syh4P47F8S2uD::runCallback(0x1c21c138), at 0xfcd1cc2f
> [11] __unnamed_syh4P47F8S2uD::threadEntryMethod(0x1c21c138), at 0xfcd1cbc6
> [12] _thr_setup(0xfac2a200), at 0xfc0771c0
> [13] _lwp_start(), at 0xfc0774b0
> {noformat}
> {panel}
> {panel:title=Stack trace for the main
> thread|borderStyle=dashed|borderColor=#ccc|titleBGColor=#F7D6C1|bgColor=#FFFFCE}
> {noformat}
> ----------------- lwp# 1 / thread# 1 --------------------
> fc07750b __lwp_park (163995d8, 1bbc1e10, 8046358) + b
> fc071d07 cond_wait_queue (163995d8, 1bbc1e10, 8046358) + 5e
> fc072084 cond_wait_common (163995d8, 1bbc1e10, 8046358) + 1db
> fc0722b6 _cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 51
> fc072321 cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 24
> fc07235d pthread_cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 1e
> fcd21550 bool
> decaf::internal::util::concurrent::PlatformThread::interruptibleWaitOnCondition(_pthread_cond*,_pthread_mutex*,long
> long,int,decaf::internal::util::concurrent::CompletionCondition&) (163995d8,
> 1bbc1e10, 493e0, 0, 0, 8046434) + 110
> fcd1f051 bool
> decaf::internal::util::concurrent::Threading::join(decaf::internal::util::concurrent::ThreadHandle*,long
> long,int) (1bbe5660, 493e0, 0, 0, fcd5877a) + 13d
> fcd587a0 void decaf::lang::Thread::join(long long) (1c20d270, 493e0, 0) + 38
> fcb61945 void activemq::threads::CompositeTaskRunner::shutdown(long long)
> (f3b1730, 493e0, 0) + e5
> fcbd26d3 void activemq::transport::failover::FailoverTransport::close()
> (efde3f8) + 327
> fcb7b799 void activemq::transport::TransportFilter::close() (118b49e0) + 191
> fc940003 void activemq::core::ActiveMQConnection::disconnect(long long)
> (eef0fb8, 0, 0) + 52f
> fc93c3f3 void activemq::core::ActiveMQConnection::close() (eef0fb8) + b47
> 0888e6ec main (7, 80475dc, 80475fc) + 149
> 0888dd6a _start (7, 9e7dafc, 804775d, 8047760, 8047769, 804777f) + 7a
> {noformat}
> {panel}
> This happens only during the disconnect, so I suspect there is a
> concurrency issue between ActiveMQConnection, TransportFilter, and
> FailoverTransportListener.
> Digging into the code, I see that during disconnection the transport
> listener is set to NULL at the very first stage. However, another thread
> may still be using the listener pointer at that moment, because the NULL
> check is not thread safe: the pointer can be set to NULL between the check
> and the call through it.
> {noformat}
> void ActiveMQConnection::disconnect(long long lastDeliveredSequenceId) {
> try {
> // Clear the listener, we don't care about async errors at this point.
> this->config->transport->setTransportListener(NULL);
> [...]
> {noformat}
> {noformat}
> void FailoverTransportListener::onCommand(const Pointer<Command> command) {
> if (command == NULL) {
> return;
> }
> if (command->isResponse()) {
> Pointer<Response> response = command.dynamicCast<Response>();
> parent->processResponse(response);
> }
> if (!parent->isInitialized()) {
> parent->setInitialized(true);
> }
> if (command->isConnectionControl()) {
> parent->handleConnectionControl(command);
> }
> if (parent->getTransportListener() != NULL) {
> parent->getTransportListener()->onCommand(command);
> }
> }
> {noformat}
> {noformat}
> void TransportFilter::onCommand(const Pointer<Command> command) {
> if (!this->impl->started.get() || this->impl->closed.get()) {
> return;
> }
> try {
> if (this->listener != NULL) {
> this->listener->onCommand(command);
> }
> } catch (...) {
> }
> }
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)