[ 
https://issues.apache.org/jira/browse/AMQCPP-534?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Jeremy Leung updated AMQCPP-534:
--------------------------------

    Description: 
This happens at random and causes a core dump under Solaris 10.

{panel:title=Core dump happened on thread #22 which is a thread for IO for 
activemq|borderStyle=dashed|borderColor=#ccc|titleBGColor=#F7D6C1|bgColor=#FFFFCE}
{noformat}
t@22 (l@22) terminated by signal SEGV (no mapping at the fault address)
0xfcbef02c: onCommand+0x0370:   movl     (%edi),%eax
(dbx) where -h
current thread: t@22
=>[1] 
activemq::transport::failover::FailoverTransportListener::onCommand(0x1c20d280, 
0xfa567ce0), at 0xfcbef02c
  [2] activemq::transport::TransportFilter::onCommand(0x116c3700, 0xfa567d20), 
at 0xfcb7ae15
  [3] 
activemq::wireformat::openwire::OpenWireFormatNegotiator::onCommand(0x116c3700, 
0xfa567d70), at 0xfcc5ff05
  [4] activemq::transport::TransportFilter::onCommand(0x10bdaaa0, 0xfa567de0), 
at 0xfcb7ae15
  [5] activemq::transport::inactivity::InactivityMonitor::onCommand(0x10bdaaa0, 
0xfa567e40), at 0xfcbf69f7
  [6] activemq::transport::TransportFilter::onCommand(0x1027cbc0, 0xfa567e90), 
at 0xfcb7ae15
  [7] activemq::transport::IOTransport::fire(0x1bbc4ab8, 0xfa567ef0), at 
0xfcb78a49
  [8] activemq::transport::IOTransport::run(0x1bbc4ab8), at 0xfcb79827
  [9] decaf::lang::Thread::run(0x1c2080a0), at 0xfcd58512
  [10] __unnamed_syh4P47F8S2uD::runCallback(0x1c21c138), at 0xfcd1cc2f
  [11] __unnamed_syh4P47F8S2uD::threadEntryMethod(0x1c21c138), at 0xfcd1cbc6
  [12] _thr_setup(0xfac2a200), at 0xfc0771c0
  [13] _lwp_start(), at 0xfc0774b0
{noformat}
{panel}

{panel:title=Stack trace for the main 
thread|borderStyle=dashed|borderColor=#ccc|titleBGColor=#F7D6C1|bgColor=#FFFFCE}
{noformat}
-----------------  lwp# 1 / thread# 1  --------------------
 fc07750b __lwp_park (163995d8, 1bbc1e10, 8046358) + b
 fc071d07 cond_wait_queue (163995d8, 1bbc1e10, 8046358) + 5e
 fc072084 cond_wait_common (163995d8, 1bbc1e10, 8046358) + 1db
 fc0722b6 _cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 51
 fc072321 cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 24
 fc07235d pthread_cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 1e
 fcd21550 bool 
decaf::internal::util::concurrent::PlatformThread::interruptibleWaitOnCondition(_pthread_cond*,_pthread_mutex*,long
 long,int,decaf::internal::util::concurrent::CompletionCondition&) (163995d8, 
1bbc1e10, 493e0, 0, 0, 8046434) + 110
 fcd1f051 bool 
decaf::internal::util::concurrent::Threading::join(decaf::internal::util::concurrent::ThreadHandle*,long
 long,int) (1bbe5660, 493e0, 0, 0, fcd5877a) + 13d
 fcd587a0 void decaf::lang::Thread::join(long long) (1c20d270, 493e0, 0) + 38
 fcb61945 void activemq::threads::CompositeTaskRunner::shutdown(long long) 
(f3b1730, 493e0, 0) + e5
 fcbd26d3 void activemq::transport::failover::FailoverTransport::close() 
(efde3f8) + 327
 fcb7b799 void activemq::transport::TransportFilter::close() (118b49e0) + 191
 fc940003 void activemq::core::ActiveMQConnection::disconnect(long long) 
(eef0fb8, 0, 0) + 52f
 fc93c3f3 void activemq::core::ActiveMQConnection::close() (eef0fb8) + b47
 0888e6ec main     (7, 80475dc, 80475fc) + 149
 0888dd6a _start   (7, 9e7dafc, 804775d, 8047760, 8047769, 804777f) + 7a
{noformat}
{panel}

This happens only during disconnect, so I suspect there is a concurrency 
issue between ActiveMQConnection, TransportFilter, and 
FailoverTransportListener.

Digging into the code, I see that during disconnection the transport listener 
is set to NULL at the very first stage. However, another thread may still be 
using the listener pointer, because the NULL check and the call that follows 
it are not thread safe: the listener can be set to NULL between the check and 
the call.

{noformat}
void ActiveMQConnection::disconnect(long long lastDeliveredSequenceId) {

    try {

        // Clear the listener, we don't care about async errors at this point.
        this->config->transport->setTransportListener(NULL);

        [...]
{noformat}

{noformat}
void FailoverTransportListener::onCommand(const Pointer<Command> command) {

    if (command == NULL) {
        return;
    }

    if (command->isResponse()) {
        Pointer<Response> response = command.dynamicCast<Response>();
        parent->processResponse(response);
    }

    if (!parent->isInitialized()) {
        parent->setInitialized(true);
    }

    if (command->isConnectionControl()) {
        parent->handleConnectionControl(command);
    }

    if (parent->getTransportListener() != NULL) {
        parent->getTransportListener()->onCommand(command);
    }
}
{noformat}

{noformat}
void TransportFilter::onCommand(const Pointer<Command> command) {

    if (!this->impl->started.get() || this->impl->closed.get()) {
        return;
    }

    try {
        if (this->listener != NULL) {
            this->listener->onCommand(command);
        }
    } catch (...) {
    }
}
{noformat}


  was:
This happen in random; and will cause core dump under Solaris 10.

{panel:title=Core dump happened on thread #22 which is a thread for IO for 
activemq|borderStyle=dashed|borderColor=#ccc|titleBGColor=#F7D6C1|bgColor=#FFFFCE}
{noformat}
t@22 (l@22) terminated by signal SEGV (no mapping at the fault address)
0xfcbef02c: onCommand+0x0370:   movl     (%edi),%eax
(dbx) where -h
current thread: t@22
=>[1] 
activemq::transport::failover::FailoverTransportListener::onCommand(0x1c20d280, 
0xfa567ce0), at 0xfcbef02c
  [2] activemq::transport::TransportFilter::onCommand(0x116c3700, 0xfa567d20), 
at 0xfcb7ae15
  [3] 
activemq::wireformat::openwire::OpenWireFormatNegotiator::onCommand(0x116c3700, 
0xfa567d70), at 0xfcc5ff05
  [4] activemq::transport::TransportFilter::onCommand(0x10bdaaa0, 0xfa567de0), 
at 0xfcb7ae15
  [5] activemq::transport::inactivity::InactivityMonitor::onCommand(0x10bdaaa0, 
0xfa567e40), at 0xfcbf69f7
  [6] activemq::transport::TransportFilter::onCommand(0x1027cbc0, 0xfa567e90), 
at 0xfcb7ae15
  [7] activemq::transport::IOTransport::fire(0x1bbc4ab8, 0xfa567ef0), at 
0xfcb78a49
  [8] activemq::transport::IOTransport::run(0x1bbc4ab8), at 0xfcb79827
  [9] decaf::lang::Thread::run(0x1c2080a0), at 0xfcd58512
  [10] __unnamed_syh4P47F8S2uD::runCallback(0x1c21c138), at 0xfcd1cc2f
  [11] __unnamed_syh4P47F8S2uD::threadEntryMethod(0x1c21c138), at 0xfcd1cbc6
  [12] _thr_setup(0xfac2a200), at 0xfc0771c0
  [13] _lwp_start(), at 0xfc0774b0
{noformat}
{panel}

{panel:title=Stack trace for the main 
thread|borderStyle=dashed|borderColor=#ccc|titleBGColor=#F7D6C1|bgColor=#FFFFCE}
{noformat}
-----------------  lwp# 1 / thread# 1  --------------------
 fc07750b __lwp_park (163995d8, 1bbc1e10, 8046358) + b
 fc071d07 cond_wait_queue (163995d8, 1bbc1e10, 8046358) + 5e
 fc072084 cond_wait_common (163995d8, 1bbc1e10, 8046358) + 1db
 fc0722b6 _cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 51
 fc072321 cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 24
 fc07235d pthread_cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 1e
 fcd21550 bool 
decaf::internal::util::concurrent::PlatformThread::interruptibleWaitOnCondition(_pthread_cond*,_pthread_mutex*,long
 long,int,decaf::internal::util::concurrent::CompletionCondition&) (163995d8, 
1bbc1e10, 493e0, 0, 0, 8046434) + 110
 fcd1f051 bool 
decaf::internal::util::concurrent::Threading::join(decaf::internal::util::concurrent::ThreadHandle*,long
 long,int) (1bbe5660, 493e0, 0, 0, fcd5877a) + 13d
 fcd587a0 void decaf::lang::Thread::join(long long) (1c20d270, 493e0, 0) + 38
 fcb61945 void activemq::threads::CompositeTaskRunner::shutdown(long long) 
(f3b1730, 493e0, 0) + e5
 fcbd26d3 void activemq::transport::failover::FailoverTransport::close() 
(efde3f8) + 327
 fcb7b799 void activemq::transport::TransportFilter::close() (118b49e0) + 191
 fc940003 void activemq::core::ActiveMQConnection::disconnect(long long) 
(eef0fb8, 0, 0) + 52f
 fc93c3f3 void activemq::core::ActiveMQConnection::close() (eef0fb8) + b47
 0888e6ec main     (7, 80475dc, 80475fc) + 149
 0888dd6a _start   (7, 9e7dafc, 804775d, 8047760, 8047769, 804777f) + 7a
{noformat}
{panel}

This is happen only during the disconnect. So, what i suspected is that there 
have an concurrent issue between the ActiveMQConnection and the TransportFilter.
As i dig into the code, i see during the disconnection, the transport listener 
will being set to NULL at the very first stage.

{noformat}
void ActiveMQConnection::disconnect(long long lastDeliveredSequenceId) {

    try {

        // Clear the listener, we don't care about async errors at this point.
        this->config->transport->setTransportListener(NULL);

        [...]
{noformat}




> Closing connection with setTransportListener to NULL may cause core dump
> ------------------------------------------------------------------------
>
>                 Key: AMQCPP-534
>                 URL: https://issues.apache.org/jira/browse/AMQCPP-534
>             Project: ActiveMQ C++ Client
>          Issue Type: Bug
>          Components: Transports
>    Affects Versions: 3.8.2
>            Reporter: Jeremy Leung
>            Assignee: Timothy Bish
>
> This happens at random and causes a core dump under Solaris 10.
> {panel:title=Core dump happened on thread #22 which is a thread for IO for 
> activemq|borderStyle=dashed|borderColor=#ccc|titleBGColor=#F7D6C1|bgColor=#FFFFCE}
> {noformat}
> t@22 (l@22) terminated by signal SEGV (no mapping at the fault address)
> 0xfcbef02c: onCommand+0x0370:   movl     (%edi),%eax
> (dbx) where -h
> current thread: t@22
> =>[1] 
> activemq::transport::failover::FailoverTransportListener::onCommand(0x1c20d280,
>  0xfa567ce0), at 0xfcbef02c
>   [2] activemq::transport::TransportFilter::onCommand(0x116c3700, 
> 0xfa567d20), at 0xfcb7ae15
>   [3] 
> activemq::wireformat::openwire::OpenWireFormatNegotiator::onCommand(0x116c3700,
>  0xfa567d70), at 0xfcc5ff05
>   [4] activemq::transport::TransportFilter::onCommand(0x10bdaaa0, 
> 0xfa567de0), at 0xfcb7ae15
>   [5] 
> activemq::transport::inactivity::InactivityMonitor::onCommand(0x10bdaaa0, 
> 0xfa567e40), at 0xfcbf69f7
>   [6] activemq::transport::TransportFilter::onCommand(0x1027cbc0, 
> 0xfa567e90), at 0xfcb7ae15
>   [7] activemq::transport::IOTransport::fire(0x1bbc4ab8, 0xfa567ef0), at 
> 0xfcb78a49
>   [8] activemq::transport::IOTransport::run(0x1bbc4ab8), at 0xfcb79827
>   [9] decaf::lang::Thread::run(0x1c2080a0), at 0xfcd58512
>   [10] __unnamed_syh4P47F8S2uD::runCallback(0x1c21c138), at 0xfcd1cc2f
>   [11] __unnamed_syh4P47F8S2uD::threadEntryMethod(0x1c21c138), at 0xfcd1cbc6
>   [12] _thr_setup(0xfac2a200), at 0xfc0771c0
>   [13] _lwp_start(), at 0xfc0774b0
> {noformat}
> {panel}
> {panel:title=Stack trace for the main 
> thread|borderStyle=dashed|borderColor=#ccc|titleBGColor=#F7D6C1|bgColor=#FFFFCE}
> {noformat}
> -----------------  lwp# 1 / thread# 1  --------------------
>  fc07750b __lwp_park (163995d8, 1bbc1e10, 8046358) + b
>  fc071d07 cond_wait_queue (163995d8, 1bbc1e10, 8046358) + 5e
>  fc072084 cond_wait_common (163995d8, 1bbc1e10, 8046358) + 1db
>  fc0722b6 _cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 51
>  fc072321 cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 24
>  fc07235d pthread_cond_timedwait (163995d8, 1bbc1e10, 80463c8) + 1e
>  fcd21550 bool 
> decaf::internal::util::concurrent::PlatformThread::interruptibleWaitOnCondition(_pthread_cond*,_pthread_mutex*,long
>  long,int,decaf::internal::util::concurrent::CompletionCondition&) (163995d8, 
> 1bbc1e10, 493e0, 0, 0, 8046434) + 110
>  fcd1f051 bool 
> decaf::internal::util::concurrent::Threading::join(decaf::internal::util::concurrent::ThreadHandle*,long
>  long,int) (1bbe5660, 493e0, 0, 0, fcd5877a) + 13d
>  fcd587a0 void decaf::lang::Thread::join(long long) (1c20d270, 493e0, 0) + 38
>  fcb61945 void activemq::threads::CompositeTaskRunner::shutdown(long long) 
> (f3b1730, 493e0, 0) + e5
>  fcbd26d3 void activemq::transport::failover::FailoverTransport::close() 
> (efde3f8) + 327
>  fcb7b799 void activemq::transport::TransportFilter::close() (118b49e0) + 191
>  fc940003 void activemq::core::ActiveMQConnection::disconnect(long long) 
> (eef0fb8, 0, 0) + 52f
>  fc93c3f3 void activemq::core::ActiveMQConnection::close() (eef0fb8) + b47
>  0888e6ec main     (7, 80475dc, 80475fc) + 149
>  0888dd6a _start   (7, 9e7dafc, 804775d, 8047760, 8047769, 804777f) + 7a
> {noformat}
> {panel}
> This happens only during disconnect, so I suspect there is a concurrency 
> issue between ActiveMQConnection, TransportFilter, and 
> FailoverTransportListener.
> Digging into the code, I see that during disconnection the transport 
> listener is set to NULL at the very first stage. However, another thread may 
> still be using the listener pointer, because the NULL check and the call 
> that follows it are not thread safe: the listener can be set to NULL between 
> the check and the call.
> {noformat}
> void ActiveMQConnection::disconnect(long long lastDeliveredSequenceId) {
>     try {
>         // Clear the listener, we don't care about async errors at this point.
>         this->config->transport->setTransportListener(NULL);
>         [...]
> {noformat}
> {noformat}
> void FailoverTransportListener::onCommand(const Pointer<Command> command) {
>     if (command == NULL) {
>         return;
>     }
>     if (command->isResponse()) {
>         Pointer<Response> response = command.dynamicCast<Response>();
>         parent->processResponse(response);
>     }
>     if (!parent->isInitialized()) {
>         parent->setInitialized(true);
>     }
>     if (command->isConnectionControl()) {
>         parent->handleConnectionControl(command);
>     }
>     if (parent->getTransportListener() != NULL) {
>         parent->getTransportListener()->onCommand(command);
>     }
> }
> {noformat}
> {noformat}
> void TransportFilter::onCommand(const Pointer<Command> command) {
>     if (!this->impl->started.get() || this->impl->closed.get()) {
>         return;
>     }
>     try {
>         if (this->listener != NULL) {
>             this->listener->onCommand(command);
>         }
>     } catch (...) {
>     }
> }
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.1.5#6160)

Reply via email to