[
https://issues.apache.org/jira/browse/MESOS-2539?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14519321#comment-14519321
]
haosdent commented on MESOS-2539:
---------------------------------
I used gdb to trace the error on CentOS. It seems this code has a problem:
{code}
signaledWrapper = defer(self(), &Slave::signaled, lambda::_1, lambda::_2);
{code}
The stack is
{code}
(gdb) where
#0 0x0000003056a32635 in raise () from /lib64/libc.so.6
#1 0x0000003056a33e15 in abort () from /lib64/libc.so.6
#2 0x0000003056a70547 in __libc_message () from /lib64/libc.so.6
#3 0x0000003056a75e76 in malloc_printerr () from /lib64/libc.so.6
#4 0x00007ffff6254d5f in _Base_manager<process::_Defer<void
(*(process::PID<mesos::internal::slave::Slave>, void
(mesos::internal::slave::Slave::*)(int, int), std::tr1::_Placeholder<1>,
std::tr1::_Placeholder<2>))(const process::PID<mesos::internal::slave::Slave>&,
void (mesos::internal::slave::Slave::*)(int, int), int, int)>
>::_M_destroy(std::tr1::_Any_data &, std::tr1::false_type) (__victim=...) at
/usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/tr1_impl/functional:1523
#5 0x00007ffff62471ee in _Base_manager<process::_Defer<void
(*(process::PID<mesos::internal::slave::Slave>, void
(mesos::internal::slave::Slave::*)(int, int), std::tr1::_Placeholder<1>,
std::tr1::_Placeholder<2>))(const process::PID<mesos::internal::slave::Slave>&,
void (mesos::internal::slave::Slave::*)(int, int), int, int)>
>::_M_manager(std::tr1::_Any_data &, const std::tr1::_Any_data &,
std::tr1::_Manager_operation) (__dest=..., __source=...,
__op=std::tr1::__destroy_functor) at
/usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/tr1_impl/functional:1547
#6 0x0000000000436051 in std::tr1::_Function_base::~_Function_base
(this=0x7ffff07d0850, __in_chrg=<value optimized out>)
at
/usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/tr1_impl/functional:1628
#7 0x00007ffff6224348 in std::tr1::function<void(int, int)>::~function(void)
(this=0x7ffff07d0850, __in_chrg=<value optimized out>)
at
/usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/tr1_impl/functional:1463
#8 0x00007ffff62243cf in std::tr1::function<void(int,
int)>::operator=<process::_Defer<void
(*(process::PID<mesos::internal::slave::Slave>, void
(mesos::internal::slave::Slave::*)(int, int), std::tr1::_Placeholder<1>,
std::tr1::_Placeholder<2>))(const process::PID<mesos::internal::slave::Slave>&,
void (mesos::internal::slave::Slave::*)(int, int), int, int)>
>(process::_Defer<void (*(process::PID<mesos::internal::slave::Slave>, void
(mesos::internal::slave::Slave::*)(int, int), std::tr1::_Placeholder<1>,
std::tr1::_Placeholder<2>))(const process::PID<mesos::internal::slave::Slave>&,
void (mesos::internal::slave::Slave::*)(int, int), int, int)>)
(this=0x7ffff77e8440, __f=...) at
/usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/tr1_impl/functional:1885
#9 0x00007ffff61f13ec in mesos::internal::slave::Slave::initialize
(this=0x717d40) at ../../src/slave/slave.cpp:491
#10 0x00007ffff68bfa4f in resume () from
/home/ld-sgdev/huangh/mesos/build/src/.libs/libmesos-0.23.0.so
#11 0x00007ffff68b44bd in schedule () from
/home/ld-sgdev/huangh/mesos/build/src/.libs/libmesos-0.23.0.so
#12 0x00000030572079d1 in start_thread () from /lib64/libpthread.so.0
#13 0x0000003056ae886d in clone () from /lib64/libc.so.6
{code}
{code}
(gdb) frame 9
#9 0x00007ffff61f13ec in mesos::internal::slave::Slave::initialize
(this=0x717d40) at ../../src/slave/slave.cpp:491
491 signaledWrapper = defer(self(), &Slave::signaled, lambda::_1,
lambda::_2);
(gdb) list
486
487 // The SA_SIGINFO flag tells sigaction() to use
488 // the sa_sigaction field, not sa_handler.
489 action.sa_flags = SA_SIGINFO;
490
491 signaledWrapper = defer(self(), &Slave::signaled, lambda::_1,
lambda::_2);
492
493 action.sa_sigaction = signalHandler;
494
495 if (sigaction(SIGUSR1, &action, NULL) < 0) {
{code}
{code}
(gdb) frame 4
#4 0x00007ffff6254d5f in _Base_manager<process::_Defer<void
(*(process::PID<mesos::internal::slave::Slave>, void
(mesos::internal::slave::Slave::*)(int, int), std::tr1::_Placeholder<1>,
std::tr1::_Placeholder<2>))(const process::PID<mesos::internal::slave::Slave>&,
void (mesos::internal::slave::Slave::*)(int, int), int, int)>
>::_M_destroy(std::tr1::_Any_data &, std::tr1::false_type) (__victim=...) at
/usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/tr1_impl/functional:1523
1523 delete __victim._M_access<_Functor*>();
(gdb) list
1518
1519 // Destroying an object located on the heap.
1520 static void
1521 _M_destroy(_Any_data& __victim, false_type)
1522 {
1523 delete __victim._M_access<_Functor*>();
1524 }
1525
1526 public:
1527 static bool
{code}
But I still cannot understand why {code} signaledWrapper = defer(self(),
&Slave::signaled, lambda::_1, lambda::_2); {code} would cause {code}delete
__victim._M_access<_Functor*>();{code} to fail.
> ExamplesTest.LowLevelSchedulerLibprocess is flaky
> -------------------------------------------------
>
> Key: MESOS-2539
> URL: https://issues.apache.org/jira/browse/MESOS-2539
> Project: Mesos
> Issue Type: Bug
> Affects Versions: 0.22.0, 0.23.0
> Reporter: Jie Yu
>
> Centos6 gcc-44
> sudo make check
> {noformat}
> [ RUN ] ExamplesTest.LowLevelSchedulerLibprocess
> 2015-03-24
> 19:54:54,995:5735(0x7fc007fff700):ZOO_ERROR@handle_socket_error_msg@1697:
> Socket [127.0.0.1:37590] zk retcode=-4, errno
> =111(Connection refused): server refused to accept the client
> *** glibc detected *** /home/jyu/workspace/mesos-dist/build/src/.libs: double
> free or corruption (fasttop): 0x00007f7f6c003150 ***
> ======= Backtrace: =========
> /lib64/libc.so.6(+0x75e66)[0x7f7f8b79ee66]
> /home/jyu/workspace/mesos-dist/build/src/.libs/libmesos-0.23.0.so(_ZNSt3tr114_Function_base13_Base_managerIN7process6_DeferIFPFvRK
> NS2_3PIDIN5mesos8internal5slave5SlaveEEEMS8_FviiEiiES9_SD_NS_12_PlaceholderILi1EEENSG_ILi2EEEEEEE10_M_destroyERNS_9_Any_dataENS_17
> integral_constantIbLb0EEE+0x31)[0x7f7f8ecef16b]
> /home/jyu/workspace/mesos-dist/build/src/.libs/libmesos-0.23.0.so(_ZNSt3tr114_Function_base13_Base_managerIN7process6_DeferIFPFvRK
> NS2_3PIDIN5mesos8internal5slave5SlaveEEEMS8_FviiEiiES9_SD_NS_12_PlaceholderILi1EEENSG_ILi2EEEEEEE10_M_managerERNS_9_Any_dataERKSM_
> NS_18_Manager_operationE+0x92)[0x7f7f8ece17c0]
> /home/jyu/workspace/mesos-dist/build/src/.libs(_ZNSt3tr114_Function_baseD1Ev+0x37)[0x45107d]
> /home/jyu/workspace/mesos-dist/build/src/.libs/libmesos-0.23.0.so(_ZNSt3tr18functionIFviiEED1Ev+0x18)[0x7f7f8ecbeb34]
> /home/jyu/workspace/mesos-dist/build/src/.libs/libmesos-0.23.0.so(_ZNSt3tr18functionIFviiEEaSIN7process6_DeferIFPFvRKNS4_3PIDIN5me
> sos8internal5slave5SlaveEEEMSA_FviiEiiESB_SF_NS_12_PlaceholderILi1EEENSI_ILi2EEEEEEEEN9__gnu_cxx11__enable_ifIXntsrNS_11is_integra
> lIT_EE5valueERS2_E6__typeESQ_+0x85)[0x7f7f8ecbebbb]
> /home/jyu/workspace/mesos-dist/build/src/.libs/libmesos-0.23.0.so(_ZN5mesos8internal5slave5Slave10initializeEv+0x31bb)[0x7f7f8ec8b
> f99]
> /home/jyu/workspace/mesos-dist/build/src/.libs/libmesos-0.23.0.so(_ZN7process14ProcessManager6resumeEPNS_11ProcessBaseE+0x299)[0x7
> f7f8f3bf007]
> /home/jyu/workspace/mesos-dist/build/src/.libs/libmesos-0.23.0.so(_ZN7process8scheduleEPv+0x91)[0x7f7f8f3b3a75]
> /lib64/libpthread.so.0(+0x79d1)[0x7f7f8c2649d1]
> /lib64/libc.so.6(clone+0x6d)[0x7f7f8b8118fd]
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)