[
https://issues.apache.org/jira/browse/MESOS-3475?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15563271#comment-15563271
]
Ilya Pronin commented on MESOS-3475:
------------------------------------
Just putting in my 2 cents. I noticed that the described {{getenv}} /
{{setenv}} race occurs relatively frequently in the
{{MasterMaintenanceTest.InverseOffersFilters}} test and causes a segfault. If I
run the test 1000 times, it will most likely fail at least once.
Here's an example stack trace of the two racing threads (Thread 7 is inside
{{os::setenv}}, Thread 2 is inside {{os::getenv}}).
{code:none}
Thread 7 (Thread 0x7fffe609d700 (LWP 17817)):
#0 maybe_split_for_insert (rootp=0x7fffc400bae8, mode=0, gp_r=-8, p_r=-4,
gparentp=0x7fffc803ff00, parentp=0x7fffd803c908) at tsearch.c:170
#1 __tsearch (key=0x7fffcc0a0be0, vrootp=vrootp@entry=0x7ffff0918e58
<known_values>, compar=0x7ffff05dffa0 <__strcmp_sse2>)
at tsearch.c:263
#2 0x00007ffff0594bdc in __add_to_environ (name=<optimized out>,
value=0x7fffcc04b238
"/tmp/MasterMaintenanceTest_InverseOffersFilters_aKHUPK/slaves/80c27644-d289-47fe-b9b4-f5d1e592a52b-S1/frameworks/80c27644-d289-47fe-b9b4-f5d1e592a52b-0000/executors/executor-1/runs/7149f1fe-3fb7-4a79-"...,
combined=<optimized out>,
replace=<optimized out>) at setenv.c:212
#3 0x0000000000d44d64 in os::setenv ()
#4 0x0000000000d42a35 in mesos::internal::tests::TestContainerizer::_launch ()
#5 0x0000000000d62b11 in
testing::internal::InvokeHelper<process::Future<bool>,
std::tuple<mesos::ContainerID const&, Option<mesos::TaskInfo> const&,
mesos::ExecutorInfo const&, std::string const&, Option<std::string> const&,
mesos::SlaveID const&, std::map<std::string, std::string,
std::less<std::string>, std::allocator<std::pair<std::string const,
std::string> > > const&, bool>
>::InvokeMethod<mesos::internal::tests::TestContainerizer,
process::Future<bool>
(mesos::internal::tests::TestContainerizer::*)(mesos::ContainerID const&,
Option<mesos::TaskInfo> const&, mesos::ExecutorInfo const&, std::string const&,
Option<std::string> const&, mesos::SlaveID const&, std::map<std::string,
std::string, std::less<std::string>, std::allocator<std::pair<std::string
const, std::string> > > const&, bool)> ()
#6 0x0000000000d623b9 in
testing::internal::InvokeMethodAction<mesos::internal::tests::TestContainerizer,
process::Future<bool>
(mesos::internal::tests::TestContainerizer::*)(mesos::ContainerID const&,
Option<mesos::TaskInfo> const&, mesos::ExecutorInfo const&, std::string const&,
Option<std::string> const&, mesos::SlaveID const&, std::map<std::string,
std::string, std::less<std::string>, std::allocator<std::pair<std::string
const, std::string> > > const&, bool)>::Perform<process::Future<bool>,
std::tuple<mesos::ContainerID const&, Option<mesos::TaskInfo> const&,
mesos::ExecutorInfo const&, std::string const&, Option<std::string> const&,
mesos::SlaveID const&, std::map<std::string, std::string,
std::less<std::string>, std::allocator<std::pair<std::string const,
std::string> > > const&, bool> > ()
#7 0x0000000000d61c17 in
testing::PolymorphicAction<testing::internal::InvokeMethodAction<mesos::internal::tests::TestContainerizer,
process::Future<bool>
(mesos::internal::tests::TestContainerizer::*)(mesos::ContainerID const&,
Option<mesos::TaskInfo> const&, mesos::ExecutorInfo const&, std::string const&,
Option<std::string> const&, mesos::SlaveID const&, std::map<std::string,
std::string, std::less<std::string>, std::allocator<std::pair<std::string
const, std::string> > > const&, bool)> >::MonomorphicImpl<process::Future<bool>
(mesos::ContainerID const&, Option<mesos::TaskInfo> const&, mesos::ExecutorInfo
const&, std::string const&, Option<std::string> const&, mesos::SlaveID const&,
std::map<std::string, std::string, std::less<std::string>,
std::allocator<std::pair<std::string const, std::string> > > const&,
bool)>::Perform(std::tuple<mesos::ContainerID const&, Option<mesos::TaskInfo>
const&, mesos::ExecutorInfo const&, std::string const&, Option<std::string>
const&, mesos::SlaveID const&, std::map<std::string, std::string,
std::less<std::string>, std::allocator<std::pair<std::string const,
std::string> > > const&, bool> const&) ()
#8 0x0000000000bebae1 in testing::Action<process::Future<bool>
(mesos::ContainerID const&, Option<mesos::TaskInfo> const&, mesos::ExecutorInfo
const&, std::string const&, Option<std::string> const&, mesos::SlaveID const&,
std::map<std::string, std::string, std::less<std::string>,
std::allocator<std::pair<std::string const, std::string> > > const&,
bool)>::Perform(std::tuple<mesos::ContainerID const&, Option<mesos::TaskInfo>
const&, mesos::ExecutorInfo const&, std::string const&, Option<std::string>
const&, mesos::SlaveID const&, std::map<std::string, std::string,
std::less<std::string>, std::allocator<std::pair<std::string const,
std::string> > > const&, bool> const&) const ()
#9 0x0000000000bd234d in
testing::internal::ActionResultHolder<process::Future<bool>
>::PerformAction<process::Future<bool> (mesos::ContainerID const&,
Option<mesos::TaskInfo> const&, mesos::ExecutorInfo const&, std::string const&,
Option<std::string> const&, mesos::SlaveID const&, std::map<std::string,
std::string, std::less<std::string>, std::allocator<std::pair<std::string
const, std::string> > > const&, bool)>(testing::Action<process::Future<bool>
(mesos::ContainerID const&, Option<mesos::TaskInfo> const&, mesos::ExecutorInfo
const&, std::string const&, Option<std::string> const&, mesos::SlaveID const&,
std::map<std::string, std::string, std::less<std::string>,
std::allocator<std::pair<std::string const, std::string> > > const&, bool)>
const&, testing::internal::Function<process::Future<bool> (mesos::ContainerID
const&, Option<mesos::TaskInfo> const&, mesos::ExecutorInfo const&, std::string
const&, Option<std::string> const&, mesos::SlaveID const&,
std::map<std::string, std::string, std::less<std::string>,
std::allocator<std::pair<std::string const, std::string> > > const&,
bool)>::ArgumentTuple const&)
()
#10 0x0000000000bbdff3 in
testing::internal::FunctionMockerBase<process::Future<bool> (mesos::ContainerID
const&, Option<mesos::TaskInfo> const&, mesos::ExecutorInfo const&, std::string
const&, Option<std::string> const&, mesos::SlaveID const&,
std::map<std::string, std::string, std::less<std::string>,
std::allocator<std::pair<std::string const, std::string> > > const&,
bool)>::UntypedPerformAction(void const*, void const*) const ()
#11 0x0000000001ae6aad in
testing::internal::UntypedFunctionMockerBase::UntypedInvokeWith ()
#12 0x0000000000ae017f in
testing::internal::FunctionMockerBase<process::Future<bool> (mesos::ContainerID
const&, Option<mesos::TaskInfo> const&, mesos::ExecutorInfo const&, std::string
const&, Option<std::string> const&, mesos::SlaveID const&,
std::map<std::string, std::string, std::less<std::string>,
std::allocator<std::pair<std::string const, std::string> > > const&,
bool)>::InvokeWith(std::tuple<mesos::ContainerID const&,
Option<mesos::TaskInfo> const&, mesos::ExecutorInfo const&, std::string const&,
Option<std::string> const&, mesos::SlaveID const&, std::map<std::string,
std::string, std::less<std::string>, std::allocator<std::pair<std::string
const, std::string> > > const&, bool> const&)
()
#13 0x0000000000abd88c in
testing::internal::FunctionMocker<process::Future<bool> (mesos::ContainerID
const&, Option<mesos::TaskInfo> const&, mesos::ExecutorInfo const&, std::string
const&, Option<std::string> const&, mesos::SlaveID const&,
std::map<std::string, std::string, std::less<std::string>,
std::allocator<std::pair<std::string const, std::string> > > const&,
bool)>::Invoke(mesos::ContainerID const&, Option<mesos::TaskInfo> const&,
mesos::ExecutorInfo const&, std::string const&, Option<std::string> const&,
mesos::SlaveID const&, std::map<std::string, std::string,
std::less<std::string>, std::allocator<std::pair<std::string const,
std::string> > > const&, bool) ()
#14 0x0000000000d44fa5 in mesos::internal::tests::TestContainerizer::launch ()
#15 0x00007ffff5d529ee in mesos::internal::slave::Framework::launchExecutor ()
from /home/vagrant/build/src/.libs/libmesos-1.1.0.so
#16 0x00007ffff5d2e4bb in mesos::internal::slave::Slave::_run () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#17 0x00007ffff5d84be7 in void process::dispatch<mesos::internal::slave::Slave,
process::Future<bool> const&, mesos::FrameworkInfo const&, mesos::ExecutorInfo
const&, Option<mesos::TaskInfo> const&, Option<mesos::TaskGroupInfo> const&,
process::Future<bool>, mesos::FrameworkInfo, mesos::ExecutorInfo,
Option<mesos::TaskInfo>, Option<mesos::TaskGroupInfo>
>(process::PID<mesos::internal::slave::Slave> const&, void
(mesos::internal::slave::Slave::*)(process::Future<bool> const&,
mesos::FrameworkInfo const&, mesos::ExecutorInfo const&,
Option<mesos::TaskInfo> const&, Option<mesos::TaskGroupInfo> const&),
process::Future<bool>,
mesos::FrameworkInfo, mesos::ExecutorInfo, Option<mesos::TaskInfo>,
Option<mesos::TaskGroupInfo>)::{lambda(process::ProcessBase*)#1}::operator()(process::ProcessBase*)
const ()
from /home/vagrant/build/src/.libs/libmesos-1.1.0.so
#18 0x00007ffff5db83f5 in std::_Function_handler<void (process::ProcessBase*),
void process::dispatch<mesos::internal::slave::Slave, process::Future<bool>
const&, mesos::FrameworkInfo const&, mesos::ExecutorInfo const&,
Option<mesos::TaskInfo> const&, Option<mesos::TaskGroupInfo> const&,
process::Future<bool>, mesos::FrameworkInfo, mesos::ExecutorInfo,
Option<mesos::TaskInfo>, Option<mesos::TaskGroupInfo>
>(process::PID<mesos::internal::slave::Slave> const&, void
(mesos::internal::slave::Slave::*)(process::Future<bool> const&,
mesos::FrameworkInfo const&, mesos::ExecutorInfo const&,
Option<mesos::TaskInfo> const&, Option<mesos::TaskGroupInfo> const&),
process::Future<bool>, mesos::FrameworkInfo, mesos::ExecutorInfo,
Option<mesos::TaskInfo>,
Option<mesos::TaskGroupInfo>)::{lambda(process::ProcessBase*)#1}>::_M_invoke(std::_Any_data
const&, process::ProcessBase*) () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#19 0x00007ffff697d42d in std::function<void
(process::ProcessBase*)>::operator()(process::ProcessBase*) const ()
from /home/vagrant/build/src/.libs/libmesos-1.1.0.so
#20 0x00007ffff696035d in process::ProcessBase::visit () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#21 0x00007ffff6968772 in process::DispatchEvent::visit () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#22 0x0000000000a0cf72 in process::ProcessBase::serve ()
#23 0x00007ffff695c652 in process::ProcessManager::resume () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#24 0x00007ffff69592dd in operator() () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#25 0x00007ffff6967f1a in _M_invoke<>(void) () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#26 0x00007ffff6967e71 in operator() () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#27 0x00007ffff6967e0a in _M_run () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#28 0x00007ffff0eeba60 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#29 0x00007ffff1552184 in start_thread (arg=0x7fffe609d700) at
pthread_create.c:312
#30 0x00007ffff065337d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:111
Thread 2 (Thread 0x7fffe88a2700 (LWP 17812)):
#0 __GI_getenv (name=0x7fffd808e6ea "BPROCESS_IP") at getenv.c:85
#1 0x0000000000a38ca4 in os::getenv ()
#2 0x00007ffff5d578a5 in mesos::internal::slave::executorEnvironment () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#3 0x00007ffff5d528de in mesos::internal::slave::Framework::launchExecutor ()
from /home/vagrant/build/src/.libs/libmesos-1.1.0.so
#4 0x00007ffff5d2e4bb in mesos::internal::slave::Slave::_run () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#5 0x00007ffff5d84be7 in void process::dispatch<mesos::internal::slave::Slave,
process::Future<bool> const&, mesos::FrameworkInfo const&, mesos::ExecutorInfo
const&, Option<mesos::TaskInfo> const&, Option<mesos::TaskGroupInfo> const&,
process::Future<bool>, mesos::FrameworkInfo, mesos::ExecutorInfo,
Option<mesos::TaskInfo>, Option<mesos::TaskGroupInfo>
>(process::PID<mesos::internal::slave::Slave> const&, void
(mesos::internal::slave::Slave::*)(process::Future<bool> const&,
mesos::FrameworkInfo const&, mesos::ExecutorInfo const&,
Option<mesos::TaskInfo> const&, Option<mesos::TaskGroupInfo> const&),
process::Future<bool>, mesos::FrameworkInfo, mesos::ExecutorInfo,
Option<mesos::TaskInfo>,
Option<mesos::TaskGroupInfo>)::{lambda(process::ProcessBase*)#1}::operator()(process::ProcessBase*)
const ()
from /home/vagrant/build/src/.libs/libmesos-1.1.0.so
#6 0x00007ffff5db83f5 in std::_Function_handler<void (process::ProcessBase*),
void process::dispatch<mesos::internal::slave::Slave, process::Future<bool>
const&, mesos::FrameworkInfo const&, mesos::ExecutorInfo const&,
Option<mesos::TaskInfo> const&, Option<mesos::TaskGroupInfo> const&,
process::Future<bool>, mesos::FrameworkInfo, mesos::ExecutorInfo,
Option<mesos::TaskInfo>, Option<mesos::TaskGroupInfo>
>(process::PID<mesos::internal::slave::Slave> const&, void
(mesos::internal::slave::Slave::*)(process::Future<bool> const&,
mesos::FrameworkInfo const&, mesos::ExecutorInfo const&,
Option<mesos::TaskInfo> const&, Option<mesos::TaskGroupInfo> const&),
process::Future<bool>, mesos::FrameworkInfo, mesos::ExecutorInfo,
Option<mesos::TaskInfo>,
Option<mesos::TaskGroupInfo>)::{lambda(process::ProcessBase*)#1}>::_M_invoke(std::_Any_data
const&, process::ProcessBase*) () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#7 0x00007ffff697d42d in std::function<void
(process::ProcessBase*)>::operator()(process::ProcessBase*) const ()
from /home/vagrant/build/src/.libs/libmesos-1.1.0.so
#8 0x00007ffff696035d in process::ProcessBase::visit () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#9 0x00007ffff6968772 in process::DispatchEvent::visit () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#10 0x0000000000a0cf72 in process::ProcessBase::serve ()
#11 0x00007ffff695c652 in process::ProcessManager::resume () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#12 0x00007ffff69592dd in operator() () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#13 0x00007ffff6967f1a in _M_invoke<>(void) () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#14 0x00007ffff6967e71 in operator() () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#15 0x00007ffff6967e0a in _M_run () from
/home/vagrant/build/src/.libs/libmesos-1.1.0.so
#16 0x00007ffff0eeba60 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#17 0x00007ffff1552184 in start_thread (arg=0x7fffe88a2700) at
pthread_create.c:312
#18 0x00007ffff065337d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:111
{code}
> TestContainerizer should not modify global environment variables
> ----------------------------------------------------------------
>
> Key: MESOS-3475
> URL: https://issues.apache.org/jira/browse/MESOS-3475
> Project: Mesos
> Issue Type: Bug
> Reporter: Joris Van Remoortere
> Assignee: haosdent
>
> Currently the {{TestContainerizer}} modifies the environment variables. Since
> these are global variables, this can cause other threads reading these
> variables to get inconsistent results, or even segfault if they happen to
> read while the environment is being changed.
> Synchronizing within the TestContainerizer is not sufficient. We should pass
> the environment variables into a fork, or set them on the command line of an
> execute.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)