This is an automated email from the ASF dual-hosted git repository. gilbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
commit c3964a83a773a43321a7dfa6455482002ed96e64 Author: Andrei Budnik <[email protected]> AuthorDate: Thu Jul 18 09:10:41 2019 -0700 Wrapped isolators in `IsolatorTracker`. This patch wraps every isolator in instance of `IsolatorTracker` class. If an isolator gets stuck in some operation, `pendingFutures` will return the isolator name, method name along with its arguments such as `containerId`, `pid`, etc. Review: https://reviews.apache.org/r/70889/ --- src/slave/containerizer/containerizer.cpp | 13 ++++++++-- src/slave/containerizer/containerizer.hpp | 5 +++- src/slave/containerizer/mesos/containerizer.cpp | 34 ++++++++++++++++++------- src/slave/containerizer/mesos/containerizer.hpp | 3 ++- src/slave/main.cpp | 12 ++++++++- src/tests/cluster.cpp | 10 +++++++- 6 files changed, 62 insertions(+), 15 deletions(-) diff --git a/src/slave/containerizer/containerizer.cpp b/src/slave/containerizer/containerizer.cpp index 5ce0d9c..9e44e5e 100644 --- a/src/slave/containerizer/containerizer.cpp +++ b/src/slave/containerizer/containerizer.cpp @@ -219,7 +219,8 @@ Try<Containerizer*> Containerizer::create( Fetcher* fetcher, GarbageCollector* gc, SecretResolver* secretResolver, - VolumeGidManager* volumeGidManager) + VolumeGidManager* volumeGidManager, + PendingFutureTracker* futureTracker) { // Get the set of containerizer types. const vector<string> _types = strings::split(flags.containerizers, ","); @@ -289,7 +290,15 @@ Try<Containerizer*> Containerizer::create( foreach (const string& type, containerizerTypes) { if (type == "mesos") { Try<MesosContainerizer*> containerizer = MesosContainerizer::create( - flags, local, fetcher, gc, secretResolver, nvidia, volumeGidManager); + flags, + local, + fetcher, + gc, + secretResolver, + nvidia, + volumeGidManager, + futureTracker); + if (containerizer.isError()) { return Error("Could not create MesosContainerizer: " + containerizer.error()); diff --git a/src/slave/containerizer/containerizer.hpp b/src/slave/containerizer/containerizer.hpp index d33c65c..6a9ebed 100644 --- a/src/slave/containerizer/containerizer.hpp +++ b/src/slave/containerizer/containerizer.hpp @@ -36,6 +36,8 @@ #include <stout/option.hpp> #include <stout/try.hpp> +#include "common/future_tracker.hpp" + #include "slave/gc.hpp" #include "slave/volume_gid_manager/volume_gid_manager.hpp" @@ -75,7 +77,8 @@ public: Fetcher* fetcher, GarbageCollector* gc, SecretResolver* secretResolver = nullptr, - VolumeGidManager* volumeGidManager = nullptr); + VolumeGidManager* volumeGidManager = nullptr, + PendingFutureTracker* futureTracker = nullptr); // Determine slave resources from flags, probing the system or // querying a delegate. diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp index c9a369b..b4d10a7 100644 --- a/src/slave/containerizer/mesos/containerizer.cpp +++ b/src/slave/containerizer/mesos/containerizer.cpp @@ -70,6 +70,7 @@ #include "slave/containerizer/mesos/constants.hpp" #include "slave/containerizer/mesos/containerizer.hpp" +#include "slave/containerizer/mesos/isolator_tracker.hpp" #include "slave/containerizer/mesos/launch.hpp" #include "slave/containerizer/mesos/launcher.hpp" #include "slave/containerizer/mesos/paths.hpp" @@ -177,7 +178,8 @@ Try<MesosContainerizer*> MesosContainerizer::create( GarbageCollector* gc, SecretResolver* secretResolver, const Option<NvidiaComponents>& nvidia, - VolumeGidManager* volumeGidManager) + VolumeGidManager* volumeGidManager, + PendingFutureTracker* futureTracker) { Try<hashset<string>> isolations = [&flags]() -> Try<hashset<string>> { const vector<string> tokens(strings::tokenize(flags.isolation, ",")); @@ -537,26 +539,40 @@ Try<MesosContainerizer*> MesosContainerizer::create( cgroupsIsolatorCreated = true; } - Try<Isolator*> isolator = creator.second(flags); - if (isolator.isError()) { + Try<Isolator*> _isolator = creator.second(flags); + if (_isolator.isError()) { return Error("Failed to create isolator '" + creator.first + "': " + - isolator.error()); + _isolator.error()); + } + + Owned<Isolator> isolator(_isolator.get()); + + if (futureTracker != nullptr) { + isolator = Owned<Isolator>( + new IsolatorTracker(isolator, creator.first, futureTracker)); } - isolators.push_back(Owned<Isolator>(isolator.get())); + isolators.push_back(isolator); } // Next, apply any custom isolators in the order given by the flags. foreach (const string& name, strings::tokenize(flags.isolation, ",")) { if (ModuleManager::contains<Isolator>(name)) { - Try<Isolator*> isolator = ModuleManager::create<Isolator>(name); + Try<Isolator*> _isolator = ModuleManager::create<Isolator>(name); - if (isolator.isError()) { + if (_isolator.isError()) { return Error("Failed to create isolator '" + name + "': " + - isolator.error()); + _isolator.error()); + } + + Owned<Isolator> isolator(_isolator.get()); + + if (futureTracker != nullptr) { + isolator = Owned<Isolator>( + new IsolatorTracker(isolator, name, futureTracker)); } - isolators.push_back(Owned<Isolator>(isolator.get())); + isolators.push_back(isolator); continue; } diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp index 558e412..271d632 100644 --- a/src/slave/containerizer/mesos/containerizer.hpp +++ b/src/slave/containerizer/mesos/containerizer.hpp @@ -72,7 +72,8 @@ public: GarbageCollector* gc = nullptr, SecretResolver* secretResolver = nullptr, const Option<NvidiaComponents>& nvidia = None(), - VolumeGidManager* volumeGidManager = nullptr); + VolumeGidManager* volumeGidManager = nullptr, + PendingFutureTracker* futureTracker = nullptr); static Try<MesosContainerizer*> create( const Flags& flags, diff --git a/src/slave/main.cpp b/src/slave/main.cpp index ef5ea02..c974ba0 100644 --- a/src/slave/main.cpp +++ b/src/slave/main.cpp @@ -491,13 +491,21 @@ int main(int argc, char** argv) } #endif // __WINDOWS__ + // Initialize PendingFutureTracker. + Try<PendingFutureTracker*> futureTracker = PendingFutureTracker::create(); + if (futureTracker.isError()) { + EXIT(EXIT_FAILURE) << "Failed to initialize pending future tracker: " + << futureTracker.error(); + } + Try<Containerizer*> containerizer = Containerizer::create( flags, false, fetcher, gc, secretResolver.get(), - volumeGidManager); + volumeGidManager, + futureTracker.get()); if (containerizer.isError()) { EXIT(EXIT_FAILURE) @@ -636,6 +644,8 @@ int main(int argc, char** argv) delete containerizer.get(); + delete futureTracker.get(); + #ifndef __WINDOWS__ delete volumeGidManager; #endif // __WINDOWS__ diff --git a/src/tests/cluster.cpp b/src/tests/cluster.cpp index b43f806..9f180cc 100644 --- a/src/tests/cluster.cpp +++ b/src/tests/cluster.cpp @@ -71,6 +71,7 @@ #include "authorizer/local/authorizer.hpp" #include "common/authorization.hpp" +#include "common/future_tracker.hpp" #include "common/http.hpp" #include "files/files.hpp" @@ -446,6 +447,12 @@ Try<process::Owned<Slave>> Slave::create( } #endif // __WINDOWS__ + Try<PendingFutureTracker*> futureTracker = PendingFutureTracker::create(); + if (futureTracker.isError()) { + return Error( + "Failed to create pending future tracker: " + futureTracker.error()); + } + // If the containerizer is not provided, create a default one. if (containerizer.isSome()) { slave->containerizer = containerizer.get(); @@ -460,7 +467,8 @@ Try<process::Owned<Slave>> Slave::create( slave->fetcher.get(), gc.getOrElse(slave->gc.get()), nullptr, - volumeGidManager); + volumeGidManager, + futureTracker.get()); if (_containerizer.isError()) { return Error("Failed to create containerizer: " + _containerizer.error());
