Repository: mesos Updated Branches: refs/heads/master e54a75229 -> e048e898e
Added more logging to agent recovery path. Added logging in some agent recovery continuations to make analyzing agent-recovery-related issues less painful. Review: https://reviews.apache.org/r/66749/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/e048e898 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/e048e898 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/e048e898 Branch: refs/heads/master Commit: e048e898e5b6ff99c61b321b70ae49d26cd612f8 Parents: e54a752 Author: Meng Zhu <[email protected]> Authored: Fri May 4 11:50:02 2018 -0700 Committer: Chun-Hung Hsiao <[email protected]> Committed: Fri May 4 11:50:02 2018 -0700 ---------------------------------------------------------------------- src/slave/containerizer/composing.cpp | 2 ++ src/slave/containerizer/docker.cpp | 4 ++++ src/slave/containerizer/mesos/containerizer.cpp | 6 +++++- src/slave/containerizer/mesos/linux_launcher.cpp | 2 ++ src/slave/slave.cpp | 5 +++++ 5 files changed, 18 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/e048e898/src/slave/containerizer/composing.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/composing.cpp b/src/slave/containerizer/composing.cpp index 186102c..1fb79f5 100644 --- a/src/slave/containerizer/composing.cpp +++ b/src/slave/containerizer/composing.cpp @@ -329,6 +329,8 @@ Future<Nothing> ComposingContainerizerProcess::__recover( Future<Nothing> ComposingContainerizerProcess::___recover() { + LOG(INFO) << "Finished recovering all containerizers"; + return Nothing(); } http://git-wip-us.apache.org/repos/asf/mesos/blob/e048e898/src/slave/containerizer/docker.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp index 
a4c9c10..7171cb5 100644 --- a/src/slave/containerizer/docker.cpp +++ b/src/slave/containerizer/docker.cpp @@ -910,6 +910,8 @@ Future<Nothing> DockerContainerizerProcess::_recover( const Option<SlaveState>& state, const list<Docker::Container>& _containers) { + LOG(INFO) << "Got the list of Docker containers"; + if (state.isSome()) { // This mapping of ContainerIDs to running Docker container names // is established for two reasons: @@ -1144,6 +1146,8 @@ Future<Nothing> DockerContainerizerProcess::__recover( } } + LOG(INFO) << "Finished processing orphaned Docker containers"; + return Nothing(); })); } http://git-wip-us.apache.org/repos/asf/mesos/blob/e048e898/src/slave/containerizer/mesos/containerizer.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp index 01386ac..eac1d16 100644 --- a/src/slave/containerizer/mesos/containerizer.cpp +++ b/src/slave/containerizer/mesos/containerizer.cpp @@ -709,7 +709,7 @@ Future<Nothing> MesosContainerizer::pruneImages( Future<Nothing> MesosContainerizerProcess::recover( const Option<state::SlaveState>& state) { - LOG(INFO) << "Recovering containerizer"; + LOG(INFO) << "Recovering Mesos containers"; // Gather the container states that we will attempt to recover. list<ContainerState> recoverable; @@ -1016,6 +1016,8 @@ Future<list<Nothing>> MesosContainerizerProcess::recoverIsolators( const list<ContainerState>& recoverable, const hashset<ContainerID>& orphans) { + LOG(INFO) << "Recovering isolators"; + list<Future<Nothing>> futures; // Then recover the isolators. @@ -1053,6 +1055,8 @@ Future<Nothing> MesosContainerizerProcess::recoverProvisioner( const list<ContainerState>& recoverable, const hashset<ContainerID>& orphans) { + LOG(INFO) << "Recovering provisioner"; + // TODO(gilbert): Consolidate 'recoverProvisioner()' interface // once the launcher returns a full set of known containers. 
hashset<ContainerID> knownContainerIds = orphans; http://git-wip-us.apache.org/repos/asf/mesos/blob/e048e898/src/slave/containerizer/mesos/linux_launcher.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/linux_launcher.cpp b/src/slave/containerizer/mesos/linux_launcher.cpp index af34a85..80e4445 100644 --- a/src/slave/containerizer/mesos/linux_launcher.cpp +++ b/src/slave/containerizer/mesos/linux_launcher.cpp @@ -300,6 +300,8 @@ LinuxLauncherProcess::LinuxLauncherProcess( Future<hashset<ContainerID>> LinuxLauncherProcess::recover( const list<ContainerState>& states) { + LOG(INFO) << "Recovering Linux launcher"; + // Recover all of the "containers" we know about based on the // existing cgroups. Note that we check both the freezer hierarchy // and the systemd hierarchy (if enabled), and combine the results. http://git-wip-us.apache.org/repos/asf/mesos/blob/e048e898/src/slave/slave.cpp ---------------------------------------------------------------------- diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp index 69280d9..c6d9152 100644 --- a/src/slave/slave.cpp +++ b/src/slave/slave.cpp @@ -6922,6 +6922,9 @@ Future<Nothing> Slave::recover(const Try<state::State>& state) return Failure(state.error()); } + LOG(INFO) << "Finished recovering checkpointed state from '" << metaDir + << "', beginning agent recovery"; + Option<ResourcesState> resourcesState = state->resources; Option<SlaveState> slaveState = state->slave; @@ -7148,6 +7151,8 @@ Future<Nothing> Slave::_recoverContainerizer( Future<Nothing> Slave::_recover() { + LOG(INFO) << "Recovering executors"; + // Alow HTTP based executors to subscribe after the // containerizer recovery is complete. recoveryInfo.reconnect = true;
