This is an automated email from the ASF dual-hosted git repository. bmahler pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 948382d6e713b813f3c9f301ffbf7fe367dded27 Author: Benjamin Mahler <[email protected]> AuthorDate: Wed Feb 12 20:40:33 2020 -0500 Added logging of tasks and operations during agent drain initiation. When draining an agent, it's hard to tell which tasks failed to terminate from the logs. The master prints a count of the tasks remaining (only as VLOG(1) however), but not the IDs. This patch adds logging to the initiation of the drain on both the master and agent, that shows which tasks and operations are present. This makes it possible to then see which ones did not transition to a terminal state (with a bit of log analysis effort). Review: https://reviews.apache.org/r/72124 --- src/master/http.cpp | 24 ++++++++++++++++++++++++ src/slave/slave.cpp | 38 +++++++++++++++++++++++++++++++++++--- 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/src/master/http.cpp b/src/master/http.cpp index c7d674c..0ae8e36 100644 --- a/src/master/http.cpp +++ b/src/master/http.cpp @@ -3778,7 +3778,31 @@ Future<Response> Master::Http::_drainAgent( master->slaves.deactivated.insert(slaveId); Slave* slave = master->slaves.registered.get(slaveId); + + // It's possible for the slave to be removed in the interim + // if it is marked unreachable. if (slave != nullptr) { + hashmap<FrameworkID, hashset<TaskID>> pendingTaskIds; + foreachpair (const FrameworkID& frameworkId, + const auto& tasks, + slave->pendingTasks) { + pendingTaskIds[frameworkId] = tasks.keys(); + } + + hashmap<FrameworkID, hashset<TaskID>> taskIds; + foreachpair (const FrameworkID& frameworkId, + const auto& tasks, + slave->tasks) { + taskIds[frameworkId] = tasks.keys(); + } + + LOG(INFO) + << "Transitioning agent " << slaveId << " to the DRAINING state" + << "; agent has (pending tasks, tasks, operations) == (" + << stringify(pendingTaskIds) << ", " + << stringify(taskIds) << ", " + << stringify(slave->operations.keys()) << ")"; + master->deactivate(slave); // Tell the agent to start draining. diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp index cce275a..a914de4 100644 --- a/src/slave/slave.cpp +++ b/src/slave/slave.cpp @@ -1069,10 +1069,42 @@ void Slave::drain( const UPID& from, DrainSlaveMessage&& drainSlaveMessage) { + hashmap<FrameworkID, hashset<TaskID>> pendingTaskIds; + foreachvalue (Framework* framework, frameworks) { + foreachvalue (const auto& taskMap, framework->pendingTasks) { + pendingTaskIds[framework->id()] = taskMap.keys(); + } + } + + hashmap<FrameworkID, hashset<TaskID>> queuedTaskIds; + foreachvalue (Framework* framework, frameworks) { + foreachvalue (Executor* executor, framework->executors) { + foreachkey (const TaskID& taskId, executor->queuedTasks) { + queuedTaskIds[framework->id()].insert(taskId); + } + } + } + + hashmap<FrameworkID, hashset<TaskID>> launchedTaskIds; + foreachvalue (Framework* framework, frameworks) { + foreachvalue (Executor* executor, framework->executors) { + foreachkey (const TaskID& taskId, executor->launchedTasks) { + launchedTaskIds[framework->id()].insert(taskId); + } + } + } + LOG(INFO) - << "Checkpointing DrainConfig. Previous drain config was " - << (drainConfig.isSome() ? stringify(drainConfig.get()) : "NONE") - << ", new drain config is " << drainSlaveMessage.config(); + << "Initiating drain with DrainConfig " << drainSlaveMessage.config() + << (drainConfig.isSome() + ? "; overwriting previous DrainConfig " + stringify(*drainConfig) + : "") + << "; agent has (pending tasks, queued tasks, launched tasks, operations)" + << " == (" + << stringify(pendingTaskIds) << ", " + << stringify(queuedTaskIds) << ", " + << stringify(launchedTaskIds) << ", " + << stringify(operations.keys()) << ")"; CHECK_SOME(state::checkpoint( paths::getDrainConfigPath(metaDir, info.id()),
