Repository: mesos Updated Branches: refs/heads/master 814aafab2 -> bf65c7426
Fixed UpdateFrameworkMessage handling backwards incompatibility. The commit 4912f341dce12f10b43fe82db20086032391e95a introduced a new `framework_info` field in the `UpdateFrameworkMessage` sent from the master to the agent. Unfortunately, the agent code unconditionally uses the new field, which results in a default-intialized `FrameworkInfo` when a new agent is receiving the message from an old master. This leads to an agent abort when it subsequently tries to launch a new task for the updated framework. This fixes the issue by ignoring the field if unset. Review: https://reviews.apache.org/r/57836/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/bf65c742 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/bf65c742 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/bf65c742 Branch: refs/heads/master Commit: bf65c742696f97445381583e53e9a19155bef06d Parents: 814aafa Author: Ilya Pronin <[email protected]> Authored: Thu Mar 23 22:47:03 2017 -0700 Committer: Benjamin Mahler <[email protected]> Committed: Thu Mar 23 22:47:03 2017 -0700 ---------------------------------------------------------------------- src/slave/slave.cpp | 20 +++++++++++--------- src/slave/slave.hpp | 4 +--- src/tests/slave_recovery_tests.cpp | 9 ++++++--- 3 files changed, 18 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/bf65c742/src/slave/slave.cpp ---------------------------------------------------------------------- diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp index a4f4a9c..c6ee4fa 100644 --- a/src/slave/slave.cpp +++ b/src/slave/slave.cpp @@ -503,10 +503,7 @@ void Slave::initialize() &FrameworkToExecutorMessage::data); install<UpdateFrameworkMessage>( - &Slave::updateFramework, - &UpdateFrameworkMessage::framework_id, - &UpdateFrameworkMessage::pid, - &UpdateFrameworkMessage::framework_info); + &Slave::updateFramework); install<CheckpointResourcesMessage>( &Slave::checkpointResources, @@ -2877,14 +2874,15 @@ void Slave::schedulerMessage( void Slave::updateFramework( - const FrameworkID& frameworkId, - const UPID& pid, - const FrameworkInfo& frameworkInfo) + const UpdateFrameworkMessage& message) { CHECK(state == RECOVERING || state == DISCONNECTED || state == RUNNING || state == TERMINATING) << state; + const FrameworkID& frameworkId = message.framework_id(); + const UPID& pid = message.pid(); + if (state != RUNNING) { LOG(WARNING) << "Dropping updateFramework message for " << frameworkId << " because the agent is in " << state << " state"; @@ -2909,8 +2907,12 @@ void Slave::updateFramework( << (pid != UPID() ? "with pid updated to " + stringify(pid) : ""); - framework->info.CopyFrom(frameworkInfo); - framework->capabilities = frameworkInfo.capabilities(); + // The framework info was added in 1.3, so it will not be set + // if from a master older than 1.3. + if (message.has_framework_info()) { + framework->info.CopyFrom(message.framework_info()); + framework->capabilities = message.framework_info().capabilities(); + } if (pid == UPID()) { framework->pid = None(); http://git-wip-us.apache.org/repos/asf/mesos/blob/bf65c742/src/slave/slave.hpp ---------------------------------------------------------------------- diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp index e2de66c..f365a53 100644 --- a/src/slave/slave.hpp +++ b/src/slave/slave.hpp @@ -186,9 +186,7 @@ public: const std::string& data); void updateFramework( - const FrameworkID& frameworkId, - const process::UPID& pid, - const FrameworkInfo& frameworkInfo); + const UpdateFrameworkMessage& message); void checkpointResources(const std::vector<Resource>& checkpointedResources); http://git-wip-us.apache.org/repos/asf/mesos/blob/bf65c742/src/tests/slave_recovery_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp index 09bfa75..53f33a2 100644 --- a/src/tests/slave_recovery_tests.cpp +++ b/src/tests/slave_recovery_tests.cpp @@ -1978,14 +1978,17 @@ TYPED_TEST(SlaveRecoveryTest, NonCheckpointingFramework) // Set the `FrameworkID` in `FrameworkInfo`. frameworkInfo.mutable_id()->CopyFrom(frameworkId); + UpdateFrameworkMessage updateFrameworkMessage; + updateFrameworkMessage.mutable_framework_id()->CopyFrom(frameworkId); + updateFrameworkMessage.set_pid(""); + updateFrameworkMessage.mutable_framework_info()->CopyFrom(frameworkInfo); + // Simulate a 'UpdateFrameworkMessage' to ensure framework pid is // not being checkpointed. process::dispatch( slave.get()->pid, &Slave::updateFramework, - frameworkId, - "", - frameworkInfo); + updateFrameworkMessage); AWAIT_READY(updateFramework);
