Repository: mesos Updated Branches: refs/heads/master 4b591a0d0 -> 0d5dc05b5
Updated createStatusUpdate to take an optional UUID. Review: https://reviews.apache.org/r/35912 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/0d5dc05b Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/0d5dc05b Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/0d5dc05b Branch: refs/heads/master Commit: 0d5dc05b5d3a4bf33c7f38a07ae68ef078600552 Parents: fda49c0 Author: Benjamin Mahler <[email protected]> Authored: Thu Jun 25 22:59:29 2015 -0700 Committer: Benjamin Mahler <[email protected]> Committed: Mon Jun 29 10:01:27 2015 -0700 ---------------------------------------------------------------------- src/common/protobuf_utils.cpp | 7 ++++++- src/common/protobuf_utils.hpp | 7 +++++++ src/master/master.cpp | 15 +++++++++++++++ src/sched/sched.cpp | 2 ++ src/slave/slave.cpp | 14 +++++++++++++- src/tests/fault_tolerance_tests.cpp | 5 ++++- src/tests/partition_tests.cpp | 4 +++- 7 files changed, 50 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/common/protobuf_utils.cpp ---------------------------------------------------------------------- diff --git a/src/common/protobuf_utils.cpp b/src/common/protobuf_utils.cpp index 9ccd70d..f0642ba 100644 --- a/src/common/protobuf_utils.cpp +++ b/src/common/protobuf_utils.cpp @@ -49,6 +49,7 @@ StatusUpdate createStatusUpdate( const TaskID& taskId, const TaskState& state, const TaskStatus::Source& source, + const Option<UUID>& uuid, const string& message = "", const Option<TaskStatus::Reason>& reason = None(), const Option<ExecutorID>& executorId = None(), @@ -57,7 +58,6 @@ StatusUpdate createStatusUpdate( StatusUpdate update; update.set_timestamp(process::Clock::now().secs()); - update.set_uuid(UUID::random().toBytes()); update.mutable_framework_id()->MergeFrom(frameworkId); if (slaveId.isSome()) { @@ -80,6 +80,11 @@ StatusUpdate 
createStatusUpdate( status->set_message(message); status->set_timestamp(update.timestamp()); + if (uuid.isSome()) { + update.set_uuid(uuid.get().toBytes()); + status->set_uuid(uuid.get().toBytes()); + } + if (reason.isSome()) { status->set_reason(reason.get()); } http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/common/protobuf_utils.hpp ---------------------------------------------------------------------- diff --git a/src/common/protobuf_utils.hpp b/src/common/protobuf_utils.hpp index 9ecd234..afe5a85 100644 --- a/src/common/protobuf_utils.hpp +++ b/src/common/protobuf_utils.hpp @@ -23,6 +23,7 @@ #include <stout/ip.hpp> #include <stout/option.hpp> +#include <stout/uuid.hpp> #include "messages/messages.hpp" @@ -38,12 +39,18 @@ namespace protobuf { bool isTerminalState(const TaskState& state); +// See TaskStatus for more information about these fields. Note +// that the 'uuid' must be provided for updates that need +// acknowledgement. Currently, all slave and executor generated +// updates require acknowledgement, whereas master generated +// and scheduler driver generated updates do not. 
StatusUpdate createStatusUpdate( const FrameworkID& frameworkId, const Option<SlaveID>& slaveId, const TaskID& taskId, const TaskState& state, const TaskStatus::Source& source, + const Option<UUID>& uuid, const std::string& message = "", const Option<TaskStatus::Reason>& reason = None(), const Option<ExecutorID>& executorId = None(), http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/master/master.cpp ---------------------------------------------------------------------- diff --git a/src/master/master.cpp b/src/master/master.cpp index b8ed699..34ce744 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -2403,6 +2403,7 @@ void Master::accept( task.task_id(), TASK_LOST, TaskStatus::SOURCE_MASTER, + None(), "Task launched with invalid offers: " + error.get().message, TaskStatus::REASON_INVALID_OFFERS); @@ -2514,6 +2515,7 @@ void Master::_accept( task.task_id(), TASK_LOST, TaskStatus::SOURCE_MASTER, + None(), slave == NULL ? "Slave removed" : "Slave disconnected", reason); @@ -2688,6 +2690,7 @@ void Master::_accept( task.task_id(), TASK_ERROR, TaskStatus::SOURCE_MASTER, + None(), authorization.isFailed() ? 
"Authorization failure: " + authorization.failure() : "Not authorized to launch as user '" + user + "'", @@ -2730,6 +2733,7 @@ void Master::_accept( task_.task_id(), TASK_ERROR, TaskStatus::SOURCE_MASTER, + None(), validationError.get().message, TaskStatus::REASON_TASK_INVALID); @@ -2870,6 +2874,7 @@ void Master::kill(Framework* framework, const scheduler::Call::Kill& kill) taskId, TASK_KILLED, TaskStatus::SOURCE_MASTER, + None(), "Killed pending task"); forward(update, UPID(), framework); @@ -3835,6 +3840,7 @@ void Master::_reconcileTasks( task.task_id(), TASK_STAGING, TaskStatus::SOURCE_MASTER, + None(), "Reconciliation: Latest task state", TaskStatus::REASON_RECONCILIATION); @@ -3865,6 +3871,7 @@ void Master::_reconcileTasks( task->task_id(), state, TaskStatus::SOURCE_MASTER, + None(), "Reconciliation: Latest task state", TaskStatus::REASON_RECONCILIATION, executorId, @@ -3920,6 +3927,7 @@ void Master::_reconcileTasks( task_.task_id(), TASK_STAGING, TaskStatus::SOURCE_MASTER, + None(), "Reconciliation: Latest task state", TaskStatus::REASON_RECONCILIATION); } else if (task != NULL) { @@ -3938,6 +3946,7 @@ void Master::_reconcileTasks( task->task_id(), state, TaskStatus::SOURCE_MASTER, + None(), "Reconciliation: Latest task state", TaskStatus::REASON_RECONCILIATION, executorId, @@ -3950,6 +3959,7 @@ void Master::_reconcileTasks( status.task_id(), TASK_LOST, TaskStatus::SOURCE_MASTER, + None(), "Reconciliation: Task is unknown to the slave", TaskStatus::REASON_RECONCILIATION); } else if (slaves.transitioning(slaveId)) { @@ -3965,6 +3975,7 @@ void Master::_reconcileTasks( status.task_id(), TASK_LOST, TaskStatus::SOURCE_MASTER, + None(), "Reconciliation: Task is unknown", TaskStatus::REASON_RECONCILIATION); } @@ -4339,6 +4350,7 @@ void Master::reconcile( task->task_id(), TASK_LOST, TaskStatus::SOURCE_MASTER, + None(), "Task is unknown to the slave", TaskStatus::REASON_TASK_UNKNOWN); @@ -4600,6 +4612,7 @@ void Master::removeFramework(Framework* framework) 
task->task_id(), TASK_KILLED, TaskStatus::SOURCE_MASTER, + None(), "Framework " + framework->id().value() + " removed", TaskStatus::REASON_FRAMEWORK_REMOVED, (task->has_executor_id() @@ -4695,6 +4708,7 @@ void Master::removeFramework(Slave* slave, Framework* framework) task->task_id(), TASK_LOST, TaskStatus::SOURCE_MASTER, + None(), "Slave " + slave->info.hostname() + " disconnected", TaskStatus::REASON_SLAVE_DISCONNECTED, (task->has_executor_id() @@ -4834,6 +4848,7 @@ void Master::removeSlave( task->task_id(), TASK_LOST, TaskStatus::SOURCE_MASTER, + None(), "Slave " + slave->info.hostname() + " removed: " + message, TaskStatus::REASON_SLAVE_REMOVED, (task->has_executor_id() ? http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/sched/sched.cpp ---------------------------------------------------------------------- diff --git a/src/sched/sched.cpp b/src/sched/sched.cpp index d37b256..7563abb 100644 --- a/src/sched/sched.cpp +++ b/src/sched/sched.cpp @@ -926,6 +926,7 @@ protected: task.task_id(), TASK_LOST, TaskStatus::SOURCE_MASTER, + None(), "Master disconnected", TaskStatus::REASON_MASTER_DISCONNECTED); @@ -1005,6 +1006,7 @@ protected: task.task_id(), TASK_LOST, TaskStatus::SOURCE_MASTER, + None(), "Master disconnected", TaskStatus::REASON_MASTER_DISCONNECTED); http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/slave/slave.cpp ---------------------------------------------------------------------- diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp index 1105a66..008170f 100644 --- a/src/slave/slave.cpp +++ b/src/slave/slave.cpp @@ -61,6 +61,7 @@ #include <stout/stringify.hpp> #include <stout/strings.hpp> #include <stout/try.hpp> +#include <stout/uuid.hpp> #include <stout/utils.hpp> #ifdef __linux__ @@ -1041,6 +1042,7 @@ void Slave::reregistered( taskId, TASK_LOST, TaskStatus::SOURCE_SLAVE, + UUID::random(), "Reconciliation: task unknown to the slave", TaskStatus::REASON_RECONCILIATION); @@ -1413,6 +1415,7 @@ void Slave::_runTask( 
task.task_id(), TASK_LOST, TaskStatus::SOURCE_SLAVE, + UUID::random(), "Could not launch the task because we failed to unschedule directories" " scheduled for gc", TaskStatus::REASON_GC_ERROR); @@ -1454,7 +1457,8 @@ void Slave::_runTask( task.task_id(), TASK_LOST, TaskStatus::SOURCE_SLAVE, - "The checkpointed resources being used by the task are unknown to " + UUID::random(), + "The checkpointed resources being used by the task are unknown to " "the slave", TaskStatus::REASON_RESOURCES_UNKNOWN); @@ -1486,6 +1490,7 @@ void Slave::_runTask( task.task_id(), TASK_LOST, TaskStatus::SOURCE_SLAVE, + UUID::random(), "The checkpointed resources being used by the executor are unknown " "to the slave", TaskStatus::REASON_RESOURCES_UNKNOWN, @@ -1551,6 +1556,7 @@ void Slave::_runTask( task.task_id(), TASK_LOST, TaskStatus::SOURCE_SLAVE, + UUID::random(), "Executor terminating/terminated", TaskStatus::REASON_EXECUTOR_TERMINATED); @@ -1797,6 +1803,7 @@ void Slave::killTask( taskId, TASK_KILLED, TaskStatus::SOURCE_SLAVE, + UUID::random(), "Task killed before it was launched"); statusUpdate(update, UPID()); @@ -1824,6 +1831,7 @@ void Slave::killTask( taskId, TASK_LOST, TaskStatus::SOURCE_SLAVE, + UUID::random(), "Cannot find executor", TaskStatus::REASON_EXECUTOR_TERMINATED); @@ -1840,6 +1848,7 @@ void Slave::killTask( taskId, TASK_KILLED, TaskStatus::SOURCE_SLAVE, + UUID::random(), "Unregistered executor", TaskStatus::REASON_EXECUTOR_UNREGISTERED, executor->id); @@ -1887,6 +1896,7 @@ void Slave::killTask( taskId, TASK_KILLED, TaskStatus::SOURCE_SLAVE, + UUID::random(), "Task killed when it was queued", None(), executor->id); @@ -2566,6 +2576,7 @@ void Slave::reregisterExecutor( task->task_id(), TASK_LOST, TaskStatus::SOURCE_SLAVE, + UUID::random(), "Task launched during slave restart", TaskStatus::REASON_SLAVE_RESTARTED, executorId); @@ -4531,6 +4542,7 @@ void Slave::sendExecutorTerminatedStatusUpdate( taskId, taskState, TaskStatus::SOURCE_SLAVE, + UUID::random(), 
termination.isReady() ? termination.get().message() : "Abnormal executor termination", reason, http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/tests/fault_tolerance_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp index fb28e2a..1070ccf 100644 --- a/src/tests/fault_tolerance_tests.cpp +++ b/src/tests/fault_tolerance_tests.cpp @@ -38,6 +38,7 @@ #include <stout/json.hpp> #include <stout/stringify.hpp> +#include <stout/uuid.hpp> #include "common/protobuf_utils.hpp" @@ -1133,6 +1134,7 @@ TEST_F(FaultToleranceTest, ForwardStatusUpdateUnknownExecutor) taskId, TASK_RUNNING, TaskStatus::SOURCE_SLAVE, + UUID::random(), "Dummy update"); process::dispatch(slave.get(), &Slave::statusUpdate, statusUpdate2, UPID()); @@ -1694,7 +1696,8 @@ TEST_F(FaultToleranceTest, SplitBrainMasters) runningStatus.get().slave_id(), runningStatus.get().task_id(), TASK_LOST, - TaskStatus::SOURCE_SLAVE)); + TaskStatus::SOURCE_SLAVE, + UUID::random())); // Spoof a message from a random master; this should be dropped by // the scheduler driver. Since this is delivered locally, it is http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/tests/partition_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/partition_tests.cpp b/src/tests/partition_tests.cpp index c6ec14d..a1fb6f7 100644 --- a/src/tests/partition_tests.cpp +++ b/src/tests/partition_tests.cpp @@ -26,6 +26,7 @@ #include <process/pid.hpp> #include <stout/try.hpp> +#include <stout/uuid.hpp> #include "common/protobuf_utils.hpp" @@ -389,7 +390,8 @@ TEST_F(PartitionTest, PartitionedSlaveStatusUpdates) slaveId, taskId, TASK_RUNNING, - TaskStatus::SOURCE_SLAVE); + TaskStatus::SOURCE_SLAVE, + UUID::random()); StatusUpdateMessage message; message.mutable_update()->CopyFrom(update);
