Added new operation states to be used for status reconciliation. Review: https://reviews.apache.org/r/66462/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/918f99e6 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/918f99e6 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/918f99e6 Branch: refs/heads/master Commit: 918f99e6558db465f4c6aca75563e0c49b0203d1 Parents: c265ae6 Author: Gaston Kleiman <[email protected]> Authored: Mon Apr 23 13:43:28 2018 -0700 Committer: Greg Mann <[email protected]> Committed: Mon Apr 23 13:48:36 2018 -0700 ---------------------------------------------------------------------- include/mesos/mesos.proto | 29 +++++++++++++++++++++++++++++ include/mesos/v1/mesos.proto | 29 +++++++++++++++++++++++++++++ src/common/protobuf_utils.cpp | 6 +++++- src/master/master.cpp | 8 ++++++-- src/slave/slave.cpp | 8 ++++++-- 5 files changed, 75 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/918f99e6/include/mesos/mesos.proto ---------------------------------------------------------------------- diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto index 9e24d3e..5bc4a80 100644 --- a/include/mesos/mesos.proto +++ b/include/mesos/mesos.proto @@ -2309,6 +2309,35 @@ enum OperationState { // TERMINAL: The operation was dropped due to a transient error. OPERATION_DROPPED = 5; + + // The operation affects an agent that has lost contact with the master, + // typically due to a network failure or partition. The operation may or may + // not still be pending. + OPERATION_UNREACHABLE = 6; + + // The operation affected an agent that the master cannot contact; + // the operator has asserted that the agent has been shut down, but this has + // not been directly confirmed by the master.
+ // + // If the operator is correct, the operation is not pending and this is a + // terminal state; if the operator is mistaken, the operation may still be + // pending and might return to a different state in the future. + OPERATION_GONE_BY_OPERATOR = 7; + + // The operation affects an agent that the master recovered from its + // state, but that agent has not yet re-registered. + // + // The operation can transition to `OPERATION_UNREACHABLE` if the + // corresponding agent is marked as unreachable, and will transition to + // another status if the agent re-registers. + OPERATION_RECOVERING = 8; + + // The master has no knowledge of the operation. This is typically + // because either (a) the master never had knowledge of the operation, or + // (b) the master forgot about the operation because it garbage collected + // its metadata about the operation. The operation may or may not still be + // pending. + OPERATION_UNKNOWN = 9; } http://git-wip-us.apache.org/repos/asf/mesos/blob/918f99e6/include/mesos/v1/mesos.proto ---------------------------------------------------------------------- diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto index 0f3fd8a..5a4e733 100644 --- a/include/mesos/v1/mesos.proto +++ b/include/mesos/v1/mesos.proto @@ -2301,6 +2301,35 @@ enum OperationState { // TERMINAL: The operation was dropped due to a transient error. OPERATION_DROPPED = 5; + + // The operation affects an agent that has lost contact with the master, + // typically due to a network failure or partition. The operation may or may + // not still be pending. + OPERATION_UNREACHABLE = 6; + + // The operation affected an agent that the master cannot contact; + // the operator has asserted that the agent has been shut down, but this has + // not been directly confirmed by the master.
+ // + // If the operator is correct, the operation is not pending and this is a + // terminal state; if the operator is mistaken, the operation may still be + // pending and might return to a different state in the future. + OPERATION_GONE_BY_OPERATOR = 7; + + // The operation affects an agent that the master recovered from its + // state, but that agent has not yet re-registered. + // + // The operation can transition to `OPERATION_UNREACHABLE` if the + // corresponding agent is marked as unreachable, and will transition to + // another status if the agent re-registers. + OPERATION_RECOVERING = 8; + + // The master has no knowledge of the operation. This is typically + // because either (a) the master never had knowledge of the operation, or + // (b) the master forgot about the operation because it garbage collected + // its metadata about the operation. The operation may or may not still be + // pending. + OPERATION_UNKNOWN = 9; } http://git-wip-us.apache.org/repos/asf/mesos/blob/918f99e6/src/common/protobuf_utils.cpp ---------------------------------------------------------------------- diff --git a/src/common/protobuf_utils.cpp b/src/common/protobuf_utils.cpp index 141a444..78bffd8 100644 --- a/src/common/protobuf_utils.cpp +++ b/src/common/protobuf_utils.cpp @@ -408,8 +408,12 @@ bool isTerminalState(const OperationState& state) case OPERATION_ERROR: case OPERATION_DROPPED: return true; - case OPERATION_PENDING: case OPERATION_UNSUPPORTED: + case OPERATION_PENDING: + case OPERATION_UNREACHABLE: + case OPERATION_GONE_BY_OPERATOR: + case OPERATION_RECOVERING: + case OPERATION_UNKNOWN: return false; } http://git-wip-us.apache.org/repos/asf/mesos/blob/918f99e6/src/master/master.cpp ---------------------------------------------------------------------- diff --git a/src/master/master.cpp b/src/master/master.cpp index 67baa6b..ada7709 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -10743,9 +10743,13 @@ void Master::updateOperation( break; } - // 
Non-terminal. This shouldn't happen. + // Non-terminal or not expected from an agent. This shouldn't happen. + case OPERATION_UNSUPPORTED: case OPERATION_PENDING: - case OPERATION_UNSUPPORTED: { + case OPERATION_UNREACHABLE: + case OPERATION_GONE_BY_OPERATOR: + case OPERATION_RECOVERING: + case OPERATION_UNKNOWN: { LOG(FATAL) << "Unexpected operation state " << operation->latest_status().state(); http://git-wip-us.apache.org/repos/asf/mesos/blob/918f99e6/src/slave/slave.cpp ---------------------------------------------------------------------- diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp index 2b8c6e0..d0ff5f8 100644 --- a/src/slave/slave.cpp +++ b/src/slave/slave.cpp @@ -7975,9 +7975,13 @@ void Slave::updateOperation( break; } - // Non-terminal. This shouldn't happen. + // Non-terminal or not sent by resource providers. This shouldn't happen. + case OPERATION_UNSUPPORTED: case OPERATION_PENDING: - case OPERATION_UNSUPPORTED: { + case OPERATION_UNREACHABLE: + case OPERATION_GONE_BY_OPERATOR: + case OPERATION_RECOVERING: + case OPERATION_UNKNOWN: { LOG(FATAL) << "Unexpected operation state " << operation->latest_status().state(); }
