Added new operation states to be used for status reconciliation.

Review: https://reviews.apache.org/r/66462/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/918f99e6
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/918f99e6
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/918f99e6

Branch: refs/heads/master
Commit: 918f99e6558db465f4c6aca75563e0c49b0203d1
Parents: c265ae6
Author: Gaston Kleiman <[email protected]>
Authored: Mon Apr 23 13:43:28 2018 -0700
Committer: Greg Mann <[email protected]>
Committed: Mon Apr 23 13:48:36 2018 -0700

----------------------------------------------------------------------
 include/mesos/mesos.proto     | 29 +++++++++++++++++++++++++++++
 include/mesos/v1/mesos.proto  | 29 +++++++++++++++++++++++++++++
 src/common/protobuf_utils.cpp |  6 +++++-
 src/master/master.cpp         |  8 ++++++--
 src/slave/slave.cpp           |  8 ++++++--
 5 files changed, 75 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/918f99e6/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 9e24d3e..5bc4a80 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -2309,6 +2309,35 @@ enum OperationState {
 
   // TERMINAL: The operation was dropped due to a transient error.
   OPERATION_DROPPED = 5;
+
+  // The operation affects an agent that has lost contact with the master,
+  // typically due to a network failure or partition. The operation may or may
+  // not still be pending.
+  OPERATION_UNREACHABLE = 6;
+
+  // The operation affected an agent that the master cannot contact;
+  // the operator has asserted that the agent has been shut down, but this has
+  // not been directly confirmed by the master.
+  //
+  // If the operator is correct, the operation is not pending and this is a
+  // terminal state; if the operator is mistaken, the operation may still be
+  // pending and might return to a different state in the future.
+  OPERATION_GONE_BY_OPERATOR = 7;
+
+  // The operation affects an agent that the master recovered from its
+  // state, but that agent has not yet re-registered.
+  //
+  // The operation can transition to `OPERATION_UNREACHABLE` if the
+  // corresponding agent is marked as unreachable, and will transition to
+  // another state if the agent re-registers.
+  OPERATION_RECOVERING = 8;
+
+  // The master has no knowledge of the operation. This is typically
+  // because either (a) the master never had knowledge of the operation, or
+  // (b) the master forgot about the operation because it garbage collected
+  // its metadata about the operation. The operation may or may not still be
+  // pending.
+  OPERATION_UNKNOWN = 9;
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/918f99e6/include/mesos/v1/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto
index 0f3fd8a..5a4e733 100644
--- a/include/mesos/v1/mesos.proto
+++ b/include/mesos/v1/mesos.proto
@@ -2301,6 +2301,35 @@ enum OperationState {
 
   // TERMINAL: The operation was dropped due to a transient error.
   OPERATION_DROPPED = 5;
+
+  // The operation affects an agent that has lost contact with the master,
+  // typically due to a network failure or partition. The operation may or may
+  // not still be pending.
+  OPERATION_UNREACHABLE = 6;
+
+  // The operation affected an agent that the master cannot contact;
+  // the operator has asserted that the agent has been shut down, but this has
+  // not been directly confirmed by the master.
+  //
+  // If the operator is correct, the operation is not pending and this is a
+  // terminal state; if the operator is mistaken, the operation may still be
+  // pending and might return to a different state in the future.
+  OPERATION_GONE_BY_OPERATOR = 7;
+
+  // The operation affects an agent that the master recovered from its
+  // state, but that agent has not yet re-registered.
+  //
+  // The operation can transition to `OPERATION_UNREACHABLE` if the
+  // corresponding agent is marked as unreachable, and will transition to
+  // another state if the agent re-registers.
+  OPERATION_RECOVERING = 8;
+
+  // The master has no knowledge of the operation. This is typically
+  // because either (a) the master never had knowledge of the operation, or
+  // (b) the master forgot about the operation because it garbage collected
+  // its metadata about the operation. The operation may or may not still be
+  // pending.
+  OPERATION_UNKNOWN = 9;
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/918f99e6/src/common/protobuf_utils.cpp
----------------------------------------------------------------------
diff --git a/src/common/protobuf_utils.cpp b/src/common/protobuf_utils.cpp
index 141a444..78bffd8 100644
--- a/src/common/protobuf_utils.cpp
+++ b/src/common/protobuf_utils.cpp
@@ -408,8 +408,12 @@ bool isTerminalState(const OperationState& state)
     case OPERATION_ERROR:
     case OPERATION_DROPPED:
       return true;
-    case OPERATION_PENDING:
     case OPERATION_UNSUPPORTED:
+    case OPERATION_PENDING:
+    case OPERATION_UNREACHABLE:
+    case OPERATION_GONE_BY_OPERATOR:
+    case OPERATION_RECOVERING:
+    case OPERATION_UNKNOWN:
       return false;
   }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/918f99e6/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 67baa6b..ada7709 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -10743,9 +10743,13 @@ void Master::updateOperation(
       break;
     }
 
-    // Non-terminal. This shouldn't happen.
+    // Non-terminal or not expected from an agent. This shouldn't happen.
+    case OPERATION_UNSUPPORTED:
     case OPERATION_PENDING:
-    case OPERATION_UNSUPPORTED: {
+    case OPERATION_UNREACHABLE:
+    case OPERATION_GONE_BY_OPERATOR:
+    case OPERATION_RECOVERING:
+    case OPERATION_UNKNOWN: {
       LOG(FATAL) << "Unexpected operation state "
                  << operation->latest_status().state();
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/918f99e6/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 2b8c6e0..d0ff5f8 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -7975,9 +7975,13 @@ void Slave::updateOperation(
       break;
     }
 
-    // Non-terminal. This shouldn't happen.
+    // Non-terminal or not sent by resource providers. This shouldn't happen.
+    case OPERATION_UNSUPPORTED:
     case OPERATION_PENDING:
-    case OPERATION_UNSUPPORTED: {
+    case OPERATION_UNREACHABLE:
+    case OPERATION_GONE_BY_OPERATOR:
+    case OPERATION_RECOVERING:
+    case OPERATION_UNKNOWN: {
       LOG(FATAL) << "Unexpected operation state "
                  << operation->latest_status().state();
     }

Reply via email to