Repository: mesos Updated Branches: refs/heads/master cac8fdf01 -> 3f44f0e2f
Made the agent forward status updates for unknown offer operations. There are some cases in which an agent can get offer operation status updates for operations that it doesn't know about. If this happens, the agent should forward the updates to the master without updating its internal state. Review: https://reviews.apache.org/r/64588/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3f44f0e2 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3f44f0e2 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3f44f0e2 Branch: refs/heads/master Commit: 3f44f0e2ff13a54fd5c5822b5c5a30a9b6fec6de Parents: cac8fdf Author: Gaston Kleiman <[email protected]> Authored: Wed Dec 13 21:04:21 2017 -0800 Committer: Jie Yu <[email protected]> Committed: Wed Dec 13 21:19:12 2017 -0800 ---------------------------------------------------------------------- src/slave/slave.cpp | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/3f44f0e2/src/slave/slave.cpp ---------------------------------------------------------------------- diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp index e8f7591..63828a4 100644 --- a/src/slave/slave.cpp +++ b/src/slave/slave.cpp @@ -7230,15 +7230,35 @@ void Slave::handleResourceProviderMessage( CHECK_SOME(operationUUID); OfferOperation* operation = getOfferOperation(operationUUID.get()); - if (operation == nullptr) { - LOG(WARNING) << "Failed to find the offer operation '" - << update.status().operation_id() << "' (uuid: " - << operationUUID->toString() << ") for framework " - << update.framework_id(); - break; - } - updateOfferOperation(operation, update); + if (operation != nullptr) { + // The agent might not know about the operation in the + // following cases: + // + // Case 1: + // (1) The agent sends to a 
resource provider an ACK for a + // terminal offer operation status update and removes the + // offer operation. + // (2) The resource provider doesn't get the ACK. + // (3) The resource provider's status update manager resends + // the offer operation status update. + // + // Case 2: + // (1) The master knows an operation that the agent doesn't + // know, because an ApplyOfferOperationMessage was + // dropped. + // (2) The master sends a ReconcileOfferOperationsMessage + // message to the agent, who forwards it to a resource + // provider. + // (3) The resource provider doesn't know the operation, so it + // sends an offer operation status update with the state + // OFFER_OPERATION_DROPPED. + // + // In both cases the agent should not update its internal + // state, but it should still forward the offer operation + // status update. + updateOfferOperation(operation, update); + } switch (state) { case RECOVERING: @@ -7252,14 +7272,15 @@ void Slave::handleResourceProviderMessage( break; } case RUNNING: { - LOG(INFO) << "Forwarding status update of offer operation '" - << update.status().operation_id() + LOG(INFO) << "Forwarding status update of " + << (operation == nullptr ? "unknown " : "") + << "offer operation '" << update.status().operation_id() << "' (uuid: " << operationUUID->toString() << ") for framework " << update.framework_id(); // The status update from the resource provider didn't - // provide the agent ID (because the resource provider doesn't - // know it), hence we inject it here. + // provide the agent ID (because the resource provider + // doesn't know it), hence we inject it here. OfferOperationStatusUpdate _update; _update.CopyFrom(update); _update.mutable_slave_id()->CopyFrom(info.id());
