This is an automated email from the ASF dual-hosted git repository.
chhsiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
The following commit(s) were added to refs/heads/master by this push:
new af6ef2c Fixed a race between status updates and acknowledgements in
SLRP tests.
af6ef2c is described below
commit af6ef2cca58993c4c99b3a10509d1036818d3f79
Author: Chun-Hung Hsiao <[email protected]>
AuthorDate: Fri Jul 5 16:51:47 2019 -0700
Fixed a race between status updates and acknowledgements in SLRP tests.
SLRP tests `RetryOperationStatusUpdate*` check that no operation update
will be sent by SLRP once acknowledgements are sent by master. However,
since the acknowledgements are delivered via HTTP, it is possible that
operation status updates race with acknowledgements that are still in
HTTP pipes. This patch fixes this race condition.
Review: https://reviews.apache.org/r/71018
---
.../storage_local_resource_provider_tests.cpp | 36 ++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/src/tests/storage_local_resource_provider_tests.cpp
b/src/tests/storage_local_resource_provider_tests.cpp
index 3823305..6986126 100644
--- a/src/tests/storage_local_resource_provider_tests.cpp
+++ b/src/tests/storage_local_resource_provider_tests.cpp
@@ -44,6 +44,7 @@
#include <stout/strings.hpp>
#include <stout/unreachable.hpp>
#include <stout/uri.hpp>
+#include <stout/uuid.hpp>
#include <stout/os/realpath.hpp>
@@ -59,6 +60,8 @@
#include "module/manager.hpp"
+#include "messages/messages.hpp"
+
#include "slave/container_daemon_process.hpp"
#include "slave/paths.hpp"
#include "slave/state.hpp"
@@ -67,6 +70,8 @@
#include "slave/containerizer/mesos/containerizer.hpp"
+#include "status_update_manager/status_update_manager_process.hpp"
+
#include "tests/disk_profile_server.hpp"
#include "tests/environment.hpp"
#include "tests/flags.hpp"
@@ -4330,10 +4335,21 @@ TEST_P(StorageLocalResourceProviderTest,
RetryOperationStatusUpdate)
FUTURE_PROTOBUF(
AcknowledgeOperationStatusMessage(), master.get()->pid,
slave.get()->pid);
+ // Since the acknowledgement is delivered to the SLRP via HTTP, we wait for
+ // a dispatch event to ensure that the acknowledgement is received by SLRP.
+ Future<Nothing> statusUpdateManagerAcknowledgement = FUTURE_DISPATCH(
+ _,
+ (&StatusUpdateManagerProcess<
+ id::UUID,
+ UpdateOperationStatusRecord,
+ UpdateOperationStatusMessage>::acknowledgement));
+
Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL_MIN);
AWAIT_READY(retriedUpdateOperationStatusMessage);
+
AWAIT_READY(acknowledgeOperationStatusMessage);
+ AWAIT_READY(statusUpdateManagerAcknowledgement);
// The master acknowledged the operation status update, so the SLRP shouldn't
// send further operation status updates.
@@ -4491,6 +4507,15 @@ TEST_P(
Future<AcknowledgeOperationStatusMessage> acknowledgeOperationStatusMessage =
FUTURE_PROTOBUF(AcknowledgeOperationStatusMessage(), master.get()->pid, _);
+ // Since the acknowledgement is delivered to the SLRP via HTTP, we wait for
+ // a dispatch event to ensure that the acknowledgement is received by SLRP.
+ Future<Nothing> statusUpdateManagerAcknowledgement = FUTURE_DISPATCH(
+ _,
+ (&StatusUpdateManagerProcess<
+ id::UUID,
+ UpdateOperationStatusRecord,
+ UpdateOperationStatusMessage>::acknowledgement));
+
slave = StartSlave(detector.get(), flags);
ASSERT_SOME(slave);
@@ -4512,6 +4537,7 @@ TEST_P(
AWAIT_READY(retriedUpdateOperationStatusMessage);
AWAIT_READY(acknowledgeOperationStatusMessage);
+ AWAIT_READY(statusUpdateManagerAcknowledgement);
// The master has acknowledged the operation status update, so the SLRP
// shouldn't send further operation status updates.
@@ -5523,10 +5549,20 @@ TEST_P(StorageLocalResourceProviderTest,
RetryOperationStatusUpdateToScheduler)
FUTURE_PROTOBUF(
AcknowledgeOperationStatusMessage(), master.get()->pid,
slave.get()->pid);
+ // Since the acknowledgement is delivered to the SLRP via HTTP, we wait for
+ // a dispatch event to ensure that the acknowledgement is received by SLRP.
+ Future<Nothing> statusUpdateManagerAcknowledgement = FUTURE_DISPATCH(
+ _,
+ (&StatusUpdateManagerProcess<
+ id::UUID,
+ UpdateOperationStatusRecord,
+ UpdateOperationStatusMessage>::acknowledgement));
+
mesos.send(v1::createCallAcknowledgeOperationStatus(
frameworkId, offer.agent_id(), resourceProviderId.get(), update.get()));
AWAIT_READY(acknowledgeOperationStatusMessage);
+ AWAIT_READY(statusUpdateManagerAcknowledgement);
// Verify that the retry was only counted as one operation.
EXPECT_TRUE(metricEquals("master/operations/finished", 1));