Prevent resubscription of resource providers with unknown IDs. This patch adds a check to the resource provider manager's subscribe functionality making sure that any ID sent by a resubscribing resource provider corresponds to some previously known resource provider.
This not only serves as convenient validation of user-provided data, but also makes sure that the internal state of the resource provider manager remains consistent. Review: https://reviews.apache.org/r/66546/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d7e21259 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d7e21259 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d7e21259 Branch: refs/heads/master Commit: d7e21259f107fde235c5482818702d873f2e12fb Parents: 65ed45f Author: Benjamin Bannier <benjamin.bann...@mesosphere.io> Authored: Tue May 1 13:09:22 2018 -0700 Committer: Chun-Hung Hsiao <chhs...@mesosphere.io> Committed: Tue May 1 13:09:22 2018 -0700 ---------------------------------------------------------------------- src/resource_provider/manager.cpp | 13 ++++- src/tests/resource_provider_manager_tests.cpp | 61 ++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/d7e21259/src/resource_provider/manager.cpp ---------------------------------------------------------------------- diff --git a/src/resource_provider/manager.cpp b/src/resource_provider/manager.cpp index 5979480..aff1ca5 100644 --- a/src/resource_provider/manager.cpp +++ b/src/resource_provider/manager.cpp @@ -678,7 +678,18 @@ void ResourceProviderManagerProcess::subscribe( // restarted or an agent failover. The 'ResourceProviderInfo' might // have been updated, but its type and name should remain the same. // We should checkpoint its 'type', 'name' and ID, then check if the - // resubscribption is consistent with the checkpointed record. + // resubscription is consistent with the checkpointed record. + + const ResourceProviderID& resourceProviderId = resourceProviderInfo.id(); + + if (!resourceProviders.known.contains(resourceProviderId)) { + LOG(INFO) + << "Dropping resubscription attempt of resource provider with ID " + << resourceProviderId + << " since it is unknown"; + + return; + } // If the resource provider is known we do not need to admit it // again, and the registrar operation implicitly succeeded. http://git-wip-us.apache.org/repos/asf/mesos/blob/d7e21259/src/tests/resource_provider_manager_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/resource_provider_manager_tests.cpp b/src/tests/resource_provider_manager_tests.cpp index 8c364a1..1ee4dc3 100644 --- a/src/tests/resource_provider_manager_tests.cpp +++ b/src/tests/resource_provider_manager_tests.cpp @@ -1150,6 +1150,67 @@ TEST_P(ResourceProviderManagerHttpApiTest, ResubscribeResourceProvider) } +// Test that when a resource provider attempts to resubscribe with an +// unknown ID it is not admitted but disconnected. +TEST_P(ResourceProviderManagerHttpApiTest, ResubscribeUnknownID) +{ + Clock::pause(); + + // Start master and agent. + Try<Owned<cluster::Master>> master = StartMaster(); + ASSERT_SOME(master); + + Owned<MasterDetector> detector = master.get()->createDetector(); + + slave::Flags slaveFlags = CreateSlaveFlags(); + + // For the agent's resource provider manager to start, + // the agent needs to have been assigned an agent ID. + Future<SlaveRegisteredMessage> slaveRegisteredMessage = + FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _); + + Try<Owned<cluster::Slave>> agent = StartSlave(detector.get(), slaveFlags); + ASSERT_SOME(agent); + + Clock::advance(slaveFlags.registration_backoff_factor); + Clock::settle(); + + AWAIT_READY(slaveRegisteredMessage); + + mesos::v1::ResourceProviderID resourceProviderId; + resourceProviderId.set_value(id::UUID::random().toString()); + + mesos::v1::ResourceProviderInfo resourceProviderInfo; + resourceProviderInfo.mutable_id()->CopyFrom(resourceProviderId); + resourceProviderInfo.set_type("org.apache.mesos.rp.test"); + resourceProviderInfo.set_name("test"); + + Owned<v1::MockResourceProvider> resourceProvider( + new v1::MockResourceProvider(resourceProviderInfo)); + + // We explicitly reset the resource provider after the expected + // disconnect to prevent it from resubscribing indefinitely. + Future<Nothing> disconnected; + EXPECT_CALL(*resourceProvider, disconnected()) + .WillOnce(DoAll( + Invoke([&resourceProvider]() { resourceProvider.reset(); }), + FutureSatisfy(&disconnected))); + + // Start and register a resource provider. + Owned<EndpointDetector> endpointDetector( + resource_provider::createEndpointDetector(agent.get()->pid)); + + const ContentType contentType = GetParam(); + + resourceProvider->start( + endpointDetector, + contentType, + v1::DEFAULT_CREDENTIAL); + + AWAIT_READY(disconnected); +} + + // This test verifies that a disconnected resource provider will // result in an `UpdateSlaveMessage` to be sent to the master and the // total resources of the disconnected resource provider will be