This is an automated email from the ASF dual-hosted git repository. rombert pushed a commit to annotated tag org.apache.sling.discovery.oak-1.2.20 in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-discovery-oak.git
commit bf294ac5e21fa979ace5f2dbef2fd78bf0d24b5e Author: Stefan Egli <[email protected]> AuthorDate: Tue Jul 4 12:09:01 2017 +0000 SLING-6924 : avoid harmless error message when new instance joins : a slowly starting new instance might first be visible via discovery-lite before it sets the leaderElectionid property - if that's the case there used to be a NPE - which is harmless but not nice as its an error git-svn-id: https://svn.apache.org/repos/asf/sling/trunk/bundles/extensions/discovery/oak@1800761 13f79535-47bb-0310-9956-ffa450edef68 --- pom.xml | 4 +- .../oak/cluster/OakClusterViewService.java | 29 +++- .../discovery/oak/OakDiscoveryServiceTest.java | 152 +++++++++++++++++++++ 3 files changed, 181 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 1fc8bf7..570fa41 100644 --- a/pom.xml +++ b/pom.xml @@ -167,7 +167,7 @@ <dependency> <groupId>org.apache.sling</groupId> <artifactId>org.apache.sling.discovery.base</artifactId> - <version>2.0.1-SNAPSHOT</version> + <version>2.0.3-SNAPSHOT</version> <scope>provided</scope> </dependency> <!-- besides including discovery.base' normal jar above, @@ -176,7 +176,7 @@ <dependency> <groupId>org.apache.sling</groupId> <artifactId>org.apache.sling.discovery.base</artifactId> - <version>2.0.1-SNAPSHOT</version> + <version>2.0.3-SNAPSHOT</version> <scope>test</scope> <type>test-jar</type> </dependency> diff --git a/src/main/java/org/apache/sling/discovery/oak/cluster/OakClusterViewService.java b/src/main/java/org/apache/sling/discovery/oak/cluster/OakClusterViewService.java index 48cd976..af30f20 100644 --- a/src/main/java/org/apache/sling/discovery/oak/cluster/OakClusterViewService.java +++ b/src/main/java/org/apache/sling/discovery/oak/cluster/OakClusterViewService.java @@ -179,6 +179,20 @@ public class OakClusterViewService implements ClusterViewService { } String leaderElectionId = getLeaderElectionId(resourceResolver, slingId); + // SLING-6924 : leaderElectionId can be null here + // this means that another instance is just starting up, has already + // created its oak lease, thus is already visible from an oak discover-lite + // point of view - but upper level code here in discovery.oak has not yet + // set the leaderElectionId. This is rare but valid case + if (leaderElectionId == null) { + // then at this stage the clusterView is not yet established + // in a few moments it will but at this point not. + // so falling back to treating this as NO_ESTABLISHED_VIEW + // and with the heartbeat interval this situation will + // resolve itself upon one of the next pings + throw new UndefinedClusterViewException(Reason.NO_ESTABLISHED_VIEW, + "no leaderElectionId available yet for slingId="+slingId); + } leaderElectionIds.put(id, leaderElectionId); } @@ -293,9 +307,20 @@ public class OakClusterViewService implements ClusterViewService { throw new IllegalStateException("slingId must not be null"); } final String myClusterNodePath = config.getClusterInstancesPath()+"/"+slingId; - ValueMap resourceMap = resourceResolver.getResource(myClusterNodePath) - .adaptTo(ValueMap.class); + // SLING-6924 case 1 : /var/discovery/oak/clusterInstances/<slingId> can be non existant == null + final Resource myClusterNode = resourceResolver.getResource(myClusterNodePath); + if (myClusterNode == null) { + // SLING-6924 : return null case 1 + return null; + } + ValueMap resourceMap = myClusterNode.adaptTo(ValueMap.class); + // SLING-6924 case 2 : /var/discovery/oak/clusterInstances/<slingId> can exist BUT leaderElectionId not yet set + // namely the "leaderElectionId" is only written when resetLeaderElectionId() is called - which happens + // on OakViewChecker.activate (or when isolated) - and this activate *can* happen after properties + // or announcements have been written - those end up below /var/discovery/oak/clusterInstances/<slingId>/ String result = resourceMap.get("leaderElectionId", String.class); + + // SLING-6924 : return null case 2 (if leaderElectionId is indeed null, that is) return result; } diff --git a/src/test/java/org/apache/sling/discovery/oak/OakDiscoveryServiceTest.java b/src/test/java/org/apache/sling/discovery/oak/OakDiscoveryServiceTest.java index f43831e..4a41e7f 100644 --- a/src/test/java/org/apache/sling/discovery/oak/OakDiscoveryServiceTest.java +++ b/src/test/java/org/apache/sling/discovery/oak/OakDiscoveryServiceTest.java @@ -20,20 +20,33 @@ package org.apache.sling.discovery.oak; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import java.util.ArrayList; +import java.util.List; import java.util.UUID; +import org.apache.sling.api.resource.LoginException; +import org.apache.sling.api.resource.ModifiableValueMap; +import org.apache.sling.api.resource.Resource; import org.apache.sling.api.resource.ResourceResolver; import org.apache.sling.api.resource.ResourceResolverFactory; +import org.apache.sling.discovery.TopologyEvent; +import org.apache.sling.discovery.base.its.setup.OSGiMock; import org.apache.sling.discovery.base.its.setup.VirtualInstance; +import org.apache.sling.discovery.base.its.setup.mock.DummyResourceResolverFactory; +import org.apache.sling.discovery.base.its.setup.mock.MockFactory; import org.apache.sling.discovery.commons.providers.base.DummyListener; import org.apache.sling.discovery.commons.providers.spi.base.DescriptorHelper; import org.apache.sling.discovery.commons.providers.spi.base.DiscoveryLiteConfig; +import org.apache.sling.discovery.commons.providers.spi.base.DiscoveryLiteDescriptor; import org.apache.sling.discovery.commons.providers.spi.base.DiscoveryLiteDescriptorBuilder; import org.apache.sling.discovery.commons.providers.spi.base.DummySlingSettingsService; import org.apache.sling.discovery.commons.providers.spi.base.IdMapService; +import org.apache.sling.discovery.oak.its.setup.OakTestConfig; import org.apache.sling.discovery.oak.its.setup.OakVirtualInstanceBuilder; +import org.apache.sling.discovery.oak.its.setup.SimulatedLease; import org.apache.sling.discovery.oak.its.setup.SimulatedLeaseCollection; import org.junit.Test; import org.slf4j.Logger; @@ -111,6 +124,10 @@ public class OakDiscoveryServiceTest { discoBuilder.setFinal(true); DescriptorHelper.setDiscoveryLiteDescriptor(builder.getResourceResolverFactory(), discoBuilder); + // SLING-6924 : need to simulate a OakViewChecker.activate to trigger resetLeaderElectionId + // otherwise no TOPOLOGY_INIT will be generated as without a leaderElectionId we now + // consider a view as NO_ESTABLISHED_VIEW + OSGiMock.activate(builder.getViewChecker()); discoveryService.checkForTopologyChange(); assertEquals(0, discoveryService.getViewStateManager().waitForAsyncEvents(2000)); assertEquals(1, listener.countEvents()); @@ -181,4 +198,139 @@ public class OakDiscoveryServiceTest { assertEquals(3, listener.countEvents()); } + @Test + public void testNotYetInitializedLeaderElectionid() throws Exception { + logger.info("testNotYetInitializedLeaderElectionid: start"); + OakVirtualInstanceBuilder builder1 = + (OakVirtualInstanceBuilder) new OakVirtualInstanceBuilder() + .setDebugName("instance") + .newRepository("/foo/barrx/foo/", true) + .setConnectorPingInterval(999) + .setConnectorPingTimeout(999); + VirtualInstance instance1 = builder1.build(); + logger.info("testNotYetInitializedLeaderElectionid: created 1 instance, binding listener..."); + + DummyListener listener = new DummyListener(); + OakDiscoveryService discoveryService = (OakDiscoveryService) instance1.getDiscoveryService(); + discoveryService.bindTopologyEventListener(listener); + + logger.info("testNotYetInitializedLeaderElectionid: waiting 2sec, listener should not get anything yet"); + assertEquals(0, discoveryService.getViewStateManager().waitForAsyncEvents(2000)); + assertEquals(0, listener.countEvents()); + + logger.info("testNotYetInitializedLeaderElectionid: issuing 2 heartbeats with each instance should let the topology get established"); + instance1.heartbeatsAndCheckView(); + instance1.heartbeatsAndCheckView(); + + logger.info("testNotYetInitializedLeaderElectionid: listener should get an event within 2sec from now at latest"); + assertEquals(0, discoveryService.getViewStateManager().waitForAsyncEvents(2000)); + assertEquals(1, listener.countEvents()); + + SimulatedLeaseCollection c = builder1.getSimulatedLeaseCollection(); + String secondSlingId = UUID.randomUUID().toString(); + final SimulatedLease newIncomingInstance = new SimulatedLease(instance1.getResourceResolverFactory(), c, secondSlingId); + c.hooked(newIncomingInstance); + c.incSeqNum(1); + newIncomingInstance.updateLeaseAndDescriptor(new OakTestConfig()); + + logger.info("testNotYetInitializedLeaderElectionid: issuing another 2 heartbeats"); + instance1.heartbeatsAndCheckView(); + instance1.heartbeatsAndCheckView(); + + // there are different properties that an instance must set in the repository such that it finally becomes visible. + // these include: + // 1) idmap : it must map the oak id to sling id + // 2) node named after its own slingId under /var/discovery/oak/clusterInstances/<slingId> + // 3) store the leaderElectionId under /var/discovery/oak/clusterInstances/<slingId> + // in all 3 cases the code must work fine if that node/property doesn't exist + // and that's exactly what we're testing here. + + + // initially not even the idmap is updated, so we're stuck with TOPOLOGY_CHANGING + + // due to the nature of the syncService/minEventDelay we now explicitly first sleep 2sec before waiting for async events for another 2sec + logger.info("testNotYetInitializedLeaderElectionid: sleeping 2sec for topology change to happen"); + Thread.sleep(2000); + logger.info("testNotYetInitializedLeaderElectionid: ensuring no async events are still in the pipe - for another 2sec"); + assertEquals(0, discoveryService.getViewStateManager().waitForAsyncEvents(2000)); + logger.info("testNotYetInitializedLeaderElectionid: now listener should have received 2 events, INIT and CHANGING, it got: "+listener.countEvents()); + assertEquals(2, listener.countEvents()); + List<TopologyEvent> events = listener.getEvents(); + assertEquals(TopologyEvent.Type.TOPOLOGY_INIT, events.get(0).getType()); + assertEquals(TopologyEvent.Type.TOPOLOGY_CHANGING, events.get(1).getType()); + + // let's update the idmap first then + DummyResourceResolverFactory factory1 = (DummyResourceResolverFactory) instance1.getResourceResolverFactory(); + ResourceResolverFactory factory2 = MockFactory.mockResourceResolverFactory(factory1.getSlingRepository()); + + ResourceResolver resourceResolver = getResourceResolver(instance1.getResourceResolverFactory()); + DiscoveryLiteDescriptor descriptor = + DiscoveryLiteDescriptor.getDescriptorFrom(resourceResolver); + resourceResolver.close(); + + DiscoveryLiteDescriptorBuilder dlb = prefill(descriptor); + dlb.me(2); + DescriptorHelper.setDiscoveryLiteDescriptor(factory2, dlb); + + IdMapService secondIdMapService = IdMapService.testConstructor((DiscoveryLiteConfig) builder1.getConnectorConfig(), new DummySlingSettingsService(secondSlingId), factory2); + + instance1.heartbeatsAndCheckView(); + instance1.heartbeatsAndCheckView(); + Thread.sleep(2000); + assertEquals(2, listener.countEvents()); + + + // now let's add the /var/discovery/oak/clusterInstances/<slingId> node + resourceResolver = getResourceResolver(factory2); + Resource clusterInstancesRes = resourceResolver.getResource(builder1.getConnectorConfig().getClusterInstancesPath()); + assertNull(clusterInstancesRes.getChild(secondSlingId)); + resourceResolver.create(clusterInstancesRes, secondSlingId, null); + resourceResolver.commit(); + assertNotNull(clusterInstancesRes.getChild(secondSlingId)); + resourceResolver.close(); + + instance1.heartbeatsAndCheckView(); + instance1.heartbeatsAndCheckView(); + Thread.sleep(2000); + assertEquals(2, listener.countEvents()); + + // now let's add the leaderElectionId + resourceResolver = getResourceResolver(factory2); + Resource instanceResource = resourceResolver.getResource(builder1.getConnectorConfig().getClusterInstancesPath() + "/" + secondSlingId); + assertNotNull(instanceResource); + instanceResource.adaptTo(ModifiableValueMap.class).put("leaderElectionId", "0"); + resourceResolver.commit(); + resourceResolver.close(); + + instance1.heartbeatsAndCheckView(); + instance1.heartbeatsAndCheckView(); + Thread.sleep(2000); + assertEquals(3, listener.countEvents()); + assertEquals(TopologyEvent.Type.TOPOLOGY_CHANGED, events.get(2).getType()); + } + + private DiscoveryLiteDescriptorBuilder prefill(DiscoveryLiteDescriptor d) throws Exception { + DiscoveryLiteDescriptorBuilder b = new DiscoveryLiteDescriptorBuilder(); + b.setFinal(true); + long seqnum = d.getSeqNum(); + b.seq((int) seqnum); + b.activeIds(box(d.getActiveIds())); + b.deactivatingIds(box(d.getDeactivatingIds())); + b.me(d.getMyId()); + b.id(d.getViewId()); + return b; + } + + private Integer[] box(final int[] ids) { + //TODO: use Guava + List<Integer> list = new ArrayList<Integer>(ids.length); + for (Integer i : ids) { + list.add(i); + } + return list.toArray(new Integer[list.size()]); + } + + private ResourceResolver getResourceResolver(ResourceResolverFactory resourceResolverFactory) throws LoginException { + return resourceResolverFactory.getServiceResourceResolver(null); + } } -- To stop receiving notification emails like this one, please contact "[email protected]" <[email protected]>.
