Author: stefanegli
Date: Tue Jul 4 12:09:01 2017
New Revision: 1800761
URL: http://svn.apache.org/viewvc?rev=1800761&view=rev
Log:
SLING-6924 : avoid harmless error message when new instance joins : a slowly
starting new instance might first be visible via discovery-lite before it sets
the leaderElectionid property - if that's the case there used to be a NPE -
which is harmless but not nice as its an error
Modified:
sling/trunk/bundles/extensions/discovery/oak/pom.xml
sling/trunk/bundles/extensions/discovery/oak/src/main/java/org/apache/sling/discovery/oak/cluster/OakClusterViewService.java
sling/trunk/bundles/extensions/discovery/oak/src/test/java/org/apache/sling/discovery/oak/OakDiscoveryServiceTest.java
Modified: sling/trunk/bundles/extensions/discovery/oak/pom.xml
URL:
http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/oak/pom.xml?rev=1800761&r1=1800760&r2=1800761&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/oak/pom.xml (original)
+++ sling/trunk/bundles/extensions/discovery/oak/pom.xml Tue Jul 4 12:09:01
2017
@@ -167,7 +167,7 @@
<dependency>
<groupId>org.apache.sling</groupId>
<artifactId>org.apache.sling.discovery.base</artifactId>
- <version>2.0.1-SNAPSHOT</version>
+ <version>2.0.3-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
<!-- besides including discovery.base' normal jar above,
@@ -176,7 +176,7 @@
<dependency>
<groupId>org.apache.sling</groupId>
<artifactId>org.apache.sling.discovery.base</artifactId>
- <version>2.0.1-SNAPSHOT</version>
+ <version>2.0.3-SNAPSHOT</version>
<scope>test</scope>
<type>test-jar</type>
</dependency>
Modified:
sling/trunk/bundles/extensions/discovery/oak/src/main/java/org/apache/sling/discovery/oak/cluster/OakClusterViewService.java
URL:
http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/oak/src/main/java/org/apache/sling/discovery/oak/cluster/OakClusterViewService.java?rev=1800761&r1=1800760&r2=1800761&view=diff
==============================================================================
---
sling/trunk/bundles/extensions/discovery/oak/src/main/java/org/apache/sling/discovery/oak/cluster/OakClusterViewService.java
(original)
+++
sling/trunk/bundles/extensions/discovery/oak/src/main/java/org/apache/sling/discovery/oak/cluster/OakClusterViewService.java
Tue Jul 4 12:09:01 2017
@@ -179,6 +179,20 @@ public class OakClusterViewService imple
}
String leaderElectionId = getLeaderElectionId(resourceResolver,
slingId);
+ // SLING-6924 : leaderElectionId can be null here
+ // this means that another instance is just starting up, has
already
+ // created its oak lease, thus is already visible from an oak
discover-lite
+ // point of view - but upper level code here in discovery.oak has
not yet
+ // set the leaderElectionId. This is rare but valid case
+ if (leaderElectionId == null) {
+ // then at this stage the clusterView is not yet established
+ // in a few moments it will but at this point not.
+ // so falling back to treating this as NO_ESTABLISHED_VIEW
+ // and with the heartbeat interval this situation will
+ // resolve itself upon one of the next pings
+ throw new
UndefinedClusterViewException(Reason.NO_ESTABLISHED_VIEW,
+ "no leaderElectionId available yet for
slingId="+slingId);
+ }
leaderElectionIds.put(id, leaderElectionId);
}
@@ -293,9 +307,20 @@ public class OakClusterViewService imple
throw new IllegalStateException("slingId must not be null");
}
final String myClusterNodePath =
config.getClusterInstancesPath()+"/"+slingId;
- ValueMap resourceMap = resourceResolver.getResource(myClusterNodePath)
- .adaptTo(ValueMap.class);
+ // SLING-6924 case 1 : /var/discovery/oak/clusterInstances/<slingId>
can be non existant == null
+ final Resource myClusterNode =
resourceResolver.getResource(myClusterNodePath);
+ if (myClusterNode == null) {
+ // SLING-6924 : return null case 1
+ return null;
+ }
+ ValueMap resourceMap = myClusterNode.adaptTo(ValueMap.class);
+ // SLING-6924 case 2 : /var/discovery/oak/clusterInstances/<slingId>
can exist BUT leaderElectionId not yet set
+ // namely the "leaderElectionId" is only written when
resetLeaderElectionId() is called - which happens
+ // on OakViewChecker.activate (or when isolated) - and this
activate *can* happen after properties
+ // or announcements have been written - those end up below
/var/discovery/oak/clusterInstances/<slingId>/
String result = resourceMap.get("leaderElectionId", String.class);
+
+ // SLING-6924 : return null case 2 (if leaderElectionId is indeed
null, that is)
return result;
}
Modified:
sling/trunk/bundles/extensions/discovery/oak/src/test/java/org/apache/sling/discovery/oak/OakDiscoveryServiceTest.java
URL:
http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/oak/src/test/java/org/apache/sling/discovery/oak/OakDiscoveryServiceTest.java?rev=1800761&r1=1800760&r2=1800761&view=diff
==============================================================================
---
sling/trunk/bundles/extensions/discovery/oak/src/test/java/org/apache/sling/discovery/oak/OakDiscoveryServiceTest.java
(original)
+++
sling/trunk/bundles/extensions/discovery/oak/src/test/java/org/apache/sling/discovery/oak/OakDiscoveryServiceTest.java
Tue Jul 4 12:09:01 2017
@@ -20,20 +20,33 @@ package org.apache.sling.discovery.oak;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import java.util.ArrayList;
+import java.util.List;
import java.util.UUID;
+import org.apache.sling.api.resource.LoginException;
+import org.apache.sling.api.resource.ModifiableValueMap;
+import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
+import org.apache.sling.discovery.TopologyEvent;
+import org.apache.sling.discovery.base.its.setup.OSGiMock;
import org.apache.sling.discovery.base.its.setup.VirtualInstance;
+import
org.apache.sling.discovery.base.its.setup.mock.DummyResourceResolverFactory;
+import org.apache.sling.discovery.base.its.setup.mock.MockFactory;
import org.apache.sling.discovery.commons.providers.base.DummyListener;
import org.apache.sling.discovery.commons.providers.spi.base.DescriptorHelper;
import
org.apache.sling.discovery.commons.providers.spi.base.DiscoveryLiteConfig;
+import
org.apache.sling.discovery.commons.providers.spi.base.DiscoveryLiteDescriptor;
import
org.apache.sling.discovery.commons.providers.spi.base.DiscoveryLiteDescriptorBuilder;
import
org.apache.sling.discovery.commons.providers.spi.base.DummySlingSettingsService;
import org.apache.sling.discovery.commons.providers.spi.base.IdMapService;
+import org.apache.sling.discovery.oak.its.setup.OakTestConfig;
import org.apache.sling.discovery.oak.its.setup.OakVirtualInstanceBuilder;
+import org.apache.sling.discovery.oak.its.setup.SimulatedLease;
import org.apache.sling.discovery.oak.its.setup.SimulatedLeaseCollection;
import org.junit.Test;
import org.slf4j.Logger;
@@ -111,6 +124,10 @@ public class OakDiscoveryServiceTest {
discoBuilder.setFinal(true);
DescriptorHelper.setDiscoveryLiteDescriptor(builder.getResourceResolverFactory(),
discoBuilder);
+ // SLING-6924 : need to simulate a OakViewChecker.activate to trigger
resetLeaderElectionId
+ // otherwise no TOPOLOGY_INIT will be generated as without a
leaderElectionId we now
+ // consider a view as NO_ESTABLISHED_VIEW
+ OSGiMock.activate(builder.getViewChecker());
discoveryService.checkForTopologyChange();
assertEquals(0,
discoveryService.getViewStateManager().waitForAsyncEvents(2000));
assertEquals(1, listener.countEvents());
@@ -181,4 +198,139 @@ public class OakDiscoveryServiceTest {
assertEquals(3, listener.countEvents());
}
+ @Test
+ public void testNotYetInitializedLeaderElectionid() throws Exception {
+ logger.info("testNotYetInitializedLeaderElectionid: start");
+ OakVirtualInstanceBuilder builder1 =
+ (OakVirtualInstanceBuilder) new OakVirtualInstanceBuilder()
+ .setDebugName("instance")
+ .newRepository("/foo/barrx/foo/", true)
+ .setConnectorPingInterval(999)
+ .setConnectorPingTimeout(999);
+ VirtualInstance instance1 = builder1.build();
+ logger.info("testNotYetInitializedLeaderElectionid: created 1
instance, binding listener...");
+
+ DummyListener listener = new DummyListener();
+ OakDiscoveryService discoveryService = (OakDiscoveryService)
instance1.getDiscoveryService();
+ discoveryService.bindTopologyEventListener(listener);
+
+ logger.info("testNotYetInitializedLeaderElectionid: waiting 2sec,
listener should not get anything yet");
+ assertEquals(0,
discoveryService.getViewStateManager().waitForAsyncEvents(2000));
+ assertEquals(0, listener.countEvents());
+
+ logger.info("testNotYetInitializedLeaderElectionid: issuing 2
heartbeats with each instance should let the topology get established");
+ instance1.heartbeatsAndCheckView();
+ instance1.heartbeatsAndCheckView();
+
+ logger.info("testNotYetInitializedLeaderElectionid: listener should
get an event within 2sec from now at latest");
+ assertEquals(0,
discoveryService.getViewStateManager().waitForAsyncEvents(2000));
+ assertEquals(1, listener.countEvents());
+
+ SimulatedLeaseCollection c = builder1.getSimulatedLeaseCollection();
+ String secondSlingId = UUID.randomUUID().toString();
+ final SimulatedLease newIncomingInstance = new
SimulatedLease(instance1.getResourceResolverFactory(), c, secondSlingId);
+ c.hooked(newIncomingInstance);
+ c.incSeqNum(1);
+ newIncomingInstance.updateLeaseAndDescriptor(new OakTestConfig());
+
+ logger.info("testNotYetInitializedLeaderElectionid: issuing another 2
heartbeats");
+ instance1.heartbeatsAndCheckView();
+ instance1.heartbeatsAndCheckView();
+
+ // there are different properties that an instance must set in the
repository such that it finally becomes visible.
+ // these include:
+ // 1) idmap : it must map the oak id to sling id
+ // 2) node named after its own slingId under
/var/discovery/oak/clusterInstances/<slingId>
+ // 3) store the leaderElectionId under
/var/discovery/oak/clusterInstances/<slingId>
+ // in all 3 cases the code must work fine if that node/property
doesn't exist
+ // and that's exactly what we're testing here.
+
+
+ // initially not even the idmap is updated, so we're stuck with
TOPOLOGY_CHANGING
+
+ // due to the nature of the syncService/minEventDelay we now
explicitly first sleep 2sec before waiting for async events for another 2sec
+ logger.info("testNotYetInitializedLeaderElectionid: sleeping 2sec for
topology change to happen");
+ Thread.sleep(2000);
+ logger.info("testNotYetInitializedLeaderElectionid: ensuring no async
events are still in the pipe - for another 2sec");
+ assertEquals(0,
discoveryService.getViewStateManager().waitForAsyncEvents(2000));
+ logger.info("testNotYetInitializedLeaderElectionid: now listener
should have received 2 events, INIT and CHANGING, it got:
"+listener.countEvents());
+ assertEquals(2, listener.countEvents());
+ List<TopologyEvent> events = listener.getEvents();
+ assertEquals(TopologyEvent.Type.TOPOLOGY_INIT,
events.get(0).getType());
+ assertEquals(TopologyEvent.Type.TOPOLOGY_CHANGING,
events.get(1).getType());
+
+ // let's update the idmap first then
+ DummyResourceResolverFactory factory1 = (DummyResourceResolverFactory)
instance1.getResourceResolverFactory();
+ ResourceResolverFactory factory2 =
MockFactory.mockResourceResolverFactory(factory1.getSlingRepository());
+
+ ResourceResolver resourceResolver =
getResourceResolver(instance1.getResourceResolverFactory());
+ DiscoveryLiteDescriptor descriptor =
+ DiscoveryLiteDescriptor.getDescriptorFrom(resourceResolver);
+ resourceResolver.close();
+
+ DiscoveryLiteDescriptorBuilder dlb = prefill(descriptor);
+ dlb.me(2);
+ DescriptorHelper.setDiscoveryLiteDescriptor(factory2, dlb);
+
+ IdMapService secondIdMapService =
IdMapService.testConstructor((DiscoveryLiteConfig)
builder1.getConnectorConfig(), new DummySlingSettingsService(secondSlingId),
factory2);
+
+ instance1.heartbeatsAndCheckView();
+ instance1.heartbeatsAndCheckView();
+ Thread.sleep(2000);
+ assertEquals(2, listener.countEvents());
+
+
+ // now let's add the /var/discovery/oak/clusterInstances/<slingId> node
+ resourceResolver = getResourceResolver(factory2);
+ Resource clusterInstancesRes =
resourceResolver.getResource(builder1.getConnectorConfig().getClusterInstancesPath());
+ assertNull(clusterInstancesRes.getChild(secondSlingId));
+ resourceResolver.create(clusterInstancesRes, secondSlingId, null);
+ resourceResolver.commit();
+ assertNotNull(clusterInstancesRes.getChild(secondSlingId));
+ resourceResolver.close();
+
+ instance1.heartbeatsAndCheckView();
+ instance1.heartbeatsAndCheckView();
+ Thread.sleep(2000);
+ assertEquals(2, listener.countEvents());
+
+ // now let's add the leaderElectionId
+ resourceResolver = getResourceResolver(factory2);
+ Resource instanceResource =
resourceResolver.getResource(builder1.getConnectorConfig().getClusterInstancesPath()
+ "/" + secondSlingId);
+ assertNotNull(instanceResource);
+
instanceResource.adaptTo(ModifiableValueMap.class).put("leaderElectionId", "0");
+ resourceResolver.commit();
+ resourceResolver.close();
+
+ instance1.heartbeatsAndCheckView();
+ instance1.heartbeatsAndCheckView();
+ Thread.sleep(2000);
+ assertEquals(3, listener.countEvents());
+ assertEquals(TopologyEvent.Type.TOPOLOGY_CHANGED,
events.get(2).getType());
+ }
+
+ private DiscoveryLiteDescriptorBuilder prefill(DiscoveryLiteDescriptor d)
throws Exception {
+ DiscoveryLiteDescriptorBuilder b = new
DiscoveryLiteDescriptorBuilder();
+ b.setFinal(true);
+ long seqnum = d.getSeqNum();
+ b.seq((int) seqnum);
+ b.activeIds(box(d.getActiveIds()));
+ b.deactivatingIds(box(d.getDeactivatingIds()));
+ b.me(d.getMyId());
+ b.id(d.getViewId());
+ return b;
+ }
+
+ private Integer[] box(final int[] ids) {
+ //TODO: use Guava
+ List<Integer> list = new ArrayList<Integer>(ids.length);
+ for (Integer i : ids) {
+ list.add(i);
+ }
+ return list.toArray(new Integer[list.size()]);
+ }
+
+ private ResourceResolver getResourceResolver(ResourceResolverFactory
resourceResolverFactory) throws LoginException {
+ return resourceResolverFactory.getServiceResourceResolver(null);
+ }
}