sadanand48 commented on code in PR #5266:
URL: https://github.com/apache/ozone/pull/5266#discussion_r1332548359
##########
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java:
##########
@@ -452,7 +452,7 @@ private void processReregisterCommand() {
LOG.debug("Received SCM notification to register."
+ " Interrupt HEARTBEAT and transit to REGISTER state.");
Review Comment:
Done.
##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestStorageContainerManager.java:
##########
@@ -370,6 +375,78 @@ public void testBlockDeletionTransactions() throws Exception {
}
}
+ @Test
+ public void testOldDNRegistersToReInitialisedSCM() throws Exception {
+ OzoneConfiguration conf = new OzoneConfiguration();
+ MiniOzoneCluster cluster =
+ MiniOzoneCluster.newBuilder(conf).setHbInterval(1000)
+ .setHbProcessorInterval(3000).setNumDatanodes(1)
+ .setClusterId(UUID.randomUUID().toString()).build();
+ cluster.waitForClusterToBeReady();
+
+ try {
+ HddsDatanodeService datanode = cluster.getHddsDatanodes().get(0);
+ StorageContainerManager scm = cluster.getStorageContainerManager();
+ scm.stop();
+
+ // re-initialise SCM with new clusterID
+
+ GenericTestUtils.deleteDirectory(
+ new File(scm.getScmStorageConfig().getStorageDir()));
+ String newClusterId = UUID.randomUUID().toString();
+ StorageContainerManager.scmInit(scm.getConfiguration(), newClusterId);
+ scm = HddsTestUtils.getScmSimple(scm.getConfiguration());
+
+ DatanodeStateMachine dsm = datanode.getDatanodeStateMachine();
+ Assert.assertEquals(DatanodeStateMachine.DatanodeStates.RUNNING,
+ dsm.getContext().getState());
+ // DN Endpoint State has already gone through GetVersion and Register,
+ // so it will be in HEARTBEAT state.
+ for (EndpointStateMachine endpoint : dsm.getConnectionManager()
+ .getValues()) {
+ Assert.assertEquals(EndpointStateMachine.EndPointStates.HEARTBEAT,
+ endpoint.getState());
+ }
+ GenericTestUtils.LogCapturer scmDnHBDispatcherLog =
+ GenericTestUtils.LogCapturer.captureLogs(
+ SCMDatanodeHeartbeatDispatcher.LOG);
+ GenericTestUtils.LogCapturer versionEndPointTaskLog =
+ GenericTestUtils.LogCapturer.captureLogs(VersionEndpointTask.LOG);
+ // Initially empty
+ Assert.assertTrue(scmDnHBDispatcherLog.getOutput().isEmpty());
+ Assert.assertTrue(versionEndPointTaskLog.getOutput().isEmpty());
+ // start the new SCM
+ scm.start();
+ // Initially DatanodeStateMachine will be in Running state
+ Assert.assertEquals(DatanodeStateMachine.DatanodeStates.RUNNING,
+ dsm.getContext().getState());
+ // DN heartbeats to new SCM, SCM doesn't recognize the node, sends the
+ // command to DN to re-register. Wait for SCM to send re-register command
+ String expectedLog = String.format(
+ "SCM received heartbeat from an unregistered datanode %s. "
+ + "Asking datanode to re-register.",
+ datanode.getDatanodeDetails());
+ GenericTestUtils.waitFor(
+ () -> scmDnHBDispatcherLog.getOutput().contains(expectedLog), 100,
+ 3000);
+ ExitUtil.disableSystemExit();
+ // As part of processing response for re-register, DN EndpointStateMachine
+ // goes to GET-VERSION state which checks if there is already existing
+ // version file on the DN & if the clusterID matches with that of the SCM
+ // In this case, it won't match and gets InconsistentStorageStateException
+ // and DN shuts down.
Review Comment:
Done.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]