HDDS-440. Datanode loops forever if it cannot create directories. Contributed by Bharat Viswanadham.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a968ea48 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a968ea48 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a968ea48 Branch: refs/heads/YARN-7402 Commit: a968ea489743ed09d63a6e267e34491e490cd2d8 Parents: e71f61e Author: Anu Engineer <[email protected]> Authored: Tue Sep 18 14:31:50 2018 -0700 Committer: Anu Engineer <[email protected]> Committed: Tue Sep 18 14:31:50 2018 -0700 ---------------------------------------------------------------------- .../states/datanode/InitDatanodeState.java | 12 +++++- .../common/TestDatanodeStateMachine.java | 42 ++++++++++++++++++++ 2 files changed, 52 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/a968ea48/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/InitDatanodeState.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/InitDatanodeState.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/InitDatanodeState.java index b348327..995f172 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/InitDatanodeState.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/InitDatanodeState.java @@ -116,7 +116,7 @@ public class InitDatanodeState implements DatanodeState, /** * Persist DatanodeDetails to datanode.id file. 
*/ - private void persistContainerDatanodeDetails() throws IOException { + private void persistContainerDatanodeDetails() { String dataNodeIDPath = HddsUtils.getDatanodeIdFilePath(conf); if (Strings.isNullOrEmpty(dataNodeIDPath)) { LOG.error("A valid file path is needed for config setting {}", @@ -128,7 +128,15 @@ public class InitDatanodeState implements DatanodeState, DatanodeDetails datanodeDetails = this.context.getParent() .getDatanodeDetails(); if (datanodeDetails != null && !idPath.exists()) { - ContainerUtils.writeDatanodeDetailsTo(datanodeDetails, idPath); + try { + ContainerUtils.writeDatanodeDetailsTo(datanodeDetails, idPath); + } catch (IOException ex) { + // As writing DatanodeDetails in to datanodeid file failed, which is + // a critical thing, so shutting down the state machine. + LOG.error("Writing to {} failed {}", dataNodeIDPath, ex.getMessage()); + this.context.setState(DatanodeStateMachine.DatanodeStates.SHUTDOWN); + return; + } LOG.info("DatanodeDetails is persisted to {}", dataNodeIDPath); } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/a968ea48/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java index 59029db..3fc0dd0 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java @@ -311,6 +311,48 @@ public class TestDatanodeStateMachine { } } + @Test + public void testDatanodeStateMachineWithIdWriteFail() throws Exception { + + File idPath = new File( + 
conf.get(ScmConfigKeys.OZONE_SCM_DATANODE_ID)); + idPath.delete(); + DatanodeDetails datanodeDetails = getNewDatanodeDetails(); + DatanodeDetails.Port port = DatanodeDetails.newPort( + DatanodeDetails.Port.Name.STANDALONE, + OzoneConfigKeys.DFS_CONTAINER_IPC_PORT_DEFAULT); + datanodeDetails.setPort(port); + + try (DatanodeStateMachine stateMachine = + new DatanodeStateMachine(datanodeDetails, conf)) { + DatanodeStateMachine.DatanodeStates currentState = + stateMachine.getContext().getState(); + Assert.assertEquals(DatanodeStateMachine.DatanodeStates.INIT, + currentState); + + DatanodeState<DatanodeStateMachine.DatanodeStates> task = + stateMachine.getContext().getTask(); + Assert.assertEquals(InitDatanodeState.class, task.getClass()); + + //Set the parent directory of idPath to read-only, state machine will fail to write + // datanodeId file and set the state to shutdown. + idPath.getParentFile().mkdirs(); + idPath.getParentFile().setReadOnly(); + + task.execute(executorService); + DatanodeStateMachine.DatanodeStates newState = + task.await(2, TimeUnit.SECONDS); + + //As we have changed the permission of idPath's parent directory to read-only, writing + // will fail and it will set the state to shutdown. + Assert.assertEquals(DatanodeStateMachine.DatanodeStates.SHUTDOWN, + newState); + + //Setting back to writable. + idPath.getParentFile().setWritable(true); + } + } + /** * Test state transition with a list of invalid scm configurations, * and verify the state transits to SHUTDOWN each time. --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
