This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 08ac32dc23 HDDS-12608. Race condition in datanode version file creation (#8093)
08ac32dc23 is described below

commit 08ac32dc23d0a9a6c06f75485dd31c2bba5d0961
Author: Doroszlai, Attila <[email protected]>
AuthorDate: Mon Mar 17 13:49:53 2025 +0100

    HDDS-12608. Race condition in datanode version file creation (#8093)
---
 .../container/common/states/endpoint/VersionEndpointTask.java |  6 ------
 .../hadoop/ozone/container/ozoneimpl/OzoneContainer.java      |  6 ++++++
 .../hadoop/ozone/container/common/ContainerTestUtils.java     | 11 +++++++++++
 .../apache/hadoop/ozone/container/common/TestEndPoint.java    |  8 ++++++++
 .../hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java  |  2 ++
 .../ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java  |  1 +
 .../ozone/container/ozoneimpl/TestSecureOzoneContainer.java   |  1 +
 7 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java
index 2fb7b9c69b..0f52fedf63 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java
@@ -24,7 +24,6 @@
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMVersionResponseProto;
 import org.apache.hadoop.ozone.OzoneConsts;
-import org.apache.hadoop.ozone.container.common.DatanodeLayoutStorage;
 import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine;
 import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
 import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
@@ -88,11 +87,6 @@ public EndpointStateMachine.EndPointStates call() throws Exception {
           // Check HddsVolumes
           checkVolumeSet(ozoneContainer.getVolumeSet(), scmId, clusterId);
 
-          DatanodeLayoutStorage layoutStorage
-              = new DatanodeLayoutStorage(configuration);
-          layoutStorage.setClusterId(clusterId);
-          layoutStorage.persistCurrentState();
-
           // Start the container services after getting the version information
           ozoneContainer.start(clusterId);
         }
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
index b3fa513382..91ff1511bd 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
@@ -65,6 +65,7 @@
 import org.apache.hadoop.hdds.utils.db.Table;
 import org.apache.hadoop.hdds.utils.db.TableIterator;
 import org.apache.hadoop.ozone.HddsDatanodeService;
+import org.apache.hadoop.ozone.container.common.DatanodeLayoutStorage;
 import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics;
 import org.apache.hadoop.ozone.container.common.impl.BlockDeletingService;
 import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
@@ -484,6 +485,11 @@ public void start(String clusterId) throws IOException {
       return;
     }
 
+    DatanodeLayoutStorage layoutStorage
+        = new DatanodeLayoutStorage(config);
+    layoutStorage.setClusterId(clusterId);
+    layoutStorage.persistCurrentState();
+
     buildContainerSet();
 
     // Start background volume checks, which will begin after the configured
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java
index e33a4c4a3c..3cff94ed9b 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java
@@ -17,6 +17,7 @@
 
 package org.apache.hadoop.ozone.container.common;
 
+import static org.apache.hadoop.ozone.common.Storage.StorageState.INITIALIZED;
 import static org.mockito.Mockito.any;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
@@ -46,6 +47,7 @@
 import org.apache.hadoop.ipc.ProtobufRpcEngine;
 import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.ozone.HddsDatanodeService;
 import org.apache.hadoop.ozone.OzoneConfigKeys;
 import org.apache.hadoop.ozone.container.ContainerTestHelper;
 import org.apache.hadoop.ozone.container.common.impl.ContainerData;
@@ -350,4 +352,13 @@ public static XceiverServerRatis newXceiverServerRatis(
         getNoopContainerDispatcher(), getEmptyContainerController(),
         null, null);
   }
+
+  /** Initialize {@link DatanodeLayoutStorage}.  Normally this is done during {@link HddsDatanodeService} start,
+   * have to do the same for tests that create {@link OzoneContainer} manually. */
+  public static void initializeDatanodeLayout(ConfigurationSource conf, DatanodeDetails dn) throws IOException {
+    DatanodeLayoutStorage layoutStorage = new DatanodeLayoutStorage(conf, dn.getUuidString());
+    if (layoutStorage.getState() != INITIALIZED) {
+      layoutStorage.initialize();
+    }
+  }
 }
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java
index 64f903f450..3bbe889d1a 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java
@@ -63,6 +63,7 @@
 import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.ozone.OzoneConfigKeys;
 import org.apache.hadoop.ozone.OzoneConsts;
+import org.apache.hadoop.ozone.common.Storage.StorageState;
 import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine;
 import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine;
 import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
@@ -310,6 +311,13 @@ public void testDnLayoutVersionFile() throws Exception {
 
       assertEquals("different_cluster_id", layout1.getClusterID());
       assertNotEquals(scmServerImpl.getClusterId(), layout1.getClusterID());
+
+      // another call() with OzoneContainer already started should not write the file
+      FileUtils.forceDelete(layout1.getVersionFile());
+      rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION);
+      versionTask.call();
+      assertEquals(StorageState.NOT_INITIALIZED, new DatanodeLayoutStorage(ozoneConf, "any").getState());
+
       FileUtils.forceDelete(storageDir);
     }
   }
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java
index cd3aeb9400..cb0dbe9719 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java
@@ -82,6 +82,7 @@ public void testCreateOzoneContainer(
           .getOzoneContainer(datanodeDetails, conf);
       StorageVolumeUtil.getHddsVolumesList(container.getVolumeSet().getVolumesList())
           .forEach(hddsVolume -> hddsVolume.setDbParentDir(tempDir.toFile()));
+      ContainerTestUtils.initializeDatanodeLayout(conf, datanodeDetails);
       //Set clusterId and manually start ozone container.
       container.start(UUID.randomUUID().toString());
 
@@ -112,6 +113,7 @@ void testOzoneContainerStart(
       DatanodeDetails datanodeDetails = randomDatanodeDetails();
       container = ContainerTestUtils
           .getOzoneContainer(datanodeDetails, conf);
+      ContainerTestUtils.initializeDatanodeLayout(conf, datanodeDetails);
 
       String clusterId = UUID.randomUUID().toString();
       container.start(clusterId);
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java
index 62214ca214..91285b551d 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java
@@ -313,6 +313,7 @@ private OzoneContainer createAndStartOzoneContainerInstance() {
       MutableVolumeSet volumeSet = container.getVolumeSet();
       StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList())
           .forEach(hddsVolume -> hddsVolume.setDbParentDir(tempFolder.toFile()));
+      ContainerTestUtils.initializeDatanodeLayout(conf, dn);
       container.start(clusterID);
     } catch (Throwable e) {
       if (container != null) {
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestSecureOzoneContainer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestSecureOzoneContainer.java
index e3c3b53a22..de63bfd7c8 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestSecureOzoneContainer.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestSecureOzoneContainer.java
@@ -138,6 +138,7 @@ void testCreateOzoneContainer(boolean requireToken, boolean hasToken,
       MutableVolumeSet volumeSet = container.getVolumeSet();
       StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList())
           .forEach(hddsVolume -> hddsVolume.setDbParentDir(tempFolder.toFile()));
+      ContainerTestUtils.initializeDatanodeLayout(conf, dn);
       //Set scmId and manually start ozone container.
       container.start(UUID.randomUUID().toString());
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to