This is an automated email from the ASF dual-hosted git repository.

sammichen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new a2e865a3a43 HDDS-13955. Handle empty datanode.id file gracefully (#9479)
a2e865a3a43 is described below

commit a2e865a3a439c6103dce6bbe8846df4d8e44362f
Author: Neo Chien <[email protected]>
AuthorDate: Mon Jan 5 13:31:41 2026 +0800

    HDDS-13955. Handle empty datanode.id file gracefully (#9479)
---
 .../apache/hadoop/ozone/HddsDatanodeService.java   |  2 +-
 .../container/common/helpers/ContainerUtils.java   | 69 ++++++++++++++++++----
 .../container/common/helpers/DatanodeIdYaml.java   |  7 +++
 .../container/common/utils/StorageVolumeUtil.java  |  2 +-
 .../common/helpers/TestContainerUtils.java         | 50 ++++++++++++++--
 .../datanode/schemaupgrade/UpgradeUtils.java       |  2 +-
 6 files changed, 113 insertions(+), 19 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
index 9bd10a83b3e..bb3810431e5 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
@@ -455,7 +455,7 @@ private DatanodeDetails initializeDatanodeDetails()
     File idFile = new File(idFilePath);
     DatanodeDetails details;
     if (idFile.exists()) {
-      details = ContainerUtils.readDatanodeDetailsFrom(idFile);
+      details = ContainerUtils.readDatanodeDetailsFrom(idFile, conf);
     } else {
       // There is no datanode.id file, this might be the first time datanode
       // is started.
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java
index 33d7dc9a324..7d16546fb69 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java
@@ -34,7 +34,9 @@
 import java.nio.file.Paths;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
+import java.util.Collection;
 import java.util.Objects;
+import java.util.Properties;
 import java.util.UUID;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -49,10 +51,12 @@
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
+import org.apache.hadoop.hdds.utils.HddsServerUtil;
 import org.apache.hadoop.ozone.OzoneConsts;
 import org.apache.hadoop.ozone.container.common.impl.ContainerData;
 import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml;
 import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
+import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
 import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
 import org.slf4j.Logger;
@@ -166,25 +170,68 @@ public static synchronized void writeDatanodeDetailsTo(
    * @return {@link DatanodeDetails}
    * @throws IOException If the id file is malformed or other I/O exceptions
    */
-  public static synchronized DatanodeDetails readDatanodeDetailsFrom(File path)
-      throws IOException {
+  public static synchronized DatanodeDetails readDatanodeDetailsFrom(
+      File path, ConfigurationSource conf) throws IOException {
     if (!path.exists()) {
       throw new IOException("Datanode ID file not found.");
     }
     try {
       return DatanodeIdYaml.readDatanodeIdFile(path);
     } catch (IOException e) {
-      LOG.warn("Error loading DatanodeDetails yaml from {}",
-          path.getAbsolutePath(), e);
-      // Try to load as protobuf before giving up
-      try (InputStream in = Files.newInputStream(path.toPath())) {
-        return DatanodeDetails.getFromProtoBuf(
-            HddsProtos.DatanodeDetailsProto.parseFrom(in));
-      } catch (IOException io) {
-        throw new IOException("Failed to parse DatanodeDetails from "
-            + path.getAbsolutePath(), io);
+      LOG.warn("Failed to read Datanode ID file as YAML. " +
+          "Attempting recovery.", e);
+      try {
+        return recoverDatanodeDetailsFromVersionFile(path, conf);
+      } catch (IOException recoveryEx) {
+        LOG.warn("Datanode ID recovery from VERSION file failed. " +
+            "Falling back to reading as Protobuf.", recoveryEx);
+        try {
+          return readDatanodeDetailsFromProto(path);
+        } catch (IOException io) {
+          throw new IOException("Failed to parse DatanodeDetails from "
+              + path.getAbsolutePath(), io);
+        }
+      }
+    }
+  }
+
+  /**
+   * Recover DatanodeDetails from VERSION file.
+   */
+  private static DatanodeDetails recoverDatanodeDetailsFromVersionFile(
+      File path, ConfigurationSource conf) throws IOException {
+    LOG.info("Attempting to recover Datanode ID from VERSION file.");
+    String dnUuid = null;
+    Collection<String> dataNodeDirs =
+        HddsServerUtil.getDatanodeStorageDirs(conf);
+    for (String dataNodeDir : dataNodeDirs) {
+      File versionFile = new File(dataNodeDir, HddsVolume.HDDS_VOLUME_DIR + "/" + StorageVolumeUtil.VERSION_FILE);
+      if (versionFile.exists()) {
+        Properties props = DatanodeVersionFile.readFrom(versionFile);
+        dnUuid = props.getProperty(OzoneConsts.DATANODE_UUID);
+        if (dnUuid != null && !dnUuid.isEmpty()) {
+          break;
+        }
       }
     }
+    if (dnUuid == null) {
+      throw new IOException("Could not find a valid datanode UUID from " +
+          "any VERSION file in " + dataNodeDirs);
+    }
+    DatanodeDetails.Builder builder = DatanodeDetails.newBuilder();
+    builder.setUuid(UUID.fromString(dnUuid));
+    DatanodeDetails datanodeDetails = builder.build();
+    DatanodeIdYaml.createDatanodeIdFile(datanodeDetails, path, conf);
+    LOG.info("Successfully recovered and rewrote datanode ID file.");
+    return datanodeDetails;
+  }
+
+  private static DatanodeDetails readDatanodeDetailsFromProto(File path)
+      throws IOException {
+    try (InputStream in = Files.newInputStream(path.toPath())) {
+      return DatanodeDetails.getFromProtoBuf(
+          HddsProtos.DatanodeDetailsProto.parseFrom(in));
+    }
   }
 
   /**
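
Note on the new control flow above: readDatanodeDetailsFrom now tries three
parsers in order -- YAML first, then recovery from a volume VERSION file, then
protobuf -- and only fails once all three have failed. A minimal,
self-contained sketch of that read-with-fallbacks pattern (the Parser
interface and the stand-in lambdas below are illustrative, not part of the
Ozone API):

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

public final class FallbackReadSketch {

  @FunctionalInterface
  interface Parser {
    String parse(File file) throws IOException;
  }

  // Try each parser in order; the first success wins, otherwise rethrow
  // with the last failure as the cause, mirroring readDatanodeDetailsFrom.
  static String readWithFallbacks(File file, List<Parser> parsers)
      throws IOException {
    IOException last = null;
    for (Parser parser : parsers) {
      try {
        return parser.parse(file);
      } catch (IOException e) {
        last = e;  // remember the failure and fall through to the next parser
      }
    }
    throw new IOException("All parsers failed for " + file, last);
  }

  public static void main(String[] args) throws IOException {
    System.out.println(readWithFallbacks(new File("datanode.id"), Arrays.asList(
        f -> { throw new IOException("not valid YAML"); },  // YAML stand-in
        f -> "recovered-from-VERSION-file")));              // recovery stand-in
  }
}
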
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeIdYaml.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeIdYaml.java
index d3fd432efef..07bdedb4398 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeIdYaml.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeIdYaml.java
@@ -87,6 +87,13 @@ public static DatanodeDetails readDatanodeIdFile(File path)
         throw new IOException("Unable to parse yaml file.", e);
       }
 
+      if (datanodeDetailsYaml == null
+          || datanodeDetailsYaml.getUuid() == null
+          || datanodeDetailsYaml.getUuid().isEmpty()) {
+        throw new IOException(
+            "Datanode ID file is empty or has null UUID: " + 
path.getAbsolutePath());
+      }
+
       DatanodeDetails.Builder builder = DatanodeDetails.newBuilder();
       builder.setUuid(UUID.fromString(datanodeDetailsYaml.getUuid()))
           .setIpAddress(datanodeDetailsYaml.getIpAddress())
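
Why the explicit check above is needed: a YAML parser handed an empty document
typically returns null rather than throwing, so an empty datanode.id used to
slip past the existing try/catch and fail later with a NullPointerException.
A minimal demonstration, assuming SnakeYAML (the parser behind DatanodeIdYaml):

import org.yaml.snakeyaml.Yaml;

public final class EmptyYamlSketch {
  public static void main(String[] args) {
    // Loading an empty document does not throw; it quietly yields null,
    // which is why readDatanodeIdFile must reject a null result (or a
    // null/empty UUID) with an IOException of its own.
    Object parsed = new Yaml().load("");  // contents of an empty datanode.id
    System.out.println(parsed);           // prints "null" -- no exception
  }
}
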
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/StorageVolumeUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/StorageVolumeUtil.java
index 5e6fe086a16..c71fc6cde6d 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/StorageVolumeUtil.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/StorageVolumeUtil.java
@@ -43,7 +43,7 @@
  */
 public final class StorageVolumeUtil {
 
-  private static final String VERSION_FILE   = "VERSION";
+  public static final String VERSION_FILE   = "VERSION";
   private static final String STORAGE_ID_PREFIX = "DS-";
 
   private StorageVolumeUtil() {
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java
index 2a2d90ae18c..e262e795aa6 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java
@@ -26,6 +26,7 @@
 import static org.apache.hadoop.ozone.container.ContainerTestHelper.getDummyCommandRequestProto;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.mockStatic;
 import static org.mockito.Mockito.when;
@@ -37,6 +38,7 @@
 import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
+import java.util.UUID;
 import org.apache.commons.lang3.RandomUtils;
 import org.apache.hadoop.hdds.HddsConfigKeys;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
@@ -45,6 +47,7 @@
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.ByteStringConversion;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.ozone.common.ChunkBuffer;
 import org.apache.ratis.thirdparty.com.google.protobuf.TextFormat;
 import org.junit.jupiter.api.BeforeEach;
@@ -125,13 +128,13 @@ public void testDatanodeIDPersistent(@TempDir File tempDir) throws Exception {
       // Read should return an empty value if file doesn't exist
       File nonExistFile = new File(tempDir, "non_exist.id");
       assertThrows(IOException.class,
-          () -> ContainerUtils.readDatanodeDetailsFrom(nonExistFile));
+          () -> ContainerUtils.readDatanodeDetailsFrom(nonExistFile, conf));
 
       // Read should fail if the file is malformed
       File malformedFile = new File(tempDir, "malformed.id");
       createMalformedIDFile(malformedFile);
       assertThrows(IOException.class,
-          () -> ContainerUtils.readDatanodeDetailsFrom(malformedFile));
+          () -> ContainerUtils.readDatanodeDetailsFrom(malformedFile, conf));
 
       // Test upgrade scenario - protobuf file instead of yaml
       File protoFile = new File(tempDir, "valid-proto.id");
@@ -139,20 +142,57 @@ public void testDatanodeIDPersistent(@TempDir File tempDir) throws Exception {
         HddsProtos.DatanodeDetailsProto proto = id1.getProtoBufMessage();
         proto.writeTo(out);
       }
-      assertDetailsEquals(id1, ContainerUtils.readDatanodeDetailsFrom(protoFile));
+      assertDetailsEquals(id1, ContainerUtils.readDatanodeDetailsFrom(protoFile, conf));
 
       id1.setInitialVersion(1);
       assertWriteRead(tempDir, id1);
     }
   }
 
+  @Test
+  public void testDatanodeIdRecovery(@TempDir File tempDir) throws IOException {
+    // 1. Setup storage directory and VERSION file
+    String datanodeUuid = UUID.randomUUID().toString();
+    File storageDir = new File(tempDir, "datanode-storage");
+    assertTrue(storageDir.mkdirs());
+    conf.set(ScmConfigKeys.HDDS_DATANODE_DIR_KEY, storageDir.getAbsolutePath());
+
+    File hddsSubDir = new File(storageDir, "hdds");
+    assertTrue(hddsSubDir.mkdirs());
+    File versionFile = new File(hddsSubDir, "VERSION");
+    DatanodeVersionFile dnVersionFile = new DatanodeVersionFile(
+        "storage-id", "cluster-id", datanodeUuid, System.currentTimeMillis(), 
0);
+    dnVersionFile.createVersionFile(versionFile);
+
+    // 2. Simulate a corrupted/empty datanode.id file
+    File datanodeIdFile = new File(tempDir, "datanode.id");
+    assertTrue(datanodeIdFile.createNewFile());
+
+    assertEquals(0, datanodeIdFile.length(), "Datanode ID file should be empty initially");
+
+    // 3. Call readDatanodeDetailsFrom and verify recovery
+    DatanodeDetails recoveredDetails =
+        ContainerUtils.readDatanodeDetailsFrom(datanodeIdFile, conf);
+
+    // 4. Assertions
+    // Recovered UUID matches the one in the VERSION file
+    assertEquals(datanodeUuid, recoveredDetails.getUuidString());
+
+    // datanode.id file is recreated and is not empty
+    assertTrue(datanodeIdFile.length() > 0, "Datanode ID file should have been recreated with content");
+
+    // The recreated file can be read normally and contains the correct UUID
+    DatanodeDetails finalDetails = ContainerUtils.readDatanodeDetailsFrom(datanodeIdFile, conf);
+    assertEquals(datanodeUuid, finalDetails.getUuidString());
+  }
+
   private void assertWriteRead(@TempDir File tempDir,
       DatanodeDetails details) throws IOException {
     // Write a single ID to the file and read it out
     File file = new File(tempDir, "valid-values.id");
     ContainerUtils.writeDatanodeDetailsTo(details, file, conf);
 
-    DatanodeDetails read = ContainerUtils.readDatanodeDetailsFrom(file);
+    DatanodeDetails read = ContainerUtils.readDatanodeDetailsFrom(file, conf);
 
     assertDetailsEquals(details, read);
     assertEquals(details.getCurrentVersion(), read.getCurrentVersion());
@@ -163,7 +203,7 @@ private void assertWriteReadWithChangedIpAddress(@TempDir File tempDir,
     // Write a single ID to the file and read it out
     File file = new File(tempDir, "valid-values.id");
     ContainerUtils.writeDatanodeDetailsTo(details, file, conf);
-    DatanodeDetails read = ContainerUtils.readDatanodeDetailsFrom(file);
+    DatanodeDetails read = ContainerUtils.readDatanodeDetailsFrom(file, conf);
     assertEquals(details.getIpAddress(), read.getIpAddress());
     read.validateDatanodeIpAddress();
     assertEquals("127.0.0.1", read.getIpAddress());
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/datanode/schemaupgrade/UpgradeUtils.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/datanode/schemaupgrade/UpgradeUtils.java
index 09c2480e9ef..e447f101154 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/datanode/schemaupgrade/UpgradeUtils.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/datanode/schemaupgrade/UpgradeUtils.java
@@ -73,7 +73,7 @@ public static DatanodeDetails getDatanodeDetails(OzoneConfiguration conf)
     File idFile = new File(idFilePath);
     Preconditions.checkState(idFile.exists(),
         "Datanode id file: " + idFilePath + " not exists");
-    return ContainerUtils.readDatanodeDetailsFrom(idFile);
+    return ContainerUtils.readDatanodeDetailsFrom(idFile, conf);
   }
 
   public static File getVolumeUpgradeCompleteFile(HddsVolume volume) {

