This is an automated email from the ASF dual-hosted git repository.

nanda pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 403caf0361 HDDS-9022. DiskChecker incorrectly reporting errors. (#5086)
403caf0361 is described below

commit 403caf03615722545d9e6fcf784b78deebc3425d
Author: Ethan Rose <[email protected]>
AuthorDate: Sat Jul 22 19:16:22 2023 -0700

    HDDS-9022. DiskChecker incorrectly reporting errors. (#5086)
---
 .../common/statemachine/DatanodeConfiguration.java |   4 +-
 .../container/common/utils/DiskCheckUtil.java      |  18 +-
 .../container/common/volume/MetadataVolume.java    |  15 +
 .../container/common/volume/MutableVolumeSet.java  |  15 +-
 .../container/common/volume/StorageVolume.java     |   5 +-
 .../common/volume/StorageVolumeChecker.java        |  47 ++-
 .../ozone/container/ozoneimpl/OzoneContainer.java  |  19 +-
 .../container/common/volume/TestHddsVolume.java    |  27 ++
 .../container/common/volume/TestStorageVolume.java | 268 ----------------
 .../volume/TestStorageVolumeHealthChecks.java      | 346 +++++++++++++++++++++
 .../common/volume/TestVolumeSetDiskChecks.java     |   8 +
 .../TestDatanodeHddsVolumeFailureToleration.java   |  30 +-
 12 files changed, 477 insertions(+), 325 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
index 164af7f31c..e5f0046c05 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
@@ -43,7 +43,7 @@ public class DatanodeConfiguration {
   static final String PERIODIC_DISK_CHECK_INTERVAL_MINUTES_KEY =
       "hdds.datanode.periodic.disk.check.interval.minutes";
   public static final String DISK_CHECK_FILE_SIZE_KEY =
-      "hdds.datanode.disk.check.file.size";
+      "hdds.datanode.disk.check.io.file.size";
   public static final String DISK_CHECK_IO_TEST_COUNT_KEY =
       "hdds.datanode.disk.check.io.test.count";
   public static final String DISK_CHECK_IO_FAILURES_TOLERATED_KEY =
@@ -301,7 +301,7 @@ public class DatanodeConfiguration {
   private int volumeIOFailureTolerance =
       DISK_CHECK_IO_FAILURES_TOLERATED_DEFAULT;
 
-  @Config(key = "disk.check.file.size",
+  @Config(key = "disk.check.io.file.size",
       defaultValue = "100B",
       type = ConfigType.SIZE,
       tags = { DATANODE },
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/DiskCheckUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/DiskCheckUtil.java
index b267b1d479..b567841c02 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/DiskCheckUtil.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/DiskCheckUtil.java
@@ -139,15 +139,15 @@ public final class DiskCheckUtil {
         fos.getFD().sync();
       } catch (FileNotFoundException notFoundEx) {
         logError(storageDir, String.format("Could not find file %s for " +
-            "volume check.", testFile), notFoundEx);
+            "volume check.", testFile.getAbsolutePath()), notFoundEx);
         return false;
       } catch (SyncFailedException syncEx) {
         logError(storageDir, String.format("Could sync file %s to disk.",
-            testFile), syncEx);
+            testFile.getAbsolutePath()), syncEx);
         return false;
       } catch (IOException ioEx) {
         logError(storageDir, String.format("Could not write file %s " +
-            "for volume check.", testFile), ioEx);
+            "for volume check.", testFile.getAbsolutePath()), ioEx);
         return false;
       }
 
@@ -157,17 +157,17 @@ public final class DiskCheckUtil {
         int numBytesRead = fis.read(readBytes);
         if (numBytesRead != numBytesToWrite) {
           logError(storageDir, String.format("%d bytes written to file %s " +
-                  "but %d bytes were read back.", numBytesToWrite, testFile,
-              numBytesRead));
+                  "but %d bytes were read back.", numBytesToWrite,
+              testFile.getAbsolutePath(), numBytesRead));
           return false;
         }
       } catch (FileNotFoundException notFoundEx) {
         logError(storageDir, String.format("Could not find file %s " +
-            "for volume check.", testFile), notFoundEx);
+            "for volume check.", testFile.getAbsolutePath()), notFoundEx);
         return false;
       } catch (IOException ioEx) {
         logError(storageDir, String.format("Could not read file %s " +
-            "for volume check.", testFile), ioEx);
+            "for volume check.", testFile.getAbsolutePath()), ioEx);
         return false;
       }
 
@@ -175,14 +175,14 @@ public final class DiskCheckUtil {
       if (!Arrays.equals(writtenBytes, readBytes)) {
         logError(storageDir, String.format("%d Bytes read from file " +
                 "%s do not match the %d bytes that were written.",
-            writtenBytes.length, testFile, readBytes.length));
+            writtenBytes.length, testFile.getAbsolutePath(), readBytes.length));
         return false;
       }
 
       // Delete the file.
       if (!testFile.delete()) {
         logError(storageDir, String.format("Could not delete file %s " +
-            "for volume check.", testFile));
+            "for volume check.", testFile.getAbsolutePath()));
         return false;
       }
 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MetadataVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MetadataVolume.java
index c5b399b662..d7f22b42be 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MetadataVolume.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MetadataVolume.java
@@ -31,12 +31,27 @@ public class MetadataVolume extends StorageVolume {
 
   protected MetadataVolume(Builder b) throws IOException {
     super(b);
+    // Tmp directory on Metadata Volume uses the volume root as the working
+    // directory. It is not dependent on the cluster ID from SCM.
+    super.createTmpDirs("");
   }
 
   public VolumeType getType() {
     return type;
   }
 
+  @Override
+  public void format(String cid) throws IOException {
+    // No-op for Metadata volumes.
+  }
+
+  @Override
+  public void createTmpDirs(String workingDirName) {
+    // No-op for metadata volumes.
+    // Tmp directory is created on construction since it is not dependent on
+    // getting the cluster ID from SCM.
+  }
+
   /**
    * Builder class for MetadataVolume.
    */
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
index f4a5d0b9ad..985ddea8de 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
@@ -89,7 +89,6 @@ public class MutableVolumeSet implements VolumeSet {
   private final StorageVolumeFactory volumeFactory;
   private final StorageVolume.VolumeType volumeType;
   private int maxVolumeFailuresTolerated;
-  private boolean initialized;
 
   public MutableVolumeSet(String dnUuid, ConfigurationSource conf,
       StateContext context, StorageVolume.VolumeType volumeType,
@@ -101,7 +100,6 @@ public class MutableVolumeSet implements VolumeSet {
       ConfigurationSource conf, StateContext context,
       StorageVolume.VolumeType volumeType, StorageVolumeChecker volumeChecker
   ) throws IOException {
-    this.initialized = false;
     this.context = context;
     this.datanodeUuid = dnUuid;
     this.clusterID = clusterID;
@@ -198,9 +196,6 @@ public class MutableVolumeSet implements VolumeSet {
     if (volumeMap.size() == 0) {
       throw new DiskOutOfSpaceException("No storage locations configured");
     }
-
-    checkAllVolumes();
-    initialized = true;
   }
 
   /**
@@ -253,15 +248,7 @@ public class MutableVolumeSet implements VolumeSet {
 
       // check failed volume tolerated
       if (!hasEnoughVolumes()) {
-        // on startup, we could not try to stop uninitialized services
-        if (!initialized) {
-          throw new IOException("Don't have enough good volumes on startup,"
-              + " bad volumes detected: " + failedVolumes.size()
-              + " max tolerated: " + maxVolumeFailuresTolerated);
-        }
-        if (context != null) {
-          context.getParent().handleFatalVolumeFailures();
-        }
+        context.getParent().handleFatalVolumeFailures();
       }
     } finally {
       this.writeUnlock();
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java
index 95d1b2c2de..dd48f0bb17 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java
@@ -653,13 +653,14 @@ public abstract class StorageVolume
     // The failure counts can be left as is.
     if (currentIOFailureCount.get() > ioFailureTolerance) {
       LOG.info("Failed IO test for volume {}: the last {} runs " +
-              "encountered {}/{} tolerated failures.", this,
+              "encountered {} out of {} tolerated failures.", this,
           ioTestSlidingWindow.size(), currentIOFailureCount,
           ioFailureTolerance);
       return VolumeCheckResult.FAILED;
     } else if (LOG.isDebugEnabled()) {
       LOG.debug("IO test results for volume {}: the last {} runs encountered " +
-              "{}/{} tolerated failures", this, ioTestSlidingWindow.size(),
+              "{} out of {} tolerated failures", this,
+          ioTestSlidingWindow.size(),
           currentIOFailureCount, ioFailureTolerance);
     }
 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolumeChecker.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolumeChecker.java
index d9869894b2..bcb11fddf0 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolumeChecker.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolumeChecker.java
@@ -34,6 +34,7 @@ import java.util.concurrent.Executors;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.ScheduledFuture;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
 
 import com.google.common.util.concurrent.MoreExecutors;
@@ -91,13 +92,17 @@ public class StorageVolumeChecker {
 
   private final ExecutorService checkVolumeResultHandlerExecutorService;
 
+  private final DatanodeConfiguration dnConf;
+
   /**
    * An executor for periodic disk checks.
    */
   private final ScheduledExecutorService diskCheckerservice;
-  private final ScheduledFuture<?> periodicDiskChecker;
+  private ScheduledFuture<?> periodicDiskChecker;
   private final List<VolumeSet> registeredVolumeSets;
 
+  private final AtomicBoolean started;
+
   /**
    * @param conf  Configuration object.
    * @param timer {@link Timer} object used for throttling checks.
@@ -106,7 +111,7 @@ public class StorageVolumeChecker {
 
     this.timer = timer;
 
-    DatanodeConfiguration dnConf = conf.getObject(DatanodeConfiguration.class);
+    dnConf = conf.getObject(DatanodeConfiguration.class);
 
     maxAllowedTimeForCheckMs = dnConf.getDiskCheckTimeout().toMillis();
 
@@ -137,12 +142,18 @@ public class StorageVolumeChecker {
           return t;
         });
 
-    long periodicDiskCheckIntervalMinutes =
-        dnConf.getPeriodicDiskCheckIntervalMinutes();
-    this.periodicDiskChecker =
-        diskCheckerservice.scheduleWithFixedDelay(this::checkAllVolumeSets,
-            periodicDiskCheckIntervalMinutes, periodicDiskCheckIntervalMinutes,
-            TimeUnit.MINUTES);
+    started = new AtomicBoolean(false);
+  }
+
+  public void start() {
+    if (started.compareAndSet(false, true)) {
+      long periodicDiskCheckIntervalMinutes =
+          dnConf.getPeriodicDiskCheckIntervalMinutes();
+      periodicDiskChecker =
+          diskCheckerservice.scheduleWithFixedDelay(this::checkAllVolumeSets,
+              periodicDiskCheckIntervalMinutes,
+              periodicDiskCheckIntervalMinutes, TimeUnit.MINUTES);
+    }
   }
 
   public synchronized void registerVolumeSet(VolumeSet volumeSet) {
@@ -384,15 +395,17 @@ public class StorageVolumeChecker {
    * of the parameters.
    */
   public void shutdownAndWait(int gracePeriod, TimeUnit timeUnit) {
-    periodicDiskChecker.cancel(true);
-    diskCheckerservice.shutdownNow();
-    checkVolumeResultHandlerExecutorService.shutdownNow();
-    try {
-      delegateChecker.shutdownAndWait(gracePeriod, timeUnit);
-    } catch (InterruptedException e) {
-      LOG.warn("{} interrupted during shutdown.",
-          this.getClass().getSimpleName());
-      Thread.currentThread().interrupt();
+    if (started.compareAndSet(true, false)) {
+      periodicDiskChecker.cancel(true);
+      diskCheckerservice.shutdownNow();
+      checkVolumeResultHandlerExecutorService.shutdownNow();
+      try {
+        delegateChecker.shutdownAndWait(gracePeriod, timeUnit);
+      } catch (InterruptedException e) {
+        LOG.warn("{} interrupted during shutdown.",
+            this.getClass().getSimpleName());
+        Thread.currentThread().interrupt();
+      }
     }
   }
 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
index 48fd1b909d..3a3d1dcf6d 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
@@ -141,7 +141,7 @@ public class OzoneContainer {
     config = conf;
     this.datanodeDetails = datanodeDetails;
     this.context = context;
-    this.volumeChecker = getVolumeChecker(conf);
+    this.volumeChecker = new StorageVolumeChecker(conf, new Timer());
 
     volumeSet = new MutableVolumeSet(datanodeDetails.getUuidString(), conf,
         context, VolumeType.DATA_VOLUME, volumeChecker);
@@ -416,6 +416,18 @@ public class OzoneContainer {
       return;
     }
 
+    // Start background volume checks, which will begin after the configured
+    // delay.
+    volumeChecker.start();
+    // Do an immediate check of all volumes to ensure datanode health before
+    // proceeding.
+    volumeSet.checkAllVolumes();
+    metaVolumeSet.checkAllVolumes();
+    // DB volume set may be null if dedicated DB volumes are not used.
+    if (dbVolumeSet != null) {
+      dbVolumeSet.checkAllVolumes();
+    }
+
     LOG.info("Attempting to start container services.");
     startContainerScrub();
 
@@ -538,11 +550,6 @@ public class OzoneContainer {
     return dbVolumeSet;
   }
 
-  @VisibleForTesting
-  StorageVolumeChecker getVolumeChecker(ConfigurationSource conf) {
-    return new StorageVolumeChecker(conf, new Timer());
-  }
-
   public ContainerMetrics getMetrics() {
     return metrics;
   }
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestHddsVolume.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestHddsVolume.java
index d02b5733d5..898b44da7b 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestHddsVolume.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestHddsVolume.java
@@ -21,6 +21,7 @@ import java.io.File;
 import java.io.IOException;
 import java.nio.file.Paths;
 import java.time.Duration;
+import java.util.Properties;
 import java.util.UUID;
 import java.util.concurrent.atomic.AtomicLong;
 
@@ -46,6 +47,7 @@ import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
 import org.apache.hadoop.ozone.container.common.ContainerTestUtils;
+import org.apache.hadoop.ozone.container.common.helpers.DatanodeVersionFile;
 import org.apache.hadoop.ozone.container.common.utils.DatanodeStoreCache;
 import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
 import org.junit.Before;
@@ -81,6 +83,31 @@ public class TestHddsVolume {
     versionFile = StorageVolumeUtil.getVersionFile(rootDir);
   }
 
+  @Test
+  public void testReadPropertiesFromVersionFile() throws Exception {
+    StorageVolume volume = volumeBuilder.build();
+
+    volume.format(CLUSTER_ID);
+
+    Properties properties = DatanodeVersionFile.readFrom(versionFile);
+
+    String storageID = StorageVolumeUtil.getStorageID(properties, versionFile);
+    String clusterID = StorageVolumeUtil.getClusterID(
+        properties, versionFile, CLUSTER_ID);
+    String datanodeUuid = StorageVolumeUtil.getDatanodeUUID(
+        properties, versionFile, DATANODE_UUID);
+    long cTime = StorageVolumeUtil.getCreationTime(
+        properties, versionFile);
+    int layoutVersion = StorageVolumeUtil.getLayOutVersion(
+        properties, versionFile);
+
+    assertEquals(volume.getStorageID(), storageID);
+    assertEquals(volume.getClusterID(), clusterID);
+    assertEquals(volume.getDatanodeUuid(), datanodeUuid);
+    assertEquals(volume.getCTime(), cTime);
+    assertEquals(volume.getLayoutVersion(), layoutVersion);
+  }
+
   @Test
   public void testHddsVolumeInitialization() throws Exception {
     HddsVolume volume = volumeBuilder.build();
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolume.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolume.java
deleted file mode 100644
index 74469c78b5..0000000000
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolume.java
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership.  The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package org.apache.hadoop.ozone.container.common.volume;
-
-import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult;
-import org.apache.hadoop.hdds.conf.OzoneConfiguration;
-import org.apache.hadoop.hdds.fs.MockSpaceUsageCheckFactory;
-import org.apache.hadoop.ozone.container.common.helpers.DatanodeVersionFile;
-import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
-import org.apache.hadoop.ozone.container.common.utils.DiskCheckUtil;
-import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-import java.io.File;
-import java.util.Properties;
-import java.util.UUID;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-/**
- * Test for StorageVolume.
- */
-public class TestStorageVolume {
-
-  private static final String DATANODE_UUID = UUID.randomUUID().toString();
-  private static final String CLUSTER_ID = UUID.randomUUID().toString();
-  private static final OzoneConfiguration CONF = new OzoneConfiguration();
-
-  @Rule
-  public TemporaryFolder folder = new TemporaryFolder();
-
-  private HddsVolume.Builder volumeBuilder;
-  private File versionFile;
-
-  private static final DiskCheckUtil.DiskChecks IO_FAILURE =
-      new DiskCheckUtil.DiskChecks() {
-        @Override
-        public boolean checkReadWrite(File storageDir, File testFileDir,
-                                      int numBytesToWrite) {
-          return false;
-        }
-      };
-
-  @Before
-  public void setup() throws Exception {
-    File rootDir = new File(folder.getRoot(), HddsVolume.HDDS_VOLUME_DIR);
-    volumeBuilder = new HddsVolume.Builder(folder.getRoot().getPath())
-        .datanodeUuid(DATANODE_UUID)
-        .conf(CONF)
-        .usageCheckFactory(MockSpaceUsageCheckFactory.NONE);
-    versionFile = StorageVolumeUtil.getVersionFile(rootDir);
-    DiskCheckUtil.clearTestImpl();
-  }
-
-  @Test
-  public void testReadPropertiesFromVersionFile() throws Exception {
-    HddsVolume volume = volumeBuilder.build();
-
-    volume.format(CLUSTER_ID);
-
-    Properties properties = DatanodeVersionFile.readFrom(versionFile);
-
-    String storageID = StorageVolumeUtil.getStorageID(properties, versionFile);
-    String clusterID = StorageVolumeUtil.getClusterID(
-        properties, versionFile, CLUSTER_ID);
-    String datanodeUuid = StorageVolumeUtil.getDatanodeUUID(
-        properties, versionFile, DATANODE_UUID);
-    long cTime = StorageVolumeUtil.getCreationTime(
-        properties, versionFile);
-    int layoutVersion = StorageVolumeUtil.getLayOutVersion(
-        properties, versionFile);
-
-    assertEquals(volume.getStorageID(), storageID);
-    assertEquals(volume.getClusterID(), clusterID);
-    assertEquals(volume.getDatanodeUuid(), datanodeUuid);
-    assertEquals(volume.getCTime(), cTime);
-    assertEquals(volume.getLayoutVersion(), layoutVersion);
-  }
-
-  @Test
-  public void testCheckExistence() throws Exception {
-    HddsVolume volume = volumeBuilder.build();
-    volume.format(CLUSTER_ID);
-
-    VolumeCheckResult result = volume.check(false);
-    assertEquals(VolumeCheckResult.HEALTHY, result);
-
-    final DiskCheckUtil.DiskChecks doesNotExist =
-        new DiskCheckUtil.DiskChecks() {
-          @Override
-          public boolean checkExistence(File storageDir) {
-            return false;
-          }
-        };
-
-    DiskCheckUtil.setTestImpl(doesNotExist);
-    result = volume.check(false);
-    assertEquals(VolumeCheckResult.FAILED, result);
-  }
-
-  @Test
-  public void testCheckPermissions() throws Exception {
-    HddsVolume volume = volumeBuilder.build();
-    volume.format(CLUSTER_ID);
-
-    VolumeCheckResult result = volume.check(false);
-    assertEquals(VolumeCheckResult.HEALTHY, result);
-
-    final DiskCheckUtil.DiskChecks noPermissions =
-        new DiskCheckUtil.DiskChecks() {
-          @Override
-          public boolean checkPermissions(File storageDir) {
-            return false;
-          }
-        };
-
-    DiskCheckUtil.setTestImpl(noPermissions);
-    result = volume.check(false);
-    assertEquals(VolumeCheckResult.FAILED, result);
-  }
-
-  /**
-   * Setting test count to 0 should disable IO tests.
-   */
-  @Test
-  public void testCheckIODisabled() throws Exception {
-    DatanodeConfiguration dnConf = CONF.getObject(DatanodeConfiguration.class);
-    dnConf.setVolumeIOTestCount(0);
-    CONF.setFromObject(dnConf);
-    volumeBuilder.conf(CONF);
-    HddsVolume volume = volumeBuilder.build();
-    volume.format(CLUSTER_ID);
-
-    DiskCheckUtil.setTestImpl(IO_FAILURE);
-    assertEquals(VolumeCheckResult.HEALTHY, volume.check(false));
-  }
-
-  @Test
-  public void testCheckIODefaultConfigs() {
-    CONF.clear();
-    DatanodeConfiguration dnConf = CONF.getObject(DatanodeConfiguration.class);
-    // Make sure default values are not invalid.
-    assertTrue(dnConf.getVolumeIOFailureTolerance() <
-        dnConf.getVolumeIOTestCount());
-  }
-
-  @Test
-  public void testCheckIOInvalidConfig() throws Exception {
-    HddsVolume volume = volumeBuilder.build();
-    volume.format(CLUSTER_ID);
-    DatanodeConfiguration dnConf = CONF.getObject(DatanodeConfiguration.class);
-
-    // When failure tolerance is above test count, default values should be
-    // used.
-    dnConf.setVolumeIOTestCount(3);
-    dnConf.setVolumeIOFailureTolerance(4);
-    CONF.setFromObject(dnConf);
-    dnConf = CONF.getObject(DatanodeConfiguration.class);
-    assertEquals(dnConf.getVolumeIOTestCount(),
-        DatanodeConfiguration.DISK_CHECK_IO_TEST_COUNT_DEFAULT);
-    assertEquals(dnConf.getVolumeIOFailureTolerance(),
-        DatanodeConfiguration.DISK_CHECK_IO_FAILURES_TOLERATED_DEFAULT);
-
-    // When test count and failure tolerance are set to the same value,
-    // Default values should be used.
-    dnConf.setVolumeIOTestCount(2);
-    dnConf.setVolumeIOFailureTolerance(2);
-    CONF.setFromObject(dnConf);
-    dnConf = CONF.getObject(DatanodeConfiguration.class);
-    assertEquals(DatanodeConfiguration.DISK_CHECK_IO_TEST_COUNT_DEFAULT,
-        dnConf.getVolumeIOTestCount());
-    assertEquals(DatanodeConfiguration.DISK_CHECK_IO_FAILURES_TOLERATED_DEFAULT,
-        dnConf.getVolumeIOFailureTolerance());
-
-    // Negative test count should reset to default value.
-    dnConf.setVolumeIOTestCount(-1);
-    CONF.setFromObject(dnConf);
-    dnConf = CONF.getObject(DatanodeConfiguration .class);
-    assertEquals(DatanodeConfiguration.DISK_CHECK_IO_TEST_COUNT_DEFAULT,
-        dnConf.getVolumeIOTestCount());
-
-    // Negative failure tolerance should reset to default value.
-    dnConf.setVolumeIOFailureTolerance(-1);
-    CONF.setFromObject(dnConf);
-    dnConf = CONF.getObject(DatanodeConfiguration .class);
-    assertEquals(DatanodeConfiguration.DISK_CHECK_IO_FAILURES_TOLERATED_DEFAULT,
-        dnConf.getVolumeIOFailureTolerance());
-  }
-
-  @Test
-  public void testCheckIOInitiallyPassing() throws Exception {
-    testCheckIOUntilFailure(3, 1, true, true, true, false, true, false);
-  }
-
-  @Test
-  public void testCheckIOEarlyFailure() throws Exception {
-    testCheckIOUntilFailure(3, 1, false, false);
-  }
-
-  @Test
-  public void testCheckIOFailuresDiscarded() throws Exception {
-    testCheckIOUntilFailure(3, 1, false, true, true, true, false, false);
-  }
-
-  @Test
-  public void testCheckIOAlternatingFailures() throws Exception {
-    testCheckIOUntilFailure(3, 1, true, false, true, false);
-  }
-
-  /**
-   * Helper method to test the sliding window of IO checks before volume
-   * failure.
-   *
-   * @param ioTestCount The number of most recent tests whose results should
-   *    be considered.
-   * @param ioFailureTolerance The number of IO failures tolerated out of the
-   *    last {@param ioTestCount} tests.
-   * @param checkResults The result of the IO check for each run. Volume
-   *    should fail after the last IO check is completed.
-   */
-  private void testCheckIOUntilFailure(int ioTestCount, int ioFailureTolerance,
-      boolean... checkResults) throws Exception {
-    DatanodeConfiguration dnConf = CONF.getObject(DatanodeConfiguration.class);
-    dnConf.setVolumeIOTestCount(ioTestCount);
-    dnConf.setVolumeIOFailureTolerance(ioFailureTolerance);
-    CONF.setFromObject(dnConf);
-    volumeBuilder.conf(CONF);
-    HddsVolume volume = volumeBuilder.build();
-    volume.format(CLUSTER_ID);
-
-    for (int i = 0; i < checkResults.length; i++) {
-      final boolean result = checkResults[i];
-      final DiskCheckUtil.DiskChecks ioResult = new DiskCheckUtil.DiskChecks() {
-            @Override
-            public boolean checkReadWrite(File storageDir, File testDir,
-                int numBytesToWrite) {
-              return result;
-            }
-          };
-      DiskCheckUtil.setTestImpl(ioResult);
-      if (i < checkResults.length - 1) {
-        assertEquals("Unexpected IO failure in run " + i,
-            VolumeCheckResult.HEALTHY, volume.check(false));
-      } else {
-        assertEquals("Unexpected IO success in run " + i,
-            VolumeCheckResult.FAILED, volume.check(false));
-      }
-    }
-  }
-}
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java
new file mode 100644
index 0000000000..e2fdbdc937
--- /dev/null
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java
@@ -0,0 +1,346 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.ozone.container.common.volume;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.fs.MockSpaceUsageCheckFactory;
+import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
+import org.apache.hadoop.ozone.container.common.utils.DiskCheckUtil;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Named;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.io.File;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.UUID;
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Test for StorageVolume health checks using Real volume instances with
+ * mocked checkers to simulate failures.
+ */
+public class TestStorageVolumeHealthChecks {
+
+  private static final String DATANODE_UUID = UUID.randomUUID().toString();
+  private static final String CLUSTER_ID = UUID.randomUUID().toString();
+  private static final OzoneConfiguration CONF = new OzoneConfiguration();
+
+  @TempDir
+  private static Path volumePath;
+
+  public static Stream<Arguments> volumeBuilders() {
+    HddsVolume.Builder hddsVolumeBuilder =
+        new HddsVolume.Builder(volumePath.toString())
+            .datanodeUuid(DATANODE_UUID)
+            .conf(CONF)
+            .usageCheckFactory(MockSpaceUsageCheckFactory.NONE);
+
+    MetadataVolume.Builder metadataVolumeBuilder =
+        new MetadataVolume.Builder(volumePath.toString())
+            .datanodeUuid(DATANODE_UUID)
+            .conf(CONF)
+            .usageCheckFactory(MockSpaceUsageCheckFactory.NONE);
+
+    DbVolume.Builder dbVolumeBuilder =
+        new DbVolume.Builder(volumePath.toString())
+            .datanodeUuid(DATANODE_UUID)
+            .conf(CONF)
+            .usageCheckFactory(MockSpaceUsageCheckFactory.NONE);
+
+    return Stream.of(
+        Arguments.of(Named.of("HDDS Volume", hddsVolumeBuilder)),
+        Arguments.of(Named.of("Metadata Volume", metadataVolumeBuilder)),
+        Arguments.of(Named.of("DB Volume", dbVolumeBuilder))
+    );
+  }
+
+  @BeforeEach
+  public void setup() throws Exception {
+    // Volume path must be static to construct volume argument provider, but
+    // needs to be cleared before each test.
+    FileUtils.deleteDirectory(volumePath.toFile());
+    DiskCheckUtil.clearTestImpl();
+  }
+
+  @ParameterizedTest
+  @MethodSource("volumeBuilders")
+  public void testCheckExistence(StorageVolume.Builder<?> builder)
+      throws Exception {
+    StorageVolume volume = builder.build();
+    volume.format(CLUSTER_ID);
+    volume.createTmpDirs(CLUSTER_ID);
+
+    VolumeCheckResult result = volume.check(false);
+    assertEquals(VolumeCheckResult.HEALTHY, result);
+
+    final DiskCheckUtil.DiskChecks doesNotExist =
+        new DiskCheckUtil.DiskChecks() {
+          @Override
+          public boolean checkExistence(File storageDir) {
+            return false;
+          }
+        };
+
+    DiskCheckUtil.setTestImpl(doesNotExist);
+    result = volume.check(false);
+    assertEquals(VolumeCheckResult.FAILED, result);
+  }
+
+  @ParameterizedTest
+  @MethodSource("volumeBuilders")
+  public void testCheckPermissions(StorageVolume.Builder<?> builder)
+      throws Exception {
+    StorageVolume volume = builder.build();
+    volume.format(CLUSTER_ID);
+    volume.createTmpDirs(CLUSTER_ID);
+
+    VolumeCheckResult result = volume.check(false);
+    assertEquals(VolumeCheckResult.HEALTHY, result);
+
+    final DiskCheckUtil.DiskChecks noPermissions =
+        new DiskCheckUtil.DiskChecks() {
+          @Override
+          public boolean checkPermissions(File storageDir) {
+            return false;
+          }
+        };
+
+    DiskCheckUtil.setTestImpl(noPermissions);
+    result = volume.check(false);
+    assertEquals(VolumeCheckResult.FAILED, result);
+  }
+
+  /**
+   * Setting test count to 0 should disable IO tests.
+   */
+  @ParameterizedTest
+  @MethodSource("volumeBuilders")
+  public void testCheckIODisabled(StorageVolume.Builder<?> builder)
+      throws Exception {
+    DatanodeConfiguration dnConf = CONF.getObject(DatanodeConfiguration.class);
+    dnConf.setVolumeIOTestCount(0);
+    CONF.setFromObject(dnConf);
+
+    builder.conf(CONF);
+    StorageVolume volume = builder.build();
+    volume.format(CLUSTER_ID);
+    volume.createTmpDirs(CLUSTER_ID);
+
+    DiskCheckUtil.DiskChecks ioFailure = new DiskCheckUtil.DiskChecks() {
+          @Override
+          public boolean checkReadWrite(File storageDir, File testFileDir,
+                                        int numBytesToWrite) {
+            return false;
+          }
+        };
+    DiskCheckUtil.setTestImpl(ioFailure);
+    assertEquals(VolumeCheckResult.HEALTHY, volume.check(false));
+  }
+
+  @Test
+  public void testCheckIODefaultConfigs() {
+    CONF.clear();
+    DatanodeConfiguration dnConf = CONF.getObject(DatanodeConfiguration.class);
+    // Make sure default values are not invalid.
+    assertTrue(dnConf.getVolumeIOFailureTolerance() <
+        dnConf.getVolumeIOTestCount());
+  }
+
+  @Test
+  public void testCheckIOInvalidConfig() {
+    DatanodeConfiguration dnConf = CONF.getObject(DatanodeConfiguration.class);
+
+    // When failure tolerance is above test count, default values should be
+    // used.
+    dnConf.setVolumeIOTestCount(3);
+    dnConf.setVolumeIOFailureTolerance(4);
+    CONF.setFromObject(dnConf);
+    dnConf = CONF.getObject(DatanodeConfiguration.class);
+    assertEquals(dnConf.getVolumeIOTestCount(),
+        DatanodeConfiguration.DISK_CHECK_IO_TEST_COUNT_DEFAULT);
+    assertEquals(dnConf.getVolumeIOFailureTolerance(),
+        DatanodeConfiguration.DISK_CHECK_IO_FAILURES_TOLERATED_DEFAULT);
+
+    // When test count and failure tolerance are set to the same value,
+    // Default values should be used.
+    dnConf.setVolumeIOTestCount(2);
+    dnConf.setVolumeIOFailureTolerance(2);
+    CONF.setFromObject(dnConf);
+    dnConf = CONF.getObject(DatanodeConfiguration.class);
+    assertEquals(DatanodeConfiguration.DISK_CHECK_IO_TEST_COUNT_DEFAULT,
+        dnConf.getVolumeIOTestCount());
+    assertEquals(DatanodeConfiguration.DISK_CHECK_IO_FAILURES_TOLERATED_DEFAULT,
+        dnConf.getVolumeIOFailureTolerance());
+
+    // Negative test count should reset to default value.
+    dnConf.setVolumeIOTestCount(-1);
+    CONF.setFromObject(dnConf);
+    dnConf = CONF.getObject(DatanodeConfiguration .class);
+    assertEquals(DatanodeConfiguration.DISK_CHECK_IO_TEST_COUNT_DEFAULT,
+        dnConf.getVolumeIOTestCount());
+
+    // Negative failure tolerance should reset to default value.
+    dnConf.setVolumeIOFailureTolerance(-1);
+    CONF.setFromObject(dnConf);
+    dnConf = CONF.getObject(DatanodeConfiguration .class);
+    assertEquals(DatanodeConfiguration.DISK_CHECK_IO_FAILURES_TOLERATED_DEFAULT,
+        dnConf.getVolumeIOFailureTolerance());
+  }
+
+  @ParameterizedTest
+  @MethodSource("volumeBuilders")
+  public void testCheckIOInitiallyPassing(StorageVolume.Builder<?> builder)
+      throws Exception {
+    testCheckIOUntilFailure(builder, 3, 1, true, true, true, false, true,
+        false);
+  }
+
+  @ParameterizedTest
+  @MethodSource("volumeBuilders")
+  public void testCheckIOEarlyFailure(StorageVolume.Builder<?> builder)
+      throws Exception {
+    testCheckIOUntilFailure(builder, 3, 1, false, false);
+  }
+
+  @ParameterizedTest
+  @MethodSource("volumeBuilders")
+  public void testCheckIOFailuresDiscarded(StorageVolume.Builder<?> builder)
+      throws Exception {
+    testCheckIOUntilFailure(builder, 3, 1, false, true, true, true, false,
+        false);
+  }
+
+  @ParameterizedTest
+  @MethodSource("volumeBuilders")
+  public void testCheckIOAlternatingFailures(StorageVolume.Builder<?> builder)
+      throws Exception {
+    testCheckIOUntilFailure(builder, 3, 1, true, false, true, false);
+  }
+
+  /**
+   * Helper method to test the sliding window of IO checks before volume
+   * failure.
+   *
+   * @param ioTestCount The number of most recent tests whose results should
+   *    be considered.
+   * @param ioFailureTolerance The number of IO failures tolerated out of the
+   *    last {@param ioTestCount} tests.
+   * @param checkResults The result of the IO check for each run. Volume
+   *    should fail after the last IO check is completed.
+   */
+  private void testCheckIOUntilFailure(StorageVolume.Builder<?> builder,
+      int ioTestCount, int ioFailureTolerance, boolean... checkResults)
+      throws Exception {
+    DatanodeConfiguration dnConf = CONF.getObject(DatanodeConfiguration.class);
+    dnConf.setVolumeIOTestCount(ioTestCount);
+    dnConf.setVolumeIOFailureTolerance(ioFailureTolerance);
+    CONF.setFromObject(dnConf);
+    builder.conf(CONF);
+    StorageVolume volume = builder.build();
+    volume.format(CLUSTER_ID);
+    volume.createTmpDirs(CLUSTER_ID);
+
+    for (int i = 0; i < checkResults.length; i++) {
+      final boolean result = checkResults[i];
+      final DiskCheckUtil.DiskChecks ioResult = new DiskCheckUtil.DiskChecks() {
+            @Override
+            public boolean checkReadWrite(File storageDir, File testDir,
+                int numBytesToWrite) {
+              return result;
+            }
+          };
+      DiskCheckUtil.setTestImpl(ioResult);
+      if (i < checkResults.length - 1) {
+        assertEquals(VolumeCheckResult.HEALTHY, volume.check(false),
+            "Unexpected IO failure in run " + i);
+      } else {
+        assertEquals(VolumeCheckResult.FAILED, volume.check(false),
+            "Unexpected IO success in run " + i);
+      }
+    }
+  }
+
+  @ParameterizedTest
+  @MethodSource("volumeBuilders")
+  public void testCorrectDirectoryChecked(StorageVolume.Builder<?> builder)
+      throws Exception {
+    StorageVolume volume = builder.build();
+    DiskCheckUtil.setTestImpl(new DirectoryCheck(volume));
+    volume.format(CLUSTER_ID);
+    volume.createTmpDirs(CLUSTER_ID);
+    volume.check(false);
+  }
+
+  /**
+   * Asserts that the disk checks are being done on the correct directory for
+   * each volume type.
+   */
+  private static final class DirectoryCheck implements
+      DiskCheckUtil.DiskChecks {
+    private final StorageVolume volume;
+
+    DirectoryCheck(StorageVolume volume) {
+      this.volume = volume;
+    }
+
+    @Override
+    public boolean checkExistence(File storageDir) {
+      assertEquals(volume.getStorageDir(), storageDir);
+      return true;
+    }
+
+    @Override
+    public boolean checkPermissions(File storageDir) {
+      assertEquals(volume.getStorageDir(), storageDir);
+      return true;
+    }
+
+    @Override
+    public boolean checkReadWrite(File storageDir, File testFileDir,
+        int numBytesToWrite) {
+      assertEquals(volume.getStorageDir(), storageDir);
+
+      Path expectedDiskCheckPath;
+      if (volume instanceof MetadataVolume) {
+        expectedDiskCheckPath = Paths.get(
+            volume.getStorageDir().getAbsolutePath(),
+            StorageVolume.TMP_DIR_NAME,
+            StorageVolume.TMP_DISK_CHECK_DIR_NAME);
+      } else {
+        expectedDiskCheckPath = Paths.get(
+            volume.getStorageDir().getAbsolutePath(),
+            volume.getClusterID(),
+            StorageVolume.TMP_DIR_NAME,
+            StorageVolume.TMP_DISK_CHECK_DIR_NAME);
+      }
+
+      assertEquals(expectedDiskCheckPath.toFile(), volume.getDiskCheckDir());
+      assertEquals(expectedDiskCheckPath.toFile(), testFileDir);
+      return true;
+    }
+  }
+}
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestVolumeSetDiskChecks.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestVolumeSetDiskChecks.java
index bb281f1eb5..10a1b43f90 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestVolumeSetDiskChecks.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestVolumeSetDiskChecks.java
@@ -154,14 +154,19 @@ public class TestVolumeSetDiskChecks {
         StorageVolume.VolumeType.DB_VOLUME,
         dummyChecker);
 
+    volumeSet.checkAllVolumes();
     Assert.assertEquals(volumeSet.getFailedVolumesList().size(),
         numBadVolumes);
     Assert.assertEquals(volumeSet.getVolumesList().size(),
         numVolumes - numBadVolumes);
+
+    metaVolumeSet.checkAllVolumes();
     Assert.assertEquals(metaVolumeSet.getFailedVolumesList().size(),
         numBadVolumes);
     Assert.assertEquals(metaVolumeSet.getVolumesList().size(),
         numVolumes - numBadVolumes);
+
+    dbVolumeSet.checkAllVolumes();
     Assert.assertEquals(dbVolumeSet.getFailedVolumesList().size(),
         numBadVolumes);
     Assert.assertEquals(dbVolumeSet.getVolumesList().size(),
@@ -197,10 +202,13 @@ public class TestVolumeSetDiskChecks {
         StorageVolume.VolumeType.DB_VOLUME,
         dummyChecker);
 
+    volumeSet.checkAllVolumes();
     assertEquals(volumeSet.getFailedVolumesList().size(), numVolumes);
     assertEquals(volumeSet.getVolumesList().size(), 0);
+    metaVolumeSet.checkAllVolumes();
     assertEquals(metaVolumeSet.getFailedVolumesList().size(), numVolumes);
     assertEquals(metaVolumeSet.getVolumesList().size(), 0);
+    dbVolumeSet.checkAllVolumes();
     assertEquals(dbVolumeSet.getFailedVolumesList().size(), numVolumes);
     assertEquals(dbVolumeSet.getVolumesList().size(), 0);
 
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureToleration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureToleration.java
index 25c0bac0e6..c0c181dc36 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureToleration.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureToleration.java
@@ -25,16 +25,21 @@ import org.apache.hadoop.hdds.conf.StorageUnit;
 import org.apache.hadoop.ozone.HddsDatanodeService;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
 import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
+import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine;
 import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
 import org.apache.hadoop.ozone.container.common.volume.StorageVolume;
 import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
 import org.apache.hadoop.ozone.dn.DatanodeTestUtils;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.ozone.test.GenericTestUtils;
+import org.apache.ozone.test.GenericTestUtils.LogCapturer;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.Timeout;
+import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.IOException;
@@ -124,13 +129,24 @@ public class TestDatanodeHddsVolumeFailureToleration {
     DatanodeTestUtils.simulateBadRootDir(volRootDir1);
 
     // restart datanode to test
-    try {
-      cluster.restartHddsDatanode(0, true);
-      Assert.fail();
-    } catch (RuntimeException e) {
-      Assert.assertTrue(e.getMessage()
-          .contains("Can't start the HDDS datanode plugin"));
-    }
+    // Make datanode throw an exception instead of exiting the jvm when too
+    // many volumes fail so that the test keeps running.
+    ExitUtil.disableSystemExit();
+    // Since the exception will not be thrown from the main thread, the
+    // datanode will not actually exit. Use log messages to determine that
+    // the ExitUtil was invoked which would terminate the process in a normal
+    // deployment.
+    LogCapturer dsmCapturer = LogCapturer.captureLogs(
+        LoggerFactory.getLogger(DatanodeStateMachine.class));
+    LogCapturer exitCapturer = LogCapturer.captureLogs(
+            LoggerFactory.getLogger(ExitUtil.class.getName()));
+    cluster.restartHddsDatanode(0, false);
+    // Give the datanode time to restart. This may be slow in a mini ozone
+    // cluster.
+    GenericTestUtils.waitFor(() -> exitCapturer.getOutput()
+        .contains("Exiting with status 1: ExitException"), 500, 60000);
+    Assert.assertTrue(dsmCapturer.getOutput()
+        .contains("DatanodeStateMachine Shutdown due to too many bad volumes"));
 
     // restore bad volumes
     DatanodeTestUtils.restoreBadRootDir(volRootDir0);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to