This is an automated email from the ASF dual-hosted git repository.

Gargi-jais11 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 48613a7a050 HDDS-15455. Implement Custom DataNode Container Directory Discovery and Duplicate Detection (#10414).
48613a7a050 is described below

commit 48613a7a050eb12f16f5bf822abcdd5e70cf0a93
Author: sreejasahithi <[email protected]>
AuthorDate: Thu Jun 18 17:38:07 2026 +0530

    HDDS-15455. Implement Custom DataNode Container Directory Discovery and Duplicate Detection (#10414).
    
    Co-authored-by: Sreeja Chintalapati <[email protected]>
---
 .../container/common/utils/StorageVolumeUtil.java  |  28 +++
 .../ozone/container/ozoneimpl/ContainerReader.java |  36 +--
 .../datanode/container/ContainerCommands.java      |   4 +-
 .../container/analyze/AnalyzeSubcommand.java       | 107 ++++++++
 .../analyze/ContainerDirectoryScanner.java         | 275 +++++++++++++++++++++
 .../container/analyze/ContainerDiskOccurrence.java |  61 +++++
 .../container/analyze/ContainerScanResult.java     |  52 ++++
 .../datanode/container/analyze/package-info.java   |  21 ++
 .../analyze/ContainerAnalyzeTestHelper.java        | 113 +++++++++
 .../container/analyze/TestAnalyzeSubcommand.java   | 189 ++++++++++++++
 .../analyze/TestContainerDirectoryScanner.java     | 154 ++++++++++++
 ...estDuplicateContainerDirScannerIntegration.java | 241 ++++++++++++++++++
 12 files changed, 1256 insertions(+), 25 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/StorageVolumeUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/StorageVolumeUtil.java
index c71fc6cde6d..eb6747a6bfd 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/StorageVolumeUtil.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/StorageVolumeUtil.java
@@ -28,6 +28,7 @@
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.ozone.OzoneConsts;
 import org.apache.hadoop.ozone.common.InconsistentStorageStateException;
+import org.apache.hadoop.ozone.common.Storage;
 import org.apache.hadoop.ozone.container.common.HDDSVolumeLayoutVersion;
 import org.apache.hadoop.ozone.container.common.volume.DbVolume;
 import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
@@ -274,4 +275,31 @@ public static boolean checkVolume(StorageVolume volume, 
String scmId,
 
     return success;
   }
+  
+  public static File resolveContainerCurrentDir(
+      File hddsRoot, String clusterId, File[] storageDirs)
+      throws InconsistentStorageStateException {
+
+    File clusterIdDir = new File(hddsRoot, clusterId);
+    //The subdirectory we should verify containers within.
+    // If this volume was formatted pre SCM HA, this will be the SCM ID.
+    // A cluster ID symlink will exist in this case only if this cluster is
+    // finalized for SCM HA.
+    // If the volume was formatted post SCM HA, this will be the cluster ID.
+    File idDir = clusterIdDir;
+
+    if (storageDirs.length == 1 && !clusterIdDir.exists()) {
+      // If the one directory is not the cluster ID directory, assume it is
+      // the old SCM ID directory used before SCM HA.
+      idDir = storageDirs[0];
+    } else if (!clusterIdDir.exists()) {
+      // There are 1 or more storage directories. We only care about the
+      // cluster ID directory.
+      throw new InconsistentStorageStateException(
+          "Volume " + hddsRoot + " is in an inconsistent state. Expected 
cluster ID directory "
+              + clusterIdDir + " not found.");
+    }
+
+    return new File(idDir, Storage.STORAGE_DIR_CURRENT);
+  }
 }
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
index 43aa05c850c..88a9cc50a40 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
@@ -28,11 +28,12 @@
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
 import 
org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
-import org.apache.hadoop.ozone.common.Storage;
+import org.apache.hadoop.ozone.common.InconsistentStorageStateException;
 import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
 import org.apache.hadoop.ozone.container.common.impl.ContainerData;
 import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml;
 import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
+import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
 import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
 import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer;
@@ -119,32 +120,19 @@ public void readVolume(File hddsVolumeRootDir) {
     // by HddsUtil#checkVolume once we have a cluster ID from SCM. No
     // operations to perform here in that case.
     if (storageDirs.length > 0) {
-      File clusterIDDir = new File(hddsVolumeRootDir,
-          hddsVolume.getClusterID());
-      // The subdirectory we should verify containers within.
-      // If this volume was formatted pre SCM HA, this will be the SCM ID.
-      // A cluster ID symlink will exist in this case only if this cluster is
-      // finalized for SCM HA.
-      // If the volume was formatted post SCM HA, this will be the cluster ID.
-      File idDir = clusterIDDir;
-      if (storageDirs.length == 1 && !clusterIDDir.exists()) {
-        // If the one directory is not the cluster ID directory, assume it is
-        // the old SCM ID directory used before SCM HA.
-        idDir = storageDirs[0];
-      } else {
-        // There are 1 or more storage directories. We only care about the
-        // cluster ID directory.
-        if (!clusterIDDir.exists()) {
-          LOG.error("Volume {} is in an inconsistent state. Expected " +
-              "clusterID directory {} not found.", hddsVolumeRootDir,
-              clusterIDDir);
-          volumeSet.failVolume(hddsVolumeRootDir.getPath());
-          return;
-        }
+      File currentDir;
+      try {
+        currentDir = 
StorageVolumeUtil.resolveContainerCurrentDir(hddsVolumeRootDir, 
+            hddsVolume.getClusterID(), storageDirs);
+      } catch (InconsistentStorageStateException e) {
+        LOG.error("Volume {} is in an inconsistent state. Expected " +
+                "clusterID directory {} not found.", hddsVolumeRootDir,
+            new File(hddsVolumeRootDir, hddsVolume.getClusterID()));
+        volumeSet.failVolume(hddsVolumeRootDir.getPath());
+        return;
       }
 
       LOG.info("Start to verify containers on volume {}", hddsVolumeRootDir);
-      File currentDir = new File(idDir, Storage.STORAGE_DIR_CURRENT);
       File[] containerTopDirs = currentDir.listFiles();
       if (containerTopDirs != null && containerTopDirs.length > 0) {
         for (File containerTopDir : containerTopDirs) {
diff --git a/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/ContainerCommands.java b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/ContainerCommands.java
index ec6bb17a9f7..2656269171f 100644
--- a/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/ContainerCommands.java
+++ b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/ContainerCommands.java
@@ -58,6 +58,7 @@
 import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController;
 import org.apache.hadoop.ozone.container.ozoneimpl.ContainerReader;
 import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures;
+import 
org.apache.hadoop.ozone.debug.datanode.container.analyze.AnalyzeSubcommand;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import picocli.CommandLine.Command;
@@ -75,7 +76,8 @@
         ListSubcommand.class,
         InfoSubcommand.class,
         ExportSubcommand.class,
-        InspectSubcommand.class
+        InspectSubcommand.class,
+        AnalyzeSubcommand.class
     })
 public class ContainerCommands extends AbstractSubcommand {
 
diff --git a/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/AnalyzeSubcommand.java b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/AnalyzeSubcommand.java
new file mode 100644
index 00000000000..12a0c67bd87
--- /dev/null
+++ b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/AnalyzeSubcommand.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.debug.datanode.container.analyze;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Callable;
+import org.apache.hadoop.hdds.cli.AbstractSubcommand;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import picocli.CommandLine;
+import picocli.CommandLine.Command;
+
+/**
+ * {@code ozone debug datanode container analyze}.
+ *
+ * <p>Compares on-disk container directories on this DataNode against SCM
+ * metadata to report inconsistencies.
+ */
+@Command(
+    name = "analyze",
+    description = "Analyze container consistency between on-disk container " +
+            "directories on this DataNode and SCM metadata. Must be run 
locally on a DataNode.")
+public class AnalyzeSubcommand extends AbstractSubcommand implements 
Callable<Void> {
+  @CommandLine.Option(names = {"--count"},
+          defaultValue = "20",
+          description = "Number of containers to display")
+  private int count;
+
+  @Override
+  public Void call() throws Exception {
+    if (count < 1) {
+      throw new IOException("Count must be an integer greater than 0.");
+    }
+    OzoneConfiguration conf = getOzoneConf();
+    ContainerScanResult scanResult = ContainerDirectoryScanner.scan(conf);
+    Map<Long, List<ContainerDiskOccurrence>> enrichedDuplicates =
+        ContainerDirectoryScanner.enrichDuplicates(scanResult.getDuplicates());
+
+    // TODO: SCM metadata lookup from --scm-db when provided.
+    // TODO: For each id in scanResult.getSingles().keySet() classified 
NOT_IN_SCM or DELETED:
+    //   enrichOccurrence(id, scanResult.getSingles().get(id)) and report.
+    // TODO: For each id in enrichedDuplicates.keySet() classified NOT_IN_SCM 
or DELETED:
+    //   enrichedDuplicates.get(id) is already enriched — just report.
+
+    printDuplicates(enrichedDuplicates);
+    printVolumeScanErrors(scanResult.getVolumeScanErrors());
+    return null;
+  }
+
+  private void printDuplicates(Map<Long, List<ContainerDiskOccurrence>> 
duplicates) {
+    long totalDuplicateIds = duplicates.size();
+    out().printf("Number of containers with duplicate container directories on 
this DataNode: %d%n", totalDuplicateIds);
+
+    if (totalDuplicateIds == 0) {
+      return;
+    }
+
+    if (totalDuplicateIds > count) {
+      out().printf("Showing first %d:%n", count);
+    }
+
+    duplicates.entrySet().stream()
+        .sorted(Map.Entry.comparingByKey())
+        .limit(count)
+        .forEach(entry -> {
+          long containerId = entry.getKey();
+          List<ContainerDiskOccurrence> occurrences = entry.getValue();
+          out().printf("Container %d (%d occurrences):%n", containerId, 
occurrences.size());
+          for (ContainerDiskOccurrence o : occurrences) {
+            out().printf("  path=%s%n", o.getContainerPath());
+            if (o.isSizeKnown()) {
+              out().printf("  status=%s size=%d bytes%n", o.getStatus(), 
o.getSizeBytes());
+            } else {
+              out().printf("  status=%s size=unavailable (failed to compute 
directory size)%n",
+                  o.getStatus());
+            }
+            out().println();
+          }
+        });
+  }
+
+  private void printVolumeScanErrors(List<String> volumeScanErrors) {
+    if (volumeScanErrors.isEmpty()) {
+      return;
+    }
+    err().printf("%nVolumes that failed to scan (%d):%n", 
volumeScanErrors.size());
+    for (String error : volumeScanErrors) {
+      err().printf("  %s%n", error);
+    }
+  }
+}
diff --git a/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerDirectoryScanner.java b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerDirectoryScanner.java
new file mode 100644
index 00000000000..02f7ab3c3f4
--- /dev/null
+++ b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerDirectoryScanner.java
@@ -0,0 +1,275 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.debug.datanode.container.analyze;
+
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.utils.HddsServerUtil;
+import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
+import org.apache.hadoop.ozone.common.InconsistentStorageStateException;
+import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
+import org.apache.hadoop.ozone.container.common.helpers.DatanodeVersionFile;
+import org.apache.hadoop.ozone.container.common.impl.ContainerData;
+import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml;
+import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
+import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Read-only walker for container directories under {@code hdds.datanode.dir}.
+ *
+ * <p>This scanner surfaces duplicate copies across volumes. Singleton 
container IDs
+ * are stored as a single path in {@link ContainerScanResult#getSingles()}; 
duplicate
+ * IDs are stored as path lists in {@link ContainerScanResult#getDuplicates()}.
+ * Size and metadata status are computed later via {@link 
#enrichDuplicates(Map)}.
+ */
+public final class ContainerDirectoryScanner {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(ContainerDirectoryScanner.class);
+
+  private ContainerDirectoryScanner() {
+    //Never constructed
+  }
+
+  public static ContainerScanResult scan(ConfigurationSource conf) throws 
IOException {
+    Map<Long, String> singles = new ConcurrentHashMap<>();
+    Map<Long, List<String>> duplicates = new ConcurrentHashMap<>();
+    List<String> volumeScanErrors = Collections.synchronizedList(new 
ArrayList<>());
+    List<String> volumeRootsToScan = resolveExistingVolumeRoots(conf);
+    if (volumeRootsToScan.isEmpty()) {
+      return new ContainerScanResult(singles, duplicates, volumeScanErrors);
+    }
+
+    int volumeCount = volumeRootsToScan.size();
+    ExecutorService executor = Executors.newFixedThreadPool(volumeCount,
+        new ThreadFactoryBuilder()
+            .setDaemon(true)
+            .setNameFormat("ContainerDirectoryScanner-%d")
+            .build());
+    
+    try {
+      List<Future<?>> futures = new ArrayList<>(volumeCount);
+      for (String volumeRoot : volumeRootsToScan) {
+        futures.add(executor.submit(() -> {
+          try {
+            scanVolume(volumeRoot, singles, duplicates);
+          } catch (IOException e) {
+            LOG.warn("Failed to scan volume {}", volumeRoot, e);
+            volumeScanErrors.add(volumeRoot + ": " + e.getMessage());
+          }
+        }));
+      }
+      for (Future<?> future : futures) {
+        try {
+          future.get();
+        } catch (ExecutionException e) {
+          throw new IOException("Unexpected error scanning volume", 
e.getCause());
+        } catch (InterruptedException e) {
+          Thread.currentThread().interrupt();
+          throw new IOException("Volume scan interrupted", e);
+        }
+      }
+    } finally {
+      executor.shutdownNow();
+    }
+    return new ContainerScanResult(singles, duplicates, volumeScanErrors);
+  }
+
+  private static List<String> resolveExistingVolumeRoots(ConfigurationSource 
conf) throws IOException {
+    List<String> volumeRootsToScan = new ArrayList<>();
+    for (String storageDir : HddsServerUtil.getDatanodeStorageDirs(conf)) {
+      String volumeRoot = StorageLocation.parse(storageDir).getUri().getPath();
+      if (!new File(volumeRoot).exists()) {
+        LOG.warn("Configured storage path {} does not exist, skipping", 
volumeRoot);
+        continue;
+      }
+      volumeRootsToScan.add(volumeRoot);
+    }
+    return volumeRootsToScan;
+  }
+
+  /**
+   * Scan a single DataNode storage volume root and merge results into {@code 
singles}
+   * and {@code duplicates}.
+   */
+  private static void scanVolume(String volumeRoot, Map<Long, String> singles,
+      Map<Long, List<String>> duplicates) throws IOException {
+    File hddsRoot = new File(volumeRoot, HddsVolume.HDDS_VOLUME_DIR);
+    if (!hddsRoot.isDirectory()) {
+      LOG.warn("HDDS root {} does not exist or is not a directory, skipping 
volume {}", hddsRoot, volumeRoot);
+      return;
+    }
+
+    File versionFile = StorageVolumeUtil.getVersionFile(hddsRoot);
+    Properties props = DatanodeVersionFile.readFrom(versionFile);
+    if (props.isEmpty()) {
+      throw new IOException("Version file " + versionFile + " is missing or 
empty");
+    }
+    String clusterId;
+    try {
+      clusterId = StorageVolumeUtil.getClusterID(props, versionFile, null);
+    } catch (InconsistentStorageStateException e) {
+      throw new IOException("Invalid version file " + versionFile, e);
+    }
+
+    File currentDir = resolveCurrentDir(hddsRoot, clusterId);
+    if (currentDir == null || !currentDir.isDirectory()) {
+      LOG.info("No current container directory under {}, skipping volume {}", 
hddsRoot, volumeRoot);
+      return;
+    }
+
+    LOG.info("Scanning container directories under {}", currentDir);
+    File[] containerTopDirs = currentDir.listFiles(File::isDirectory);
+    if (containerTopDirs == null) {
+      throw new IOException("Failed to list container top-level directories 
under " + currentDir);
+    }
+
+    for (File containerTopDir : containerTopDirs) {
+      File[] containerDirs = containerTopDir.listFiles(File::isDirectory);
+      if (containerDirs == null) {
+        LOG.warn("Failed to list container directories under {}", 
containerTopDir);
+        continue;
+      }
+      for (File containerDir : containerDirs) {
+        recordContainerDir(containerDir, singles, duplicates);
+      }
+    }
+  }
+
+  private static File resolveCurrentDir(File hddsRoot, String clusterId) 
throws IOException {
+    File[] storageDirs = hddsRoot.listFiles(File::isDirectory);
+    if (storageDirs == null) {
+      throw new IOException("IO error listing " + hddsRoot);
+    }
+    if (storageDirs.length == 0) {
+      return null;
+    }
+    return StorageVolumeUtil.resolveContainerCurrentDir(hddsRoot, clusterId, 
storageDirs);
+  }
+
+  private static void recordContainerDir(File containerDir, Map<Long, String> 
singles,
+      Map<Long, List<String>> duplicates) {
+    long containerId;
+    try {
+      containerId = ContainerUtils.getContainerID(containerDir);
+    } catch (NumberFormatException e) {
+      LOG.warn("Skipping non-numeric container directory {}", containerDir);
+      return;
+    }
+
+    String containerPath = containerDir.getAbsolutePath();
+    singles.compute(containerId, (id, firstPath) -> {
+      List<String> dupList = duplicates.get(id);
+      if (dupList != null) {
+        dupList.add(containerPath);
+        return null;
+      }
+      if (firstPath == null) {
+        return containerPath;
+      }
+      List<String> list = new ArrayList<>(2);
+      list.add(firstPath);
+      list.add(containerPath);
+      duplicates.put(id, list);
+      return null;
+    });
+  }
+
+  public static Map<Long, List<ContainerDiskOccurrence>> 
enrichDuplicates(Map<Long, List<String>> duplicates) {
+    Map<Long, List<ContainerDiskOccurrence>> enriched = new 
HashMap<>(duplicates.size());
+    for (Map.Entry<Long, List<String>> entry : duplicates.entrySet()) {
+      long containerId = entry.getKey();
+      List<String> containerPaths = new ArrayList<>(entry.getValue());
+      Collections.sort(containerPaths);
+      List<ContainerDiskOccurrence> occurrences = new 
ArrayList<>(containerPaths.size());
+      for (String containerPath : containerPaths) {
+        occurrences.add(enrichOccurrence(containerId, containerPath));
+      }
+      enriched.put(containerId, Collections.unmodifiableList(occurrences));
+    }
+    return Collections.unmodifiableMap(enriched);
+  }
+
+  /**
+   * Compute directory size and metadata status for on-disk container path.
+   */
+  static ContainerDiskOccurrence enrichOccurrence(long containerId, String 
containerPath) {
+    File containerDir = new File(containerPath);
+    File containerFile = ContainerUtils.getContainerFile(containerDir);
+    ContainerDiskScanStatus status;
+    if (!containerFile.exists()) {
+      status = ContainerDiskScanStatus.MISSING_METADATA;
+    } else {
+      status = readMetadataStatus(containerId, containerFile);
+    }
+
+    boolean sizeKnown = true;
+    long sizeBytes;
+    try {
+      sizeBytes = FileUtils.sizeOfDirectory(containerDir);
+    } catch (IllegalArgumentException e) {
+      LOG.warn("Failed to compute size for container directory {}", 
containerDir, e);
+      sizeBytes = 0L;
+      sizeKnown = false;
+    }
+
+    return new ContainerDiskOccurrence(containerId, containerPath, sizeBytes, 
sizeKnown, status);
+  }
+
+  private static ContainerDiskScanStatus readMetadataStatus(long containerId, 
File containerFile) {
+    try {
+      ContainerData containerData = 
ContainerDataYaml.readContainerFile(containerFile);
+      if (containerId != containerData.getContainerID()) {
+        LOG.warn("Container ID mismatch in {}. Directory name is {} but 
metadata has {}.",
+            containerFile, containerId, containerData.getContainerID());
+        return ContainerDiskScanStatus.INVALID_METADATA;
+      }
+      return ContainerDiskScanStatus.VALID;
+    } catch (IOException e) {
+      LOG.warn("Failed to parse container metadata file {}", containerFile, e);
+      return ContainerDiskScanStatus.INVALID_METADATA;
+    }
+  }
+
+  /**
+   * On-disk status of a container directory discovered during a DN scan.
+   */
+  public enum ContainerDiskScanStatus {
+    /** {@code metadata/{containerId}.container} exists and parses correctly. 
*/
+    VALID,
+    /** Container directory exists but the {@code .container} file is missing. 
*/
+    MISSING_METADATA,
+    /** {@code .container} exists but is unreadable or its ID does not match 
the directory name. */
+    INVALID_METADATA
+  }
+}
diff --git a/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerDiskOccurrence.java b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerDiskOccurrence.java
new file mode 100644
index 00000000000..c6716878527
--- /dev/null
+++ b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerDiskOccurrence.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.debug.datanode.container.analyze;
+
+import java.util.Objects;
+
+/**
+ * On-disk occurrence of a container directory on a DataNode volume.
+ */
+public final class ContainerDiskOccurrence {
+
+  private final long containerId;
+  private final String containerPath;
+  private final long sizeBytes;
+  private final boolean sizeKnown;
+  private final ContainerDirectoryScanner.ContainerDiskScanStatus status;
+
+  ContainerDiskOccurrence(long containerId, String containerPath, long 
sizeBytes,
+      boolean sizeKnown, ContainerDirectoryScanner.ContainerDiskScanStatus 
status) {
+    this.containerId = containerId;
+    this.containerPath = Objects.requireNonNull(containerPath, 
"containerPath");
+    this.sizeBytes = sizeBytes;
+    this.sizeKnown = sizeKnown;
+    this.status = Objects.requireNonNull(status, "status");
+  }
+
+  public long getContainerId() {
+    return containerId;
+  }
+
+  public String getContainerPath() {
+    return containerPath;
+  }
+
+  public long getSizeBytes() {
+    return sizeBytes;
+  }
+
+  public boolean isSizeKnown() {
+    return sizeKnown;
+  }
+
+  public ContainerDirectoryScanner.ContainerDiskScanStatus getStatus() {
+    return status;
+  }
+}
diff --git a/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerScanResult.java b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerScanResult.java
new file mode 100644
index 00000000000..e1e4d2243ab
--- /dev/null
+++ b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerScanResult.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.debug.datanode.container.analyze;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Result of a {@link ContainerDirectoryScanner} walk over DataNode storage 
volumes.
+ */
+public final class ContainerScanResult {
+
+  private final Map<Long, String> singles;
+  private final Map<Long, List<String>> duplicates;
+  private final List<String> volumeScanErrors;
+
+  ContainerScanResult(Map<Long, String> singles, Map<Long, List<String>> 
duplicates,
+      List<String> volumeScanErrors) {
+    this.singles = Objects.requireNonNull(singles, "singles");
+    this.duplicates = Objects.requireNonNull(duplicates, "duplicates");
+    this.volumeScanErrors = Objects.requireNonNull(volumeScanErrors, 
"volumeScanErrors");
+  }
+
+  public Map<Long, String> getSingles() {
+    return Collections.unmodifiableMap(singles);
+  }
+
+  public Map<Long, List<String>> getDuplicates() {
+    return Collections.unmodifiableMap(duplicates);
+  }
+
+  public List<String> getVolumeScanErrors() {
+    return Collections.unmodifiableList(volumeScanErrors);
+  }
+}
diff --git a/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/package-info.java b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/package-info.java
new file mode 100644
index 00000000000..f1ad378b8a8
--- /dev/null
+++ b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Container analysis for DataNode container debug command.
+ */
+package org.apache.hadoop.ozone.debug.datanode.container.analyze;
diff --git a/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerAnalyzeTestHelper.java b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerAnalyzeTestHelper.java
new file mode 100644
index 00000000000..c9d3e01483d
--- /dev/null
+++ b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerAnalyzeTestHelper.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.debug.datanode.container.analyze;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.UUID;
+import org.apache.hadoop.conf.StorageUnit;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.common.Storage;
+import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
+import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml;
+import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion;
+import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
+import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
+import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Shared on-disk DataNode volume and container directory setup for analyze tests.
+ */
+final class ContainerAnalyzeTestHelper {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(ContainerAnalyzeTestHelper.class);
+
+  // Directory names used when laying out a container by hand.
+  private static final String CONTAINER_TOP_DIR = "containerDir0";
+  private static final String METADATA_DIR = "metadata";
+  private static final String CHUNKS_DIR = "chunks";
+
+  private final Path tempDir;
+  private final OzoneConfiguration conf;
+  private final String clusterId;
+  private final String datanodeUuid;
+
+  ContainerAnalyzeTestHelper(Path tempDir, OzoneConfiguration conf,
+      String clusterId, String datanodeUuid) {
+    this.tempDir = tempDir;
+    this.conf = conf;
+    this.clusterId = clusterId;
+    this.datanodeUuid = datanodeUuid;
+  }
+
+  /**
+   * Builds an {@link HddsVolume} rooted at {@code tempDir/name} and passes it
+   * to {@code StorageVolumeUtil.checkVolume}, using the cluster ID for both ID
+   * arguments.
+   *
+   * @param name volume directory name, resolved against the temp directory
+   * @return the volume root directory
+   */
+  File formatVolume(String name) throws IOException {
+    File volumeRoot = tempDir.resolve(name).toFile();
+    HddsVolume volume = new HddsVolume.Builder(volumeRoot.getAbsolutePath())
+        .conf(conf)
+        .datanodeUuid(datanodeUuid)
+        .clusterID(clusterId)
+        .build();
+    StorageVolumeUtil.checkVolume(volume, clusterId, clusterId, conf, LOG, null);
+    return volumeRoot;
+  }
+
+  /**
+   * Resolves the {@code containerDir0} directory of the given volume for this
+   * helper's cluster ID.
+   */
+  Path containerTopDir(File volumeRoot) {
+    return volumeRoot.toPath()
+        .resolve(HddsVolume.HDDS_VOLUME_DIR)
+        .resolve(clusterId)
+        .resolve(Storage.STORAGE_DIR_CURRENT)
+        .resolve(CONTAINER_TOP_DIR);
+  }
+
+  /**
+   * Returns the absolute path of the directory for {@code containerId} on the volume.
+   */
+  String containerPath(File volumeRoot, long containerId) {
+    return containerDir(volumeRoot, containerId).toFile().getAbsolutePath();
+  }
+
+  /**
+   * Creates a container directory with {@code metadata} and {@code chunks}
+   * sub-directories and, optionally, a container file.
+   *
+   * @param volumeRoot volume root directory
+   * @param containerId ID used as the directory name
+   * @param writeMetadata whether to write the container file
+   * @param metadataContainerId ID recorded in the container file; may differ
+   *     from {@code containerId} to simulate an ID mismatch
+   */
+  void createContainerDirectory(File volumeRoot, long containerId,
+      boolean writeMetadata, long metadataContainerId) throws IOException {
+    Path containerBase = createContainerLayout(volumeRoot, containerId);
+    if (!writeMetadata) {
+      return;
+    }
+
+    KeyValueContainerData containerData = new KeyValueContainerData(
+        metadataContainerId,
+        ContainerLayoutVersion.FILE_PER_BLOCK,
+        (long) StorageUnit.GB.toBytes(1),
+        UUID.randomUUID().toString(),
+        datanodeUuid);
+    containerData.setChunksPath(containerBase.resolve(CHUNKS_DIR).toString());
+    containerData.setMetadataPath(containerBase.resolve(METADATA_DIR).toString());
+    File containerFile = ContainerUtils.getContainerFile(containerBase.toFile());
+    ContainerDataYaml.createContainerFile(containerData, containerFile);
+  }
+
+  /**
+   * Creates a container directory whose container file exists but is empty.
+   */
+  void createEmptyContainerFileOnVolume(File volumeRoot, long containerId) throws IOException {
+    Path containerBase = createContainerLayout(volumeRoot, containerId);
+    Files.createFile(ContainerUtils.getContainerFile(containerBase.toFile()).toPath());
+  }
+
+  /**
+   * Overwrites the volume's version file with empty content.
+   */
+  void corruptVersionFile(File volumeRoot) throws IOException {
+    File hddsRoot = new File(volumeRoot, HddsVolume.HDDS_VOLUME_DIR);
+    File versionFile = StorageVolumeUtil.getVersionFile(hddsRoot);
+    Files.write(versionFile.toPath(), new byte[0]);
+  }
+
+  private Path containerDir(File volumeRoot, long containerId) {
+    return containerTopDir(volumeRoot).resolve(Long.toString(containerId));
+  }
+
+  /**
+   * Creates the {@code metadata} and {@code chunks} directories of a container.
+   *
+   * @return the container base directory
+   */
+  private Path createContainerLayout(File volumeRoot, long containerId) throws IOException {
+    Path containerBase = containerDir(volumeRoot, containerId);
+    Files.createDirectories(containerBase.resolve(METADATA_DIR));
+    Files.createDirectories(containerBase.resolve(CHUNKS_DIR));
+    return containerBase;
+  }
+}
diff --git a/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestAnalyzeSubcommand.java b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestAnalyzeSubcommand.java
new file mode 100644
index 00000000000..0d3da9f45a7
--- /dev/null
+++ b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestAnalyzeSubcommand.java
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.debug.datanode.container.analyze;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.UUID;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import org.apache.hadoop.ozone.debug.OzoneDebug;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import picocli.CommandLine;
+
+/**
+ * Tests for {@code ozone debug datanode container analyze} command.
+ */
+public class TestAnalyzeSubcommand {
+
+  /** Summary line prefix expected in the command output; the duplicate count follows it. */
+  private static final String DUPLICATE_COUNT_PREFIX =
+      "Number of containers with duplicate container directories on this DataNode: ";
+
+  @TempDir
+  private Path tempDir;
+
+  private ContainerAnalyzeTestHelper testHelper;
+  private CommandLine cmd;
+  private StringWriter outWriter;
+  private StringWriter errWriter;
+
+  @BeforeEach
+  public void setup() {
+    OzoneConfiguration conf = new OzoneConfiguration();
+    testHelper = new ContainerAnalyzeTestHelper(tempDir, conf,
+        UUID.randomUUID().toString(), UUID.randomUUID().toString());
+
+    // Capture the command's stdout and stderr separately for assertions.
+    cmd = new OzoneDebug().getCmd();
+    outWriter = new StringWriter();
+    errWriter = new StringWriter();
+    cmd.setOut(new PrintWriter(outWriter));
+    cmd.setErr(new PrintWriter(errWriter));
+  }
+
+  @Test
+  public void testAnalyzeNoDuplicates() throws Exception {
+    File volumeRoot = testHelper.formatVolume("volume0");
+    testHelper.createContainerDirectory(volumeRoot, 6006L, true, 6006L);
+
+    executeAnalyze(volumeRoot.getAbsolutePath());
+
+    String output = outWriter.toString();
+    assertThat(output).contains(DUPLICATE_COUNT_PREFIX + "0");
+    assertThat(output).doesNotContain("Container ");
+  }
+
+  @Test
+  public void testAnalyzeRespectsCount() throws Exception {
+    File volumeRoot1 = testHelper.formatVolume("volume0");
+    File volumeRoot2 = testHelper.formatVolume("volume1");
+    // Deliberately unsorted: the report is expected to list IDs in ascending order.
+    long[] duplicateIds = {9003L, 9001L, 9002L};
+    for (long containerId : duplicateIds) {
+      testHelper.createContainerDirectory(volumeRoot1, containerId, true, containerId);
+      testHelper.createContainerDirectory(volumeRoot2, containerId, true, containerId);
+    }
+
+    executeAnalyze(datanodeDirs(volumeRoot1, volumeRoot2), "--count", "2");
+
+    String output = outWriter.toString();
+    assertThat(output).contains(DUPLICATE_COUNT_PREFIX + "3");
+    assertThat(output).contains("Showing first 2:");
+    assertThat(output).contains("Container 9001 (2 occurrences):");
+    assertThat(output).contains("Container 9002 (2 occurrences):");
+    assertThat(output).doesNotContain("Container 9003");
+    assertThat(output.indexOf("Container 9001")).isLessThan(output.indexOf("Container 9002"));
+  }
+
+  @Test
+  public void testAnalyzeInvalidCount() {
+    executeAnalyze(tempDir.toString(), "--count", "0");
+
+    // Check both streams for the validation message.
+    String combined = outWriter.toString() + errWriter.toString();
+    assertThat(combined).contains("Count must be an integer greater than 0.");
+  }
+
+  @Test
+  public void testAnalyzeVolumeScanErrors() throws Exception {
+    File healthyVolume = testHelper.formatVolume("volume0");
+    File failingVolume = testHelper.formatVolume("volume1");
+    testHelper.createContainerDirectory(healthyVolume, 6006L, true, 6006L);
+    testHelper.corruptVersionFile(failingVolume);
+
+    executeAnalyze(datanodeDirs(healthyVolume, failingVolume));
+
+    String output = outWriter.toString();
+    assertThat(output).contains(DUPLICATE_COUNT_PREFIX + "0");
+
+    String errors = errWriter.toString();
+    assertThat(errors).contains("Volumes that failed to scan (1):");
+    assertThat(errors).contains(failingVolume.getAbsolutePath());
+  }
+
+  @Test
+  public void testAnalyzeDuplicateValidAndValid() throws Exception {
+    File volumeRoot1 = testHelper.formatVolume("volume0");
+    File volumeRoot2 = testHelper.formatVolume("volume1");
+    long containerId = 4004L;
+    testHelper.createContainerDirectory(volumeRoot1, containerId, true, containerId);
+    testHelper.createContainerDirectory(volumeRoot2, containerId, true, containerId);
+
+    assertDuplicateReport(volumeRoot1, volumeRoot2, containerId, "VALID");
+  }
+
+  @Test
+  public void testAnalyzeDuplicateValidAndMissing() throws Exception {
+    File volumeRoot1 = testHelper.formatVolume("volume0");
+    File volumeRoot2 = testHelper.formatVolume("volume1");
+    long containerId = 7007L;
+    testHelper.createContainerDirectory(volumeRoot1, containerId, true, containerId);
+    testHelper.createContainerDirectory(volumeRoot2, containerId, false, containerId);
+
+    assertDuplicateReport(volumeRoot1, volumeRoot2, containerId, "MISSING_METADATA");
+  }
+
+  @Test
+  public void testAnalyzeDuplicateValidAndInvalidIdMismatch() throws Exception {
+    File volumeRoot1 = testHelper.formatVolume("volume0");
+    File volumeRoot2 = testHelper.formatVolume("volume1");
+    long containerId = 3003L;
+    testHelper.createContainerDirectory(volumeRoot1, containerId, true, containerId);
+    testHelper.createContainerDirectory(volumeRoot2, containerId, true, 9999L);
+
+    assertDuplicateReport(volumeRoot1, volumeRoot2, containerId, "INVALID_METADATA");
+  }
+
+  @Test
+  public void testAnalyzeDuplicateValidAndInvalidEmptyFile() throws Exception {
+    File volumeRoot1 = testHelper.formatVolume("volume0");
+    File volumeRoot2 = testHelper.formatVolume("volume1");
+    long containerId = 5005L;
+    testHelper.createContainerDirectory(volumeRoot1, containerId, true, containerId);
+    testHelper.createEmptyContainerFileOnVolume(volumeRoot2, containerId);
+
+    assertDuplicateReport(volumeRoot1, volumeRoot2, containerId, "INVALID_METADATA");
+  }
+
+  /**
+   * Runs the command over both volumes and checks that the report lists the
+   * container twice: VALID on the first volume and the given status on the second.
+   */
+  private void assertDuplicateReport(File volumeRoot1, File volumeRoot2, long containerId,
+      String volume2ExpectedStatus) {
+    executeAnalyze(datanodeDirs(volumeRoot1, volumeRoot2));
+
+    String path1 = testHelper.containerPath(volumeRoot1, containerId);
+    String path2 = testHelper.containerPath(volumeRoot2, containerId);
+    String output = outWriter.toString();
+    assertThat(output).contains("Container " + containerId + " (2 occurrences):");
+    assertThat(output).contains("path=" + path1 + "\n  status=VALID");
+    assertThat(output).contains("path=" + path2 + "\n  status=" + volume2ExpectedStatus);
+  }
+
+  /**
+   * Executes {@code datanode container analyze} with the datanode directories set through {@code -D}.
+   *
+   * @param datanodeDirs comma-separated volume roots
+   * @param extraArgs additional command-line arguments appended after the subcommand
+   */
+  private void executeAnalyze(String datanodeDirs, String... extraArgs) {
+    List<String> args = new ArrayList<>();
+    args.add("-D");
+    args.add(ScmConfigKeys.HDDS_DATANODE_DIR_KEY + "=" + datanodeDirs);
+    args.add("datanode");
+    args.add("container");
+    args.add("analyze");
+    args.addAll(Arrays.asList(extraArgs));
+    cmd.execute(args.toArray(new String[0]));
+  }
+
+  /** Joins the absolute paths of the volume roots with commas. */
+  private static String datanodeDirs(File... volumeRoots) {
+    List<String> paths = new ArrayList<>();
+    for (File volumeRoot : volumeRoots) {
+      paths.add(volumeRoot.getAbsolutePath());
+    }
+    return String.join(",", paths);
+  }
+}
diff --git a/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestContainerDirectoryScanner.java b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestContainerDirectoryScanner.java
new file mode 100644
index 00000000000..08ee3f35cdb
--- /dev/null
+++ b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestContainerDirectoryScanner.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.debug.datanode.container.analyze;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.UUID;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Unit tests for {@link ContainerDirectoryScanner}.
+ */
+public class TestContainerDirectoryScanner {
+
+  @TempDir
+  private Path tempDir;
+
+  private OzoneConfiguration conf;
+  private ContainerAnalyzeTestHelper testHelper;
+
+  @BeforeEach
+  public void setup() {
+    conf = new OzoneConfiguration();
+    testHelper = new ContainerAnalyzeTestHelper(tempDir, conf,
+        UUID.randomUUID().toString(), UUID.randomUUID().toString());
+  }
+
+  @Test
+  public void testValidContainer() throws Exception {
+    File volumeRoot = testHelper.formatVolume("volume0");
+    long containerId = 1001L;
+    testHelper.createContainerDirectory(volumeRoot, containerId, true, containerId);
+    ContainerDiskOccurrence occurrence = enrichSingleContainer(volumeRoot, containerId);
+    assertEquals(ContainerDirectoryScanner.ContainerDiskScanStatus.VALID, occurrence.getStatus());
+    assertThat(occurrence.getContainerPath()).startsWith(volumeRoot.getAbsolutePath());
+    assertThat(occurrence.getSizeBytes()).isGreaterThan(0L);
+  }
+
+  @Test
+  public void testMissingMetadata() throws Exception {
+    File volumeRoot = testHelper.formatVolume("volume0");
+    long containerId = 2002L;
+    testHelper.createContainerDirectory(volumeRoot, containerId, false, containerId);
+    ContainerDiskOccurrence occurrence = enrichSingleContainer(volumeRoot, containerId);
+    assertEquals(ContainerDirectoryScanner.ContainerDiskScanStatus.MISSING_METADATA, occurrence.getStatus());
+  }
+
+  @Test
+  public void testInvalidMetadataIdMismatch() throws Exception {
+    File volumeRoot = testHelper.formatVolume("volume0");
+    long containerId = 3003L;
+    testHelper.createContainerDirectory(volumeRoot, containerId, true, 9999L);
+    ContainerDiskOccurrence occurrence = enrichSingleContainer(volumeRoot, containerId);
+    assertEquals(ContainerDirectoryScanner.ContainerDiskScanStatus.INVALID_METADATA, occurrence.getStatus());
+  }
+
+  @Test
+  public void testInvalidMetadataEmptyContainerFile() throws Exception {
+    File volumeRoot = testHelper.formatVolume("volume0");
+    long containerId = 5005L;
+    testHelper.createEmptyContainerFileOnVolume(volumeRoot, containerId);
+    ContainerDiskOccurrence occurrence = enrichSingleContainer(volumeRoot, containerId);
+    assertEquals(ContainerDirectoryScanner.ContainerDiskScanStatus.INVALID_METADATA, occurrence.getStatus());
+  }
+
+  @Test
+  public void testDuplicateAcrossVolumes() throws Exception {
+    File volumeRoot1 = testHelper.formatVolume("volume0");
+    File volumeRoot2 = testHelper.formatVolume("volume1");
+    long containerId = 4004L;
+    testHelper.createContainerDirectory(volumeRoot1, containerId, true, containerId);
+    testHelper.createContainerDirectory(volumeRoot2, containerId, true, containerId);
+
+    ContainerScanResult scanResult = scan(volumeRoot1.getAbsolutePath() + "," + volumeRoot2.getAbsolutePath());
+
+    assertEquals(1, scanResult.getDuplicates().size());
+    int copies = scanResult.getDuplicates().get(containerId).size();
+    assertEquals(2, copies);
+    // Both copies were written with matching metadata, so each one must be VALID.
+    for (int i = 0; i < copies; i++) {
+      assertEquals(ContainerDirectoryScanner.ContainerDiskScanStatus.VALID,
+          ContainerDirectoryScanner.enrichOccurrence(containerId,
+              scanResult.getDuplicates().get(containerId).get(i)).getStatus());
+    }
+  }
+
+  @Test
+  public void testSingletonStoredInSinglesNotDuplicates() throws Exception {
+    File volumeRoot = testHelper.formatVolume("volume0");
+    long containerId = 6006L;
+    testHelper.createContainerDirectory(volumeRoot, containerId, true, containerId);
+
+    ContainerScanResult scanResult = scan(volumeRoot.getAbsolutePath());
+
+    assertEquals(1, scanResult.getSingles().size());
+    assertThat(scanResult.getSingles()).containsKey(containerId);
+    assertThat(scanResult.getSingles().get(containerId)).isNotBlank();
+    assertThat(scanResult.getDuplicates()).isEmpty();
+  }
+
+  @Test
+  public void testNonNumericDirectorySkipped() throws Exception {
+    File volumeRoot = testHelper.formatVolume("volume0");
+    Path invalidDir = testHelper.containerTopDir(volumeRoot).resolve("not-a-container");
+    Files.createDirectories(invalidDir);
+
+    ContainerScanResult scanResult = scan(volumeRoot.getAbsolutePath());
+    assertThat(scanResult.getSingles()).isEmpty();
+    assertThat(scanResult.getDuplicates()).isEmpty();
+    assertThat(scanResult.getVolumeScanErrors()).isEmpty();
+  }
+
+  @Test
+  public void testMissingConfiguredVolumeSkipped() throws IOException {
+    ContainerScanResult scanResult = scan(tempDir.resolve("missing-volume").toString());
+    assertThat(scanResult.getSingles()).isEmpty();
+    assertThat(scanResult.getDuplicates()).isEmpty();
+    assertThat(scanResult.getVolumeScanErrors()).isEmpty();
+  }
+
+  /**
+   * Scans the single volume, expects no duplicates and enriches the container's only occurrence.
+   */
+  private ContainerDiskOccurrence enrichSingleContainer(File volumeRoot, long containerId) throws IOException {
+    ContainerScanResult scanResult = scan(volumeRoot.getAbsolutePath());
+    assertThat(scanResult.getDuplicates()).isEmpty();
+    String containerPath = scanResult.getSingles().get(containerId);
+    assertThat(containerPath).isNotBlank();
+    return ContainerDirectoryScanner.enrichOccurrence(containerId, containerPath);
+  }
+
+  /**
+   * Sets the datanode directories in the configuration and runs the scanner.
+   *
+   * @param datanodeDirs comma-separated volume roots
+   */
+  private ContainerScanResult scan(String datanodeDirs) throws IOException {
+    conf.set(ScmConfigKeys.HDDS_DATANODE_DIR_KEY, datanodeDirs);
+    return ContainerDirectoryScanner.scan(conf);
+  }
+}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/container/TestDuplicateContainerDirScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/container/TestDuplicateContainerDirScannerIntegration.java
new file mode 100644
index 00000000000..1373f997789
--- /dev/null
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/container/TestDuplicateContainerDirScannerIntegration.java
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.dn.container;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.hadoop.hdds.client.ReplicationFactor.ONE;
+import static org.apache.hadoop.hdds.client.ReplicationType.RATIS;
+import static org.apache.hadoop.ozone.debug.datanode.container.analyze.ContainerDirectoryScanner.ContainerDiskScanStatus.MISSING_METADATA;
+import static org.apache.hadoop.ozone.debug.datanode.container.analyze.ContainerDirectoryScanner.ContainerDiskScanStatus.VALID;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.conf.StorageUnit;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import org.apache.hadoop.ozone.HddsDatanodeService;
+import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.OzoneConfigKeys;
+import org.apache.hadoop.ozone.UniformDatanodesFactory;
+import org.apache.hadoop.ozone.client.ObjectStore;
+import org.apache.hadoop.ozone.client.OzoneBucket;
+import org.apache.hadoop.ozone.client.OzoneClient;
+import org.apache.hadoop.ozone.client.OzoneClientFactory;
+import org.apache.hadoop.ozone.client.OzoneVolume;
+import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
+import org.apache.hadoop.ozone.container.ContainerTestHelper;
+import org.apache.hadoop.ozone.container.TestHelper;
+import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
+import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
+import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
+import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
+import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer;
+import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
+import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerLocationUtil;
+import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
+import org.apache.hadoop.ozone.debug.datanode.container.analyze.ContainerDirectoryScanner;
+import org.apache.hadoop.ozone.debug.datanode.container.analyze.ContainerDiskOccurrence;
+import org.apache.ozone.test.GenericTestUtils;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Integration test: same container ID on two DN volumes,
+ * detected by {@link ContainerDirectoryScanner} before and after DN restart.
+ */
+class TestDuplicateContainerDirScannerIntegration {
+
+  private static final int PIPELINE_READY_TIMEOUT_MS = 60000;
+
+  private MiniOzoneCluster cluster;
+  private OzoneClient ozoneClient;
+  private ObjectStore store;
+  private String volumeName;
+  private String bucketName;
+  private OzoneBucket bucket;
+
+  @BeforeEach
+  void startCluster() throws Exception {
+    OzoneConfiguration conf = new OzoneConfiguration();
+    conf.set(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE, "1GB");
+    conf.setStorageSize(ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN,
+        0, StorageUnit.MB);
+    conf.setInt(OzoneConfigKeys.OZONE_REPLICATION, ONE.getValue());
+
+    // One datanode with three data volumes, so a second volume is always available.
+    cluster = MiniOzoneCluster.newBuilder(conf)
+        .setNumDatanodes(1)
+        .setDatanodeFactory(UniformDatanodesFactory.newBuilder()
+            .setNumDataVolumes(3)
+            .build())
+        .build();
+    waitForClusterAndPipeline();
+
+    ozoneClient = OzoneClientFactory.getRpcClient(cluster.getConf());
+    store = ozoneClient.getObjectStore();
+    volumeName = UUID.randomUUID().toString();
+    bucketName = UUID.randomUUID().toString();
+    store.createVolume(volumeName);
+    OzoneVolume volume = store.getVolume(volumeName);
+    volume.createBucket(bucketName);
+    bucket = volume.getBucket(bucketName);
+  }
+
+  @AfterEach
+  void shutdown() throws IOException {
+    try {
+      if (ozoneClient != null) {
+        ozoneClient.close();
+      }
+    } finally {
+      // Always stop the cluster, even when closing the client fails.
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
+  @Test
+  void scannerFindsDuplicateDirsAcrossVolumes() throws Exception {
+    long containerId = writeKeyAndCloseContainer("dup-scanner-key");
+
+    OzoneContainer ozoneContainer = getOzoneContainer();
+    ContainerSet containerSet = ozoneContainer.getContainerSet();
+    KeyValueContainer live = (KeyValueContainer) containerSet.getContainer(containerId);
+    KeyValueContainerData liveData = live.getContainerData();
+    HddsVolume volumeA = liveData.getVolume();
+    String pathA = liveData.getContainerPath();
+    String volumeARoot = volumeA.getVolumeRootDir();
+
+    // Drop the container from the in-memory set only; its files must stay on disk.
+    assertTrue(containerSet.removeContainerOnlyFromMemory(containerId));
+    assertNull(containerSet.getContainer(containerId));
+    assertFullContainerLayout(pathA);
+
+    HddsVolume volumeB = pickOtherVolume(ozoneContainer, volumeA);
+    String volumeBRoot = volumeB.getVolumeRootDir();
+    String clusterId = volumeA.getClusterID();
+    String pathB = KeyValueContainerLocationUtil.getBaseContainerLocation(
+        volumeB.getHddsRootDir().getAbsolutePath(), clusterId, containerId);
+
+    createPartialCopyOnVolumeB(pathA, pathB);
+
+    assertScannerSeesDuplicate(containerId, volumeARoot, volumeBRoot, pathA, pathB);
+
+    cluster.restartHddsDatanode(0, true);
+    waitForClusterAndPipeline();
+
+    assertFullContainerLayout(pathA);
+    assertPartialContainerLayout(pathB);
+    assertScannerSeesDuplicate(containerId, volumeARoot, volumeBRoot, pathA, pathB);
+  }
+
+  /**
+   * Writes a key, closes the container of its first block location and waits
+   * until {@code TestHelper.isContainerClosed} reports it closed on the datanode.
+   *
+   * @return ID of the closed container
+   */
+  private long writeKeyAndCloseContainer(String keyName) throws Exception {
+    byte[] data = ContainerTestHelper
+        .getFixedLengthString("sample", 1024 * 1024)
+        .getBytes(UTF_8);
+    try (OzoneOutputStream out = TestHelper.createKey(
+        keyName, RATIS, ONE, 0, store, volumeName, bucketName)) {
+      out.write(data);
+      out.flush();
+    }
+
+    long containerId = bucket.getKey(keyName).getOzoneKeyLocations().stream()
+        .findFirst()
+        .orElseThrow(() -> new IllegalStateException("Key has no block locations"))
+        .getContainerID();
+
+    cluster.getStorageContainerLocationClient().closeContainer(containerId);
+    GenericTestUtils.waitFor(
+        () -> TestHelper.isContainerClosed(cluster, containerId,
+            cluster.getHddsDatanodes().get(0).getDatanodeDetails()),
+        1000, 15000);
+
+    return containerId;
+  }
+
+  /**
+   * Creates a container directory at {@code pathB} holding only a copy of the
+   * {@code chunks} directory of {@code pathA}: no metadata directory and no container file.
+   */
+  private static void createPartialCopyOnVolumeB(String pathA, String pathB) throws IOException {
+    File dirB = new File(pathB);
+    assertFalse(dirB.exists(), "Volume B must not already have this container dir");
+    Files.createDirectories(new File(pathB, "chunks").toPath());
+    FileUtils.copyDirectory(new File(pathA, "chunks"), new File(pathB, "chunks"));
+    assertFalse(new File(pathB, "metadata").exists());
+    assertFalse(ContainerUtils.getContainerFile(dirB).exists());
+  }
+
+  /**
+   * Scans the datanode's configured volumes and verifies that the container is
+   * reported twice: VALID at {@code pathA} and MISSING_METADATA at {@code pathB}.
+   */
+  private void assertScannerSeesDuplicate(long containerId, String volumeARoot, String volumeBRoot,
+      String pathA, String pathB) throws IOException {
+    OzoneConfiguration scanConf = cluster.getHddsDatanodes().get(0).getConf();
+    Map<Long, List<ContainerDiskOccurrence>> enrichedDuplicates =
+        ContainerDirectoryScanner.enrichDuplicates(ContainerDirectoryScanner.scan(scanConf).getDuplicates());
+
+    assertThat(enrichedDuplicates).containsKey(containerId);
+    List<ContainerDiskOccurrence> occurrences = enrichedDuplicates.get(containerId);
+    assertThat(occurrences).hasSize(2);
+
+    ContainerDiskOccurrence onA = findOnVolume(occurrences, volumeARoot);
+    ContainerDiskOccurrence onB = findOnVolume(occurrences, volumeBRoot);
+
+    assertThat(onA.getStatus()).isEqualTo(VALID);
+    assertThat(onB.getStatus()).isEqualTo(MISSING_METADATA);
+    assertThat(Paths.get(onA.getContainerPath())).isEqualTo(Paths.get(pathA).toAbsolutePath());
+    assertThat(Paths.get(onB.getContainerPath())).isEqualTo(Paths.get(pathB).toAbsolutePath());
+    // Scanning is expected to leave both on-disk layouts unchanged.
+    assertFullContainerLayout(pathA);
+    assertPartialContainerLayout(pathB);
+  }
+
+  /** Returns the first occurrence whose container path lies under {@code volumeRoot}. */
+  private static ContainerDiskOccurrence findOnVolume(List<ContainerDiskOccurrence> occurrences, String volumeRoot) {
+    return occurrences.stream()
+        .filter(o -> Paths.get(o.getContainerPath()).startsWith(Paths.get(volumeRoot)))
+        .findFirst()
+        .orElseThrow(() -> new AssertionError(
+            "No occurrence on volume root " + volumeRoot + ", got " + occurrences));
+  }
+
+  /** Asserts that the metadata and chunks directories and the container file all exist. */
+  private static void assertFullContainerLayout(String containerPath) {
+    assertTrue(new File(containerPath, "metadata").isDirectory());
+    assertTrue(new File(containerPath, "chunks").isDirectory());
+    assertTrue(ContainerUtils.getContainerFile(new File(containerPath)).exists());
+  }
+
+  /** Asserts that only the chunks directory exists: no metadata directory, no container file. */
+  private static void assertPartialContainerLayout(String containerPath) {
+    assertTrue(new File(containerPath).isDirectory());
+    assertFalse(new File(containerPath, "metadata").exists());
+    assertTrue(new File(containerPath, "chunks").isDirectory());
+    assertFalse(ContainerUtils.getContainerFile(new File(containerPath)).exists());
+  }
+
+  /** Returns the first HDDS volume whose root directory differs from that of {@code volumeA}. */
+  private static HddsVolume pickOtherVolume(OzoneContainer ozoneContainer, HddsVolume volumeA) {
+    return StorageVolumeUtil.getHddsVolumesList(ozoneContainer.getVolumeSet().getVolumesList())
+        .stream()
+        .filter(v -> !v.getVolumeRootDir().equals(volumeA.getVolumeRootDir()))
+        .findFirst()
+        .orElseThrow(() -> new IllegalStateException("Need at least two data volumes"));
+  }
+
+  /** Returns the {@link OzoneContainer} of the cluster's first datanode. */
+  private OzoneContainer getOzoneContainer() {
+    HddsDatanodeService dn = cluster.getHddsDatanodes().get(0);
+    return dn.getDatanodeStateMachine().getContainer();
+  }
+
+  /** Blocks until the cluster and a factor-ONE pipeline are ready. */
+  private void waitForClusterAndPipeline() throws Exception {
+    cluster.waitForClusterToBeReady();
+    cluster.waitForPipelineTobeReady(HddsProtos.ReplicationFactor.ONE, PIPELINE_READY_TIMEOUT_MS);
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to