This is an automated email from the ASF dual-hosted git repository.

aswinshakil pushed a commit to branch HDDS-10239-container-reconciliation
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/HDDS-10239-container-reconciliation by this push:
     new 8768d04816 HDDS-12745. Container checksum should be reported during container close and DN restart (#8204)
8768d04816 is described below

commit 8768d048160a6b3761ddc2938d7287f9cd609e04
Author: Aswin Shakil Balasubramanian <[email protected]>
AuthorDate: Tue Apr 15 22:33:28 2025 +0530

    HDDS-12745. Container checksum should be reported during container close and DN restart (#8204)
---
 .../checksum/ContainerChecksumTreeManager.java     |  37 ++++---
 .../keyvalue/helpers/KeyValueContainerUtil.java    |  21 ++++
 .../checksum/TestContainerChecksumTreeManager.java |   4 -
 .../hdds/scm/container/ContainerReplica.java       |   1 +
 .../apache/hadoop/hdds/scm/TestCloseContainer.java |  13 +++
 .../TestContainerCommandReconciliation.java        | 112 +++++++++++++++++++--
 6 files changed, 163 insertions(+), 25 deletions(-)
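
Note on what this patch does: once a container replica is closed, and again when a
datanode reloads a closed container after restart, the merkle-tree data checksum is
populated and carried in the replica report to SCM. A minimal sketch of how a client
could observe this, using only names that appear in the diff below (the running
cluster and the scmClient handle are assumed, as in the new test):

    // Sketch, not part of the commit: ask SCM for the replicas of a container
    // and read the reported data checksum. A non-empty closed container should
    // report a non-zero value after this patch.
    List<HddsProtos.SCMContainerReplicaProto> replicas =
        scmClient.getContainerReplicas(containerID, ClientVersion.CURRENT_VERSION);
    for (HddsProtos.SCMContainerReplicaProto replica : replicas) {
      System.out.println("reported dataChecksum: " + replica.getDataChecksum());
    }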

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java
index 99b5800c45..261073123b 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java
@@ -318,21 +318,11 @@ private Lock getLock(long containerID) {
    * swapped into place.
    */
   public Optional<ContainerProtos.ContainerChecksumInfo> read(ContainerData data) throws IOException {
-    long containerID = data.getContainerID();
-    File checksumFile = getContainerChecksumFile(data);
     try {
-      if (!checksumFile.exists()) {
-        LOG.debug("No checksum file currently exists for container {} at the path {}", containerID, checksumFile);
-        return Optional.empty();
-      }
-      try (FileInputStream inStream = new FileInputStream(checksumFile)) {
-        return captureLatencyNs(metrics.getReadContainerMerkleTreeLatencyNS(),
-            () -> Optional.of(ContainerProtos.ContainerChecksumInfo.parseFrom(inStream)));
-      }
+      return captureLatencyNs(metrics.getReadContainerMerkleTreeLatencyNS(), () -> readChecksumInfo(data));
     } catch (IOException ex) {
       metrics.incrementMerkleTreeReadFailures();
-      throw new IOException("Error occurred when reading container merkle tree for containerID "
-              + data.getContainerID() + " at path " + checksumFile, ex);
+      throw new IOException(ex);
     }
   }
 
@@ -383,6 +373,29 @@ public ByteString getContainerChecksumInfo(KeyValueContainerData data) throws IO
     }
   }
 
+  /**
+   * Reads the container checksum info file (containerID.tree) from the disk.
+   * Callers are not required to hold a lock while calling this since writes are done to a tmp file and atomically
+   * swapped into place.
+   */
+  public static Optional<ContainerProtos.ContainerChecksumInfo> readChecksumInfo(ContainerData data)
+      throws IOException {
+    long containerID = data.getContainerID();
+    File checksumFile = getContainerChecksumFile(data);
+    try {
+      if (!checksumFile.exists()) {
+        LOG.debug("No checksum file currently exists for container {} at the path {}", containerID, checksumFile);
+        return Optional.empty();
+      }
+      try (FileInputStream inStream = new FileInputStream(checksumFile)) {
+        return Optional.of(ContainerProtos.ContainerChecksumInfo.parseFrom(inStream));
+      }
+    } catch (IOException ex) {
+      throw new IOException("Error occurred when reading container merkle tree for containerID "
+          + data.getContainerID() + " at path " + checksumFile, ex);
+    }
+  }
+
+
   @VisibleForTesting
   public ContainerMerkleTreeMetrics getMetrics() {
     return this.metrics;
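
Note on the refactor above: read() now only wraps the I/O in latency metrics and
failure counting, while the raw file read lives in the new static readChecksumInfo(...),
so callers that have no ContainerChecksumTreeManager instance (such as
KeyValueContainerUtil below) can reuse it. A minimal caller sketch, assuming a
ContainerData handle (containerData) is already available:

    // Sketch, not part of the commit: read the on-disk tree directly.
    Optional<ContainerProtos.ContainerChecksumInfo> info =
        ContainerChecksumTreeManager.readChecksumInfo(containerData);
    // Treat "no tree file written yet" as checksum 0 in this sketch.
    long dataChecksum = info
        .map(i -> i.getContainerMerkleTree().getDataChecksum())
        .orElse(0L);
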
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java
index f0d13c14d3..dbf5cfaa8e 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java
@@ -26,12 +26,15 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.Optional;
 import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerChecksumInfo;
 import org.apache.hadoop.hdds.utils.MetadataKeyFilters;
 import org.apache.hadoop.hdds.utils.db.Table;
 import org.apache.hadoop.ozone.OzoneConsts;
+import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager;
 import org.apache.hadoop.ozone.container.common.helpers.BlockData;
 import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
 import org.apache.hadoop.ozone.container.common.interfaces.BlockIterator;
@@ -277,6 +280,23 @@ public static void parseKVContainerData(KeyValueContainerData kvContainerData,
     }
   }
 
+  private static void populateContainerDataChecksum(KeyValueContainerData kvContainerData) {
+    if (kvContainerData.isOpen()) {
+      return;
+    }
+
+    try {
+      Optional<ContainerChecksumInfo> optionalContainerChecksumInfo = ContainerChecksumTreeManager
+          .readChecksumInfo(kvContainerData);
+      if (optionalContainerChecksumInfo.isPresent()) {
+        ContainerChecksumInfo containerChecksumInfo = optionalContainerChecksumInfo.get();
+        kvContainerData.setDataChecksum(containerChecksumInfo.getContainerMerkleTree().getDataChecksum());
+      }
+    } catch (IOException ex) {
+      LOG.warn("Failed to read checksum info for container {}", kvContainerData.getContainerID(), ex);
+    }
+  }
+
   private static void populateContainerMetadata(
       KeyValueContainerData kvContainerData, DatanodeStore store,
       boolean bCheckChunksFilePath)
@@ -356,6 +376,7 @@ private static void populateContainerMetadata(
 
     // Load finalizeBlockLocalIds for container in memory.
     populateContainerFinalizeBlock(kvContainerData, store);
+    populateContainerDataChecksum(kvContainerData);
   }
 
   /**
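
Net effect of the change above on datanode restart: populateContainerMetadata(...)
now finishes by calling populateContainerDataChecksum(...), which returns early for
open containers and otherwise copies the data checksum from the on-disk tree into the
in-memory KeyValueContainerData. A condensed sketch of that guard-and-copy flow (same
names as the diff; the IOException handling shown above is trimmed):

    // Sketch, not part of the commit: what the restart path boils down to.
    if (!kvContainerData.isOpen()) {
      ContainerChecksumTreeManager.readChecksumInfo(kvContainerData)
          .ifPresent(info -> kvContainerData.setDataChecksum(
              info.getContainerMerkleTree().getDataChecksum()));
    }
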
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java
index 987ff7cf81..538fd9c15c 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java
@@ -201,7 +201,6 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception {
     assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0);
     List<Long> expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L);
     checksumManager.markBlocksAsDeleted(container, new ArrayList<>(expectedBlocksToDelete));
-    assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0);
     ContainerMerkleTreeWriter tree = buildTestTree(config);
     checksumManager.writeContainerDataTree(container, tree);
     assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0);
@@ -222,7 +221,6 @@ public void testTreePreservedOnDeletedBlocksWrite() throws Exception {
     assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0);
     ContainerMerkleTreeWriter tree = buildTestTree(config);
     checksumManager.writeContainerDataTree(container, tree);
-    assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0);
     List<Long> expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L);
     checksumManager.markBlocksAsDeleted(container, new ArrayList<>(expectedBlocksToDelete));
     assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0);
@@ -242,8 +240,6 @@ public void testReadContainerMerkleTreeMetric() throws Exception {
     assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0);
     ContainerMerkleTreeWriter tree = buildTestTree(config);
     checksumManager.writeContainerDataTree(container, tree);
-    assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0);
-    checksumManager.writeContainerDataTree(container, tree);
     assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0);
     assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0);
   }
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplica.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplica.java
index 16afcc9608..0e2baeeeb3 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplica.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplica.java
@@ -188,6 +188,7 @@ public String toString() {
         ",replicaIndex=" + replicaIndex :
         "") +
         ", isEmpty=" + isEmpty +
+        ", dataChecksum=" + dataChecksum +
         '}';
   }
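
With dataChecksum appended to ContainerReplica#toString() above, replica dumps in SCM
logs expose the reported checksum directly. An illustrative output shape (field values
invented for this example):

    ContainerReplica{... isEmpty=false, dataChecksum=1234567890}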
 
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestCloseContainer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestCloseContainer.java
index 1c838a85f8..8c63ec5caf 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestCloseContainer.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestCloseContainer.java
@@ -158,6 +158,9 @@ public void testReplicasAreReportedForClosedContainerAfterRestart()
     // Ensure 3 replicas are reported successfully as expected.
     GenericTestUtils.waitFor(() ->
             getContainerReplicas(newContainer).size() == 3, 200, 30000);
+    for (ContainerReplica replica : getContainerReplicas(newContainer)) {
+      assertNotEquals(0, replica.getDataChecksum());
+    }
   }
 
   /**
@@ -198,6 +201,10 @@ public void testCloseClosedContainer()
       assertTrue(containerChecksumFileExists(hddsDatanode, container));
     }
 
+    for (ContainerReplica replica : getContainerReplicas(container)) {
+      assertNotEquals(0, replica.getDataChecksum());
+    }
+
     assertThrows(IOException.class,
         () -> cluster.getStorageContainerLocationClient()
             .closeContainer(container.getContainerID()),
@@ -269,6 +276,12 @@ public void testContainerChecksumForClosedContainer() throws Exception {
     assertNotEquals(prevExpectedChecksumInfo1.getContainerID(), prevExpectedChecksumInfo2.getContainerID());
     assertNotEquals(prevExpectedChecksumInfo1.getContainerMerkleTree().getDataChecksum(),
         prevExpectedChecksumInfo2.getContainerMerkleTree().getDataChecksum());
+    for (ContainerReplica replica : getContainerReplicas(containerInfo1)) {
+      assertNotEquals(0, replica.getDataChecksum());
+    }
+    for (ContainerReplica replica : getContainerReplicas(containerInfo2)) {
+      assertNotEquals(0, replica.getDataChecksum());
+    }
   }
 
   private boolean checkContainerCloseInDatanode(HddsDatanodeService hddsDatanode,
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
index f51dbfed43..4624cc562f 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
@@ -50,6 +50,7 @@
 import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_KERBEROS_PRINCIPAL_KEY;
 import static org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod.KERBEROS;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
@@ -76,7 +77,9 @@
 import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.ContainerReplica;
 import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
+import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB;
 import org.apache.hadoop.hdds.scm.server.SCMHTTPServerConfig;
+import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
 import org.apache.hadoop.hdds.security.symmetric.SecretKeyClient;
 import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;
 import org.apache.hadoop.hdds.utils.db.BatchOperation;
@@ -84,6 +87,7 @@
 import org.apache.hadoop.ozone.ClientVersion;
 import org.apache.hadoop.ozone.HddsDatanodeService;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
 import org.apache.hadoop.ozone.client.ObjectStore;
 import org.apache.hadoop.ozone.client.OzoneBucket;
 import org.apache.hadoop.ozone.client.OzoneClient;
@@ -110,7 +114,6 @@
 import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
 import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
 import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
@@ -120,7 +123,7 @@
  */
 public class TestContainerCommandReconciliation {
 
-  private static MiniOzoneCluster cluster;
+  private static MiniOzoneHAClusterImpl cluster;
   private static OzoneClient rpcClient;
   private static ObjectStore store;
   private static OzoneConfiguration conf;
@@ -146,7 +149,9 @@ public static void init() throws Exception {
     conf.setStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, 128 * 1024, StorageUnit.BYTES);
     conf.setStorageSize(OZONE_SCM_BLOCK_SIZE,  512 * 1024, StorageUnit.BYTES);
     // Disable the container scanner so it does not create merkle tree files that interfere with this test.
+    // TODO: Currently container scrub sets the checksum to 0, Revert this after HDDS-10374 is merged.
     conf.getObject(ContainerScannerConfiguration.class).setEnabled(false);
+    conf.setBoolean("hdds.container.scrub.enabled", false);
 
     startMiniKdc();
     setSecureConfig();
@@ -343,7 +348,7 @@ public void testContainerChecksumWithBlockMissing() throws Exception {
         .getContainerReplicas(ContainerID.valueOf(containerID))
         .stream().map(ContainerReplica::getDatanodeDetails)
         .collect(Collectors.toList());
-    Assertions.assertEquals(3, dataNodeDetails.size());
+    assertEquals(3, dataNodeDetails.size());
     HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanode(dataNodeDetails.get(0));
     DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine();
     Container<?> container = datanodeStateMachine.getContainer().getContainerSet().getContainer(containerID);
@@ -378,7 +383,7 @@ public void testContainerChecksumWithBlockMissing() throws Exception {
         readChecksumFile(container.getContainerData());
     long dataChecksumAfterBlockDelete = containerChecksumAfterBlockDelete.getContainerMerkleTree().getDataChecksum();
     // Checksum should have changed after block delete.
-    Assertions.assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete);
+    assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete);
 
     // Since the container is already closed, we have manually updated the container checksum file.
     // This doesn't update the checksum reported to SCM, and we need to trigger an ICR.
@@ -409,7 +414,7 @@ public void testContainerChecksumChunkCorruption() throws Exception {
         .getContainerReplicas(ContainerID.valueOf(containerID))
         .stream().map(ContainerReplica::getDatanodeDetails)
         .collect(Collectors.toList());
-    Assertions.assertEquals(3, dataNodeDetails.size());
+    assertEquals(3, dataNodeDetails.size());
     HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanode(dataNodeDetails.get(0));
     DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine();
     Container<?> container = datanodeStateMachine.getContainer().getContainerSet().getContainer(containerID);
@@ -463,11 +468,11 @@ public void testContainerChecksumChunkCorruption() throws Exception {
     long dataChecksumAfterAfterChunkCorruption = containerChecksumAfterChunkCorruption
         .getContainerMerkleTree().getDataChecksum();
     // Checksum should have changed after chunk corruption.
-    Assertions.assertNotEquals(oldDataChecksum, dataChecksumAfterAfterChunkCorruption);
+    assertNotEquals(oldDataChecksum, dataChecksumAfterAfterChunkCorruption);
 
     // 3. Set Unhealthy for first chunk of all blocks. This should be done by the scanner, Until then this is a
     // manual step.
-    // // TODO: Use On-demand container scanner to build the new container merkle tree (HDDS-10374)
+    // TODO: Use On-demand container scanner to build the new container merkle tree (HDDS-10374)
     Random random = new Random();
     ContainerProtos.ContainerChecksumInfo.Builder builder = containerChecksumAfterChunkCorruption.toBuilder();
     List<ContainerProtos.BlockMerkleTree> blockMerkleTreeList = builder.getContainerMerkleTree()
@@ -498,7 +503,97 @@ public void testContainerChecksumChunkCorruption() throws Exception {
     ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData());
     assertTreesSortedAndMatch(oldContainerChecksumInfo.getContainerMerkleTree(),
         newContainerChecksumInfo.getContainerMerkleTree());
-    Assertions.assertEquals(oldDataChecksum, newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum());
+    assertEquals(oldDataChecksum, newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum());
+    TestHelper.validateData(KEY_NAME, data, store, volume, bucket);
+  }
+
+  @Test
+  public void testDataChecksumReportedAtSCM() throws Exception {
+    // 1. Write data to a container.
+    // Read the key back and check its hash.
+    String volume = UUID.randomUUID().toString();
+    String bucket = UUID.randomUUID().toString();
+    Pair<Long, byte[]> containerAndData = getDataAndContainer(true, 20 * 1024 * 1024, volume, bucket);
+    long containerID = containerAndData.getLeft();
+    byte[] data = containerAndData.getRight();
+    // Get the datanodes where the container replicas are stored.
+    List<DatanodeDetails> dataNodeDetails = cluster.getStorageContainerManager().getContainerManager()
+        .getContainerReplicas(ContainerID.valueOf(containerID))
+        .stream().map(ContainerReplica::getDatanodeDetails)
+        .collect(Collectors.toList());
+    assertEquals(3, dataNodeDetails.size());
+    HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanode(dataNodeDetails.get(0));
+    DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine();
+    Container<?> container = datanodeStateMachine.getContainer().getContainerSet().getContainer(containerID);
+    KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData();
+    ContainerProtos.ContainerChecksumInfo oldContainerChecksumInfo = readChecksumFile(container.getContainerData());
+    KeyValueHandler kvHandler = (KeyValueHandler) datanodeStateMachine.getContainer().getDispatcher()
+        .getHandler(ContainerProtos.ContainerType.KeyValueContainer);
+
+    long oldDataChecksum = oldContainerChecksumInfo.getContainerMerkleTree().getDataChecksum();
+    // Check non-zero checksum after container close
+    StorageContainerLocationProtocolClientSideTranslatorPB scmClient = cluster.getStorageContainerLocationClient();
+    List<HddsProtos.SCMContainerReplicaProto> containerReplicas = scmClient.getContainerReplicas(containerID,
+        ClientVersion.CURRENT_VERSION);
+    assertEquals(3, containerReplicas.size());
+    for (HddsProtos.SCMContainerReplicaProto containerReplica: containerReplicas) {
+      assertNotEquals(0, containerReplica.getDataChecksum());
+    }
+
+    // 2. Delete some blocks to simulate missing blocks.
+    BlockManager blockManager = kvHandler.getBlockManager();
+    List<BlockData> blockDataList = blockManager.listBlock(container, -1, 100);
+    String chunksPath = container.getContainerData().getChunksPath();
+    try (DBHandle db = BlockUtils.getDB(containerData, conf);
+         BatchOperation op = db.getStore().getBatchHandler().initBatchOperation()) {
+      for (int i = 0; i < blockDataList.size(); i += 2) {
+        BlockData blockData = blockDataList.get(i);
+        // Delete the block metadata from the container db
+        db.getStore().getBlockDataTable().deleteWithBatch(op, containerData.getBlockKey(blockData.getLocalID()));
+        // Delete the block file.
+        Files.deleteIfExists(Paths.get(chunksPath + "/" + blockData.getBlockID().getLocalID() + ".block"));
+      }
+      db.getStore().getBatchHandler().commitBatchOperation(op);
+      db.getStore().flushDB();
+    }
+
+    // TODO: Use On-demand container scanner to build the new container merkle tree. (HDDS-10374)
+    Files.deleteIfExists(getContainerChecksumFile(container.getContainerData()).toPath());
+    kvHandler.createContainerMerkleTree(container);
+    ContainerProtos.ContainerChecksumInfo containerChecksumAfterBlockDelete =
+        readChecksumFile(container.getContainerData());
+    long dataChecksumAfterBlockDelete = containerChecksumAfterBlockDelete.getContainerMerkleTree().getDataChecksum();
+    // Checksum should have changed after block delete.
+    assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete);
+
+    // Since the container is already closed, we have manually updated the container checksum file.
+    // This doesn't update the checksum reported to SCM, and we need to trigger an ICR.
+    // Marking a container unhealthy will send an ICR.
+    kvHandler.markContainerUnhealthy(container, MetadataScanResult.deleted());
+    waitForDataChecksumsAtSCM(containerID, 2);
+    scmClient.reconcileContainer(containerID);
+
+    waitForDataChecksumsAtSCM(containerID, 1);
+    // Check non-zero checksum after container reconciliation
+    containerReplicas = scmClient.getContainerReplicas(containerID, ClientVersion.CURRENT_VERSION);
+    assertEquals(3, containerReplicas.size());
+    for (HddsProtos.SCMContainerReplicaProto containerReplica: containerReplicas) {
+      assertNotEquals(0, containerReplica.getDataChecksum());
+    }
+
+    // Check non-zero checksum after datanode restart
+    // Restarting all the nodes take more time in mini ozone cluster, so restarting only one node
+    cluster.restartHddsDatanode(0, true);
+    for (StorageContainerManager scm : cluster.getStorageContainerManagers()) {
+      cluster.restartStorageContainerManager(scm, false);
+    }
+    cluster.waitForClusterToBeReady();
+    waitForDataChecksumsAtSCM(containerID, 1);
+    containerReplicas = scmClient.getContainerReplicas(containerID, ClientVersion.CURRENT_VERSION);
+    assertEquals(3, containerReplicas.size());
+    for (HddsProtos.SCMContainerReplicaProto containerReplica: containerReplicas) {
+      assertNotEquals(0, containerReplica.getDataChecksum());
+    }
     TestHelper.validateData(KEY_NAME, data, store, volume, bucket);
   }
 
@@ -622,7 +717,6 @@ private static void startCluster() throws Exception {
         .setSCMServiceId("SecureSCM")
         .setNumOfStorageContainerManagers(3)
         .setNumOfOzoneManagers(1)
-        .setNumDatanodes(3)
         .build();
     cluster.waitForClusterToBeReady();
     rpcClient = OzoneClientFactory.getRpcClient(conf);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
