This is an automated email from the ASF dual-hosted git repository.

spacemonkd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 652bb192e58 HDDS-15110. Fix negative container size handling in replication (#10126)
652bb192e58 is described below

commit 652bb192e58b797c1244c1aede57834759f25c98
Author: Rishabh Patel <[email protected]>
AuthorDate: Fri May 8 12:06:07 2026 -0700

    HDDS-15110. Fix negative container size handling in replication (#10126)
---
 .../ozone/container/common/impl/ContainerData.java | 11 +++--
 .../replication/TestContainerReplication.java      | 53 ++++++++++++++++++++++
 2 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java
index 31b890fdbd3..7dd8157ead6 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java
@@ -707,10 +707,15 @@ public synchronized void incrementBlockBytes(long delta) {
 
     public synchronized void decDeletion(long deletedBytes, long 
processedBytes, long deletedBlockCount,
         long processedBlockCount) {
+
+      // After subtraction if blockBytes is 0, let it be. Only if it becomes negative, set the size to 1 byte.
       blockBytes -= deletedBytes;
-      blockCount -= deletedBlockCount;
-      blockPendingDeletion -= processedBlockCount;
-      blockPendingDeletionBytes -= processedBytes;
+      if (blockBytes < 0) {
+        blockBytes = 1L;
+      }
+      blockCount = Math.max(0L, blockCount - deletedBlockCount);
+      blockPendingDeletion = Math.max(0L, blockPendingDeletion - 
processedBlockCount);
+      blockPendingDeletionBytes = Math.max(0L, blockPendingDeletionBytes - 
processedBytes);
     }
 
     public synchronized void updateBlocks(long bytes, long count) {
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/replication/TestContainerReplication.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/replication/TestContainerReplication.java
index 968e331103e..1ea92309aa2 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/replication/TestContainerReplication.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/replication/TestContainerReplication.java
@@ -25,6 +25,7 @@
 import static 
org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.createContainer;
 import static org.apache.ozone.test.GenericTestUtils.waitFor;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 
 import com.google.common.collect.ImmutableList;
 import java.io.IOException;
@@ -53,8 +54,11 @@
 import org.apache.hadoop.ozone.OzoneConfigKeys;
 import org.apache.hadoop.ozone.container.ContainerTestHelper;
 import org.apache.hadoop.ozone.container.common.interfaces.Container;
+import org.apache.hadoop.ozone.container.common.interfaces.DBHandle;
 import 
org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine;
 import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
+import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
+import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils;
 import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand;
 import org.apache.ozone.test.GenericTestUtils;
 import org.apache.ozone.test.GenericTestUtils.LogCapturer;
@@ -65,6 +69,7 @@
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.EnumSource;
 import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
 import org.slf4j.event.Level;
 
 /**
@@ -155,6 +160,54 @@ void targetPullsFromWrongService() throws Exception {
         ReplicationSupervisor::getReplicationFailureCount);
   }
 
+  /**
+   * Replication must succeed even when the source container's persisted
+   * {@code CONTAINER_BYTES_USED} RocksDB counter has drifted negative.
+   */
+  @ParameterizedTest
+  @ValueSource(longs = {0L, 1L, -1_234_567_890L})
+  void pushSucceedsWhenSourceBytesUsedIsNegative(long containerSize) throws 
Exception {
+    DatanodeDetails source = cluster.getHddsDatanodes().get(0)
+        .getDatanodeDetails();
+    DatanodeDetails target = selectOtherNode(source);
+
+    long containerID = createOverAllocatedContainer(source, 2L * 1024L * 
1024L);
+
+    poisonBytesUsed(source, containerID, containerSize);
+
+    ReplicateContainerCommand cmd =
+        ReplicateContainerCommand.toTarget(containerID, target);
+
+    queueAndWaitForCompletion(cmd, source,
+        ReplicationSupervisor::getReplicationSuccessCount);
+
+    // Target must end up hosting the container.
+    Container<?> imported = cluster.getHddsDatanode(target)
+        .getDatanodeStateMachine()
+        .getContainer()
+        .getContainerSet()
+        .getContainer(containerID);
+    assertNotNull(imported, "target should import the container despite a 
negative bytesUsed on source");
+  }
+
+  private void poisonBytesUsed(DatanodeDetails dn, long containerID, long 
poisonValue) throws IOException {
+    HddsDatanodeService dnService = cluster.getHddsDatanode(dn);
+    Container<?> container = dnService.getDatanodeStateMachine().getContainer()
+        .getContainerSet().getContainer(containerID);
+    KeyValueContainerData data =
+        (KeyValueContainerData) container.getContainerData();
+
+    try (DBHandle db = BlockUtils.getDB(data, dnService.getConf())) {
+      db.getStore().getMetadataTable()
+          .put(data.getBytesUsedKey(), poisonValue);
+    }
+    // Keep the in-memory Statistics counter consistent with the on-disk
+    // poisoned value. The import failure is driven by the on-disk value (what
+    // the target reads), but this prevents any subsequent close/flush path on
+    // the source from silently correcting the poison before packing.
+    data.getStatistics().setBlockBytesForTesting(poisonValue);
+  }
+
   /**
    * Replication fails because source tries to push a non-existent container.
    */


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to