This is an automated email from the ASF dual-hosted git repository.
spacemonkd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 652bb192e58 HDDS-15110. Fix negative container size handling in
replication (#10126)
652bb192e58 is described below
commit 652bb192e58b797c1244c1aede57834759f25c98
Author: Rishabh Patel <[email protected]>
AuthorDate: Fri May 8 12:06:07 2026 -0700
HDDS-15110. Fix negative container size handling in replication (#10126)
---
.../ozone/container/common/impl/ContainerData.java | 11 +++--
.../replication/TestContainerReplication.java | 53 ++++++++++++++++++++++
2 files changed, 61 insertions(+), 3 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java
index 31b890fdbd3..7dd8157ead6 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java
@@ -707,10 +707,15 @@ public synchronized void incrementBlockBytes(long delta) {
public synchronized void decDeletion(long deletedBytes, long
processedBytes, long deletedBlockCount,
long processedBlockCount) {
+
+ // After subtraction if blockBytes is 0, let it be. Only if it becomes
negative, set the size to 1 byte.
blockBytes -= deletedBytes;
- blockCount -= deletedBlockCount;
- blockPendingDeletion -= processedBlockCount;
- blockPendingDeletionBytes -= processedBytes;
+ if (blockBytes < 0) {
+ blockBytes = 1L;
+ }
+ blockCount = Math.max(0L, blockCount - deletedBlockCount);
+ blockPendingDeletion = Math.max(0L, blockPendingDeletion -
processedBlockCount);
+ blockPendingDeletionBytes = Math.max(0L, blockPendingDeletionBytes -
processedBytes);
}
public synchronized void updateBlocks(long bytes, long count) {
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/replication/TestContainerReplication.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/replication/TestContainerReplication.java
index 968e331103e..1ea92309aa2 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/replication/TestContainerReplication.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/replication/TestContainerReplication.java
@@ -25,6 +25,7 @@
import static
org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.createContainer;
import static org.apache.ozone.test.GenericTestUtils.waitFor;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
import com.google.common.collect.ImmutableList;
import java.io.IOException;
@@ -53,8 +54,11 @@
import org.apache.hadoop.ozone.OzoneConfigKeys;
import org.apache.hadoop.ozone.container.ContainerTestHelper;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
+import org.apache.hadoop.ozone.container.common.interfaces.DBHandle;
import
org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine;
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
+import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
+import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils;
import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand;
import org.apache.ozone.test.GenericTestUtils;
import org.apache.ozone.test.GenericTestUtils.LogCapturer;
@@ -65,6 +69,7 @@
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.EnumSource;
import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
import org.slf4j.event.Level;
/**
@@ -155,6 +160,54 @@ void targetPullsFromWrongService() throws Exception {
ReplicationSupervisor::getReplicationFailureCount);
}
+ /**
+ * Replication must succeed even when the source container's persisted
+ * {@code CONTAINER_BYTES_USED} RocksDB counter has drifted negative.
+ */
+ @ParameterizedTest
+ @ValueSource(longs = {0L, 1L, -1_234_567_890L})
+ void pushSucceedsWhenSourceBytesUsedIsNegative(long containerSize) throws
Exception {
+ DatanodeDetails source = cluster.getHddsDatanodes().get(0)
+ .getDatanodeDetails();
+ DatanodeDetails target = selectOtherNode(source);
+
+ long containerID = createOverAllocatedContainer(source, 2L * 1024L *
1024L);
+
+ poisonBytesUsed(source, containerID, containerSize);
+
+ ReplicateContainerCommand cmd =
+ ReplicateContainerCommand.toTarget(containerID, target);
+
+ queueAndWaitForCompletion(cmd, source,
+ ReplicationSupervisor::getReplicationSuccessCount);
+
+ // Target must end up hosting the container.
+ Container<?> imported = cluster.getHddsDatanode(target)
+ .getDatanodeStateMachine()
+ .getContainer()
+ .getContainerSet()
+ .getContainer(containerID);
+ assertNotNull(imported, "target should import the container despite a
negative bytesUsed on source");
+ }
+
+ private void poisonBytesUsed(DatanodeDetails dn, long containerID, long
poisonValue) throws IOException {
+ HddsDatanodeService dnService = cluster.getHddsDatanode(dn);
+ Container<?> container = dnService.getDatanodeStateMachine().getContainer()
+ .getContainerSet().getContainer(containerID);
+ KeyValueContainerData data =
+ (KeyValueContainerData) container.getContainerData();
+
+ try (DBHandle db = BlockUtils.getDB(data, dnService.getConf())) {
+ db.getStore().getMetadataTable()
+ .put(data.getBytesUsedKey(), poisonValue);
+ }
+ // Keep the in-memory Statistics counter consistent with the on-disk
+ // poisoned value. The import failure is driven by the on-disk value (what
+ // the target reads), but this prevents any subsequent close/flush path on
+ // the source from silently correcting the poison before packing.
+ data.getStatistics().setBlockBytesForTesting(poisonValue);
+ }
+
/**
* Replication fails because source tries to push a non-existent container.
*/
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]