This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new b6cc4af598 HDDS-12114. Prevent delete commands running after a long lock wait and send ICR earlier (#7726)
b6cc4af598 is described below

commit b6cc4af5983fec8afdfe2c5a0c6febabbcd20196
Author: Stephen O'Donnell <[email protected]>
AuthorDate: Fri Jan 24 11:10:30 2025 +0000

    HDDS-12114. Prevent delete commands running after a long lock wait and send ICR earlier (#7726)
---
 .../common/statemachine/DatanodeConfiguration.java | 14 +++++
 .../ozone/container/keyvalue/KeyValueHandler.java  | 27 ++++++++-
 .../container/keyvalue/TestKeyValueHandler.java    | 66 ++++++++++++++++++++++
 3 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
index 22dff7505c..11ef3e9c18 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
@@ -573,6 +573,20 @@ public void setWaitOnAllFollowers(boolean val) {
   private boolean bCheckEmptyContainerDir =
       OZONE_DATANODE_CHECK_EMPTY_CONTAINER_DIR_ON_DELETE_DEFAULT;
 
+  @Config(key = "delete.container.timeout",
+      type = ConfigType.TIME,
+      defaultValue = "60s",
+      tags = { DATANODE },
+      description = "If a delete container request spends more than this time waiting on the container lock or " +
+          "performing pre checks, the command will be skipped and SCM will resend it automatically. This avoids " +
+          "commands running for a very long time without SCM being informed of the progress."
+  )
+  private long deleteContainerTimeoutMs = Duration.ofSeconds(60).toMillis();
+
+  public long getDeleteContainerTimeoutMs() {
+    return deleteContainerTimeoutMs;
+  }
+
   @PostConstruct
   public void validate() {
     if (containerDeleteThreads < 1) {
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index 0ef8d5e68a..267a2ecb66 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -27,6 +27,7 @@
 import java.nio.file.DirectoryStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.time.Clock;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedList;
@@ -146,11 +147,13 @@ public class KeyValueHandler extends Handler {
   private final ChunkManager chunkManager;
   private final VolumeChoosingPolicy volumeChoosingPolicy;
   private final long maxContainerSize;
+  private final long maxDeleteLockWaitMs;
   private final Function<ByteBuffer, ByteString> byteBufferToByteString;
   private final boolean validateChunkChecksumData;
   // A striped lock that is held during container creation.
   private final Striped<Lock> containerCreationLocks;
   private static FaultInjector injector;
+  private final Clock clock;
 
   public KeyValueHandler(ConfigurationSource config,
                          String datanodeId,
@@ -158,7 +161,18 @@ public KeyValueHandler(ConfigurationSource config,
                          VolumeSet volSet,
                          ContainerMetrics metrics,
                          IncrementalReportSender<Container> icrSender) {
+    this(config, datanodeId, contSet, volSet, metrics, icrSender, Clock.systemUTC());
+  }
+
+  public KeyValueHandler(ConfigurationSource config,
+                         String datanodeId,
+                         ContainerSet contSet,
+                         VolumeSet volSet,
+                         ContainerMetrics metrics,
+                         IncrementalReportSender<Container> icrSender,
+                         Clock clock) {
     super(config, datanodeId, contSet, volSet, metrics, icrSender);
+    this.clock = clock;
     blockManager = new BlockManagerImpl(config);
     validateChunkChecksumData = conf.getObject(
         DatanodeConfiguration.class).isChunkDataValidationCheck();
@@ -173,6 +187,9 @@ public KeyValueHandler(ConfigurationSource config,
     maxContainerSize = (long) config.getStorageSize(
         ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
         ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
+
+    DatanodeConfiguration dnConf = conf.getObject(DatanodeConfiguration.class);
+    maxDeleteLockWaitMs = dnConf.getDeleteContainerTimeoutMs();
     // this striped handler lock is used for synchronizing createContainer
     // Requests.
     final int threadCountPerDisk = conf.getInt(
@@ -1436,6 +1453,7 @@ private boolean logBlocksFoundOnDisk(Container container) throws IOException {
 
   private void deleteInternal(Container container, boolean force)
       throws StorageContainerException {
+    long startTime = clock.millis();
     container.writeLock();
     try {
       if (container.getContainerData().getVolume().isFailed()) {
@@ -1490,6 +1508,13 @@ private void deleteInternal(Container container, boolean force)
         // 4. container moved to tmp folder
         // 5. container content deleted from tmp folder
         try {
+          long waitTime = clock.millis() - startTime;
+          if (waitTime > maxDeleteLockWaitMs) {
+            LOG.warn("An attempt to delete container {} took {} ms acquiring locks and pre-checks. " +
+                    "The delete has been skipped and should be retried automatically by SCM.",
+                container.getContainerData().getContainerID(), waitTime);
+            return;
+          }
           container.markContainerForDelete();
           long containerId = container.getContainerData().getContainerID();
           containerSet.removeContainer(containerId);
@@ -1521,8 +1546,8 @@ private void deleteInternal(Container container, boolean force)
       container.writeUnlock();
     }
     // Avoid holding write locks for disk operations
-    container.delete();
     sendICR(container);
+    container.delete();
   }
 
   private void triggerVolumeScanAndThrowException(Container container,
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
index d02910358d..83a6cddf4d 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
@@ -22,6 +22,7 @@
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.time.Clock;
 import java.util.List;
 import java.util.Collections;
 import java.util.HashMap;
@@ -46,6 +47,7 @@
 import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher;
 import org.apache.hadoop.ozone.container.common.interfaces.Container;
 import org.apache.hadoop.ozone.container.common.interfaces.Handler;
+import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
 import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
 import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
 import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
@@ -456,6 +458,70 @@ public void testDeleteContainer() throws IOException {
     }
   }
 
+
+  @Test
+  public void testDeleteContainerTimeout() throws IOException {
+    final String testDir = tempDir.toString();
+    final long containerID = 1L;
+    final String clusterId = UUID.randomUUID().toString();
+    final String datanodeId = UUID.randomUUID().toString();
+    final ConfigurationSource conf = new OzoneConfiguration();
+    final ContainerSet containerSet = new ContainerSet(1000);
+    final MutableVolumeSet volumeSet = mock(MutableVolumeSet.class);
+    final Clock clock = mock(Clock.class);
+    long startTime = System.currentTimeMillis();
+
+    DatanodeConfiguration dnConf = conf.getObject(DatanodeConfiguration.class);
+    when(clock.millis())
+        .thenReturn(startTime)
+        .thenReturn(startTime + dnConf.getDeleteContainerTimeoutMs() + 1);
+
+    HddsVolume hddsVolume = new HddsVolume.Builder(testDir).conf(conf)
+        .clusterID(clusterId).datanodeUuid(datanodeId)
+        .volumeSet(volumeSet)
+        .build();
+    hddsVolume.format(clusterId);
+    hddsVolume.createWorkingDir(clusterId, null);
+    hddsVolume.createTmpDirs(clusterId);
+
+    when(volumeSet.getVolumesList())
+        .thenReturn(Collections.singletonList(hddsVolume));
+
+    List<HddsVolume> hddsVolumeList = StorageVolumeUtil
+        .getHddsVolumesList(volumeSet.getVolumesList());
+
+    assertEquals(1, hddsVolumeList.size());
+
+    final ContainerMetrics metrics = ContainerMetrics.create(conf);
+
+    final AtomicInteger icrReceived = new AtomicInteger(0);
+
+    final KeyValueHandler kvHandler = new KeyValueHandler(conf,
+        datanodeId, containerSet, volumeSet, metrics,
+        c -> icrReceived.incrementAndGet(), clock);
+    kvHandler.setClusterID(clusterId);
+
+    final ContainerCommandRequestProto createContainer =
+        createContainerRequest(datanodeId, containerID);
+    kvHandler.handleCreateContainer(createContainer, null);
+    assertEquals(1, icrReceived.get());
+    assertNotNull(containerSet.getContainer(containerID));
+
+    // The delete should not have gone through due to the mocked clock. The implementation calls the clock twice:
+    // once at the start of the method prior to taking the lock, when the clock will return the start time of the test.
+    // On the second call to the clock, where the implementation checks if the timeout has expired, the clock will
+    // return start_time + timeout + 1. This will cause the delete to timeout and the container will not be deleted.
+    kvHandler.deleteContainer(containerSet.getContainer(containerID), true);
+    assertEquals(1, icrReceived.get());
+    assertNotNull(containerSet.getContainer(containerID));
+
+    // Delete the container normally, and it should go through. At this stage all calls to the clock mock will return
+    // the same value, indicating there is no delay, so the delete operation will succeed.
+    kvHandler.deleteContainer(containerSet.getContainer(containerID), true);
+    assertEquals(2, icrReceived.get());
+    assertNull(containerSet.getContainer(containerID));
+  }
+
   private static ContainerCommandRequestProto createContainerRequest(
       String datanodeId, long containerID) {
     return ContainerCommandRequestProto.newBuilder()


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to