This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new b6cc4af598 HDDS-12114. Prevent delete commands running after a long
lock wait and send ICR earlier (#7726)
b6cc4af598 is described below
commit b6cc4af5983fec8afdfe2c5a0c6febabbcd20196
Author: Stephen O'Donnell <[email protected]>
AuthorDate: Fri Jan 24 11:10:30 2025 +0000
HDDS-12114. Prevent delete commands running after a long lock wait and send
ICR earlier (#7726)
---
.../common/statemachine/DatanodeConfiguration.java | 14 +++++
.../ozone/container/keyvalue/KeyValueHandler.java | 27 ++++++++-
.../container/keyvalue/TestKeyValueHandler.java | 66 ++++++++++++++++++++++
3 files changed, 106 insertions(+), 1 deletion(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
index 22dff7505c..11ef3e9c18 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
@@ -573,6 +573,20 @@ public void setWaitOnAllFollowers(boolean val) {
private boolean bCheckEmptyContainerDir =
OZONE_DATANODE_CHECK_EMPTY_CONTAINER_DIR_ON_DELETE_DEFAULT;
+ @Config(key = "delete.container.timeout",
+ type = ConfigType.TIME,
+ defaultValue = "60s",
+ tags = { DATANODE },
+ description = "If a delete container request spends more than this time
waiting on the container lock or " +
+ "performing pre checks, the command will be skipped and SCM will
resend it automatically. This avoids " +
+ "commands running for a very long time without SCM being informed of
the progress."
+ )
+ private long deleteContainerTimeoutMs = Duration.ofSeconds(60).toMillis();
+
+ public long getDeleteContainerTimeoutMs() {
+ return deleteContainerTimeoutMs;
+ }
+
@PostConstruct
public void validate() {
if (containerDeleteThreads < 1) {
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index 0ef8d5e68a..267a2ecb66 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -27,6 +27,7 @@
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.time.Clock;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
@@ -146,11 +147,13 @@ public class KeyValueHandler extends Handler {
private final ChunkManager chunkManager;
private final VolumeChoosingPolicy volumeChoosingPolicy;
private final long maxContainerSize;
+ private final long maxDeleteLockWaitMs;
private final Function<ByteBuffer, ByteString> byteBufferToByteString;
private final boolean validateChunkChecksumData;
// A striped lock that is held during container creation.
private final Striped<Lock> containerCreationLocks;
private static FaultInjector injector;
+ private final Clock clock;
public KeyValueHandler(ConfigurationSource config,
String datanodeId,
@@ -158,7 +161,18 @@ public KeyValueHandler(ConfigurationSource config,
VolumeSet volSet,
ContainerMetrics metrics,
IncrementalReportSender<Container> icrSender) {
+ this(config, datanodeId, contSet, volSet, metrics, icrSender,
Clock.systemUTC());
+ }
+
+ public KeyValueHandler(ConfigurationSource config,
+ String datanodeId,
+ ContainerSet contSet,
+ VolumeSet volSet,
+ ContainerMetrics metrics,
+ IncrementalReportSender<Container> icrSender,
+ Clock clock) {
super(config, datanodeId, contSet, volSet, metrics, icrSender);
+ this.clock = clock;
blockManager = new BlockManagerImpl(config);
validateChunkChecksumData = conf.getObject(
DatanodeConfiguration.class).isChunkDataValidationCheck();
@@ -173,6 +187,9 @@ public KeyValueHandler(ConfigurationSource config,
maxContainerSize = (long) config.getStorageSize(
ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
+
+ DatanodeConfiguration dnConf = conf.getObject(DatanodeConfiguration.class);
+ maxDeleteLockWaitMs = dnConf.getDeleteContainerTimeoutMs();
// this striped handler lock is used for synchronizing createContainer
// Requests.
final int threadCountPerDisk = conf.getInt(
@@ -1436,6 +1453,7 @@ private boolean logBlocksFoundOnDisk(Container container)
throws IOException {
private void deleteInternal(Container container, boolean force)
throws StorageContainerException {
+ long startTime = clock.millis();
container.writeLock();
try {
if (container.getContainerData().getVolume().isFailed()) {
@@ -1490,6 +1508,13 @@ private void deleteInternal(Container container, boolean
force)
// 4. container moved to tmp folder
// 5. container content deleted from tmp folder
try {
+ long waitTime = clock.millis() - startTime;
+ if (waitTime > maxDeleteLockWaitMs) {
+ LOG.warn("An attempt to delete container {} took {} ms acquiring
locks and pre-checks. " +
+ "The delete has been skipped and should be retried
automatically by SCM.",
+ container.getContainerData().getContainerID(), waitTime);
+ return;
+ }
container.markContainerForDelete();
long containerId = container.getContainerData().getContainerID();
containerSet.removeContainer(containerId);
@@ -1521,8 +1546,8 @@ private void deleteInternal(Container container, boolean
force)
container.writeUnlock();
}
// Avoid holding write locks for disk operations
- container.delete();
sendICR(container);
+ container.delete();
}
private void triggerVolumeScanAndThrowException(Container container,
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
index d02910358d..83a6cddf4d 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
@@ -22,6 +22,7 @@
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.time.Clock;
import java.util.List;
import java.util.Collections;
import java.util.HashMap;
@@ -46,6 +47,7 @@
import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
import org.apache.hadoop.ozone.container.common.interfaces.Handler;
+import
org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
@@ -456,6 +458,70 @@ public void testDeleteContainer() throws IOException {
}
}
+
+ @Test
+ public void testDeleteContainerTimeout() throws IOException {
+ final String testDir = tempDir.toString();
+ final long containerID = 1L;
+ final String clusterId = UUID.randomUUID().toString();
+ final String datanodeId = UUID.randomUUID().toString();
+ final ConfigurationSource conf = new OzoneConfiguration();
+ final ContainerSet containerSet = new ContainerSet(1000);
+ final MutableVolumeSet volumeSet = mock(MutableVolumeSet.class);
+ final Clock clock = mock(Clock.class);
+ long startTime = System.currentTimeMillis();
+
+ DatanodeConfiguration dnConf = conf.getObject(DatanodeConfiguration.class);
+ when(clock.millis())
+ .thenReturn(startTime)
+ .thenReturn(startTime + dnConf.getDeleteContainerTimeoutMs() + 1);
+
+ HddsVolume hddsVolume = new HddsVolume.Builder(testDir).conf(conf)
+ .clusterID(clusterId).datanodeUuid(datanodeId)
+ .volumeSet(volumeSet)
+ .build();
+ hddsVolume.format(clusterId);
+ hddsVolume.createWorkingDir(clusterId, null);
+ hddsVolume.createTmpDirs(clusterId);
+
+ when(volumeSet.getVolumesList())
+ .thenReturn(Collections.singletonList(hddsVolume));
+
+ List<HddsVolume> hddsVolumeList = StorageVolumeUtil
+ .getHddsVolumesList(volumeSet.getVolumesList());
+
+ assertEquals(1, hddsVolumeList.size());
+
+ final ContainerMetrics metrics = ContainerMetrics.create(conf);
+
+ final AtomicInteger icrReceived = new AtomicInteger(0);
+
+ final KeyValueHandler kvHandler = new KeyValueHandler(conf,
+ datanodeId, containerSet, volumeSet, metrics,
+ c -> icrReceived.incrementAndGet(), clock);
+ kvHandler.setClusterID(clusterId);
+
+ final ContainerCommandRequestProto createContainer =
+ createContainerRequest(datanodeId, containerID);
+ kvHandler.handleCreateContainer(createContainer, null);
+ assertEquals(1, icrReceived.get());
+ assertNotNull(containerSet.getContainer(containerID));
+
+ // The delete should not have gone through due to the mocked clock. The
implementation calls the clock twice:
+ // Once at the start of the method prior to taking the lock, when the
clock will return the start time of the test.
+ // On the second call to the clock, where the implementation checks if the
timeout has expired, the clock will
+ // return start_time + timeout + 1. This will cause the delete to time out
and the container will not be deleted.
+ kvHandler.deleteContainer(containerSet.getContainer(containerID), true);
+ assertEquals(1, icrReceived.get());
+ assertNotNull(containerSet.getContainer(containerID));
+
+ // Delete the container normally, and it should go through. At this stage
all calls to the clock mock will return
+ // the same value, indicating no delay, so the delete operation will
succeed.
+ kvHandler.deleteContainer(containerSet.getContainer(containerID), true);
+ assertEquals(2, icrReceived.get());
+ assertNull(containerSet.getContainer(containerID));
+ }
+
private static ContainerCommandRequestProto createContainerRequest(
String datanodeId, long containerID) {
return ContainerCommandRequestProto.newBuilder()
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]