This is an automated email from the ASF dual-hosted git repository.
sshenoy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new c86aa7ef91 HDDS-8977. Ratis crash if a lot of directories deleted at
once (#5207)
c86aa7ef91 is described below
commit c86aa7ef91faf02e544e4762245756771cbfe013
Author: Sumit Agrawal <[email protected]>
AuthorDate: Tue Aug 29 12:51:26 2023 +0530
HDDS-8977. Ratis crash if a lot of directories deleted at once (#5207)
---
.../common/src/main/resources/ozone-default.xml | 2 +-
.../org/apache/hadoop/ozone/om/OMConfigKeys.java | 5 +-
.../om/service/AbstractKeyDeletingService.java | 15 +-
.../ozone/om/service/DirectoryDeletingService.java | 30 +++-
.../ozone/om/service/SnapshotDeletingService.java | 28 +++-
.../om/service/TestDirectoryDeletingService.java | 160 +++++++++++++++++++++
6 files changed, 235 insertions(+), 5 deletions(-)
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml
b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 7736def18b..919540870c 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -3373,7 +3373,7 @@
</property>
<property>
<name>ozone.path.deleting.limit.per.task</name>
- <value>10000</value>
+ <value>6000</value>
<tag>OZONE, PERFORMANCE, OM</tag>
<description>A maximum number of paths(dirs/files) to be deleted by
directory deleting service per time interval.
diff --git
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
index 4a2369963d..3a193a39e7 100644
---
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
+++
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
@@ -368,7 +368,10 @@ public final class OMConfigKeys {
public static final String OZONE_PATH_DELETING_LIMIT_PER_TASK =
"ozone.path.deleting.limit.per.task";
- public static final int OZONE_PATH_DELETING_LIMIT_PER_TASK_DEFAULT = 10000;
+ // default is 6000 taking account of 32MB buffer size, and assuming
+ // 4KB size (considering acls, key/file name, and other metadata) * 6000
+ // resulting 24MB
+ public static final int OZONE_PATH_DELETING_LIMIT_PER_TASK_DEFAULT = 6000;
public static final String SNAPSHOT_SST_DELETING_LIMIT_PER_TASK =
"ozone.snapshot.filtering.limit.per.task";
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java
index d87cb1cce0..7482919ef1 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java
@@ -393,12 +393,14 @@ public abstract class AbstractKeyDeletingService extends
BackgroundService
long dirNum, long subDirNum, long subFileNum,
List<Pair<String, OmKeyInfo>> allSubDirList,
List<PurgePathRequest> purgePathRequestList,
- String snapTableKey, long startTime) {
+ String snapTableKey, long startTime,
+ int remainingBufLimit) {
// Optimization to handle delete sub-dir and keys to remove quickly
// This case will be useful to handle when depth of directory is high
int subdirDelNum = 0;
int subDirRecursiveCnt = 0;
+ int consumedSize = 0;
while (remainNum > 0 && subDirRecursiveCnt < allSubDirList.size()) {
try {
Pair<String, OmKeyInfo> stringOmKeyInfoPair
@@ -407,6 +409,12 @@ public abstract class AbstractKeyDeletingService extends
BackgroundService
remainNum, stringOmKeyInfoPair.getValue(),
stringOmKeyInfoPair.getKey(), allSubDirList,
getOzoneManager().getKeyManager());
+ if (isBufferLimitCrossed(remainingBufLimit, consumedSize,
+ request.getSerializedSize())) {
+ // ignore further add request
+ break;
+ }
+ consumedSize += request.getSerializedSize();
purgePathRequestList.add(request);
remainNum = remainNum - request.getDeletedSubFilesCount();
remainNum = remainNum - request.getMarkDeletedSubDirsCount();
@@ -445,6 +453,11 @@ public abstract class AbstractKeyDeletingService extends
BackgroundService
return remainNum;
}
+ protected boolean isBufferLimitCrossed(
+ int maxLimit, int cLimit, int increment) {
+ return cLimit + increment >= maxLimit;
+ }
+
protected SnapshotInfo getPreviousActiveSnapshot(SnapshotInfo snapInfo,
SnapshotChainManager chainManager, OmSnapshotManager omSnapshotManager)
throws IOException {
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java
index 59b116bca5..ed6c3914da 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.ozone.om.service;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.conf.StorageUnit;
import org.apache.hadoop.hdds.utils.BackgroundTask;
import org.apache.hadoop.hdds.utils.BackgroundTaskQueue;
import org.apache.hadoop.hdds.utils.BackgroundTaskResult;
@@ -26,6 +27,7 @@ import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.hdds.utils.db.Table.KeyValue;
import org.apache.hadoop.hdds.utils.db.TableIterator;
import org.apache.hadoop.ozone.om.IOmMetadataReader;
+import org.apache.hadoop.ozone.om.OMConfigKeys;
import org.apache.hadoop.ozone.om.OmMetadataManagerImpl;
import org.apache.hadoop.ozone.om.OmSnapshot;
import org.apache.hadoop.ozone.om.OmSnapshotManager;
@@ -76,9 +78,11 @@ public class DirectoryDeletingService extends
AbstractKeyDeletingService {
// or write to same tables and can send deletion requests for same key
// multiple times.
private static final int DIR_DELETING_CORE_POOL_SIZE = 1;
+ private static final int MIN_ERR_LIMIT_PER_TASK = 1000;
// Number of items(dirs/files) to be batched in an iteration.
private final long pathLimitPerTask;
+ private final int ratisByteLimit;
private final AtomicBoolean suspended;
public DirectoryDeletingService(long interval, TimeUnit unit,
@@ -89,6 +93,12 @@ public class DirectoryDeletingService extends
AbstractKeyDeletingService {
this.pathLimitPerTask = configuration
.getInt(OZONE_PATH_DELETING_LIMIT_PER_TASK,
OZONE_PATH_DELETING_LIMIT_PER_TASK_DEFAULT);
+ int limit = (int) configuration.getStorageSize(
+ OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT,
+ OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT_DEFAULT,
+ StorageUnit.BYTES);
+ // always go to 90% of max limit for request as other header will be added
+ this.ratisByteLimit = (int) (limit * 0.9);
this.suspended = new AtomicBoolean(false);
}
@@ -141,6 +151,7 @@ public class DirectoryDeletingService extends
AbstractKeyDeletingService {
long subDirNum = 0L;
long subFileNum = 0L;
long remainNum = pathLimitPerTask;
+ int consumedSize = 0;
List<PurgePathRequest> purgePathRequestList = new ArrayList<>();
List<Pair<String, OmKeyInfo>> allSubDirList
= new ArrayList<>((int) remainNum);
@@ -170,6 +181,22 @@ public class DirectoryDeletingService extends
AbstractKeyDeletingService {
remainNum, pendingDeletedDirInfo.getValue(),
pendingDeletedDirInfo.getKey(), allSubDirList,
getOzoneManager().getKeyManager());
+ if (isBufferLimitCrossed(ratisByteLimit, consumedSize,
+ request.getSerializedSize())) {
+ if (purgePathRequestList.size() != 0) {
+ // if message buffer reaches max limit, avoid sending further
+ remainNum = 0;
+ break;
+ }
+ // if directory itself is having a lot of keys / files,
+ // reduce capacity to minimum level
+ remainNum = MIN_ERR_LIMIT_PER_TASK;
+ request = prepareDeleteDirRequest(
+ remainNum, pendingDeletedDirInfo.getValue(),
+ pendingDeletedDirInfo.getKey(), allSubDirList,
+ getOzoneManager().getKeyManager());
+ }
+ consumedSize += request.getSerializedSize();
purgePathRequestList.add(request);
remainNum = remainNum - request.getDeletedSubFilesCount();
remainNum = remainNum - request.getMarkDeletedSubDirsCount();
@@ -184,7 +211,8 @@ public class DirectoryDeletingService extends
AbstractKeyDeletingService {
optimizeDirDeletesAndSubmitRequest(
remainNum, dirNum, subDirNum, subFileNum,
- allSubDirList, purgePathRequestList, null, startTime);
+ allSubDirList, purgePathRequestList, null, startTime,
+ ratisByteLimit - consumedSize);
} catch (IOException e) {
LOG.error("Error while running delete directories and files " +
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java
index db8d54ad5e..d4b545068c 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java
@@ -22,6 +22,7 @@ import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.ServiceException;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.conf.StorageUnit;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol;
import org.apache.hadoop.hdds.utils.BackgroundTask;
@@ -34,6 +35,7 @@ import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.common.BlockGroup;
import org.apache.hadoop.ozone.lock.BootstrapStateHandler;
import org.apache.hadoop.ozone.om.IOmMetadataReader;
+import org.apache.hadoop.ozone.om.OMConfigKeys;
import org.apache.hadoop.ozone.om.OmMetadataManagerImpl;
import org.apache.hadoop.ozone.om.KeyManagerImpl;
import org.apache.hadoop.ozone.om.OmSnapshot;
@@ -89,6 +91,7 @@ public class SnapshotDeletingService extends
AbstractKeyDeletingService {
// from the same table and can send deletion requests for same snapshot
// multiple times.
private static final int SNAPSHOT_DELETING_CORE_POOL_SIZE = 1;
+ private static final int MIN_ERR_LIMIT_PER_TASK = 1000;
private final ClientId clientId = ClientId.randomId();
private final OzoneManager ozoneManager;
@@ -99,6 +102,7 @@ public class SnapshotDeletingService extends
AbstractKeyDeletingService {
private final AtomicLong successRunCount;
private final long snapshotDeletionPerTask;
private final int keyLimitPerSnapshot;
+ private final int ratisByteLimit;
public SnapshotDeletingService(long interval, long serviceTimeout,
OzoneManager ozoneManager, ScmBlockLocationProtocol scmClient)
@@ -117,6 +121,12 @@ public class SnapshotDeletingService extends
AbstractKeyDeletingService {
this.snapshotDeletionPerTask = conf
.getLong(SNAPSHOT_DELETING_LIMIT_PER_TASK,
SNAPSHOT_DELETING_LIMIT_PER_TASK_DEFAULT);
+ int limit = (int) conf.getStorageSize(
+ OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT,
+ OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT_DEFAULT,
+ StorageUnit.BYTES);
+ // always go to 90% of max limit for request as other header will be added
+ this.ratisByteLimit = (int) (limit * 0.9);
this.keyLimitPerSnapshot = conf.getInt(
OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK,
OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK_DEFAULT);
@@ -381,6 +391,7 @@ public class SnapshotDeletingService extends
AbstractKeyDeletingService {
long subDirNum = 0L;
long subFileNum = 0L;
long remainNum = keyLimitPerSnapshot;
+ int consumedSize = 0;
List<PurgePathRequest> purgePathRequestList = new ArrayList<>();
List<Pair<String, OmKeyInfo>> allSubDirList
= new ArrayList<>(keyLimitPerSnapshot);
@@ -401,6 +412,21 @@ public class SnapshotDeletingService extends
AbstractKeyDeletingService {
PurgePathRequest request = prepareDeleteDirRequest(
remainNum, deletedDir.getValue(), deletedDir.getKey(),
allSubDirList, omSnapshot.getKeyManager());
+ if (isBufferLimitCrossed(ratisByteLimit, consumedSize,
+ request.getSerializedSize())) {
+ if (purgePathRequestList.size() != 0) {
+ // if message buffer reaches max limit, avoid sending further
+ remainNum = 0;
+ break;
+ }
+ // if directory itself is having a lot of keys / files,
+ // reduce capacity to minimum level
+ remainNum = MIN_ERR_LIMIT_PER_TASK;
+ request = prepareDeleteDirRequest(
+ remainNum, deletedDir.getValue(), deletedDir.getKey(),
+ allSubDirList, omSnapshot.getKeyManager());
+ }
+ consumedSize += request.getSerializedSize();
purgePathRequestList.add(request);
remainNum = remainNum - request.getDeletedSubFilesCount();
remainNum = remainNum - request.getMarkDeletedSubDirsCount();
@@ -418,7 +444,7 @@ public class SnapshotDeletingService extends
AbstractKeyDeletingService {
remainNum = optimizeDirDeletesAndSubmitRequest(remainNum, dirNum,
subDirNum, subFileNum, allSubDirList, purgePathRequestList,
- snapInfo.getTableKey(), startTime);
+ snapInfo.getTableKey(), startTime, ratisByteLimit - consumedSize);
} catch (IOException e) {
LOG.error("Error while running delete directories and files for " +
"snapshot " + snapInfo.getTableKey() + " in snapshot deleting " +
diff --git
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingService.java
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingService.java
new file mode 100644
index 0000000000..22f2cf6d42
--- /dev/null
+++
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingService.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.ozone.om.service;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.server.ServerUtils;
+import org.apache.hadoop.hdds.utils.db.DBConfigFromFile;
+import org.apache.hadoop.ozone.om.KeyManager;
+import org.apache.hadoop.ozone.om.OMConfigKeys;
+import org.apache.hadoop.ozone.om.OmTestManagers;
+import org.apache.hadoop.ozone.om.OzoneManager;
+import org.apache.hadoop.ozone.om.helpers.BucketLayout;
+import org.apache.hadoop.ozone.om.helpers.OmBucketInfo;
+import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo;
+import org.apache.hadoop.ozone.om.helpers.OmKeyArgs;
+import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
+import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol;
+import org.apache.hadoop.ozone.om.request.OMRequestTestUtils;
+import org.apache.hadoop.util.Time;
+import org.apache.ozone.test.GenericTestUtils;
+import org.apache.ratis.util.ExitUtils;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import static
org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL;
+
+/**
+ * Test Directory Deleting Service.
+ */
+public class TestDirectoryDeletingService {
+ @Rule
+ public TemporaryFolder folder = new TemporaryFolder();
+ private OzoneManagerProtocol writeClient;
+ private OzoneManager om;
+ private String volumeName;
+ private String bucketName;
+
+ @BeforeClass
+ public static void setup() {
+ ExitUtils.disableSystemExit();
+ }
+
+ private OzoneConfiguration createConfAndInitValues() throws IOException {
+ OzoneConfiguration conf = new OzoneConfiguration();
+ File newFolder = folder.newFolder();
+ if (!newFolder.exists()) {
+ Assert.assertTrue(newFolder.mkdirs());
+ }
+ System.setProperty(DBConfigFromFile.CONFIG_DIR, "/");
+ ServerUtils.setOzoneMetaDirPath(conf, newFolder.toString());
+ conf.setTimeDuration(OZONE_DIR_DELETING_SERVICE_INTERVAL, 3000,
+ TimeUnit.MILLISECONDS);
+ conf.set(OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT, "4MB");
+ conf.setQuietMode(false);
+
+ volumeName = String.format("volume01");
+ bucketName = String.format("bucket01");
+
+ return conf;
+ }
+
+ @After
+ public void cleanup() throws Exception {
+ om.stop();
+ }
+
+ @Test
+ public void testDeleteDirectoryCrossingSizeLimit() throws Exception {
+ OzoneConfiguration conf = createConfAndInitValues();
+ OmTestManagers omTestManagers
+ = new OmTestManagers(conf);
+ KeyManager keyManager = omTestManagers.getKeyManager();
+ writeClient = omTestManagers.getWriteClient();
+ om = omTestManagers.getOzoneManager();
+
+ OMRequestTestUtils.addVolumeAndBucketToDB(volumeName, bucketName,
+ om.getMetadataManager(), BucketLayout.FILE_SYSTEM_OPTIMIZED);
+
+ String bucketKey = om.getMetadataManager().getBucketKey(
+ volumeName, bucketName);
+ OmBucketInfo bucketInfo = om.getMetadataManager().getBucketTable()
+ .get(bucketKey);
+
+ // create parent directory and 2000 files in it crossing
+ // size of 4MB as defined for the testcase
+ StringBuilder sb = new StringBuilder("test");
+ for (int i = 0; i < 100; ++i) {
+ sb.append("0123456789");
+ }
+ String longName = sb.toString();
+ OmDirectoryInfo dir1 = new OmDirectoryInfo.Builder()
+ .setName("dir" + longName)
+ .setCreationTime(Time.now())
+ .setModificationTime(Time.now())
+ .setObjectID(1)
+ .setParentObjectID(bucketInfo.getObjectID())
+ .setUpdateID(0)
+ .build();
+ OMRequestTestUtils.addDirKeyToDirTable(true, dir1, volumeName, bucketName,
+ 1L, om.getMetadataManager());
+
+ for (int i = 0; i < 2000; ++i) {
+ String keyName = "key" + longName + i;
+ OmKeyInfo omKeyInfo =
+ OMRequestTestUtils.createOmKeyInfo(volumeName, bucketName,
+ keyName, HddsProtos.ReplicationType.RATIS,
+ HddsProtos.ReplicationFactor.ONE, dir1.getObjectID() + 1 + i,
+ dir1.getObjectID(), 100, Time.now());
+ OMRequestTestUtils.addFileToKeyTable(false, true, keyName,
+ omKeyInfo, 1234L, i + 1, om.getMetadataManager());
+ }
+
+ // delete directory recursively
+ OmKeyArgs delArgs = new OmKeyArgs.Builder()
+ .setVolumeName(volumeName)
+ .setBucketName(bucketName)
+ .setKeyName("dir" + longName)
+ .setReplicationConfig(StandaloneReplicationConfig.getInstance(
+ HddsProtos.ReplicationFactor.ONE))
+ .setDataSize(0).setRecursive(true)
+ .build();
+ writeClient.deleteKey(delArgs);
+
+ // wait for file count of only 1000 but not all 2000 files
+ // as logic, will take 1000 files
+ DirectoryDeletingService dirDeletingService =
+ (DirectoryDeletingService) keyManager.getDirDeletingService();
+ GenericTestUtils.waitFor(
+ () -> dirDeletingService.getMovedFilesCount() >= 1000
+ && dirDeletingService.getMovedFilesCount() < 2000,
+ 500, 60000);
+ Assert.assertTrue(dirDeletingService.getRunCount().get() >= 1);
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]