This is an automated email from the ASF dual-hosted git repository.

sshenoy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new c86aa7ef91 HDDS-8977. Ratis crash if a lot of directories deleted at 
once (#5207)
c86aa7ef91 is described below

commit c86aa7ef91faf02e544e4762245756771cbfe013
Author: Sumit Agrawal <[email protected]>
AuthorDate: Tue Aug 29 12:51:26 2023 +0530

    HDDS-8977. Ratis crash if a lot of directories deleted at once (#5207)
---
 .../common/src/main/resources/ozone-default.xml    |   2 +-
 .../org/apache/hadoop/ozone/om/OMConfigKeys.java   |   5 +-
 .../om/service/AbstractKeyDeletingService.java     |  15 +-
 .../ozone/om/service/DirectoryDeletingService.java |  30 +++-
 .../ozone/om/service/SnapshotDeletingService.java  |  28 +++-
 .../om/service/TestDirectoryDeletingService.java   | 160 +++++++++++++++++++++
 6 files changed, 235 insertions(+), 5 deletions(-)

diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml 
b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 7736def18b..919540870c 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -3373,7 +3373,7 @@
   </property>
   <property>
     <name>ozone.path.deleting.limit.per.task</name>
-    <value>10000</value>
+    <value>6000</value>
     <tag>OZONE, PERFORMANCE, OM</tag>
     <description>A maximum number of paths(dirs/files) to be deleted by
       directory deleting service per time interval.
diff --git 
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
 
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
index 4a2369963d..3a193a39e7 100644
--- 
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
+++ 
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
@@ -368,7 +368,10 @@ public final class OMConfigKeys {
 
   public static final String OZONE_PATH_DELETING_LIMIT_PER_TASK =
       "ozone.path.deleting.limit.per.task";
-  public static final int OZONE_PATH_DELETING_LIMIT_PER_TASK_DEFAULT = 10000;
+  // default is 6000, taking into account the 32MB buffer size and assuming
+  // 4KB per path (considering ACLs, key/file name, and other metadata) * 6000,
+  // resulting in 24MB
+  public static final int OZONE_PATH_DELETING_LIMIT_PER_TASK_DEFAULT = 6000;
 
   public static final String SNAPSHOT_SST_DELETING_LIMIT_PER_TASK =
       "ozone.snapshot.filtering.limit.per.task";
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java
index d87cb1cce0..7482919ef1 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java
@@ -393,12 +393,14 @@ public abstract class AbstractKeyDeletingService extends 
BackgroundService
       long dirNum, long subDirNum, long subFileNum,
       List<Pair<String, OmKeyInfo>> allSubDirList,
       List<PurgePathRequest> purgePathRequestList,
-      String snapTableKey, long startTime) {
+      String snapTableKey, long startTime,
+      int remainingBufLimit) {
 
     // Optimization to handle delete sub-dir and keys to remove quickly
     // This case will be useful to handle when depth of directory is high
     int subdirDelNum = 0;
     int subDirRecursiveCnt = 0;
+    int consumedSize = 0;
     while (remainNum > 0 && subDirRecursiveCnt < allSubDirList.size()) {
       try {
         Pair<String, OmKeyInfo> stringOmKeyInfoPair
@@ -407,6 +409,12 @@ public abstract class AbstractKeyDeletingService extends 
BackgroundService
             remainNum, stringOmKeyInfoPair.getValue(),
             stringOmKeyInfoPair.getKey(), allSubDirList,
             getOzoneManager().getKeyManager());
+        if (isBufferLimitCrossed(remainingBufLimit, consumedSize,
+            request.getSerializedSize())) {
+          // ignore further add request
+          break;
+        }
+        consumedSize += request.getSerializedSize();
         purgePathRequestList.add(request);
         remainNum = remainNum - request.getDeletedSubFilesCount();
         remainNum = remainNum - request.getMarkDeletedSubDirsCount();
@@ -445,6 +453,11 @@ public abstract class AbstractKeyDeletingService extends 
BackgroundService
     return remainNum;
   }
 
+  protected boolean isBufferLimitCrossed(
+      int maxLimit, int cLimit, int increment) {
+    return cLimit + increment >= maxLimit;
+  }
+
   protected SnapshotInfo getPreviousActiveSnapshot(SnapshotInfo snapInfo,
       SnapshotChainManager chainManager, OmSnapshotManager omSnapshotManager)
       throws IOException {
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java
index 59b116bca5..ed6c3914da 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.ozone.om.service;
 import com.google.common.annotations.VisibleForTesting;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.conf.StorageUnit;
 import org.apache.hadoop.hdds.utils.BackgroundTask;
 import org.apache.hadoop.hdds.utils.BackgroundTaskQueue;
 import org.apache.hadoop.hdds.utils.BackgroundTaskResult;
@@ -26,6 +27,7 @@ import org.apache.hadoop.hdds.utils.db.Table;
 import org.apache.hadoop.hdds.utils.db.Table.KeyValue;
 import org.apache.hadoop.hdds.utils.db.TableIterator;
 import org.apache.hadoop.ozone.om.IOmMetadataReader;
+import org.apache.hadoop.ozone.om.OMConfigKeys;
 import org.apache.hadoop.ozone.om.OmMetadataManagerImpl;
 import org.apache.hadoop.ozone.om.OmSnapshot;
 import org.apache.hadoop.ozone.om.OmSnapshotManager;
@@ -76,9 +78,11 @@ public class DirectoryDeletingService extends 
AbstractKeyDeletingService {
   // or write to same tables and can send deletion requests for same key
   // multiple times.
   private static final int DIR_DELETING_CORE_POOL_SIZE = 1;
+  private static final int MIN_ERR_LIMIT_PER_TASK = 1000;
 
   // Number of items(dirs/files) to be batched in an iteration.
   private final long pathLimitPerTask;
+  private final int ratisByteLimit;
   private final AtomicBoolean suspended;
 
   public DirectoryDeletingService(long interval, TimeUnit unit,
@@ -89,6 +93,12 @@ public class DirectoryDeletingService extends 
AbstractKeyDeletingService {
     this.pathLimitPerTask = configuration
         .getInt(OZONE_PATH_DELETING_LIMIT_PER_TASK,
             OZONE_PATH_DELETING_LIMIT_PER_TASK_DEFAULT);
+    int limit = (int) configuration.getStorageSize(
+        OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT,
+        OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT_DEFAULT,
+        StorageUnit.BYTES);
+    // always use 90% of the max limit for the request, as other headers will be added
+    this.ratisByteLimit = (int) (limit * 0.9);
     this.suspended = new AtomicBoolean(false);
   }
 
@@ -141,6 +151,7 @@ public class DirectoryDeletingService extends 
AbstractKeyDeletingService {
         long subDirNum = 0L;
         long subFileNum = 0L;
         long remainNum = pathLimitPerTask;
+        int consumedSize = 0;
         List<PurgePathRequest> purgePathRequestList = new ArrayList<>();
         List<Pair<String, OmKeyInfo>> allSubDirList
             = new ArrayList<>((int) remainNum);
@@ -170,6 +181,22 @@ public class DirectoryDeletingService extends 
AbstractKeyDeletingService {
                 remainNum, pendingDeletedDirInfo.getValue(),
                 pendingDeletedDirInfo.getKey(), allSubDirList,
                 getOzoneManager().getKeyManager());
+            if (isBufferLimitCrossed(ratisByteLimit, consumedSize,
+                request.getSerializedSize())) {
+              if (purgePathRequestList.size() != 0) {
+                // if message buffer reaches max limit, avoid sending further
+                remainNum = 0;
+                break;
+              }
+              // if directory itself is having a lot of keys / files,
+              // reduce capacity to minimum level
+              remainNum = MIN_ERR_LIMIT_PER_TASK;
+              request = prepareDeleteDirRequest(
+                  remainNum, pendingDeletedDirInfo.getValue(),
+                  pendingDeletedDirInfo.getKey(), allSubDirList,
+                  getOzoneManager().getKeyManager());
+            }
+            consumedSize += request.getSerializedSize();
             purgePathRequestList.add(request);
             remainNum = remainNum - request.getDeletedSubFilesCount();
             remainNum = remainNum - request.getMarkDeletedSubDirsCount();
@@ -184,7 +211,8 @@ public class DirectoryDeletingService extends 
AbstractKeyDeletingService {
 
           optimizeDirDeletesAndSubmitRequest(
               remainNum, dirNum, subDirNum, subFileNum,
-              allSubDirList, purgePathRequestList, null, startTime);
+              allSubDirList, purgePathRequestList, null, startTime,
+              ratisByteLimit - consumedSize);
 
         } catch (IOException e) {
           LOG.error("Error while running delete directories and files " +
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java
index db8d54ad5e..d4b545068c 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java
@@ -22,6 +22,7 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.protobuf.ServiceException;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.conf.StorageUnit;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol;
 import org.apache.hadoop.hdds.utils.BackgroundTask;
@@ -34,6 +35,7 @@ import org.apache.hadoop.ozone.OzoneConsts;
 import org.apache.hadoop.ozone.common.BlockGroup;
 import org.apache.hadoop.ozone.lock.BootstrapStateHandler;
 import org.apache.hadoop.ozone.om.IOmMetadataReader;
+import org.apache.hadoop.ozone.om.OMConfigKeys;
 import org.apache.hadoop.ozone.om.OmMetadataManagerImpl;
 import org.apache.hadoop.ozone.om.KeyManagerImpl;
 import org.apache.hadoop.ozone.om.OmSnapshot;
@@ -89,6 +91,7 @@ public class SnapshotDeletingService extends 
AbstractKeyDeletingService {
   // from the same table and can send deletion requests for same snapshot
   // multiple times.
   private static final int SNAPSHOT_DELETING_CORE_POOL_SIZE = 1;
+  private static final int MIN_ERR_LIMIT_PER_TASK = 1000;
   private final ClientId clientId = ClientId.randomId();
 
   private final OzoneManager ozoneManager;
@@ -99,6 +102,7 @@ public class SnapshotDeletingService extends 
AbstractKeyDeletingService {
   private final AtomicLong successRunCount;
   private final long snapshotDeletionPerTask;
   private final int keyLimitPerSnapshot;
+  private final int ratisByteLimit;
 
   public SnapshotDeletingService(long interval, long serviceTimeout,
       OzoneManager ozoneManager, ScmBlockLocationProtocol scmClient)
@@ -117,6 +121,12 @@ public class SnapshotDeletingService extends 
AbstractKeyDeletingService {
     this.snapshotDeletionPerTask = conf
         .getLong(SNAPSHOT_DELETING_LIMIT_PER_TASK,
         SNAPSHOT_DELETING_LIMIT_PER_TASK_DEFAULT);
+    int limit = (int) conf.getStorageSize(
+        OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT,
+        OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT_DEFAULT,
+        StorageUnit.BYTES);
+    // always use 90% of the max limit for the request, as other headers will be added
+    this.ratisByteLimit = (int) (limit * 0.9);
     this.keyLimitPerSnapshot = conf.getInt(
         OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK,
         OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK_DEFAULT);
@@ -381,6 +391,7 @@ public class SnapshotDeletingService extends 
AbstractKeyDeletingService {
       long subDirNum = 0L;
       long subFileNum = 0L;
       long remainNum = keyLimitPerSnapshot;
+      int consumedSize = 0;
       List<PurgePathRequest> purgePathRequestList = new ArrayList<>();
       List<Pair<String, OmKeyInfo>> allSubDirList
           = new ArrayList<>(keyLimitPerSnapshot);
@@ -401,6 +412,21 @@ public class SnapshotDeletingService extends 
AbstractKeyDeletingService {
             PurgePathRequest request = prepareDeleteDirRequest(
                 remainNum, deletedDir.getValue(), deletedDir.getKey(),
                 allSubDirList, omSnapshot.getKeyManager());
+            if (isBufferLimitCrossed(ratisByteLimit, consumedSize,
+                request.getSerializedSize())) {
+              if (purgePathRequestList.size() != 0) {
+                // if message buffer reaches max limit, avoid sending further
+                remainNum = 0;
+                break;
+              }
+              // if directory itself is having a lot of keys / files,
+              // reduce capacity to minimum level
+              remainNum = MIN_ERR_LIMIT_PER_TASK;
+              request = prepareDeleteDirRequest(
+                  remainNum, deletedDir.getValue(), deletedDir.getKey(),
+                  allSubDirList, omSnapshot.getKeyManager());
+            }
+            consumedSize += request.getSerializedSize();
             purgePathRequestList.add(request);
             remainNum = remainNum - request.getDeletedSubFilesCount();
             remainNum = remainNum - request.getMarkDeletedSubDirsCount();
@@ -418,7 +444,7 @@ public class SnapshotDeletingService extends 
AbstractKeyDeletingService {
 
         remainNum = optimizeDirDeletesAndSubmitRequest(remainNum, dirNum,
             subDirNum, subFileNum, allSubDirList, purgePathRequestList,
-            snapInfo.getTableKey(), startTime);
+            snapInfo.getTableKey(), startTime, ratisByteLimit - consumedSize);
       } catch (IOException e) {
         LOG.error("Error while running delete directories and files for " +
             "snapshot " + snapInfo.getTableKey() + " in snapshot deleting " +
diff --git 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingService.java
 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingService.java
new file mode 100644
index 0000000000..22f2cf6d42
--- /dev/null
+++ 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingService.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.ozone.om.service;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.server.ServerUtils;
+import org.apache.hadoop.hdds.utils.db.DBConfigFromFile;
+import org.apache.hadoop.ozone.om.KeyManager;
+import org.apache.hadoop.ozone.om.OMConfigKeys;
+import org.apache.hadoop.ozone.om.OmTestManagers;
+import org.apache.hadoop.ozone.om.OzoneManager;
+import org.apache.hadoop.ozone.om.helpers.BucketLayout;
+import org.apache.hadoop.ozone.om.helpers.OmBucketInfo;
+import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo;
+import org.apache.hadoop.ozone.om.helpers.OmKeyArgs;
+import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
+import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol;
+import org.apache.hadoop.ozone.om.request.OMRequestTestUtils;
+import org.apache.hadoop.util.Time;
+import org.apache.ozone.test.GenericTestUtils;
+import org.apache.ratis.util.ExitUtils;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import static 
org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL;
+
+/**
+ * Test Directory Deleting Service.
+ */
+public class TestDirectoryDeletingService {
+  @Rule
+  public TemporaryFolder folder = new TemporaryFolder();
+  private OzoneManagerProtocol writeClient;
+  private OzoneManager om;
+  private String volumeName;
+  private String bucketName;
+
+  @BeforeClass
+  public static void setup() {
+    ExitUtils.disableSystemExit();
+  }
+
+  private OzoneConfiguration createConfAndInitValues() throws IOException {
+    OzoneConfiguration conf = new OzoneConfiguration();
+    File newFolder = folder.newFolder();
+    if (!newFolder.exists()) {
+      Assert.assertTrue(newFolder.mkdirs());
+    }
+    System.setProperty(DBConfigFromFile.CONFIG_DIR, "/");
+    ServerUtils.setOzoneMetaDirPath(conf, newFolder.toString());
+    conf.setTimeDuration(OZONE_DIR_DELETING_SERVICE_INTERVAL, 3000,
+        TimeUnit.MILLISECONDS);
+    conf.set(OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT, "4MB");
+    conf.setQuietMode(false);
+
+    volumeName = String.format("volume01");
+    bucketName = String.format("bucket01");
+
+    return conf;
+  }
+
+  @After
+  public void cleanup() throws Exception {
+    om.stop();
+  }
+
+  @Test
+  public void testDeleteDirectoryCrossingSizeLimit() throws Exception {
+    OzoneConfiguration conf = createConfAndInitValues();
+    OmTestManagers omTestManagers
+        = new OmTestManagers(conf);
+    KeyManager keyManager = omTestManagers.getKeyManager();
+    writeClient = omTestManagers.getWriteClient();
+    om = omTestManagers.getOzoneManager();
+
+    OMRequestTestUtils.addVolumeAndBucketToDB(volumeName, bucketName,
+        om.getMetadataManager(), BucketLayout.FILE_SYSTEM_OPTIMIZED);
+
+    String bucketKey = om.getMetadataManager().getBucketKey(
+        volumeName, bucketName);
+    OmBucketInfo bucketInfo = om.getMetadataManager().getBucketTable()
+        .get(bucketKey);
+
+    // create parent directory and 2000 files in it crossing
+    // size of 4MB as defined for the testcase
+    StringBuilder sb = new StringBuilder("test");
+    for (int i = 0; i < 100; ++i) {
+      sb.append("0123456789");
+    }
+    String longName = sb.toString();
+    OmDirectoryInfo dir1 = new OmDirectoryInfo.Builder()
+        .setName("dir" + longName)
+        .setCreationTime(Time.now())
+        .setModificationTime(Time.now())
+        .setObjectID(1)
+        .setParentObjectID(bucketInfo.getObjectID())
+        .setUpdateID(0)
+        .build();
+    OMRequestTestUtils.addDirKeyToDirTable(true, dir1, volumeName, bucketName,
+        1L, om.getMetadataManager());
+
+    for (int i = 0; i < 2000; ++i) {
+      String keyName = "key" + longName + i;
+      OmKeyInfo omKeyInfo =
+          OMRequestTestUtils.createOmKeyInfo(volumeName, bucketName,
+              keyName, HddsProtos.ReplicationType.RATIS,
+              HddsProtos.ReplicationFactor.ONE, dir1.getObjectID() + 1 + i,
+              dir1.getObjectID(), 100, Time.now());
+      OMRequestTestUtils.addFileToKeyTable(false, true, keyName,
+          omKeyInfo, 1234L, i + 1, om.getMetadataManager());
+    }
+
+    // delete directory recursively
+    OmKeyArgs delArgs = new OmKeyArgs.Builder()
+        .setVolumeName(volumeName)
+        .setBucketName(bucketName)
+        .setKeyName("dir" + longName)
+        .setReplicationConfig(StandaloneReplicationConfig.getInstance(
+            HddsProtos.ReplicationFactor.ONE))
+        .setDataSize(0).setRecursive(true)
+        .build();
+    writeClient.deleteKey(delArgs);
+
+    // wait for a deleted-file count of at least 1000 but fewer than all 2000
+    // files, since the buffer-limit logic caps each batch at 1000 files
+    DirectoryDeletingService dirDeletingService =
+        (DirectoryDeletingService) keyManager.getDirDeletingService();
+    GenericTestUtils.waitFor(
+        () -> dirDeletingService.getMovedFilesCount() >= 1000
+            && dirDeletingService.getMovedFilesCount() < 2000,
+        500, 60000);
+    Assert.assertTrue(dirDeletingService.getRunCount().get() >= 1);
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to