This is an automated email from the ASF dual-hosted git repository.
siyao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new ff9e37d1bf HDDS-9112. Make snapshot dir wait poll max timeout
configurable (#5139)
ff9e37d1bf is described below
commit ff9e37d1bf5124ea2e1fb1c1edd468efee89acd9
Author: Swaminathan Balachandran <[email protected]>
AuthorDate: Thu Aug 3 11:09:01 2023 -0700
HDDS-9112. Make snapshot dir wait poll max timeout configurable (#5139)
Co-authored-by: Hemant Kumar <[email protected]>
Co-authored-by: prashantpogde <[email protected]>
---
.../common/src/main/resources/ozone-default.xml | 9 +++++++
.../hadoop/hdds/utils/db/RDBCheckpointUtils.java | 30 +++++++++++++++-------
.../org/apache/hadoop/ozone/om/OMConfigKeys.java | 6 +++++
.../hadoop/ozone/om/OmMetadataManagerImpl.java | 11 +++++++-
.../hadoop/ozone/om/snapshot/SnapshotUtils.java | 6 +++--
5 files changed, 50 insertions(+), 12 deletions(-)
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml
b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index d392fe97f6..aa1b092413 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -3368,6 +3368,15 @@
<description>Time interval of the SST File filtering service from Snapshot.
</description>
</property>
+ <property>
+ <name>ozone.om.snapshot.checkpoint.dir.creation.poll.timeout</name>
+ <value>20s</value>
+ <tag>OZONE, PERFORMANCE, OM</tag>
+ <description>
+ Maximum time to poll for the snapshot checkpoint directory to exist before loading a snapshot into the cache.
+ Unit defaults to milliseconds if a unit is not specified.
+ </description>
+ </property>
<property>
<name>ozone.sst.filtering.service.timeout</name>
<value>300000ms</value>
diff --git
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBCheckpointUtils.java
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBCheckpointUtils.java
index 24033680a7..ee550d7227 100644
---
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBCheckpointUtils.java
+++
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBCheckpointUtils.java
@@ -37,21 +37,22 @@ public final class RDBCheckpointUtils {
LoggerFactory.getLogger(RDBCheckpointUtils.class);
private static final Duration POLL_DELAY_DURATION = Duration.ZERO;
private static final Duration POLL_INTERVAL_DURATION =
Duration.ofMillis(100);
- private static final Duration POLL_MAX_DURATION = Duration.ofSeconds(5);
+ private static final Duration POLL_MAX_DURATION = Duration.ofSeconds(20);
private RDBCheckpointUtils() { }
/**
- * Wait for checkpoint directory to be created for 5 secs with 100 millis
- * poll interval.
+ * Wait for checkpoint directory to be created for the given duration with
+ * 100 millis poll interval.
* @param file Checkpoint directory.
- * @return true if found.
+ * @param maxWaitTimeout wait at most before request timeout.
+ * @return true if found within given timeout else false.
*/
- public static boolean waitForCheckpointDirectoryExist(File file)
- throws IOException {
+ public static boolean waitForCheckpointDirectoryExist(File file,
+ Duration maxWaitTimeout) {
Instant start = Instant.now();
try {
- with().atMost(POLL_MAX_DURATION)
+ with().atMost(maxWaitTimeout)
.pollDelay(POLL_DELAY_DURATION)
.pollInterval(POLL_INTERVAL_DURATION)
.await()
@@ -62,9 +63,20 @@ public final class RDBCheckpointUtils {
file.getAbsoluteFile());
return true;
} catch (ConditionTimeoutException exception) {
- LOG.info("Checkpoint directory: {} didn't get created in 5 secs.",
- file.getAbsolutePath());
+ LOG.info("Checkpoint directory: {} didn't get created in {} secs.",
+ file.getAbsolutePath(), maxWaitTimeout.getSeconds());
return false;
}
}
+
+ /**
+ * Wait for checkpoint directory to be created, polling every 100 millis
+ * for at most the default POLL_MAX_DURATION (20 secs).
+ * @param file Checkpoint directory.
+ * @return true if found within the default timeout else false.
+ */
+ public static boolean waitForCheckpointDirectoryExist(File file)
+ throws IOException {
+ return waitForCheckpointDirectoryExist(file, POLL_MAX_DURATION);
+ }
}
diff --git
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
index 259f7ab3ee..4a2369963d 100644
---
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
+++
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
@@ -383,6 +383,12 @@ public final class OMConfigKeys {
public static final String
OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL_DEFAULT = "60s";
+ public static final String
+ OZONE_SNAPSHOT_CHECKPOINT_DIR_CREATION_POLL_TIMEOUT =
+ "ozone.om.snapshot.checkpoint.dir.creation.poll.timeout";
+
+ public static final String
+ OZONE_SNAPSHOT_CHECKPOINT_DIR_CREATION_POLL_TIMEOUT_DEFAULT = "20s";
public static final String OZONE_OM_GRPC_MAXIMUM_RESPONSE_LENGTH =
"ozone.om.grpc.maximum.response.length";
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
index 86f3725408..748c9094d7 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
@@ -33,6 +33,7 @@ import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.UUID;
+import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -97,6 +98,8 @@ import static org.apache.hadoop.ozone.OzoneConsts.OM_DB_NAME;
import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
import static
org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT;
import static
org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT;
+import static
org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_CHECKPOINT_DIR_CREATION_POLL_TIMEOUT;
+import static
org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_CHECKPOINT_DIR_CREATION_POLL_TIMEOUT_DEFAULT;
import static org.apache.hadoop.ozone.om.OmSnapshotManager.getSnapshotPrefix;
import static
org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.BUCKET_NOT_FOUND;
import static
org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.VOLUME_NOT_FOUND;
@@ -377,13 +380,19 @@ public class OmMetadataManagerImpl implements
OMMetadataManager,
OM_KEY_PREFIX + OM_SNAPSHOT_CHECKPOINT_DIR;
File metaDir = new File(snapshotDir);
String dbName = OM_DB_NAME + snapshotDirName;
+ Duration maxPollDuration =
+ Duration.ofMillis(conf.getTimeDuration(
+ OZONE_SNAPSHOT_CHECKPOINT_DIR_CREATION_POLL_TIMEOUT,
+ OZONE_SNAPSHOT_CHECKPOINT_DIR_CREATION_POLL_TIMEOUT_DEFAULT,
+ TimeUnit.MILLISECONDS));
// The check is only to prevent every snapshot read to perform a disk IO
// and check if a checkpoint dir exists. If entry is present in cache,
// it is most likely DB entries will get flushed in this wait time.
if (isSnapshotInCache) {
File checkpoint =
Paths.get(metaDir.toPath().toString(), dbName).toFile();
- RDBCheckpointUtils.waitForCheckpointDirectoryExist(checkpoint);
+ RDBCheckpointUtils.waitForCheckpointDirectoryExist(checkpoint,
+ maxPollDuration);
// Check if the snapshot directory exists.
checkSnapshotDirExist(checkpoint);
}
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java
index de6ab46361..ef7e4e895e 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java
@@ -36,6 +36,7 @@ import java.util.UUID;
import static
org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.FILE_NOT_FOUND;
import static
org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.KEY_NOT_FOUND;
+import static
org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.TIMEOUT;
/**
* Util class for snapshot diff APIs.
@@ -93,7 +94,7 @@ public final class SnapshotUtils {
}
/**
- * Throws OMException FILE_NOT_FOUND if snapshot directory does not exist.
+ * Throws OMException TIMEOUT if snapshot directory does not exist.
* @param checkpoint Snapshot checkpoint directory
*/
public static void checkSnapshotDirExist(File checkpoint)
@@ -101,7 +102,8 @@ public final class SnapshotUtils {
if (!checkpoint.exists()) {
throw new OMException("Unable to load snapshot. " +
"Snapshot checkpoint directory '" + checkpoint.getAbsolutePath() +
- "' does not exists.", FILE_NOT_FOUND);
+ "' does not exist yet. Please wait a few more seconds before " +
+ "retrying", TIMEOUT);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]