This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 2b8ac5ded3 HDDS-8871. Intermittent failure in
TestOpenKeyCleanupService (#5493)
2b8ac5ded3 is described below
commit 2b8ac5ded3017bc51f5b41f8db9872addde14c9a
Author: Doroszlai, Attila <[email protected]>
AuthorDate: Thu Oct 26 18:35:07 2023 +0200
HDDS-8871. Intermittent failure in TestOpenKeyCleanupService (#5493)
---
.../om/service/TestOpenKeyCleanupService.java | 143 +++++++++++----------
1 file changed, 74 insertions(+), 69 deletions(-)
diff --git
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java
index c421e10f94..fad99837e2 100644
---
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java
+++
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java
@@ -20,7 +20,7 @@
package org.apache.hadoop.ozone.om.service;
import org.apache.commons.lang3.RandomUtils;
-import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
+import org.apache.hadoop.hdds.client.RatisReplicationConfig;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList;
@@ -54,6 +54,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
+import java.io.UncheckedIOException;
import java.nio.file.Path;
import java.time.Duration;
import java.util.ArrayList;
@@ -66,23 +67,17 @@ import static
org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_OPEN_KEY_EXPIRE_T
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
-/**
- * Test Key Deleting Service.
- * <p>
- * This test does the following things.
- * <p>
- * 1. Creates a bunch of keys. 2. Then executes delete key directly using
- * Metadata Manager. 3. Waits for a while for the KeyDeleting Service to pick
up
- * and call into SCM. 4. Confirms that calls have been successful.
- */
-public class TestOpenKeyCleanupService {
+class TestOpenKeyCleanupService {
private OzoneManagerProtocol writeClient;
private OzoneManager om;
private static final Logger LOG =
LoggerFactory.getLogger(TestOpenKeyCleanupService.class);
- private static final Duration SERVICE_INTERVAL = Duration.ofMillis(100);
- private static final Duration EXPIRE_THRESHOLD = Duration.ofMillis(200);
+ private static final int SERVICE_INTERVAL = 20;
+ private static final int EXPIRE_THRESHOLD_MS = 140;
+ private static final Duration EXPIRE_THRESHOLD =
+ Duration.ofMillis(EXPIRE_THRESHOLD_MS);
+ private static final int WAIT_TIME = (int) Duration.ofSeconds(10).toMillis();
private static final int NUM_MPU_PARTS = 5;
private KeyManager keyManager;
private OMMetadataManager omMetadataManager;
@@ -98,9 +93,9 @@ public class TestOpenKeyCleanupService {
System.setProperty(DBConfigFromFile.CONFIG_DIR, "/");
ServerUtils.setOzoneMetaDirPath(conf, tempDir.toString());
conf.setTimeDuration(OZONE_OM_OPEN_KEY_CLEANUP_SERVICE_INTERVAL,
- SERVICE_INTERVAL.toMillis(), TimeUnit.MILLISECONDS);
+ SERVICE_INTERVAL, TimeUnit.MILLISECONDS);
conf.setTimeDuration(OZONE_OM_OPEN_KEY_EXPIRE_THRESHOLD,
- EXPIRE_THRESHOLD.toMillis(), TimeUnit.MILLISECONDS);
+ EXPIRE_THRESHOLD_MS, TimeUnit.MILLISECONDS);
conf.setBoolean(OzoneConfigKeys.OZONE_FS_HSYNC_ENABLED, true);
conf.setQuietMode(false);
OmTestManagers omTestManagers = new OmTestManagers(conf);
@@ -112,7 +107,9 @@ public class TestOpenKeyCleanupService {
@AfterEach
public void cleanup() throws Exception {
- om.stop();
+ if (om.stop()) {
+ om.join();
+ }
}
/**
@@ -143,7 +140,7 @@ public class TestOpenKeyCleanupService {
openKeyCleanupService.suspend();
// wait for submitted tasks to complete
- Thread.sleep(SERVICE_INTERVAL.toMillis());
+ Thread.sleep(SERVICE_INTERVAL);
final long oldkeyCount = openKeyCleanupService.getSubmittedOpenKeyCount();
final long oldrunCount = openKeyCleanupService.getRunCount();
LOG.info("oldkeyCount={}, oldrunCount={}", oldkeyCount, oldrunCount);
@@ -158,7 +155,7 @@ public class TestOpenKeyCleanupService {
createOpenKeys(numFSOKeys, hsync, BucketLayout.FILE_SYSTEM_OPTIMIZED);
// wait for open keys to expire
- Thread.sleep(EXPIRE_THRESHOLD.toMillis());
+ Thread.sleep(EXPIRE_THRESHOLD_MS);
assertExpiredOpenKeys(numDEFKeys == 0, false, BucketLayout.DEFAULT);
assertExpiredOpenKeys(numFSOKeys == 0, hsync,
@@ -166,26 +163,22 @@ public class TestOpenKeyCleanupService {
openKeyCleanupService.resume();
- GenericTestUtils.waitFor(() -> openKeyCleanupService
- .getRunCount() > oldrunCount,
- (int) SERVICE_INTERVAL.toMillis(),
- 5 * (int) SERVICE_INTERVAL.toMillis());
+ GenericTestUtils.waitFor(
+ () -> openKeyCleanupService.getSubmittedOpenKeyCount() >= keyCount,
+ SERVICE_INTERVAL, WAIT_TIME);
+ GenericTestUtils.waitFor(
+ () -> openKeyCleanupService.getRunCount() >= oldrunCount + 2,
+ SERVICE_INTERVAL, WAIT_TIME);
- // wait for requests to complete
- final int n = hsync ? numDEFKeys + numFSOKeys : 1;
- Thread.sleep(n * SERVICE_INTERVAL.toMillis());
+ waitForOpenKeyCleanup(false, BucketLayout.DEFAULT);
+ waitForOpenKeyCleanup(hsync, BucketLayout.FILE_SYSTEM_OPTIMIZED);
- assertTrue(openKeyCleanupService.getSubmittedOpenKeyCount() >=
- oldkeyCount + keyCount);
- assertExpiredOpenKeys(true, false, BucketLayout.DEFAULT);
- assertExpiredOpenKeys(true, hsync,
- BucketLayout.FILE_SYSTEM_OPTIMIZED);
if (hsync) {
- assertEquals(numDEFKeys, metrics.getNumOpenKeysCleaned());
- assertTrue(metrics.getNumOpenKeysHSyncCleaned() >= numFSOKeys);
+ assertAtLeast(numDEFKeys, metrics.getNumOpenKeysCleaned());
+ assertAtLeast(numFSOKeys, metrics.getNumOpenKeysHSyncCleaned());
assertEquals(numFSOKeys, metrics.getNumKeyHSyncs());
} else {
- assertEquals(keyCount, metrics.getNumOpenKeysCleaned());
+ assertAtLeast(keyCount, metrics.getNumOpenKeysCleaned());
assertEquals(0, metrics.getNumOpenKeysHSyncCleaned());
assertEquals(0, metrics.getNumKeyHSyncs());
}
@@ -214,7 +207,7 @@ public class TestOpenKeyCleanupService {
openKeyCleanupService.suspend();
// wait for submitted tasks to complete
- Thread.sleep(SERVICE_INTERVAL.toMillis());
+ Thread.sleep(SERVICE_INTERVAL);
final long oldkeyCount = openKeyCleanupService.getSubmittedOpenKeyCount();
final long oldrunCount = openKeyCleanupService.getRunCount();
LOG.info("oldMpuKeyCount={}, oldMpuRunCount={}", oldkeyCount, oldrunCount);
@@ -230,7 +223,7 @@ public class TestOpenKeyCleanupService {
NUM_MPU_PARTS, true);
// wait for open keys to expire
- Thread.sleep(EXPIRE_THRESHOLD.toMillis());
+ Thread.sleep(EXPIRE_THRESHOLD_MS);
// All MPU open keys should be skipped
assertExpiredOpenKeys(true, false, BucketLayout.DEFAULT);
@@ -239,13 +232,12 @@ public class TestOpenKeyCleanupService {
openKeyCleanupService.resume();
- GenericTestUtils.waitFor(() -> openKeyCleanupService
- .getRunCount() > oldrunCount,
- (int) SERVICE_INTERVAL.toMillis(),
- 5 * (int) SERVICE_INTERVAL.toMillis());
+ GenericTestUtils.waitFor(
+ () -> openKeyCleanupService.getRunCount() >= oldrunCount + 2,
+ SERVICE_INTERVAL, WAIT_TIME);
// wait for requests to complete
- Thread.sleep(SERVICE_INTERVAL.toMillis());
+ Thread.sleep(SERVICE_INTERVAL);
// No expired open keys fetched
assertEquals(openKeyCleanupService.getSubmittedOpenKeyCount(),
oldkeyCount);
@@ -279,7 +271,7 @@ public class TestOpenKeyCleanupService {
openKeyCleanupService.suspend();
// wait for submitted tasks to complete
- Thread.sleep(SERVICE_INTERVAL.toMillis());
+ Thread.sleep(SERVICE_INTERVAL);
final long oldkeyCount = openKeyCleanupService.getSubmittedOpenKeyCount();
final long oldrunCount = openKeyCleanupService.getRunCount();
LOG.info("oldMpuKeyCount={}, oldMpuRunCount={}", oldkeyCount, oldrunCount);
@@ -290,52 +282,65 @@ public class TestOpenKeyCleanupService {
assertEquals(0, metrics.getNumOpenKeysCleaned());
assertEquals(0, metrics.getNumOpenKeysHSyncCleaned());
final int keyCount = numDEFKeys + numFSOKeys;
+ final int partCount = NUM_MPU_PARTS * keyCount;
createIncompleteMPUKeys(numDEFKeys, BucketLayout.DEFAULT, NUM_MPU_PARTS,
false);
createIncompleteMPUKeys(numFSOKeys, BucketLayout.FILE_SYSTEM_OPTIMIZED,
NUM_MPU_PARTS, false);
- Thread.sleep(EXPIRE_THRESHOLD.toMillis());
+ Thread.sleep(EXPIRE_THRESHOLD_MS);
// Each MPU keys create 1 MPU open key and some MPU open part keys
// only the MPU open part keys will be deleted
- assertExpiredOpenKeys((numDEFKeys * NUM_MPU_PARTS) == 0, false,
+ assertExpiredOpenKeys(numDEFKeys == 0, false,
BucketLayout.DEFAULT);
- assertExpiredOpenKeys((numFSOKeys * NUM_MPU_PARTS) == 0, false,
+ assertExpiredOpenKeys(numFSOKeys == 0, false,
BucketLayout.FILE_SYSTEM_OPTIMIZED);
openKeyCleanupService.resume();
- GenericTestUtils.waitFor(() -> openKeyCleanupService
- .getRunCount() > oldrunCount,
- (int) SERVICE_INTERVAL.toMillis(),
- 5 * (int) SERVICE_INTERVAL.toMillis());
-
- // wait for requests to complete
- Thread.sleep(SERVICE_INTERVAL.toMillis());
+ GenericTestUtils.waitFor(
+ () -> openKeyCleanupService.getSubmittedOpenKeyCount() >= partCount,
+ SERVICE_INTERVAL, WAIT_TIME);
+ GenericTestUtils.waitFor(
+ () -> openKeyCleanupService.getRunCount() >= oldrunCount + 2,
+ SERVICE_INTERVAL, WAIT_TIME);
// No expired MPU parts fetched
- int numExpiredParts = NUM_MPU_PARTS * keyCount;
- assertTrue(openKeyCleanupService.getSubmittedOpenKeyCount() >=
- (oldkeyCount + numExpiredParts));
- assertExpiredOpenKeys(true, false, BucketLayout.DEFAULT);
- assertExpiredOpenKeys(true, false,
- BucketLayout.FILE_SYSTEM_OPTIMIZED);
+ waitForOpenKeyCleanup(false, BucketLayout.DEFAULT);
+ waitForOpenKeyCleanup(false, BucketLayout.FILE_SYSTEM_OPTIMIZED);
+ assertAtLeast(partCount, metrics.getNumOpenKeysCleaned());
+ }
- assertEquals(numExpiredParts,
- metrics.getNumOpenKeysCleaned());
+ private static void assertAtLeast(long expectedMinimum, long actual) {
+ assertTrue(actual >= expectedMinimum,
+ () -> actual + " < " + expectedMinimum);
}
- void assertExpiredOpenKeys(boolean expectedToEmpty, boolean hsync,
- BucketLayout layout) throws IOException {
- final ExpiredOpenKeys expired = keyManager.getExpiredOpenKeys(
- EXPIRE_THRESHOLD, 100, layout);
- final int size = (hsync ? expired.getHsyncKeys()
- : expired.getOpenKeyBuckets()).size();
+ private void assertExpiredOpenKeys(boolean expectedToEmpty, boolean hsync,
+ BucketLayout layout) {
+ final int size = getExpiredOpenKeys(hsync, layout);
assertEquals(expectedToEmpty, size == 0,
() -> "size=" + size + ", layout=" + layout);
}
+ private int getExpiredOpenKeys(boolean hsync, BucketLayout layout) {
+ try {
+ final ExpiredOpenKeys expired = keyManager.getExpiredOpenKeys(
+ EXPIRE_THRESHOLD, 100, layout);
+ return (hsync ? expired.getHsyncKeys() : expired.getOpenKeyBuckets())
+ .size();
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ }
+
+ void waitForOpenKeyCleanup(boolean hsync, BucketLayout layout)
+ throws Exception {
+ GenericTestUtils.waitFor(() -> 0 == getExpiredOpenKeys(hsync, layout),
+ SERVICE_INTERVAL, WAIT_TIME);
+ }
+
private void createOpenKeys(int keyCount, boolean hsync,
BucketLayout bucketLayout) throws IOException {
String volume = UUID.randomUUID().toString();
@@ -383,7 +388,7 @@ public class TestOpenKeyCleanupService {
.setBucketName(bucketName)
.setKeyName(keyName)
.setAcls(Collections.emptyList())
- .setReplicationConfig(StandaloneReplicationConfig.getInstance(
+ .setReplicationConfig(RatisReplicationConfig.getInstance(
HddsProtos.ReplicationFactor.ONE))
.setLocationInfoList(new ArrayList<>())
.build();
@@ -436,7 +441,7 @@ public class TestOpenKeyCleanupService {
.setBucketName(bucketName)
.setKeyName(keyName)
.setAcls(Collections.emptyList())
- .setReplicationConfig(StandaloneReplicationConfig.getInstance(
+ .setReplicationConfig(RatisReplicationConfig.getInstance(
HddsProtos.ReplicationFactor.ONE))
.setLocationInfoList(new ArrayList<>())
.build();
@@ -455,7 +460,7 @@ public class TestOpenKeyCleanupService {
.setMultipartUploadID(omMultipartInfo.getUploadID())
.setMultipartUploadPartNumber(i)
.setAcls(Collections.emptyList())
- .setReplicationConfig(StandaloneReplicationConfig.getInstance(
+ .setReplicationConfig(RatisReplicationConfig.getInstance(
HddsProtos.ReplicationFactor.ONE))
.build();
@@ -471,7 +476,7 @@ public class TestOpenKeyCleanupService {
.setMultipartUploadID(omMultipartInfo.getUploadID())
.setMultipartUploadPartNumber(i)
.setAcls(Collections.emptyList())
- .setReplicationConfig(StandaloneReplicationConfig.getInstance(
+ .setReplicationConfig(RatisReplicationConfig.getInstance(
HddsProtos.ReplicationFactor.ONE))
.setLocationInfoList(Collections.emptyList())
.build();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]