This is an automated email from the ASF dual-hosted git repository.
sammichen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 7ca3e13 HDDS-5394. Fix skipped volume check due to
disk.check.min.gap. (#2373)
7ca3e13 is described below
commit 7ca3e13ca43df1674ebbdd3ebb0f6fb5dbb9189c
Author: Gui Hecheng <[email protected]>
AuthorDate: Mon Jul 12 14:43:42 2021 +0800
HDDS-5394. Fix skipped volume check due to disk.check.min.gap. (#2373)
---
.../common/statemachine/DatanodeConfiguration.java | 4 +-
.../common/volume/StorageVolumeChecker.java | 45 ++++----
.../common/volume/TestPeriodicVolumeChecker.java | 118 +++++++++++++++++++++
3 files changed, 147 insertions(+), 20 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
index fc90344..cd5929f 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
@@ -54,7 +54,7 @@ public class DatanodeConfiguration {
static final int REPLICATION_MAX_STREAMS_DEFAULT = 10;
- static final long PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT = 15;
+ static final long PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT = 60;
static final int FAILED_VOLUMES_TOLERATED_DEFAULT = -1;
@@ -131,7 +131,7 @@ public class DatanodeConfiguration {
}
@Config(key = "periodic.disk.check.interval.minutes",
- defaultValue = "15",
+ defaultValue = "60",
type = ConfigType.LONG,
tags = { DATANODE },
description = "Periodic disk check run interval in minutes."
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolumeChecker.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolumeChecker.java
index 0d3c110..ce2fb59 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolumeChecker.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolumeChecker.java
@@ -22,7 +22,6 @@ import javax.annotation.Nonnull;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
@@ -69,6 +68,7 @@ public class StorageVolumeChecker {
private final AtomicLong numVolumeChecks = new AtomicLong(0);
private final AtomicLong numAllVolumeChecks = new AtomicLong(0);
+ private final AtomicLong numAllVolumeSetsChecks = new AtomicLong(0);
private final AtomicLong numSkippedChecks = new AtomicLong(0);
/**
@@ -83,9 +83,9 @@ public class StorageVolumeChecker {
private final long minDiskCheckGapMs;
/**
- * Timestamp of the last check of all volumes.
+ * Timestamp of the last check of all volume sets.
*/
- private long lastAllVolumesCheck;
+ private long lastAllVolumeSetsCheckComplete;
private final Timer timer;
@@ -113,7 +113,7 @@ public class StorageVolumeChecker {
minDiskCheckGapMs = dnConf.getDiskCheckMinGap().toMillis();
- lastAllVolumesCheck = timer.monotonicNow() - minDiskCheckGapMs;
+ lastAllVolumeSetsCheckComplete = timer.monotonicNow() - minDiskCheckGapMs;
registeredVolumeSets = new ArrayList<>();
@@ -151,10 +151,25 @@ public class StorageVolumeChecker {
}
public synchronized void checkAllVolumeSets() {
+ final long gap = timer.monotonicNow() - lastAllVolumeSetsCheckComplete;
+ if (gap < minDiskCheckGapMs) {
+ numSkippedChecks.incrementAndGet();
+ if (LOG.isTraceEnabled()) {
+ LOG.trace(
+ "Skipped checking all volumes, time since last check {} is less " +
+ "than the minimum gap between checks ({} ms).",
+ gap, minDiskCheckGapMs);
+ }
+ return;
+ }
+
try {
for (MutableVolumeSet volSet : registeredVolumeSets) {
volSet.checkAllVolumes();
}
+
+ lastAllVolumeSetsCheckComplete = timer.monotonicNow();
+ numAllVolumeSetsChecks.incrementAndGet();
} catch (IOException e) {
LOG.warn("Exception while checking disks", e);
}
@@ -174,19 +189,6 @@ public class StorageVolumeChecker {
public Set<? extends StorageVolume> checkAllVolumes(
Collection<? extends StorageVolume> volumes)
throws InterruptedException {
- final long gap = timer.monotonicNow() - lastAllVolumesCheck;
- if (gap < minDiskCheckGapMs) {
- numSkippedChecks.incrementAndGet();
- if (LOG.isTraceEnabled()) {
- LOG.trace(
- "Skipped checking all volumes, time since last check {} is less " +
- "than the minimum gap between checks ({} ms).",
- gap, minDiskCheckGapMs);
- }
- return Collections.emptySet();
- }
-
- lastAllVolumesCheck = timer.monotonicNow();
final Set<StorageVolume> healthyVolumes = ConcurrentHashMap.newKeySet();
final Set<StorageVolume> failedVolumes = ConcurrentHashMap.newKeySet();
final Set<StorageVolume> allVolumes = new HashSet<>();
@@ -408,13 +410,20 @@ public class StorageVolumeChecker {
}
/**
- * Return the number of {@link #checkAllVolumes} invocations.
+ * Return the number of {@link #checkAllVolumes(Collection)} ()} invocations.
*/
public long getNumAllVolumeChecks() {
return numAllVolumeChecks.get();
}
/**
+ * Return the number of {@link #checkAllVolumeSets()} invocations.
+ */
+ public long getNumAllVolumeSetsChecks() {
+ return numAllVolumeSetsChecks.get();
+ }
+
+ /**
* Return the number of checks skipped because the minimum gap since the
* last check had not elapsed.
*/
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestPeriodicVolumeChecker.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestPeriodicVolumeChecker.java
new file mode 100644
index 0000000..ba7babd
--- /dev/null
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestPeriodicVolumeChecker.java
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.container.common.volume;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import org.apache.hadoop.ozone.OzoneConfigKeys;
+import org.apache.hadoop.ozone.container.common.ContainerTestUtils;
+import
org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
+import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.time.Duration;
+
+/**
+ * Test periodic volume checker in StorageVolumeChecker.
+ */
+public class TestPeriodicVolumeChecker {
+
+ public static final Logger LOG = LoggerFactory.getLogger(
+ TestPeriodicVolumeChecker.class);
+
+ @Rule
+ public TemporaryFolder folder = new TemporaryFolder();
+
+ @Rule
+ public TestName testName = new TestName();
+
+ @Rule
+ public Timeout globalTimeout = Timeout.seconds(150);
+
+ private OzoneConfiguration conf = new OzoneConfiguration();
+
+ @Before
+ public void setup() throws IOException {
+ conf = new OzoneConfiguration();
+ conf.set(ScmConfigKeys.HDDS_DATANODE_DIR_KEY, folder.getRoot()
+ .getAbsolutePath());
+ conf.set(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR,
+ folder.newFolder().getAbsolutePath());
+ }
+
+ @After
+ public void cleanup() throws IOException {
+ FileUtils.deleteDirectory(folder.getRoot());
+ }
+
+ @Test
+ public void testPeriodicVolumeChecker() throws Exception {
+ LOG.info("Executing {}", testName.getMethodName());
+
+ DatanodeConfiguration dnConf =
+ conf.getObject(DatanodeConfiguration.class);
+ dnConf.setDiskCheckMinGap(Duration.ofMinutes(2));
+ dnConf.setPeriodicDiskCheckIntervalMinutes(1);
+ conf.setFromObject(dnConf);
+
+ DatanodeDetails datanodeDetails =
+ ContainerTestUtils.createDatanodeDetails();
+ OzoneContainer ozoneContainer =
+ ContainerTestUtils.getOzoneContainer(datanodeDetails, conf);
+ MutableVolumeSet dataVolumeSet = ozoneContainer.getVolumeSet();
+
+ StorageVolumeChecker volumeChecker = dataVolumeSet.getVolumeChecker();
+ volumeChecker.setDelegateChecker(
+ new TestStorageVolumeChecker.DummyChecker());
+
+ // 1 for volumeSet and 1 for metadataVolumeSet
+ // in MutableVolumeSet constructor
+ Assert.assertEquals(2, volumeChecker.getNumAllVolumeChecks());
+ Assert.assertEquals(0, volumeChecker.getNumAllVolumeSetsChecks());
+
+ // wait for periodic disk checker start
+ Thread.sleep((60 + 5) * 1000);
+
+ // first round
+ // 2 for volumeSet and 2 for metadataVolumeSet
+ Assert.assertEquals(4, volumeChecker.getNumAllVolumeChecks());
+ Assert.assertEquals(1, volumeChecker.getNumAllVolumeSetsChecks());
+ Assert.assertEquals(0, volumeChecker.getNumSkippedChecks());
+
+ // wait for periodic disk checker next round
+ Thread.sleep((60 + 5) * 1000);
+
+ // skipped next round
+ Assert.assertEquals(4, volumeChecker.getNumAllVolumeChecks());
+ Assert.assertEquals(1, volumeChecker.getNumAllVolumeSetsChecks());
+ Assert.assertEquals(1, volumeChecker.getNumSkippedChecks());
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]