This is an automated email from the ASF dual-hosted git repository.

sammichen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 7ca3e13  HDDS-5394. Fix skipped volume check due to 
disk.check.min.gap. (#2373)
7ca3e13 is described below

commit 7ca3e13ca43df1674ebbdd3ebb0f6fb5dbb9189c
Author: Gui Hecheng <[email protected]>
AuthorDate: Mon Jul 12 14:43:42 2021 +0800

    HDDS-5394. Fix skipped volume check due to disk.check.min.gap. (#2373)
---
 .../common/statemachine/DatanodeConfiguration.java |   4 +-
 .../common/volume/StorageVolumeChecker.java        |  45 ++++----
 .../common/volume/TestPeriodicVolumeChecker.java   | 118 +++++++++++++++++++++
 3 files changed, 147 insertions(+), 20 deletions(-)

diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
index fc90344..cd5929f 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
@@ -54,7 +54,7 @@ public class DatanodeConfiguration {
 
   static final int REPLICATION_MAX_STREAMS_DEFAULT = 10;
 
-  static final long PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT = 15;
+  static final long PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT = 60;
 
   static final int FAILED_VOLUMES_TOLERATED_DEFAULT = -1;
 
@@ -131,7 +131,7 @@ public class DatanodeConfiguration {
   }
 
   @Config(key = "periodic.disk.check.interval.minutes",
-      defaultValue = "15",
+      defaultValue = "60",
       type = ConfigType.LONG,
       tags = { DATANODE },
       description = "Periodic disk check run interval in minutes."
diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolumeChecker.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolumeChecker.java
index 0d3c110..ce2fb59 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolumeChecker.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolumeChecker.java
@@ -22,7 +22,6 @@ import javax.annotation.Nonnull;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Optional;
@@ -69,6 +68,7 @@ public class StorageVolumeChecker {
 
   private final AtomicLong numVolumeChecks = new AtomicLong(0);
   private final AtomicLong numAllVolumeChecks = new AtomicLong(0);
+  private final AtomicLong numAllVolumeSetsChecks = new AtomicLong(0);
   private final AtomicLong numSkippedChecks = new AtomicLong(0);
 
   /**
@@ -83,9 +83,9 @@ public class StorageVolumeChecker {
   private final long minDiskCheckGapMs;
 
   /**
-   * Timestamp of the last check of all volumes.
+   * Timestamp of the last check of all volume sets.
    */
-  private long lastAllVolumesCheck;
+  private long lastAllVolumeSetsCheckComplete;
 
   private final Timer timer;
 
@@ -113,7 +113,7 @@ public class StorageVolumeChecker {
 
     minDiskCheckGapMs = dnConf.getDiskCheckMinGap().toMillis();
 
-    lastAllVolumesCheck = timer.monotonicNow() - minDiskCheckGapMs;
+    lastAllVolumeSetsCheckComplete = timer.monotonicNow() - minDiskCheckGapMs;
 
     registeredVolumeSets = new ArrayList<>();
 
@@ -151,10 +151,25 @@ public class StorageVolumeChecker {
   }
 
   public synchronized void checkAllVolumeSets() {
+    final long gap = timer.monotonicNow() - lastAllVolumeSetsCheckComplete;
+    if (gap < minDiskCheckGapMs) {
+      numSkippedChecks.incrementAndGet();
+      if (LOG.isTraceEnabled()) {
+        LOG.trace(
+            "Skipped checking all volumes, time since last check {} is less " +
+                "than the minimum gap between checks ({} ms).",
+            gap, minDiskCheckGapMs);
+      }
+      return;
+    }
+
     try {
       for (MutableVolumeSet volSet : registeredVolumeSets) {
         volSet.checkAllVolumes();
       }
+
+      lastAllVolumeSetsCheckComplete = timer.monotonicNow();
+      numAllVolumeSetsChecks.incrementAndGet();
     } catch (IOException e) {
       LOG.warn("Exception while checking disks", e);
     }
@@ -174,19 +189,6 @@ public class StorageVolumeChecker {
   public Set<? extends StorageVolume> checkAllVolumes(
       Collection<? extends StorageVolume> volumes)
       throws InterruptedException {
-    final long gap = timer.monotonicNow() - lastAllVolumesCheck;
-    if (gap < minDiskCheckGapMs) {
-      numSkippedChecks.incrementAndGet();
-      if (LOG.isTraceEnabled()) {
-        LOG.trace(
-            "Skipped checking all volumes, time since last check {} is less " +
-                "than the minimum gap between checks ({} ms).",
-            gap, minDiskCheckGapMs);
-      }
-      return Collections.emptySet();
-    }
-
-    lastAllVolumesCheck = timer.monotonicNow();
     final Set<StorageVolume> healthyVolumes = ConcurrentHashMap.newKeySet();
     final Set<StorageVolume> failedVolumes = ConcurrentHashMap.newKeySet();
     final Set<StorageVolume> allVolumes = new HashSet<>();
@@ -408,13 +410,20 @@ public class StorageVolumeChecker {
   }
 
   /**
-   * Return the number of {@link #checkAllVolumes} invocations.
+   * Return the number of {@link #checkAllVolumes(Collection)} ()} invocations.
    */
   public long getNumAllVolumeChecks() {
     return numAllVolumeChecks.get();
   }
 
   /**
+   * Return the number of {@link #checkAllVolumeSets()} invocations.
+   */
+  public long getNumAllVolumeSetsChecks() {
+    return numAllVolumeSetsChecks.get();
+  }
+
+  /**
    * Return the number of checks skipped because the minimum gap since the
    * last check had not elapsed.
    */
diff --git 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestPeriodicVolumeChecker.java
 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestPeriodicVolumeChecker.java
new file mode 100644
index 0000000..ba7babd
--- /dev/null
+++ 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestPeriodicVolumeChecker.java
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.container.common.volume;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import org.apache.hadoop.ozone.OzoneConfigKeys;
+import org.apache.hadoop.ozone.container.common.ContainerTestUtils;
+import 
org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
+import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.time.Duration;
+
+/**
+ * Test periodic volume checker in StorageVolumeChecker.
+ */
+public class TestPeriodicVolumeChecker {
+
+  public static final Logger LOG = LoggerFactory.getLogger(
+      TestPeriodicVolumeChecker.class);
+
+  @Rule
+  public TemporaryFolder folder = new TemporaryFolder();
+
+  @Rule
+  public TestName testName = new TestName();
+
+  @Rule
+  public Timeout globalTimeout = Timeout.seconds(150);
+
+  private OzoneConfiguration conf = new OzoneConfiguration();
+
+  @Before
+  public void setup() throws IOException {
+    conf = new OzoneConfiguration();
+    conf.set(ScmConfigKeys.HDDS_DATANODE_DIR_KEY, folder.getRoot()
+        .getAbsolutePath());
+    conf.set(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR,
+        folder.newFolder().getAbsolutePath());
+  }
+
+  @After
+  public void cleanup() throws IOException {
+    FileUtils.deleteDirectory(folder.getRoot());
+  }
+
+  @Test
+  public void testPeriodicVolumeChecker() throws Exception {
+    LOG.info("Executing {}", testName.getMethodName());
+
+    DatanodeConfiguration dnConf =
+        conf.getObject(DatanodeConfiguration.class);
+    dnConf.setDiskCheckMinGap(Duration.ofMinutes(2));
+    dnConf.setPeriodicDiskCheckIntervalMinutes(1);
+    conf.setFromObject(dnConf);
+
+    DatanodeDetails datanodeDetails =
+        ContainerTestUtils.createDatanodeDetails();
+    OzoneContainer ozoneContainer =
+        ContainerTestUtils.getOzoneContainer(datanodeDetails, conf);
+    MutableVolumeSet dataVolumeSet = ozoneContainer.getVolumeSet();
+
+    StorageVolumeChecker volumeChecker = dataVolumeSet.getVolumeChecker();
+    volumeChecker.setDelegateChecker(
+        new TestStorageVolumeChecker.DummyChecker());
+
+    // 1 for volumeSet and 1 for metadataVolumeSet
+    // in MutableVolumeSet constructor
+    Assert.assertEquals(2, volumeChecker.getNumAllVolumeChecks());
+    Assert.assertEquals(0, volumeChecker.getNumAllVolumeSetsChecks());
+
+    // wait for periodic disk checker start
+    Thread.sleep((60 + 5) * 1000);
+
+    // first round
+    // 2 for volumeSet and 2 for metadataVolumeSet
+    Assert.assertEquals(4, volumeChecker.getNumAllVolumeChecks());
+    Assert.assertEquals(1, volumeChecker.getNumAllVolumeSetsChecks());
+    Assert.assertEquals(0, volumeChecker.getNumSkippedChecks());
+
+    // wait for periodic disk checker next round
+    Thread.sleep((60 + 5) * 1000);
+
+    // skipped next round
+    Assert.assertEquals(4, volumeChecker.getNumAllVolumeChecks());
+    Assert.assertEquals(1, volumeChecker.getNumAllVolumeSetsChecks());
+    Assert.assertEquals(1, volumeChecker.getNumSkippedChecks());
+  }
+}

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to