Repository: hadoop
Updated Branches:
  refs/heads/trunk fa94d370b -> f880ff418


YARN-8775. TestDiskFailures.testLocalDirsFailures sometimes can fail on 
concurrent File modifications. (Contributed by Antal Bálint Steinbach)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/f880ff41
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/f880ff41
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/f880ff41

Branch: refs/heads/trunk
Commit: f880ff418c07965b333c750805c8de77a067b158
Parents: fa94d37
Author: Haibo Chen <haiboc...@apache.org>
Authored: Mon Oct 15 09:37:20 2018 -0700
Committer: Haibo Chen <haiboc...@apache.org>
Committed: Mon Oct 15 09:37:20 2018 -0700

----------------------------------------------------------------------
 .../nodemanager/LocalDirsHandlerService.java    |  5 ++-
 .../hadoop/yarn/server/TestDiskFailures.java    | 38 ++++++++------------
 2 files changed, 19 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/f880ff41/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
index 6eabd0d..b2bb4e3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
@@ -27,6 +27,8 @@ import java.util.List;
 import java.util.Set;
 import java.util.Timer;
 import java.util.TimerTask;
+
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.apache.hadoop.util.DiskValidator;
 import org.apache.hadoop.util.DiskValidatorFactory;
@@ -493,7 +495,8 @@ public class LocalDirsHandlerService extends AbstractService {
 
   }
 
-  private void checkDirs() {
+  @VisibleForTesting
+  public void checkDirs() {
     boolean disksStatusChange = false;
     Set<String> failedLocalDirsPreCheck =
         new HashSet<String>(localDirs.getFailedDirs());

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f880ff41/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java
index e9de20a..23bb039 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java
@@ -27,7 +27,6 @@ import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.server.MiniYARNCluster;
 import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -56,7 +55,12 @@ public class TestDiskFailures {
 
   private static final Logger LOG = LoggerFactory.getLogger(TestDiskFailures.class);
 
-  private static final long DISK_HEALTH_CHECK_INTERVAL = 1000;//1 sec
+  /*
+   * Set disk check interval high enough so that it never runs during the test.
+   * Checks will be called manually if necessary.
+   */
+  private static final long TOO_HIGH_DISK_HEALTH_CHECK_INTERVAL =
+      1000 * 60 * 60 * 24;
 
   private static FileContext localFS = null;
   private static final File testDir = new File("target",
@@ -146,9 +150,10 @@ public class TestDiskFailures {
                                          : YarnConfiguration.NM_LOG_DIRS;
 
     Configuration conf = new Configuration();
-    // set disk health check interval to a small value (say 1 sec).
+    // set disk health check interval to a large value to effectively disable
+    // disk health check done internally in LocalDirsHandlerService.
     conf.setLong(YarnConfiguration.NM_DISK_HEALTH_CHECK_INTERVAL_MS,
-                 DISK_HEALTH_CHECK_INTERVAL);
+        TOO_HIGH_DISK_HEALTH_CHECK_INTERVAL);
 
     // If 2 out of the total 4 local-dirs fail OR if 2 Out of the total 4
     // log-dirs fail, then the node's health status should become unhealthy.
@@ -202,22 +207,6 @@ public class TestDiskFailures {
     verifyDisksHealth(localORLogDirs, expectedDirs, false);
   }
 
-  /**
-   * Wait for the NodeManger to go for the disk-health-check at least once.
-   */
-  private void waitForDiskHealthCheck() {
-    long lastDisksCheckTime = dirsHandler.getLastDisksCheckTime();
-    long time = lastDisksCheckTime;
-    for (int i = 0; i < 10 && (time <= lastDisksCheckTime); i++) {
-      try {
-        Thread.sleep(1000);
-      } catch(InterruptedException e) {
-        LOG.error(
-            "Interrupted while waiting for NodeManager's disk health check.");
-      }
-      time = dirsHandler.getLastDisksCheckTime();
-    }
-  }
 
   /**
    * Verify if the NodeManager could identify disk failures.
@@ -228,8 +217,8 @@ public class TestDiskFailures {
    */
   private void verifyDisksHealth(boolean localORLogDirs, String expectedDirs,
       boolean isHealthy) {
-    // Wait for the NodeManager to identify disk failures.
-    waitForDiskHealthCheck();
+    // identify disk failures
+    dirsHandler.checkDirs();
 
     List<String> list = localORLogDirs ? dirsHandler.getLocalDirs()
                                        : dirsHandler.getLogDirs();
@@ -272,7 +261,10 @@ public class TestDiskFailures {
    */
   private void prepareDirToFail(String dir) throws IOException {
     File file = new File(dir);
-    FileUtil.fullyDelete(file);
+    if(!FileUtil.fullyDelete(file)) {
+      throw new IOException("Delete of file was unsuccessful! Path: " +
+          file.getAbsolutePath());
+    }
     file.createNewFile();
     LOG.info("Prepared " + dir + " to fail.");
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to