This is an automated email from the ASF dual-hosted git repository.

bteke pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 134dcf166f5 YARN-11703. Validate accessibility of Node Manager working directories (#6903)
134dcf166f5 is described below

commit 134dcf166f51a3bd47923f3a0fbad7954135cb6d
Author: K0K0V0K <109747532+k0k0...@users.noreply.github.com>
AuthorDate: Thu Jun 27 16:21:28 2024 +0200

    YARN-11703. Validate accessibility of Node Manager working directories (#6903)
---
 .../apache/hadoop/yarn/conf/YarnConfiguration.java |  19 ++-
 .../src/main/resources/yarn-default.xml            |   6 +
 .../server/nodemanager/DirectoryCollection.java    | 153 +++++++++++++--------
 .../nodemanager/TestDirectoryCollection.java       |  35 +++--
 4 files changed, 140 insertions(+), 73 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 7747d4cb734..9503d475377 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -2157,16 +2157,19 @@ public class YarnConfiguration extends Configuration {
   public static final String NM_MIN_PER_DISK_FREE_SPACE_MB =
       NM_DISK_HEALTH_CHECK_PREFIX + "min-free-space-per-disk-mb";
 
+  /**
+   * By default, all of the disk can be used before it is marked as offline.
+   */
+  public static final long DEFAULT_NM_MIN_PER_DISK_FREE_SPACE_MB = 0;
+
   /**
    * Enable/Disable the minimum disk free
    * space threshold for disk health checker.
    */
   public static final String NM_DISK_FREE_SPACE_THRESHOLD_ENABLED =
-      NM_DISK_HEALTH_CHECK_PREFIX +
-          "disk-free-space-threshold.enabled";
+      NM_DISK_HEALTH_CHECK_PREFIX + "disk-free-space-threshold.enabled";
 
-  public static final boolean
-      DEFAULT_NM_DISK_FREE_SPACE_THRESHOLD_ENABLED = true;
+  public static final boolean DEFAULT_NM_DISK_FREE_SPACE_THRESHOLD_ENABLED = true;
 
   /**
    * The minimum space that must be available on an offline
@@ -2180,9 +2183,13 @@ public class YarnConfiguration extends Configuration {
       NM_DISK_HEALTH_CHECK_PREFIX +
           "min-free-space-per-disk-watermark-high-mb";
   /**
-   * By default, all of the disk can be used before it is marked as offline.
+   * Validate that the contents of the node manager directories can be accessed.
    */
-  public static final long DEFAULT_NM_MIN_PER_DISK_FREE_SPACE_MB = 0;
+  public static final String NM_WORKING_DIR_CONTENT_ACCESSIBILITY_VALIDATION_ENABLED =
+      NM_DISK_HEALTH_CHECK_PREFIX + "working-dir-content-accessibility-validation.enabled";
+
+  public static final boolean DEFAULT_NM_WORKING_DIR_CONTENT_ACCESSIBILITY_VALIDATION_ENABLED =
+      true;
 
   /** The health checker scripts. */
   public static final String NM_HEALTH_CHECK_SCRIPTS =
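
Taken together, the two constants added above wire a new on/off switch into the
disk health checker. As a minimal, hedged sketch (plain Configuration API; the
class name here is hypothetical), the switch can be flipped programmatically
before the NodeManager reads it:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class ToggleWorkingDirValidation {
      public static void main(String[] args) {
        Configuration conf = new YarnConfiguration();
        // Disable the validation added by this patch; it defaults to true.
        conf.setBoolean(
            YarnConfiguration.NM_WORKING_DIR_CONTENT_ACCESSIBILITY_VALIDATION_ENABLED,
            false);
        // DirectoryCollection reads the flag back the same way:
        boolean enabled = conf.getBoolean(
            YarnConfiguration.NM_WORKING_DIR_CONTENT_ACCESSIBILITY_VALIDATION_ENABLED,
            YarnConfiguration.DEFAULT_NM_WORKING_DIR_CONTENT_ACCESSIBILITY_VALIDATION_ENABLED);
        System.out.println("validation enabled: " + enabled);
      }
    }
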
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 927d0c1aa41..ac976b7472d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -1995,6 +1995,12 @@
     <value>true</value>
   </property>
 
+  <property>
+    <description>Validate that the contents of the node manager directories can be accessed</description>
+    <name>yarn.nodemanager.disk-health-checker.working-dir-content-accessibility-validation.enabled</name>
+    <value>true</value>
+  </property>
+
   <property>
     <description>The maximum percentage of disk space utilization allowed after
     which a disk is marked as bad. Values can range from 0.0 to 100.0. 
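
In a deployment, the new default above would normally be overridden in
yarn-site.xml rather than by editing yarn-default.xml; a sketch of such an
override, using the property name introduced by this patch:

    <property>
      <name>yarn.nodemanager.disk-health-checker.working-dir-content-accessibility-validation.enabled</name>
      <value>false</value>
    </property>
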
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
index 8ecaa6d9590..a5657ab48b4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
@@ -21,6 +21,8 @@ package org.apache.hadoop.yarn.server.nodemanager;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.Files;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -28,22 +30,27 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileAlreadyExistsException;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.util.DiskChecker;
 import org.apache.hadoop.util.DiskValidator;
 import org.apache.hadoop.util.DiskValidatorFactory;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 
@@ -62,6 +69,7 @@ public class DirectoryCollection {
 
   private boolean diskUtilizationThresholdEnabled;
   private boolean diskFreeSpaceThresholdEnabled;
+  private boolean subAccessibilityValidationEnabled;
   /**
    * The enum defines disk failure type.
    */
@@ -242,16 +250,15 @@ public class DirectoryCollection {
       throw new YarnRuntimeException(e);
     }
 
-    diskUtilizationThresholdEnabled = conf.
-        getBoolean(YarnConfiguration.
-                NM_DISK_UTILIZATION_THRESHOLD_ENABLED,
-            YarnConfiguration.
-                DEFAULT_NM_DISK_UTILIZATION_THRESHOLD_ENABLED);
-    diskFreeSpaceThresholdEnabled = conf.
-        getBoolean(YarnConfiguration.
-                NM_DISK_FREE_SPACE_THRESHOLD_ENABLED,
-            YarnConfiguration.
-                DEFAULT_NM_DISK_FREE_SPACE_THRESHOLD_ENABLED);
+    diskUtilizationThresholdEnabled = conf.getBoolean(
+        YarnConfiguration.NM_DISK_UTILIZATION_THRESHOLD_ENABLED,
+        YarnConfiguration.DEFAULT_NM_DISK_UTILIZATION_THRESHOLD_ENABLED);
+    diskFreeSpaceThresholdEnabled = conf.getBoolean(
+        YarnConfiguration.NM_DISK_FREE_SPACE_THRESHOLD_ENABLED,
+        YarnConfiguration.DEFAULT_NM_DISK_FREE_SPACE_THRESHOLD_ENABLED);
+    subAccessibilityValidationEnabled = conf.getBoolean(
+        YarnConfiguration.NM_WORKING_DIR_CONTENT_ACCESSIBILITY_VALIDATION_ENABLED,
+        YarnConfiguration.DEFAULT_NM_WORKING_DIR_CONTENT_ACCESSIBILITY_VALIDATION_ENABLED);
 
     localDirs = new ArrayList<>(Arrays.asList(dirs));
     errorDirs = new ArrayList<>();
@@ -448,8 +455,7 @@ public class DirectoryCollection {
 
     // move testDirs out of any lock as it could wait for very long time in
     // case of busy IO
-    Map<String, DiskErrorInformation> dirsFailedCheck = testDirs(allLocalDirs,
-        preCheckGoodDirs);
+    Map<String, DiskErrorInformation> dirsFailedCheck = testDirs(allLocalDirs, preCheckGoodDirs);
 
     this.writeLock.lock();
     try {
@@ -521,60 +527,89 @@ public class DirectoryCollection {
     }
   }
 
-  Map<String, DiskErrorInformation> testDirs(List<String> dirs,
-      Set<String> goodDirs) {
-    HashMap<String, DiskErrorInformation> ret =
-        new HashMap<String, DiskErrorInformation>();
-    for (final String dir : dirs) {
-      String msg;
-      try {
-        File testDir = new File(dir);
-        diskValidator.checkStatus(testDir);
-        float diskUtilizationPercentageCutoff = goodDirs.contains(dir) ?
-            diskUtilizationPercentageCutoffHigh : diskUtilizationPercentageCutoffLow;
-        long diskFreeSpaceCutoff = goodDirs.contains(dir) ?
-            diskFreeSpaceCutoffLow : diskFreeSpaceCutoffHigh;
-
-        if (diskUtilizationThresholdEnabled
-            && isDiskUsageOverPercentageLimit(testDir,
-            diskUtilizationPercentageCutoff)) {
-          msg =
-              "used space above threshold of "
-                  + diskUtilizationPercentageCutoff
-                  + "%";
-          ret.put(dir,
-            new DiskErrorInformation(DiskErrorCause.DISK_FULL, msg));
-          continue;
-        } else if (diskFreeSpaceThresholdEnabled
-            && isDiskFreeSpaceUnderLimit(testDir, diskFreeSpaceCutoff)) {
-          msg =
-              "free space below limit of " + diskFreeSpaceCutoff
-                  + "MB";
-          ret.put(dir,
-            new DiskErrorInformation(DiskErrorCause.DISK_FULL, msg));
-          continue;
-        }
-      } catch (IOException ie) {
-        ret.put(dir,
-          new DiskErrorInformation(DiskErrorCause.OTHER, ie.getMessage()));
-      }
+  Map<String, DiskErrorInformation> testDirs(List<String> dirs, Set<String> goodDirs) {
+    final Map<String, DiskErrorInformation> ret = new HashMap<>(0);
+    for (String dir : dirs) {
+      LOG.debug("Start testing dir accessibility: {}", dir);
+      File testDir = new File(dir);
+      boolean goodDir = goodDirs.contains(dir);
+      Stream.of(
+          validateDisk(testDir),
+          validateUsageOverPercentageLimit(testDir, goodDir),
+          validateDiskFreeSpaceUnderLimit(testDir, goodDir),
+          validateSubsAccessibility(testDir)
+      )
+          .filter(Objects::nonNull)
+          .findFirst()
+          .ifPresent(diskErrorInformation -> ret.put(dir, diskErrorInformation));
     }
     return ret;
   }
 
-  private boolean isDiskUsageOverPercentageLimit(File dir,
-      float diskUtilizationPercentageCutoff) {
-    float freePercentage =
-        100 * (dir.getUsableSpace() / (float) dir.getTotalSpace());
+  private DiskErrorInformation validateDisk(File dir) {
+    try {
+      diskValidator.checkStatus(dir);
+      LOG.debug("Dir {} pass throw the disk validation", dir);
+      return null;
+    } catch (IOException | UncheckedIOException | SecurityException e) {
+      return new DiskErrorInformation(DiskErrorCause.OTHER, e.getMessage());
+    }
+  }
+
+  private DiskErrorInformation validateUsageOverPercentageLimit(File dir, boolean isGoodDir) {
+    if (!diskUtilizationThresholdEnabled) {
+      return null;
+    }
+    float diskUtilizationPercentageCutoff = isGoodDir
+        ? diskUtilizationPercentageCutoffHigh
+        : diskUtilizationPercentageCutoffLow;
+    float freePercentage = 100 * (dir.getUsableSpace() / (float) dir.getTotalSpace());
     float usedPercentage = 100.0F - freePercentage;
-    return (usedPercentage > diskUtilizationPercentageCutoff
-        || usedPercentage >= 100.0F);
+    if (usedPercentage > diskUtilizationPercentageCutoff || usedPercentage >= 100.0F) {
+      return new DiskErrorInformation(DiskErrorCause.DISK_FULL,
+          "used space above threshold of " + diskUtilizationPercentageCutoff + 
"%");
+    } else {
+      LOG.debug("Dir {} pass throw the usage over percentage validation", dir);
+      return null;
+    }
   }
 
-  private boolean isDiskFreeSpaceUnderLimit(File dir,
-      long freeSpaceCutoff) {
+  private DiskErrorInformation validateDiskFreeSpaceUnderLimit(File dir, boolean isGoodDir) {
+    if (!diskFreeSpaceThresholdEnabled) {
+      return null;
+    }
+    long freeSpaceCutoff = isGoodDir ? diskFreeSpaceCutoffLow : diskFreeSpaceCutoffHigh;
     long freeSpace = dir.getUsableSpace() / (1024 * 1024);
-    return freeSpace < freeSpaceCutoff;
+    if (freeSpace < freeSpaceCutoff) {
+      return new DiskErrorInformation(DiskErrorCause.DISK_FULL,
+          "free space below limit of " + freeSpaceCutoff + "MB");
+    } else {
+      LOG.debug("Dir {} pass throw the free space validation", dir);
+      return null;
+    }
+  }
+
+  private DiskErrorInformation validateSubsAccessibility(File dir) {
+    if (!subAccessibilityValidationEnabled) {
+      return null;
+    }
+    try (Stream<java.nio.file.Path> walk = Files.walk(dir.toPath())) {
+      List<File> subs = walk
+          .map(java.nio.file.Path::toFile)
+          .collect(Collectors.toList());
+      for (File sub : subs) {
+        if (sub.isDirectory()) {
+          DiskChecker.checkDir(sub);
+        } else if (!Files.isReadable(sub.toPath())) {
+          return new DiskErrorInformation(DiskErrorCause.OTHER, "Cannot read " + sub);
+        } else {
+          LOG.debug("{} under {} is accessible", sub, dir);
+        }
+      }
+    } catch (IOException | UncheckedIOException | SecurityException e) {
+      return new DiskErrorInformation(DiskErrorCause.OTHER, e.getMessage());
+    }
+    return null;
   }
 
   private void createDir(FileContext localFs, Path dir, FsPermission perm)
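
The core of the change is validateSubsAccessibility above: Files.walk traverses
everything under a working directory, DiskChecker.checkDir is applied to
sub-directories, and plain files are probed with Files.isReadable. A
self-contained sketch of the same technique (class name hypothetical; a plain
java.nio readability probe stands in for Hadoop's DiskChecker, so this is only
an approximation of the committed logic):

    import java.io.File;
    import java.io.IOException;
    import java.io.UncheckedIOException;
    import java.nio.file.Files;
    import java.util.stream.Stream;

    public class SubAccessibilityCheck {

      /** Returns an error message, or null when everything under dir is readable. */
      static String findInaccessible(File dir) {
        try (Stream<java.nio.file.Path> walk = Files.walk(dir.toPath())) {
          return walk
              .filter(p -> !Files.isReadable(p)) // unreadable file or directory
              .findFirst()
              .map(p -> "Cannot read " + p)
              .orElse(null);
        } catch (IOException | UncheckedIOException | SecurityException e) {
          // Files.walk itself fails once it hits an untraversable sub-directory.
          return e.getMessage();
        }
      }

      public static void main(String[] args) {
        System.out.println(findInaccessible(new File(args[0])));
      }
    }
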
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
index 33bd4d92347..0193f844ac8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
@@ -20,8 +20,17 @@ package org.apache.hadoop.yarn.server.nodemanager;
 
 import java.io.File;
 import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.attribute.PosixFilePermissions;
+import java.util.Collections;
 import java.util.List;
 import java.util.ListIterator;
+import java.util.Map;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
@@ -32,16 +41,11 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.server.nodemanager.DirectoryCollection.DirsChangeListener;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
 
 public class TestDirectoryCollection {
 
-  private static final File testDir = new File("target",
-      TestDirectoryCollection.class.getName()).getAbsoluteFile();
-  private static final File testFile = new File(testDir, "testfile");
+  private File testDir;
+  private File testFile;
 
   private Configuration conf;
   private FileContext localFs;
@@ -50,7 +54,8 @@ public class TestDirectoryCollection {
   public void setupForTests() throws IOException {
     conf = new Configuration();
     localFs = FileContext.getLocalFSFileContext(conf);
-    testDir.mkdirs();
+    testDir = Files.createTempDirectory(TestDirectoryCollection.class.getName()).toFile();
+    testFile = new File(testDir, "testfile");
     testFile.createNewFile();
   }
 
@@ -516,6 +521,20 @@ public class TestDirectoryCollection {
     Assert.assertEquals(listener3.num, 1);
   }
 
+  @Test
+  public void testNonAccessibleSub() throws IOException {
+    Files.setPosixFilePermissions(testDir.toPath(),
+        PosixFilePermissions.fromString("rwx------"));
+    Files.setPosixFilePermissions(testFile.toPath(),
+        PosixFilePermissions.fromString("-w--w--w-"));
+    DirectoryCollection dc = new DirectoryCollection(new String[]{testDir.toString()});
+    Map<String, DirectoryCollection.DiskErrorInformation> diskErrorInformationMap =
+        dc.testDirs(Collections.singletonList(testDir.toString()), Collections.emptySet());
+    Assert.assertEquals(1, diskErrorInformationMap.size());
+    Assert.assertTrue(diskErrorInformationMap.values().iterator().next()
+        .message.contains(testFile.getName()));
+  }
+
   static class DirsChangeListenerTest implements DirsChangeListener {
     public int num = 0;
     public DirsChangeListenerTest() {
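
The new testNonAccessibleSub case drives that walk into the failure path by
making the test file write-only. A minimal reproduction of the setup it uses
(POSIX file system and a non-root user assumed; class name hypothetical):

    import java.io.File;
    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.attribute.PosixFilePermissions;

    public class UnreadableFileSetup {
      public static void main(String[] args) throws IOException {
        File dir = Files.createTempDirectory("nm-dir").toFile();
        File file = new File(dir, "testfile");
        file.createNewFile();
        // The owner keeps full access to the directory itself...
        Files.setPosixFilePermissions(dir.toPath(),
            PosixFilePermissions.fromString("rwx------"));
        // ...but the file becomes write-only, so Files.isReadable() is false
        // and testDirs reports a DiskErrorInformation naming the file.
        Files.setPosixFilePermissions(file.toPath(),
            PosixFilePermissions.fromString("-w--w--w-"));
        System.out.println("readable? " + Files.isReadable(file.toPath()));
      }
    }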

