This is an automated email from the ASF dual-hosted git repository.

bteke pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new ce7d01fac84e YARN-11689. Update the cgroup v2 init error handling (#6810)
ce7d01fac84e is described below

commit ce7d01fac84e736aa0bce6f775d63fbd36c9459e
Author: Benjamin Teke <brumi1...@users.noreply.github.com>
AuthorDate: Mon May 13 12:56:26 2024 +0200

    YARN-11689. Update the cgroup v2 init error handling (#6810)
---
 .../linux/resources/AbstractCGroupsHandler.java    | 16 ++------
 .../linux/resources/CGroupsV2HandlerImpl.java      | 43 +++++++++++++++-------
 .../linux/resources/TestCGroupsV2HandlerImpl.java  |  2 +
 3 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java
index a8f528a20911..becb68e22f0f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java
@@ -358,14 +358,14 @@ public abstract class AbstractCGroupsHandler implements CGroupsHandler {
         } else {
           // Unexpected: we just checked that it was missing
           throw new ResourceHandlerException(getErrorWithDetails(
-              "Unexpected: Cannot create yarn cgroup",
+              "Unexpected: Cannot create yarn cgroup hierarchy",
               subsystemName,
               yarnHierarchy.getAbsolutePath()
           ));
         }
       } catch (SecurityException e) {
         throw new ResourceHandlerException(getErrorWithDetails(
-            "No permissions to create yarn cgroup",
+            "No permissions to create yarn cgroup hierarchy",
             subsystemName,
             yarnHierarchy.getAbsolutePath()
         ), e);
@@ -378,15 +378,7 @@ public abstract class AbstractCGroupsHandler implements CGroupsHandler {
       ));
     }
 
-    try {
-      updateEnabledControllersInHierarchy(yarnHierarchy, controller);
-    } catch (ResourceHandlerException e) {
-      throw new ResourceHandlerException(getErrorWithDetails(
-          "Failed to update cgroup.subtree_control in yarn hierarchy",
-          subsystemName,
-          yarnHierarchy.getAbsolutePath()
-      ));
-    }
+    updateEnabledControllersInHierarchy(yarnHierarchy, controller);
   }
 
   protected abstract void updateEnabledControllersInHierarchy(
@@ -401,7 +393,7 @@ public abstract class AbstractCGroupsHandler implements CGroupsHandler {
    * @param yarnCgroupPath cgroup path that failed
    * @return a string builder that can be appended by the caller
    */
-  private String getErrorWithDetails(
+  protected String getErrorWithDetails(
       String errorMessage,
       String subsystemName,
       String yarnCgroupPath) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2HandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2HandlerImpl.java
index 312627f89ba3..cd362ab9a548 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2HandlerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2HandlerImpl.java
@@ -97,10 +97,8 @@ class CGroupsV2HandlerImpl extends AbstractCGroupsHandler {
   @Override
   protected Map<String, Set<String>> parsePreConfiguredMountPath() throws 
IOException {
     Map<String, Set<String>> controllerMappings = new HashMap<>();
-    String controllerPath = this.cGroupsMountConfig.getMountPath() +
-        Path.SEPARATOR + this.cGroupPrefix;
     controllerMappings.put(this.cGroupsMountConfig.getMountPath(),
-        readControllersFile(controllerPath));
+        readControllersFile(this.cGroupsMountConfig.getMountPath()));
     return controllerMappings;
   }
 
@@ -171,19 +169,32 @@ class CGroupsV2HandlerImpl extends AbstractCGroupsHandler {
     try {
       Set<String> enabledControllers = 
readControllersFile(yarnHierarchy.getAbsolutePath());
       if (!enabledControllers.contains(controller.getName())) {
-        throw new ResourceHandlerException(String.format(
+        String errorMsg = String.format(
             "The controller %s is not enabled in the cgroup hierarchy: %s. 
Please enable it in " +
                 "in the %s/cgroup.subtree_control file.",
             controller.getName(), yarnHierarchy.getAbsolutePath(),
-            yarnHierarchy.getParentFile().getAbsolutePath()));
+            yarnHierarchy.getParentFile().getAbsolutePath());
+
+        throw new ResourceHandlerException(getErrorWithDetails(
+            errorMsg, controller.getName(),
+            yarnHierarchy.getAbsolutePath()));
       }
 
       File subtreeControlFile = new File(yarnHierarchy.getAbsolutePath()
           + Path.SEPARATOR + CGROUP_SUBTREE_CONTROL_FILE);
       if (!subtreeControlFile.exists()) {
-        throw new ResourceHandlerException(
-            "No subtree control file found in the cgroup hierarchy: " +
-                yarnHierarchy.getAbsolutePath());
+        String errorMsg = "No subtree control file found in the cgroup 
hierarchy: " +
+            yarnHierarchy.getAbsolutePath();
+        throw new ResourceHandlerException(getErrorWithDetails(
+            errorMsg, controller.getName(),
+            yarnHierarchy.getAbsolutePath()));
+      }
+      if (!subtreeControlFile.canWrite()) {
+        String errorMsg = "Cannot write the cgroup.subtree_control file in the 
" +
+            "cgroup hierarchy: " + yarnHierarchy.getAbsolutePath();
+        throw new ResourceHandlerException(getErrorWithDetails(
+            errorMsg, controller.getName(),
+            yarnHierarchy.getAbsolutePath()));
       }
 
       Writer w = new 
OutputStreamWriter(Files.newOutputStream(subtreeControlFile.toPath(),
@@ -194,16 +205,20 @@ class CGroupsV2HandlerImpl extends AbstractCGroupsHandler {
             yarnHierarchy.getAbsolutePath());
         pw.write("+" + controller.getName());
         if (pw.checkError()) {
-          throw new ResourceHandlerException("Failed to add the controller to 
the " +
+          String errorMsg = "Failed to add the controller to the " +
               "cgroup.subtree_control file in the cgroup hierarchy: " +
-              yarnHierarchy.getAbsolutePath());
+              yarnHierarchy.getAbsolutePath();
+          throw new ResourceHandlerException(getErrorWithDetails(
+              errorMsg, controller.getName(),
+              yarnHierarchy.getAbsolutePath()));
         }
       }
     } catch (IOException e) {
-      throw new ResourceHandlerException(
-          "Failed to update the cgroup.subtree_control file in the cgroup 
hierarchy: " +
-              yarnHierarchy.getAbsolutePath(), e);
+      String errorMsg = "Failed to update the cgroup.subtree_control file in 
the " +
+          "cgroup hierarchy: " + yarnHierarchy.getAbsolutePath();
+      throw new ResourceHandlerException(getErrorWithDetails(
+          errorMsg, controller.getName(),
+          yarnHierarchy.getAbsolutePath()));
     }
   }
-
 }
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2HandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2HandlerImpl.java
index b8d1fb238d1f..1198cda7ab05 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2HandlerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2HandlerImpl.java
@@ -217,11 +217,13 @@ public class TestCGroupsV2HandlerImpl extends TestCGroupsHandlerBase {
     conf.set(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_HIERARCHY,
         "/hadoop-yarn");
 
+    File baseCgroup = new File(tmpPath);
     File subCgroup = new File(tmpPath, "/hadoop-yarn");
     Assert.assertTrue("temp dir should be created", subCgroup.mkdirs());
     subCgroup.deleteOnExit();
 
     String enabledControllers = "cpuset cpu io memory hugetlb pids rdma 
misc\n";
+    createFileWithContent(baseCgroup, CGroupsHandler.CGROUP_CONTROLLERS_FILE, 
enabledControllers);
     createFileWithContent(subCgroup, CGroupsHandler.CGROUP_CONTROLLERS_FILE, 
enabledControllers);
 
     File subtreeControlFile = new File(subCgroup.getAbsolutePath(),


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to