This is an automated email from the ASF dual-hosted git repository.

bteke pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push: new ce7d01fac84e YARN-11689. Update the cgroup v2 init error handling (#6810) ce7d01fac84e is described below commit ce7d01fac84e736aa0bce6f775d63fbd36c9459e Author: Benjamin Teke <brumi1...@users.noreply.github.com> AuthorDate: Mon May 13 12:56:26 2024 +0200 YARN-11689. Update the cgroup v2 init error handling (#6810) --- .../linux/resources/AbstractCGroupsHandler.java | 16 ++------ .../linux/resources/CGroupsV2HandlerImpl.java | 43 +++++++++++++++------- .../linux/resources/TestCGroupsV2HandlerImpl.java | 2 + 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java index a8f528a20911..becb68e22f0f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java @@ -358,14 +358,14 @@ public abstract class AbstractCGroupsHandler implements CGroupsHandler { } else { // Unexpected: we just checked that it was missing throw new ResourceHandlerException(getErrorWithDetails( - "Unexpected: Cannot create yarn cgroup", + "Unexpected: Cannot create yarn cgroup hierarchy", subsystemName, yarnHierarchy.getAbsolutePath() )); } } catch (SecurityException e) { throw new ResourceHandlerException(getErrorWithDetails( - "No permissions to create yarn cgroup", + "No 
permissions to create yarn cgroup hierarchy", subsystemName, yarnHierarchy.getAbsolutePath() ), e); @@ -378,15 +378,7 @@ public abstract class AbstractCGroupsHandler implements CGroupsHandler { )); } - try { - updateEnabledControllersInHierarchy(yarnHierarchy, controller); - } catch (ResourceHandlerException e) { - throw new ResourceHandlerException(getErrorWithDetails( - "Failed to update cgroup.subtree_control in yarn hierarchy", - subsystemName, - yarnHierarchy.getAbsolutePath() - )); - } + updateEnabledControllersInHierarchy(yarnHierarchy, controller); } protected abstract void updateEnabledControllersInHierarchy( @@ -401,7 +393,7 @@ public abstract class AbstractCGroupsHandler implements CGroupsHandler { * @param yarnCgroupPath cgroup path that failed * @return a string builder that can be appended by the caller */ - private String getErrorWithDetails( + protected String getErrorWithDetails( String errorMessage, String subsystemName, String yarnCgroupPath) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2HandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2HandlerImpl.java index 312627f89ba3..cd362ab9a548 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2HandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2HandlerImpl.java @@ -97,10 +97,8 @@ class CGroupsV2HandlerImpl extends AbstractCGroupsHandler { @Override protected Map<String, Set<String>> parsePreConfiguredMountPath() throws 
IOException { Map<String, Set<String>> controllerMappings = new HashMap<>(); - String controllerPath = this.cGroupsMountConfig.getMountPath() + - Path.SEPARATOR + this.cGroupPrefix; controllerMappings.put(this.cGroupsMountConfig.getMountPath(), - readControllersFile(controllerPath)); + readControllersFile(this.cGroupsMountConfig.getMountPath())); return controllerMappings; } @@ -171,19 +169,32 @@ class CGroupsV2HandlerImpl extends AbstractCGroupsHandler { try { Set<String> enabledControllers = readControllersFile(yarnHierarchy.getAbsolutePath()); if (!enabledControllers.contains(controller.getName())) { - throw new ResourceHandlerException(String.format( + String errorMsg = String.format( "The controller %s is not enabled in the cgroup hierarchy: %s. Please enable it in " + "in the %s/cgroup.subtree_control file.", controller.getName(), yarnHierarchy.getAbsolutePath(), - yarnHierarchy.getParentFile().getAbsolutePath())); + yarnHierarchy.getParentFile().getAbsolutePath()); + + throw new ResourceHandlerException(getErrorWithDetails( + errorMsg, controller.getName(), + yarnHierarchy.getAbsolutePath())); } File subtreeControlFile = new File(yarnHierarchy.getAbsolutePath() + Path.SEPARATOR + CGROUP_SUBTREE_CONTROL_FILE); if (!subtreeControlFile.exists()) { - throw new ResourceHandlerException( - "No subtree control file found in the cgroup hierarchy: " + - yarnHierarchy.getAbsolutePath()); + String errorMsg = "No subtree control file found in the cgroup hierarchy: " + + yarnHierarchy.getAbsolutePath(); + throw new ResourceHandlerException(getErrorWithDetails( + errorMsg, controller.getName(), + yarnHierarchy.getAbsolutePath())); + } + if (!subtreeControlFile.canWrite()) { + String errorMsg = "Cannot write the cgroup.subtree_control file in the " + + "cgroup hierarchy: " + yarnHierarchy.getAbsolutePath(); + throw new ResourceHandlerException(getErrorWithDetails( + errorMsg, controller.getName(), + yarnHierarchy.getAbsolutePath())); } Writer w = new 
OutputStreamWriter(Files.newOutputStream(subtreeControlFile.toPath(), @@ -194,16 +205,20 @@ class CGroupsV2HandlerImpl extends AbstractCGroupsHandler { yarnHierarchy.getAbsolutePath()); pw.write("+" + controller.getName()); if (pw.checkError()) { - throw new ResourceHandlerException("Failed to add the controller to the " + + String errorMsg = "Failed to add the controller to the " + "cgroup.subtree_control file in the cgroup hierarchy: " + - yarnHierarchy.getAbsolutePath()); + yarnHierarchy.getAbsolutePath(); + throw new ResourceHandlerException(getErrorWithDetails( + errorMsg, controller.getName(), + yarnHierarchy.getAbsolutePath())); } } } catch (IOException e) { - throw new ResourceHandlerException( - "Failed to update the cgroup.subtree_control file in the cgroup hierarchy: " + - yarnHierarchy.getAbsolutePath(), e); + String errorMsg = "Failed to update the cgroup.subtree_control file in the " + + "cgroup hierarchy: " + yarnHierarchy.getAbsolutePath(); + throw new ResourceHandlerException(getErrorWithDetails( + errorMsg, controller.getName(), + yarnHierarchy.getAbsolutePath())); } } - } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2HandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2HandlerImpl.java index b8d1fb238d1f..1198cda7ab05 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2HandlerImpl.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2HandlerImpl.java @@ -217,11 +217,13 @@ public class TestCGroupsV2HandlerImpl extends TestCGroupsHandlerBase { conf.set(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_HIERARCHY, "/hadoop-yarn"); + File baseCgroup = new File(tmpPath); File subCgroup = new File(tmpPath, "/hadoop-yarn"); Assert.assertTrue("temp dir should be created", subCgroup.mkdirs()); subCgroup.deleteOnExit(); String enabledControllers = "cpuset cpu io memory hugetlb pids rdma misc\n"; + createFileWithContent(baseCgroup, CGroupsHandler.CGROUP_CONTROLLERS_FILE, enabledControllers); createFileWithContent(subCgroup, CGroupsHandler.CGROUP_CONTROLLERS_FILE, enabledControllers); File subtreeControlFile = new File(subCgroup.getAbsolutePath(), --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org