[ 
https://issues.apache.org/jira/browse/YARN-11920?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18053930#comment-18053930
 ] 

ASF GitHub Bot commented on YARN-11920:
---------------------------------------

edwardcapriolo commented on code in PR #8184:
URL: https://github.com/apache/hadoop/pull/8184#discussion_r2721285937


##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c:
##########
@@ -1073,16 +1073,35 @@ static int change_owner(const char* path, uid_t user, 
gid_t group) {
   }
 }
 
+mode_t get_container_group_mode(){
+  char *permission_string = get_section_value(CONTAINER_GROUP_MODE_KEY, 
&executor_cfg);
+  char *default_mode = DEFAULT_CONTAINER_GROUP_MODE;
+  char *endptr;
+  mode_t mode_val;
+  if (permission_string != NULL){
+    mode_val = (mode_t) strtol(permission_string, &endptr, 8);
+    if (*endptr != '\0'){
+      fprintf(LOGFILE, "Illegal value of %s for %s in configuration\n",
+        permission_string, CONTAINER_GROUP_MODE_KEY);
+      exit(1);
+    }
+    free(permission_string);
+  } else {
+    mode_val = (mode_t) strtol(default_mode, &endptr, 8);
+  }
+  return mode_val;
+}
+
+
 /**
  * Create a top level directory for the user.
  * It assumes that the parent directory is *not* writable by the user.
- * It creates directories with 02750 permissions owned by the user
+ * It creates directories with get_container_group_mode() permissions owned by 
the user
  * and with the group set to the node manager group.
  * return non-0 on failure
  */
 int create_directory_for_user(const char* path) {
-  // set 750 permissions and setgid bit
-  mode_t permissions = S_IRWXU | S_IRGRP | S_IXGRP | S_ISGID;
+  mode_t permissions = get_container_group_mode() | S_ISGID;

Review Comment:
   Whatever the user supplies in the override, we take it and always add the
setgid bit (S_ISGID).



##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c:
##########
@@ -1073,16 +1073,35 @@ static int change_owner(const char* path, uid_t user, 
gid_t group) {
   }
 }
 
+mode_t get_container_group_mode(){

Review Comment:
   We let the user choose this value. I find the default value currently in the
file incorrect. Either way, this value is better as it works in more
deployments. This is a case of "770" was a bug, but now it's a feature.



##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c:
##########
@@ -200,6 +200,34 @@ void test_get_app_directory() {
   free(app_dir);
 }
 
+/*
+In the directory structure:
+  /yarn-root/nm-local-dir/usercache/auser/appcache
+Where nodemanager is running:
+ user: yarn group: hadoop
+We require group +w permission as "auser" is owned auser.
+Otherwise nodemanager will not be able to create appcache
++*/
+void test_create_app_dirs(){

Review Comment:
   There was no test covering the place in the code where the issue was so I 
added one.



##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c:
##########
@@ -1370,34 +1389,74 @@ int create_container_log_dirs(const char *container_id, 
const char *app_id,
   return 0;
 }
 
-/**
- * Function to create the application directories.
- * Returns pointer to primary_app_dir or NULL if it fails.
- */
-static char *create_app_dirs(const char *user,
+char* concat(const char *s1, const char *s2) {

Review Comment:
   It is much harder to concatenate strings in C than in Java, for sure.



##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c:
##########
@@ -1370,34 +1389,74 @@ int create_container_log_dirs(const char *container_id, 
const char *app_id,
   return 0;
 }
 
-/**
- * Function to create the application directories.
- * Returns pointer to primary_app_dir or NULL if it fails.
- */
-static char *create_app_dirs(const char *user,
+char* concat(const char *s1, const char *s2) {
+    size_t len1 = strlen(s1);
+    size_t len2 = strlen(s2);
+    char *result = malloc(len1 + len2 + 1);
+    if (result == NULL) {
+        exit(EXIT_FAILURE);
+    }
+    memcpy(result, s1, len1);
+    memcpy(result + len1, s2, len2 + 1);
+    return result;
+}
+
+void maybe_create_appcache(const char *appcache, mode_t permissions){

Review Comment:
   Depending on the flow (runc vs. initialization container), or if a previous
run happened, this directory might already exist. We create it conditionally.



##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c:
##########
@@ -1370,34 +1389,74 @@ int create_container_log_dirs(const char *container_id, 
const char *app_id,
   return 0;
 }
 
-/**
- * Function to create the application directories.
- * Returns pointer to primary_app_dir or NULL if it fails.
- */
-static char *create_app_dirs(const char *user,
+char* concat(const char *s1, const char *s2) {
+    size_t len1 = strlen(s1);
+    size_t len2 = strlen(s2);
+    char *result = malloc(len1 + len2 + 1);
+    if (result == NULL) {
+        exit(EXIT_FAILURE);
+    }
+    memcpy(result, s1, len1);
+    memcpy(result + len1, s2, len2 + 1);
+    return result;
+}
+
+void maybe_create_appcache(const char *appcache, mode_t permissions){
+  //fprintf(LOGFILE, "Going to create %s\n", appcache);
+  struct stat exists;
+  int stat_res = stat(appcache, &exists);
+  if (stat_res == -1){
+    int mk_res = mkdir(appcache, permissions);
+    fprintf(LOGFILE, "Creating appcache %s result %d\n", appcache, mk_res);
+  }
+}
+
+char *create_app_dirs(const char *user,
                              const char *app_id,
-                             char* const* local_dirs)
-{
+                             char* const* local_dirs) {
   // 750
   mode_t permissions = S_IRWXU | S_IRGRP | S_IXGRP;
   char* const* nm_root;
   char *primary_app_dir = NULL;
-  for(nm_root=local_dirs; *nm_root != NULL; ++nm_root) {
+  for(nm_root = local_dirs; *nm_root != NULL; ++nm_root) {
     char *app_dir = get_app_directory(*nm_root, user, app_id);
-    if (app_dir == NULL) {
-      // try the next one
-    } else if (strstr(app_dir, "..") != 0) {
-      fprintf(LOGFILE, "Unsupported app directory path detected.\n");
+    fprintf(LOGFILE, "Appdir %s\n", app_dir);
+    if (app_dir == NULL){
       free(app_dir);
-    } else if (mkdirs(app_dir, permissions) != 0) {
+      continue;
+    }
+    //implementation node: Could be more thought put in do this detection
+    if (strstr(app_dir, "..") != 0) {
+      fprintf(LOGFILE, "Unsupported app directory path detected.\n");
       free(app_dir);
-    } else if (primary_app_dir == NULL) {
-      primary_app_dir = app_dir;
+      continue;
+    }
+    char *user_root = get_user_directory(*nm_root, user);
+    char *appcache = concat(user_root, "/appcache");
+    maybe_create_appcache(appcache, permissions);
+    free(user_root);
+    free(appcache);
+
+    struct stat exists;
+    int stat_res = stat(app_dir, &exists);
+    if (stat_res == 0){
+      if (primary_app_dir == NULL) {
+        primary_app_dir = strdup(app_dir);
+      }
+    } else if (stat_res == -1){
+      int mk_res = mkdir(app_dir, permissions);

Review Comment:
   I cleaned up the logic here and shifted the code to using mkdir instead of
mkdirs. mkdirs attempts to operate on parent directories as well, and at this
point in the code the parents are already created correctly.



##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c:
##########
@@ -1370,34 +1389,74 @@ int create_container_log_dirs(const char *container_id, 
const char *app_id,
   return 0;
 }
 
-/**
- * Function to create the application directories.
- * Returns pointer to primary_app_dir or NULL if it fails.
- */
-static char *create_app_dirs(const char *user,
+char* concat(const char *s1, const char *s2) {
+    size_t len1 = strlen(s1);
+    size_t len2 = strlen(s2);
+    char *result = malloc(len1 + len2 + 1);
+    if (result == NULL) {
+        exit(EXIT_FAILURE);
+    }
+    memcpy(result, s1, len1);
+    memcpy(result + len1, s2, len2 + 1);
+    return result;
+}
+
+void maybe_create_appcache(const char *appcache, mode_t permissions){
+  //fprintf(LOGFILE, "Going to create %s\n", appcache);
+  struct stat exists;
+  int stat_res = stat(appcache, &exists);
+  if (stat_res == -1){
+    int mk_res = mkdir(appcache, permissions);
+    fprintf(LOGFILE, "Creating appcache %s result %d\n", appcache, mk_res);
+  }
+}
+
+char *create_app_dirs(const char *user,
                              const char *app_id,
-                             char* const* local_dirs)
-{
+                             char* const* local_dirs) {
   // 750
   mode_t permissions = S_IRWXU | S_IRGRP | S_IXGRP;
   char* const* nm_root;
   char *primary_app_dir = NULL;
-  for(nm_root=local_dirs; *nm_root != NULL; ++nm_root) {
+  for(nm_root = local_dirs; *nm_root != NULL; ++nm_root) {
     char *app_dir = get_app_directory(*nm_root, user, app_id);
-    if (app_dir == NULL) {
-      // try the next one
-    } else if (strstr(app_dir, "..") != 0) {
-      fprintf(LOGFILE, "Unsupported app directory path detected.\n");
+    fprintf(LOGFILE, "Appdir %s\n", app_dir);
+    if (app_dir == NULL){
       free(app_dir);
-    } else if (mkdirs(app_dir, permissions) != 0) {
+      continue;
+    }
+    //implementation node: Could be more thought put in do this detection
+    if (strstr(app_dir, "..") != 0) {
+      fprintf(LOGFILE, "Unsupported app directory path detected.\n");
       free(app_dir);
-    } else if (primary_app_dir == NULL) {
-      primary_app_dir = app_dir;

Review Comment:
   This was a leak in the code, as the pointer primary_app_dir points to
app_dir, which we later free. This leads to undefined behavior.





> linux-container-executor requires flexible directory permissions.
> -----------------------------------------------------------------
>
>                 Key: YARN-11920
>                 URL: https://issues.apache.org/jira/browse/YARN-11920
>             Project: Hadoop YARN
>          Issue Type: New Feature
>            Reporter: Edward Capriolo
>            Assignee: Edward Capriolo
>            Priority: Major
>              Labels: pull-request-available
>
> When running yarn node-manager directories are created which the node manager 
> can not write to. I discussed this on the mailing list:  
> {quote}Hello. I am trying to run linux-container-executor in a setup without 
> kerberos. I want to see it "change user" and run a map reduce job.
> I have a fork of linux-container-executor with some gratuitous println:
> main : command provided 0
> 2026-01-12T19:51:25.467740715Z main : run as user is auser
> 2026-01-12T19:51:25.467750476Z main : requested yarn user is auser
> 2026-01-12T19:51:25.467760225Z main : validate_container_id 
> 2026-01-12T19:51:25.467771148Z main : huh 
> 2026-01-12T19:51:25.467784131Z validated command: INITIALIZE_CONTAINER
> 2026-01-12T19:51:25.467795274Z init : set_user 
> 2026-01-12T19:51:25.467805332Z maybe free_user 
> 2026-01-12T19:51:25.467815142Z going to check user 
> 2026-01-12T19:51:25.467824798Z min id 
> 2026-01-12T19:51:25.467833618Z min id 1000
> 2026-01-12T19:51:25.467842685Z Get user info 
> 2026-01-12T19:51:25.467851066Z init : set_user done 
> 2026-01-12T19:51:25.467860879Z initialize_app( 
> 2026-01-12T19:51:25.467871118Z create user dirs 
> 2026-01-12T19:51:25.467881131Z initialize_user.
> 2026-01-12T19:51:25.467890384Z created 
> 2026-01-12T19:51:25.467900435Z create_log_dirs().
> 2026-01-12T19:51:25.467911090Z create container log 
> 2026-01-12T19:51:25.467920790Z create_container_log_dirs
> 2026-01-12T19:51:25.467931683Z open_file_as_nm.
> 2026-01-12T19:51:25.467941717Z change_user 
> 2026-01-12T19:51:25.467952667Z change_user.
> *2026-01-12T19:51:25.467962032Z Can't create directory 
> /yarn-root/nm-local-dir/usercache/auser/appcache - Permission denied*
> 2026-01-12T19:51:25.467973350Z Did not create any app directories
> I am creating users like this:
>   RUN addgroup -S hadoop
>   RUN addgroup -S hdfs && adduser -S -G hdfs -H -D hdfs
>   RUN addgroup -S yarn && adduser -S -G yarn -H -D yarn
>   RUN addgroup yarn hadoop
>   RUN addgroup -S auser && adduser -S -G auser -H -D auser
> I am launching a wordcount as "auser" like so:
> [https://github.com/edwardcapriolo/edgy-ansible/blob/main/imaging/hadoop/compositions/ha_rm_zk_pki_tls/enter_auser.sh]
> This is what the directory inside the node manager looks like:
> nm1:/yarn-root/nm-local-dir/usercache# rm -rf auser/
> nm1:/yarn-root/nm-local-dir/usercache# ld -lahd /yarn-root/
> nm1:/yarn-root/nm-local-dir/usercache# ls -lahd /yarn-root/
> drwxr-xr-x    1 yarn     root          24 Jan 12 19:32 /yarn-root/
> nm1:/yarn-root/nm-local-dir/usercache# ls -lahd /yarn-root/nm-local-dir/
> drwxr-xr-x    1 yarn     hadoop        54 Jan 12 19:32 
> /yarn-root/nm-local-dir/
> nm1:/yarn-root/nm-local-dir/usercache# ls -lahd /yarn-root/nm-local-dir/
> filecache/ nmPrivate/ usercache/ 
> nm1:/yarn-root/nm-local-dir/usercache# ls -lahd 
> /yarn-root/nm-local-dir/usercache/
> drwxr-sr-x    1 yarn     hadoop        10 Jan 12 20:38 
> /yarn-root/nm-local-dir/usercache/
> nm1:/yarn-root/nm-local-dir/usercache# ls -lahd 
> /yarn-root/nm-local-dir/usercache/auser/
> drwxr-s---    1 auser    hadoop         0 Jan 12 20:38 
> /yarn-root/nm-local-dir/usercache/auser/
> My node manager is running as yarn
> nm1:/$ ps -ef | grep yarn
>     1 yarn      0:20 /usr/bin/java -Dproc_nodemanager 
> nm1:/$ id -u yarn
> 101
> nm1:/$ id -g yarn
> 103
> nm1:/$ id -G yarn
> 103 101
> nm1:/$ id -G yarn -n
> yarn hadoop
> nm1:/$ umask 
> 0022
> I am guessing that the issue is 
> drwxr-s---    1 auser    hadoop         0 Jan 12 20:38 auser
> This directory gets owned by auser/hadoop but the group write is off?
> My yarn config is here:
> [https://github.com/edwardcapriolo/edgy-ansible/blob/main/imaging/hadoop/compositions/ha_rm_zk_pki_tls/hd_conf/yarn-site.xml#L126]
> Also, after manually changing it, it just gets put back
> nm1:/yarn-root/nm-local-dir/usercache# chmod g+w auser/
> nm1:/yarn-root/nm-local-dir/usercache# ls -lah
> total 0      
> drwxr-sr-x    1 yarn     hadoop        10 Jan 12 20:38 .
> drwxr-xr-x    1 yarn     hadoop        54 Jan 12 19:32 ..
> drwxrws---    1 auser    hadoop         0 Jan 12 20:38 auser
> nm1:/yarn-root/nm-local-dir/usercache# ls -lah
> total 0      
> drwxr-sr-x    1 yarn     hadoop        10 Jan 12 20:38 .
> drwxr-xr-x    1 yarn     hadoop        54 Jan 12 19:32 ..
> drwxr-s---    1 auser    hadoop         0 Jan 12 20:38 auser
> {quote}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to