[ 
https://issues.apache.org/jira/browse/YARN-11919?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18061791#comment-18061791
 ] 

ASF GitHub Bot commented on YARN-11919:
---------------------------------------

cnauroth commented on code in PR #8177:
URL: https://github.com/apache/hadoop/pull/8177#discussion_r2867182367


##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c:
##########
@@ -915,26 +934,47 @@ static int create_container_directories(const char* user, 
const char *app_id,
 
 /**
  * Load the user information for a given user name.
+ * See <a href="https://linux.die.net/man/3/getpwnam_r";>getpwname_r</a>
+ * Note: for user not found and some error conditions NULL is returned
  */
-static struct passwd* get_user_info(const char* user) {
-  size_t string_size = sysconf(_SC_GETPW_R_SIZE_MAX);
+static struct serialized_passwd* get_user_info(const char* user) {
+  struct passwd pwd;
   struct passwd *result = NULL;
-  if(string_size < 1024) {
-    string_size = 1024;
-  }
-  struct passwd* buffer = malloc(sizeof(struct passwd) + string_size);
-  if (NULL == buffer) {
-    fprintf(LOGFILE, "Failed malloc in get_user_info\n");
-    return NULL;
-  }
-  if (getpwnam_r(user, buffer, ((char*)buffer) + sizeof(struct passwd),
-        string_size, &result) != 0) {
-    free(buffer);
-    fprintf(LOGFILE, "Can't get user information %s - %s\n", user,
-           strerror(errno));
+  struct serialized_passwd *serialized_result;
+  char *buf;
+  size_t bufsize;
+  int s;
+  bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
+  if (bufsize == -1){
+    bufsize = 1024;
+  }
+  buf = malloc(bufsize);
+  if (buf == NULL) {
+    exit(EXIT_FAILURE);
+  }
+  while ((s = getpwnam_r(user, &pwd, buf, bufsize, &result)) == ERANGE){
+    bufsize = 2 * bufsize;
+    char *newbuffer = realloc(buf, bufsize);
+    if (newbuffer == NULL){

Review Comment:
   Nitpick: space before curly brace please.



##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c:
##########
@@ -915,26 +934,47 @@ static int create_container_directories(const char* user, 
const char *app_id,
 
 /**
  * Load the user information for a given user name.
+ * See <a href="https://linux.die.net/man/3/getpwnam_r";>getpwname_r</a>
+ * Note: for user not found and some error conditions NULL is returned
  */
-static struct passwd* get_user_info(const char* user) {
-  size_t string_size = sysconf(_SC_GETPW_R_SIZE_MAX);
+static struct serialized_passwd* get_user_info(const char* user) {
+  struct passwd pwd;
   struct passwd *result = NULL;
-  if(string_size < 1024) {
-    string_size = 1024;
-  }
-  struct passwd* buffer = malloc(sizeof(struct passwd) + string_size);
-  if (NULL == buffer) {
-    fprintf(LOGFILE, "Failed malloc in get_user_info\n");
-    return NULL;
-  }
-  if (getpwnam_r(user, buffer, ((char*)buffer) + sizeof(struct passwd),
-        string_size, &result) != 0) {
-    free(buffer);
-    fprintf(LOGFILE, "Can't get user information %s - %s\n", user,
-           strerror(errno));
+  struct serialized_passwd *serialized_result;
+  char *buf;
+  size_t bufsize;
+  int s;
+  bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
+  if (bufsize == -1){
+    bufsize = 1024;
+  }
+  buf = malloc(bufsize);
+  if (buf == NULL) {
+    exit(EXIT_FAILURE);
+  }
+  while ((s = getpwnam_r(user, &pwd, buf, bufsize, &result)) == ERANGE){
+    bufsize = 2 * bufsize;
+    char *newbuffer = realloc(buf, bufsize);
+    if (newbuffer == NULL){
+      exit(EXIT_FAILURE);
+    }
+    buf = newbuffer;
+  }
+  if (result == NULL) {
+    if (s == 0){

Review Comment:
   Nitpick: space before curly brace please.



##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c:
##########
@@ -915,26 +934,47 @@ static int create_container_directories(const char* user, 
const char *app_id,
 
 /**
  * Load the user information for a given user name.
+ * See <a href="https://linux.die.net/man/3/getpwnam_r";>getpwname_r</a>
+ * Note: for user not found and some error conditions NULL is returned
  */
-static struct passwd* get_user_info(const char* user) {
-  size_t string_size = sysconf(_SC_GETPW_R_SIZE_MAX);
+static struct serialized_passwd* get_user_info(const char* user) {
+  struct passwd pwd;
   struct passwd *result = NULL;
-  if(string_size < 1024) {
-    string_size = 1024;
-  }
-  struct passwd* buffer = malloc(sizeof(struct passwd) + string_size);
-  if (NULL == buffer) {
-    fprintf(LOGFILE, "Failed malloc in get_user_info\n");
-    return NULL;
-  }
-  if (getpwnam_r(user, buffer, ((char*)buffer) + sizeof(struct passwd),
-        string_size, &result) != 0) {
-    free(buffer);
-    fprintf(LOGFILE, "Can't get user information %s - %s\n", user,
-           strerror(errno));
+  struct serialized_passwd *serialized_result;
+  char *buf;
+  size_t bufsize;
+  int s;
+  bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
+  if (bufsize == -1){
+    bufsize = 1024;
+  }
+  buf = malloc(bufsize);
+  if (buf == NULL) {
+    exit(EXIT_FAILURE);
+  }
+  while ((s = getpwnam_r(user, &pwd, buf, bufsize, &result)) == ERANGE){
+    bufsize = 2 * bufsize;
+    char *newbuffer = realloc(buf, bufsize);

Review Comment:
   If a [`realloc`](https://linux.die.net/man/3/realloc) fails, it returns 
`NULL`. If there is a failure, the original buffer remains in place. This 
implies we need to `free` the original if the `realloc` fails.



##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c:
##########
@@ -915,26 +934,47 @@ static int create_container_directories(const char* user, 
const char *app_id,
 
 /**
  * Load the user information for a given user name.
+ * See <a href="https://linux.die.net/man/3/getpwnam_r";>getpwname_r</a>
+ * Note: for user not found and some error conditions NULL is returned
  */
-static struct passwd* get_user_info(const char* user) {
-  size_t string_size = sysconf(_SC_GETPW_R_SIZE_MAX);
+static struct serialized_passwd* get_user_info(const char* user) {
+  struct passwd pwd;
   struct passwd *result = NULL;
-  if(string_size < 1024) {
-    string_size = 1024;
-  }
-  struct passwd* buffer = malloc(sizeof(struct passwd) + string_size);
-  if (NULL == buffer) {
-    fprintf(LOGFILE, "Failed malloc in get_user_info\n");
-    return NULL;
-  }
-  if (getpwnam_r(user, buffer, ((char*)buffer) + sizeof(struct passwd),
-        string_size, &result) != 0) {
-    free(buffer);
-    fprintf(LOGFILE, "Can't get user information %s - %s\n", user,
-           strerror(errno));
+  struct serialized_passwd *serialized_result;
+  char *buf;
+  size_t bufsize;
+  int s;
+  bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
+  if (bufsize == -1){
+    bufsize = 1024;
+  }
+  buf = malloc(bufsize);
+  if (buf == NULL) {
+    exit(EXIT_FAILURE);
+  }
+  while ((s = getpwnam_r(user, &pwd, buf, bufsize, &result)) == ERANGE){
+    bufsize = 2 * bufsize;
+    char *newbuffer = realloc(buf, bufsize);
+    if (newbuffer == NULL){

Review Comment:
   Nitpick: space before the curly brace please.



##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c:
##########
@@ -120,6 +120,25 @@ void set_nm_uid(uid_t user, gid_t group) {
   nm_gid = group;
 }
 
+//function to make a deep clone of passwd to serialized_passwd
+void deep_copy_passwd(const struct passwd *src, struct serialized_passwd 
*dest){
+  dest->pw_name = strdup(src->pw_name);

Review Comment:
   [`strdup`](https://man7.org/linux/man-pages/man3/strdup.3.html) can return 
`NULL` if there is insufficient memory. (There are pre-existing calls to 
`strdup` that don't have the error checking. No expectation that you need to do 
a mass update of all error handling in scope of this patch.)





> linux-container-executor segfault with get_user_info
> ----------------------------------------------------
>
>                 Key: YARN-11919
>                 URL: https://issues.apache.org/jira/browse/YARN-11919
>             Project: Hadoop YARN
>          Issue Type: Bug
>    Affects Versions: 3.4.2
>            Reporter: Edward Capriolo
>            Priority: Major
>              Labels: pull-request-available
>
> The code in container executor for getting user information is slightly 
> different then the recipe in the documentation. On alpine it repeatedly 
> segfaults
> {code:java}
> rvn package -Pnative -Dmaven.skip.test=true -DskipTests -Dtar
> cp target/native/target/usr/local/bin/container-executor /usr/local/bin/
> chmod 6050 /usr/local/bin/container-executor
> chgrp hadoop /usr/local/bin/container-executor
> chmod 6050 /usr/local/bin/container-executor
> /usr/local/bin/container-executor nobody nobody 0 
> application_1766935260716_0004  container_1766935260716_0004_02_000001 
> /yarn-root/nm-local-dir/nmPriv
>  {code}
> Result:
> {code:java}
> edgy
> main : command provided 0
> main : run as user is nobody
> main : requested yarn user is nobody
> main : validate_container_id 
> main : huh 
> validated command: INITIALIZE_CONTAINER
> init : set_user 
> maybe free_user 
> going to check user 
> min id 
> min id 1000
> Get user info 
> passwd info 
> got pwd 
> Segmentation fault (core dumped)
>  {code}
> The recipe here: 
> [https://linux.die.net/man/3/getpwnam_r]
> has better success.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to