Here is a patch to make 'sort' use posix_spawnp.

Very tiny performance improvement:

    $ export LC_ALL=C
    $ seq 100000 | shuf > input
    $ perf stat -i --repeat 10 ./src/sort-prev -S 1K \
        --compress-program=gzip input 2>&1 > /dev/null \
        | grep -F 'seconds time elapsed'
                4.5896 +- 0.0109 seconds time elapsed  ( +-  0.24% )
    $ perf stat -i --repeat 10 ./src/sort -S 1K \
        --compress-program=gzip input 2>&1 > /dev/null \
        | grep -F 'seconds time elapsed'
               4.53118 +- 0.00142 seconds time elapsed  ( +-  0.03% )

But hopefully it avoids a fork failing with ENOMEM.

Also, I think the error reporting is a bit nicer with this change. Here
is how it behaved previously:

    $ /bin/sort -S 1K --compress-program=missing Makefile 2>&1
    couldn't execute compress program: errno 2
    [...]
    couldn't execute compress program: errno 2
    sort: 'missing' [-d] terminated abnormally
    couldn't execute compress program: errno 2
    couldn't execute compress program: errno 2    
    $ /bin/sort -S 1K --compress-program=missing Makefile 2>&1
    couldn't execute compress program: errno 2
    [...]
    couldn't execute compress program: errno 2
    sort: 'missing' [-d] terminated abnormally
    couldn't execute compress program: errno 2

Here it is now:

    $ ./src/sort -S 1K --compress-program=missing Makefile 2>&1
    sort: couldn't create process for 'missing': No such file or directory

Collin

>From f95d5e9374b87b794303d6e03dbaa55fdde9a0a6 Mon Sep 17 00:00:00 2001
Message-ID: <f95d5e9374b87b794303d6e03dbaa55fdde9a0a6.1761287028.git.collin.fu...@gmail.com>
From: Collin Funk <[email protected]>
Date: Thu, 23 Oct 2025 21:27:53 -0700
Subject: [PATCH] sort: prefer posix_spawnp to fork and execlp

* NEWS: Mention the change.
* src/sort.c: Include spawn.h.
(MAX_FORK_TRIES_COMPRESS): Adjust comment.
(async_safe_die): Remove function.
(posix_spawn_file_actions_move_fd): New function.
(pipe_fork): Return an error number instead of a pid. Use posix_spawnp
instead of calling fork and expecting the caller to exec.
(maybe_create_temp): Remove code for the child process.
(open_temp): Likewise.
---
 NEWS       |   3 +
 src/sort.c | 190 +++++++++++++++++++++++++++--------------------------
 2 files changed, 101 insertions(+), 92 deletions(-)

diff --git a/NEWS b/NEWS
index ff684fb64..aac54ae92 100644
--- a/NEWS
+++ b/NEWS
@@ -54,6 +54,9 @@ GNU coreutils NEWS                                    -*- outline -*-
    - supports a multi-byte --delimiter character
    - no longer processes input indefinitely in the presence of write errors
 
+  'sort' now uses posix_spawn() to invoke the command specified by
+  --compress-program more efficiently.
+
   'split' now uses posix_spawn() to invoke the shell command specified by
   --filter more efficiently.
 
diff --git a/src/sort.c b/src/sort.c
index 077abf37b..ce3a05167 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -29,6 +29,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <signal.h>
+#include <spawn.h>
 #include "system.h"
 #include "argmatch.h"
 #include "assure.h"
@@ -130,9 +131,9 @@ enum
 enum
   {
     /* The number of times we should try to fork a compression process
-       (we retry if the fork call fails).  We don't _need_ to compress
-       temp files, this is just to reduce file system access, so this number
-       can be small.  Each retry doubles in duration.  */
+       (we retry if the posix_spawnp call fails with EAGAIN).  We don't _need_
+       to compress temp files, this is just to reduce file system access, so
+       this number can be small.  Each retry doubles in duration.  */
     MAX_FORK_TRIES_COMPRESS = 4,
 
     /* The number of times we should try to fork a decompression process.
@@ -370,33 +371,6 @@ static bool debug;
    number are present, temp files will be used. */
 static unsigned int nmerge = NMERGE_DEFAULT;
 
-/* Output an error to stderr and exit using async-signal-safe routines.
-   This can be used safely from signal handlers,
-   and between fork and exec of multithreaded processes.  */
-
-static _Noreturn void
-async_safe_die (int errnum, char const *errstr)
-{
-  ignore_value (write (STDERR_FILENO, errstr, strlen (errstr)));
-
-  /* Even if defined HAVE_STRERROR_R, we can't use it,
-     as it may return a translated string etc. and even if not
-     may call malloc which is unsafe.  We might improve this
-     by testing for sys_errlist and using that if available.
-     For now just report the error number.  */
-  if (errnum)
-    {
-      char errbuf[INT_BUFSIZE_BOUND (errnum)];
-      char *p = inttostr (errnum, errbuf);
-      ignore_value (write (STDERR_FILENO, ": errno ", 8));
-      ignore_value (write (STDERR_FILENO, p, strlen (p)));
-    }
-
-  ignore_value (write (STDERR_FILENO, "\n", 1));
-
-  _exit (SORT_FAILURE);
-}
-
 /* Report MESSAGE for FILE, then clean up and exit.
    If FILE is null, it represents standard output.  */
 
@@ -1034,23 +1008,83 @@ move_fd (int oldfd, int newfd)
     }
 }
 
-/* Fork a child process for piping to and do common cleanup.  The
-   TRIES parameter specifies how many times to try to fork before
-   giving up.  Return the PID of the child, or -1 (setting errno)
-   on failure. */
+/* Setup ACTION to move OLDFD to NEWFD.  If OLDFD != NEWFD, NEWFD is not
+   close-on-exec.  Returns 0 if successful, or an error number otherwise.  */
 
-static pid_t
-pipe_fork (int pipefds[2], size_t tries)
+static int
+posix_spawn_file_actions_move_fd (posix_spawn_file_actions_t *actions,
+                                  int oldfd, int newfd)
+{
+  int result = 0;
+  if (oldfd != newfd)
+    {
+      result = posix_spawn_file_actions_adddup2 (actions, oldfd, newfd);
+      if (result == 0)
+        result = posix_spawn_file_actions_addclose (actions, oldfd);
+    }
+  return result;
+}
+
+/* Execute COMPRESS_PROGRAM in a child process.  The child processes pid is
+   stored in PD.  The TRIES parameter specifies how many times to try to create
+   a child process before giving up.  Return 0 on success, or an error number
+   otherwise.  */
+
+static int
+pipe_fork (pid_t *pid, int pipefds[2], int tempfd, bool decompress,
+           size_t tries)
 {
-#if HAVE_WORKING_FORK
   struct tempnode *saved_temphead;
-  int saved_errno;
   double wait_retry = 0.25;
-  pid_t pid;
   struct cs_status cs;
+  int result;
+  posix_spawnattr_t attr;
+  posix_spawn_file_actions_t actions;
+
+  if ((result = posix_spawnattr_init (&attr)))
+    return result;
+  if ((result = posix_spawnattr_setflags (&attr, POSIX_SPAWN_USEVFORK))
+      || (result = posix_spawn_file_actions_init (&actions)))
+    {
+      posix_spawnattr_destroy (&attr);
+      return result;
+    }
 
   if (pipe2 (pipefds, O_CLOEXEC) < 0)
-    return -1;
+    {
+      int saved_errno = errno;
+      posix_spawnattr_destroy (&attr);
+      posix_spawn_file_actions_destroy (&actions);
+      return saved_errno;
+    }
+
+  if ((result = posix_spawn_file_actions_addclose (&actions, STDIN_FILENO))
+      || (result = posix_spawn_file_actions_addclose (&actions, STDOUT_FILENO))
+      || (decompress
+          ? ((result = posix_spawn_file_actions_addclose (&actions,
+                                                          pipefds[0]))
+             || (result = posix_spawn_file_actions_move_fd (&actions, tempfd,
+                                                            STDIN_FILENO))
+             || (result = posix_spawn_file_actions_move_fd (&actions,
+                                                            pipefds[1],
+                                                            STDOUT_FILENO)))
+          : ((result = posix_spawn_file_actions_addclose (&actions,
+                                                          pipefds[1]))
+             || (result = posix_spawn_file_actions_move_fd (&actions, tempfd,
+                                                            STDOUT_FILENO))
+             || (result = posix_spawn_file_actions_move_fd (&actions,
+                                                            pipefds[0],
+                                                            STDIN_FILENO)))))
+    {
+      close (pipefds[0]);
+      close (pipefds[1]);
+      posix_spawnattr_destroy (&attr);
+      posix_spawn_file_actions_destroy (&actions);
+      return result;
+    }
+
+  char const *const argv[] = { compress_program, decompress ? "-d" : nullptr,
+                               nullptr };
 
   /* At least NMERGE + 1 subprocesses are needed.  More could be created, but
      uncontrolled subprocess generation can hurt performance significantly.
@@ -1070,15 +1104,14 @@ pipe_fork (int pipefds[2], size_t tries)
       saved_temphead = temphead;
       temphead = nullptr;
 
-      pid = fork ();
-      saved_errno = errno;
-      if (pid)
+      result = posix_spawnp (pid, compress_program, &actions, &attr,
+                             (char * const *) argv, environ);
+      if (result == 0)
         temphead = saved_temphead;
 
       cs_leave (&cs);
-      errno = saved_errno;
 
-      if (0 <= pid || errno != EAGAIN)
+      if (result != EAGAIN)
         break;
       else
         {
@@ -1088,26 +1121,18 @@ pipe_fork (int pipefds[2], size_t tries)
         }
     }
 
-  if (pid < 0)
+  posix_spawnattr_destroy (&attr);
+  posix_spawn_file_actions_destroy (&actions);
+
+  if (result)
     {
-      saved_errno = errno;
       close (pipefds[0]);
       close (pipefds[1]);
-      errno = saved_errno;
-    }
-  else if (pid == 0)
-    {
-      close (STDIN_FILENO);
-      close (STDOUT_FILENO);
     }
   else
     ++nprocs;
 
-  return pid;
-
-#else  /* ! HAVE_WORKING_FORK */
-  return -1;
-#endif
+  return result;
 }
 
 /* Create a temporary file and, if asked for, start a compressor
@@ -1129,9 +1154,13 @@ maybe_create_temp (FILE **pfp, bool survive_fd_exhaustion)
   if (compress_program)
     {
       int pipefds[2];
+      int result = pipe_fork (&node->pid, pipefds, tempfd, false,
+                              MAX_FORK_TRIES_COMPRESS);
 
-      node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
-      if (0 < node->pid)
+      if (result)
+        error (SORT_FAILURE, result, _("couldn't create process for %s"),
+               quoteaf (compress_program));
+      else
         {
           close (tempfd);
           close (pipefds[0]);
@@ -1139,18 +1168,6 @@ maybe_create_temp (FILE **pfp, bool survive_fd_exhaustion)
 
           register_proc (node);
         }
-      else if (node->pid == 0)
-        {
-          /* Being the child of a multithreaded program before exec,
-             we're restricted to calling async-signal-safe routines here.  */
-          close (pipefds[1]);
-          move_fd (tempfd, STDOUT_FILENO);
-          move_fd (pipefds[0], STDIN_FILENO);
-
-          execlp (compress_program, compress_program, (char *) nullptr);
-
-          async_safe_die (errno, "couldn't execute compress program");
-        }
     }
 
   *pfp = fdopen (tempfd, "w");
@@ -1188,30 +1205,20 @@ open_temp (struct tempnode *temp)
   if (tempfd < 0)
     return nullptr;
 
-  pid_t child = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
+  pid_t child;
+  int result = pipe_fork (&child, pipefds, tempfd, true,
+                          MAX_FORK_TRIES_DECOMPRESS);
 
-  switch (child)
+  if (result)
     {
-    case -1:
-      if (errno != EMFILE)
-        error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
+      if (result != EMFILE)
+        error (SORT_FAILURE, result, _("couldn't create process for %s -d"),
                quoteaf (compress_program));
       close (tempfd);
       errno = EMFILE;
-      break;
-
-    case 0:
-      /* Being the child of a multithreaded program before exec,
-         we're restricted to calling async-signal-safe routines here.  */
-      close (pipefds[0]);
-      move_fd (tempfd, STDIN_FILENO);
-      move_fd (pipefds[1], STDOUT_FILENO);
-
-      execlp (compress_program, compress_program, "-d", (char *) nullptr);
-
-      async_safe_die (errno, "couldn't execute compress program (with -d)");
-
-    default:
+    }
+  else
+    {
       temp->pid = child;
       register_proc (temp);
       close (tempfd);
@@ -1224,7 +1231,6 @@ open_temp (struct tempnode *temp)
           close (pipefds[0]);
           errno = saved_errno;
         }
-      break;
     }
 
   return fp;
-- 
2.51.0

Reply via email to