Here is a patch to make 'sort' use posix_spawnp.
Very tiny performance improvement:
$ export LC_ALL=C
$ seq 100000 | shuf > input
$ perf stat -i --repeat 10 ./src/sort-prev -S 1K \
--compress-program=gzip input 2>&1 > /dev/null \
| grep -F 'seconds time elapsed'
4.5896 +- 0.0109 seconds time elapsed ( +- 0.24% )
$ perf stat -i --repeat 10 ./src/sort -S 1K \
--compress-program=gzip input 2>&1 > /dev/null \
| grep -F 'seconds time elapsed'
4.53118 +- 0.00142 seconds time elapsed ( +- 0.03% )
But hopefully it avoids a fork failing with ENOMEM.
Also, I think the error reporting is a bit nicer with this change. Here
is how it behaved previously:
$ /bin/sort -S 1K --compress-program=missing Makefile 2>&1
couldn't execute compress program: errno 2
[...]
couldn't execute compress program: errno 2
sort: 'missing' [-d] terminated abnormally
couldn't execute compress program: errno 2
couldn't execute compress program: errno 2
$ /bin/sort -S 1K --compress-program=missing Makefile 2>&1
couldn't execute compress program: errno 2
[...]
couldn't execute compress program: errno 2
sort: 'missing' [-d] terminated abnormally
couldn't execute compress program: errno 2
Here it is now:
$ ./src/sort -S 1K --compress-program=missing Makefile 2>&1
sort: couldn't create process for 'missing': No such file or directory
Collin
>From f95d5e9374b87b794303d6e03dbaa55fdde9a0a6 Mon Sep 17 00:00:00 2001
Message-ID: <f95d5e9374b87b794303d6e03dbaa55fdde9a0a6.1761287028.git.collin.fu...@gmail.com>
From: Collin Funk <[email protected]>
Date: Thu, 23 Oct 2025 21:27:53 -0700
Subject: [PATCH] sort: prefer posix_spawnp to fork and execlp
* NEWS: Mention the change.
* src/sort.c: Include spawn.h.
(MAX_FORK_TRIES_COMPRESS): Adjust comment.
(async_safe_die): Remove function.
(posix_spawn_file_actions_move_fd): New function.
(pipe_fork): Return an error number instead of a pid. Use posix_spawnp
instead of calling fork and expecting the caller to exec.
(maybe_create_temp): Remove code for the child process.
(open_temp): Likewise.
---
NEWS | 3 +
src/sort.c | 190 +++++++++++++++++++++++++++--------------------------
2 files changed, 101 insertions(+), 92 deletions(-)
diff --git a/NEWS b/NEWS
index ff684fb64..aac54ae92 100644
--- a/NEWS
+++ b/NEWS
@@ -54,6 +54,9 @@ GNU coreutils NEWS -*- outline -*-
- supports a multi-byte --delimiter character
- no longer processes input indefinitely in the presence of write errors
+ 'sort' now uses posix_spawn() to invoke the command specified by
+ --compress-program more efficiently.
+
'split' now uses posix_spawn() to invoke the shell command specified by
--filter more efficiently.
diff --git a/src/sort.c b/src/sort.c
index 077abf37b..ce3a05167 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -29,6 +29,7 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
+#include <spawn.h>
#include "system.h"
#include "argmatch.h"
#include "assure.h"
@@ -130,9 +131,9 @@ enum
enum
{
/* The number of times we should try to fork a compression process
- (we retry if the fork call fails). We don't _need_ to compress
- temp files, this is just to reduce file system access, so this number
- can be small. Each retry doubles in duration. */
+ (we retry if the posix_spawnp call fails with EAGAIN). We don't _need_
+ to compress temp files, this is just to reduce file system access, so
+ this number can be small. Each retry doubles in duration. */
MAX_FORK_TRIES_COMPRESS = 4,
/* The number of times we should try to fork a decompression process.
@@ -370,33 +371,6 @@ static bool debug;
number are present, temp files will be used. */
static unsigned int nmerge = NMERGE_DEFAULT;
-/* Output an error to stderr and exit using async-signal-safe routines.
- This can be used safely from signal handlers,
- and between fork and exec of multithreaded processes. */
-
-static _Noreturn void
-async_safe_die (int errnum, char const *errstr)
-{
- ignore_value (write (STDERR_FILENO, errstr, strlen (errstr)));
-
- /* Even if defined HAVE_STRERROR_R, we can't use it,
- as it may return a translated string etc. and even if not
- may call malloc which is unsafe. We might improve this
- by testing for sys_errlist and using that if available.
- For now just report the error number. */
- if (errnum)
- {
- char errbuf[INT_BUFSIZE_BOUND (errnum)];
- char *p = inttostr (errnum, errbuf);
- ignore_value (write (STDERR_FILENO, ": errno ", 8));
- ignore_value (write (STDERR_FILENO, p, strlen (p)));
- }
-
- ignore_value (write (STDERR_FILENO, "\n", 1));
-
- _exit (SORT_FAILURE);
-}
-
/* Report MESSAGE for FILE, then clean up and exit.
If FILE is null, it represents standard output. */
@@ -1034,23 +1008,83 @@ move_fd (int oldfd, int newfd)
}
}
-/* Fork a child process for piping to and do common cleanup. The
- TRIES parameter specifies how many times to try to fork before
- giving up. Return the PID of the child, or -1 (setting errno)
- on failure. */
+/* Setup ACTION to move OLDFD to NEWFD. If OLDFD != NEWFD, NEWFD is not
+ close-on-exec. Returns 0 if successful, or an error number otherwise. */
-static pid_t
-pipe_fork (int pipefds[2], size_t tries)
+static int
+posix_spawn_file_actions_move_fd (posix_spawn_file_actions_t *actions,
+ int oldfd, int newfd)
+{
+ int result = 0;
+ if (oldfd != newfd)
+ {
+ result = posix_spawn_file_actions_adddup2 (actions, oldfd, newfd);
+ if (result == 0)
+ result = posix_spawn_file_actions_addclose (actions, oldfd);
+ }
+ return result;
+}
+
+/* Execute COMPRESS_PROGRAM in a child process. The child processes pid is
+ stored in PD. The TRIES parameter specifies how many times to try to create
+ a child process before giving up. Return 0 on success, or an error number
+ otherwise. */
+
+static int
+pipe_fork (pid_t *pid, int pipefds[2], int tempfd, bool decompress,
+ size_t tries)
{
-#if HAVE_WORKING_FORK
struct tempnode *saved_temphead;
- int saved_errno;
double wait_retry = 0.25;
- pid_t pid;
struct cs_status cs;
+ int result;
+ posix_spawnattr_t attr;
+ posix_spawn_file_actions_t actions;
+
+ if ((result = posix_spawnattr_init (&attr)))
+ return result;
+ if ((result = posix_spawnattr_setflags (&attr, POSIX_SPAWN_USEVFORK))
+ || (result = posix_spawn_file_actions_init (&actions)))
+ {
+ posix_spawnattr_destroy (&attr);
+ return result;
+ }
if (pipe2 (pipefds, O_CLOEXEC) < 0)
- return -1;
+ {
+ int saved_errno = errno;
+ posix_spawnattr_destroy (&attr);
+ posix_spawn_file_actions_destroy (&actions);
+ return saved_errno;
+ }
+
+ if ((result = posix_spawn_file_actions_addclose (&actions, STDIN_FILENO))
+ || (result = posix_spawn_file_actions_addclose (&actions, STDOUT_FILENO))
+ || (decompress
+ ? ((result = posix_spawn_file_actions_addclose (&actions,
+ pipefds[0]))
+ || (result = posix_spawn_file_actions_move_fd (&actions, tempfd,
+ STDIN_FILENO))
+ || (result = posix_spawn_file_actions_move_fd (&actions,
+ pipefds[1],
+ STDOUT_FILENO)))
+ : ((result = posix_spawn_file_actions_addclose (&actions,
+ pipefds[1]))
+ || (result = posix_spawn_file_actions_move_fd (&actions, tempfd,
+ STDOUT_FILENO))
+ || (result = posix_spawn_file_actions_move_fd (&actions,
+ pipefds[0],
+ STDIN_FILENO)))))
+ {
+ close (pipefds[0]);
+ close (pipefds[1]);
+ posix_spawnattr_destroy (&attr);
+ posix_spawn_file_actions_destroy (&actions);
+ return result;
+ }
+
+ char const *const argv[] = { compress_program, decompress ? "-d" : nullptr,
+ nullptr };
/* At least NMERGE + 1 subprocesses are needed. More could be created, but
uncontrolled subprocess generation can hurt performance significantly.
@@ -1070,15 +1104,14 @@ pipe_fork (int pipefds[2], size_t tries)
saved_temphead = temphead;
temphead = nullptr;
- pid = fork ();
- saved_errno = errno;
- if (pid)
+ result = posix_spawnp (pid, compress_program, &actions, &attr,
+ (char * const *) argv, environ);
+ if (result == 0)
temphead = saved_temphead;
cs_leave (&cs);
- errno = saved_errno;
- if (0 <= pid || errno != EAGAIN)
+ if (result != EAGAIN)
break;
else
{
@@ -1088,26 +1121,18 @@ pipe_fork (int pipefds[2], size_t tries)
}
}
- if (pid < 0)
+ posix_spawnattr_destroy (&attr);
+ posix_spawn_file_actions_destroy (&actions);
+
+ if (result)
{
- saved_errno = errno;
close (pipefds[0]);
close (pipefds[1]);
- errno = saved_errno;
- }
- else if (pid == 0)
- {
- close (STDIN_FILENO);
- close (STDOUT_FILENO);
}
else
++nprocs;
- return pid;
-
-#else /* ! HAVE_WORKING_FORK */
- return -1;
-#endif
+ return result;
}
/* Create a temporary file and, if asked for, start a compressor
@@ -1129,9 +1154,13 @@ maybe_create_temp (FILE **pfp, bool survive_fd_exhaustion)
if (compress_program)
{
int pipefds[2];
+ int result = pipe_fork (&node->pid, pipefds, tempfd, false,
+ MAX_FORK_TRIES_COMPRESS);
- node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
- if (0 < node->pid)
+ if (result)
+ error (SORT_FAILURE, result, _("couldn't create process for %s"),
+ quoteaf (compress_program));
+ else
{
close (tempfd);
close (pipefds[0]);
@@ -1139,18 +1168,6 @@ maybe_create_temp (FILE **pfp, bool survive_fd_exhaustion)
register_proc (node);
}
- else if (node->pid == 0)
- {
- /* Being the child of a multithreaded program before exec,
- we're restricted to calling async-signal-safe routines here. */
- close (pipefds[1]);
- move_fd (tempfd, STDOUT_FILENO);
- move_fd (pipefds[0], STDIN_FILENO);
-
- execlp (compress_program, compress_program, (char *) nullptr);
-
- async_safe_die (errno, "couldn't execute compress program");
- }
}
*pfp = fdopen (tempfd, "w");
@@ -1188,30 +1205,20 @@ open_temp (struct tempnode *temp)
if (tempfd < 0)
return nullptr;
- pid_t child = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
+ pid_t child;
+ int result = pipe_fork (&child, pipefds, tempfd, true,
+ MAX_FORK_TRIES_DECOMPRESS);
- switch (child)
+ if (result)
{
- case -1:
- if (errno != EMFILE)
- error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
+ if (result != EMFILE)
+ error (SORT_FAILURE, result, _("couldn't create process for %s -d"),
quoteaf (compress_program));
close (tempfd);
errno = EMFILE;
- break;
-
- case 0:
- /* Being the child of a multithreaded program before exec,
- we're restricted to calling async-signal-safe routines here. */
- close (pipefds[0]);
- move_fd (tempfd, STDIN_FILENO);
- move_fd (pipefds[1], STDOUT_FILENO);
-
- execlp (compress_program, compress_program, "-d", (char *) nullptr);
-
- async_safe_die (errno, "couldn't execute compress program (with -d)");
-
- default:
+ }
+ else
+ {
temp->pid = child;
register_proc (temp);
close (tempfd);
@@ -1224,7 +1231,6 @@ open_temp (struct tempnode *temp)
close (pipefds[0]);
errno = saved_errno;
}
- break;
}
return fp;
--
2.51.0