Collin Funk wrote:
> Can't we change the call to posix_spawnp to posix_spawn to prevent
> scanning $PATH twice?

This is not necessary, because in most cases find_in_given_path returns
a filename that contains a slash (see gnulib/lib/findprog.h) and in
this case posix_spawnp() does not even consider PATH [1]:
  "If the file parameter contains a <slash> character, the file parameter
   shall be used as the pathname for the new process image file. Otherwise,
   the path prefix for this file shall be obtained by a search of the
   directories passed as the environment variable PATH ..."

[1] https://pubs.opengroup.org/onlinepubs/9799919799/functions/posix_spawn.html

But another optimization can be done: to search the compress_program in
$PATH only once. Here's a test case:

  strace -ff -o /tmp/sort-log3 src/sort -S 1M --compress-program=gzip Makefile 
| wc -l

searches for 'gzip' in $PATH 12 times. With the attached patch, it searches
only once.

Bruno

>From b1b95a8ba8b7215516d1e7d42b33f44054326af3 Mon Sep 17 00:00:00 2001
From: Bruno Haible <[email protected]>
Date: Fri, 31 Oct 2025 11:20:41 +0100
Subject: [PATCH] sort: Optimize the lookups of the --compress-program

* src/sort.c (get_resolved_compress_program): New function.
(pipe_child): Use it instead of calling find_in_given_path each time.
---
 src/sort.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/src/sort.c b/src/sort.c
index f1e7553f4..05d00cc11 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -1048,6 +1048,25 @@ posix_spawn_file_actions_move_fd (posix_spawn_file_actions_t *actions,
   return result;
 }
 
+/* Look up COMPRESS_PROGRAM in $PATH, and return the resolved program name.
+   Upon error, return nullptr with errno set.  */
+
+static char const *
+get_resolved_compress_program (void)
+{
+  /* Use a cache, to perform the search only once.  */
+  static char const *resolved_compress_program_cache /* = nullptr */;
+
+  if (resolved_compress_program_cache == nullptr)
+    {
+      resolved_compress_program_cache =
+        find_in_given_path (compress_program, getenv ("PATH"), nullptr, false);
+      /* If resolved_compress_program_cache == nullptr, errno is set here.  */
+    }
+
+  return resolved_compress_program_cache;
+}
+
 /* Execute COMPRESS_PROGRAM in a child process.  The child processes pid is
    stored in PD.  The TRIES parameter specifies how many times to try to create
    a child process before giving up.  Return 0 on success, or an error number
@@ -1058,7 +1077,6 @@ pipe_child (pid_t *pid, int pipefds[2], int tempfd, bool decompress,
             size_t tries)
 {
   char const *resolved_compress_program;
-  char *compress_program_to_free;
   struct tempnode *saved_temphead;
   double wait_retry = 0.25;
   struct cs_status cs;
@@ -1070,13 +1088,9 @@ pipe_child (pid_t *pid, int pipefds[2], int tempfd, bool decompress,
      handle access issues to COMPRESS_PROGRAM, because on some
      implementations/emulations of posix_spawn we get only a
      generic (fatal) error from the child in that case.  */
-  resolved_compress_program =
-    find_in_given_path (compress_program, getenv ("PATH"), nullptr, false);
+  resolved_compress_program = get_resolved_compress_program ();
   if (resolved_compress_program == nullptr)
     return errno;
-  compress_program_to_free = nullptr;
-  if (resolved_compress_program != compress_program)
-    compress_program_to_free = (char *) resolved_compress_program;
 
   if ((result = posix_spawnattr_init (&attr)))
     return result;
@@ -1084,7 +1098,6 @@ pipe_child (pid_t *pid, int pipefds[2], int tempfd, bool decompress,
       || (result = posix_spawn_file_actions_init (&actions)))
     {
       posix_spawnattr_destroy (&attr);
-      free (compress_program_to_free);
       return result;
     }
 
@@ -1093,7 +1106,6 @@ pipe_child (pid_t *pid, int pipefds[2], int tempfd, bool decompress,
       int saved_errno = errno;
       posix_spawnattr_destroy (&attr);
       posix_spawn_file_actions_destroy (&actions);
-      free (compress_program_to_free);
       return saved_errno;
     }
 
@@ -1119,7 +1131,6 @@ pipe_child (pid_t *pid, int pipefds[2], int tempfd, bool decompress,
       close (pipefds[1]);
       posix_spawnattr_destroy (&attr);
       posix_spawn_file_actions_destroy (&actions);
-      free (compress_program_to_free);
       return result;
     }
 
@@ -1169,7 +1180,6 @@ pipe_child (pid_t *pid, int pipefds[2], int tempfd, bool decompress,
 
   posix_spawnattr_destroy (&attr);
   posix_spawn_file_actions_destroy (&actions);
-  free (compress_program_to_free);
 
   if (result)
     {
-- 
2.51.0

Reply via email to