On 28/10/2025 03:29, Collin Funk wrote:
Collin Funk <[email protected]> writes:

Pádraig Brady <[email protected]> writes:

The last case is what worries me.
The failure isn't diagnosed. Also SIGPIPE is common and usually not diagnosed.
Whether 2 or 3 occurs depends on how much is written to the failed compressors.
I can trigger 3. 100% of the time here without valgrind
by increasing the buffer size (amount written) to 100K.

Consider for example:

   $ printf '%s\n' '#!/bin/sh' 'exit 1' > exit1 && chmod a+x exit1
   $ src/sort -S 100K --compress-program=./exit1 Makefile | head -n1
   $ echo $?
   0

No diagnostics or failure code etc.
We'll need to address that.

FWIW it doesn't seem like this is a new bug:

     $ /bin/sort --version | head -n 1
     sort (GNU coreutils) 9.6
     $ /bin/sort -S 100K --compress-program=./exit1 Makefile
     sort: './exit1' [-d] terminated abnormally
     $ /bin/sort -S 100K --compress-program=./exit1 Makefile
     sort: './exit1' [-d] terminated abnormally
     $ /bin/sort -S 100K --compress-program=./exit1 Makefile
     $ /bin/sort -S 100K --compress-program=./exit1 Makefile
     $ /bin/sort -S 100K --compress-program=./exit1 Makefile

So, on the bright side it seems my patch lets us consistently have the
wrong behavior?

Looking at the strace output:

     $ strace ./src/sort -S 100K --compress-program=./exit1 \
         Makefile 2>&1| grep PIPE
     $ write(6, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n    
"..., 4096) = -1 EPIPE (Broken pipe)
      --- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=432537, 
si_uid=1000} ---
      rt_sigaction(SIGPIPE, {sa_handler=SIG_DFL, sa_mask=[PIPE], 
sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7ff9f8b39070}, 
{sa_handler=0x404e10, sa_mask=[HUP INT QUIT PIPE ALRM TERM XCPU XFSZ VTALRM 
PROF IO], sa_flags=SA_RESTORER, sa_restorer=0x7ff9f8b39070}, 8) = 0
      tgkill(432537, 432537, SIGPIPE)         = 0
      rt_sigreturn({mask=[]})                 = -1 EPIPE (Broken pipe)
      --- SIGPIPE {si_signo=SIGPIPE, si_code=SI_TKILL, si_pid=432537, 
si_uid=1000} ---
      +++ killed by SIGPIPE +++

Is it okay to not raise SIGPIPE in sighandler()? Then, once we receive a
write error we can first reap the child processes that have exited to
make sure they did not cause the failure.

Here is your test case using an example patch:

     $ ./src/sort -S 100K --compress-program=./exit1 Makefile
     sort: './exit1' [-d] terminated abnormally

diff --git a/src/sort.c b/src/sort.c
index 7127f671b..e9101eb5f 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -371,16 +371,6 @@ static bool debug;
     number are present, temp files will be used. */
  static unsigned int nmerge = NMERGE_DEFAULT;
-/* Report MESSAGE for FILE, then clean up and exit.
-   If FILE is null, it represents standard output.  */
-
-static void
-sort_die (char const *message, char const *file)
-{
-  error (SORT_FAILURE, errno, "%s: %s", message,
-         quotef (file ? file : _("standard output")));
-}
-
  void
  usage (int status)
  {
@@ -780,6 +770,19 @@ reap_all (void)
      reap (-1);
  }
+/* Report MESSAGE for FILE, then clean up and exit.
+   If FILE is null, it represents standard output.
+   Reap all the exited processes first, since they
+   may have caused the error.  */
+
+static void
+sort_die (char const *message, char const *file)
+{
+  reap_exited ();
+  error (SORT_FAILURE, errno, "%s: %s", message,
+         quotef (file ? file : _("standard output")));
+}
+
  /* Clean up any remaining temporary files.  */
static void
@@ -4273,7 +4276,8 @@ sighandler (int sig)
    cleanup ();
signal (sig, SIG_DFL);
-  raise (sig);
+  if (sig != SIGPIPE)
+    raise (sig);
  }
/* Set the ordering options for KEY specified in S.

Collin

We still want to exit normally though if the output pipe terminates early.
I.e. pipe error on standard out is fine, pipe error on compression helper is 
error.
Also we can't depend on the child failing. It could exit 0 while still
not reading everything.

I've only thought about this for a couple of minutes but the attached could 
work,
where we always ignore SIGPIPE and then convert the EPIPE on stdout to SIGPIPE.

I'll do lots of testing with this and add a new test before pushing,
but it does pass basic tests here:

#  ---- put previous exit statuses in prompt
$ PS1="[\[\033[1;31m\]\${PIPESTATUS[@]/#0/\[\033[0m\]"\
"\[\033[1;32m\]0\[\033[1;31m\]}\[\033[0m\]] \w$ "

[0] $ printf '%s\n' '#!/bin/sh' 'exit 1' > exit1 && chmod a+x exit1

# ---- we now diagnose these failures always
[0] $ src/sort -S 100K --compress-program=./exit1 Makefile | head -n1
sort: write failed: /tmp/sortlh70wt: Broken pipe

# ---- normal pipe operation is maintained
[2 0] $ seq 10000 | sort | :
[0 141 0] $


cheers,
Padraig
diff --git a/src/sort.c b/src/sort.c
index 7127f671b..f02d2b9a3 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -371,12 +371,23 @@ static bool debug;
    number are present, temp files will be used. */
 static unsigned int nmerge = NMERGE_DEFAULT;
 
+/* Whether SIGPIPE had the default disposition at startup.  */
+static bool default_SIGPIPE;
+
 /* Report MESSAGE for FILE, then clean up and exit.
    If FILE is null, it represents standard output.  */
 
 static void
 sort_die (char const *message, char const *file)
 {
+  /* If we got EPIPE writing to stdout and SIGPIPE was originally SIG_DFL,
+     restore traditional SIGPIPE behavior.  */
+  if (errno == EPIPE && !file && default_SIGPIPE)
+    {
+      signal (SIGPIPE, SIG_DFL);
+      raise (SIGPIPE);
+    }
+
   error (SORT_FAILURE, errno, "%s: %s", message,
          quotef (file ? file : _("standard output")));
 }
@@ -4409,7 +4420,7 @@ main (int argc, char **argv)
     static int const sig[] =
       {
         /* The usual suspects.  */
-        SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
+        SIGALRM, SIGHUP, SIGINT, SIGQUIT, SIGTERM,
 #ifdef SIGPOLL
         SIGPOLL,
 #endif
@@ -4457,6 +4468,11 @@ main (int argc, char **argv)
   }
   signal (SIGCHLD, SIG_DFL); /* Don't inherit CHLD handling from parent.  */
 
+  /* Ignore SIGPIPE so write failures are reported via EPIPE errno.
+     For stdout, sort_die() will reraise SIGPIPE if it was originally SIG_DFL.
+     For compression pipes, sort_die() will exit with SORT_FAILURE.  */
+  default_SIGPIPE = (signal (SIGPIPE, SIG_IGN) == SIG_DFL);
+
   /* The signal mask is known, so it is safe to invoke exit_cleanup.  */
   atexit (exit_cleanup);
 

Reply via email to