Collin Funk <[email protected]> writes:

> Pádraig Brady <[email protected]> writes:
>
>> The last case is what worries me.
>> The failure isn't diagnosed. Also SIGPIPE is common and usually not 
>> diagnosed.
>> Whether 2 or 3 occurs depends on how much is written to the failed 
>> compressors.
>> I can trigger 3. 100% of the time here without valgrind
>> by increasing the buffer size (amount written) to 100K.
>>
>> Consider for example:
>>
>>   $ printf '%s\n' '#!/bin/sh' 'exit 1' > exit1 && chmod a+x exit1
>>   $ src/sort -S 100K --compress-program=./exit1 Makefile | head -n1
>>   $ echo $?
>>   0
>>
>> No diagnostics or failure code etc.
>> We'll need to address that.
>
> FWIW it doesn't seem like this is a new bug:
>
>     $ /bin/sort --version | head -n 1
>     sort (GNU coreutils) 9.6
>     $ /bin/sort -S 100K --compress-program=./exit1 Makefile
>     sort: './exit1' [-d] terminated abnormally
>     $ /bin/sort -S 100K --compress-program=./exit1 Makefile
>     sort: './exit1' [-d] terminated abnormally
>     $ /bin/sort -S 100K --compress-program=./exit1 Makefile
>     $ /bin/sort -S 100K --compress-program=./exit1 Makefile
>     $ /bin/sort -S 100K --compress-program=./exit1 Makefile
>
> So, on the bright side it seems my patch lets us consistently have the
> wrong behavior?

Looking at the strace output:

    $ strace ./src/sort -S 100K --compress-program=./exit1 \
        Makefile 2>&1| grep PIPE
    $ write(6, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n    
"..., 4096) = -1 EPIPE (Broken pipe)
     --- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=432537, 
si_uid=1000} ---
     rt_sigaction(SIGPIPE, {sa_handler=SIG_DFL, sa_mask=[PIPE], 
sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7ff9f8b39070}, 
{sa_handler=0x404e10, sa_mask=[HUP INT QUIT PIPE ALRM TERM XCPU XFSZ VTALRM 
PROF IO], sa_flags=SA_RESTORER, sa_restorer=0x7ff9f8b39070}, 8) = 0
     tgkill(432537, 432537, SIGPIPE)         = 0
     rt_sigreturn({mask=[]})                 = -1 EPIPE (Broken pipe)
     --- SIGPIPE {si_signo=SIGPIPE, si_code=SI_TKILL, si_pid=432537, 
si_uid=1000} ---
     +++ killed by SIGPIPE +++

Is it okay to not raise SIGPIPE in sighandler()? Then, once we receive a
write error we can first reap the child processes that have exited to
make sure they did not cause the failure.

Here is your test case using an example patch:

    $ ./src/sort -S 100K --compress-program=./exit1 Makefile
    sort: './exit1' [-d] terminated abnormally

diff --git a/src/sort.c b/src/sort.c
index 7127f671b..e9101eb5f 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -371,16 +371,6 @@ static bool debug;
    number are present, temp files will be used. */
 static unsigned int nmerge = NMERGE_DEFAULT;
 
-/* Report MESSAGE for FILE, then clean up and exit.
-   If FILE is null, it represents standard output.  */
-
-static void
-sort_die (char const *message, char const *file)
-{
-  error (SORT_FAILURE, errno, "%s: %s", message,
-         quotef (file ? file : _("standard output")));
-}
-
 void
 usage (int status)
 {
@@ -780,6 +770,19 @@ reap_all (void)
     reap (-1);
 }
 
+/* Report MESSAGE for FILE, then clean up and exit.
+   If FILE is null, it represents standard output.
+   Reap all the exited processes first, since they
+   may have caused the error.  */
+
+static void
+sort_die (char const *message, char const *file)
+{
+  reap_exited ();
+  error (SORT_FAILURE, errno, "%s: %s", message,
+         quotef (file ? file : _("standard output")));
+}
+
 /* Clean up any remaining temporary files.  */
 
 static void
@@ -4273,7 +4276,8 @@ sighandler (int sig)
   cleanup ();
 
   signal (sig, SIG_DFL);
-  raise (sig);
+  if (sig != SIGPIPE)
+    raise (sig);
 }
 
 /* Set the ordering options for KEY specified in S.

Collin

Reply via email to