Fixed in git with the attached patch.

On Thu, Jul 20, 2023 at 8:46 AM anonymous <invalid.nore...@gnu.org> wrote:

> URL:
>   <https://savannah.gnu.org/bugs/?64451>
>
>                  Summary: Unexpected behaviour of xargs when multiple
> children
> exit with 255 in parallel
>                    Group: findutils
>                Submitter: None
>                Submitted: Thu 20 Jul 2023 07:46:06 AM UTC
>                 Category: xargs
>                 Severity: 3 - Normal
>               Item Group: Wrong result
>                   Status: None
>                  Privacy: Public
>              Assigned to: None
>          Originator Name:
>         Originator Email:
>              Open/Closed: Open
>                  Release: 4.9.0
>          Discussion Lock: Any
>            Fixed Release: None
>
>
>     _______________________________________________________
>
> Follow-up Comments:
>
>
> -------------------------------------------------------
> Date: Thu 20 Jul 2023 07:46:06 AM UTC By: Anonymous
> = Synopsis =
>     When xargs is invoked with *-P* option set to a value other than 1 or 2
> and more that one child exits with 255, xargs fails to wait on all children
> before exiting.
>     *-P0* breaks in the same manner on just one exit 255.
>     The case for *-P2* is a degenerate case where the second child exiting
> would cause this bug, but as it is the last running child, exiting with 255
> does not cause any misbehaviour.
> = Examples =
>
> # One exit 255
> seq 10 | xargs -n1 -P4 sh -c 'for i do echo start $i; sleep $i; if [ $i
> -eq 3
> ]; then echo exit $i; exit 255; fi; echo stop $i; done' _; echo xargs
> exited
> here with $?
> start 1
> start 2
> start 3
> start 4
> stop 1
> start 5
> stop 2
> start 6
> exit 3
> xargs: sh: exited with status 255; aborting
> stop 4
> stop 5
> stop 6
> xargs exited here with 124
>
>     After job 3 exited with 255, xargs correctly waits for already started
> jobs to finish.
>
> # Two exit 255s
> seq 10 | xargs -n1 -P4 sh -c 'for i do echo start $i; sleep $i; if [ $i
> -ge 3
> ] && [ $i -le 4 ]; then echo exit $i; exit 255; fi; echo stop $i; done' _;
> echo xargs exited here with $?
> start 1
> start 2
> start 3
> start 4
> stop 1
> start 5
> stop 2
> start 6
> exit 3
> xargs: sh: exited with status 255; aborting
> exit 4
> xargs: sh: exited with status 255; aborting
> xargs exited here with 124
> stop 5
> stop 6
>
>     xargs exits after job 4. Jobs 5 and 6 continue to run in background. It
> can no longer be guaranteed that jobs 5 and 6 will complete before further
> processing in some script.
> = POSIX compliance =
>     This breaks a POSIX requirement.
> From xargs(1p):
>     Implementations wanting to provide parallel operation of the invoked
> utilities are encouraged to add an option enabling parallel invocation, but
> should still wait for termination of all of the children before _xargs_
> terminates normally.
> = Quick debugging =
>     After adding unconditional error messages to the _wait_for_proc_all_()
> function
>
> diff --git a/xargs/xargs.c b/xargs/xargs.c
> index fdede10..9bc7aa7 100644
> --- a/xargs/xargs.c
> +++ b/xargs/xargs.c
> @@ -1597,6 +1597,7 @@ wait_for_proc (bool all, unsigned int minreap)
>  static void
>  wait_for_proc_all (void)
>  {
> +  error (0, 0, _("wait_for_proc_all enter"));
>    static bool waiting = false;
>
>    /* This function was registered by the original, parent, process.
> @@ -1614,6 +1615,7 @@ wait_for_proc_all (void)
>    wait_for_proc (true, 0u);
>    waiting = false;
>
> +  error (0, 0, _("wait_for_proc_all before child_error test"));
>    if (original_exit_value != child_error)
>      {
>        /* wait_for_proc () changed the value of child_error ().  This
>
> and testing xargs again, the following is observed:
>
> # One exit 255
> seq 10 | ./xargs -n1 -P4 sh -c 'for i do echo start $i; sleep $i; if [ $i
> -ge
> 3 ] && [ $i -le 3 ]; then echo exit $i; exit 255; fi; echo stop $i; done'
> _;
> echo xargs exited here with $?
> start 1
> start 2
> start 3
> start 4
> stop 1
> start 5
> stop 2
> start 6
> exit 3
> ./xargs: sh: exited with status 255; aborting
> ./xargs: wait_for_proc_all enter
> stop 4
> stop 5
> stop 6
> ./xargs: wait_for_proc_all before child_error test
> xargs exited here with 124
>
>
> # Two exit 255s
> seq 10 | ./xargs -n1 -P4 sh -c 'for i do echo start $i; sleep $i; if [ $i
> -ge
> 3 ] && [ $i -le 4 ]; then echo exit $i; exit 255; fi; echo stop $i; done'
> _;
> echo xargs exited here with $?
> start 1
> start 2
> start 3
> start 4
> stop 1
> start 5
> stop 2
> start 6
> exit 3
> ./xargs: sh: exited with status 255; aborting
> ./xargs: wait_for_proc_all enter
> exit 4
> ./xargs: sh: exited with status 255; aborting
> xargs exited here with 124
> stop 5
> stop 6
>
>     It can be seen that in the second case _wait_for_proc_all_() did not
> reach
> _before child_error test_ line.
> = My hypothesis =
>     As _wait_for_proc_all_() is registered via _atexit_() it is called when
> the first exit 255 is handled with _error_() (which itself calls _exit_()),
> then _wait_for_proc_all_() calls _wait_for_proc_() inside of which another
> _exit_() is called.
> According to atexit(3):
>     POSIX.1 says that the result of calling *exit*(3) more than once (i.e.,
> calling *exit*(3) within a function registered using *atexit*()) is
> undefined.
> On some systems (but not Linux), this can result in an infinite recursion;
> portable programs should not invoke *exit*(3) inside a function registered
> using *atexit*().
> = In the case of -P0 =
>     If the same command line is run with *-P0* something else happens:
>
> # One exit 255 is now enough to exit xargs
> seq 10 | ./xargs -n1 -P0 sh -c 'for i do echo start $i; sleep $i; if [ $i
> -ge
> 3 ] && [ $i -le 4 ]; then echo exit $i; exit 255; fi; echo stop $i; done'
> _;
> echo xargs exited here with $?
> start 1
> start 2
> start 3
> start 4
> start 5
> start 7
> start 6
> start 8
> start 9
> ./xargs: wait_for_proc_all enter
> start 10
> stop 1
> stop 2
> exit 3
> ./xargs: sh: exited with status 255; aborting
> xargs exited here with 124
> exit 4
> stop 5
> stop 6
> stop 7
> stop 8
> stop 9
> stop 10
>
>     As xargs has read in all the input it enters _wait_for_proc_all_(),
> now a
> single _exit 255_ is enough to exit xargs.
>
>
>
>
>
>
>
>     _______________________________________________________
>
> Reply to this item at:
>
>   <https://savannah.gnu.org/bugs/?64451>
>
> _______________________________________________
> Message sent via Savannah
> https://savannah.gnu.org/
>
>
>
From ad636eae01f21bda602f5e7b85dc1b5d8d72efd2 Mon Sep 17 00:00:00 2001
From: James Youngman <ja...@youngman.org>
Date: Sat, 18 May 2024 10:36:17 +0100
Subject: [PATCH] xargs: Wait for all children even if one exited with 255.
To: findutils-patc...@gnu.org

* xargs/xargs.c: fix Savannah bug #64451 (xargs -P exits before all
children have exited if one exits with status 255).
* xargs/xargs.1 (BUGS): mention this bug and the corrected behaviour.
* doc/find.texi: Likewise.
* NEWS: mention this bugfix.
---
 NEWS          |  3 +++
 doc/find.texi |  4 +++-
 xargs/xargs.1 | 10 ++++++++++
 xargs/xargs.c | 42 +++++++++++++++++++++++++++++++++++-------
 4 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/NEWS b/NEWS
index 2cc48130..762297d1 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,9 @@ GNU findutils NEWS - User visible changes.      -*- outline -*- (allout)
 
 ** Bug Fixes
 
+  xargs -P now waits for all its child processes to compete before
+  exiting, even if one of them exits with status 255. [#64451]
+
   'find -name /' no longer outputs a warning, because that is a valid pattern
   to match the root directory "/".  Previously, a diagnostic falsely claimed
   that this pattern would not match anything. [#62227]
diff --git a/doc/find.texi b/doc/find.texi
index 294cbaa1..48ecdb68 100644
--- a/doc/find.texi
+++ b/doc/find.texi
@@ -2740,7 +2740,9 @@ Run up to @var{max-procs} processes at a time; the default is 1.  If
 @var{max-procs} is 0, @code{xargs} will run as many processes as
 possible at a time.  Use the @samp{-n}, @samp{-s}, or @samp{-L} option
 with @samp{-P}; otherwise chances are that the command will be run
-only once.
+only once. If a child process exits with status 255, @code{xargs} will
+still wait for all child processes to exit (before version 4.9.0 this
+might not happen).
 @end table
 
 For example, suppose you have a directory tree of large image files
diff --git a/xargs/xargs.1 b/xargs/xargs.1
index e58a2bce..df9ac11f 100644
--- a/xargs/xargs.1
+++ b/xargs/xargs.1
@@ -225,6 +225,9 @@ You cannot decrease it below 1.
 never terminates its commands; when asked to decrease, it merely
 waits for more than one existing command to terminate before starting
 another.
+.B xargs
+always waits for all child processes to exit before exiting itself
+(but see BUGS).
 
 .B Please note
 that it is up to the called processes to properly manage parallel
@@ -532,6 +535,13 @@ which is why this discussion appears in the BUGS section.
 The problem doesn't occur with the output of
 .BR find (1)
 because it emits just one filename per line.
+.P
+In versions of
+.B xargs
+up to and including version 4.9.0,
+.B xargs -P
+would exit while some of its children were still running, if one of
+them exited with status 255.
 .
 .SH "REPORTING BUGS"
 GNU findutils online help: <https://www.gnu.org/software/findutils/#get-help>
diff --git a/xargs/xargs.c b/xargs/xargs.c
index c40096ce..00a5e31c 100644
--- a/xargs/xargs.c
+++ b/xargs/xargs.c
@@ -1494,6 +1494,7 @@ static void
 wait_for_proc (bool all, unsigned int minreap)
 {
   unsigned int reaped = 0;
+  int deferred_exit_status = 0;
 
   while (procs_executing)
     {
@@ -1576,17 +1577,44 @@ wait_for_proc (bool all, unsigned int minreap)
       procs_executing--;
       reaped++;
 
+#define set_deferred_exit_status(n) \
+      do \
+      { \
+	if (deferred_exit_status < n)  \
+	  { \
+	    deferred_exit_status = n; \
+	  } \
+      } while (0)
+
       if (WEXITSTATUS (status) == CHILD_EXIT_PLEASE_STOP_IMMEDIATELY)
-	error (XARGS_EXIT_CLIENT_EXIT_255, 0,
-	       _("%s: exited with status 255; aborting"), bc_state.cmd_argv[0]);
+	{
+	  error (0, 0, _("%s: exited with status 255; aborting"), bc_state.cmd_argv[0]);
+	  set_deferred_exit_status(XARGS_EXIT_CLIENT_EXIT_255);
+	}
       if (WIFSTOPPED (status))
-	error (XARGS_EXIT_CLIENT_FATAL_SIG, 0,
-	       _("%s: stopped by signal %d"), bc_state.cmd_argv[0], WSTOPSIG (status));
+	{
+	  error (0, 0, _("%s: stopped by signal %d"), bc_state.cmd_argv[0], WSTOPSIG (status));
+	  set_deferred_exit_status(XARGS_EXIT_CLIENT_FATAL_SIG);
+	}
       if (WIFSIGNALED (status))
-	error (XARGS_EXIT_CLIENT_FATAL_SIG, 0,
-	       _("%s: terminated by signal %d"), bc_state.cmd_argv[0], WTERMSIG (status));
+	{
+	  error (0, 0,
+		 _("%s: terminated by signal %d"), bc_state.cmd_argv[0], WTERMSIG (status));
+	  set_deferred_exit_status(XARGS_EXIT_CLIENT_FATAL_SIG);
+	}
       if (WEXITSTATUS (status) != 0)
-	child_error = XARGS_EXIT_CLIENT_EXIT_NONZERO;
+	{
+	  child_error = XARGS_EXIT_CLIENT_EXIT_NONZERO;
+	}
+      if (deferred_exit_status && !all)
+	{
+	  break;
+	}
+    }
+  if (deferred_exit_status)
+    {
+      child_error = deferred_exit_status > child_error ? deferred_exit_status : child_error;
+      exit (child_error);
     }
 }
 
-- 
2.39.2

Reply via email to