* xargs/xargs.c (set_slot_var): New function; sets an environment variable to the index of the entry in pids[] that represents the relevant child process. This can be used in rudimentary load distribution systems. (slot_var_name): the name of the variable to use (selected by --process-slot-var). (enum LongOptionIdentifier): Unique identifiers for long options with no short option equivalent (--process-slot-var is the first). (longopts): Add --process-slot-var. (add_proc): return the index within pids[] that we selected. (main): Pass &option_index to getopt_long (option_index is a new variable) in order to identify which long option was passed. Handle --process-slot-var. (prep_child_for_exec): Call set_slot_var. (usage): Mention --process-slot-var. * doc/find.texi (xargs options): Document --process-slot-var. * xargs/xargs.1: Likewise. * NEWS: Mention this change.
Signed-off-by: James Youngman <[email protected]> --- ChangeLog | 20 +++++++++++ NEWS | 10 +++++ doc/find.texi | 6 +++ xargs/xargs.1 | 11 ++++++ xargs/xargs.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 145 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2bf80af..772b55c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,25 @@ 2010-04-11 James Youngman <[email protected]> + Implement xargs --process-slot-var. + * xargs/xargs.c (set_slot_var): New function; sets an environment + variable to the index of the entry in pids[] that represents the + relevant child process. This can be used in rudimentary load + distribution systems. + (slot_var_name): the name of the variable to use (selected by + --process-slot-var). + (enum LongOptionIdentifier): Unique identifiers for long options + with no short option equivalent (--process-slot-var is the first). + (longopts): Add --process-slot-var. + (add_proc): return the index within pids[] that we selected. + (main): Pass &option_index to getopt_long (option_index is a new + variable) in order to identify which long option was passed. + Handle --process-slot-var. + (prep_child_for_exec): Call set_slot_var. + (usage): Mention --process-slot-var. + * doc/find.texi (xargs options): Document --process-slot-var. + * xargs/xargs.1: Likewise. + * NEWS: Mention this change. + Fix syntax checks problems for check sc_unmarked_diagnostics. * find/tree.c (get_pred_cost): Clarify an error message and mark it for translation. diff --git a/NEWS b/NEWS index fe4b4c0..bc3b026 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,16 @@ GNU findutils NEWS - User visible changes. -*- outline -*- (allout) * Major changes in release 4.5.9-git, YYYY-MM-DD +** Functional Enhancements to xargs + +A new option is provided, --process-slot-var. If you set this, xargs +will set the indicated environment variable in each child. 
The value +of the environment variable is a unique decimal number lower than the +current value of the --max-procs limit at the time the process was +started. These numbers are re-used, but no executing child process +will have the same value as another executing child process. + + * Major changes in release 4.5.8, 2010-04-07 ** Bug Fixes diff --git a/doc/find.texi b/doc/find.texi index d719ccb..8c90bd5 100644 --- a/doc/find.texi +++ b/doc/find.texi @@ -3592,6 +3592,12 @@ Exit if the size (see the @samp{-s} option) is exceeded. Run simultaneously up to @var{max-procs} processes at once; the default is 1. If @var{max-procs} is 0, @code{xargs} will run as many processes as possible simultaneously. + +@item --process-slot-var=@var{environment-variable-name} +Set the environment variable @var{environment-variable-name} to a +unique value in each running child process. Each value is a decimal +integer. Values are reused once child processes exit. This can be +used in a rudimentary load distribution scheme, for example. @end table @node Invoking the shell from xargs diff --git a/xargs/xargs.1 b/xargs/xargs.1 index 50a0e0f..6ae2773 100644 --- a/xargs/xargs.1 +++ b/xargs/xargs.1 @@ -23,6 +23,7 @@ xargs \- build and execute command lines from standard input [\fB\-\-max\-chars\fR=\fImax-chars\fR] [\fB\-P \fImax-procs\fR] [\fB\-\-max\-procs\fR=\fImax-procs\fR] +[\fB\-\-process\-slot\-var\fR=\fIname\fR] [\fB\-\-interactive\fR] [\fB\-\-verbose\fR] [\fB\-\-exit\fR] @@ -323,6 +324,16 @@ option or the option with .BR \-P ; otherwise chances are that only one exec will be done. +.TP +.PD +.BR \-\-process\-slot\-var "=\fIname\fR" +Set the environment variable +.I name +to a unique value in each running child +process. Values are reused once child processes exit. +This can be used in a rudimentary load distribution scheme, for +example. 
+ .SH "EXAMPLES" .nf .B find /tmp \-name core \-type f \-print | xargs /bin/rm \-f diff --git a/xargs/xargs.c b/xargs/xargs.c index c311867..967227e 100644 --- a/xargs/xargs.c +++ b/xargs/xargs.c @@ -180,6 +180,18 @@ static bool query_before_executing = false; static char input_delimiter = '\0'; +/* Name of the environment variable which indicates which 'slot' + * the child process is in. This can be used to do some kind of basic + * load distribution. We guarantee not to allow two processes to run + * at the same time with the same value of this variable. + */ +static char* slot_var_name = NULL; + +enum LongOptionIdentifier + { + PROCESS_SLOT_VAR = CHAR_MAX+1 + }; + static struct option const longopts[] = { {"null", no_argument, NULL, '0'}, @@ -196,6 +208,7 @@ static struct option const longopts[] = {"show-limits", no_argument, NULL, 'S'}, {"exit", no_argument, NULL, 'x'}, {"max-procs", required_argument, NULL, 'P'}, + {"process-slot-var", required_argument, NULL, PROCESS_SLOT_VAR}, {"version", no_argument, NULL, 'v'}, {"help", no_argument, NULL, 'h'}, {NULL, no_argument, NULL, 0} @@ -224,7 +237,7 @@ static bool print_args PARAMS ((bool ask)); /* static void do_exec PARAMS ((void)); */ static int xargs_do_exec (struct buildcmd_control *ctl, void *usercontext, int argc, char **argv); static void exec_if_possible PARAMS ((void)); -static void add_proc PARAMS ((pid_t pid)); +static unsigned int add_proc PARAMS ((pid_t pid)); static void wait_for_proc PARAMS ((bool all, unsigned int minreap)); static void wait_for_proc_all PARAMS ((void)); static long parse_num PARAMS ((char *str, int option, long min, long max, int fatal)); @@ -370,7 +383,7 @@ smaller_of (size_t a, size_t b) int main (int argc, char **argv) { - int optc; + int optc, option_index; int show_limits = 0; /* --show-limits */ int always_run_command = 1; char *input_file = "-"; /* "-" is stdin */ @@ -482,7 +495,7 @@ main (int argc, char **argv) } while ((optc = getopt_long (argc, argv, 
"+0a:E:e::i::I:l::L:n:prs:txP:d:", - longopts, (int *) 0)) != -1) + longopts, &option_index)) != -1) { switch (optc) { @@ -603,6 +616,27 @@ main (int argc, char **argv) display_findutils_version ("xargs"); return 0; + case PROCESS_SLOT_VAR: + if (strchr (optarg, '=')) + { + error (EXIT_FAILURE, 0, + _("option --%s may not be set to a value which includes `='"), + longopts[option_index]); + } + slot_var_name = optarg; + if (0 != unsetenv (slot_var_name)) + { + /* This is a fatal error, otherwise some child process + may not be able to guarantee that no two children + have the same value for this variable; see + set_slot_var. + */ + error (EXIT_FAILURE, errno, + _("failed to unset environment variable %s"), + slot_var_name); + } + break; + default: usage (stderr); return 1; @@ -1026,6 +1060,55 @@ print_args (bool ask) return false; } +/* Set SOME_ENVIRONMENT_VARIABLE=n in the environment. */ +static void +set_slot_var (unsigned int n) +{ + static const char *fmt = "%u"; + int size; + char *buf; + + + /* Determine the length of the buffer we need. + + If the result would be zero-length or have length (not value) > + INT_MAX, the assumptions we made about how snprintf behaves (or + what UINT_MAX is) are wrong. Hence we have a design error (not + an environmental error). + */ + size = snprintf (NULL, 0u, fmt, n); + assert (size > 0); + + + /* Failures here are undesirable but not fatal, since we can still + guarantee that this child does not have a duplicate value of the + indicated environment variable set (since the parent unset it on + startup). + */ + if (NULL == (buf = malloc (size+1))) + { + error (0, errno, _("unable to allocate memory")); + } + else + { + snprintf (buf, size+1, fmt, n); + + /* If the user doesn't want us to set the variable, there is + nothing to do. However, we defer the bail-out until this + point in order to get better test coverage. 
+ */ + if (slot_var_name) + { + if (setenv (slot_var_name, buf, 1) < 0) + { + error (0, errno, + _("failed to set environment variable %s"), slot_var_name); + } + } + free (buf); + } +} + /* Close stdin and attach /dev/null to it. * This resolves Savannah bug #3992. @@ -1033,6 +1116,14 @@ print_args (bool ask) static void prep_child_for_exec (void) { + /* The parent will call add_proc to allocate a slot. We do the same in the + child to make sure we get the same value. + + We use 0 here in order to avoid generating a data structure that appears + to indicate that we (the child) have a child. */ + unsigned int slot = add_proc (0); + set_slot_var (slot); + if (!keep_stdin) { const char inputfile[] = "/dev/null"; @@ -1238,7 +1329,7 @@ exec_if_possible (void) /* Add the process with id PID to the list of processes that have been executed. */ -static void +static unsigned int add_proc (pid_t pid) { unsigned int i, j; @@ -1263,6 +1354,7 @@ add_proc (pid_t pid) pids[i] = pid; procs_executing++; procs_executed = true; + return i; } @@ -1455,8 +1547,8 @@ Usage: %s [-0prtx] [--interactive] [--null] [-d|--delimiter=delim]\n\ [-L max-lines] [-l[max-lines]] [--max-lines[=max-lines]]\n\ [-I replace-str] [-i[replace-str]] [--replace[=replace-str]]\n\ [-n max-args] [--max-args=max-args]\n\ - [-s max-chars] [--max-chars=max-chars]\n\ - [-P max-procs] [--max-procs=max-procs] [--show-limits]\n\ + [-s max-chars] [--max-chars=max-chars] [--show-limits]\n\ + [-P max-procs] [--max-procs=max-procs] --process-slot-var=name\n\ [--verbose] [--exit] [--no-run-if-empty] [--arg-file=file]\n\ [--version] [--help] [command [initial-arguments]]\n"), program_name); -- 1.7.0
