Hi,
The number of inputs that can be handled by the sort utility is
currently limited by what may be passed in argv.
Due to the nature of sort, this limit can't be stepped around with
`xargs' as it could be with some other utilities.
My solution to this locally has been to add an option to the sort
utility, --xargs, which causes sort to treat STDIN as a source of
newline-separated arguments that supplement those on the command-line
(please see attached patch).
Consider the following example with an input directory containing
16384 input files each consisting of a single line with a single
character, one of 'a', 'b' or 'c':
$ src/sort -mu input/*
bash: src/sort: Argument list too long
$ find input/ -type f | xargs src/sort -mu
a
b
c
a
b
c
$ find input/ -type f | src/sort -mu --xargs
a
b
c
Is this an option that might be worth including in a future release?
Thanks,
Bo
From 8568528acd4b5eea20d06136aaaf7b18a36f03c0 Mon Sep 17 00:00:00 2001
From: Bo Borgerson <[EMAIL PROTECTED]>
Date: Thu, 3 Apr 2008 12:05:55 -0400
Subject: [PATCH] add new sort option --xargs (-x)
* src/sort.c: if --xargs option, add input to FILES
* tests/misc/sort-xargs: test new option
* tests/misc/Makefile.am: add new test file
* doc/coreutils.texi: describe new option
* NEWS: advertise new option
Signed-off-by: Bo Borgerson <[EMAIL PROTECTED]>
---
NEWS | 4 +++
doc/coreutils.texi | 9 ++++++++
src/sort.c | 43 ++++++++++++++++++++++++++++++++++++++++++
tests/misc/Makefile.am | 1 +
tests/misc/sort-xargs | 49 ++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 106 insertions(+), 0 deletions(-)
create mode 100755 tests/misc/sort-xargs
diff --git a/NEWS b/NEWS
index e208b30..36d67f6 100644
--- a/NEWS
+++ b/NEWS
@@ -55,6 +55,10 @@ GNU coreutils NEWS -*- outline -*-
options --general-numeric-sort/-g, --month-sort/-M, --numeric-sort/-n
and --random-sort/-R, resp.
+ sort accepts a new option, --xargs (-x), that causes input to be treated
+ as a newline-separated list of files to supplement those passed on the
+ command-line.
+
** Improvements
id and groups work around an AFS-related bug whereby those programs
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index ee7dbb2..eb3d41e 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3803,6 +3803,15 @@ For example, @code{sort -n -u} inspects only the value of the initial
numeric string when checking for uniqueness, whereas @code{sort -n |
uniq} inspects the entire line. @xref{uniq invocation}.
+@item -x
+@itemx --xargs
+@opindex -x
+@opindex --xargs
+@cindex xargs standard input arguments
+Treat the input as a set of newline-separated arguments to supplement
+those on command-line. Useful if the list of input files to sort exceeds
+the command-line argument list size limit.
+
@item -z
@itemx --zero-terminated
@opindex -z
diff --git a/src/sort.c b/src/sort.c
index 8b2eec5..183f56c 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -121,6 +121,9 @@ static bool hard_LC_COLLATE;
static bool hard_LC_TIME;
#endif
+/* If true, treat STDIN as a source of files */
+static bool xargs = false;
+
#define NONZERO(x) ((x) != 0)
/* The kind of blanks for '-b' to skip in various options. */
@@ -222,6 +225,10 @@ static struct month monthtab[] =
{"SEP", 9}
};
+/* The maximum number of input files allowed for in an invocation
+ FIXME: This should be set more intelligently */
+#define NFILES_MAX 1048576
+
/* During the merge phase, the number of files to merge at once. */
#define NMERGE 16
@@ -358,6 +365,9 @@ Other options:\n\
without -c, output only the first of an equal run\n\
"), DEFAULT_TMPDIR);
fputs (_("\
+ -x, --xargs treat STDIN as a source of newline-separated\n\
+ arguments to supplement arguments on the\n\
+ command-line\n\
-z, --zero-terminated end lines with 0 byte, not newline\n\
"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
@@ -423,6 +433,7 @@ static struct option const long_options[] =
{"field-separator", required_argument, NULL, 't'},
{"temporary-directory", required_argument, NULL, 'T'},
{"unique", no_argument, NULL, 'u'},
+ {"xargs", no_argument, NULL, 'x'},
{"zero-terminated", no_argument, NULL, 'z'},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
@@ -3086,6 +3097,10 @@ main (int argc, char **argv)
}
break;
+ case 'x':
+ xargs = true;
+ break;
+
case 'z':
eolchar = 0;
break;
@@ -3099,6 +3114,34 @@ main (int argc, char **argv)
}
}
+  if (xargs)
+    {
+      /* Append the newline-separated file names read from stdin to FILES.  */
+      size_t xargc = argc;	/* slot count handed to x2nrealloc; presumably
+				   FILES was allocated with ARGC slots */
+      char *input_line = NULL;	/* getline-managed buffer, freed below */
+      size_t input_alloc = 0;
+      ssize_t length;
+
+      /* getline grows its buffer, so a long name is never split in two,
+	 and a positive LENGTH makes the LENGTH - 1 index below safe.  */
+      while (0 < (length = getline (&input_line, &input_alloc, stdin)))
+        {
+          if (nfiles >= NFILES_MAX)
+            error (SORT_FAILURE, 0, _("Too many input files"));
+          if (nfiles >= xargc)
+            files = x2nrealloc (files, &xargc, sizeof *files);
+          /* Strip the delimiter; the final line may lack one.  */
+          if (input_line[length - 1] == '\n')
+            input_line[--length] = '\0';
+          files[nfiles] = xmalloc (length + 1);
+          memcpy (files[nfiles++], input_line, length + 1);
+        }
+      if (ferror (stdin))
+        error (SORT_FAILURE, errno, _("read failed"));
+      free (input_line);
+    }
+
+
/* Inheritance of global options to individual keys. */
for (key = keylist; key; key = key->next)
{
diff --git a/tests/misc/Makefile.am b/tests/misc/Makefile.am
index 17a0ec0..8d8d711 100644
--- a/tests/misc/Makefile.am
+++ b/tests/misc/Makefile.am
@@ -100,6 +100,7 @@ TESTS = \
sort-compress \
sort-merge \
sort-rand \
+ sort-xargs \
split-a \
split-fail \
split-l \
diff --git a/tests/misc/sort-xargs b/tests/misc/sort-xargs
new file mode 100755
index 0000000..8e4a087
--- /dev/null
+++ b/tests/misc/sort-xargs
@@ -0,0 +1,49 @@
+#!/bin/sh
+# Test "sort --xargs".
+
+# Copyright (C) 2002, 2003, 2005-2008 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+: ${srcdir=.}
+. $srcdir/../require-perl
+
+
+me=`echo $0|sed 's,.*/,,'`
+exec $PERL -w -I$srcdir/.. -MCoreutils -M"CuTmpdir qw($me)" -- - <<\EOF
+require 5.003;
+use strict;
+
+(my $program_name = $0) =~ s|.*/||;
+
+# Turn off localisation of executable's output.
+@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+
+`echo a > in1`;
+`echo b > in2`;
+`echo c > in3`;
+
+my @Tests =
+ (
+ ['x1', '--xargs', {IN_PIPE=> {arglist => "in1\nin2\nin3\n"}},
+ {OUT=>"a\nb\nc\n"}],
+ );
+
+my $save_temps = $ENV{DEBUG};
+my $verbose = $ENV{VERBOSE};
+
+my $prog = 'sort';
+my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
+exit $fail;
+EOF
--
1.5.2.5
_______________________________________________
Bug-coreutils mailing list
[email protected]
http://lists.gnu.org/mailman/listinfo/bug-coreutils