The following patch implements --check-fields and --separator. -- Debian GNU/Linux 2.1 is out! ( http://www.debian.org/ ) Email: Herbert Xu 许志壬 <[EMAIL PROTECTED]> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Index: doc/textutils.texi =================================================================== RCS file: /home/gondor/herbert/src/CVS/debian/textutils/doc/textutils.texi,v retrieving revision 1.2 diff -u -r1.2 textutils.texi --- doc/textutils.texi 2000/06/27 10:27:49 1.2 +++ doc/textutils.texi 2000/06/28 03:15:37 @@ -2464,6 +2464,9 @@ If no @var{output} file is specified, @code{uniq} writes to standard output. +Fields are sequences of non-space non-tab characters that are separated from +each other by at least one space or tab, unless a separator is given. + The program accepts the following options. Also see @ref{Common options}. @table @samp @@ -2474,9 +2477,7 @@ @opindex -@var{n} @opindex -f @opindex --skip-fields -Skip @var{n} fields on each line before checking for uniqueness. Fields -are sequences of non-space non-tab characters that are separated from -each other by at least one spaces or tabs. +Skip @var{n} fields on each line before checking for uniqueness. @item +@var{n} @itemx -s @var{n} @@ -2524,13 +2525,27 @@ @cindex unique lines, outputting Print only unique lines. +@item -t @var{sep} +@itemx --separator=@var{sep} +@opindex -t +@opindex --separator +Use @var{sep} to delimit fields. + @item -w @var{n} @itemx --check-chars=@var{n} @opindex -w @opindex --check-chars -Compare @var{n} characters on each line (after skipping any specified -fields and characters). By default the entire rest of the lines are -compared. +Compare no more than @var{n} characters on each line (after skipping any +specified fields and characters). By default the entire rest of the lines +are compared. + +@item -W @var{n} +@itemx --check-fields=@var{n} +@opindex -W +@opindex --check-fields +Compare no more than @var{n} fields on each line (after skipping any +specified fields and characters). By default the entire rest of the lines +are compared. 
@end table Index: man/uniq.1 =================================================================== RCS file: /home/gondor/herbert/src/CVS/debian/textutils/man/uniq.1,v retrieving revision 1.1.1.1 diff -u -r1.1.1.1 uniq.1 --- man/uniq.1 1999/08/06 19:24:10 1.1.1.1 +++ man/uniq.1 2000/06/28 03:09:05 @@ -30,12 +30,18 @@ \fB\-s\fR, \fB\-\-skip\-chars\fR=\fIN\fR avoid comparing the first N characters .TP +\fB\-t\fR, \fB\-\-separator\fR=\fISEP\fR +use SEParator to delimit fields +.TP \fB\-u\fR, \fB\-\-unique\fR only print unique lines .TP \fB\-w\fR, \fB\-\-check\-chars\fR=\fIN\fR compare no more than N characters in lines .TP +\fB\-W\fR, \fB\-\-check\-fields\fR=\fIN\fR +compare no more than N fields in lines +.TP \fB\-N\fR same as \fB\-f\fR N .TP @@ -48,8 +54,8 @@ \fB\-\-version\fR output version information and exit .PP -A field is a run of whitespace, then non-whitespace characters. -Fields are skipped before chars. +A field is a run of whitespace, then non-whitespace characters, unless a +SEParator is given. Fields are skipped before chars. .SH "REPORTING BUGS" Report bugs to <[EMAIL PROTECTED]>. .SH "SEE ALSO" Index: src/uniq.c =================================================================== RCS file: /home/gondor/herbert/src/CVS/debian/textutils/src/uniq.c,v retrieving revision 1.1.1.1 diff -u -r1.1.1.1 uniq.c --- src/uniq.c 1999/07/04 10:02:54 1.1.1.1 +++ src/uniq.c 2000/06/28 03:18:16 @@ -47,9 +47,17 @@ /* Number of chars to skip after skipping any fields. */ static int skip_chars; -/* Number of chars to compare; if 0, compare the whole lines. */ +/* Number of fields to compare; if 0, compare the whole lines. */ +static int check_fields; + +/* Number of chars to compare; if 0, compare the whole lines. When used in + conjunction with check_fields, the minimum of the two applies. */ static int check_chars; +/* Separator between fields; if this is NUL, a field is a run of whitespace, + then non-whitespace characters. 
*/ +static int tab; + enum countmode { count_occurrences, /* -c Print count before output lines. */ @@ -84,6 +92,8 @@ {"skip-fields", required_argument, NULL, 'f'}, {"skip-chars", required_argument, NULL, 's'}, {"check-chars", required_argument, NULL, 'w'}, + {"check-fields", required_argument, NULL, 'W'}, + {"separator", required_argument, NULL, 't'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} @@ -111,21 +121,53 @@ -f, --skip-fields=N avoid comparing the first N fields\n\ -i, --ignore-case ignore differences in case when comparing\n\ -s, --skip-chars=N avoid comparing the first N characters\n\ + -t, --separator=SEP use SEParator to delimit fields\n\ -u, --unique only print unique lines\n\ -w, --check-chars=N compare no more than N characters in lines\n\ + -W, --check-fields=N compare no more than N fields in lines\n\ -N same as -f N\n\ +N same as -s N\n\ --help display this help and exit\n\ --version output version information and exit\n\ \n\ -A field is a run of whitespace, then non-whitespace characters.\n\ -Fields are skipped before chars.\n\ +A field is a run of whitespace, then non-whitespace characters, unless\n\ +a SEParator is given. Fields are skipped before chars.\n\ ")); puts (_("\nReport bugs to <[EMAIL PROTECTED]>.")); } exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } +/* Given a string, + return the length of the first n fields. */ +static int +find_field_length (const char *string, size_t size, int n) +{ + int count; + int i = 0; + + if (tab) + { + for (count = 0; count < n && i < size; count++) + { + while (string[i++] != tab && i < size) + ; + } + } + else + { + for (count = 0; count < n && i < size; count++) + { + while (i < size && ISBLANK (string[i])) + i++; + while (i < size && !ISBLANK (string[i])) + i++; + } + } + + return i; +} + /* Given a linebuffer LINE, return a pointer to the beginning of the line's field to be compared. 
*/ @@ -135,15 +177,9 @@ register int count; register char *lp = line->buffer; register size_t size = line->length; - register size_t i = 0; + register size_t i; - for (count = 0; count < skip_fields && i < size; count++) - { - while (i < size && ISBLANK (lp[i])) - i++; - while (i < size && !ISBLANK (lp[i])) - i++; - } + i = find_field_length(lp, size, skip_fields); for (count = 0; count < skip_chars && i < size; count++) i++; @@ -161,6 +197,11 @@ { register int order; + if (check_fields) + { + oldlen = find_field_length(old, oldlen, check_fields); + newlen = find_field_length(new, newlen, check_fields); + } if (check_chars) { if (oldlen > check_chars) @@ -292,11 +333,13 @@ skip_chars = 0; skip_fields = 0; check_chars = 0; + check_fields = 0; + tab = 0; mode = output_all; countmode = count_none; - while ((optc = getopt_long (argc, argv, "0123456789cdDf:is:uw:", longopts, - NULL)) != -1) + while ((optc = getopt_long (argc, argv, "0123456789cdDf:is:t:uw:W:", + longopts, NULL)) != -1) { switch (optc) { @@ -356,6 +399,10 @@ } break; + case 't': + tab = *optarg; + break; + case 'u': mode = output_unique; break; @@ -369,6 +416,18 @@ _("invalid number of bytes to compare: `%s'"), optarg); check_chars = (int) tmp_long; + } + break; + + case 'W': + { + long int tmp_long; + if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK + || tmp_long <= 0 || tmp_long > INT_MAX) + error (EXIT_FAILURE, 0, + _("invalid number of fields to compare: `%s'"), + optarg); + check_fields = (int) tmp_long; } break;