[PATCH] new uniq options: --check-fields and --separator

Herbert Xu Tue, 27 Jun 2000 20:18:40 -0700

The following patch implements --check-fields and --separator.
-- 
Debian GNU/Linux 2.1 is out! ( http://www.debian.org/ )
Email:  Herbert Xu 许志壬 <[EMAIL PROTECTED]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

Index: doc/textutils.texi
===================================================================
RCS file: /home/gondor/herbert/src/CVS/debian/textutils/doc/textutils.texi,v
retrieving revision 1.2
diff -u -r1.2 textutils.texi
--- doc/textutils.texi  2000/06/27 10:27:49     1.2
+++ doc/textutils.texi  2000/06/28 03:15:37
@@ -2464,6 +2464,9 @@
 If no @var{output} file is specified, @code{uniq} writes to standard
 output.
 
+Fields are sequences of non-space non-tab characters that are separated from
+each other by at least one space or tab, unless a separator is given.
+
 The program accepts the following options.  Also see @ref{Common options}.
 
 @table @samp
@@ -2474,9 +2477,7 @@
 @opindex -@var{n}
 @opindex -f
 @opindex --skip-fields
-Skip @var{n} fields on each line before checking for uniqueness.  Fields
-are sequences of non-space non-tab characters that are separated from
-each other by at least one spaces or tabs.
+Skip @var{n} fields on each line before checking for uniqueness.
 
 @item +@var{n}
 @itemx -s @var{n}
@@ -2524,13 +2525,27 @@
 @cindex unique lines, outputting
 Print only unique lines.
 
+@item -t @var{sep}
+@itemx --separator=@var{sep}
+@opindex -t
+@opindex --separator
+Use @var{sep} to delimit fields.
+
 @item -w @var{n}
 @itemx --check-chars=@var{n}
 @opindex -w
 @opindex --check-chars
-Compare @var{n} characters on each line (after skipping any specified
-fields and characters).  By default the entire rest of the lines are
-compared.
+Compare no more than @var{n} characters on each line (after skipping any
+specified fields and characters).  By default the entire rest of the lines
+are compared.
+
+@item -W @var{n}
+@itemx --check-fields=@var{n}
+@opindex -W
+@opindex --check-fields
+Compare no more than @var{n} fields on each line (after skipping any
+specified fields and characters).  By default the entire rest of the lines
+are compared.
 
 @end table
 
Index: man/uniq.1
===================================================================
RCS file: /home/gondor/herbert/src/CVS/debian/textutils/man/uniq.1,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 uniq.1
--- man/uniq.1  1999/08/06 19:24:10     1.1.1.1
+++ man/uniq.1  2000/06/28 03:09:05
@@ -30,12 +30,18 @@
 \fB\-s\fR, \fB\-\-skip\-chars\fR=\fIN\fR
 avoid comparing the first N characters
 .TP
+\fB\-t\fR, \fB\-\-separator\fR=\fISEP\fR
+use SEParator to delimit fields
+.TP
 \fB\-u\fR, \fB\-\-unique\fR
 only print unique lines
 .TP
 \fB\-w\fR, \fB\-\-check\-chars\fR=\fIN\fR
 compare no more than N characters in lines
 .TP
+\fB\-W\fR, \fB\-\-check\-fields\fR=\fIN\fR
+compare no more than N fields in lines
+.TP
 \fB\-N\fR
 same as \fB\-f\fR N
 .TP
@@ -48,8 +54,8 @@
 \fB\-\-version\fR
 output version information and exit
 .PP
-A field is a run of whitespace, then non-whitespace characters.
-Fields are skipped before chars.
+A field is a run of whitespace, then non-whitespace characters, unless a
+SEParator is given.  Fields are skipped before chars.
 .SH "REPORTING BUGS"
 Report bugs to <[EMAIL PROTECTED]>.
 .SH "SEE ALSO"
Index: src/uniq.c
===================================================================
RCS file: /home/gondor/herbert/src/CVS/debian/textutils/src/uniq.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 uniq.c
--- src/uniq.c  1999/07/04 10:02:54     1.1.1.1
+++ src/uniq.c  2000/06/28 03:18:16
@@ -47,9 +47,17 @@
 /* Number of chars to skip after skipping any fields. */
 static int skip_chars;
 
-/* Number of chars to compare; if 0, compare the whole lines. */
+/* Number of fields to compare; if 0, compare the whole lines. */
+static int check_fields;
+
+/* Number of chars to compare; if 0, compare the whole lines.  When used in
+   conjunction with check_fields, the minimum of the two applies. */
 static int check_chars;
 
+/* Separator between fields; if this is NUL, a field is a run of whitespace,
+   then non-whitespace characters. */
+static int tab;
+
 enum countmode
 {
   count_occurrences,           /* -c Print count before output lines. */
@@ -84,6 +92,8 @@
   {"skip-fields", required_argument, NULL, 'f'},
   {"skip-chars", required_argument, NULL, 's'},
   {"check-chars", required_argument, NULL, 'w'},
+  {"check-fields", required_argument, NULL, 'W'},
+  {"separator", required_argument, NULL, 't'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -111,21 +121,53 @@
   -f, --skip-fields=N   avoid comparing the first N fields\n\
   -i, --ignore-case     ignore differences in case when comparing\n\
   -s, --skip-chars=N    avoid comparing the first N characters\n\
+  -t, --separator=SEP   use SEParator to delimit fields\n\
   -u, --unique          only print unique lines\n\
   -w, --check-chars=N   compare no more than N characters in lines\n\
+  -W, --check-fields=N  compare no more than N fields in lines\n\
   -N                    same as -f N\n\
   +N                    same as -s N\n\
       --help            display this help and exit\n\
       --version         output version information and exit\n\
 \n\
-A field is a run of whitespace, then non-whitespace characters.\n\
-Fields are skipped before chars.\n\
+A field is a run of whitespace, then non-whitespace characters, unless\n\
+a SEParator is given.  Fields are skipped before chars.\n\
 "));
       puts (_("\nReport bugs to <[EMAIL PROTECTED]>."));
     }
   exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
 }
 
+/* Return the offset just past the first N fields in STRING (SIZE bytes long).
+   With -t, each field's trailing separator is counted; else leading blanks. */
+static int
+find_field_length (const char *string, size_t size, int n)
+{
+  int count;
+  int i = 0;  /* NOTE(review): compared with size_t SIZE below (signed/unsigned) */
+
+  if (tab)
+    {
+      for (count = 0; count < n && i < size; count++)
+       {
+         while (string[i++] != tab && i < size)
+           ;  /* empty body: the condition advances i, stopping past the separator */
+       }
+    }
+  else
+    {
+      for (count = 0; count < n && i < size; count++)
+       {
+         while (i < size && ISBLANK (string[i]))  /* blanks leading the field */
+           i++;
+         while (i < size && !ISBLANK (string[i]))  /* the field's own characters */
+           i++;
+       }
+    }
+
+  return i;  /* never exceeds SIZE: every increment is guarded by i < size */
+}
+
 /* Given a linebuffer LINE,
    return a pointer to the beginning of the line's field to be compared. */
 
@@ -135,15 +177,9 @@
   register int count;
   register char *lp = line->buffer;
   register size_t size = line->length;
-  register size_t i = 0;
+  register size_t i;
 
-  for (count = 0; count < skip_fields && i < size; count++)
-    {
-      while (i < size && ISBLANK (lp[i]))
-       i++;
-      while (i < size && !ISBLANK (lp[i]))
-       i++;
-    }
+  i = find_field_length(lp, size, skip_fields);
 
   for (count = 0; count < skip_chars && i < size; count++)
     i++;
@@ -161,6 +197,11 @@
 {
   register int order;
 
+  if (check_fields)
+    {
+      oldlen = find_field_length(old, oldlen, check_fields);
+      newlen = find_field_length(new, newlen, check_fields);
+    }
   if (check_chars)
     {
       if (oldlen > check_chars)
@@ -292,11 +333,13 @@
   skip_chars = 0;
   skip_fields = 0;
   check_chars = 0;
+  check_fields = 0;
+  tab = 0;
   mode = output_all;
   countmode = count_none;
 
-  while ((optc = getopt_long (argc, argv, "0123456789cdDf:is:uw:", longopts,
-                             NULL)) != -1)
+  while ((optc = getopt_long (argc, argv, "0123456789cdDf:is:t:uw:W:",
+                             longopts, NULL)) != -1)
     {
       switch (optc)
        {
@@ -356,6 +399,10 @@
          }
          break;
 
+       case 't':
+         tab = *optarg;
+         break;
+
        case 'u':
          mode = output_unique;
          break;
@@ -369,6 +416,18 @@
                     _("invalid number of bytes to compare: `%s'"),
                     optarg);
            check_chars = (int) tmp_long;
+         }
+         break;
+
+       case 'W':
+         {
+           long int tmp_long;
+           if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
+               || tmp_long <= 0 || tmp_long > INT_MAX)
+             error (EXIT_FAILURE, 0,
+                    _("invalid number of fields to compare: `%s'"),
+                    optarg);
+           check_fields = (int) tmp_long;
          }
          break;

[PATCH] new uniq options: --check-fields and --separator

Reply via email to