patch for delimiting duplicate lines in uniq

Padraig Brady Sat, 21 Apr 2001 17:46:49 -0700

Hi,
I got no comments/enquiries on my previous descriptions.
Anyway, here's the patch for uniq to implement the following
functionality:

>echo -ne "1\n2\n2\n3\n3\n" | uniq -Dl
2
2

3
3
>

The patch is against the 2.0 release.
To apply: cd textutils-2.0; patch -p1 <uniq_delim.patch
3 files will be patched (uniq.c, Test.pm & textutils.texi).
The only things I didn't update were the various language files.

Padraig.

diff -arc textutils-2.0/doc/textutils.texi textutils-pb/doc/textutils.texi
*** textutils-2.0/doc/textutils.texi    Sat Jul 31 10:03:30 1999
--- textutils-pb/doc/textutils.texi     Sun Apr 22 00:07:39 2001
***************
*** 2508,2513 ****
--- 2508,2524 ----
  This is a GNU extension.
  @c FIXME: give an example showing *how* it's useful
  
+ @item -l
+ @itemx --delim-repeated
+ @opindex -l
+ @opindex --delim-repeated
+ @cindex delimit duplicate lines, outputting
+ Print a blank line between groups of duplicate lines.
+ This option is only valid in conjunction with -D (--all-repeated).
+ Its main use is to simplify automated processing of uniq output,
+ but it is also a useful aid to people using uniq interactively.
+ This is a GNU extension.
+ 
  @item -u
  @itemx --unique
  @opindex -u
diff -arc textutils-2.0/src/uniq.c textutils-pb/src/uniq.c
*** textutils-2.0/src/uniq.c    Sun Jul  4 11:02:54 1999
--- textutils-pb/src/uniq.c     Sat Apr 21 23:47:15 2001
***************
*** 74,84 ****
--- 74,88 ----
  /* If nonzero, ignore case when comparing.  */
  static int ignore_case;
  
+ /* If nonzero, delimit groups of duplicate lines with \n  */
+ static int delim_repeated;
+ 
  static struct option const longopts[] =
  {
    {"count", no_argument, NULL, 'c'},
    {"repeated", no_argument, NULL, 'd'},
    {"all-repeated", no_argument, NULL, 'D'},
+   {"delim-repeated", no_argument, NULL, 'l'},
    {"ignore-case", no_argument, NULL, 'i'},
    {"unique", no_argument, NULL, 'u'},
    {"skip-fields", required_argument, NULL, 'f'},
***************
*** 108,113 ****
--- 112,118 ----
    -c, --count           prefix lines by the number of occurrences\n\
    -d, --repeated        only print duplicate lines\n\
    -D, --all-repeated    print all duplicate lines\n\
+   -l, --delim-repeated  delimit \"--all-repeated\" groups with blank lines\n\
    -f, --skip-fields=N   avoid comparing the first N fields\n\
    -i, --ignore-case     ignore differences in case when comparing\n\
    -s, --skip-chars=N    avoid comparing the first N characters\n\
***************
*** 214,219 ****
--- 219,225 ----
    char *prevfield, *thisfield;
    size_t prevlen, thislen;
    int match_count = 0;
+   int first_delimiter = 1;
  
    if (STREQ (infile, "-"))
      istream = stdin;
***************
*** 251,256 ****
--- 257,271 ----
  
        if (match)
        ++match_count;
+       
+       if (delim_repeated)
+         if (!match && mode == output_all_repeated)
+           {
+             if (match_count) /* a previous match */
+               first_delimiter = 0;
+           }
+         else if (match_count == 1 && !first_delimiter)
+           fwrite ("\n", sizeof (char), 1, ostream);
  
        if (!match || mode == output_all_repeated)
        {
***************
*** 295,301 ****
    mode = output_all;
    countmode = count_none;
  
!   while ((optc = getopt_long (argc, argv, "0123456789cdDf:is:uw:", longopts,
                              NULL)) != -1)
      {
        switch (optc)
--- 310,316 ----
    mode = output_all;
    countmode = count_none;
  
!   while ((optc = getopt_long (argc, argv, "0123456789cdDlf:is:uw:", longopts,
                              NULL)) != -1)
      {
        switch (optc)
***************
*** 328,333 ****
--- 343,352 ----
          mode = output_all_repeated;
          break;
  
+       case 'l':
+         delim_repeated = 1;
+         break;
+ 
        case 'f':               /* Like '-#'. */
          {
            long int tmp_long;
***************
*** 414,419 ****
--- 433,445 ----
      {
        error (0, 0,
           _("printing all duplicated lines and repeat counts is meaningless"));
+       usage (1);
+     }
+ 
+   if (delim_repeated && mode != output_all_repeated)
+     {
+       error (0, 0,
+          _("Grouping lines while not printing all duplicated lines is meaningless"));
        usage (1);
      }
  
diff -arc textutils-2.0/tests/uniq/Test.pm textutils-pb/tests/uniq/Test.pm
*** textutils-2.0/tests/uniq/Test.pm    Sat Jan  9 16:14:38 1999
--- textutils-pb/tests/uniq/Test.pm     Sat Apr 21 23:46:44 2001
***************
*** 70,75 ****
--- 70,80 ----
  ['110', '-D',    "a\na\n",          "a\na\n",                   0],
  ['111', '-D -w1',"a a\na b\n",      "a a\na b\n",               0],
  ['112', '-D -c', "a a\na b\n",      "",                         1],
+ # Check the local -l (--delim-repeated) option
+ ['113', '-Dl',    "a\na\n",          "a\na\n",                  0],
+ ['114', '-Dl',"a\na\nb\nc\nc\n",      "a\na\n\nc\nc\n",         0],
+ ['115', '-Dl',"a\na\nb\nb\nc\n",      "a\na\n\nb\nb\n",         0],
+ ['116', '-l', "a\na\n",      "",                                1],
  );
  
  sub test_vector

patch for delimiting duplicate lines in uniq

Reply via email to