Hello,
Attached is a suggestion for a "--group" option in uniq, as discussed here:
http://lists.gnu.org/archive/html/coreutils/2011-03/msg00000.html
http://lists.gnu.org/archive/html/coreutils/2012-03/msg00052.html
The patch adds two parameters:
--group[=method] separate each unique line (whether duplicated or not)
with a marker.
method={none,separate(default),prepend,append,both}
--group-separator=SEP with --group, separate groups using SEP
(default: empty line)
And it behaves "as expected":
===
$ printf "a\na\na\nb\nc\nc\n" | ./src/uniq --group-sep="--" --group=separate
a
--
b
--
c
$ printf "a\na\na\nb\nc\nc\n" | ./src/uniq --group-sep="--" --group=both
--
a
--
b
--
c
--
$ printf "a\na\na\nb\nc\nc\n" | ./src/uniq --group-sep="--" --group=prepend
--
a
--
b
--
c
$ printf "a\na\na\nb\nc\nc\n" | ./src/uniq -D --group-sep="--" --group=both
--
a
a
a
--
c
c
--
===
The added tests check all sorts of combinations.
If this is the right direction, I'll send an updated patch (with
NEWS/docs/etc.).
-gordon
>From ece4bcc78d23050da1572ddec29ed81a806cbf4b Mon Sep 17 00:00:00 2001
From: Assaf Gordon <[email protected]>
Date: Wed, 20 Feb 2013 13:31:22 -0500
Subject: [PATCH] uniq: add "--group" option
* src/uniq.c: implement "--group" options.
* tests/misc/uniq.pl: add tests.
---
src/uniq.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++--
tests/misc/uniq.pl | 33 ++++++++++++++++++
2 files changed, 125 insertions(+), 4 deletions(-)
diff --git a/src/uniq.c b/src/uniq.c
index 5efdad7..c41e25d 100644
--- a/src/uniq.c
+++ b/src/uniq.c
@@ -83,6 +83,10 @@ static bool output_later_repeated;
/* If true, ignore case when comparing. */
static bool ignore_case;
+/* If true, at least one line was printed to output
+ in writeline(). Used with grouping. */
+static bool first_group_printed = false;
+
enum delimit_method
{
/* No delimiters output. --all-repeated[=none] */
@@ -108,11 +112,53 @@ static enum delimit_method const delimit_method_map[] =
/* Select whether/how to delimit groups of duplicate lines. */
static enum delimit_method delimit_groups;
+enum grouping_method
+{
+ /* No grouping. --group=none */
+ GM_NONE,
+
+ /* Delimiter precedes all groups. --group=prepend */
+ GM_PREPEND,
+
+ /* Delimiter follows all groups. --group=append */
+ GM_APPEND,
+
+ /* Delimiter between groups. --group[=separate] */
+ GM_SEPARATE,
+
+ /* Delimiter before and after each group. --group=both */
+ GM_BOTH
+};
+
+static char const *const grouping_method_string[] =
+{
+ "none", "prepend", "append", "separate", "both", NULL
+};
+
+static enum grouping_method const grouping_method_map[] =
+{
+ GM_NONE, GM_PREPEND, GM_APPEND, GM_SEPARATE, GM_BOTH
+};
+
+static enum grouping_method grouping;
+
+/* Default group separator - empty line.
+ The end-of-line delimiter will be appended (newline or NUL) */
+static const char* grouping_delimiter = "";
+
+enum
+{
+ GROUP_OPTION = CHAR_MAX + 1,
+ GROUP_SEPARATOR_OPTION,
+};
+
static struct option const longopts[] =
{
{"count", no_argument, NULL, 'c'},
{"repeated", no_argument, NULL, 'd'},
{"all-repeated", optional_argument, NULL, 'D'},
+ {"group", optional_argument, NULL, GROUP_OPTION},
+ {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
{"ignore-case", no_argument, NULL, 'i'},
{"unique", no_argument, NULL, 'u'},
{"skip-fields", required_argument, NULL, 'f'},
@@ -159,6 +205,13 @@ With no options, matching lines are merged to the first occurrence.\n\
-z, --zero-terminated end lines with 0 byte, not newline\n\
"), stdout);
fputs (_("\
+ --group=[method] separate each unique line (whether duplicated or not)\n\
+ with a marker.\n\
+ method={none,separate(default),prepend,append,both)\n\
+ --group-separator=SEP with --group, separates group using SEP\n\
+ (default: empty line)\n\
+"), stdout);
+ fputs (_("\
-w, --check-chars=N compare no more than N characters in lines\n\
"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
@@ -257,17 +310,32 @@ different (char *old, char *new, size_t oldlen, size_t newlen)
static void
writeline (struct linebuffer const *line,
- bool match, uintmax_t linecount)
+ bool match, uintmax_t linecount, char delimiter)
{
if (! (linecount == 0 ? output_unique
: !match ? output_first_repeated
: output_later_repeated))
return;
+ if ( ( !output_later_repeated || (match && linecount==1) )
+ && (grouping == GM_PREPEND || grouping == GM_BOTH
+ || ( grouping == GM_SEPARATE && first_group_printed )))
+ {
+ fputs (grouping_delimiter, stdout);
+ putchar (delimiter);
+ }
+ first_group_printed = true;
+
if (countmode == count_occurrences)
printf ("%7" PRIuMAX " ", linecount + 1);
fwrite (line->buffer, sizeof (char), line->length, stdout);
+
+ if (!match && grouping == GM_APPEND)
+ {
+ fputs (grouping_delimiter, stdout);
+ putchar (delimiter);
+ }
}
/* Process input file INFILE with output to OUTFILE.
@@ -299,7 +367,8 @@ check_file (const char *infile, const char *outfile, char delimiter)
this optimization lets uniq output each different line right away,
without waiting to see if the next one is different. */
- if (output_unique && output_first_repeated && countmode == count_none)
+ if (output_unique && output_first_repeated && countmode == count_none
+ && grouping == GM_NONE)
{
char *prevfield IF_LINT ( = NULL);
size_t prevlen IF_LINT ( = 0);
@@ -377,7 +446,7 @@ check_file (const char *infile, const char *outfile, char delimiter)
if (!match || output_later_repeated)
{
- writeline (prevline, match, match_count);
+ writeline (prevline, match, match_count, delimiter);
SWAP_LINES (prevline, thisline);
prevfield = thisfield;
prevlen = thislen;
@@ -386,13 +455,19 @@ check_file (const char *infile, const char *outfile, char delimiter)
}
}
- writeline (prevline, false, match_count);
+ writeline (prevline, false, match_count, delimiter);
}
closefiles:
if (ferror (stdin) || fclose (stdin) != 0)
error (EXIT_FAILURE, 0, _("error reading %s"), infile);
+ if (grouping == GM_BOTH && first_group_printed)
+ {
+ fputs (grouping_delimiter, stdout);
+ putchar (delimiter);
+ }
+
/* stdout is handled via the atexit-invoked close_stdout function. */
free (lb1.buffer);
@@ -515,6 +590,19 @@ main (int argc, char **argv)
delimit_method_map);
break;
+ case GROUP_OPTION:
+ if (optarg == NULL)
+ grouping = GM_SEPARATE;
+ else
+ grouping = XARGMATCH ("--group", optarg,
+ grouping_method_string,
+ grouping_method_map);
+ break;
+
+ case GROUP_SEPARATOR_OPTION:
+ grouping_delimiter = optarg;
+ break;
+
case 'f':
skip_field_option_type = SFO_NEW;
skip_fields = size_opt (optarg,
diff --git a/tests/misc/uniq.pl b/tests/misc/uniq.pl
index 140a49b..a6ce93d 100755
--- a/tests/misc/uniq.pl
+++ b/tests/misc/uniq.pl
@@ -199,6 +199,39 @@ my @Tests =
# Check that --zero-terminated is synonymous with -z.
['123', '--zero-terminated', {IN=>"a\na\nb"}, {OUT=>"a\na\nb\0"}],
['124', '--zero-terminated', {IN=>"a\0a\0b"}, {OUT=>"a\0b\0"}],
+
+ # Check grouping
+ ['125', '--group=prepend', {IN=>"a\na\nb\n"}, {OUT=>"\na\n\nb\n"}],
+ ['126', '--group=append', {IN=>"a\na\nb\n"}, {OUT=>"a\n\nb\n\n"}],
+ ['127', '--group=separate',{IN=>"a\na\nb\n"}, {OUT=>"a\n\nb\n"}],
+ # --group without an explicit method defaults to 'separate'
+ ['128', '--group', {IN=>"a\na\nb\n"}, {OUT=>"a\n\nb\n"}],
+ ['129', '--group=none', {IN=>"a\na\nb\n"}, {OUT=>"a\nb\n"}],
+ ['130', '--group=both', {IN=>"a\na\nb\n"}, {OUT=>"\na\n\nb\n\n"}],
+ # Grouping in the special case of a single group
+ ['131', '--group=prepend', {IN=>"a\na\n"}, {OUT=>"\na\n"}],
+ ['132', '--group=append', {IN=>"a\na\n"}, {OUT=>"a\n\n"}],
+ ['133', '--group=separate',{IN=>"a\na\n"}, {OUT=>"a\n"}],
+ ['134', '--group', {IN=>"a\na\n"}, {OUT=>"a\n"}],
+ ['135', '--group=both', {IN=>"a\na\n"}, {OUT=>"\na\n\n"}],
+ ['136', '--group=none', {IN=>"a\na\n"}, {OUT=>"a\n"}],
+ # Grouping with "-D" (=show duplicated lines)
+ ['137', '--group=prepend -D' , {IN=>"a\na\na\n"}, {OUT=>"\na\na\na\n"}],
+ ['138', '--group=append -D', {IN=>"a\na\na\n"}, {OUT=>"a\na\na\n\n"}],
+ ['139', '--group=separate -D', {IN=>"a\na\na\n"}, {OUT=>"a\na\na\n"}],
+ ['140', '--group=separate -D', {IN=>"a\na\na\nb\nb\nb\n"},
+ {OUT=>"a\na\na\n\nb\nb\nb\n"}],
+ ['141', '--group=both -D', {IN=>"a\na\na\nb\nb\nb\n"},
+ {OUT=>"\na\na\na\n\nb\nb\nb\n\n"}],
+ # Grouping with custom group delimiter
+ ['142', '--group=both --group-sep="-=-"', {IN=>"a\na\nb\n"},
+ {OUT=>"-=-\na\n-=-\nb\n-=-\n"}],
+ # Grouping with empty input - should never print anything
+ ['143', '--group=prepend', {IN=>""}, {OUT=>""}],
+ ['144', '--group=append', {IN=>""}, {OUT=>""}],
+ ['145', '--group=separate', {IN=>""}, {OUT=>""}],
+ ['146', '--group=none', {IN=>""}, {OUT=>""}],
+ ['147', '--group=both', {IN=>""}, {OUT=>""}],
);
# Set _POSIX2_VERSION=199209 in the environment of each obs-plus* test.
--
1.7.7.4