Re: RFE: head,tail: -z, --zero-terminated

Pádraig Brady Fri, 08 Jan 2016 08:57:24 -0800

On 28/09/15 15:17, Stephane Chazelas wrote:
> 2015-09-26 15:43:40 +0100, Richard Russon:
>> I'd like to add an option to both head and tail,
>> to allow them to work with NUL-terminated lines of text
>>     -z, --zero-terminated
>>
>> Thus allowing:
>>
>>     find dir -type f -print0 | head -z -n 10 | xargs -0 command
> [...]
> 
> See also
> 
> sed -z 10q
> 
> as an alternative to
> 
> head -zn 10
> 
> While we're at it, why not add it to every text utility (cut,
> paste, seq, yes, tac...) for those that don't have it already?


Yes, we've been adding -z support piecemeal over time,
so I propose we add it, as per the attached patch set, to:
wc, comm, cut, head, tail, tac, paste

When looking at which coreutils might benefit from -z,
I split them into the following categories:

1. One output item per input argument.
NUL-terminated input is catered for by xargs.

NUL-terminated output is already handled with -z,--zero
for these utils:

  basename
  dirname
  du
  readlink
  realpath
  stat (handled with --printf='...\0')
  md5sum (\n is escaped)
  sha*sum (ditto)
  ls (ditto, also \0 supported by find)

Possible additions to this class:

  cksum (obsolescent)
  sum (ditto)
  wc


2. Multiple output records per input file/stdin
NUL-terminated I/O is already handled with -z,--zero-terminated
for these utils:

  join
  shuf
  sort
  uniq

Possible additions to this class:

  comm
  cut
  head (especially since it supports multiple files and seeking within them)
  tail (ditto)
  tac (ditto. extend -s to support '')
  paste

  nl (N/A as primarily text rather than record oriented)
  numfmt (ditto)
  expand (ditto)
  unexpand (ditto)

  fmt (N/A as word oriented rather than record oriented)
  fold (ditto)
  tsort (ditto)


3. Misc record processing

  id (handled already with -z)
  split (handled already with -t,--separator='\0')
  csplit (pattern based so only supports text)
  seq (might support -s '\0', but I can't see the need)
  yes (easy to handle this edge case with tr)

From 9da3cb61020fd58fde8dba6e8caf7ad101816797 Mon Sep 17 00:00:00 2001
From: Richard Russon <[email protected]>
Date: Sat, 26 Sep 2015 14:22:26 +0100
Subject: [PATCH 1/7] head,tail: add the -z,--zero-terminated option

* doc/coreutils.texi: Reference the option description.
* src/head.c: Parameterize the delimiter character.
* src/tail.c: Likewise.
* tests/misc/head.pl: Add test case.
* tests/misc/tail.pl: Likewise.
* NEWS: Mention the new feature.
---
 NEWS               |  3 +++
 doc/coreutils.texi |  4 ++++
 src/head.c         | 31 ++++++++++++++++++++++++-------
 src/tail.c         | 26 +++++++++++++++++++-------
 tests/misc/head.pl |  4 ++++
 tests/misc/tail.pl |  4 ++++
 6 files changed, 58 insertions(+), 14 deletions(-)

diff --git a/NEWS b/NEWS
index 6e48a53..c88b4e3 100644
--- a/NEWS
+++ b/NEWS
@@ -39,6 +39,9 @@ GNU coreutils NEWS                                    -*- outline -*-
   Its status=progress output now uses the same format as ordinary status,
   perhaps with trailing spaces to erase previous progress output.
 
+  head, tail now have -z, --zero-terminated options to work with
+  NUL delimited items.
+
   md5sum now supports the --ignore-missing option to allow
   verifying a subset of files given a larger list of checksums.
   This also affects sha1sum, sha224sum, sha256sum, sha384sum and sha512sum.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 2538062..2635fbe 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -2817,6 +2817,8 @@ Never print file name headers.
 @opindex --verbose
 Always print file name headers.
 
+@optZeroTerminated
+
 @end table
 
 For compatibility @command{head} also supports an obsolete option syntax
@@ -3042,6 +3044,8 @@ every @var{number} seconds.
 @opindex --verbose
 Always print file name headers.
 
+@optZeroTerminated
+
 @end table
 
 For compatibility @command{tail} also supports an obsolete usage
diff --git a/src/head.c b/src/head.c
index a5405aa..282c2ea 100644
--- a/src/head.c
+++ b/src/head.c
@@ -58,6 +58,9 @@ static bool presume_input_pipe;
 /* If true, print filename headers. */
 static bool print_headers;
 
+/* Character to split lines by. */
+static char line_end;
+
 /* When to print the filename banners. */
 enum header_mode
 {
@@ -90,6 +93,7 @@ static struct option const long_options[] =
   {"quiet", no_argument, NULL, 'q'},
   {"silent", no_argument, NULL, 'q'},
   {"verbose", no_argument, NULL, 'v'},
+  {"zero-terminated", no_argument, NULL, 'z'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -126,6 +130,9 @@ With more than one FILE, precede each with a header giving the file name.\n\
   -q, --quiet, --silent    never print headers giving file names\n\
   -v, --verbose            always print headers giving file names\n\
 "), stdout);
+      fputs (_("\
+  -z, --zero-terminated    line delimiter is NUL, not newline\n\
+"), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
       fputs (_("\
@@ -532,7 +539,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide,
       {
         char const *buffer_end = tmp->buffer + n_read;
         char const *p = tmp->buffer;
-        while ((p = memchr (p, '\n', buffer_end - p)))
+        while ((p = memchr (p, line_end, buffer_end - p)))
           {
             ++p;
             ++tmp->nlines;
@@ -581,7 +588,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide,
 
   /* If we read any bytes at all, count the incomplete line
      on files that don't end with a newline.  */
-  if (last->nbytes && last->buffer[last->nbytes - 1] != '\n')
+  if (last->nbytes && last->buffer[last->nbytes - 1] != line_end)
     {
       ++last->nlines;
       ++total_lines;
@@ -600,7 +607,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide,
       size_t n = total_lines - n_elide;
       char const *buffer_end = tmp->buffer + tmp->nbytes;
       char const *p = tmp->buffer;
-      while (n && (p = memchr (p, '\n', buffer_end - p)))
+      while (n && (p = memchr (p, line_end, buffer_end - p)))
         {
           ++p;
           ++tmp->nlines;
@@ -664,7 +671,7 @@ elide_tail_lines_seekable (const char *pretty_filename, int fd,
   const bool all_lines = !n_lines;
 
   /* Count the incomplete line on files that don't end with a newline.  */
-  if (n_lines && bytes_read && buffer[bytes_read - 1] != '\n')
+  if (n_lines && bytes_read && buffer[bytes_read - 1] != line_end)
     --n_lines;
 
   while (1)
@@ -679,7 +686,7 @@ elide_tail_lines_seekable (const char *pretty_filename, int fd,
           else
             {
               char const *nl;
-              nl = memrchr (buffer, '\n', n);
+              nl = memrchr (buffer, line_end, n);
               if (nl == NULL)
                 break;
               n = nl - buffer;
@@ -804,7 +811,7 @@ head_lines (const char *filename, int fd, uintmax_t lines_to_write)
       if (bytes_read == 0)
         break;
       while (bytes_to_write < bytes_read)
-        if (buffer[bytes_to_write++] == '\n' && --lines_to_write == 0)
+        if (buffer[bytes_to_write++] == line_end && --lines_to_write == 0)
           {
             off_t n_bytes_past_EOL = bytes_read - bytes_to_write;
             /* If we have read more data than that on the specified number
@@ -942,6 +949,8 @@ main (int argc, char **argv)
 
   print_headers = false;
 
+  line_end = '\n';
+
   if (1 < argc && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
     {
       char *a = argv[1];
@@ -986,6 +995,10 @@ main (int argc, char **argv)
               header_mode = always;
               break;
 
+            case 'z':
+              line_end = '\0';
+              break;
+
             default:
               error (0, 0, _("invalid trailing option -- %c"), *a);
               usage (EXIT_FAILURE);
@@ -1006,7 +1019,7 @@ main (int argc, char **argv)
       argc--;
     }
 
-  while ((c = getopt_long (argc, argv, "c:n:qv0123456789", long_options, NULL))
+  while ((c = getopt_long (argc, argv, "c:n:qvz0123456789", long_options, NULL))
          != -1)
     {
       switch (c)
@@ -1039,6 +1052,10 @@ main (int argc, char **argv)
           header_mode = always;
           break;
 
+        case 'z':
+          line_end = '\0';
+          break;
+
         case_GETOPT_HELP_CHAR;
 
         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
diff --git a/src/tail.c b/src/tail.c
index 9007888..781adf2 100644
--- a/src/tail.c
+++ b/src/tail.c
@@ -180,6 +180,9 @@ static bool from_start;
 /* If true, print filename headers.  */
 static bool print_headers;
 
+/* Character to split lines by. */
+static char line_end;
+
 /* When to print the filename banners.  */
 enum header_mode
 {
@@ -238,6 +241,7 @@ static struct option const long_options[] =
   {"silent", no_argument, NULL, 'q'},
   {"sleep-interval", required_argument, NULL, 's'},
   {"verbose", no_argument, NULL, 'v'},
+  {"zero-terminated", no_argument, NULL, 'z'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -297,6 +301,9 @@ With more than one FILE, precede each with a header giving the file name.\n\
                              least once every N seconds\n\
   -v, --verbose            always output headers giving file names\n\
 "), stdout);
+     fputs (_("\
+  -z, --zero-terminated    line delimiter is NUL, not newline\n\
+"), stdout);
      fputs (HELP_OPTION_DESCRIPTION, stdout);
      fputs (VERSION_OPTION_DESCRIPTION, stdout);
      fputs (_("\
@@ -499,7 +506,7 @@ file_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
   *read_pos = pos + bytes_read;
 
   /* Count the incomplete line on files that don't end with a newline.  */
-  if (bytes_read && buffer[bytes_read - 1] != '\n')
+  if (bytes_read && buffer[bytes_read - 1] != line_end)
     --n_lines;
 
   do
@@ -510,7 +517,7 @@ file_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
       while (n)
         {
           char const *nl;
-          nl = memrchr (buffer, '\n', n);
+          nl = memrchr (buffer, line_end, n);
           if (nl == NULL)
             break;
           n = nl - buffer;
@@ -595,7 +602,7 @@ pipe_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
       {
         char const *buffer_end = tmp->buffer + n_read;
         char const *p = tmp->buffer;
-        while ((p = memchr (p, '\n', buffer_end - p)))
+        while ((p = memchr (p, line_end, buffer_end - p)))
           {
             ++p;
             ++tmp->nlines;
@@ -649,7 +656,7 @@ pipe_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
     goto free_lbuffers;
 
   /* Count the incomplete line on files that don't end with a newline.  */
-  if (last->buffer[last->nbytes - 1] != '\n')
+  if (last->buffer[last->nbytes - 1] != line_end)
     {
       ++last->nlines;
       ++total_lines;
@@ -671,7 +678,7 @@ pipe_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
         size_t j;
         for (j = total_lines - n_lines; j; --j)
           {
-            beg = memchr (beg, '\n', buffer_end - beg);
+            beg = memchr (beg, line_end, buffer_end - beg);
             assert (beg);
             ++beg;
           }
@@ -857,7 +864,7 @@ start_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
       *read_pos += bytes_read;
 
       char *p = buffer;
-      while ((p = memchr (p, '\n', buffer_end - p)))
+      while ((p = memchr (p, line_end, buffer_end - p)))
         {
           ++p;
           if (--n_lines == 0)
@@ -2047,7 +2054,7 @@ parse_options (int argc, char **argv,
 {
   int c;
 
-  while ((c = getopt_long (argc, argv, "c:n:fFqs:v0123456789",
+  while ((c = getopt_long (argc, argv, "c:n:fFqs:vz0123456789",
                            long_options, NULL))
          != -1)
     {
@@ -2124,6 +2131,10 @@ parse_options (int argc, char **argv,
           *header_mode = always;
           break;
 
+        case 'z':
+          line_end = '\0';
+          break;
+
         case_GETOPT_HELP_CHAR;
 
         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
@@ -2221,6 +2232,7 @@ main (int argc, char **argv)
 
   count_lines = true;
   forever = from_start = print_headers = false;
+  line_end = '\n';
   obsolete_option = parse_obsolete_option (argc, argv, &n_units);
   argc -= obsolete_option;
   argv += obsolete_option;
diff --git a/tests/misc/head.pl b/tests/misc/head.pl
index 78644f2..1f565cf 100755
--- a/tests/misc/head.pl
+++ b/tests/misc/head.pl
@@ -72,6 +72,10 @@ my @Tests =
   ['no-oct-2', '-010', {IN=>"\n"x12}, {OUT=>"\n"x10}],
   ['no-oct-3', '-n 08', {IN=>"\n"x12}, {OUT=>"\n"x8}],
   ['no-oct-4', '-c 08', {IN=>"\n"x12}, {OUT=>"\n"x8}],
+
+  # --zero-terminated
+  ['zero-1', '-z -n 1',  {IN=>"x\0y"}, {OUT=>"x\0"}],
+  ['zero-2', '-z -n 2',  {IN=>"x\0y"}, {OUT=>"x\0y"}],
 );
 
 @Tests = triple_test \@Tests;
diff --git a/tests/misc/tail.pl b/tests/misc/tail.pl
index c23102f..0d9bc48 100755
--- a/tests/misc/tail.pl
+++ b/tests/misc/tail.pl
@@ -101,6 +101,10 @@ my @tv = (
 
 # With textutils-1.22, this failed.
 ['f-pipe-1', '-f -n 1', "a\nb\n", "b\n", 0],
+
+# --zero-terminated
+['zero-1', '-z -n 1', "x\0y", "y", 0],
+['zero-2', '-z -n 2', "x\0y", "x\0y", 0],
 );
 
 my @Tests;
-- 
2.5.0


From 92d159dd43796d36b210a4ae8d493919cd4abda9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Fri, 8 Jan 2016 13:04:03 +0000
Subject: [PATCH 2/7] cut: add the -z,--zero-terminated option

* doc/coreutils.texi (cut invocation): Reference the description.
* src/cut.c: Parameterize '\n' references.
* tests/misc/cut.pl: Add tests for character and field processing.
* NEWS: Mention the new feature.
---
 NEWS               |  6 +++---
 doc/coreutils.texi |  2 ++
 src/cut.c          | 42 +++++++++++++++++++++++++++---------------
 tests/misc/cut.pl  |  8 ++++++++
 4 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/NEWS b/NEWS
index c88b4e3..22df138 100644
--- a/NEWS
+++ b/NEWS
@@ -33,15 +33,15 @@ GNU coreutils NEWS                                    -*- outline -*-
 
 ** New features
 
+  cut, head, tail now have -z, --zero-terminated options to work with
+  NUL delimited items.
+
   dd now summarizes sizes in --human-readable format too, not just --si.
   E.g., "3441325000 bytes (3.4 GB, 3.2 GiB) copied".  It omits the summaries
   if they would not provide useful information, e.g., "3 bytes copied".
   Its status=progress output now uses the same format as ordinary status,
   perhaps with trailing spaces to erase previous progress output.
 
-  head, tail now have -z, --zero-terminated options to work with
-  NUL delimited items.
-
   md5sum now supports the --ignore-missing option to allow
   verifying a subset of files given a larger list of checksums.
   This also affects sha1sum, sha224sum, sha256sum, sha384sum and sha512sum.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 2635fbe..fd4322e 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -5898,6 +5898,8 @@ In other words, do @emph{not} print the bytes, characters or fields
 specified via those options.  This option is useful when you have
 many fields and want to print all but a few of them.
 
+@optZeroTerminated
+
 @end table
 
 @exitstatus
diff --git a/src/cut.c b/src/cut.c
index 96440af..7ab6be4 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -98,6 +98,9 @@ static bool complement;
 /* The delimiter character for field mode. */
 static unsigned char delim;
 
+/* The delimiter for each line/record. */
+static unsigned char line_delim = '\n';
+
 /* True if the --output-delimiter=STRING option was specified.  */
 static bool output_delimiter_specified;
 
@@ -128,6 +131,7 @@ static struct option const longopts[] =
   {"only-delimited", no_argument, NULL, 's'},
   {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
   {"complement", no_argument, NULL, COMPLEMENT_OPTION},
+  {"zero-terminated", no_argument, NULL, 'z'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -171,6 +175,9 @@ Print selected parts of lines from each FILE to standard output.\n\
       --output-delimiter=STRING  use STRING as the output delimiter\n\
                             the default is to use the input delimiter\n\
 "), stdout);
+      fputs (_("\
+  -z, --zero-terminated    line delimiter is NUL, not newline\n\
+"), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
       fputs (_("\
@@ -239,9 +246,9 @@ cut_bytes (FILE *stream)
 
       c = getc (stream);
 
-      if (c == '\n')
+      if (c == line_delim)
         {
-          putchar ('\n');
+          putchar (c);
           byte_idx = 0;
           print_delimiter = false;
           current_rp = frp;
@@ -249,7 +256,7 @@ cut_bytes (FILE *stream)
       else if (c == EOF)
         {
           if (byte_idx > 0)
-            putchar ('\n');
+            putchar (line_delim);
           break;
         }
       else
@@ -308,7 +315,7 @@ cut_fields (FILE *stream)
           size_t n_bytes;
 
           len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
-                            GETNLINE_NO_LIMIT, delim, '\n', stream);
+                            GETNLINE_NO_LIMIT, delim, line_delim, stream);
           if (len < 0)
             {
               free (field_1_buffer);
@@ -336,9 +343,9 @@ cut_fields (FILE *stream)
                 {
                   fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
                   /* Make sure the output line is newline terminated.  */
-                  if (field_1_buffer[n_bytes - 1] != '\n')
-                    putchar ('\n');
-                  c = '\n';
+                  if (field_1_buffer[n_bytes - 1] != line_delim)
+                    putchar (line_delim);
+                  c = line_delim;
                 }
               continue;
             }
@@ -348,7 +355,7 @@ cut_fields (FILE *stream)
               fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
 
               /* With -d$'\n' don't treat the last '\n' as a delimiter.  */
-              if (delim == '\n')
+              if (delim == line_delim)
                 {
                   int last_c = getc (stream);
                   if (last_c != EOF)
@@ -374,7 +381,7 @@ cut_fields (FILE *stream)
             }
           found_any_selected_field = true;
 
-          while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
+          while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
             {
               putchar (c);
               prev_c = c;
@@ -382,14 +389,14 @@ cut_fields (FILE *stream)
         }
       else
         {
-          while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
+          while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
             {
               prev_c = c;
             }
         }
 
       /* With -d$'\n' don't treat the last '\n' as a delimiter.  */
-      if (delim == '\n' && c == delim)
+      if (delim == line_delim && c == delim)
         {
           int last_c = getc (stream);
           if (last_c != EOF)
@@ -400,13 +407,14 @@ cut_fields (FILE *stream)
 
       if (c == delim)
         next_item (&field_idx);
-      else if (c == '\n' || c == EOF)
+      else if (c == line_delim || c == EOF)
         {
           if (found_any_selected_field
               || !(suppress_non_delimited && field_idx == 1))
             {
-              if (c == '\n' || prev_c != '\n' || delim == '\n')
-                putchar ('\n');
+              if (c == line_delim || prev_c != line_delim
+                  || delim == line_delim)
+                putchar (line_delim);
             }
           if (c == EOF)
             break;
@@ -492,7 +500,7 @@ main (int argc, char **argv)
   delim = '\0';
   have_read_stdin = false;
 
-  while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1)
+  while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, NULL)) != -1)
     {
       switch (optc)
         {
@@ -538,6 +546,10 @@ main (int argc, char **argv)
           suppress_non_delimited = true;
           break;
 
+        case 'z':
+          line_delim = '\0';
+          break;
+
         case COMPLEMENT_OPTION:
           complement = true;
           break;
diff --git a/tests/misc/cut.pl b/tests/misc/cut.pl
index 70c5a64..f6f8a56 100755
--- a/tests/misc/cut.pl
+++ b/tests/misc/cut.pl
@@ -161,6 +161,14 @@ my @Tests =
   ['newline-23', "-d'\n'", '-f1-', '--ou=:', {IN=>"a\nb\n"}, {OUT=>"a:b\n"}],
   ['newline-24', "-d'\n'", '-f1,2', '--ou=:', {IN=>"a\nb\n"}, {OUT=>"a:b\n"}],
 
+  # --zero-terminated
+  ['zerot-1', "-z", '-c1', {IN=>"ab\0cd\0"}, {OUT=>"a\0c\0"}],
+  ['zerot-2', "-z", '-c1', {IN=>"ab\0cd"}, {OUT=>"a\0c\0"}],
+  ['zerot-3', '-z -f1-', {IN=>""}, {OUT=>""}],
+  ['zerot-4', '-z -d:', '-f1', {IN=>"a:1\0b:2"}, {OUT=>"a\0b\0"}],
+  ['zerot-5', '-z -d:', '-f1-', {IN=>"a1:\0:"}, {OUT=>"a1:\0:\0"}],
+  ['zerot-6', "-z -d ''", '-f1,2', '--ou=:', {IN=>"a\0b\0"}, {OUT=>"a:b\0"}],
+
   # New functionality:
   ['out-delim1', '-c1-3,5-', '--output-d=:', {IN=>"abcdefg\n"},
    {OUT=>"abc:efg\n"}],
-- 
2.5.0


From 675d9e9113377d7e7c5918f0f0b1353c1f368fba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Fri, 8 Jan 2016 14:31:27 +0000
Subject: [PATCH 3/7] tac: support an empty (NUL) --separator

* doc/coreutils.texi (tac invocation): Mention the
NUL delineation with an empty --separator.
* src/tac.c (main): Allow an empty separator when -r not specified.
* tests/misc/tac.pl: Add test cases.
* NEWS: Mention the new feature.
Fixes http://bugs.gnu.org/8103
---
 NEWS               | 4 ++--
 doc/coreutils.texi | 2 ++
 src/tac.c          | 7 ++++---
 tests/misc/tac.pl  | 7 +++++++
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index 22df138..a3b5636 100644
--- a/NEWS
+++ b/NEWS
@@ -33,8 +33,8 @@ GNU coreutils NEWS                                    -*- outline -*-
 
 ** New features
 
-  cut, head, tail now have -z, --zero-terminated options to work with
-  NUL delimited items.
+  cut, head, tail now have the -z,--zero-terminated option, and
+  tac --separator accepts an empty argument, to work with NUL delimited items.
 
   dd now summarizes sizes in --human-readable format too, not just --si.
   E.g., "3441325000 bytes (3.4 GB, 3.2 GiB) copied".  It omits the summaries
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index fd4322e..ba68416 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -1706,6 +1706,8 @@ Treat the separator string as a regular expression.
 @opindex -s
 @opindex --separator
 Use @var{separator} as the record separator, instead of newline.
+Note an empty @var{separator} is treated as a zero byte.
+I.e., input and output items are delimited with ASCII NUL.
 
 @end table
 
diff --git a/src/tac.c b/src/tac.c
index 2410224..4681f3a 100644
--- a/src/tac.c
+++ b/src/tac.c
@@ -639,8 +639,6 @@ main (int argc, char **argv)
           break;
         case 's':
           separator = optarg;
-          if (*separator == 0)
-            error (EXIT_FAILURE, 0, _("separator cannot be empty"));
           break;
         case_GETOPT_HELP_CHAR;
         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
@@ -651,6 +649,9 @@ main (int argc, char **argv)
 
   if (sentinel_length == 0)
     {
+      if (*separator == 0)
+        error (EXIT_FAILURE, 0, _("separator cannot be empty"));
+
       compiled_separator.buffer = NULL;
       compiled_separator.allocated = 0;
       compiled_separator.fastmap = compiled_separator_fastmap;
@@ -661,7 +662,7 @@ main (int argc, char **argv)
         error (EXIT_FAILURE, 0, "%s", (error_message));
     }
   else
-    match_length = sentinel_length = strlen (separator);
+    match_length = sentinel_length = *separator ? strlen (separator) : 1;
 
   read_size = INITIAL_READSIZE;
   while (sentinel_length >= read_size / 2)
diff --git a/tests/misc/tac.pl b/tests/misc/tac.pl
index 6297b16..fb76719 100755
--- a/tests/misc/tac.pl
+++ b/tests/misc/tac.pl
@@ -45,6 +45,13 @@ my @Tests =
   ['basic-j', '', {IN=>"1234\n8\n"}, {OUT=>"8\n1234\n"}],
   ['basic-k', '', {IN=>"123\n8\n"}, {OUT=>"8\n123\n"}],
 
+  ['nul-0', '-s ""', {IN=>""}, {OUT=>""}],
+  ['nul-a', '-s ""', {IN=>"a"}, {OUT=>"a"}],
+  ['nul-b', '-s ""', {IN=>"\0"}, {OUT=>"\0"}],
+  ['nul-c', '-s ""', {IN=>"a\0"}, {OUT=>"a\0"}],
+  ['nul-d', '-s ""', {IN=>"a\0b"}, {OUT=>"ba\0"}],
+  ['nul-e', '-s ""', {IN=>"a\0b\0"}, {OUT=>"b\0a\0"}],
+
   ['opt-b', '-b', {IN=>"\na\nb\nc"}, {OUT=>"\nc\nb\na"}],
   ['opt-s', '-s:', {IN=>"a:b:c:"}, {OUT=>"c:b:a:"}],
   ['opt-sb', qw(-s : -b), {IN=>":a:b:c"}, {OUT=>":c:b:a"}],
-- 
2.5.0


From 5380b507b3d92f689ff99a23972180ffbfb7f7d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Fri, 8 Jan 2016 15:14:01 +0000
Subject: [PATCH 4/7] comm: add the -z,--zero-terminated option

* doc/coreutils.texi (comm invocation): Reference option description.
* src/comm.c (main): Use readlinebuffer_delim() to support
a parameterized delimiter.
* tests/misc/comm.pl: Add test cases.
* NEWS: Mention the new feature.
---
 NEWS               |  2 +-
 doc/coreutils.texi |  2 ++
 src/comm.c         | 19 ++++++++++++++++---
 tests/misc/comm.pl |  3 +++
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/NEWS b/NEWS
index a3b5636..30a1526 100644
--- a/NEWS
+++ b/NEWS
@@ -33,7 +33,7 @@ GNU coreutils NEWS                                    -*- outline -*-
 
 ** New features
 
-  cut, head, tail now have the -z,--zero-terminated option, and
+  comm, cut, head, tail now have the -z,--zero-terminated option, and
   tac --separator accepts an empty argument, to work with NUL delimited items.
 
   dd now summarizes sizes in --human-readable format too, not just --si.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index ba68416..99f0f2d 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -5126,6 +5126,8 @@ rather than the default of a single TAB character.
 
 The delimiter @var{str} may not be empty.
 
+@optZeroTerminated
+
 @end table
 
 @node ptx invocation
diff --git a/src/comm.c b/src/comm.c
index 89cee88..e66ac81 100644
--- a/src/comm.c
+++ b/src/comm.c
@@ -59,6 +59,9 @@ static bool seen_unpairable;
 /* If nonzero, we have warned about disorder in that file. */
 static bool issued_disorder_warning[2];
 
+/* line delimiter.  */
+static unsigned char delim = '\n';
+
 /* If nonzero, check that the input is correctly ordered. */
 static enum
   {
@@ -86,6 +89,7 @@ static struct option const long_options[] =
   {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
   {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
   {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
+  {"zero-terminated", no_argument, NULL, 'z'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -131,6 +135,9 @@ and column three contains lines common to both files.\n\
       fputs (_("\
   --output-delimiter=STR  separate columns with STR\n\
 "), stdout);
+      fputs (_("\
+  -z, --zero-terminated    line delimiter is NUL, not newline\n\
+"), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
       fputs (_("\
@@ -277,7 +284,8 @@ compare_files (char **infiles)
 
       fadvise (streams[i], FADVISE_SEQUENTIAL);
 
-      thisline[i] = readlinebuffer (all_line[i][alt[i][0]], streams[i]);
+      thisline[i] = readlinebuffer_delim (all_line[i][alt[i][0]], streams[i],
+                                          delim);
       if (ferror (streams[i]))
         error (EXIT_FAILURE, errno, "%s", quotef (infiles[i]));
     }
@@ -336,7 +344,8 @@ compare_files (char **infiles)
             alt[i][1] = alt[i][0];
             alt[i][0] = (alt[i][0] + 1) & 0x03;
 
-            thisline[i] = readlinebuffer (all_line[i][alt[i][0]], streams[i]);
+            thisline[i] = readlinebuffer_delim (all_line[i][alt[i][0]],
+                                                streams[i], delim);
 
             if (thisline[i])
               check_order (all_line[i][alt[i][1]], thisline[i], i + 1);
@@ -382,7 +391,7 @@ main (int argc, char **argv)
   issued_disorder_warning[0] = issued_disorder_warning[1] = false;
   check_input_order = CHECK_ORDER_DEFAULT;
 
-  while ((c = getopt_long (argc, argv, "123", long_options, NULL)) != -1)
+  while ((c = getopt_long (argc, argv, "123z", long_options, NULL)) != -1)
     switch (c)
       {
       case '1':
@@ -397,6 +406,10 @@ main (int argc, char **argv)
         both = false;
         break;
 
+      case 'z':
+        delim = '\0';
+        break;
+
       case NOCHECK_ORDER_OPTION:
         check_input_order = CHECK_ORDER_DISABLED;
         break;
diff --git a/tests/misc/comm.pl b/tests/misc/comm.pl
index 52d14ba..3232d63 100755
--- a/tests/misc/comm.pl
+++ b/tests/misc/comm.pl
@@ -28,14 +28,17 @@ my $prog = 'comm';
 @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
 
 my @inputs = ({IN=>{a=>"1\n3"}}, {IN=>{b=>"2\n3"}});
+my @zinputs = ({IN=>{za=>"1\0003"}}, {IN=>{zb=>"2\0003"}});
 
 my @Tests =
   (
    # basic operation
    ['basic', @inputs, {OUT=>"1\n\t2\n\t\t3\n"} ],
+   ['zbasic', '-z', @zinputs, {OUT=>"1\0\t2\0\t\t3\0"} ],
 
    # suppress lines unique to file 1
    ['opt-1', '-1', @inputs, {OUT=>"2\n\t3\n"} ],
+   ['zopt-1', '-z', '-1', @zinputs, {OUT=>"2\0\t3\0"} ],
 
    # suppress lines unique to file 2
    ['opt-2', '-2', @inputs, {OUT=>"1\n\t3\n"} ],
-- 
2.5.0


From d79b96f443388b81e65ee230e02b64b7484356f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Fri, 8 Jan 2016 15:42:56 +0000
Subject: [PATCH 5/7] comm: support NUL --output-delimiter for consistency

* src/comm.c (main): Track the output delimiter length,
so that it can be adjusted to 1 for the NUL delimiter.
Also rename the global variable from "delimiter" to
"col_sep" so its use is more obvious, and to distinguish
from the recently added "delim" global variable.
* tests/misc/comm.pl: Adjust accordingly.
---
 src/comm.c         | 34 +++++++++++++---------------------
 tests/misc/comm.pl | 12 +++++++-----
 2 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/src/comm.c b/src/comm.c
index e66ac81..802bf90 100644
--- a/src/comm.c
+++ b/src/comm.c
@@ -71,9 +71,9 @@ static enum
   } check_input_order;
 
 /* Output columns will be delimited with this string, which may be set
-   on the command-line with --output-delimiter=STR.  The default is a
-   single TAB character. */
-static char const *delimiter;
+   on the command-line with --output-delimiter=STR.  */
+static char const *col_sep = "\t";
+static size_t col_sep_len = 0;
 
 /* For long options that have no equivalent short option, use a
    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
@@ -174,20 +174,17 @@ writeline (struct linebuffer const *line, FILE *stream, int class)
     case 2:
       if (!only_file_2)
         return;
-      /* Print a delimiter if we are printing lines from file 1.  */
       if (only_file_1)
-        fputs (delimiter, stream);
+        fwrite (col_sep, 1, col_sep_len, stream);
       break;
 
     case 3:
       if (!both)
         return;
-      /* Print a delimiter if we are printing lines from file 1.  */
       if (only_file_1)
-        fputs (delimiter, stream);
-      /* Print a delimiter if we are printing lines from file 2.  */
+        fwrite (col_sep, 1, col_sep_len, stream);
       if (only_file_2)
-        fputs (delimiter, stream);
+        fwrite (col_sep, 1, col_sep_len, stream);
       break;
     }
 
@@ -419,14 +416,10 @@ main (int argc, char **argv)
         break;
 
       case OUTPUT_DELIMITER_OPTION:
-        if (delimiter && !STREQ (delimiter, optarg))
-          error (EXIT_FAILURE, 0, _("multiple delimiters specified"));
-        delimiter = optarg;
-        if (!*delimiter)
-          {
-            error (EXIT_FAILURE, 0, _("empty %s not allowed"),
-                   quote ("--output-delimiter"));
-          }
+        if (col_sep_len && !STREQ (col_sep, optarg))
+          error (EXIT_FAILURE, 0, _("multiple output delimiters specified"));
+        col_sep = optarg;
+        col_sep_len = *optarg ? strlen (optarg) : 1;
         break;
 
       case_GETOPT_HELP_CHAR;
@@ -437,6 +430,9 @@ main (int argc, char **argv)
         usage (EXIT_FAILURE);
       }
 
+  if (! col_sep_len)
+    col_sep_len = 1;
+
   if (argc - optind < 2)
     {
       if (argc <= optind)
@@ -452,10 +448,6 @@ main (int argc, char **argv)
       usage (EXIT_FAILURE);
     }
 
-  /* The default delimiter is a TAB. */
-  if (!delimiter)
-    delimiter = "\t";
-
   compare_files (argv + optind);
 
   if (issued_disorder_warning[0] || issued_disorder_warning[1])
diff --git a/tests/misc/comm.pl b/tests/misc/comm.pl
index 3232d63..c5cd27f 100755
--- a/tests/misc/comm.pl
+++ b/tests/misc/comm.pl
@@ -134,13 +134,15 @@ my @Tests =
    ['delim-2char', '--output-delimiter=++', @inputs,
     {OUT=>"1\n++2\n++++3\n"} ],
 
-   # invalid empty delimiter
-   ['delim-empty', '--output-delimiter=', @inputs, {EXIT=>1},
-    {ERR => "$prog: empty '--output-delimiter' not allowed\n"}],
+   # NUL delimiter
+   ['delim-empty', '--output-delimiter=', @inputs,
+    {OUT=>"1\n\0002\n\000\0003\n"} ],
+   ['zdelim-empty', '-z', '--output-delimiter=', @zinputs,
+    {OUT=>"1\000\0002\000\000\0003\000"} ],
 
    # invalid dual delimiter
-   ['delim-dual', '--output-delimiter=,', '--output-delimiter=+',
-    @inputs, {EXIT=>1}, {ERR => "$prog: multiple delimiters specified\n"}],
+   ['delim-dual', '--output-delimiter=,', '--output-delimiter=+', @inputs,
+    {EXIT=>1}, {ERR => "$prog: multiple output delimiters specified\n"}],
 
    # valid dual delimiter specification
    ['delim-dual2', '--output-delimiter=,', '--output-delimiter=,', @inputs,
-- 
2.5.0


From 77865899176cb67ea1f2b3e935a7a181f7ad77c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Fri, 8 Jan 2016 15:57:06 +0000
Subject: [PATCH 6/7] paste: add the -z,--zero-terminated option

* doc/coreutils.texi (paste invocation): Reference -z description.
* src/paste.c (main): Parameterize the use of '\n'.
* tests/misc/paste.pl: Add test cases.
* NEWS: Mention the new feature.
---
 NEWS                |  2 +-
 doc/coreutils.texi  |  2 ++
 src/paste.c         | 26 ++++++++++++++++++--------
 tests/misc/paste.pl | 10 ++++++++++
 4 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/NEWS b/NEWS
index 30a1526..929dfcd 100644
--- a/NEWS
+++ b/NEWS
@@ -33,7 +33,7 @@ GNU coreutils NEWS                                    -*- outline -*-
 
 ** New features
 
-  comm, cut, head, tail now have the -z,--zero-terminated option, and
+  comm, cut, head, paste, tail now have the -z,--zero-terminated option, and
   tac --separator accepts an empty argument, to work with NUL delimited items.
 
   dd now summarizes sizes in --human-readable format too, not just --si.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 99f0f2d..e878474 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -6000,6 +6000,8 @@ $ paste -d '%_' num2 let3 num2
 %c_
 @end example
 
+@optZeroTerminated
+
 @end table
 
 @exitstatus
diff --git a/src/paste.c b/src/paste.c
index a5acecd..bf99fe0 100644
--- a/src/paste.c
+++ b/src/paste.c
@@ -67,10 +67,13 @@ static char *delims;
 /* A pointer to the character after the end of 'delims'. */
 static char const *delim_end;
 
+static unsigned char line_delim = '\n';
+
 static struct option const longopts[] =
 {
   {"serial", no_argument, NULL, 's'},
   {"delimiters", required_argument, NULL, 'd'},
+  {"zero-terminated", no_argument, NULL, 'z'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -250,7 +253,7 @@ paste_parallel (size_t nfiles, char **fnamptr)
               while (chr != EOF)
                 {
                   sometodo = true;
-                  if (chr == '\n')
+                  if (chr == line_delim)
                     break;
                   xputchar (chr);
                   chr = getc (fileptr[i]);
@@ -295,7 +298,7 @@ paste_parallel (size_t nfiles, char **fnamptr)
                             write_error ();
                           delims_saved = 0;
                         }
-                      xputchar ('\n');
+                      xputchar (line_delim);
                     }
                   continue;	/* Next read of files, or exit. */
                 }
@@ -316,7 +319,7 @@ paste_parallel (size_t nfiles, char **fnamptr)
               /* Except for last file, replace last newline with delim. */
               if (i + 1 != nfiles)
                 {
-                  if (chr != '\n' && chr != EOF)
+                  if (chr != line_delim && chr != EOF)
                     xputchar (chr);
                   if (*delimptr != EMPTY_DELIM)
                     xputchar (*delimptr);
@@ -327,7 +330,7 @@ paste_parallel (size_t nfiles, char **fnamptr)
                 {
                   /* If the last line of the last file lacks a newline,
                      print one anyhow.  POSIX requires this.  */
-                  char c = (chr == EOF ? '\n' : chr);
+                  char c = (chr == EOF ? line_delim : chr);
                   xputchar (c);
                 }
             }
@@ -386,7 +389,7 @@ paste_serial (size_t nfiles, char **fnamptr)
           while ((charnew = getc (fileptr)) != EOF)
             {
               /* Process the old character. */
-              if (charold == '\n')
+              if (charold == line_delim)
                 {
                   if (*delimptr != EMPTY_DELIM)
                     xputchar (*delimptr);
@@ -405,8 +408,8 @@ paste_serial (size_t nfiles, char **fnamptr)
           xputchar (charold);
         }
 
-      if (charold != '\n')
-        xputchar ('\n');
+      if (charold != line_delim)
+        xputchar (line_delim);
 
       if (ferror (fileptr))
         {
@@ -447,6 +450,9 @@ each FILE, separated by TABs, to standard output.\n\
   -d, --delimiters=LIST   reuse characters from LIST instead of TABs\n\
   -s, --serial            paste one file at a time instead of in parallel\n\
 "), stdout);
+      fputs (_("\
+  -z, --zero-terminated   line delimiter is NUL, not newline\n\
+"), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
       /* FIXME: add a couple of examples.  */
@@ -473,7 +479,7 @@ main (int argc, char **argv)
   have_read_stdin = false;
   serial_merge = false;
 
-  while ((optc = getopt_long (argc, argv, "d:s", longopts, NULL)) != -1)
+  while ((optc = getopt_long (argc, argv, "d:sz", longopts, NULL)) != -1)
     {
       switch (optc)
         {
@@ -486,6 +492,10 @@ main (int argc, char **argv)
           serial_merge = true;
           break;
 
+        case 'z':
+          line_delim = '\0';
+          break;
+
         case_GETOPT_HELP_CHAR;
 
         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
diff --git a/tests/misc/paste.pl b/tests/misc/paste.pl
index b4409e7..7c95597 100755
--- a/tests/misc/paste.pl
+++ b/tests/misc/paste.pl
@@ -34,6 +34,11 @@ my @Tests =
    ['no-nl-3', {IN=>"a"},   {IN=>"b\n"}, {OUT=>"a\tb\n"}],
    ['no-nl-4', {IN=>"a\n"}, {IN=>"b\n"}, {OUT=>"a\tb\n"}],
 
+   ['zno-nl-1', '-z', {IN=>"a"},   {IN=>"b"},   {OUT=>"a\tb\0"}],
+   ['zno-nl-2', '-z', {IN=>"a\0"}, {IN=>"b"},   {OUT=>"a\tb\0"}],
+   ['zno-nl-3', '-z', {IN=>"a"},   {IN=>"b\0"}, {OUT=>"a\tb\0"}],
+   ['zno-nl-4', '-z', {IN=>"a\0"}, {IN=>"b\0"}, {OUT=>"a\tb\0"}],
+
    # Same as above, but with a two lines in each input file and
    # the addition of the -d option to make SPACE be the output delimiter.
    ['no-nla1', '-d" "', {IN=>"1\na"},   {IN=>"2\nb"},   {OUT=>"1 2\na b\n"}],
@@ -41,6 +46,11 @@ my @Tests =
    ['no-nla3', '-d" "', {IN=>"1\na"},   {IN=>"2\nb\n"}, {OUT=>"1 2\na b\n"}],
    ['no-nla4', '-d" "', {IN=>"1\na\n"}, {IN=>"2\nb\n"}, {OUT=>"1 2\na b\n"}],
 
+   ['zno-nla1', '-zd" "', {IN=>"1\0a"},   {IN=>"2\0b"},   {OUT=>"1 2\0a b\0"}],
+   ['zno-nla2', '-zd" "', {IN=>"1\0a\0"}, {IN=>"2\0b"},   {OUT=>"1 2\0a b\0"}],
+   ['zno-nla3', '-zd" "', {IN=>"1\0a"},   {IN=>"2\0b\0"}, {OUT=>"1 2\0a b\0"}],
+   ['zno-nla4', '-zd" "', {IN=>"1\0a\0"}, {IN=>"2\0b\0"}, {OUT=>"1 2\0a b\0"}],
+
    # Specifying a delimiter with a trailing backslash would overrun a
    # malloc'd buffer.
    ['delim-bs1', q!-d'\'!, {IN=>{'a'x50=>''}}, {EXIT => 1},
-- 
2.5.0


From 25d17125badcb603a9a303ca6a5f6912bfc01b41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Fri, 8 Jan 2016 16:29:40 +0000
Subject: [PATCH 7/7] wc: add the -z,--zero option

* doc/coreutils.texi (wc invocation): Reference the --zero description.
* src/wc.c (main): Parse the --zero option.
(write_counts): Write the "line_end" character instead of '\n'.
* tests/misc/wc-files0.sh: Add a test case.
* NEWS: Mention the new feature.
---
 NEWS                    |  2 +-
 doc/coreutils.texi      |  2 ++
 src/wc.c                | 11 ++++++++++-
 tests/misc/wc-files0.sh |  8 ++++++++
 4 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/NEWS b/NEWS
index 929dfcd..2079887 100644
--- a/NEWS
+++ b/NEWS
@@ -33,7 +33,7 @@ GNU coreutils NEWS                                    -*- outline -*-
 
 ** New features
 
-  comm, cut, head, paste, tail now have the -z,--zero-terminated option, and
+  comm, cut, head, paste, tail, wc now have the -z,--zero-terminated option, and
   tac --separator accepts an empty argument, to work with NUL delimited items.
 
   dd now summarizes sizes in --human-readable format too, not just --si.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index e878474..771418e 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3677,6 +3677,8 @@ find . -name '*.[ch]' -print0 |
   wc -L --files0-from=- | tail -n1
 @end example
 
+@optZero
+
 @end table
 
 @exitstatus
diff --git a/src/wc.c b/src/wc.c
index c2a9c3f..ea2c20f 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -70,6 +70,9 @@ static int number_width;
 /* True if we have ever read the standard input. */
 static bool have_read_stdin;
 
+/* Support NUL line endings.  */
+static unsigned char line_end = '\n';
+
 /* The result of calling fstat or stat on a file descriptor or file.  */
 struct fstatus
 {
@@ -96,6 +99,7 @@ static struct option const longopts[] =
   {"words", no_argument, NULL, 'w'},
   {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
   {"max-line-length", no_argument, NULL, 'L'},
+  {"zero", no_argument, NULL, 'z'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -135,6 +139,7 @@ the following order: newline, word, character, byte, maximum line length.\n\
                            If F is - then read names from standard input\n\
   -L, --max-line-length  print the maximum display width\n\
   -w, --words            print the word counts\n\
+  -z, --zero             end each output line with NUL, not newline\n\
 "), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
@@ -183,7 +188,7 @@ write_counts (uintmax_t lines,
     }
   if (file)
     printf (" %s", file);
-  putchar ('\n');
+  putchar (line_end);
 }
 
 /* Count words.  FILE_X is the name of the file (or NULL for standard
@@ -669,6 +674,10 @@ main (int argc, char **argv)
         print_linelength = true;
         break;
 
+      case 'z':
+        line_end = '\0';
+        break;
+
       case FILES0_FROM_OPTION:
         files_from = optarg;
         break;
diff --git a/tests/misc/wc-files0.sh b/tests/misc/wc-files0.sh
index b6a204c..6220e72 100755
--- a/tests/misc/wc-files0.sh
+++ b/tests/misc/wc-files0.sh
@@ -40,4 +40,12 @@ if test "$fail" = ''; then
   compare exp out || fail=1
 fi
 
+if test "$fail" = ''; then
+  # Repeat the above test, but output NULs instead of newlines
+  rm -f out
+  tr '\n' '\0' < exp > zexp || framework_failure_
+  wc -z --files0-from=- < names > out || fail=1
+  compare zexp out || fail=1
+fi
+
 Exit $fail
-- 
2.5.0

Re: RFE: head,tail: -z, --zero-terminated

Reply via email to