On 14/12/2020 22:27, Bernhard Voelker wrote:
Hi Padraig,

On 12/14/20 5:53 PM, Pádraig Brady wrote:
I've attached two patches to address this.

'git am' complains:
   Patch format detection failed.
At least the usual patch end marker "---\n$GITVERSION" seems to be
missing, but adding one doesn't help either; 'patch -p1' works though.

Sorry I attached output from `git show` rather than `git format-patch`.

There's one more place to clarify the GNU extension:

   $ nl --help
   ...
   -d, --section-delimiter=CC      use CC for logical page delimiters
   ...
   CC are two delimiter characters used to construct logical page delimiters;
   a missing second character implies ':'.

will do

Finally: what about 0-length section-delimiters?
   $ nl -d ''
Should we document this case as well?

Good point. We should document how this is handled.

The patch also fixes this case - and now uses the whole 
DEFAULT_SECTION_DELIMITERS:

   $ printf '%s\n' a '\:' c | /usr/bin/nl -d ''
        1       a
        2       \:
        3       c
   $ printf '%s\n' a '\:' c | src/nl -d ''
        1       a

          c

Therefore, `nl -d ''` is identical with `nl -d '\:'` now.  This behavior
looks more consistent than before to me.

TBH in retrospect I agree with Kobayashi here,
that `nl -d ''` should disable section matching.
It does this in an awkward way now though,
which might access uninitialized memory.
I've cleaned this up in the attached.

You can apply the 3 diffs by using git am on the attached.

thanks for the review,
Pádraig
>From 3dacee6a6d60f03050bb8ac878b7f45d0be66756 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Tue, 15 Dec 2020 01:02:32 +0000
Subject: [PATCH 1/3] maint: refactor nl section delimiter handling

* src/nl.c (main): Update the default delimiter characters
when passed two characters with --section-delimiter.
Avoid redundant copies for the body and footer delimiter strings,
and instead, just offset into the header string.
(check_section): Avoid redundant comparing of 2 bytes of memory
for an empty delimiter.
---
 src/nl.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/nl.c b/src/nl.c
index 959909f05..a1b38a7e2 100644
--- a/src/nl.c
+++ b/src/nl.c
@@ -54,7 +54,7 @@ static char const FORMAT_RIGHT_LZ[] = "%0*" PRIdMAX "%s";
 static char const FORMAT_LEFT[] = "%-*" PRIdMAX "%s";
 
 /* Default section delimiter characters.  */
-static char const DEFAULT_SECTION_DELIMITERS[] = "\\:";
+static char DEFAULT_SECTION_DELIMITERS[] = "\\:";
 
 /* Types of input lines: either one of the section delimiters,
    or text to output. */
@@ -96,7 +96,7 @@ static struct re_pattern_buffer *current_regex = NULL;
 static char const *separator_str = "\t";
 
 /* Input section delimiter string (-d).  */
-static char const *section_del = DEFAULT_SECTION_DELIMITERS;
+static char *section_del = DEFAULT_SECTION_DELIMITERS;
 
 /* Header delimiter string.  */
 static char *header_del = NULL;
@@ -388,7 +388,8 @@ check_section (void)
 {
   size_t len = line_buf.length - 1;
 
-  if (len < 2 || memcmp (line_buf.buffer, section_del, 2))
+  if (len < 2 || footer_del_len < 2
+      || memcmp (line_buf.buffer, section_del, 2))
     return Text;
   if (len == header_del_len
       && !memcmp (line_buf.buffer, header_del, header_del_len))
@@ -553,7 +554,14 @@ main (int argc, char **argv)
             }
           break;
         case 'd':
-          section_del = optarg;
+          if (strlen (optarg) == 2)  /* POSIX.  */
+            {
+              char *p = section_del;
+              while (*optarg)
+                *p++ = *optarg++;
+            }
+          else
+            section_del = optarg;  /* GNU extension.  */
           break;
         case_GETOPT_HELP_CHAR;
         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
@@ -574,12 +582,10 @@ main (int argc, char **argv)
   stpcpy (stpcpy (stpcpy (header_del, section_del), section_del), section_del);
 
   body_del_len = len * 2;
-  body_del = xmalloc (body_del_len + 1);
-  stpcpy (stpcpy (body_del, section_del), section_del);
+  body_del = header_del + len;
 
   footer_del_len = len;
-  footer_del = xmalloc (footer_del_len + 1);
-  stpcpy (footer_del, section_del);
+  footer_del = body_del + len;
 
   /* Initialize the input buffer.  */
   initbuffer (&line_buf);
-- 
2.26.2


>From 1db75b55faa7c1389a344c889bf59138030860cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Tue, 15 Dec 2020 01:06:50 +0000
Subject: [PATCH 2/3] doc: mention the GNU extensions to nl --section-delimiter

* doc/coreutils.texi (nl invocation): Mention the GNU extensions
of allowing arbitrary length and empty delimiter strings.
* src/nl.c (usage): Likewise.
* tests/misc/nl.sh: Add test cases for the GNU extensions.
---
 doc/coreutils.texi |  9 ++++++---
 src/nl.c           |  4 +++-
 tests/misc/nl.sh   | 20 ++++++++++++++++++++
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 5ac3745bd..df0655c20 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -1805,9 +1805,9 @@ start of body;
 start of footer.
 @end table
 
-The two characters from which these strings are made can be changed from
-@samp{\} and @samp{:} via options (see below), but the pattern and
-length of each string cannot be changed.
+The characters from which these strings are made can be changed from
+@samp{\} and @samp{:} via options (see below), but the pattern
+of each string cannot be changed.
 
 A section delimiter is replaced by an empty line on output.  Any text
 that comes before the first section delimiter string in the input file
@@ -1847,6 +1847,9 @@ expression @var{bre}.
 @cindex section delimiters of pages
 Set the section delimiter characters to @var{cd}; default is
 @samp{\:}.  If only @var{c} is given, the second remains @samp{:}.
+As a GNU extension more than two characters can be specified,
+and also if @var{cd} is empty (@option{-d ''}), then section
+matching is disabled.
 (Remember to protect @samp{\} or other metacharacters from shell
 expansion with quotes or extra backslashes.)
 
diff --git a/src/nl.c b/src/nl.c
index a1b38a7e2..d1ce8e774 100644
--- a/src/nl.c
+++ b/src/nl.c
@@ -211,7 +211,9 @@ Write each FILE to standard output, with line numbers added.\n\
 Default options are: -bt -d'\\:' -fn -hn -i1 -l1 -n'rn' -s<TAB> -v1 -w6\n\
 \n\
 CC are two delimiter characters used to construct logical page delimiters;\n\
-a missing second character implies ':'.\n\
+a missing second character implies ':'.  As a GNU extension one can specify\n\
+more than two characters, and also an empty string (-d '') can be specified\n\
+to disable section matching.\n\
 "), stdout);
       fputs (_("\
 \n\
diff --git a/tests/misc/nl.sh b/tests/misc/nl.sh
index fd9c5326c..0d57f3443 100755
--- a/tests/misc/nl.sh
+++ b/tests/misc/nl.sh
@@ -79,4 +79,24 @@ compare exp out || fail=1
 printf '%s\n' a b c > in.txt || framework_failure_
 returns_ 1 nl -v$INTMAX_MAX -i$INTMAX_MIN in.txt > out || fail=1
 
+# Test GNU extension to --section-delimiter, of disabling section matching
+printf '%s\n' a '\:\:' c > in.txt || framework_failure_
+nl -d '' in.txt > out || fail=1
+cat <<\EOF > exp
+     1	a
+     2	\:\:
+     3	c
+EOF
+compare exp out || fail=1
+
+# Test GNU extension to --section-delimiter, of supporting strings longer than 2
+printf '%s\n' a foofoo c > in.txt || framework_failure_
+nl -d 'foo' in.txt > out || fail=1
+cat <<EOF > exp
+     1	a
+
+     1	c
+EOF
+compare exp out || fail=1
+
 Exit $fail
-- 
2.26.2


>From c7b7ba84fc2c27e4cbf1c967558b0d618e5d10c3 Mon Sep 17 00:00:00 2001
From: KOBAYASHI Takashi <a141...@aiit.ac.jp>
Date: Mon, 14 Dec 2020 16:17:10 +0000
Subject: [PATCH 3/3] nl: fix --section-delimiter handling of single characters

* src/nl.c (main): Enforce the POSIX specified
behavior of assuming ':' is specified after a single
character argument to -d.
* tests/misc/nl.sh: Add a test case.
* NEWS: Mention the bug fix.
---
 NEWS             |  4 ++++
 src/nl.c         |  3 ++-
 tests/misc/nl.sh | 11 +++++++++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index d2cb9ae48..dfc7bfa41 100644
--- a/NEWS
+++ b/NEWS
@@ -20,6 +20,10 @@ GNU coreutils NEWS                                    -*- outline -*-
   ls no longer crashes when printing the SELinux context for unstatable files.
   [bug introduced in coreutils-6.9.91]
 
+  nl now handles single character --section-delimiter arguments,
+  by assuming a second ':' character has been specified, as specified by POSIX.
+  [This bug was present in "the beginning".]
+
   rm no longer skips an extra file when the removal of an empty directory fails.
   [bug introduced by the rewrite to use fts in coreutils-8.0]
 
diff --git a/src/nl.c b/src/nl.c
index d1ce8e774..bbe48d666 100644
--- a/src/nl.c
+++ b/src/nl.c
@@ -556,7 +556,8 @@ main (int argc, char **argv)
             }
           break;
         case 'd':
-          if (strlen (optarg) == 2)  /* POSIX.  */
+          len = strlen (optarg);
+          if (len == 1 || len == 2)  /* POSIX.  */
             {
               char *p = section_del;
               while (*optarg)
diff --git a/tests/misc/nl.sh b/tests/misc/nl.sh
index 0d57f3443..b64ab8e11 100755
--- a/tests/misc/nl.sh
+++ b/tests/misc/nl.sh
@@ -99,4 +99,15 @@ cat <<EOF > exp
 EOF
 compare exp out || fail=1
 
+# Ensure single char delimiters assume a following ':' character (as per POSIX)
+# coreutils <= v8.32 didn't match single char delimiters at all
+printf '%s\n' a x:x: c > in.txt || framework_failure_
+nl -d 'x' in.txt > out || fail=1
+cat <<EOF > exp
+     1	a
+
+     1	c
+EOF
+compare exp out || fail=1
+
 Exit $fail
-- 
2.26.2

Reply via email to