Tags: patch

While looking into something else I noticed that 'grep -T' does not work under an Emacs shell window. grep outputs tab-backspace-':'-C to align C to the next tab stop, but under Emacs the backspace undoes the tab and so the output is not aligned. Arguably this is a problem with Emacs, but even if Emacs were changed the grep approach is not portable: Vim displays "^H:" instead, for example, and this also breaks alignment. grep should use a tab character to go to the next tab stop; this is more reliable and is simpler.

While in the neighborhood I noticed that -Tn alignment does not work for files that are sufficiently large (10,000 lines, say). Plus, nowadays we can rely on PRIuMAX to simplify printf.

Proposed patches attached. Normally I'd just install this sort of thing, but since we have a release candidate out I thought I'd ask first.

From 5c052cb9cd7e0b972d8e7d44f3eb16c54a20af11 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Fri, 16 Sep 2016 09:25:31 -0700
Subject: [PATCH 1/3] grep: simplify by using PRIuMAX

* src/grep.c (print_offset): Simplify.
---
 src/grep.c | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/src/grep.c b/src/grep.c
index 65916ca..9777cfa 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1073,26 +1073,8 @@ print_sep (char sep)
 static void
 print_offset (uintmax_t pos, int min_width, const char *color)
 {
-  /* Do not rely on printf to print pos, since uintmax_t may be longer
-     than long, and long long is not portable.  */
-
-  char buf[sizeof pos * CHAR_BIT];
-  char *p = buf + sizeof buf;
-
-  do
-    {
-      *--p = '0' + pos % 10;
-      --min_width;
-    }
-  while ((pos /= 10) != 0);
-
-  /* Do this to maximize the probability of alignment across lines.  */
-  if (align_tabs)
-    while (--min_width >= 0)
-      *--p = ' ';
-
   pr_sgr_start_if (color);
-  fwrite_errno (p, 1, buf + sizeof buf - p);
+  printf_errno ("%*"PRIuMAX, align_tabs ? min_width : 0, pos);
   pr_sgr_end_if (color);
 }
 
-- 
2.7.4

From 7a5cd13f38ac9d2b6d0c8cdada45333686bd0319 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Fri, 16 Sep 2016 10:52:02 -0700
Subject: [PATCH 2/3] grep: -T no longer outputs BS

* NEWS: Document this.
* src/grep.c (print_line_head): Do not attempt to backspace output.
* tests/initial-tab: New test.
* tests/Makefile.am (TESTS): Add it.
---
 NEWS              |  4 ++++
 src/grep.c        | 25 +++++--------------------
 tests/Makefile.am |  1 +
 tests/initial-tab | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 42 insertions(+), 20 deletions(-)
 create mode 100755 tests/initial-tab

diff --git a/NEWS b/NEWS
index a63a7b2..5613283 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,10 @@ GNU grep NEWS                                    -*- outline -*-
   Grep no longer omits output merely because it follows an output line
   suppressed due to encoding errors.  [bug introduced in grep-2.21]
 
+  To output ':' and tab-align the following character C, grep -T no
+  longer outputs tab-backspace-':'-C, an approach that has problems if
+  run inside an Emacs shell window.  [bug introduced in grep-2.5.2]
+
 ** Improvements
 
   grep can be much faster now when standard output is /dev/null.
diff --git a/src/grep.c b/src/grep.c
index 9777cfa..594b3f9 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1102,13 +1102,11 @@ print_line_head (char *beg, size_t len, char const *lim, char sep)
         }
     }
 
-  bool pending_sep = false;
-
   if (out_file)
     {
       print_filename ();
       if (filename_mask)
-        pending_sep = true;
+        print_sep (sep);
       else
         putchar_errno (0);
     }
@@ -1121,34 +1119,21 @@ print_line_head (char *beg, size_t len, char const *lim, char sep)
           totalnl = add_count (totalnl, 1);
           lastnl = lim;
         }
-      if (pending_sep)
-        print_sep (sep);
       print_offset (totalnl, 4, line_num_color);
-      pending_sep = true;
+      print_sep (sep);
     }
 
   if (out_byte)
     {
       uintmax_t pos = add_count (totalcc, beg - bufbeg);
       pos = dossified_pos (pos);
-      if (pending_sep)
-        print_sep (sep);
       print_offset (pos, 6, byte_num_color);
-      pending_sep = true;
-    }
-
-  if (pending_sep)
-    {
-      /* This assumes sep is one column wide.
-         Try doing this any other way with Unicode
-         (and its combining and wide characters)
-         filenames and you're wasting your efforts.  */
-      if (align_tabs)
-        fputs_errno ("\t\b");
-
       print_sep (sep);
     }
 
+  if (align_tabs && (out_file | out_line | out_byte) && len != 0)
+    putchar_errno ('\t');
+
   return true;
 }
 
diff --git a/tests/Makefile.am b/tests/Makefile.am
index f4c82f4..a7187ff 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -108,6 +108,7 @@ TESTS =						\
   in-eq-out-infloop				\
   include-exclude				\
   inconsistent-range				\
+  initial-tab					\
   invalid-multibyte-infloop			\
   khadafy					\
   kwset-abuse					\
diff --git a/tests/initial-tab b/tests/initial-tab
new file mode 100755
index 0000000..af5efb3
--- /dev/null
+++ b/tests/initial-tab
@@ -0,0 +1,32 @@
+#!/bin/sh
+# Exercise -T.
+
+# Copyright 2016 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+fail=0
+
+printf 'x\n\n' > in || framework_failure_
+
+grep -T '^' in > out || fail=1
+compare in out || fail=1
+
+printf 'in:\tx\nin:\n' > exp || framework_failure_
+grep -T '^' in /dev/null > out || fail=1
+compare exp out || fail=1
+
+Exit $fail
-- 
2.7.4

From 782cebd65ca2452eb7466afef2408e68532dcd3a Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Fri, 16 Sep 2016 12:45:15 -0700
Subject: [PATCH 3/3] grep: -T now adjusts number widths for worst case

* NEWS, doc/grep.texi (Output Line Prefix Control): Document this.
* src/grep.c (offset_width): New static var.
(print_offset): Use it instead of arg.  All callers changed.
(grep): Set it.
* tests/initial-tab: Test this.
---
 NEWS              |  4 ++++
 doc/grep.texi     |  6 ++----
 src/grep.c        | 22 ++++++++++++++++++----
 tests/initial-tab |  5 +++++
 4 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/NEWS b/NEWS
index 5613283..8a2a0dd 100644
--- a/NEWS
+++ b/NEWS
@@ -11,6 +11,10 @@ GNU grep NEWS                                    -*- outline -*-
   longer outputs tab-backspace-':'-C, an approach that has problems if
   run inside an Emacs shell window.  [bug introduced in grep-2.5.2]
 
+  grep -T now uses worst-case widths of line numbers and byte offsets
+  instead of guessing widths that might not work with larger files.
+  [bug introduced in grep-2.5.2]
+
 ** Improvements
 
   grep can be much faster now when standard output is /dev/null.
diff --git a/doc/grep.texi b/doc/grep.texi
index fcfad42..c1648df 100644
--- a/doc/grep.texi
+++ b/doc/grep.texi
@@ -460,10 +460,8 @@ Make sure that the first character of actual line content lies on a tab stop,
 so that the alignment of tabs looks normal.
 This is useful with options that prefix their output to the actual content:
 @option{-H}, @option{-n}, and @option{-b}.
-In order to improve the probability that lines
-from a single file will all start at the same column,
-this also causes the line number and byte offset (if present)
-to be printed in a minimum-size field width.
+This may also prepend spaces to output line numbers and byte offsets
+so that lines from a single file all start at the same column.
 
 @item -u
 @itemx --unix-byte-offsets
diff --git a/src/grep.c b/src/grep.c
index 594b3f9..84094b7 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -80,6 +80,9 @@ static bool only_matching;
 /* If nonzero, make sure first content char in a line is on a tab stop. */
 static bool align_tabs;
 
+/* Print width of line numbers and byte offsets.  Nonzero if ALIGN_TABS.  */
+static int offset_width;
+
 /* See below */
 struct FL_pair
   {
@@ -1071,10 +1074,10 @@ print_sep (char sep)
 
 /* Print a line number or a byte offset.  */
 static void
-print_offset (uintmax_t pos, int min_width, const char *color)
+print_offset (uintmax_t pos, const char *color)
 {
   pr_sgr_start_if (color);
-  printf_errno ("%*"PRIuMAX, align_tabs ? min_width : 0, pos);
+  printf_errno ("%*"PRIuMAX, offset_width, pos);
   pr_sgr_end_if (color);
 }
 
@@ -1119,7 +1122,7 @@ print_line_head (char *beg, size_t len, char const *lim, char sep)
           totalnl = add_count (totalnl, 1);
           lastnl = lim;
         }
-      print_offset (totalnl, 4, line_num_color);
+      print_offset (totalnl, line_num_color);
       print_sep (sep);
     }
 
@@ -1127,7 +1130,7 @@ print_line_head (char *beg, size_t len, char const *lim, char sep)
     {
       uintmax_t pos = add_count (totalcc, beg - bufbeg);
       pos = dossified_pos (pos);
-      print_offset (pos, 6, byte_num_color);
+      print_offset (pos, byte_num_color);
       print_sep (sep);
     }
 
@@ -1479,6 +1482,17 @@ grep (int fd, struct stat const *st)
       return 0;
     }
 
+  offset_width = 0;
+  if (align_tabs)
+    {
+      /* Width is log of maximum number.  Line numbers are origin-1.  */
+      uintmax_t num = usable_st_size (st) ? st->st_size : UINTMAX_MAX;
+      num += out_line && num < UINTMAX_MAX;
+      do
+        offset_width++;
+      while ((num /= 10) != 0);
+    }
+
   for (bool firsttime = true; ; firsttime = false)
     {
       if (nlines_first_null < 0 && eol && binary_files != TEXT_BINARY_FILES
diff --git a/tests/initial-tab b/tests/initial-tab
index af5efb3..8b3cb8c 100755
--- a/tests/initial-tab
+++ b/tests/initial-tab
@@ -29,4 +29,9 @@ printf 'in:\tx\nin:\n' > exp || framework_failure_
 grep -T '^' in /dev/null > out || fail=1
 compare exp out || fail=1
 
+printf '%s\n' a b c d e f g h i j > in1 || framework_failure_
+printf 'in1: 1:\ta\n' > exp1 || framework_failure_
+grep -Tn 'a' in1 /dev/null > out1 || fail=1
+compare exp1 out1 || fail=1
+
 Exit $fail
-- 
2.7.4

Reply via email to