On 05/04/2017 10:28 AM, Jim Meyering wrote:
> Making cmp perform that task does not seem worthwhile, because one
> would have to choose between always counting newlines (like wc -l
> does), and thus incurring this added cost all the time, or accepting a
> partial (and racy) solution of rereading the regular file upon EOF.

There's a third way that satisfies Dan Jacobson's request in the usual case without incurring added cost. A proposed patch is attached, with a simple test for 'cmp' (which is a good thing to have in any event).

From 49cb18b133ce7f90dcf069dbd66dbe133f65c669 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 4 May 2017 17:17:23 -0700
Subject: [PATCH] cmp: report prefix length when one file is shorter

Requested by Dan Jacobson (Bug#22816).
* NEWS:
* doc/diffutils.texi (Invoking cmp, cmp Options): Document this.
* src/cmp.c (cmp): Implement it.
* tests/Makefile.am (TESTS): Add cmp.
* tests/cmp: New file.
---
 NEWS               |   5 ++
 doc/diffutils.texi |  20 ++++---
 src/cmp.c          |  20 ++++++-
 tests/Makefile.am  |   1 +
 tests/cmp          | 165 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 200 insertions(+), 11 deletions(-)
 create mode 100755 tests/cmp

diff --git a/NEWS b/NEWS
index 2432e4f..740b172 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,11 @@ GNU diffutils NEWS                                    -*- outline -*-
 
 * Noteworthy changes in release ?.? (????-??-??) [?]
 
+** New features
+
+  When one file is a prefix of the other, cmp now appends the shorter
+  file's size to the EOF diagnostic.
+
 ** Bug fixes
 
   Using an invalid regular expression with --ignore-matching-lines=RE (-I)
diff --git a/doc/diffutils.texi b/doc/diffutils.texi
index 16da09e..ccc034a 100644
--- a/doc/diffutils.texi
+++ b/doc/diffutils.texi
@@ -3502,23 +3502,24 @@ ignore at the start of each file; they are equivalent to the
 @option{--ignore-initial=@var{from-skip}:@var{to-skip}} option.
 
 By default, @command{cmp} outputs nothing if the two files have the
-same contents.  If one file is a prefix of the other, @command{cmp}
-prints to standard error a message of the following form:
+same contents.  If the two files have bytes that differ, @command{cmp}
+reports the location of the first difference to standard output:
 
 @example
-cmp: EOF on @var{shorter-file}
+@var{from-file} @var{to-file} differ: char @var{byte-number}, line @var{line-number}
 @end example
 
-Otherwise, @command{cmp} prints to standard output a message of the
-following form:
+@noindent
+If one file is a prefix of the other, @command{cmp} reports the
+shorter file's length to standard error (@acronym{POSIX} allows but
+does not require the shorter file's name to be followed by a blank and
+additional information):
 
 @example
-@var{from-file} @var{to-file} differ: char @var{byte-number}, line @var{line-number}
+cmp: EOF on @var{shorter-file} after byte @var{byte-number}, line @var{line-number}
 @end example
 
 The message formats can differ outside the @acronym{POSIX} locale.
-Also, @acronym{POSIX} allows the @acronym{EOF} message to be followed
-by a blank and some additional information.
 
 An exit status of 0 means no differences were found, 1 means some
 differences were found, and 2 means trouble.
@@ -3565,7 +3566,8 @@ instead of the default standard output.
 Each output line contains a differing byte's number relative to the
 start of the input, followed by the differing byte values.
 Byte numbers start at 1.
-Also, output the @acronym{EOF} message if one file is shorter than the other.
+Also, if one file is shorter than the other, output the @acronym{EOF}
+message with just a byte number.
 
 @item -n @var{count}
 @itemx --bytes=@var{count}
diff --git a/src/cmp.c b/src/cmp.c
index 748c212..9cf0517 100644
--- a/src/cmp.c
+++ b/src/cmp.c
@@ -558,8 +558,24 @@ cmp (void)
 	{
 	  if (differing <= 0 && comparison_type != type_status)
 	    {
-	      /* See POSIX 1003.1-2001 for this format.  */
-	      fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
+	      char const *shorter_file = file[read1 < read0];
+	      char byte_buf[INT_BUFSIZE_BOUND (off_t)];
+	      char const *byte_num = offtostr (byte_number - 1, byte_buf);
+
+	      /* See POSIX 1003.1-2001 for the constraints on these
+		 format strings.  */
+	      if (comparison_type == type_first_diff)
+		{
+		  char line_buf[INT_BUFSIZE_BOUND (off_t)];
+		  char const *line_num = offtostr (line_number - 1, line_buf);
+		  fprintf (stderr,
+			   _("cmp: EOF on %s after byte %s, line %s\n"),
+			   shorter_file, byte_num, line_num);
+		}
+	      else
+		fprintf (stderr,
+			 _("cmp: EOF on %s after byte %s\n"),
+			 shorter_file, byte_num);
 	    }
 
 	  return EXIT_FAILURE;
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 2c44609..6668863 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -5,6 +5,7 @@ TESTS = \
   bignum \
   binary \
   brief-vs-stat-zero-kernel-lies \
+  cmp \
   colliding-file-names \
   diff3 \
   excess-slash \
diff --git a/tests/cmp b/tests/cmp
new file mode 100755
index 0000000..58061f2
--- /dev/null
+++ b/tests/cmp
@@ -0,0 +1,165 @@
+#!/bin/sh
+# Test 'cmp'.
+
+# Copyright 2017 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+fail=0
+
+cat <<'EOF' > exp || fail=1
+cmp a a
+0
+cmp a b
+a b differ: char 1, line 1
+1
+cmp a c
+cmp: EOF on c after byte 0, line 0
+1
+cmp a d
+cmp: d: No such file or directory
+2
+cmp b a
+b a differ: char 1, line 1
+1
+cmp b b
+0
+cmp b c
+cmp: EOF on c after byte 0, line 0
+1
+cmp b d
+cmp: d: No such file or directory
+2
+cmp c a
+cmp: EOF on c after byte 0, line 0
+1
+cmp c b
+cmp: EOF on c after byte 0, line 0
+1
+cmp c c
+0
+cmp c d
+cmp: d: No such file or directory
+2
+cmp d a
+cmp: d: No such file or directory
+2
+cmp d b
+cmp: d: No such file or directory
+2
+cmp d c
+cmp: d: No such file or directory
+2
+cmp d d
+cmp: d: No such file or directory
+2
+cmp -l a a
+0
+cmp -l a b
+1 141 142
+1
+cmp -l a c
+cmp: EOF on c after byte 0
+1
+cmp -l a d
+cmp: d: No such file or directory
+2
+cmp -l b a
+1 142 141
+1
+cmp -l b b
+0
+cmp -l b c
+cmp: EOF on c after byte 0
+1
+cmp -l b d
+cmp: d: No such file or directory
+2
+cmp -l c a
+cmp: EOF on c after byte 0
+1
+cmp -l c b
+cmp: EOF on c after byte 0
+1
+cmp -l c c
+0
+cmp -l c d
+cmp: d: No such file or directory
+2
+cmp -l d a
+cmp: d: No such file or directory
+2
+cmp -l d b
+cmp: d: No such file or directory
+2
+cmp -l d c
+cmp: d: No such file or directory
+2
+cmp -l d d
+cmp: d: No such file or directory
+2
+cmp -s a a
+0
+cmp -s a b
+1
+cmp -s a c
+1
+cmp -s a d
+2
+cmp -s b a
+1
+cmp -s b b
+0
+cmp -s b c
+1
+cmp -s b d
+2
+cmp -s c a
+1
+cmp -s c b
+1
+cmp -s c c
+0
+cmp -s c d
+2
+cmp -s d a
+2
+cmp -s d b
+2
+cmp -s d c
+2
+cmp -s d d
+2
+EOF
+
+echo a >a
+echo b >b
+: >c
+rm -f d
+
+for option in '' -l -s; do
+  for i in a b c d; do
+    for j in a b c d; do
+      echo cmp $option $i $j
+      cmp $option $i $j 2>&1
+      echo $?
+    done
+  done
+done >out
+
+compare exp out || fail=1
+
+Exit $fail
-- 
2.9.3

Reply via email to