2007-05-22  James Youngman  <[EMAIL PROTECTED]>

       * src/wc.c (wc): Limit the number of messages about invalid
       multibyte sequences to error_limit (currently 5).  If the actual
       number of conversion errors exceeds this, indicate the number of
       suppressed messages.  The idea is that most users typing "wc
       /bin/sh" actually know there are no words there and other users
       will be clued in by the error messages that remain (and the
       impressive count of suppressed messages).

Example:

$ ./wc /bin/sh
./wc: /bin/sh:1: Invalid or incomplete multibyte or wide character
./wc: /bin/sh:2: Invalid or incomplete multibyte or wide character
./wc: /bin/sh:5: Invalid or incomplete multibyte or wide character
./wc: /bin/sh:6: Invalid or incomplete multibyte or wide character
./wc: /bin/sh:7: Invalid or incomplete multibyte or wide character
./wc: 226251 more multibyte conversion error messages were suppressed
 2409  21687 677184 /bin/sh


The patch itself appears as an attachment to avoid space/tab problems.

James.
2007-05-22  James Youngman  <[EMAIL PROTECTED]>

	* src/wc.c (wc): Limit the number of messages about invalid
	multibyte sequences to error_limit (currently 5).  If the actual
	number of conversion errors exceeds this, indicate the number of
	suppressed messages.  The idea is that most users typing "wc
	/bin/sh" actually know there are no words there and other users
	will be clued in by the error messages that remain (and the
	impressive count of suppressed messages).

Index: NEWS
===================================================================
RCS file: /sources/coreutils/coreutils/NEWS,v
retrieving revision 1.493
diff -u -p -r1.493 NEWS
--- NEWS	15 May 2007 05:15:28 -0000	1.493
+++ NEWS	22 May 2007 01:15:17 -0000
@@ -10,6 +10,11 @@ GNU coreutils NEWS                      
   option of the same name, this makes uniq consume and produce
   NUL-terminated lines rather than newline-terminated lines.
 
+  In multibyte locales, if you run wc on a binary file, only a small
+  number of multibyte character conversion error messages will be
+  produced, along with a count of the messages that were suppressed.  This
+  makes running wc on binaries still irritating but not disastrous.
+
 ** Bug fixes
 
   ls -x DIR would sometimes output the wrong string in place of the
Index: src/wc.c
===================================================================
RCS file: /sources/coreutils/coreutils/src/wc.c,v
retrieving revision 1.114
diff -u -p -r1.114 wc.c
--- src/wc.c	28 Mar 2007 06:57:40 -0000	1.114
+++ src/wc.c	22 May 2007 01:15:17 -0000
@@ -275,6 +275,8 @@ wc (int fd, char const *file_x, struct f
       uintmax_t linepos = 0;
       mbstate_t state = { 0, };
       uintmax_t last_error_line = 0;
+      uintmax_t error_limit = 5;
+      uintmax_t error_count = 0;
       int last_error_errno = 0;
 # if SUPPORT_OLD_MBRTOWC
       /* Back-up the state before each multibyte character conversion and
@@ -327,12 +329,16 @@ wc (int fd, char const *file_x, struct f
 		  if (!(lines + 1 == last_error_line
 			&& errno == last_error_errno))
 		    {
-		      char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)];
-		      last_error_line = lines + 1;
-		      last_error_errno = errno;
-		      error (0, errno, "%s:%s", file,
-			     umaxtostr (last_error_line, line_number_buf));
-		      ok = false;
+		      if (error_limit == 0 ||
+			  (error_count++ < error_limit))
+			{
+			  char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)];
+			  last_error_line = lines + 1;
+			  last_error_errno = errno;
+			  error (0, errno, "%s:%s", file,
+				 umaxtostr (last_error_line, line_number_buf));
+			  ok = false;
+			}
 		    }
 		  p++;
 		  bytes_read--;
@@ -402,6 +408,16 @@ wc (int fd, char const *file_x, struct f
       if (linepos > linelength)
 	linelength = linepos;
       words += in_word;
+
+      if (error_limit && (error_count > error_limit))
+	{
+	  char suppression_buf[INT_BUFSIZE_BOUND (uintmax_t)];
+	  const uintmax_t suppressed = error_count - error_limit;
+	  error (0, 0,
+		 _("%s more multibyte conversion error messages "
+		   "were suppressed"),
+		 umaxtostr (suppressed, suppression_buf));
+	}
     }
 #endif
   else
_______________________________________________
Bug-coreutils mailing list
[email protected]
http://lists.gnu.org/mailman/listinfo/bug-coreutils

Reply via email to