2007-05-22 James Youngman <[EMAIL PROTECTED]>
* src/wc.c (wc): Limit the number of messages about invalid
multibyte sequences to error_limit (currently 5). If the actual
number of conversion errors exceeds this, indicate the number of
suppressed messages. The idea is that most users typing "wc
/bin/sh" actually know there are no words there and other users
will be clued in by the error messages that remain (and the
impressive count of suppressed messages).
Example:-
$ ./wc /bin/sh
./wc: /bin/sh:1: Invalid or incomplete multibyte or wide character
./wc: /bin/sh:2: Invalid or incomplete multibyte or wide character
./wc: /bin/sh:5: Invalid or incomplete multibyte or wide character
./wc: /bin/sh:6: Invalid or incomplete multibyte or wide character
./wc: /bin/sh:7: Invalid or incomplete multibyte or wide character
./wc: 226251 more multibyte conversion error messages were suppressed
2409 21687 677184 /bin/sh
The patch itself appears as an attachment to avoid space/tab problems.
James.
2007-05-22 James Youngman <[EMAIL PROTECTED]>
* src/wc.c (wc): Limit the number of messages about invalid
multibyte sequences to error_limit (currently 5). If the actual
number of conversion errors exceeds this, indicate the number of
suppressed messages. The idea is that most users typing "wc
/bin/sh" actually know there are no words there and other users
will be clued in by the error messages that remain (and the
impressive count of suppressed messages).
Index: NEWS
===================================================================
RCS file: /sources/coreutils/coreutils/NEWS,v
retrieving revision 1.493
diff -u -p -r1.493 NEWS
--- NEWS 15 May 2007 05:15:28 -0000 1.493
+++ NEWS 22 May 2007 01:15:17 -0000
@@ -10,6 +10,11 @@ GNU coreutils NEWS
option of the same name, this makes uniq consume and produce
NUL-terminated lines rather than newline-terminated lines.
+ In multibyte locales, if you run wc on a binary file, only a small
+ number of multibyte character conversion error messages will be
+ produced, along with a count of the suppressed messages. This
+ makes running wc on binaries still irritating but not disastrous.
+
** Bug fixes
ls -x DIR would sometimes output the wrong string in place of the
Index: src/wc.c
===================================================================
RCS file: /sources/coreutils/coreutils/src/wc.c,v
retrieving revision 1.114
diff -u -p -r1.114 wc.c
--- src/wc.c 28 Mar 2007 06:57:40 -0000 1.114
+++ src/wc.c 22 May 2007 01:15:17 -0000
@@ -275,6 +275,8 @@ wc (int fd, char const *file_x, struct f
uintmax_t linepos = 0;
mbstate_t state = { 0, };
uintmax_t last_error_line = 0;
+ uintmax_t error_limit = 5;
+ uintmax_t error_count = 0;
int last_error_errno = 0;
# if SUPPORT_OLD_MBRTOWC
/* Back-up the state before each multibyte character conversion and
@@ -327,12 +329,16 @@ wc (int fd, char const *file_x, struct f
if (!(lines + 1 == last_error_line
&& errno == last_error_errno))
{
- char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)];
- last_error_line = lines + 1;
- last_error_errno = errno;
- error (0, errno, "%s:%s", file,
- umaxtostr (last_error_line, line_number_buf));
- ok = false;
+ if (error_limit == 0 ||
+ (error_count++ < error_limit))
+ {
+ char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)];
+ last_error_line = lines + 1;
+ last_error_errno = errno;
+ error (0, errno, "%s:%s", file,
+ umaxtostr (last_error_line, line_number_buf));
+ ok = false;
+ }
}
p++;
bytes_read--;
@@ -402,6 +408,16 @@ wc (int fd, char const *file_x, struct f
if (linepos > linelength)
linelength = linepos;
words += in_word;
+
+ if (error_limit && (error_count > error_limit))
+ {
+ char suppression_buf[INT_BUFSIZE_BOUND (uintmax_t)];
+ const uintmax_t suppressed = error_count - error_limit;
+ error (0, 0,
+ _("%s more multibyte conversion error messages "
+ "were suppressed"),
+ umaxtostr (suppressed, suppression_buf));
+ }
}
#endif
else
_______________________________________________
Bug-coreutils mailing list
[email protected]
http://lists.gnu.org/mailman/listinfo/bug-coreutils