On 07/31/2012 11:43 AM, Jim Meyering wrote:
> It seems a little too far-fetched
> even for me.
Hah! I never thought I'd get you to say *that*!

I suppose you're right. As far as POSIX-conformance goes, apps are not
required to work "correctly" in the presence of mount+unmount syscalls
by other processes, and as a practical matter it is a rare case where
it'd matter. Still, it'd be nice to avoid the extra complexity, if we
could. It's too bad that there's no reliable way to tell whether
SEEK_HOLE really works without writing to the file system in question
(which'd be even worse).

> I do not like it when grep -r mysteriously hangs and makes
> my system run out of memory. This really does affect me a couple
> times per month, at least.

This can happen when files look like text for the first 32 KiB, but
have big holes later. If that's the problem, how about the following
patch?

>From ca6e5ec860cf5a2af060e2a5c8c53d9004ddc131 Mon Sep 17 00:00:00 2001
From: Paul Eggert <[email protected]>
Date: Tue, 31 Jul 2012 15:37:40 -0700
Subject: [PATCH] grep: switch to binary mode when encountering binary data anywhere

* src/main.c (buffer_is_binary): New function.
(file_is_binary): Use it.
Test for binary data anywhere in the file, not just at the start;
and when it's encountered, switch to binary mode.
---
 src/main.c | 45 +++++++++++++++++++++++++++++++--------------
 1 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/src/main.c b/src/main.c
index 84066d8..c2958a7 100644
--- a/src/main.c
+++ b/src/main.c
@@ -436,6 +436,12 @@ clean_up_stdout (void)
   close_stdout ();
 }
 
+static int
+buffer_is_binary (char const *buf, size_t bufsize)
+{
+  return memchr (buf, eolbyte ? 0 : '\200', bufsize) != 0;
+}
+
 /* Return 1 if a file is known to be binary for the purpose of 'grep'.
    BUF, of size BUFSIZE, is the initial buffer read from the file with
    descriptor FD and status ST.  */
@@ -446,18 +452,13 @@ file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st)
   enum { SEEK_HOLE = SEEK_END };
 #endif
 
-  /* If -z, test only whether the initial buffer contains '\200';
-     knowing about holes won't help.  */
-  if (! eolbyte)
-    return memchr (buf, '\200', bufsize) != 0;
-
-  /* If the initial buffer contains a null byte, guess that the file
+  /* If the initial buffer contains a binary byte, guess that the file
      is binary.  */
-  if (memchr (buf, '\0', bufsize))
+  if (buffer_is_binary (buf, bufsize))
     return 1;
 
   /* If the file has holes, it must contain a null byte somewhere.  */
-  if (SEEK_HOLE != SEEK_END && usable_st_size (st))
+  if (SEEK_HOLE != SEEK_END && eolbyte && usable_st_size (st))
     {
       off_t cur = bufsize;
       if (O_BINARY || fd == STDIN_FILENO)
@@ -1155,6 +1156,8 @@ grep (int fd, struct stat const *st)
   char *beg;
   char *lim;
   char eol = eolbyte;
+  int test_for_binary = ((binary_files == BINARY_BINARY_FILES && !out_quiet)
+                         || binary_files == WITHOUT_MATCH_BINARY_FILES);
 
   if (! reset (fd, st))
     return 0;
@@ -1176,13 +1179,16 @@ grep (int fd, struct stat const *st)
       return 0;
     }
 
-  not_text = (((binary_files == BINARY_BINARY_FILES && !out_quiet)
-               || binary_files == WITHOUT_MATCH_BINARY_FILES)
+  not_text = (test_for_binary
               && file_is_binary (bufbeg, buflim - bufbeg, fd, st));
-  if (not_text && binary_files == WITHOUT_MATCH_BINARY_FILES)
-    return 0;
-  done_on_match += not_text;
-  out_quiet += not_text;
+  if (not_text)
+    {
+      if (binary_files == WITHOUT_MATCH_BINARY_FILES)
+        return 0;
+      test_for_binary = 0;
+      done_on_match++;
+      out_quiet++;
+    }
 
   for (;;)
     {
@@ -1246,6 +1252,17 @@ grep (int fd, struct stat const *st)
               suppressible_error (filename, errno);
               goto finish_grep;
             }
+          if (test_for_binary
+              && buffer_is_binary (bufbeg + save, buflim - bufbeg - save))
+            {
+              if ((nlines && !out_invert)
+                  || binary_files == WITHOUT_MATCH_BINARY_FILES)
+                goto finish_grep;
+              not_text = 1;
+              test_for_binary = 0;
+              done_on_match++;
+              out_quiet++;
+            }
         }
       if (residue)
         {
-- 
1.7.6.5
