Thanks for looking into this. I added that test case, but took a more-conservative approach to fixing the bug, by disabling the optimization that's causing this problem; please see attached patches. The optimization was a hack anyway, and these bugs suggest that it's not a hack worth keeping.
>From c4d6d577820f6d40cc0f34be56fc8d8795381fef Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Fri, 24 Oct 2014 13:27:29 -0700
Subject: [PATCH 1/2] grep: fix grep -P crash

Reported by Shlomi Fish in: http://bugs.gnu.org/18806
Commit 9fa500407137f49f6edc3c6b4ee6c7096f0190c5 (2014-09-16) is a
hack that I put in to speed up 'grep -P'.  Unfortunately, not only
is it violation of modularity, it's also a bug magnet, as we have
found out with Bug#18738 and Bug#18806.  Remove the optimization
instead of applying more bandaids.  Perhaps we can think of a
better way of doing the optimization, or perhaps we can just live
with a slower grep -P (as -P is inherently slower anyway...).
* src/grep.c, src/grep.h (validated_boundary):
Remove.  All uses removed.
* src/pcresearch.c (Pexecute): Do not worry about validated_boundary.
---
 src/grep.c       |  3 ---
 src/grep.h       |  4 ----
 src/pcresearch.c | 37 ++++++++++++++-----------------------
 3 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/src/grep.c b/src/grep.c
index a0f2620..0a4ac27 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -352,7 +352,6 @@ bool match_words;
 bool match_lines;
 char eolbyte;
 enum textbin input_textbin;
-char const *validated_boundary;
 
 static char const *matcher;
 
@@ -1226,7 +1225,6 @@ grepbuf (char const *beg, char const *lim)
   intmax_t outleft0 = outleft;
   char const *p;
   char const *endp;
-  validated_boundary = beg;
 
   for (p = beg; p < lim; p = endp)
     {
@@ -2516,7 +2514,6 @@ main (int argc, char **argv)
   /* We need one byte prior and one after.  */
   char eolbytes[3] = { 0, eolbyte, 0 };
   size_t match_size;
-  validated_boundary = eolbytes + 1;
   skip_empty_lines = ((execute (eolbytes + 1, 1, &match_size, NULL) == 0)
                       == out_invert);
 
diff --git a/src/grep.h b/src/grep.h
index 86259fb..02052b4 100644
--- a/src/grep.h
+++ b/src/grep.h
@@ -47,8 +47,4 @@ enum textbin
 /* Input file type.  */
 extern enum textbin input_textbin;
 
-/* Validation boundary.  Earlier bytes have already been validated by
-   the PCRE matcher, which cares about this sort of thing.  */
-extern char const *validated_boundary;
-
 #endif
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 1fd5bde..5451029 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -156,7 +156,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
   char const *line_start = buf;
   int e = PCRE_ERROR_NOMATCH;
   char const *line_end;
-  char const *validated = validated_boundary;
 
   /* If the input type is unknown, the caller is still testing the
      input, which means the current buffer cannot contain encoding
@@ -210,34 +209,28 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
           int options = 0;
           if (!bol)
             options |= PCRE_NOTBOL;
-          if (multiline || p + search_bytes <= validated)
+          if (multiline)
             options |= PCRE_NO_UTF8_CHECK;
 
-          int valid_bytes = validated - p;
-          if (valid_bytes <= 0)
+          e = pcre_exec (cre, extra, p, search_bytes, 0,
+                         options, sub, NSUB);
+          if (e != PCRE_ERROR_BADUTF8)
             {
-              e = pcre_exec (cre, extra, p, search_bytes, 0,
-                             options, sub, NSUB);
-              if (e != PCRE_ERROR_BADUTF8)
+              if (0 < e && multiline && sub[1] - sub[0] != 0)
                 {
-                  validated = p + search_bytes;
-                  if (0 < e && multiline && sub[1] - sub[0] != 0)
+                  char const *nl = memchr (p + sub[0], eolbyte,
+                                           sub[1] - sub[0]);
+                  if (nl)
                     {
-                      char const *nl = memchr (p + sub[0], eolbyte,
-                                               sub[1] - sub[0]);
-                      if (nl)
-                        {
-                          /* This match crosses a line boundary; reject it.  */
-                          p += sub[0];
-                          line_end = nl;
-                          continue;
-                        }
+                      /* This match crosses a line boundary; reject it.  */
+                      p += sub[0];
+                      line_end = nl;
+                      continue;
                     }
-                  break;
                 }
-              valid_bytes = sub[0];
-              validated = p + valid_bytes;
+              break;
             }
+          int valid_bytes = sub[0];
 
           /* Try to match the string before the encoding error.
              Again, handle the empty-match case specially, for speed.  */
@@ -263,8 +256,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
       bol = true;
     }
 
-  validated_boundary = validated;
-
   if (e <= 0)
     {
       switch (e)
-- 
1.9.3

>From f79cd1bbd135f7c1751059be9d1d785598a7759b Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <nori...@kcn.ne.jp>
Date: Fri, 24 Oct 2014 13:33:04 -0700
Subject: [PATCH 2/2] tests: add test for grep -P fix

* tests/pcre-o: New test for this change.
* tests/Makefile.am (TESTS): Add it.
---
 tests/Makefile.am |  1 +
 tests/pcre-o      | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)
 create mode 100755 tests/pcre-o

diff --git a/tests/Makefile.am b/tests/Makefile.am
index c298835..f6f051c 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -90,6 +90,7 @@ TESTS =						\
   pcre-abort					\
   pcre-infloop					\
   pcre-invalid-utf8-input			\
+  pcre-o					\
   pcre-utf8					\
   pcre-w					\
   pcre-wx-backref				\
diff --git a/tests/pcre-o b/tests/pcre-o
new file mode 100755
index 0000000..3d0677c
--- /dev/null
+++ b/tests/pcre-o
@@ -0,0 +1,17 @@
+#! /bin/sh
+# Ensure that grep -oP doesn't cause internal error at match.
+#
+# Copyright (C) 2014 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_pcre_
+
+fail=0
+
+echo ab | grep -oP 'a' || fail=1
+
+Exit $fail
-- 
1.9.3

Reply via email to