Thanks, I installed that patch with minor reworking of the ChangeLog entry and am marking this as done. By the way, a minor point -- nowadays we prefer single-quoting 'like this' rather than the old style `like this' -- the GNU coding standards changed recently in this regard.

I noticed a mostly-theoretical read buffer overrun in the patch, along with some other opportunities for simplification and tuning, and installed the attached fixup patch as well.
From a5c40993f53b288fc6aaf058a09b3f7538a2422c Mon Sep 17 00:00:00 2001
From: Paul Eggert <[email protected]>
Date: Sat, 26 Apr 2014 18:01:11 -0700
Subject: [PATCH] dfa: fix index bug in previous patch, and simplify

* src/dfa.c, src/dfa.h (dfaisfast): Arg is const pointer.
* src/dfa.c (dfaisfast): Simplify, since supersets never contain BACKREF.
* src/dfa.h (dfaisfast): Declare to be pure.
* src/dfasearch.c (EGexecute): Fix typo that could cause buffer
read overrun when !dfafast.  Hoist duplicate computation out
of an if's then and else parts.
---
 src/dfa.c       | 17 ++++++-----------
 src/dfa.h       |  5 ++---
 src/dfasearch.c | 24 ++++++++++--------------
 3 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index fc638c8..362de2c 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3412,25 +3412,20 @@ dfahint (struct dfa *d, char const *begin, char *end, 
size_t *count)
 }
 
 bool
-dfaisfast (struct dfa *d)
+dfaisfast (struct dfa const *d)
 {
-  size_t i;
   if (d->superset)
+    return true;
+  else if (d->multibyte)
+    return false;
+  else
     {
-      for (i = 0; i < d->superset->tindex; i++)
-        if (d->superset->tokens[i] == BACKREF)
-          return false;
-      return true;
-    }
-  else if (!d->multibyte)
-    {
+      size_t i;
       for (i = 0; i < d->tindex; i++)
         if (d->tokens[i] == BACKREF)
           return false;
       return true;
     }
-  else
-    return false;
 }
 
 static void
diff --git a/src/dfa.h b/src/dfa.h
index 2dd4871..fbcb191 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -82,9 +82,8 @@ extern char *dfaexec (struct dfa *d, char const *begin, char 
*end,
 extern size_t dfahint (struct dfa *d, char const *begin, char *end,
                        size_t *count);
 
-/* Return true, if `multibyte' attribute of struct dfa is false and the
-   pattern doesn't have BACKREF.  */
-extern bool dfaisfast (struct dfa *);
+/* Return true if the DFA is likely to be fast.  */
+extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
 
 /* Free the storage held by the components of a struct dfa. */
 extern void dfafree (struct dfa *);
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 79a0cd8..7edc96b 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -225,21 +225,21 @@ EGexecute (char const *buf, size_t size, size_t 
*match_size,
           /* We don't care about an exact match.  */
           if (kwset)
             {
-              /* Find a possible match using the KWset matcher. */
+              /* Find a possible match using the KWset matcher.  */
               size_t offset = kwsexec (kwset, beg - begline,
                                        buflim - beg + begline, &kwsm);
               if (offset == (size_t) -1)
                 goto failure;
-              beg += offset;
-              /* Narrow down to the line containing the candidate, and
-                 run it through DFA. */
-              match = beg;
-              beg = memrchr (buf, eol, beg - buf);
+              match = beg + offset;
+
+              /* Narrow down to the line containing the candidate.  */
+              beg = memrchr (buf, eol, match - buf);
               beg = beg ? beg + 1 : buf;
+              end = memchr (match, eol, buflim - match);
+              end = end ? end + 1 : buflim;
+
               if (kwsm.index < kwset_exact_matches)
                 {
-                  end = memchr (beg, eol, buflim - beg);
-                  end = end ? end + 1 : buflim;
                   if (MB_CUR_MAX == 1)
                     goto success;
                   if (mb_start < beg)
@@ -256,9 +256,6 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
                 }
               else if (!dfafast)
                 {
-                  /* Narrow down to the line we've found if dfa isn't fast. */
-                  end = memchr (match, eol, buflim - beg);
-                  end = end ? end + 1 : buflim;
                   if (dfahint (dfa, beg, (char *) end, NULL) == (size_t) -1)
                     continue;
                   if (! dfaexec (dfa, beg, (char *) end, 0, NULL, &backref))
@@ -267,8 +264,7 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
             }
           if (!kwset || dfafast)
             {
-              /* No good fixed strings or DFA is fast; start with DFA
-                 broadly. */
+              /* No good fixed strings or DFA is fast; use DFA.  */
               size_t offset, count;
               char const *next_beg;
               count = 0;
@@ -287,7 +283,7 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
                 }
               else
                 next_beg = beg + offset;
-              /* Narrow down to the line we've found. */
+              /* Narrow down to the line we've found.  */
               beg = memrchr (buf, eol, next_beg - buf);
               beg = beg ? beg + 1 : buf;
               if (count != 0)
-- 
1.9.0

Reply via email to