Thanks, I installed that patch with minor reworking of the ChangeLog
entry and am marking this as done. By the way, a minor point --
nowadays we prefer single-quoting 'like this' rather than the old style
`like this' -- the GNU coding standards changed recently in this regard.
I noticed a mostly-theoretical read buffer overrun in the patch, along
with some other opportunities for simplification and tuning, and
installed the attached fixup patch as well.
From a5c40993f53b288fc6aaf058a09b3f7538a2422c Mon Sep 17 00:00:00 2001
From: Paul Eggert <[email protected]>
Date: Sat, 26 Apr 2014 18:01:11 -0700
Subject: [PATCH] dfa: fix index bug in previous patch, and simplify
* src/dfa.c, src/dfa.h (dfaisfast): Arg is const pointer.
* src/dfa.c (dfaisfast): Simplify, since supersets never contain BACKREF.
* src/dfa.h (dfaisfast): Declare to be pure.
* src/dfasearch.c (EGexecute): Fix typo that could cause buffer
read overrun when !dfafast. Hoist duplicate computation out
of an if's then and else parts.
---
src/dfa.c | 17 ++++++-----------
src/dfa.h | 5 ++---
src/dfasearch.c | 24 ++++++++++--------------
3 files changed, 18 insertions(+), 28 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index fc638c8..362de2c 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3412,25 +3412,20 @@ dfahint (struct dfa *d, char const *begin, char *end,
size_t *count)
}
bool
-dfaisfast (struct dfa *d)
+dfaisfast (struct dfa const *d)
{
- size_t i;
if (d->superset)
+ return true;
+ else if (d->multibyte)
+ return false;
+ else
{
- for (i = 0; i < d->superset->tindex; i++)
- if (d->superset->tokens[i] == BACKREF)
- return false;
- return true;
- }
- else if (!d->multibyte)
- {
+ size_t i;
for (i = 0; i < d->tindex; i++)
if (d->tokens[i] == BACKREF)
return false;
return true;
}
- else
- return false;
}
static void
diff --git a/src/dfa.h b/src/dfa.h
index 2dd4871..fbcb191 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -82,9 +82,8 @@ extern char *dfaexec (struct dfa *d, char const *begin, char
*end,
extern size_t dfahint (struct dfa *d, char const *begin, char *end,
size_t *count);
-/* Return true, if `multibyte' attribute of struct dfa is false and the
- pattern doesn't have BACKREF. */
-extern bool dfaisfast (struct dfa *);
+/* Return true if the DFA is likely to be fast. */
+extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
/* Free the storage held by the components of a struct dfa. */
extern void dfafree (struct dfa *);
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 79a0cd8..7edc96b 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -225,21 +225,21 @@ EGexecute (char const *buf, size_t size, size_t
*match_size,
/* We don't care about an exact match. */
if (kwset)
{
- /* Find a possible match using the KWset matcher. */
+ /* Find a possible match using the KWset matcher. */
size_t offset = kwsexec (kwset, beg - begline,
buflim - beg + begline, &kwsm);
if (offset == (size_t) -1)
goto failure;
- beg += offset;
- /* Narrow down to the line containing the candidate, and
- run it through DFA. */
- match = beg;
- beg = memrchr (buf, eol, beg - buf);
+ match = beg + offset;
+
+ /* Narrow down to the line containing the candidate. */
+ beg = memrchr (buf, eol, match - buf);
beg = beg ? beg + 1 : buf;
+ end = memchr (match, eol, buflim - match);
+ end = end ? end + 1 : buflim;
+
if (kwsm.index < kwset_exact_matches)
{
- end = memchr (beg, eol, buflim - beg);
- end = end ? end + 1 : buflim;
if (MB_CUR_MAX == 1)
goto success;
if (mb_start < beg)
@@ -256,9 +256,6 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
}
else if (!dfafast)
{
- /* Narrow down to the line we've found if dfa isn't fast. */
- end = memchr (match, eol, buflim - beg);
- end = end ? end + 1 : buflim;
if (dfahint (dfa, beg, (char *) end, NULL) == (size_t) -1)
continue;
if (! dfaexec (dfa, beg, (char *) end, 0, NULL, &backref))
@@ -267,8 +264,7 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
}
if (!kwset || dfafast)
{
- /* No good fixed strings or DFA is fast; start with DFA
- broadly. */
+ /* No good fixed strings or DFA is fast; use DFA. */
size_t offset, count;
char const *next_beg;
count = 0;
@@ -287,7 +283,7 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
}
else
next_beg = beg + offset;
- /* Narrow down to the line we've found. */
+ /* Narrow down to the line we've found. */
beg = memrchr (buf, eol, next_beg - buf);
beg = beg ? beg + 1 : buf;
if (count != 0)
--
1.9.0