It seems that the overhead of searching for BACKREF in dfaisfast() is not
negligible. The first patch fixes that. The second patch fixes a typo in a
comment that I found in the process.
From 9bcc040d57669393c28a1852dc3e9037dc8c81f7 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Mon, 5 May 2014 15:14:34 +0900
Subject: [PATCH 1/2] dfa: check for BACKREF in advance
* src/dfa.c (struct dfa): Define new member `has_backref'.
(addtok_mb): Set it when adding a BACKREF token.
(dfaisfast): Use it instead of scanning the tokens for BACKREF.
---
src/dfa.c | 16 ++++------------
1 file changed, 4 insertions(+), 12 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 273d3d1..12fbdda 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -337,6 +337,7 @@ struct dfa
size_t nleaves; /* Number of leaves on the parse tree. */
size_t nregexps; /* Count of parallel regexps being built
with dfaparse. */
+ bool has_backref; /* True if has BACKREF in tokens. */
bool multibyte; /* True iff MB_CUR_MAX > 1. */
token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */
mbstate_t mbs; /* Multibyte conversion state. */
@@ -1593,6 +1594,8 @@ addtok_mb (token t, int mbprop)
--depth;
break;
+ case BACKREF:
+ dfa->has_backref = true;
default:
++dfa->nleaves;
case EMPTY:
@@ -3419,18 +3422,7 @@ dfasuperset (struct dfa const *d)
bool
dfaisfast (struct dfa const *d)
{
- if (d->superset)
- return true;
- else if (d->multibyte)
- return false;
- else
- {
- size_t i;
- for (i = 0; i < d->tindex; i++)
- if (d->tokens[i] == BACKREF)
- return false;
- return true;
- }
+ return d->superset || (!d->multibyte && !d->has_backref);
}
static void
--
1.9.2
From f753e7eb872f0c09a8eb280afb057184dc39e6d4 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Mon, 5 May 2014 16:14:01 +0900
Subject: [PATCH 2/2] dfa: fix comment
* src/dfa.c (struct dfa): Fix comment typo.
---
src/dfa.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/dfa.c b/src/dfa.c
index 12fbdda..db8846f 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -338,7 +338,7 @@ struct dfa
size_t nregexps; /* Count of parallel regexps being built
with dfaparse. */
bool has_backref; /* True if has BACKREF in tokens. */
- bool multibyte; /* True iff MB_CUR_MAX > 1. */
+ bool multibyte; /* True if MB_CUR_MAX > 1. */
token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */
mbstate_t mbs; /* Multibyte conversion state. */
--
1.9.2