It seems that the overhead of searching for BACKREF in dfaisfast() is not
negligible.  The first patch fixes that.  The second patch fixes a typo in a
comment that I found in the process.
From 9bcc040d57669393c28a1852dc3e9037dc8c81f7 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Mon, 5 May 2014 15:14:34 +0900
Subject: [PATCH 1/2] dfa: check for BACKREF in advance

* src/dfa.c (struct dfa): Define new member `has_backref'.
(addtok_mb): Set it when adding a BACKREF token.
(dfaisfast): Use it instead of scanning the tokens for BACKREF.
---
 src/dfa.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 273d3d1..12fbdda 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -337,6 +337,7 @@ struct dfa
   size_t nleaves;               /* Number of leaves on the parse tree.  */
   size_t nregexps;              /* Count of parallel regexps being built
                                    with dfaparse.  */
+  bool has_backref;             /* True if has BACKREF in tokens.  */
   bool multibyte;              /* True iff MB_CUR_MAX > 1.  */
   token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales.  */
   mbstate_t mbs;               /* Multibyte conversion state.  */
@@ -1593,6 +1594,8 @@ addtok_mb (token t, int mbprop)
       --depth;
       break;
 
+    case BACKREF:
+      dfa->has_backref = true;
     default:
       ++dfa->nleaves;
     case EMPTY:
@@ -3419,18 +3422,7 @@ dfasuperset (struct dfa const *d)
 bool
 dfaisfast (struct dfa const *d)
 {
-  if (d->superset)
-    return true;
-  else if (d->multibyte)
-    return false;
-  else
-    {
-      size_t i;
-      for (i = 0; i < d->tindex; i++)
-        if (d->tokens[i] == BACKREF)
-          return false;
-      return true;
-    }
+  return d->superset || (!d->multibyte && !d->has_backref);
 }
 
 static void
-- 
1.9.2

From f753e7eb872f0c09a8eb280afb057184dc39e6d4 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Mon, 5 May 2014 16:14:01 +0900
Subject: [PATCH 2/2] dfa: fix comment

* src/dfa.c (struct dfa): Fix comment typo.
---
 src/dfa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dfa.c b/src/dfa.c
index 12fbdda..db8846f 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -338,7 +338,7 @@ struct dfa
   size_t nregexps;              /* Count of parallel regexps being built
                                    with dfaparse.  */
   bool has_backref;             /* True if has BACKREF in tokens.  */
-  bool multibyte;              /* True iff MB_CUR_MAX > 1.  */
+  bool multibyte;              /* True if MB_CUR_MAX > 1.  */
   token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales.  */
   mbstate_t mbs;               /* Multibyte conversion state.  */
 
-- 
1.9.2

Reply via email to