Norihiro Tanaka wrote:
> We need to initialize the new member.
> I add it to the patch.

I hadn't added it yet; I have done so now.
From 1ce1ca31da98690f5b5ea5c23df3ae4ce2edfe73 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Tue, 1 Apr 2014 23:48:16 +0900
Subject: [PATCH] grep: prefer regex to DFA for ANYCHAR in multi-byte locales

* src/dfa.c (dfaexec): Prefer regex to DFA for ANYCHAR in multi-byte locales.
---
 src/dfa.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index b6c1250..e424a78 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -288,7 +288,8 @@ typedef struct
   size_t hash;                  /* Hash of the positions of this state.  */
   position_set elems;           /* Positions this state could match.  */
   unsigned char context;        /* Context from previous state.  */
-  char backref;                 /* True if this state matches a \<digit>.  */
+  bool has_backref;             /* True if this state matches a \<digit>.  */
+  bool has_mbcset;              /* True if this state matches a MBCSET.  */
   unsigned short constraint;    /* Constraint for this state to accept.  */
   token first_end;              /* Token value of the first END in elems.  */
   position_set mbps;            /* Positions which can match multibyte
@@ -2125,7 +2126,8 @@ state_index (struct dfa *d, position_set const *s, int context)
   alloc_position_set (&d->states[i].elems, s->nelem);
   copy (s, &d->states[i].elems);
   d->states[i].context = context;
-  d->states[i].backref = 0;
+  d->states[i].has_backref = false;
+  d->states[i].has_mbcset = false;
   d->states[i].constraint = 0;
   d->states[i].first_end = 0;
   d->states[i].mbps.nelem = 0;
@@ -2143,7 +2145,7 @@ state_index (struct dfa *d, position_set const *s, int context)
     else if (d->tokens[s->elems[j].index] == BACKREF)
       {
         d->states[i].constraint = NO_CONSTRAINT;
-        d->states[i].backref = 1;
+        d->states[i].has_backref = true;
       }
 
   ++d->sindex;
@@ -2606,6 +2608,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
           if (d->states[s].mbps.nelem == 0)
             alloc_position_set (&d->states[s].mbps, 1);
           insert (pos, &(d->states[s].mbps));
+          d->states[s].has_mbcset |= (d->tokens[pos.index] == MBCSET);
           continue;
         }
       else
@@ -3410,7 +3413,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
                  better performance (up to 25% better on [a-z], for
                  example) and enables support for collating symbols and
                  equivalence classes.  */
-              if (backref)
+              if (d->states[s].has_mbcset && backref)
                 {
                   *backref = 1;
                   *end = saved_end;
@@ -3444,7 +3447,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
           if (d->success[s] & sbit[*p])
             {
               if (backref)
-                *backref = (d->states[s].backref != 0);
+                *backref = d->states[s].has_backref;
               *end = saved_end;
               return (char *) p;
             }
-- 
1.9.1

Reply via email to