Thanks, I pushed that with minor changes to the ChangeLog message (first attached patch). It's a tad faster to combine the relevant flags at compile-time so I did that (second attached patch).

"Iff" means "if and only if"; see <http://en.wikipedia.org/wiki/Iff>.  I
got the habit of trying to be logically-precise from RMS, who I guess
got it back in the days when he was doing AI. Anyway, I reworded the text in a different way to avoid the misuse of "if" (third attached patch).
From 13523ef8c728abe35922d6178d94dc1008aa7675 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Mon, 5 May 2014 15:14:34 +0900
Subject: [PATCH 1/3] dfa: speed up 'dfaisfast'

* src/dfa.c (struct dfa): New member 'has_backref'.
(addtok_mb): Set it.
(dfaisfast): Use it.
---
 src/dfa.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 273d3d1..12fbdda 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -337,6 +337,7 @@ struct dfa
   size_t nleaves;               /* Number of leaves on the parse tree.  */
   size_t nregexps;              /* Count of parallel regexps being built
                                    with dfaparse.  */
+  bool has_backref;             /* True if has BACKREF in tokens.  */
   bool multibyte;              /* True iff MB_CUR_MAX > 1.  */
   token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales.  */
   mbstate_t mbs;               /* Multibyte conversion state.  */
@@ -1593,6 +1594,8 @@ addtok_mb (token t, int mbprop)
       --depth;
       break;
 
+    case BACKREF:
+      dfa->has_backref = true;
     default:
       ++dfa->nleaves;
     case EMPTY:
@@ -3419,18 +3422,7 @@ dfasuperset (struct dfa const *d)
 bool
 dfaisfast (struct dfa const *d)
 {
-  if (d->superset)
-    return true;
-  else if (d->multibyte)
-    return false;
-  else
-    {
-      size_t i;
-      for (i = 0; i < d->tindex; i++)
-        if (d->tokens[i] == BACKREF)
-          return false;
-      return true;
-    }
+  return d->superset || (!d->multibyte && !d->has_backref);
 }
 
 static void
-- 
1.9.0

From f082198257733b11195a32d342ae52e95f910f23 Mon Sep 17 00:00:00 2001
From: Paul Eggert <[email protected]>
Date: Tue, 6 May 2014 07:23:03 -0700
Subject: [PATCH 2/3] dfa: minor performance improvement for previous change

* src/dfa.c (struct dfa): New member 'fast'.  Remove 'has_backref'.
All uses changed.
---
 src/dfa.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 12fbdda..5b3704e 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -337,7 +337,7 @@ struct dfa
   size_t nleaves;               /* Number of leaves on the parse tree.  */
   size_t nregexps;              /* Count of parallel regexps being built
                                    with dfaparse.  */
-  bool has_backref;             /* True if has BACKREF in tokens.  */
+  bool fast;                   /* The DFA is fast.  */
   bool multibyte;              /* True iff MB_CUR_MAX > 1.  */
   token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales.  */
   mbstate_t mbs;               /* Multibyte conversion state.  */
@@ -1595,7 +1595,7 @@ addtok_mb (token t, int mbprop)
       break;
 
     case BACKREF:
-      dfa->has_backref = true;
+      dfa->fast = false;
     default:
       ++dfa->nleaves;
     case EMPTY:
@@ -3422,7 +3422,7 @@ dfasuperset (struct dfa const *d)
 bool
 dfaisfast (struct dfa const *d)
 {
-  return d->superset || (!d->multibyte && !d->has_backref);
+  return d->fast;
 }
 
 static void
@@ -3462,6 +3462,7 @@ dfainit (struct dfa *d)
 {
   memset (d, 0, sizeof *d);
   d->multibyte = MB_CUR_MAX > 1;
+  d->fast = !d->multibyte;
 }
 
 static void
@@ -3579,7 +3580,10 @@ dfacomp (char const *s, size_t len, struct dfa *d, int 
searchflag)
   dfassbuild (d);
   dfaanalyze (d, searchflag);
   if (d->superset)
-    dfaanalyze (d->superset, searchflag);
+    {
+      d->fast = true;
+      dfaanalyze (d->superset, searchflag);
+    }
 }
 
 /* Free the storage held by the components of a dfa.  */
-- 
1.9.0

From 3c3468b15bb7d15869f6f1688f28bf943172572e Mon Sep 17 00:00:00 2001
From: Paul Eggert <[email protected]>
Date: Tue, 6 May 2014 07:40:52 -0700
Subject: [PATCH 3/3] dfa: clarify use of "if"

The phrase "Y is true if X" is logically equivalent to "X implies Y",
but often "X if and only if Y" was intended.
* src/dfa.c, src/dfa.h: Reword to avoid the incorrect use of "if".
---
 src/dfa.c | 52 ++++++++++++++++++++++++----------------------------
 src/dfa.h |  2 +-
 2 files changed, 25 insertions(+), 29 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 5b3704e..fc2acdc 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -46,7 +46,6 @@
    host does not conform to Posix.  */
 #define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9)
 
-/* gettext.h ensures that we don't use gettext if ENABLE_NLS is not defined */
 #include "gettext.h"
 #define _(str) gettext (str)
 
@@ -183,27 +182,25 @@ enum
                                    a backtracking matcher.  */
 
   BEGLINE,                      /* BEGLINE is a terminal symbol that matches
-                                   the empty string if it is at the beginning
-                                   of a line.  */
+                                  the empty string at the beginning of a
+                                  line.  */
 
   ENDLINE,                      /* ENDLINE is a terminal symbol that matches
-                                   the empty string if it is at the end of
-                                   a line.  */
+                                  the empty string at the end of a line.  */
 
   BEGWORD,                      /* BEGWORD is a terminal symbol that matches
-                                   the empty string if it is at the beginning
-                                   of a word.  */
+                                  the empty string at the beginning of a
+                                  word.  */
 
   ENDWORD,                      /* ENDWORD is a terminal symbol that matches
-                                   the empty string if it is at the end of
-                                   a word.  */
+                                  the empty string at the end of a word.  */
 
   LIMWORD,                      /* LIMWORD is a terminal symbol that matches
-                                   the empty string if it is at the beginning
-                                   or the end of a word.  */
+                                  the empty string at the beginning or the
+                                  end of a word.  */
 
   NOTLIMWORD,                   /* NOTLIMWORD is a terminal symbol that
-                                   matches the empty string if it is not at
+                                  matches the empty string not at
                                    the beginning or end of a word.  */
 
   QMARK,                        /* QMARK is an operator of one argument that
@@ -284,8 +281,8 @@ typedef struct
   size_t hash;                  /* Hash of the positions of this state.  */
   position_set elems;           /* Positions this state could match.  */
   unsigned char context;        /* Context from previous state.  */
-  bool has_backref;             /* True if this state matches a \<digit>.  */
-  bool has_mbcset;              /* True if this state matches a MBCSET.  */
+  bool has_backref;            /* This state matches a \<digit>.  */
+  bool has_mbcset;             /* This state matches a MBCSET.  */
   unsigned short constraint;    /* Constraint for this state to accept.  */
   token first_end;              /* Token value of the first END in elems.  */
   position_set mbps;            /* Positions which can match multibyte
@@ -338,7 +335,7 @@ struct dfa
   size_t nregexps;              /* Count of parallel regexps being built
                                    with dfaparse.  */
   bool fast;                   /* The DFA is fast.  */
-  bool multibyte;              /* True iff MB_CUR_MAX > 1.  */
+  bool multibyte;              /* MB_CUR_MAX > 1.  */
   token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales.  */
   mbstate_t mbs;               /* Multibyte conversion state.  */
 
@@ -389,7 +386,7 @@ struct dfa
                                    matching the given position in a string
                                    matching the regexp.  Allocated to the
                                    maximum possible position index.  */
-  bool searchflag;              /* True if we are supposed to build a searching
+  bool searchflag;             /* We are supposed to build a searching
                                    as opposed to an exact matcher.  A searching
                                    matcher finds the first and shortest string
                                    matching a regexp anywhere in the buffer,
@@ -432,11 +429,10 @@ struct dfa
 
 /* Some macros for user access to dfa internals.  */
 
-/* ACCEPTING returns true if s could possibly be an accepting state of r.  */
+/* S could possibly be an accepting state of R.  */
 #define ACCEPTING(s, r) ((r).states[s].constraint)
 
-/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
-   specified context.  */
+/* STATE accepts in the specified context.  */
 #define ACCEPTS_IN_CONTEXT(prev, curr, state, dfa) \
   SUCCEEDS_IN_CONTEXT ((dfa).states[state].constraint, prev, curr)
 
@@ -626,9 +622,9 @@ equal (charclass const s1, charclass const s2)
    and return its new address.  Although PTR may be null, the returned
    value is never null.
 
-   The array holds *NALLOC items; *NALLOC must be zero if PTR is null,
-   and is updated on reallocation.  ITEMSIZE is the size of one item.
-   Avoid O(N**2) behavior on arrays growing linearly.  */
+   The array holds *NALLOC items; *NALLOC is updated on reallocation.
+   ITEMSIZE is the size of one item.  Avoid O(N**2) behavior on arrays
+   growing linearly.  */
 static void *
 maybe_realloc (void *ptr, size_t nitems, size_t *nalloc, size_t itemsize)
 {
@@ -691,7 +687,7 @@ static charclass newline;
 # define is_valid_unibyte_character(c) (btowc (c) != WEOF)
 #endif
 
-/* Return non-zero if C is a "word-constituent" byte; zero otherwise.  */
+/* C is a "word-constituent" byte.  */
 #define IS_WORD_CONSTITUENT(C) \
   (is_valid_unibyte_character (C) && (isalnum (C) || (C) == '_'))
 
@@ -786,7 +782,7 @@ using_utf8 (void)
   return utf8;
 }
 
-/* Return true if the current locale is known to be a unibyte locale
+/* The current locale is known to be a unibyte locale
    without multicharacter collating sequences and where range
    comparisons simply use the native encoding.  These locales can be
    processed more efficiently.  */
@@ -794,7 +790,7 @@ using_utf8 (void)
 static bool
 using_simple_locale (void)
 {
-  /* True if the native character set is known to be compatible with
+  /* The native character set is known to be compatible with
      the C locale.  The following test isn't perfect, but it's good
      enough in practice, as only ASCII and EBCDIC are in common use
      and this test correctly accepts ASCII and rejects EBCDIC.  */
@@ -834,7 +830,7 @@ using_simple_locale (void)
 static char const *lexptr;      /* Pointer to next input character.  */
 static size_t lexleft;          /* Number of characters remaining.  */
 static token lasttok;           /* Previous token returned; initially END.  */
-static bool laststart;          /* True if we're separated from beginning or (,
+static bool laststart;         /* We're separated from beginning or (,
                                    | only by zero-width characters.  */
 static size_t parens;           /* Count of outstanding left parens.  */
 static int minrep, maxrep;      /* Repeat counts for {m,n}.  */
@@ -976,7 +972,7 @@ parse_bracket_exp (void)
   int c, c1, c2;
   charclass ccl;
 
-  /* True if this is a bracket expression that dfaexec is known to
+  /* This is a bracket expression that dfaexec is known to
      process correctly.  */
   bool known_bracket_exp = true;
 
@@ -2560,7 +2556,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
   state_num state;              /* New state.  */
   state_num state_newline;      /* New state on a newline transition.  */
   state_num state_letter;       /* New state on a letter transition.  */
-  bool next_isnt_1st_byte = false; /* Flag if we can't add state0.  */
+  bool next_isnt_1st_byte = false; /* We can't add state0.  */
   size_t i, j, k;
 
   zeroset (matches);
diff --git a/src/dfa.h b/src/dfa.h
index e3ab700..f30c3cb 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -77,7 +77,7 @@ extern char *dfaexec (struct dfa *d, char const *begin, char 
*end,
    superset is available.  */
 extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE;
 
-/* Return true if the DFA is likely to be fast.  */
+/* The DFA is likely to be fast.  */
 extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
 
 /* Free the storage held by the components of a struct dfa. */
-- 
1.9.0

Reply via email to