This partially works around https://savannah.gnu.org/bugs/?29117,
and more generally provides a speedup whenever fgrep is "almost" sufficient
but not quite (e.g. grep ^abc). The speedup looks too good to be true :-)
(it can reach 1000x on some not-too-contrived test cases).
* src/dfa.c (dfaoptimize): New.
(dfacomp): Call it.
---
src/dfa.c | 25 +++++++++++++++++++++++++
1 files changed, 25 insertions(+), 0 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 6a658c1..f9f7cd3 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3000,6 +3000,30 @@ dfainit (struct dfa *d)
#endif
}
+static void
+dfaoptimize (struct dfa *d)
+{
+ int i;
+ if (!using_utf8)
+ return;
+ /* Return with D unchanged if any parsed token is ANYCHAR or MBCSET.  */
+ for (i = 0; i < d->tindex; ++i)
+ {
+ switch(d->tokens[i])
+ {
+ case ANYCHAR:
+ return;
+ case MBCSET:
+ return;
+ default:
+ break; /* Any other token: keep scanning.  */
+ }
+ }
+ /* No such token was found: call free_mbdata and set D's mb_cur_max to 1.  */
+ free_mbdata (d);
+ d->mb_cur_max = 1;
+}
+
/* Parse and analyze a single string of the given length. */
void
dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
@@ -3007,6 +3031,7 @@ dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
check_utf8();
dfainit(d);
dfaparse(s, len, d);
+ dfaoptimize(d);
dfamust(d);
dfaanalyze(d, searchflag);
}
--
1.6.6.1