Bram,
here is patch, that will switch from the NFA engine to the BT regular
expression engine when there are too many states encountered. I have
tested it on some input on which the NFA engine was slow (issue #164)
and it worked. I made it so, that when verbose is set, Vim outputs
whenever it switches the engines. This will however only be done, when
're' is set to 0
I am however not sure, what a good measure is, when to abort the NFA
engine and switch to the old engine, but that could easily be switched.
Best,
Christian
--
Manchmal sind die Finger schneler, als die Rechtschreibung.
--
--
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
---
You received this message because you are subscribed to the Google Groups
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.
diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt
--- a/runtime/doc/options.txt
+++ b/runtime/doc/options.txt
@@ -5626,6 +5626,9 @@ A jump table for the options with a shor
Note that when using the NFA engine and the pattern contains something
that is not supported the pattern will not match. This is only useful
for debugging the regexp engine.
+ Using automatic selection enables Vim to switch the engine, if the NFA
+ engine becomes too costly (that should prevent a hanging Vim on
+ complex patterns or long input).
*'relativenumber'* *'rnu'* *'norelativenumber'* *'nornu'*
'relativenumber' 'rnu' boolean (default off)
diff --git a/src/regexp.c b/src/regexp.c
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -8026,18 +8026,14 @@ static regengine_T nfa_regengine =
nfa_regcomp,
nfa_regfree,
nfa_regexec_nl,
- nfa_regexec_multi
-#ifdef DEBUG
- ,(char_u *)""
-#endif
+ nfa_regexec_multi,
+ (char_u *)""
};
/* Which regexp engine to use? Needed for vim_regcomp().
* Must match with 'regexpengine'. */
static int regexp_engine = 0;
-#define AUTOMATIC_ENGINE 0
-#define BACKTRACKING_ENGINE 1
-#define NFA_ENGINE 2
+
#ifdef DEBUG
static char_u regname[][30] = {
"AUTOMATIC Regexp Engine",
@@ -8059,6 +8055,23 @@ vim_regcomp(expr_arg, re_flags)
{
regprog_T *prog = NULL;
char_u *expr = expr_arg;
+ static int re_flags_saved = 0; /* re_flags for vim_comp() saved */
+
+ if (re_flags < 0)
+ {
+ re_flags = re_flags_saved;
+#ifdef FEAT_EVAL
+ if (p_verbose > 0)
+ {
+ verbose_enter();
+ MSG_PUTS(_("Switching to backtrack RE engine for pattern: "));
+ MSG_PUTS(expr);
+ verbose_leave();
+ }
+#endif
+ }
+ else
+ re_flags_saved = re_flags;
regexp_engine = p_re;
@@ -8084,10 +8097,8 @@ vim_regcomp(expr_arg, re_flags)
regexp_engine = AUTOMATIC_ENGINE;
}
}
-#ifdef DEBUG
bt_regengine.expr = expr;
nfa_regengine.expr = expr;
-#endif
/*
* First try the NFA engine, unless backtracking was requested.
@@ -8114,12 +8125,6 @@ vim_regcomp(expr_arg, re_flags)
BT_REGEXP_DEBUG_LOG_NAME);
}
#endif
- /*
- * If the NFA engine failed, the backtracking engine won't work either.
- *
- if (regexp_engine == AUTOMATIC_ENGINE)
- prog = bt_regengine.regcomp(expr, re_flags);
- */
}
return prog;
@@ -8149,7 +8154,26 @@ vim_regexec(rmp, line, col)
char_u *line; /* string to match against */
colnr_T col; /* column to start looking for match */
{
- return rmp->regprog->engine->regexec_nl(rmp, line, col, FALSE);
+ int result = rmp->regprog->engine->regexec_nl(rmp, line, col, FALSE);
+
+ /* nfa engine aborted using too many states */
+ if (p_re == AUTOMATIC_ENGINE &&
+ result == NFA_TOO_MANY_STATES)
+ {
+ int re_engine = p_re;
+ regprog_T *save_prog = rmp->regprog;
+
+ p_re = BACKTRACKING_ENGINE;
+ vim_regfree(rmp->regprog);
+ rmp->regprog = vim_regcomp(
+ ((nfa_regprog_T *)rmp->regprog)->pattern, -1);
+ if (rmp->regprog != NULL)
+ result = rmp->regprog->engine->regexec_nl(rmp, line, col, FALSE);
+
+ vim_regfree(save_prog); /* free old engine */
+ p_re = re_engine;
+ }
+ return result;
}
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
@@ -8163,7 +8187,25 @@ vim_regexec_nl(rmp, line, col)
char_u *line;
colnr_T col;
{
- return rmp->regprog->engine->regexec_nl(rmp, line, col, TRUE);
+ int result = rmp->regprog->engine->regexec_nl(rmp, line, col, TRUE);
+
+ /* nfa engine aborted using too many states */
+ if (p_re == AUTOMATIC_ENGINE &&
+ result == NFA_TOO_MANY_STATES)
+ {
+ int re_engine = p_re;
+ regprog_T *save_prog = rmp->regprog;
+
+ p_re = BACKTRACKING_ENGINE;
+ vim_regfree(rmp->regprog);
+ rmp->regprog = vim_regcomp(
+ ((nfa_regprog_T *)rmp->regprog)->pattern, -1);
+ if (rmp->regprog != NULL)
+ result = rmp->regprog->engine->regexec_nl(rmp, line, col, TRUE);
+ vim_regfree(save_prog); /* free old engine */
+ p_re = re_engine;
+ }
+ return result;
}
#endif
@@ -8184,5 +8226,22 @@ vim_regexec_multi(rmp, win, buf, lnum, c
colnr_T col; /* column to start looking for match */
proftime_T *tm; /* timeout limit or NULL */
{
- return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
+ int result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
+
+ /* nfa engine aborted using too many states, only happens when NFA_Engine was used */
+ if (p_re == AUTOMATIC_ENGINE &&
+ result == NFA_TOO_MANY_STATES)
+ {
+ int re_engine = p_re;
+ regprog_T *save_prog = rmp->regprog;
+
+ p_re = BACKTRACKING_ENGINE;
+ rmp->regprog = vim_regcomp(
+ ((nfa_regprog_T *)rmp->regprog)->pattern, -1);
+ if (rmp->regprog != NULL)
+ result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
+ vim_regfree(save_prog); /* free old engine */
+ p_re = re_engine;
+ }
+ return result;
}
diff --git a/src/regexp.h b/src/regexp.h
--- a/src/regexp.h
+++ b/src/regexp.h
@@ -27,6 +27,18 @@
*/
#define NFA_MAX_BRACES 20
+/*
+ * In the NFA engine: how many states are allowed
+ */
+#define NFA_MAX_STATES 100000
+#define NFA_TOO_MANY_STATES -1
+
+/* Which regexp engine to use? Needed for vim_regcomp().
+ * Must match with 'regexpengine'. */
+#define AUTOMATIC_ENGINE 0
+#define BACKTRACKING_ENGINE 1
+#define NFA_ENGINE 2
+
typedef struct regengine regengine_T;
/*
@@ -96,9 +108,7 @@ typedef struct
#ifdef FEAT_SYN_HL
int reghasz;
#endif
-#ifdef DEBUG
char_u *pattern;
-#endif
int nsubexp; /* number of () */
int nstate;
nfa_state_T state[1]; /* actually longer.. */
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -5524,6 +5524,13 @@ nfa_regmatch(prog, start, submatch, m)
nextlist->n = 0; /* clear nextlist */
nextlist->has_pim = FALSE;
++nfa_listid;
+ if (p_re == AUTOMATIC_ENGINE && nfa_listid >= NFA_MAX_STATES)
+ {
+ /* too many states, retry with old engine */
+ nfa_match = NFA_TOO_MANY_STATES;
+ goto theend;
+ }
+
thislist->id = nfa_listid;
nextlist->id = nfa_listid + 1;
@@ -5706,6 +5713,11 @@ nfa_regmatch(prog, start, submatch, m)
*/
result = recursive_regmatch(t->state, NULL, prog,
submatch, m, &listids);
+ if (result == NFA_TOO_MANY_STATES)
+ {
+ nfa_match = result;
+ goto theend;
+ }
/* for \@! and \@<! it is a match when the result is
* FALSE */
@@ -5819,6 +5831,11 @@ nfa_regmatch(prog, start, submatch, m)
/* First try matching the pattern. */
result = recursive_regmatch(t->state, NULL, prog,
submatch, m, &listids);
+ if (result == NFA_TOO_MANY_STATES)
+ {
+ nfa_match = result;
+ goto theend;
+ }
if (result)
{
int bytelen;
@@ -6762,6 +6779,7 @@ nfa_regtry(prog, col)
int i;
regsubs_T subs, m;
nfa_state_T *start = prog->start;
+ int result;
#ifdef ENABLE_LOG
FILE *f;
#endif
@@ -6793,8 +6811,11 @@ nfa_regtry(prog, col)
clear_sub(&m.synt);
#endif
- if (nfa_regmatch(prog, start, &subs, &m) == FALSE)
+ result = nfa_regmatch(prog, start, &subs, &m);
+ if (result == FALSE)
return 0;
+ else if (result == NFA_TOO_MANY_STATES)
+ return result;
cleanup_subexpr();
if (REG_MULTI)
@@ -6931,9 +6952,7 @@ nfa_regexec_both(line, startcol)
nfa_nsubexpr = prog->nsubexp;
nfa_listid = 1;
nfa_alt_listid = 2;
-#ifdef DEBUG
nfa_regengine.expr = prog->pattern;
-#endif
if (prog->reganch && col > 0)
return 0L;
@@ -6981,9 +7000,7 @@ nfa_regexec_both(line, startcol)
retval = nfa_regtry(prog, col);
-#ifdef DEBUG
nfa_regengine.expr = NULL;
-#endif
theend:
return retval;
@@ -7005,9 +7022,7 @@ nfa_regcomp(expr, re_flags)
if (expr == NULL)
return NULL;
-#ifdef DEBUG
nfa_regengine.expr = expr;
-#endif
init_class_tab();
@@ -7084,10 +7099,8 @@ nfa_regcomp(expr, re_flags)
/* Remember whether this pattern has any \z specials in it. */
prog->reghasz = re_has_z;
#endif
-#ifdef DEBUG
prog->pattern = vim_strsave(expr);
nfa_regengine.expr = NULL;
-#endif
out:
vim_free(post_start);
@@ -7101,9 +7114,7 @@ fail:
#ifdef ENABLE_LOG
nfa_postfix_dump(expr, FAIL);
#endif
-#ifdef DEBUG
nfa_regengine.expr = NULL;
-#endif
goto out;
}
@@ -7117,9 +7128,7 @@ nfa_regfree(prog)
if (prog != NULL)
{
vim_free(((nfa_regprog_T *)prog)->match_text);
-#ifdef DEBUG
vim_free(((nfa_regprog_T *)prog)->pattern);
-#endif
vim_free(prog);
}
}