Patch 7.3.1169
Problem:    New regexp engine: some work is done while executing a pattern,
            even though the result is predictable.
Solution:   Do the work while compiling the pattern.
Files:      src/regexp_nfa.c


*** ../vim-7.3.1168/src/regexp_nfa.c    2013-06-11 18:42:28.000000000 +0200
--- src/regexp_nfa.c    2013-06-11 22:40:12.000000000 +0200
***************
*** 64,72 ****
--- 64,76 ----
      NFA_NOPEN,                            /* Start of subexpression marked 
with \%( */
      NFA_NCLOSE,                           /* End of subexpr. marked with \%( 
... \) */
      NFA_START_INVISIBLE,
+     NFA_START_INVISIBLE_FIRST,
      NFA_START_INVISIBLE_NEG,
+     NFA_START_INVISIBLE_NEG_FIRST,
      NFA_START_INVISIBLE_BEFORE,
+     NFA_START_INVISIBLE_BEFORE_FIRST,
      NFA_START_INVISIBLE_BEFORE_NEG,
+     NFA_START_INVISIBLE_BEFORE_NEG_FIRST,
      NFA_START_PATTERN,
      NFA_END_INVISIBLE,
      NFA_END_INVISIBLE_NEG,
***************
*** 286,291 ****
--- 290,296 ----
  static int *re2post __ARGS((void));
  static nfa_state_T *alloc_state __ARGS((int c, nfa_state_T *out, nfa_state_T 
*out1));
  static nfa_state_T *post2nfa __ARGS((int *postfix, int *end, int 
nfa_calc_size));
+ static void nfa_postprocess __ARGS((nfa_regprog_T *prog));
  static int check_char_class __ARGS((int class, int c));
  static void st_error __ARGS((int *postfix, int *end, int *p));
  static void nfa_save_listids __ARGS((nfa_regprog_T *prog, int *list));
***************
*** 297,302 ****
--- 302,309 ----
  static void nfa_regfree __ARGS((regprog_T *prog));
  static int nfa_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
  static long nfa_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T 
*buf, linenr_T lnum, colnr_T col, proftime_T *tm));
+ static int match_follows __ARGS((nfa_state_T *startstate, int depth));
+ static int failure_chance __ARGS((nfa_state_T *state, int depth));
  
  /* helper functions used when doing re2post() ... regatom() parsing */
  #define EMIT(c)       do {                            \
***************
*** 2040,2051 ****
--- 2047,2066 ----
        case NFA_NOPEN:             STRCPY(code, "NFA_NOPEN"); break;
        case NFA_NCLOSE:            STRCPY(code, "NFA_NCLOSE"); break;
        case NFA_START_INVISIBLE:   STRCPY(code, "NFA_START_INVISIBLE"); break;
+       case NFA_START_INVISIBLE_FIRST:
+                            STRCPY(code, "NFA_START_INVISIBLE_FIRST"); break;
        case NFA_START_INVISIBLE_NEG:
                               STRCPY(code, "NFA_START_INVISIBLE_NEG"); break;
+       case NFA_START_INVISIBLE_NEG_FIRST:
+                        STRCPY(code, "NFA_START_INVISIBLE_NEG_FIRST"); break;
        case NFA_START_INVISIBLE_BEFORE:
                            STRCPY(code, "NFA_START_INVISIBLE_BEFORE"); break;
+       case NFA_START_INVISIBLE_BEFORE_FIRST:
+                     STRCPY(code, "NFA_START_INVISIBLE_BEFORE_FIRST"); break;
        case NFA_START_INVISIBLE_BEFORE_NEG:
                        STRCPY(code, "NFA_START_INVISIBLE_BEFORE_NEG"); break;
+       case NFA_START_INVISIBLE_BEFORE_NEG_FIRST:
+                 STRCPY(code, "NFA_START_INVISIBLE_BEFORE_NEG_FIRST"); break;
        case NFA_START_PATTERN:   STRCPY(code, "NFA_START_PATTERN"); break;
        case NFA_END_INVISIBLE:     STRCPY(code, "NFA_END_INVISIBLE"); break;
        case NFA_END_INVISIBLE_NEG: STRCPY(code, "NFA_END_INVISIBLE_NEG"); 
break;
***************
*** 3318,3323 ****
--- 3333,3395 ----
  #undef PUSH
  }
  
+ /*
+  * After building the NFA program, inspect it to add optimization hints.
+  */
+     static void
+ nfa_postprocess(prog)
+     nfa_regprog_T   *prog;
+ {
+     int i;
+     int c;
+ 
+     for (i = 0; i < prog->nstate; ++i)
+     {
+       c = prog->state[i].c;
+       if (c == NFA_START_INVISIBLE
+               || c == NFA_START_INVISIBLE_NEG
+               || c == NFA_START_INVISIBLE_BEFORE
+               || c == NFA_START_INVISIBLE_BEFORE_NEG)
+       {
+           int directly;
+ 
+           /* Do it directly when what follows is possibly the end of the
+            * match. */
+           if (match_follows(prog->state[i].out1->out, 0))
+               directly = TRUE;
+           else
+           {
+               int ch_invisible = failure_chance(prog->state[i].out, 0);
+               int ch_follows = failure_chance(prog->state[i].out1->out, 0);
+ 
+               /* Postpone when the invisible match is expensive or has a
+                * lower chance of failing. */
+               if (c == NFA_START_INVISIBLE_BEFORE
+                    || c == NFA_START_INVISIBLE_BEFORE_NEG)
+               {
+                   /* "before" matches are very expensive when
+                    * unbounded, always prefer what follows then,
+                    * unless what follows will always match.
+                    * Otherwise strongly prefer what follows. */
+                   if (prog->state[i].val <= 0 && ch_follows > 0)
+                       directly = FALSE;
+                   else
+                       directly = ch_follows * 10 < ch_invisible;
+               }
+               else
+               {
+                   /* normal invisible, first do the one with the
+                    * highest failure chance */
+                   directly = ch_follows < ch_invisible;
+               }
+           }
+           if (directly)
+               /* switch to the _FIRST state */
+               ++prog->state[i].c;
+       }
+     }
+ }
+ 
  /****************************************************************
   * NFA execution code.
   ****************************************************************/
***************
*** 3457,3463 ****
  static void copy_sub_off __ARGS((regsub_T *to, regsub_T *from));
  static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2));
  static int has_state_with_pos __ARGS((nfa_list_T *l, nfa_state_T *state, 
regsubs_T *subs));
- static int match_follows __ARGS((nfa_state_T *startstate, int depth));
  static int state_in_list __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T 
*subs));
  static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T 
*subs, nfa_pim_T *pim, int off));
  static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, 
regsubs_T *subs, nfa_pim_T *pim, int *ip));
--- 3529,3534 ----
***************
*** 3703,3711 ****
--- 3774,3786 ----
                                     || match_follows(state->out1, depth + 1);
  
            case NFA_START_INVISIBLE:
+           case NFA_START_INVISIBLE_FIRST:
            case NFA_START_INVISIBLE_BEFORE:
+           case NFA_START_INVISIBLE_BEFORE_FIRST:
            case NFA_START_INVISIBLE_NEG:
+           case NFA_START_INVISIBLE_NEG_FIRST:
            case NFA_START_INVISIBLE_BEFORE_NEG:
+           case NFA_START_INVISIBLE_BEFORE_NEG_FIRST:
            case NFA_COMPOSING:
                /* skip ahead to next state */
                state = state->out1->out;
***************
*** 4440,4446 ****
      }
  
      if (state->c == NFA_START_INVISIBLE_BEFORE
!         || state->c == NFA_START_INVISIBLE_BEFORE_NEG)
      {
        /* The recursive match must end at the current position. When "pim" is
         * not NULL it specifies the current position. */
--- 4515,4523 ----
      }
  
      if (state->c == NFA_START_INVISIBLE_BEFORE
!         || state->c == NFA_START_INVISIBLE_BEFORE_FIRST
!         || state->c == NFA_START_INVISIBLE_BEFORE_NEG
!         || state->c == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)
      {
        /* The recursive match must end at the current position. When "pim" is
         * not NULL it specifies the current position. */
***************
*** 4581,4587 ****
      return result;
  }
  
- static int failure_chance __ARGS((nfa_state_T *state, int depth));
  static int skip_to_start __ARGS((int c, colnr_T *colp));
  static long find_match_text __ARGS((colnr_T startcol, int regstart, char_u 
*match_text));
  
--- 4658,4663 ----
***************
*** 5093,5142 ****
                break;
  
            case NFA_START_INVISIBLE:
            case NFA_START_INVISIBLE_NEG:
            case NFA_START_INVISIBLE_BEFORE:
            case NFA_START_INVISIBLE_BEFORE_NEG:
                {
-                   int directly = FALSE;
- 
  #ifdef ENABLE_LOG
                    fprintf(log_fd, "Failure chance invisible: %d, what 
follows: %d\n",
                            failure_chance(t->state->out, 0),
                            failure_chance(t->state->out1->out, 0));
  #endif
!                   /* Do it directly when what follows is possibly the end of
!                    * the match.
!                    * Do it directly if there already is a PIM.
!                    * Postpone when the invisible match is expensive or has a
!                    * lower chance of failing. */
!                   if (match_follows(t->state->out1->out, 0)
!                                          || t->pim.result != NFA_PIM_UNUSED)
!                       directly = TRUE;
!                   else
!                   {
!                       int ch_invisible = failure_chance(t->state->out, 0);
!                       int ch_follows = failure_chance(t->state->out1->out, 0);
! 
!                       if (t->state->c == NFA_START_INVISIBLE_BEFORE
!                            || t->state->c == NFA_START_INVISIBLE_BEFORE_NEG)
!                       {
!                           /* "before" matches are very expensive when
!                            * unbounded, always prefer what follows then,
!                            * unless what follows will always match.
!                            * Otherwise strongly prefer what follows. */
!                           if (t->state->val <= 0 && ch_follows > 0)
!                               directly = FALSE;
!                           else
!                               directly = ch_follows * 10 < ch_invisible;
!                       }
!                       else
!                       {
!                           /* normal invisible, first do the one with the
!                            * highest failure chance */
!                           directly = ch_follows < ch_invisible;
!                       }
!                   }
!                   if (directly)
                    {
                        /*
                         * First try matching the invisible match, then what
--- 5169,5194 ----
                break;
  
            case NFA_START_INVISIBLE:
+           case NFA_START_INVISIBLE_FIRST:
            case NFA_START_INVISIBLE_NEG:
+           case NFA_START_INVISIBLE_NEG_FIRST:
            case NFA_START_INVISIBLE_BEFORE:
+           case NFA_START_INVISIBLE_BEFORE_FIRST:
            case NFA_START_INVISIBLE_BEFORE_NEG:
+           case NFA_START_INVISIBLE_BEFORE_NEG_FIRST:
                {
  #ifdef ENABLE_LOG
                    fprintf(log_fd, "Failure chance invisible: %d, what 
follows: %d\n",
                            failure_chance(t->state->out, 0),
                            failure_chance(t->state->out1->out, 0));
  #endif
!                   /* Do it directly if there already is a PIM or when
!                    * nfa_postprocess() detected it will work better. */
!                   if (t->pim.result != NFA_PIM_UNUSED
!                        || t->state->c == NFA_START_INVISIBLE_FIRST
!                        || t->state->c == NFA_START_INVISIBLE_NEG_FIRST
!                        || t->state->c == NFA_START_INVISIBLE_BEFORE_FIRST
!                        || t->state->c == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)
                    {
                        /*
                         * First try matching the invisible match, then what
***************
*** 5148,5155 ****
                        /* for \@! and \@<! it is a match when the result is
                         * FALSE */
                        if (result != (t->state->c == NFA_START_INVISIBLE_NEG
!                                   || t->state->c
!                                          == NFA_START_INVISIBLE_BEFORE_NEG))
                        {
                            /* Copy submatch info from the recursive call */
                            copy_sub_off(&t->subs.norm, &m->norm);
--- 5200,5210 ----
                        /* for \@! and \@<! it is a match when the result is
                         * FALSE */
                        if (result != (t->state->c == NFA_START_INVISIBLE_NEG
!                              || t->state->c == NFA_START_INVISIBLE_NEG_FIRST
!                              || t->state->c
!                                          == NFA_START_INVISIBLE_BEFORE_NEG
!                              || t->state->c
!                                    == NFA_START_INVISIBLE_BEFORE_NEG_FIRST))
                        {
                            /* Copy submatch info from the recursive call */
                            copy_sub_off(&t->subs.norm, &m->norm);
***************
*** 5920,5927 ****
                        /* for \@! and \@<! it is a match when the result is
                         * FALSE */
                        if (result != (pim->state->c == NFA_START_INVISIBLE_NEG
!                                   || pim->state->c
!                                          == NFA_START_INVISIBLE_BEFORE_NEG))
                        {
                            /* Copy submatch info from the recursive call */
                            copy_sub_off(&pim->subs.norm, &m->norm);
--- 5975,5985 ----
                        /* for \@! and \@<! it is a match when the result is
                         * FALSE */
                        if (result != (pim->state->c == NFA_START_INVISIBLE_NEG
!                            || pim->state->c == NFA_START_INVISIBLE_NEG_FIRST
!                            || pim->state->c
!                                          == NFA_START_INVISIBLE_BEFORE_NEG
!                            || pim->state->c
!                                    == NFA_START_INVISIBLE_BEFORE_NEG_FIRST))
                        {
                            /* Copy submatch info from the recursive call */
                            copy_sub_off(&pim->subs.norm, &m->norm);
***************
*** 5944,5951 ****
  
                    /* for \@! and \@<! it is a match when result is FALSE */
                    if (result != (pim->state->c == NFA_START_INVISIBLE_NEG
!                               || pim->state->c
!                                          == NFA_START_INVISIBLE_BEFORE_NEG))
                    {
                        /* Copy submatch info from the recursive call */
                        copy_sub_off(&t->subs.norm, &pim->subs.norm);
--- 6002,6012 ----
  
                    /* for \@! and \@<! it is a match when result is FALSE */
                    if (result != (pim->state->c == NFA_START_INVISIBLE_NEG
!                            || pim->state->c == NFA_START_INVISIBLE_NEG_FIRST
!                            || pim->state->c
!                                          == NFA_START_INVISIBLE_BEFORE_NEG
!                            || pim->state->c
!                                    == NFA_START_INVISIBLE_BEFORE_NEG_FIRST))
                    {
                        /* Copy submatch info from the recursive call */
                        copy_sub_off(&t->subs.norm, &pim->subs.norm);
***************
*** 6413,6421 ****
      prog->has_backref = nfa_has_backref;
      prog->nsubexp = regnpar;
  
      prog->reganch = nfa_get_reganch(prog->start, 0);
      prog->regstart = nfa_get_regstart(prog->start, 0);
- 
      prog->match_text = nfa_get_match_text(prog->start);
  
  #ifdef ENABLE_LOG
--- 6474,6483 ----
      prog->has_backref = nfa_has_backref;
      prog->nsubexp = regnpar;
  
+     nfa_postprocess(prog);
+ 
      prog->reganch = nfa_get_reganch(prog->start, 0);
      prog->regstart = nfa_get_regstart(prog->start, 0);
      prog->match_text = nfa_get_match_text(prog->start);
  
  #ifdef ENABLE_LOG
*** ../vim-7.3.1168/src/version.c       2013-06-11 20:53:24.000000000 +0200
--- src/version.c       2013-06-11 22:43:27.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
  {   /* Add new patch number below this line */
+ /**/
+     1169,
  /**/

-- 
hundred-and-one symptoms of being an internet addict:
156. You forget your friend's name but not her e-mail address.

 /// Bram Moolenaar -- [email protected] -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.


Raspunde prin e-mail lui