Patch 7.3.1133
Problem:    New regexp engine is a bit slow.
Solution:   Skip ahead to a character that must match.  Don't try matching a
            "^" patter past the start of line.
Files:      src/regexp_nfa.c, src/regexp.h


*** ../vim-7.3.1132/src/regexp_nfa.c    2013-06-06 18:04:47.000000000 +0200
--- src/regexp_nfa.c    2013-06-06 18:37:23.000000000 +0200
***************
*** 257,262 ****
--- 257,264 ----
  static int nfa_ll_index = 0;
  
  static int nfa_regcomp_start __ARGS((char_u *expr, int re_flags));
+ static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
+ static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
  static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int 
extra_newl));
  static int nfa_emit_equi_class __ARGS((int c, int neg));
  static int nfa_regatom __ARGS((void));
***************
*** 331,336 ****
--- 333,487 ----
  }
  
  /*
+  * Figure out if the NFA state list starts with an anchor, must match at start
+  * of the line.
+  */
+     static int
+ nfa_get_reganch(start, depth)
+     nfa_state_T *start;
+     int               depth;
+ {
+     nfa_state_T *p = start;
+ 
+     if (depth > 4)
+       return 0;
+ 
+     while (p != NULL)
+     {
+       switch (p->c)
+       {
+           case NFA_BOL:
+           case NFA_BOF:
+               return 1; /* yes! */
+ 
+           case NFA_ZSTART:
+           case NFA_ZEND:
+           case NFA_CURSOR:
+           case NFA_VISUAL:
+ 
+           case NFA_MOPEN:
+           case NFA_MOPEN1:
+           case NFA_MOPEN2:
+           case NFA_MOPEN3:
+           case NFA_MOPEN4:
+           case NFA_MOPEN5:
+           case NFA_MOPEN6:
+           case NFA_MOPEN7:
+           case NFA_MOPEN8:
+           case NFA_MOPEN9:
+           case NFA_NOPEN:
+ #ifdef FEAT_SYN_HL
+           case NFA_ZOPEN:
+           case NFA_ZOPEN1:
+           case NFA_ZOPEN2:
+           case NFA_ZOPEN3:
+           case NFA_ZOPEN4:
+           case NFA_ZOPEN5:
+           case NFA_ZOPEN6:
+           case NFA_ZOPEN7:
+           case NFA_ZOPEN8:
+           case NFA_ZOPEN9:
+ #endif
+               p = p->out;
+               break;
+ 
+           case NFA_SPLIT:
+               return nfa_get_reganch(p->out, depth + 1)
+                                      && nfa_get_reganch(p->out1, depth + 1);
+ 
+           default:
+               return 0; /* noooo */
+       }
+     }
+     return 0;
+ }
+ 
+ /*
+  * Figure out if the NFA state list starts with a character which must match
+  * at start of the match.
+  */
+     static int
+ nfa_get_regstart(start, depth)
+     nfa_state_T *start;
+     int               depth;
+ {
+     nfa_state_T *p = start;
+ 
+     if (depth > 4)
+       return 0;
+ 
+     while (p != NULL)
+     {
+       switch (p->c)
+       {
+           /* all kinds of zero-width matches */
+           case NFA_BOL:
+           case NFA_BOF:
+           case NFA_BOW:
+           case NFA_EOW:
+           case NFA_ZSTART:
+           case NFA_ZEND:
+           case NFA_CURSOR:
+           case NFA_VISUAL:
+           case NFA_LNUM:
+           case NFA_LNUM_GT:
+           case NFA_LNUM_LT:
+           case NFA_COL:
+           case NFA_COL_GT:
+           case NFA_COL_LT:
+           case NFA_VCOL:
+           case NFA_VCOL_GT:
+           case NFA_VCOL_LT:
+           case NFA_MARK:
+           case NFA_MARK_GT:
+           case NFA_MARK_LT:
+ 
+           case NFA_MOPEN:
+           case NFA_MOPEN1:
+           case NFA_MOPEN2:
+           case NFA_MOPEN3:
+           case NFA_MOPEN4:
+           case NFA_MOPEN5:
+           case NFA_MOPEN6:
+           case NFA_MOPEN7:
+           case NFA_MOPEN8:
+           case NFA_MOPEN9:
+           case NFA_NOPEN:
+ #ifdef FEAT_SYN_HL
+           case NFA_ZOPEN:
+           case NFA_ZOPEN1:
+           case NFA_ZOPEN2:
+           case NFA_ZOPEN3:
+           case NFA_ZOPEN4:
+           case NFA_ZOPEN5:
+           case NFA_ZOPEN6:
+           case NFA_ZOPEN7:
+           case NFA_ZOPEN8:
+           case NFA_ZOPEN9:
+ #endif
+               p = p->out;
+               break;
+ 
+           case NFA_SPLIT:
+           {
+               int c1 = nfa_get_regstart(p->out, depth + 1);
+               int c2 = nfa_get_regstart(p->out1, depth + 1);
+ 
+               if (c1 == c2)
+                   return c1; /* yes! */
+               return 0;
+           }
+ 
+           default:
+               if (p->c > 0 && !p->negated)
+                   return p->c; /* yes! */
+               return 0;
+       }
+     }
+     return 0;
+ }
+ 
+ /*
   * Allocate more space for post_start.  Called when
   * running above the estimated number of states.
   */
***************
*** 2121,2126 ****
--- 2272,2281 ----
      if (debugf != NULL)
      {
        nfa_print_state(debugf, prog->start);
+ 
+       fprintf(debugf, "reganch: %d\n", prog->reganch);
+       fprintf(debugf, "regstart: %d\n", prog->regstart);
+ 
        fclose(debugf);
      }
  }
***************
*** 4248,4253 ****
--- 4403,4412 ----
                t = &neglist->t[0];
                neglist->n--;
                listidx--;
+ #ifdef ENABLE_LOG
+               fprintf(log_fd, "     using neglist entry, %d remaining\n",
+                       neglist->n);
+ #endif
            }
            else
                t = &thislist->t[listidx];
***************
*** 4688,4694 ****
  
            case NFA_END_NEG_RANGE:
                /* This follows a series of negated nodes, like:
!                * CHAR(x), NFA_NOT, CHAR(y), NFA_NOT etc. */
                if (curc > 0)
                {
                    ll = nextlist;
--- 4847,4853 ----
  
            case NFA_END_NEG_RANGE:
                /* This follows a series of negated nodes, like:
!                * NOT CHAR(x), NOT CHAR(y), etc. */
                if (curc > 0)
                {
                    ll = nextlist;
***************
*** 5304,5316 ****
   * Returns 0 for failure, number of lines contained in the match otherwise.
   */
      static long
! nfa_regexec_both(line, col)
      char_u    *line;
!     colnr_T   col;            /* column to start looking for match */
  {
      nfa_regprog_T   *prog;
      long          retval = 0L;
      int                   i;
  
      if (REG_MULTI)
      {
--- 5463,5476 ----
   * Returns 0 for failure, number of lines contained in the match otherwise.
   */
      static long
! nfa_regexec_both(line, startcol)
      char_u    *line;
!     colnr_T   startcol;       /* column to start looking for match */
  {
      nfa_regprog_T   *prog;
      long          retval = 0L;
      int                   i;
+     colnr_T       col = startcol;
  
      if (REG_MULTI)
      {
***************
*** 5333,5342 ****
        goto theend;
      }
  
-     /* If the start column is past the maximum column: no need to try. */
-     if (ireg_maxcol > 0 && col >= ireg_maxcol)
-       goto theend;
- 
      /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
      if (prog->regflags & RF_ICASE)
        ireg_ic = TRUE;
--- 5493,5498 ----
***************
*** 5361,5366 ****
--- 5517,5548 ----
      nfa_regengine.expr = prog->pattern;
  #endif
  
+     if (prog->reganch && col > 0)
+       return 0L;
+ 
+     if (prog->regstart != NUL)
+     {
+       char_u *s;
+ 
+       /* Skip until the char we know it must start with.
+        * Used often, do some work to avoid call overhead. */
+       if (!ireg_ic
+ #ifdef FEAT_MBYTE
+                   && !has_mbyte
+ #endif
+                   )
+           s = vim_strbyte(regline + col, prog->regstart);
+       else
+           s = cstrchr(regline + col, prog->regstart);
+       if (s == NULL)
+           return 0L;
+       col = (int)(s - regline);
+     }
+ 
+     /* If the start column is past the maximum column: no need to try. */
+     if (ireg_maxcol > 0 && col >= ireg_maxcol)
+       goto theend;
+ 
      nstate = prog->nstate;
      for (i = 0; i < nstate; ++i)
      {
***************
*** 5459,5464 ****
--- 5641,5650 ----
      prog->has_zend = nfa_has_zend;
      prog->has_backref = nfa_has_backref;
      prog->nsubexp = regnpar;
+ 
+     prog->reganch = nfa_get_reganch(prog->start, 0);
+     prog->regstart = nfa_get_regstart(prog->start, 0);
+ 
  #ifdef ENABLE_LOG
      nfa_postfix_dump(expr, OK);
      nfa_dump(prog);
*** ../vim-7.3.1132/src/regexp.h        2013-06-03 12:17:00.000000000 +0200
--- src/regexp.h        2013-06-06 17:19:23.000000000 +0200
***************
*** 87,92 ****
--- 87,96 ----
      unsigned          regflags;
  
      nfa_state_T               *start;         /* points into state[] */
+ 
+     int                       reganch;        /* pattern starts with ^ */
+     int                       regstart;       /* char at start of pattern */
+ 
      int                       has_zend;       /* pattern contains \ze */
      int                       has_backref;    /* pattern contains \1 .. \9 */
  #ifdef FEAT_SYN_HL
*** ../vim-7.3.1132/src/version.c       2013-06-06 18:04:47.000000000 +0200
--- src/version.c       2013-06-06 18:43:55.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
  {   /* Add new patch number below this line */
+ /**/
+     1133,
  /**/

-- 
>From "know your smileys":
 %      Bike accident.  A bit far-fetched, I suppose; although...
             o      _     _         _
     _o     /\_   _ \\o  (_)\__/o  (_)
   _< \_   _>(_) (_)/<_    \_| \   _|/' \/
  (_)>(_) (_)        (_)   (_)    (_)'  _\o_

 /// Bram Moolenaar -- [email protected] -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.


Raspunde prin e-mail lui