Patch 7.3.1029
Problem:    New regexp performance: Unused position state being copied.
Solution:   Keep track of which positions are actually valid.
Files:      src/regexp_nfa.c


*** ../vim-7.3.1028/src/regexp_nfa.c    2013-05-26 21:47:22.000000000 +0200
--- src/regexp_nfa.c    2013-05-26 22:45:26.000000000 +0200
***************
*** 1649,1670 ****
      return OK;
  }
  
- typedef union
- {
-     struct multipos
-     {
-       lpos_T  start;
-       lpos_T  end;
-     } multilist[NSUBEXP];
-     struct linepos
-     {
-       char_u  *start;
-       char_u  *end;
-     } linelist[NSUBEXP];
- } regsub_T;
- 
- static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, 
regsub_T *m));
- 
  #ifdef DEBUG
  static char_u code[50];
  
--- 1649,1654 ----
***************
*** 2489,2494 ****
--- 2473,2498 ----
   * NFA execution code.
   ****************************************************************/
  
+ typedef struct
+ {
+     int           in_use; /* number of subexpr with useful info */
+ 
+     /* When REG_MULTI is TRUE multilist is used, otherwise linelist. */
+     union
+     {
+       struct multipos
+       {
+           lpos_T      start;
+           lpos_T      end;
+       } multilist[NSUBEXP];
+       struct linepos
+       {
+           char_u      *start;
+           char_u      *end;
+       } linelist[NSUBEXP];
+     };
+ } regsub_T;
+ 
  /* nfa_thread_T contains execution information of a NFA state */
  typedef struct
  {
***************
*** 2507,2513 ****
  static int nfa_match;
  
  static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, 
int off, int lid));
- 
  static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T 
*m, int lid, int *ip));
  
      static void
--- 2511,2516 ----
***************
*** 2521,2527 ****
--- 2524,2532 ----
      int                       subidx;
      nfa_thread_T      *lastthread;
      lpos_T            save_lpos;
+     int                       save_in_use;
      char_u            *save_ptr;
+     int                       i;
  
      if (l == NULL || state == NULL)
        return;
***************
*** 2557,2572 ****
                state->lastlist = lid;
                lastthread = &l->t[l->n++];
                lastthread->state = state;
! 
!               /* Copy the match start and end positions. */
!               if (REG_MULTI)
!                   mch_memmove(&lastthread->sub.multilist[0],
!                               &m->multilist[0],
!                               sizeof(struct multipos) * nfa_nsubexpr);
!               else
!                   mch_memmove(&lastthread->sub.linelist[0],
!                               &m->linelist[0],
!                               sizeof(struct linepos) * nfa_nsubexpr);
            }
      }
  
--- 2562,2580 ----
                state->lastlist = lid;
                lastthread = &l->t[l->n++];
                lastthread->state = state;
!               lastthread->sub.in_use = m->in_use;
!               if (m->in_use > 0)
!               {
!                   /* Copy the match start and end positions. */
!                   if (REG_MULTI)
!                       mch_memmove(&lastthread->sub.multilist[0],
!                                   &m->multilist[0],
!                                   sizeof(struct multipos) * m->in_use);
!                   else
!                       mch_memmove(&lastthread->sub.linelist[0],
!                                   &m->linelist[0],
!                                   sizeof(struct linepos) * m->in_use);
!               }
            }
      }
  
***************
*** 2636,2644 ****
            else
                subidx = state->c - NFA_MOPEN;
  
            if (REG_MULTI)
            {
!               save_lpos = m->multilist[subidx].start;
                if (off == -1)
                {
                    m->multilist[subidx].start.lnum = reglnum + 1;
--- 2644,2668 ----
            else
                subidx = state->c - NFA_MOPEN;
  
+           /* Set the position (with "off") in the subexpression.  Save and
+            * restore it when it was in use.  Otherwise fill any gap. */
            if (REG_MULTI)
            {
!               if (subidx < m->in_use)
!               {
!                   save_lpos = m->multilist[subidx].start;
!                   save_in_use = -1;
!               }
!               else
!               {
!                   save_in_use = m->in_use;
!                   for (i = m->in_use; i < subidx; ++i)
!                   {
!                       m->multilist[i].start.lnum = -1;
!                       m->multilist[i].end.lnum = -1;
!                   }
!                   m->in_use = subidx + 1;
!               }
                if (off == -1)
                {
                    m->multilist[subidx].start.lnum = reglnum + 1;
***************
*** 2653,2668 ****
            }
            else
            {
!               save_ptr = m->linelist[subidx].start;
                m->linelist[subidx].start = reginput + off;
            }
  
            addstate(l, state->out, m, off, lid);
  
!           if (REG_MULTI)
!               m->multilist[subidx].start = save_lpos;
            else
!               m->linelist[subidx].start = save_ptr;
            break;
  
        case NFA_MCLOSE + 0:
--- 2677,2711 ----
            }
            else
            {
!               if (subidx < m->in_use)
!               {
!                   save_ptr = m->linelist[subidx].start;
!                   save_in_use = -1;
!               }
!               else
!               {
!                   save_in_use = m->in_use;
!                   for (i = m->in_use; i < subidx; ++i)
!                   {
!                       m->linelist[i].start = NULL;
!                       m->linelist[i].end = NULL;
!                   }
!                   m->in_use = subidx + 1;
!               }
                m->linelist[subidx].start = reginput + off;
            }
  
            addstate(l, state->out, m, off, lid);
  
!           if (save_in_use == -1)
!           {
!               if (REG_MULTI)
!                   m->multilist[subidx].start = save_lpos;
!               else
!                   m->linelist[subidx].start = save_ptr;
!           }
            else
!               m->in_use = save_in_use;
            break;
  
        case NFA_MCLOSE + 0:
***************
*** 2686,2691 ****
--- 2729,2739 ----
            else
                subidx = state->c - NFA_MCLOSE;
  
+           /* We don't fill in gaps here, there must have been an MOPEN that
+            * has done that. */
+           save_in_use = m->in_use;
+           if (m->in_use <= subidx)
+               m->in_use = subidx + 1;
            if (REG_MULTI)
            {
                save_lpos = m->multilist[subidx].end;
***************
*** 2713,2718 ****
--- 2761,2767 ----
                m->multilist[subidx].end = save_lpos;
            else
                m->linelist[subidx].end = save_ptr;
+           m->in_use = save_in_use;
            break;
      }
  }
***************
*** 2917,2922 ****
--- 2966,2973 ----
      }
  }
  
+ static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, 
regsub_T *m));
+ 
  /*
   * Main matching routine.
   *
***************
*** 2960,2966 ****
  #endif
      nfa_match = FALSE;
  
!     /* Allocate memory for the lists of nodes */
      size = (nstate + 1) * sizeof(nfa_thread_T);
      list[0].t = (nfa_thread_T *)lalloc(size, TRUE);
      list[1].t = (nfa_thread_T *)lalloc(size, TRUE);
--- 3011,3017 ----
  #endif
      nfa_match = FALSE;
  
!     /* Allocate memory for the lists of nodes. */
      size = (nstate + 1) * sizeof(nfa_thread_T);
      list[0].t = (nfa_thread_T *)lalloc(size, TRUE);
      list[1].t = (nfa_thread_T *)lalloc(size, TRUE);
***************
*** 3099,3105 ****
            {
            case NFA_MATCH:
                nfa_match = TRUE;
!               *submatch = t->sub;
  #ifdef ENABLE_LOG
                for (j = 0; j < 4; j++)
                    if (REG_MULTI)
--- 3150,3168 ----
            {
            case NFA_MATCH:
                nfa_match = TRUE;
!               submatch->in_use = t->sub.in_use;
!               if (REG_MULTI)
!                   for (j = 0; j < submatch->in_use; j++)
!                   {
!                       submatch->multilist[j].start = 
t->sub.multilist[j].start;
!                       submatch->multilist[j].end = t->sub.multilist[j].end;
!                   }
!               else
!                   for (j = 0; j < submatch->in_use; j++)
!                   {
!                       submatch->linelist[j].start = t->sub.linelist[j].start;
!                       submatch->linelist[j].end = t->sub.linelist[j].end;
!                   }
  #ifdef ENABLE_LOG
                for (j = 0; j < 4; j++)
                    if (REG_MULTI)
***************
*** 3194,3210 ****
                    reglnum = old_reglnum;
                    /* Copy submatch info from the recursive call */
                    if (REG_MULTI)
!                       for (j = 1; j < nfa_nsubexpr; j++)
                        {
                            t->sub.multilist[j].start = m->multilist[j].start;
                            t->sub.multilist[j].end = m->multilist[j].end;
                        }
                    else
!                       for (j = 1; j < nfa_nsubexpr; j++)
                        {
                            t->sub.linelist[j].start = m->linelist[j].start;
                            t->sub.linelist[j].end = m->linelist[j].end;
                        }
                    /* t->state->out1 is the corresponding END_INVISIBLE node */
                    addstate_here(thislist, t->state->out1->out, &t->sub,
                                                            listid, &listidx);
--- 3257,3275 ----
                    reglnum = old_reglnum;
                    /* Copy submatch info from the recursive call */
                    if (REG_MULTI)
!                       for (j = 1; j < m->in_use; j++)
                        {
                            t->sub.multilist[j].start = m->multilist[j].start;
                            t->sub.multilist[j].end = m->multilist[j].end;
                        }
                    else
!                       for (j = 1; j < m->in_use; j++)
                        {
                            t->sub.linelist[j].start = m->linelist[j].start;
                            t->sub.linelist[j].end = m->linelist[j].end;
                        }
+                   t->sub.in_use = m->in_use;
+ 
                    /* t->state->out1 is the corresponding END_INVISIBLE node */
                    addstate_here(thislist, t->state->out1->out, &t->sub,
                                                            listid, &listidx);
***************
*** 3703,3708 ****
--- 3768,3775 ----
        vim_memset(sub.linelist, 0, sizeof(struct linepos) * nfa_nsubexpr);
        vim_memset(m.linelist, 0, sizeof(struct linepos) * nfa_nsubexpr);
      }
+     sub.in_use = 0;
+     m.in_use = 0;
  
      if (nfa_regmatch(start, &sub, &m) == FALSE)
        return 0;
***************
*** 3710,3716 ****
      cleanup_subexpr();
      if (REG_MULTI)
      {
!       for (i = 0; i < nfa_nsubexpr; i++)
        {
            reg_startpos[i] = sub.multilist[i].start;
            reg_endpos[i] = sub.multilist[i].end;
--- 3777,3783 ----
      cleanup_subexpr();
      if (REG_MULTI)
      {
!       for (i = 0; i < sub.in_use; i++)
        {
            reg_startpos[i] = sub.multilist[i].start;
            reg_endpos[i] = sub.multilist[i].end;
***************
*** 3732,3738 ****
      }
      else
      {
!       for (i = 0; i < nfa_nsubexpr; i++)
        {
            reg_startp[i] = sub.linelist[i].start;
            reg_endp[i] = sub.linelist[i].end;
--- 3799,3805 ----
      }
      else
      {
!       for (i = 0; i < sub.in_use; i++)
        {
            reg_startp[i] = sub.linelist[i].start;
            reg_endp[i] = sub.linelist[i].end;
*** ../vim-7.3.1028/src/version.c       2013-05-26 21:47:22.000000000 +0200
--- src/version.c       2013-05-26 22:53:55.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
  {   /* Add new patch number below this line */
+ /**/
+     1029,
  /**/

-- 
I used to be indecisive, now I'm not sure.

 /// Bram Moolenaar -- [email protected] -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.


Raspunde prin e-mail lui