Patch 7.3.1137
Problem:    New regexp engine: collections are slow.
Solution:   Handle all characters in one go.
Files:      src/regexp_nfa.c


*** ../vim-7.3.1136/src/regexp_nfa.c    2013-06-06 18:46:00.000000000 +0200
--- src/regexp_nfa.c    2013-06-07 13:40:58.000000000 +0200
***************
*** 34,48 ****
      NFA_SPLIT = -1024,
      NFA_MATCH,
      NFA_SKIP_CHAR,                /* matches a 0-length char */
-     NFA_END_NEG_RANGE,                    /* Used when expanding [^ab] */
  
!     NFA_CONCAT,
      NFA_OR,
      NFA_STAR,                     /* greedy * */
      NFA_STAR_NONGREEDY,                   /* non-greedy * */
      NFA_QUEST,                            /* greedy \? */
      NFA_QUEST_NONGREEDY,          /* non-greedy \? */
-     NFA_NOT,                      /* used for [^ab] negated char ranges */
  
      NFA_BOL,                      /* ^    Begin line */
      NFA_EOL,                      /* $    End line */
--- 34,56 ----
      NFA_SPLIT = -1024,
      NFA_MATCH,
      NFA_SKIP_CHAR,                /* matches a 0-length char */
  
!     NFA_START_COLL,               /* [abc] start */
!     NFA_END_COLL,                 /* [abc] end */
!     NFA_START_NEG_COLL,                   /* [^abc] start */
!     NFA_END_NEG_COLL,             /* [^abc] end (only used in postfix) */
!     NFA_RANGE,                            /* range of the two previous items (only
!                                    * used in postfix) */
!     NFA_RANGE_MIN,                /* low end of a range  */
!     NFA_RANGE_MAX,                /* high end of a range  */
! 
!     NFA_CONCAT,                           /* concatenate two previous items (only
!                                    * used in postfix) */
      NFA_OR,
      NFA_STAR,                     /* greedy * */
      NFA_STAR_NONGREEDY,                   /* non-greedy * */
      NFA_QUEST,                            /* greedy \? */
      NFA_QUEST_NONGREEDY,          /* non-greedy \? */
  
      NFA_BOL,                      /* ^    Begin line */
      NFA_EOL,                      /* $    End line */
***************
*** 260,266 ****
  static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
  static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
  static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
! static int nfa_emit_equi_class __ARGS((int c, int neg));
  static int nfa_regatom __ARGS((void));
  static int nfa_regpiece __ARGS((void));
  static int nfa_regconcat __ARGS((void));
--- 268,274 ----
  static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
  static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
  static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
! static int nfa_emit_equi_class __ARGS((int c));
  static int nfa_regatom __ARGS((void));
  static int nfa_regpiece __ARGS((void));
  static int nfa_regconcat __ARGS((void));
***************
*** 664,684 ****
   * NOTE! When changing this function, also update reg_equi_class()
   */
      static int
! nfa_emit_equi_class(c, neg)
      int           c;
-     int           neg;
  {
!     int       first = TRUE;
!     int       glue = neg == TRUE ? NFA_CONCAT : NFA_OR;
! #define EMIT2(c)              \
!       EMIT(c);                \
!       if (neg == TRUE) {      \
!           EMIT(NFA_NOT);      \
!       }                       \
!       if (first == FALSE)     \
!           EMIT(glue);         \
!       else                    \
!           first = FALSE;      \
  
  #ifdef FEAT_MBYTE
      if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
--- 672,681 ----
   * NOTE! When changing this function, also update reg_equi_class()
   */
      static int
! nfa_emit_equi_class(c)
      int           c;
  {
! #define EMIT2(c)   EMIT(c); EMIT(NFA_CONCAT);
  
  #ifdef FEAT_MBYTE
      if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
***************
*** 687,770 ****
      {
        switch (c)
        {
!           case 'A': case '\300': case '\301': case '\302':
!           case '\303': case '\304': case '\305':
!                   EMIT2('A');     EMIT2('\300');  EMIT2('\301');
!                   EMIT2('\302');  EMIT2('\303');  EMIT2('\304');
!                   EMIT2('\305');
                    return OK;
  
!           case 'C': case '\307':
!                   EMIT2('C');     EMIT2('\307');
                    return OK;
  
!           case 'E': case '\310': case '\311': case '\312': case '\313':
!                   EMIT2('E');     EMIT2('\310');  EMIT2('\311');
!                   EMIT2('\312');  EMIT2('\313');
                    return OK;
  
!           case 'I': case '\314': case '\315': case '\316': case '\317':
!                   EMIT2('I');     EMIT2('\314');  EMIT2('\315');
!                   EMIT2('\316');  EMIT2('\317');
                    return OK;
  
!           case 'N': case '\321':
!                   EMIT2('N');     EMIT2('\321');
                    return OK;
  
!           case 'O': case '\322': case '\323': case '\324': case '\325':
!           case '\326':
!                   EMIT2('O');     EMIT2('\322');  EMIT2('\323');
!                   EMIT2('\324');  EMIT2('\325');  EMIT2('\326');
                    return OK;
  
!           case 'U': case '\331': case '\332': case '\333': case '\334':
!                   EMIT2('U');     EMIT2('\331');  EMIT2('\332');
!                   EMIT2('\333');  EMIT2('\334');
                    return OK;
  
!           case 'Y': case '\335':
!                   EMIT2('Y');     EMIT2('\335');
                    return OK;
  
!           case 'a': case '\340': case '\341': case '\342':
!           case '\343': case '\344': case '\345':
!                   EMIT2('a');     EMIT2('\340');  EMIT2('\341');
!                   EMIT2('\342');  EMIT2('\343');  EMIT2('\344');
!                   EMIT2('\345');
                    return OK;
  
!           case 'c': case '\347':
!                   EMIT2('c');     EMIT2('\347');
                    return OK;
  
!           case 'e': case '\350': case '\351': case '\352': case '\353':
!                   EMIT2('e');     EMIT2('\350');  EMIT2('\351');
!                   EMIT2('\352');  EMIT2('\353');
                    return OK;
  
!           case 'i': case '\354': case '\355': case '\356': case '\357':
!                   EMIT2('i');     EMIT2('\354');  EMIT2('\355');
!                   EMIT2('\356');  EMIT2('\357');
                    return OK;
  
!           case 'n': case '\361':
!                   EMIT2('n');     EMIT2('\361');
                    return OK;
  
!           case 'o': case '\362': case '\363': case '\364': case '\365':
!           case '\366':
!                   EMIT2('o');     EMIT2('\362');  EMIT2('\363');
!                   EMIT2('\364');  EMIT2('\365');  EMIT2('\366');
                    return OK;
  
!           case 'u': case '\371': case '\372': case '\373': case '\374':
!                   EMIT2('u');     EMIT2('\371');  EMIT2('\372');
!                   EMIT2('\373');  EMIT2('\374');
                    return OK;
  
!           case 'y': case '\375': case '\377':
!                   EMIT2('y');     EMIT2('\375');  EMIT2('\377');
                    return OK;
  
            default:
--- 684,767 ----
      {
        switch (c)
        {
!           case 'A': case 0300: case 0301: case 0302:
!           case 0303: case 0304: case 0305:
!                   EMIT2('A');     EMIT2(0300);  EMIT2(0301);
!                   EMIT2(0302);  EMIT2(0303);  EMIT2(0304);
!                   EMIT2(0305);
                    return OK;
  
!           case 'C': case 0307:
!                   EMIT2('C');     EMIT2(0307);
                    return OK;
  
!           case 'E': case 0310: case 0311: case 0312: case 0313:
!                   EMIT2('E');     EMIT2(0310);  EMIT2(0311);
!                   EMIT2(0312);  EMIT2(0313);
                    return OK;
  
!           case 'I': case 0314: case 0315: case 0316: case 0317:
!                   EMIT2('I');     EMIT2(0314);  EMIT2(0315);
!                   EMIT2(0316);  EMIT2(0317);
                    return OK;
  
!           case 'N': case 0321:
!                   EMIT2('N');     EMIT2(0321);
                    return OK;
  
!           case 'O': case 0322: case 0323: case 0324: case 0325:
!           case 0326:
!                   EMIT2('O');     EMIT2(0322);  EMIT2(0323);
!                   EMIT2(0324);  EMIT2(0325);  EMIT2(0326);
                    return OK;
  
!           case 'U': case 0331: case 0332: case 0333: case 0334:
!                   EMIT2('U');     EMIT2(0331);  EMIT2(0332);
!                   EMIT2(0333);  EMIT2(0334);
                    return OK;
  
!           case 'Y': case 0335:
!                   EMIT2('Y');     EMIT2(0335);
                    return OK;
  
!           case 'a': case 0340: case 0341: case 0342:
!           case 0343: case 0344: case 0345:
!                   EMIT2('a');     EMIT2(0340);  EMIT2(0341);
!                   EMIT2(0342);  EMIT2(0343);  EMIT2(0344);
!                   EMIT2(0345);
                    return OK;
  
!           case 'c': case 0347:
!                   EMIT2('c');     EMIT2(0347);
                    return OK;
  
!           case 'e': case 0350: case 0351: case 0352: case 0353:
!                   EMIT2('e');     EMIT2(0350);  EMIT2(0351);
!                   EMIT2(0352);  EMIT2(0353);
                    return OK;
  
!           case 'i': case 0354: case 0355: case 0356: case 0357:
!                   EMIT2('i');     EMIT2(0354);  EMIT2(0355);
!                   EMIT2(0356);  EMIT2(0357);
                    return OK;
  
!           case 'n': case 0361:
!                   EMIT2('n');     EMIT2(0361);
                    return OK;
  
!           case 'o': case 0362: case 0363: case 0364: case 0365:
!           case 0366:
!                   EMIT2('o');     EMIT2(0362);  EMIT2(0363);
!                   EMIT2(0364);  EMIT2(0365);  EMIT2(0366);
                    return OK;
  
!           case 'u': case 0371: case 0372: case 0373: case 0374:
!                   EMIT2('u');     EMIT2(0371);  EMIT2(0372);
!                   EMIT2(0373);  EMIT2(0374);
                    return OK;
  
!           case 'y': case 0375: case 0377:
!                   EMIT2('y');     EMIT2(0375);  EMIT2(0377);
                    return OK;
  
            default:
***************
*** 811,824 ****
      char_u    *old_regparse = regparse;
  #endif
      int               extra = 0;
-     int               first;
      int               emit_range;
      int               negated;
      int               result;
      int               startc = -1;
      int               endc = -1;
      int               oldstartc = -1;
-     int               glue;           /* ID that will "glue" nodes together */
  
      c = getchr();
      switch (c)
--- 808,819 ----
***************
*** 927,934 ****
  
        case Magic('n'):
            if (reg_string)
!           /* In a string "\n" matches a newline character. */
!           EMIT(NL);
            else
            {
                /* In buffer text "\n" matches the end of a line. */
--- 922,929 ----
  
        case Magic('n'):
            if (reg_string)
!               /* In a string "\n" matches a newline character. */
!               EMIT(NL);
            else
            {
                /* In buffer text "\n" matches the end of a line. */
***************
*** 1160,1191 ****
        case Magic('['):
  collection:
            /*
!            * Glue is emitted between several atoms from the [].
!            * It is either NFA_OR, or NFA_CONCAT.
!            *
!            * [abc] expands to 'a b NFA_OR c NFA_OR' (in postfix notation)
!            * [^abc] expands to 'a NFA_NOT b NFA_NOT NFA_CONCAT c NFA_NOT
!            *          NFA_CONCAT NFA_END_NEG_RANGE NFA_CONCAT' (in postfix
!            *          notation)
!            *
             */
  
- 
- /* Emit negation atoms, if needed.
-  * The CONCAT below merges the NOT with the previous node. */
- #define TRY_NEG()                 \
-           if (negated == TRUE)    \
-           {                       \
-               EMIT(NFA_NOT);      \
-           }
- 
- /* Emit glue between important nodes : CONCAT or OR. */
- #define EMIT_GLUE()               \
-           if (first == FALSE)     \
-               EMIT(glue);         \
-           else                    \
-               first = FALSE;
- 
            p = regparse;
            endp = skip_anyof(p);
            if (*endp == ']')
--- 1155,1169 ----
        case Magic('['):
  collection:
            /*
!            * [abc]  uses NFA_START_COLL - NFA_END_COLL
!            * [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL
!            * Each character is produced as a regular state, using
!            * NFA_CONCAT to bind them together.
!            * Besides normal characters there can be:
!            * - character classes  NFA_CLASS_*
!            * - ranges, two characters followed by NFA_RANGE.
             */
  
            p = regparse;
            endp = skip_anyof(p);
            if (*endp == ']')
***************
*** 1216,1236 ****
                 * version that turns [abc] into 'a' OR 'b' OR 'c'
                 */
                startc = endc = oldstartc = -1;
-               first = TRUE;       /* Emitting first atom in this sequence? */
                negated = FALSE;
-               glue = NFA_OR;
                if (*regparse == '^')                   /* negated range */
                {
                    negated = TRUE;
-                   glue = NFA_CONCAT;
                    mb_ptr_adv(regparse);
                }
                if (*regparse == '-')
                {
                    startc = '-';
                    EMIT(startc);
!                   TRY_NEG();
!                   EMIT_GLUE();
                    mb_ptr_adv(regparse);
                }
                /* Emit the OR branches for each character in the [] */
--- 1194,1213 ----
                 * version that turns [abc] into 'a' OR 'b' OR 'c'
                 */
                startc = endc = oldstartc = -1;
                negated = FALSE;
                if (*regparse == '^')                   /* negated range */
                {
                    negated = TRUE;
                    mb_ptr_adv(regparse);
+                   EMIT(NFA_START_NEG_COLL);
                }
+               else
+                   EMIT(NFA_START_COLL);
                if (*regparse == '-')
                {
                    startc = '-';
                    EMIT(startc);
!                   EMIT(NFA_CONCAT);
                    mb_ptr_adv(regparse);
                }
                /* Emit the OR branches for each character in the [] */
***************
*** 1306,1325 ****
                                    EMIT(NFA_CLASS_ESCAPE);
                                    break;
                            }
!                           TRY_NEG();
!                           EMIT_GLUE();
                            continue;
                        }
                        /* Try equivalence class [=a=] and the like */
                        if (equiclass != 0)
                        {
!                           result = nfa_emit_equi_class(equiclass, negated);
                            if (result == FAIL)
                            {
                                /* should never happen */
                                EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
                            }
-                           EMIT_GLUE();
                            continue;
                        }
                        /* Try collating class like [. .]  */
--- 1283,1300 ----
                                    EMIT(NFA_CLASS_ESCAPE);
                                    break;
                            }
!                           EMIT(NFA_CONCAT);
                            continue;
                        }
                        /* Try equivalence class [=a=] and the like */
                        if (equiclass != 0)
                        {
!                           result = nfa_emit_equi_class(equiclass);
                            if (result == FAIL)
                            {
                                /* should never happen */
                                EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
                            }
                            continue;
                        }
                        /* Try collating class like [. .]  */
***************
*** 1391,1409 ****
                        startc = oldstartc;
                        if (startc > endc)
                            EMSG_RET_FAIL(_(e_invrange));
  #ifdef FEAT_MBYTE
!                       if (has_mbyte && ((*mb_char2len)(startc) > 1
                                    || (*mb_char2len)(endc) > 1))
                        {
!                           if (endc > startc + 256)
!                               EMSG_RET_FAIL(_(e_invrange));
!                           /* Emit the range. "startc" was already emitted, so
!                            * skip it. */
                            for (c = startc + 1; c <= endc; c++)
                            {
                                EMIT(c);
!                               TRY_NEG();
!                               EMIT_GLUE();
                            }
                        }
                        else
--- 1366,1397 ----
                        startc = oldstartc;
                        if (startc > endc)
                            EMSG_RET_FAIL(_(e_invrange));
+ 
+                       if (endc > startc + 2)
+                       {
+                           /* Emit a range instead of the sequence of
+                            * individual characters. */
+                           if (startc == 0)
+                               /* \x00 is translated to \x0a, start at \x01. */
+                               EMIT(1);
+                           else
+                               --post_ptr; /* remove NFA_CONCAT */
+                           EMIT(endc);
+                           EMIT(NFA_RANGE);
+                           EMIT(NFA_CONCAT);
+                       }
+                       else
  #ifdef FEAT_MBYTE
!                            if (has_mbyte && ((*mb_char2len)(startc) > 1
                                    || (*mb_char2len)(endc) > 1))
                        {
!                           /* Emit the characters in the range.
!                            * "startc" was already emitted, so skip it.
!                            * */
                            for (c = startc + 1; c <= endc; c++)
                            {
                                EMIT(c);
!                               EMIT(NFA_CONCAT);
                            }
                        }
                        else
***************
*** 1425,1432 ****
  #endif
                                {
                                    EMIT(c);
!                                   TRY_NEG();
!                                   EMIT_GLUE();
                                }
                        }
                        emit_range = FALSE;
--- 1413,1419 ----
  #endif
                                {
                                    EMIT(c);
!                                   EMIT(NFA_CONCAT);
                                }
                        }
                        emit_range = FALSE;
***************
*** 1434,1456 ****
                    }
                    else
                    {
!                       /*
!                        * This char (startc) is not part of a range. Just
                         * emit it.
-                        *
                         * Normally, simply emit startc. But if we get char
                         * code=0 from a collating char, then replace it with
                         * 0x0a.
-                        *
                         * This is needed to completely mimic the behaviour of
!                        * the backtracking engine.
!                        */
!                       if (got_coll_char == TRUE && startc == 0)
!                           EMIT(0x0a);
                        else
!                           EMIT(startc);
!                       TRY_NEG();
!                       EMIT_GLUE();
                    }
  
                    mb_ptr_adv(regparse);
--- 1421,1449 ----
                    }
                    else
                    {
!                       /* This char (startc) is not part of a range. Just
                         * emit it.
                         * Normally, simply emit startc. But if we get char
                         * code=0 from a collating char, then replace it with
                         * 0x0a.
                         * This is needed to completely mimic the behaviour of
!                        * the backtracking engine. */
!                       if (startc == NFA_NEWL)
!                       {
!                           /* Line break can't be matched as part of the
!                            * collection, add an OR below. But not for negated
!                            * range. */
!                           if (!negated)
!                               extra = ADD_NL;
!                       }
                        else
!                       {
!                           if (got_coll_char == TRUE && startc == 0)
!                               EMIT(0x0a);
!                           else
!                               EMIT(startc);
!                           EMIT(NFA_CONCAT);
!                       }
                    }
  
                    mb_ptr_adv(regparse);
***************
*** 1460,1479 ****
                if (*regparse == '-')       /* if last, '-' is just a char */
                {
                    EMIT('-');
!                   TRY_NEG();
!                   EMIT_GLUE();
                }
                mb_ptr_adv(regparse);
  
                /* skip the trailing ] */
                regparse = endp;
                mb_ptr_adv(regparse);
                if (negated == TRUE)
!               {
!                   /* Mark end of negated char range */
!                   EMIT(NFA_END_NEG_RANGE);
!                   EMIT(NFA_CONCAT);
!               }
  
                /* \_[] also matches \n but it's not negated */
                if (extra == ADD_NL)
--- 1453,1471 ----
                if (*regparse == '-')       /* if last, '-' is just a char */
                {
                    EMIT('-');
!                   EMIT(NFA_CONCAT);
                }
                mb_ptr_adv(regparse);
  
                /* skip the trailing ] */
                regparse = endp;
                mb_ptr_adv(regparse);
+ 
+               /* Mark end of the collection. */
                if (negated == TRUE)
!                   EMIT(NFA_END_NEG_COLL);
!               else
!                   EMIT(NFA_END_COLL);
  
                /* \_[] also matches \n but it's not negated */
                if (extra == ADD_NL)
***************
*** 1532,1540 ****
            }
      }
  
- #undef TRY_NEG
- #undef EMIT_GLUE
- 
      return OK;
  }
  
--- 1524,1529 ----
***************
*** 2091,2100 ****
        case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
        case NFA_QUEST:         STRCPY(code, "NFA_QUEST"); break;
        case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
-       case NFA_NOT:           STRCPY(code, "NFA_NOT "); break;
        case NFA_SKIP_CHAR:     STRCPY(code, "NFA_SKIP_CHAR"); break;
        case NFA_OR:            STRCPY(code, "NFA_OR"); break;
!       case NFA_END_NEG_RANGE: STRCPY(code, "NFA_END_NEG_RANGE"); break;
        case NFA_CLASS_ALNUM:   STRCPY(code, "NFA_CLASS_ALNUM"); break;
        case NFA_CLASS_ALPHA:   STRCPY(code, "NFA_CLASS_ALPHA"); break;
        case NFA_CLASS_BLANK:   STRCPY(code, "NFA_CLASS_BLANK"); break;
--- 2080,2096 ----
        case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
        case NFA_QUEST:         STRCPY(code, "NFA_QUEST"); break;
        case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
        case NFA_SKIP_CHAR:     STRCPY(code, "NFA_SKIP_CHAR"); break;
        case NFA_OR:            STRCPY(code, "NFA_OR"); break;
! 
!       case NFA_START_COLL:    STRCPY(code, "NFA_START_COLL"); break;
!       case NFA_END_COLL:      STRCPY(code, "NFA_END_COLL"); break;
!       case NFA_START_NEG_COLL: STRCPY(code, "NFA_START_NEG_COLL"); break;
!       case NFA_END_NEG_COLL:  STRCPY(code, "NFA_END_NEG_COLL"); break;
!       case NFA_RANGE:         STRCPY(code, "NFA_RANGE"); break;
!       case NFA_RANGE_MIN:     STRCPY(code, "NFA_RANGE_MIN"); break;
!       case NFA_RANGE_MAX:     STRCPY(code, "NFA_RANGE_MAX"); break;
! 
        case NFA_CLASS_ALNUM:   STRCPY(code, "NFA_CLASS_ALNUM"); break;
        case NFA_CLASS_ALPHA:   STRCPY(code, "NFA_CLASS_ALPHA"); break;
        case NFA_CLASS_BLANK:   STRCPY(code, "NFA_CLASS_BLANK"); break;
***************
*** 2231,2238 ****
        fprintf(debugf, " %s", p);
  
      nfa_set_code(state->c);
!     fprintf(debugf, "%s%s (%d) (id=%d)\n",
!                state->negated ? "NOT " : "", code, state->c, abs(state->id));
      if (state->id < 0)
        return;
  
--- 2227,2238 ----
        fprintf(debugf, " %s", p);
  
      nfa_set_code(state->c);
!     fprintf(debugf, "%s%s (%d) (id=%d) val=%d\n",
!                state->negated ? "NOT " : "",
!                code,
!                state->c,
!                abs(state->id),
!                state->val);
      if (state->id < 0)
        return;
  
***************
*** 2325,2330 ****
--- 2325,2331 ----
      s->c    = c;
      s->out  = out;
      s->out1 = out1;
+     s->val  = 0;
  
      s->id   = istate;
      s->lastlist[0] = 0;
***************
*** 2565,2577 ****
        switch (*p)
        {
        case NFA_CONCAT:
!           /* Catenation.
!            * Pay attention: this operator does not exist
!            * in the r.e. itself (it is implicit, really).
!            * It is added when r.e. is translated to postfix
!            * form in re2post().
!            *
!            * No new state added here. */
            if (nfa_calc_size == TRUE)
            {
                /* nstate += 0; */
--- 2566,2575 ----
        switch (*p)
        {
        case NFA_CONCAT:
!           /* Concatenation.
!            * Pay attention: this operator does not exist in the r.e. itself
!            * (it is implicit, really).  It is added when r.e. is translated
!            * to postfix form in re2post(). */
            if (nfa_calc_size == TRUE)
            {
                /* nstate += 0; */
***************
*** 2583,2604 ****
            PUSH(frag(e1.start, e2.out));
            break;
  
-       case NFA_NOT:
-           /* Negation of a character */
-           if (nfa_calc_size == TRUE)
-           {
-               /* nstate += 0; */
-               break;
-           }
-           e1 = POP();
-           e1.start->negated = TRUE;
- #ifdef FEAT_MBYTE
-           if (e1.start->c == NFA_COMPOSING)
-               e1.start->out1->negated = TRUE;
- #endif
-           PUSH(e1);
-           break;
- 
        case NFA_OR:
            /* Alternation */
            if (nfa_calc_size == TRUE)
--- 2581,2586 ----
***************
*** 2672,2677 ****
--- 2654,2696 ----
            PUSH(frag(s, append(e.out, list1(&s->out))));
            break;
  
+       case NFA_END_COLL:
+       case NFA_END_NEG_COLL:
+           /* On the stack is the sequence starting with NFA_START_COLL or
+            * NFA_START_NEG_COLL and all possible characters. Patch it to
+            * add the output to the start. */
+           if (nfa_calc_size == TRUE)
+           {
+               nstate++;
+               break;
+           }
+           e = POP();
+           s = alloc_state(NFA_END_COLL, NULL, NULL);
+           if (s == NULL)
+               goto theend;
+           patch(e.out, s);
+           e.start->out1 = s;
+           PUSH(frag(e.start, list1(&s->out)));
+           break;
+ 
+       case NFA_RANGE:
+           /* Before this are two characters, the low and high end of a
+            * range.  Turn them into two states with MIN and MAX. */
+           if (nfa_calc_size == TRUE)
+           {
+               /* nstate += 0; */
+               break;
+           }
+           e2 = POP();
+           e1 = POP();
+           e2.start->val = e2.start->c;
+           e2.start->c = NFA_RANGE_MAX;
+           e1.start->val = e1.start->c;
+           e1.start->c = NFA_RANGE_MIN;
+           patch(e1.out, e2.start);
+           PUSH(frag(e1.start, e2.out));
+           break;
+ 
        case NFA_SKIP_CHAR:
            /* Symbol of 0-length, Used in a repetition
             * with max/min count of 0 */
***************
*** 2990,2995 ****
--- 3009,3016 ----
      matchstate = &state_ptr[istate++]; /* the match state */
      matchstate->c = NFA_MATCH;
      matchstate->out = matchstate->out1 = NULL;
+     matchstate->negated = FALSE;
+     matchstate->id = 0;
  
      patch(e.out, matchstate);
      ret = e.start;
***************
*** 3308,3314 ****
      switch (state->c)
      {
        case NFA_SPLIT:
-       case NFA_NOT:
        case NFA_NOPEN:
        case NFA_SKIP_CHAR:
        case NFA_NCLOSE:
--- 3329,3334 ----
***************
*** 3782,3788 ****
  
        default:
            /* should not be here :P */
!           EMSG_RET_FAIL(_("E877: (NFA regexp) Invalid character class "));
      }
      return FAIL;
  }
--- 3802,3809 ----
  
        default:
            /* should not be here :P */
!           EMSGN("E877: (NFA regexp) Invalid character class: %ld", class);
!           return FAIL;
      }
      return FAIL;
  }
***************
*** 4320,4327 ****
      addstate(thislist, start, m, 0);
  
      /* There are two cases when the NFA advances: 1. input char matches the
!      * NFA node and 2. input char does not match the NFA node, but the next
!      * node is NFA_NOT. The following macro calls addstate() according to
       * these rules. It is used A LOT, so use the "listtbl" table for speed */
      listtbl[0][0] = NULL;
      listtbl[0][1] = neglist;
--- 4341,4348 ----
      addstate(thislist, start, m, 0);
  
      /* There are two cases when the NFA advances: 1. input char matches the
!      * NFA node and 2. input char does not match the NFA node and the state
!      * has the negated flag. The following macro calls addstate() according to
       * these rules. It is used A LOT, so use the "listtbl" table for speed */
      listtbl[0][0] = NULL;
      listtbl[0][1] = neglist;
***************
*** 4845,4860 ****
                ADD_POS_NEG_STATE(t->state);
                break;
  
!           case NFA_END_NEG_RANGE:
!               /* This follows a series of negated nodes, like:
!                * NOT CHAR(x), NOT CHAR(y), etc. */
!               if (curc > 0)
                {
                    ll = nextlist;
!                   add_state = t->state->out;
                    add_off = clen;
                }
                break;
  
            case NFA_ANY:
                /* Any char except '\0', (end of input) does not match. */
--- 4866,4944 ----
                ADD_POS_NEG_STATE(t->state);
                break;
  
!           case NFA_START_COLL:
!           case NFA_START_NEG_COLL:
!             {
!               /* What follows is a list of characters, until NFA_END_COLL.
!                * One of them must match or none of them must match. */
!               nfa_state_T     *state;
!               int             result_if_matched;
!               int             c1, c2;
! 
!               /* Never match EOL. If it's part of the collection it is added
!                * as a separate state with an OR. */
!               if (curc == NUL)
!                   break;
! 
!               state = t->state->out;
!               result_if_matched = (t->state->c == NFA_START_COLL);
!               for (;;)
                {
+                   if (state->c == NFA_END_COLL)
+                   {
+                       result = !result_if_matched;
+                       break;
+                   }
+                   if (state->c == NFA_RANGE_MIN)
+                   {
+                       c1 = state->val;
+                       state = state->out; /* advance to NFA_RANGE_MAX */
+                       c2 = state->val;
+ #ifdef ENABLE_LOG
+                       fprintf(log_fd, "NFA_RANGE_MIN curc=%d c1=%d c2=%d\n",
+                               curc, c1, c2);
+ #endif
+                       if (curc >= c1 && curc <= c2)
+                       {
+                           result = result_if_matched;
+                           break;
+                       }
+                       if (ireg_ic)
+                       {
+                           int curc_low = MB_TOLOWER(curc);
+                           int done = FALSE;
+ 
+                           for ( ; c1 <= c2; ++c1)
+                               if (MB_TOLOWER(c1) == curc_low)
+                               {
+                                   result = result_if_matched;
+                                   done = TRUE;
+                                   break;
+                               }
+                           if (done)
+                               break;
+                       }
+                   }
+                   else if (state->c < 0 ? check_char_class(state->c, curc)
+                               : (curc == state->c
+                                  || (ireg_ic && MB_TOLOWER(curc)
+                                                   == MB_TOLOWER(state->c))))
+                   {
+                       result = result_if_matched;
+                       break;
+                   }
+                   state = state->out;
+               }
+               if (result)
+               {
+                   /* next state is in out of the NFA_END_COLL, out1 of
+                    * START points to the END state */
                    ll = nextlist;
!                   add_state = t->state->out1->out;
                    add_off = clen;
                }
                break;
+             }
  
            case NFA_ANY:
                /* Any char except '\0', (end of input) does not match. */
*** ../vim-7.3.1136/src/version.c       2013-06-06 21:31:02.000000000 +0200
--- src/version.c       2013-06-07 13:21:57.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
  {   /* Add new patch number below this line */
+ /**/
+     1137,
  /**/

-- 
From "know your smileys":
 :.-(   Crying

 /// Bram Moolenaar -- [email protected] -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.


Raspunde prin e-mail lui