Patch 7.4.293
Problem:    It is not possible to ignore composing characters at a specific
            point in a pattern.
Solution:   Add the %C item.
Files:      src/regexp.c, src/regexp_nfa.c, src/testdir/test95.in,
            src/testdir/test95.ok, runtime/doc/pattern.txt


*** ../vim-7.4.292/src/regexp.c 2014-05-13 18:03:55.729737466 +0200
--- src/regexp.c        2014-05-13 18:27:08.725749659 +0200
***************
*** 244,249 ****
--- 244,250 ----
  
  #define RE_MARK               207     /* mark cmp  Match mark position */
  #define RE_VISUAL     208     /*      Match Visual area */
+ #define RE_COMPOSING  209     /* any composing characters */
  
  /*
   * Magic characters have a special meaning, they don't match literally.
***************
*** 2208,2213 ****
--- 2209,2218 ----
                    ret = regnode(RE_VISUAL);
                    break;
  
+               case 'C':
+                   ret = regnode(RE_COMPOSING);
+                   break;
+ 
                /* \%[abc]: Emit as a list of branches, all ending at the last
                 * branch which matches nothing. */
                case '[':
***************
*** 4710,4720 ****
                            status = RA_NOMATCH;
                    }
  #ifdef FEAT_MBYTE
!                   /* Check for following composing character. */
                    if (status != RA_NOMATCH
                            && enc_utf8
                            && UTF_COMPOSINGLIKE(reginput, reginput + len)
!                           && !ireg_icombine)
                    {
                        /* raaron: This code makes a composing character get
                         * ignored, which is the correct behavior (sometimes)
--- 4715,4727 ----
                            status = RA_NOMATCH;
                    }
  #ifdef FEAT_MBYTE
!                   /* Check for following composing character, unless %C
!                    * follows (skips over all composing chars). */
                    if (status != RA_NOMATCH
                            && enc_utf8
                            && UTF_COMPOSINGLIKE(reginput, reginput + len)
!                           && !ireg_icombine
!                           && OP(next) != RE_COMPOSING)
                    {
                        /* raaron: This code makes a composing character get
                         * ignored, which is the correct behavior (sometimes)
***************
*** 4791,4796 ****
--- 4798,4813 ----
                status = RA_NOMATCH;
            break;
  #endif
+         case RE_COMPOSING:
+ #ifdef FEAT_MBYTE
+           if (enc_utf8)
+           {
+               /* Skip composing characters. */
+               while (utf_iscomposing(utf_ptr2char(reginput)))
+                   mb_cptr_adv(reginput);
+           }
+ #endif
+           break;
  
          case NOTHING:
            break;
*** ../vim-7.4.292/src/regexp_nfa.c     2014-05-13 16:44:25.633695709 +0200
--- src/regexp_nfa.c    2014-05-13 19:25:58.285780556 +0200
***************
*** 81,86 ****
--- 81,87 ----
      NFA_COMPOSING,                /* Next nodes in NFA are part of the
                                       composing multibyte char */
      NFA_END_COMPOSING,                    /* End of a composing char in the 
NFA */
+     NFA_ANY_COMPOSING,                    /* \%C: Any composing characters. */
      NFA_OPT_CHARS,                /* \%[abc] */
  
      /* The following are used only in the postfix form, not in the NFA */
***************
*** 1418,1423 ****
--- 1419,1428 ----
                    EMIT(NFA_VISUAL);
                    break;
  
+               case 'C':
+                   EMIT(NFA_ANY_COMPOSING);
+                   break;
+ 
                case '[':
                    {
                        int         n;
***************
*** 2429,2434 ****
--- 2434,2440 ----
        case NFA_MARK_LT:       STRCPY(code, "NFA_MARK_LT "); break;
        case NFA_CURSOR:        STRCPY(code, "NFA_CURSOR "); break;
        case NFA_VISUAL:        STRCPY(code, "NFA_VISUAL "); break;
+       case NFA_ANY_COMPOSING: STRCPY(code, "NFA_ANY_COMPOSING "); break;
  
        case NFA_STAR:          STRCPY(code, "NFA_STAR "); break;
        case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
***************
*** 2967,2972 ****
--- 2973,2979 ----
            case NFA_NLOWER_IC:
            case NFA_UPPER_IC:
            case NFA_NUPPER_IC:
+           case NFA_ANY_COMPOSING:
                /* possibly non-ascii */
  #ifdef FEAT_MBYTE
                if (has_mbyte)
***************
*** 4152,4157 ****
--- 4159,4165 ----
                continue;
  
            case NFA_ANY:
+           case NFA_ANY_COMPOSING:
            case NFA_IDENT:
            case NFA_SIDENT:
            case NFA_KWORD:
***************
*** 4395,4401 ****
      switch (state->c)
      {
        case NFA_MATCH:
!           nfa_match = TRUE;
            break;
  
        case NFA_SPLIT:
--- 4403,4409 ----
      switch (state->c)
      {
        case NFA_MATCH:
! //        nfa_match = TRUE;
            break;
  
        case NFA_SPLIT:
***************
*** 5151,5156 ****
--- 5159,5165 ----
  
        case NFA_MATCH:
        case NFA_MCLOSE:
+       case NFA_ANY_COMPOSING:
            /* empty match works always */
            return 0;
  
***************
*** 5573,5578 ****
--- 5582,5593 ----
            {
            case NFA_MATCH:
              {
+ #ifdef FEAT_MBYTE
+               /* If the match ends before a composing character and
+                * ireg_icombine is not set, that is not really a match. */
+               if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc))
+                   break;
+ #endif
                nfa_match = TRUE;
                copy_sub(&submatch->norm, &t->subs.norm);
  #ifdef FEAT_SYN_HL
***************
*** 6120,6125 ****
--- 6135,6157 ----
                }
                break;
  
+           case NFA_ANY_COMPOSING:
+               /* On a composing character skip over it.  Otherwise do
+                * nothing.  Always matches. */
+ #ifdef FEAT_MBYTE
+               if (enc_utf8 && utf_iscomposing(curc))
+               {
+                   add_off = clen;
+               }
+               else
+ #endif
+               {
+                   add_here = TRUE;
+                   add_off = 0;
+               }
+               add_state = t->state->out;
+               break;
+ 
            /*
             * Character classes like \a for alpha, \d for digit etc.
             */
***************
*** 6484,6495 ****
                if (!result && ireg_ic)
                    result = MB_TOLOWER(c) == MB_TOLOWER(curc);
  #ifdef FEAT_MBYTE
!               /* If there is a composing character which is not being
!                * ignored there can be no match. Match with composing
!                * character uses NFA_COMPOSING above. */
!               if (result && enc_utf8 && !ireg_icombine
!                                               && clen != utf_char2len(curc))
!                   result = FALSE;
  #endif
                ADD_STATE_IF_MATCH(t->state);
                break;
--- 6516,6525 ----
                if (!result && ireg_ic)
                    result = MB_TOLOWER(c) == MB_TOLOWER(curc);
  #ifdef FEAT_MBYTE
!               /* If ireg_icombine is not set only skip over the character
!                * itself.  When it is set skip over composing characters. */
!               if (result && enc_utf8 && !ireg_icombine)
!                   clen = utf_char2len(curc);
  #endif
                ADD_STATE_IF_MATCH(t->state);
                break;
diff: ../vim-7.4.292/src/testdir/test95.insrc/testdir/test95.ok,: No such file 
or directory
diff: src/testdir/test95.insrc/testdir/test95.ok,: No such file or directory
*** ../vim-7.4.292/runtime/doc/pattern.txt      2013-08-10 13:24:59.000000000 
+0200
--- runtime/doc/pattern.txt     2014-05-13 18:59:57.621766895 +0200
***************
*** 545,550 ****
--- 545,551 ----
  |/\%u|        \%u     \%u     match specified multibyte character (eg \%u20ac)
  |/\%U|        \%U     \%U     match specified large multibyte character (eg
                        \%U12345678)
+ |/\%C|        \%C     \%C     match any composing characters
  
  Example                       matches ~
  \<\I\i*               or
***************
*** 1207,1218 ****
  8. Composing characters                                       
*patterns-composing*
  
                                                        */\Z*
! When "\Z" appears anywhere in the pattern, composing characters are ignored.
! Thus only the base characters need to match, the composing characters may be
! different and the number of composing characters may differ.  Only relevant
! when 'encoding' is "utf-8".
  Exception: If the pattern starts with one or more composing characters, these
  must match.
  
  When a composing character appears at the start of the pattern of after an
  item that doesn't include the composing character, a match is found at any
--- 1208,1225 ----
  8. Composing characters                                       
*patterns-composing*
  
                                                        */\Z*
! When "\Z" appears anywhere in the pattern, all composing characters are
! ignored.  Thus only the base characters need to match, the composing
! characters may be different and the number of composing characters may differ.
! Only relevant when 'encoding' is "utf-8".
  Exception: If the pattern starts with one or more composing characters, these
  must match.
+                                                       */\%C*
+ Use "\%C" to skip any composing characters.  For example, the pattern "a" does
+ not match in "càt" (where the a has the composing character 0x0300), but
+ "a\%C" does.  Note that this does not match "cát" (where the á is character
+ 0xe1, it does not have a composing character).  It does match "cat" (where
+ the a is just an a).
  
  When a composing character appears at the start of the pattern of after an
  item that doesn't include the composing character, a match is found at any
*** ../vim-7.4.292/src/version.c        2014-05-13 18:03:55.729737466 +0200
--- src/version.c       2014-05-13 18:28:45.885750510 +0200
***************
*** 736,737 ****
--- 736,739 ----
  {   /* Add new patch number below this line */
+ /**/
+     293,
  /**/

-- 
hundred-and-one symptoms of being an internet addict:
155. You forget to eat because you're too busy surfing the net.

 /// Bram Moolenaar -- [email protected] -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Raspunde prin e-mail lui