Patch 8.0.0519
Problem:    Character classes are not well tested. They can differ between
            platforms.
Solution:   Add tests.  In the documentation make clear which classes depend
            on what library function.  Only use :cntrl: and :graph: for ASCII.
            (Kazunobu Kuriyama, Dominique Pelle, closes #1560)
            Update the documentation.
Files:      src/regexp.c, src/regexp_nfa.c, runtime/doc/pattern.txt,
            src/testdir/test_regexp_utf8.vim


*** ../vim-8.0.0518/src/regexp.c        2017-03-12 20:09:59.488468234 +0100
--- src/regexp.c        2017-03-29 15:19:23.207812438 +0200
***************
*** 2555,2571 ****
                                regc('\t');
                                break;
                            case CLASS_CNTRL:
!                               for (cu = 1; cu <= 255; cu++)
                                    if (iscntrl(cu))
                                        regmbc(cu);
                                break;
                            case CLASS_DIGIT:
!                               for (cu = 1; cu <= 255; cu++)
                                    if (VIM_ISDIGIT(cu))
                                        regmbc(cu);
                                break;
                            case CLASS_GRAPH:
!                               for (cu = 1; cu <= 255; cu++)
                                    if (isgraph(cu))
                                        regmbc(cu);
                                break;
--- 2555,2571 ----
                                regc('\t');
                                break;
                            case CLASS_CNTRL:
!                               for (cu = 1; cu <= 127; cu++)
                                    if (iscntrl(cu))
                                        regmbc(cu);
                                break;
                            case CLASS_DIGIT:
!                               for (cu = 1; cu <= 127; cu++)
                                    if (VIM_ISDIGIT(cu))
                                        regmbc(cu);
                                break;
                            case CLASS_GRAPH:
!                               for (cu = 1; cu <= 127; cu++)
                                    if (isgraph(cu))
                                        regmbc(cu);
                                break;
*** ../vim-8.0.0518/src/regexp_nfa.c    2017-03-12 20:09:59.488468234 +0100
--- src/regexp_nfa.c    2017-03-29 15:19:38.743715954 +0200
***************
*** 4871,4877 ****
                return OK;
            break;
        case NFA_CLASS_CNTRL:
!           if (c >= 1 && c <= 255 && iscntrl(c))
                return OK;
            break;
        case NFA_CLASS_DIGIT:
--- 4871,4877 ----
                return OK;
            break;
        case NFA_CLASS_CNTRL:
!           if (c >= 1 && c <= 127 && iscntrl(c))
                return OK;
            break;
        case NFA_CLASS_DIGIT:
***************
*** 4879,4885 ****
                return OK;
            break;
        case NFA_CLASS_GRAPH:
!           if (c >= 1 && c <= 255 && isgraph(c))
                return OK;
            break;
        case NFA_CLASS_LOWER:
--- 4879,4885 ----
                return OK;
            break;
        case NFA_CLASS_GRAPH:
!           if (c >= 1 && c <= 127 && isgraph(c))
                return OK;
            break;
        case NFA_CLASS_LOWER:
*** ../vim-8.0.0518/runtime/doc/pattern.txt     2016-09-12 12:45:26.000000000 +0200
--- runtime/doc/pattern.txt     2017-03-29 15:18:47.300035463 +0200
***************
*** 1082,1106 ****
        - A character class expression is evaluated to the set of characters
          belonging to that character class.  The following character classes
          are supported:
!                         Name          Contents ~
! *[:alnum:]*             [:alnum:]     ASCII letters and digits
! *[:alpha:]*             [:alpha:]     ASCII letters
! *[:blank:]*             [:blank:]     space and tab characters
! *[:cntrl:]*             [:cntrl:]     control characters
! *[:digit:]*             [:digit:]     decimal digits
! *[:graph:]*             [:graph:]     printable characters excluding space
! *[:lower:]*             [:lower:]     lowercase letters (all letters when
                                        'ignorecase' is used)
! *[:print:]*             [:print:]     printable characters including space
! *[:punct:]*             [:punct:]     ASCII punctuation characters
! *[:space:]*             [:space:]     whitespace characters
! *[:upper:]*             [:upper:]     uppercase letters (all letters when
                                        'ignorecase' is used)
! *[:xdigit:]*            [:xdigit:]    hexadecimal digits
! *[:return:]*            [:return:]    the <CR> character
! *[:tab:]*               [:tab:]       the <Tab> character
! *[:escape:]*            [:escape:]    the <Esc> character
! *[:backspace:]*                 [:backspace:] the <BS> character
          The brackets in character class expressions are additional to the
          brackets delimiting a collection.  For example, the following is a
          plausible pattern for a UNIX filename: "[-./[:alnum:]_~]\+" That is,
--- 1085,1111 ----
        - A character class expression is evaluated to the set of characters
          belonging to that character class.  The following character classes
          are supported:
!                 Name        Func      Contents ~
! *[:alnum:]*     [:alnum:]   isalnum   ASCII letters and digits
! *[:alpha:]*     [:alpha:]   isalpha   ASCII letters
! *[:blank:]*     [:blank:]             space and tab
! *[:cntrl:]*     [:cntrl:]   iscntrl   ASCII control characters
! *[:digit:]*     [:digit:]             decimal digits '0' to '9'
! *[:graph:]*     [:graph:]   isgraph   ASCII printable characters excluding
!                                       space
! *[:lower:]*     [:lower:]   (1)       lowercase letters (all letters when
                                        'ignorecase' is used)
! *[:print:]*     [:print:]   (2)       printable characters including space
! *[:punct:]*     [:punct:]   ispunct   ASCII punctuation characters
! *[:space:]*     [:space:]             whitespace characters: space, tab, CR,
!                                       NL, vertical tab, form feed
! *[:upper:]*     [:upper:]   (3)       uppercase letters (all letters when
                                        'ignorecase' is used)
! *[:xdigit:]*    [:xdigit:]            hexadecimal digits: 0-9, a-f, A-F
! *[:return:]*    [:return:]            the <CR> character
! *[:tab:]*       [:tab:]               the <Tab> character
! *[:escape:]*    [:escape:]            the <Esc> character
! *[:backspace:]*         [:backspace:]         the <BS> character
          The brackets in character class expressions are additional to the
          brackets delimiting a collection.  For example, the following is a
          plausible pattern for a UNIX filename: "[-./[:alnum:]_~]\+" That is,
***************
*** 1111,1116 ****
--- 1116,1128 ----
          regexp engine.  See |two-engines|.  In the future these items may
          work for multi-byte characters.  For now, to get all "alpha"
          characters you can use: [[:lower:][:upper:]].
+ 
+         The "Func" column shows what library function is used.  The
+         implementation depends on the system.  Otherwise:
+         (1) Uses islower() for ASCII and Vim builtin rules for other
+         characters when built with the |+multi_byte| feature.
+         (2) Uses Vim builtin rules
+         (3) As with (1) but using isupper()
                                                        */[[=* *[==]*
        - An equivalence class.  This means that characters are matched that
          have almost the same meaning, e.g., when ignoring accents.  This
*** ../vim-8.0.0518/src/testdir/test_regexp_utf8.vim    2016-08-17 20:17:41.000000000 +0200
--- src/testdir/test_regexp_utf8.vim    2017-03-29 15:22:28.294663344 +0200
***************
*** 38,49 ****
    set isprint=@,161-255
    call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))
  
    let alphachars = ''
    let lowerchars = ''
-   let upperchars = ''
-   let alnumchars = ''
    let printchars = ''
    let punctchars = ''
    let xdigitchars = ''
    let i = 1
    while i <= 255
--- 38,58 ----
    set isprint=@,161-255
    call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))
  
+   let alnumchars = ''
    let alphachars = ''
+   let backspacechar = ''
+   let blankchars = ''
+   let cntrlchars = ''
+   let digitchars = ''
+   let escapechar = ''
+   let graphchars = ''
    let lowerchars = ''
    let printchars = ''
    let punctchars = ''
+   let returnchar = ''
+   let spacechars = ''
+   let tabchar = ''
+   let upperchars = ''
    let xdigitchars = ''
    let i = 1
    while i <= 255
***************
*** 51,71 ****
      if c =~ '[[:alpha:]]'
        let alphachars .= c
      endif
-     if c =~ '[[:lower:]]'
-       let lowerchars .= c
-     endif
-     if c =~ '[[:upper:]]'
-       let upperchars .= c
-     endif
      if c =~ '[[:alnum:]]'
        let alnumchars .= c
      endif
      if c =~ '[[:print:]]'
        let printchars .= c
      endif
      if c =~ '[[:punct:]]'
        let punctchars .= c
      endif
      if c =~ '[[:xdigit:]]'
        let xdigitchars .= c
      endif
--- 60,107 ----
      if c =~ '[[:alpha:]]'
        let alphachars .= c
      endif
      if c =~ '[[:alnum:]]'
        let alnumchars .= c
      endif
+     if c =~ '[[:backspace:]]'
+       let backspacechar .= c
+     endif
+     if c =~ '[[:blank:]]'
+       let blankchars .= c
+     endif
+     if c =~ '[[:cntrl:]]'
+       let cntrlchars .= c
+     endif
+     if c =~ '[[:digit:]]'
+       let digitchars .= c
+     endif
+     if c =~ '[[:escape:]]'
+       let escapechar .= c
+     endif
+     if c =~ '[[:graph:]]'
+       let graphchars .= c
+     endif
+     if c =~ '[[:lower:]]'
+       let lowerchars .= c
+     endif
      if c =~ '[[:print:]]'
        let printchars .= c
      endif
      if c =~ '[[:punct:]]'
        let punctchars .= c
      endif
+     if c =~ '[[:return:]]'
+       let returnchar .= c
+     endif
+     if c =~ '[[:space:]]'
+       let spacechars .= c
+     endif
+     if c =~ '[[:tab:]]'
+       let tabchar .= c
+     endif
+     if c =~ '[[:upper:]]'
+       let upperchars .= c
+     endif
      if c =~ '[[:xdigit:]]'
        let xdigitchars .= c
      endif
***************
*** 73,83 ****
    endwhile
  
    call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', 
alphachars)
-   call 
assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', 
lowerchars)
-   call 
assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', 
upperchars)
    call 
assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', 
alnumchars)
    call assert_equal(' 
!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
 
¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ',
 printchars)
    call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
    call assert_equal('0123456789ABCDEFabcdef', xdigitchars)
  endfunc
  
--- 109,130 ----
    endwhile
  
    call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', 
alphachars)
    call 
assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', 
alnumchars)
+   call assert_equal("\b", backspacechar)
+   call assert_equal("\t ", blankchars)
+   " Commented out: it succeeds on Linux and Windows, but fails on macOs in 
Travis.
+   " call 
assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f",
 cntrlchars)
+   call assert_equal("0123456789", digitchars)
+   call assert_equal("\<Esc>", escapechar)
+   " Commented out: it succeeds on Linux and Windows, but fails on macOs in 
Travis.
+   " call 
assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
 graphchars)
+   call 
assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', 
lowerchars)
    call assert_equal(' 
!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
 
¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ',
 printchars)
    call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
+   call 
assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', 
upperchars)
+   call assert_equal("\r", returnchar)
+   call assert_equal("\t\n\x0b\f\r ", spacechars)
+   call assert_equal("\t", tabchar)
    call assert_equal('0123456789ABCDEFabcdef', xdigitchars)
  endfunc
  
*** ../vim-8.0.0518/src/version.c       2017-03-29 14:40:38.342357669 +0200
--- src/version.c       2017-03-29 15:23:59.134099666 +0200
***************
*** 766,767 ****
--- 766,769 ----
  {   /* Add new patch number below this line */
+ /**/
+     519,
  /**/

-- 
hundred-and-one symptoms of being an internet addict:
231. You sprinkle Carpet Fresh on the rugs and put your vacuum cleaner
     in the front doorway permanently so it always looks like you are
     actually attempting to do something about that mess that has amassed
     since you discovered the Internet.

 /// Bram Moolenaar -- [email protected] -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Raspunde prin e-mail lui