Patch 8.0.0519
Problem: Character classes are not well tested. They can differ between
platforms.
Solution: Add tests. In the documentation make clear which classes depend
on what library function. Only use :cntrl: and :graph: for ASCII.
(Kazunobu Kuriyama, Dominique Pelle, closes #1560)
Update the documentation.
Files: src/regexp.c, src/regexp_nfa.c, runtime/doc/pattern.txt,
src/testdir/test_regexp_utf8.vim
*** ../vim-8.0.0518/src/regexp.c 2017-03-12 20:09:59.488468234 +0100
--- src/regexp.c 2017-03-29 15:19:23.207812438 +0200
***************
*** 2555,2571 ****
regc('\t');
break;
case CLASS_CNTRL:
! for (cu = 1; cu <= 255; cu++)
if (iscntrl(cu))
regmbc(cu);
break;
case CLASS_DIGIT:
! for (cu = 1; cu <= 255; cu++)
if (VIM_ISDIGIT(cu))
regmbc(cu);
break;
case CLASS_GRAPH:
! for (cu = 1; cu <= 255; cu++)
if (isgraph(cu))
regmbc(cu);
break;
--- 2555,2571 ----
regc('\t');
break;
case CLASS_CNTRL:
! for (cu = 1; cu <= 127; cu++)
if (iscntrl(cu))
regmbc(cu);
break;
case CLASS_DIGIT:
! for (cu = 1; cu <= 127; cu++)
if (VIM_ISDIGIT(cu))
regmbc(cu);
break;
case CLASS_GRAPH:
! for (cu = 1; cu <= 127; cu++)
if (isgraph(cu))
regmbc(cu);
break;
*** ../vim-8.0.0518/src/regexp_nfa.c 2017-03-12 20:09:59.488468234 +0100
--- src/regexp_nfa.c 2017-03-29 15:19:38.743715954 +0200
***************
*** 4871,4877 ****
return OK;
break;
case NFA_CLASS_CNTRL:
! if (c >= 1 && c <= 255 && iscntrl(c))
return OK;
break;
case NFA_CLASS_DIGIT:
--- 4871,4877 ----
return OK;
break;
case NFA_CLASS_CNTRL:
! if (c >= 1 && c <= 127 && iscntrl(c))
return OK;
break;
case NFA_CLASS_DIGIT:
***************
*** 4879,4885 ****
return OK;
break;
case NFA_CLASS_GRAPH:
! if (c >= 1 && c <= 255 && isgraph(c))
return OK;
break;
case NFA_CLASS_LOWER:
--- 4879,4885 ----
return OK;
break;
case NFA_CLASS_GRAPH:
! if (c >= 1 && c <= 127 && isgraph(c))
return OK;
break;
case NFA_CLASS_LOWER:
*** ../vim-8.0.0518/runtime/doc/pattern.txt 2016-09-12 12:45:26.000000000 +0200
--- runtime/doc/pattern.txt 2017-03-29 15:18:47.300035463 +0200
***************
*** 1082,1106 ****
- A character class expression is evaluated to the set of characters
belonging to that character class. The following character classes
are supported:
! Name Contents ~
! *[:alnum:]* [:alnum:] ASCII letters and digits
! *[:alpha:]* [:alpha:] ASCII letters
! *[:blank:]* [:blank:] space and tab characters
! *[:cntrl:]* [:cntrl:] control characters
! *[:digit:]* [:digit:] decimal digits
! *[:graph:]* [:graph:] printable characters excluding space
! *[:lower:]* [:lower:] lowercase letters (all letters when
'ignorecase' is used)
! *[:print:]* [:print:] printable characters including space
! *[:punct:]* [:punct:] ASCII punctuation characters
! *[:space:]* [:space:] whitespace characters
! *[:upper:]* [:upper:] uppercase letters (all letters when
'ignorecase' is used)
! *[:xdigit:]* [:xdigit:] hexadecimal digits
! *[:return:]* [:return:] the <CR> character
! *[:tab:]* [:tab:] the <Tab> character
! *[:escape:]* [:escape:] the <Esc> character
! *[:backspace:]* [:backspace:] the <BS> character
The brackets in character class expressions are additional to the
brackets delimiting a collection. For example, the following is a
plausible pattern for a UNIX filename: "[-./[:alnum:]_~]\+" That is,
--- 1085,1111 ----
- A character class expression is evaluated to the set of characters
belonging to that character class. The following character classes
are supported:
! Name Func Contents ~
! *[:alnum:]* [:alnum:] isalnum ASCII letters and digits
! *[:alpha:]* [:alpha:] isalpha ASCII letters
! *[:blank:]* [:blank:] space and tab
! *[:cntrl:]* [:cntrl:] iscntrl ASCII control characters
! *[:digit:]* [:digit:] decimal digits '0' to '9'
! *[:graph:]* [:graph:] isgraph ASCII printable characters excluding
! space
! *[:lower:]* [:lower:] (1) lowercase letters (all letters when
'ignorecase' is used)
! *[:print:]* [:print:] (2) printable characters including space
! *[:punct:]* [:punct:] ispunct ASCII punctuation characters
! *[:space:]* [:space:] whitespace characters: space, tab, CR,
! NL, vertical tab, form feed
! *[:upper:]* [:upper:] (3) uppercase letters (all letters when
'ignorecase' is used)
! *[:xdigit:]* [:xdigit:] hexadecimal digits: 0-9, a-f, A-F
! *[:return:]* [:return:] the <CR> character
! *[:tab:]* [:tab:] the <Tab> character
! *[:escape:]* [:escape:] the <Esc> character
! *[:backspace:]* [:backspace:] the <BS> character
The brackets in character class expressions are additional to the
brackets delimiting a collection. For example, the following is a
plausible pattern for a UNIX filename: "[-./[:alnum:]_~]\+" That is,
***************
*** 1111,1116 ****
--- 1116,1128 ----
regexp engine. See |two-engines|. In the future these items may
work for multi-byte characters. For now, to get all "alpha"
characters you can use: [[:lower:][:upper:]].
+
+ The "Func" column shows what library function is used. The
+ implementation depends on the system. Otherwise:
+ (1) Uses islower() for ASCII and Vim builtin rules for other
+ characters when built with the |+multi_byte| feature.
+ (2) Uses Vim builtin rules
+ (3) As with (1) but using isupper()
*/[[=* *[==]*
- An equivalence class. This means that characters are matched that
have almost the same meaning, e.g., when ignoring accents. This
*** ../vim-8.0.0518/src/testdir/test_regexp_utf8.vim 2016-08-17 20:17:41.000000000 +0200
--- src/testdir/test_regexp_utf8.vim 2017-03-29 15:22:28.294663344 +0200
***************
*** 38,49 ****
set isprint=@,161-255
call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))
let alphachars = ''
let lowerchars = ''
- let upperchars = ''
- let alnumchars = ''
let printchars = ''
let punctchars = ''
let xdigitchars = ''
let i = 1
while i <= 255
--- 38,58 ----
set isprint=@,161-255
call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))
+ let alnumchars = ''
let alphachars = ''
+ let backspacechar = ''
+ let blankchars = ''
+ let cntrlchars = ''
+ let digitchars = ''
+ let escapechar = ''
+ let graphchars = ''
let lowerchars = ''
let printchars = ''
let punctchars = ''
+ let returnchar = ''
+ let spacechars = ''
+ let tabchar = ''
+ let upperchars = ''
let xdigitchars = ''
let i = 1
while i <= 255
***************
*** 51,71 ****
if c =~ '[[:alpha:]]'
let alphachars .= c
endif
- if c =~ '[[:lower:]]'
- let lowerchars .= c
- endif
- if c =~ '[[:upper:]]'
- let upperchars .= c
- endif
if c =~ '[[:alnum:]]'
let alnumchars .= c
endif
if c =~ '[[:print:]]'
let printchars .= c
endif
if c =~ '[[:punct:]]'
let punctchars .= c
endif
if c =~ '[[:xdigit:]]'
let xdigitchars .= c
endif
--- 60,107 ----
if c =~ '[[:alpha:]]'
let alphachars .= c
endif
if c =~ '[[:alnum:]]'
let alnumchars .= c
endif
+ if c =~ '[[:backspace:]]'
+ let backspacechar .= c
+ endif
+ if c =~ '[[:blank:]]'
+ let blankchars .= c
+ endif
+ if c =~ '[[:cntrl:]]'
+ let cntrlchars .= c
+ endif
+ if c =~ '[[:digit:]]'
+ let digitchars .= c
+ endif
+ if c =~ '[[:escape:]]'
+ let escapechar .= c
+ endif
+ if c =~ '[[:graph:]]'
+ let graphchars .= c
+ endif
+ if c =~ '[[:lower:]]'
+ let lowerchars .= c
+ endif
if c =~ '[[:print:]]'
let printchars .= c
endif
if c =~ '[[:punct:]]'
let punctchars .= c
endif
+ if c =~ '[[:return:]]'
+ let returnchar .= c
+ endif
+ if c =~ '[[:space:]]'
+ let spacechars .= c
+ endif
+ if c =~ '[[:tab:]]'
+ let tabchar .= c
+ endif
+ if c =~ '[[:upper:]]'
+ let upperchars .= c
+ endif
if c =~ '[[:xdigit:]]'
let xdigitchars .= c
endif
***************
*** 73,83 ****
endwhile
call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars)
- call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars)
- call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars)
call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars)
call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', printchars)
call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
call assert_equal('0123456789ABCDEFabcdef', xdigitchars)
endfunc
--- 109,130 ----
endwhile
call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars)
call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars)
+ call assert_equal("\b", backspacechar)
+ call assert_equal("\t ", blankchars)
+ " Commented out: it succeeds on Linux and Windows, but fails on macOs in Travis.
+ " call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", cntrlchars)
+ call assert_equal("0123456789", digitchars)
+ call assert_equal("\<Esc>", escapechar)
+ " Commented out: it succeeds on Linux and Windows, but fails on macOs in Travis.
+ " call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', graphchars)
+ call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars)
call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', printchars)
call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
+ call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars)
+ call assert_equal("\r", returnchar)
+ call assert_equal("\t\n\x0b\f\r ", spacechars)
+ call assert_equal("\t", tabchar)
call assert_equal('0123456789ABCDEFabcdef', xdigitchars)
endfunc
*** ../vim-8.0.0518/src/version.c 2017-03-29 14:40:38.342357669 +0200
--- src/version.c 2017-03-29 15:23:59.134099666 +0200
***************
*** 766,767 ****
--- 766,769 ----
{ /* Add new patch number below this line */
+ /**/
+ 519,
/**/
--
hundred-and-one symptoms of being an internet addict:
231. You sprinkle Carpet Fresh on the rugs and put your vacuum cleaner
in the front doorway permanently so it always looks like you are
actually attempting to do something about that mess that has amassed
since you discovered the Internet.
/// Bram Moolenaar -- [email protected] -- http://www.Moolenaar.net \\\
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\ an exciting new programming language -- http://www.Zimbu.org ///
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///
--
--
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
---
You received this message because you are subscribed to the Google Groups
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.