In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/bf3cd0e64298155c8984b0fce2cce3b4648d3110?hp=0d7b125b0c3d6ada9b99e6cdd424e39fbc3aa5f1>
- Log ----------------------------------------------------------------- commit bf3cd0e64298155c8984b0fce2cce3b4648d3110 Author: Karl Williamson <[email protected]> Date: Thu Dec 6 19:20:22 2012 -0700 locale.t: Add test This makes sure that taint isn't being added unnecessarily M lib/locale.t commit a9b7c63715bd82e15b815c96e5e1c6b87ab46f7b Author: Karl Williamson <[email protected]> Date: Thu Dec 6 19:19:13 2012 -0700 locale.t: Add optional test name supplement This allows tests to have extra stuff passed in that will be printed as part of the test results M lib/locale.t commit a9c4383684955582336fa9f07ca1f378d84bdb99 Author: Karl Williamson <[email protected]> Date: Thu Dec 6 19:07:41 2012 -0700 regcomp.c: Typo in comment; white space M regcomp.c commit 76a77b1b5dc44d6f70bf59fdc331a1332081b301 Author: Karl Williamson <[email protected]> Date: Thu Dec 6 11:31:19 2012 -0700 pp.c pp_pack.c: Use macro instead of function This converts to use is_SPACE_utf8() instead of the (soon to be deprecated) is_utf8_space(). The macro is faster, avoiding the function call completely, so the performance need to make a special case for a SPACE character is gone. M pp.c M pp_pack.c commit 009ccfeda269d5b353a297ca4059497343c8ce41 Author: Karl Williamson <[email protected]> Date: Thu Dec 6 09:20:51 2012 -0700 regcomp.c:regprop: [bracketize] \w..., add \v This function returns the name of a character class given its number. This changes the name of \w, \s, \d to be [\w] .... And it adds an entry for \v and \V. These makes a complete set, and will make things easier to read, as a result of changes coming in future commits M regcomp.c commit e699a1d5e627c6248a14e00e813d5a7a53f0947d Author: Karl Williamson <[email protected]> Date: Wed Dec 5 21:53:06 2012 -0700 regexec.c: Nits coding standards-type changing M regexec.c commit eb42f199581875bfa31b7e27ea4c1425417d0e5a Author: Karl Williamson <[email protected]> Date: Wed Dec 5 21:39:21 2012 -0700 perl.h: Add comments M perl.h commit 41805eb96d4ab7da622f82f6104ab8fa95527f33 Author: Karl Williamson <[email protected]> Date: Wed Dec 5 21:37:26 2012 -0700 perlrecharclass: Fix defn of [:word:] M pod/perlrecharclass.pod commit 583d08e34bb7232227da37acc36ec737ce653671 Author: Karl Williamson <[email protected]> Date: Sun Dec 2 19:18:34 2012 -0700 intrpvar.h: Add comment M intrpvar.h commit d87fc68d13011e0743cc7850f0c3fceb4633864f Author: Karl Williamson <[email protected]> Date: Sun Dec 2 18:50:44 2012 -0700 Add Todo test for Perl #114272 M t/re/re_tests commit 6c46377d3226d15447819a5fa3518c660bce7679 Author: Karl Williamson <[email protected]> Date: Sun Dec 2 18:48:33 2012 -0700 utf8.c: Combine 2 function calls into one There is a function that does both these together, more efficiently M utf8.c commit b81740c0c3e6a549e6766887035f48b39a45557c Author: Karl Williamson <[email protected]> Date: Sun Dec 2 18:47:23 2012 -0700 utf8.c: Move ARGS_ASSERT to earlier in function to a place where people more expect to see it. M utf8.c commit 9e3e825e7fea9a67d23db17933ab6c803f890219 Author: Karl Williamson <[email protected]> Date: Sun Dec 2 21:20:36 2012 -0700 embed.fnc: Add missing entry This function is defined in utf8.c, but isn't called by the core, and there was no entry for it in embed.fnc M embed.fnc M embed.h M proto.h commit a1894d81735066945ef520af52cc180d1e0dfb10 Author: Karl Williamson <[email protected]> Date: Thu Dec 6 22:42:18 2012 -0700 Silence some g++ compiler warnings Changing these slightly got rid of the warnings like: toke.c:9168: warning: format not a string literal and no format arguments M doio.c M pp.c M pp_ctl.c M toke.c commit 6c2f1a55be4cd937e7e87f2ffb875a22688fb01e Author: Karl Williamson <[email protected]> Date: Sat Dec 8 21:42:23 2012 -0700 intrpvar.h: Use #define instead of hard-coded number The number 12 is mysterious as to why we are using it otherwise. M intrpvar.h ----------------------------------------------------------------------- Summary of changes: doio.c | 2 +- embed.fnc | 1 + embed.h | 1 + intrpvar.h | 4 ++-- lib/locale.t | 16 ++++++++++++---- perl.h | 6 ++++++ pod/perlrecharclass.pod | 11 ++++++----- pp.c | 12 ++++++------ pp_ctl.c | 4 ++-- pp_pack.c | 2 +- proto.h | 4 ++++ regcomp.c | 20 +++++++++++--------- regexec.c | 7 +++++-- t/re/re_tests | 3 +++ toke.c | 4 ++-- utf8.c | 7 +++---- 16 files changed, 66 insertions(+), 38 deletions(-) diff --git a/doio.c b/doio.c index e915bd5..08ed433 100644 --- a/doio.c +++ b/doio.c @@ -1333,7 +1333,7 @@ I32 Perl_my_lstat_flags(pTHX_ const U32 flags) { dVAR; - static const char no_prev_lstat[] = "The stat preceding -l _ wasn't an lstat"; + static const char* const no_prev_lstat = "The stat preceding -l _ wasn't an lstat"; dSP; const char *file; if (PL_op->op_flags & OPf_REF) { diff --git a/embed.fnc b/embed.fnc index 3dea370..84c0b19 100644 --- a/embed.fnc +++ b/embed.fnc @@ -637,6 +637,7 @@ AMpR |bool |_is_uni_perl_idstart|UV c ApPR |bool |is_uni_alpha_lc|UV c ApPR |bool |is_uni_ascii_lc|UV c ApPR |bool |is_uni_space_lc|UV c +AMpPR |bool |is_uni_blank_lc|UV c ApPR |bool |is_uni_cntrl_lc|UV c ApPR |bool |is_uni_graph_lc|UV c ApPR |bool |is_uni_digit_lc|UV c diff --git a/embed.h b/embed.h index 25bd724..95f9943 100644 --- a/embed.h +++ b/embed.h @@ -230,6 +230,7 @@ #define is_uni_ascii(a) Perl_is_uni_ascii(aTHX_ a) #define is_uni_ascii_lc(a) Perl_is_uni_ascii_lc(aTHX_ a) #define is_uni_blank(a) Perl_is_uni_blank(aTHX_ a) +#define is_uni_blank_lc(a) Perl_is_uni_blank_lc(aTHX_ a) #define is_uni_cntrl(a) Perl_is_uni_cntrl(aTHX_ a) #define is_uni_cntrl_lc(a) Perl_is_uni_cntrl_lc(aTHX_ a) #define is_uni_digit(a) Perl_is_uni_digit(aTHX_ a) diff --git a/intrpvar.h b/intrpvar.h index 52b45ba..f58c0d1 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -613,7 +613,7 @@ PERLVAR(I, NonL1NonFinalFold, SV *) PERLVAR(I, HasMultiCharFold, SV *) /* utf8 character class swashes */ -PERLVAR(I, utf8_alnum, SV *) +PERLVAR(I, utf8_alnum, SV *) /* Should really be named "utf8_wordchar" */ PERLVAR(I, utf8_alpha, SV *) PERLVAR(I, utf8_graph, SV *) PERLVAR(I, utf8_digit, SV *) @@ -634,7 +634,7 @@ PERLVAR(I, utf8_charname_continue, SV *) PERLVAR(I, last_swash_hv, HV *) PERLVAR(I, last_swash_tmps, U8 *) PERLVAR(I, last_swash_slen, STRLEN) -PERLVARA(I, last_swash_key,12, U8) +PERLVARA(I, last_swash_key,UTF8_MAXBYTES-1, U8) PERLVAR(I, last_swash_klen, U8) /* Only needs to store 0-12 */ #ifdef FCRYPT diff --git a/lib/locale.t b/lib/locale.t index 26a7bd4..6d491e4 100644 --- a/lib/locale.t +++ b/lib/locale.t @@ -89,14 +89,22 @@ sub is_tainted { # hello, camel two. not eval { $dummy = join("", @_), kill 0; 1 } } -sub check_taint ($) { - ok is_tainted($_[0]), "verify that is tainted"; +sub check_taint ($;$) { + my $message_tail = $_[1] // ""; + $message_tail = ": $message_tail" if $message_tail; + ok is_tainted($_[0]), "verify that is tainted$message_tail"; } -sub check_taint_not ($) { - ok((not is_tainted($_[0])), "verify that isn't tainted"); +sub check_taint_not ($;$) { + my $message_tail = $_[1] // ""; + $message_tail = ": $message_tail" if $message_tail; + ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail"); } +"\tb\t" =~ /^m?(\s)(.*)\1$/; +check_taint_not $&, "not tainted outside 'use locale'"; +; + use locale; # engage locale and therefore locale taint. check_taint_not $a; diff --git a/perl.h b/perl.h index b13521a..d97a577 100644 --- a/perl.h +++ b/perl.h @@ -5006,8 +5006,14 @@ struct interpreter { /* Set up PERLVAR macros for populating structs */ # define PERLVAR(prefix,var,type) type prefix##var; + +/* 'var' is an array of length 'n' */ # define PERLVARA(prefix,var,n,type) type prefix##var[n]; + +/* initialize 'var' to init' */ # define PERLVARI(prefix,var,type,init) type prefix##var; + +/* like PERLVARI, but make 'var' a const */ # define PERLVARIC(prefix,var,type,init) type prefix##var; struct interpreter { diff --git a/pod/perlrecharclass.pod b/pod/perlrecharclass.pod index 7dafc54..7478932 100644 --- a/pod/perlrecharclass.pod +++ b/pod/perlrecharclass.pod @@ -140,11 +140,12 @@ Any character not matched by C<\d> is matched by C<\D>. =head3 Word characters A C<\w> matches a single alphanumeric character (an alphabetic character, or a -decimal digit) or a connecting punctuation character, such as an -underscore ("_"). It does not match a whole word. To match a whole -word, use C<\w+>. This isn't the same thing as matching an English word, but -in the ASCII range it is the same as a string of Perl-identifier -characters. +decimal digit); or a connecting punctuation character, such as an +underscore ("_"); or a "mark" character (like some sort of accent) that +attaches to one of those. It does not match a whole word. To match a +whole word, use C<\w+>. This isn't the same thing as matching an +English word, but in the ASCII range it is the same as a string of +Perl-identifier characters. =over diff --git a/pp.c b/pp.c index cf59a84..33943e4 100644 --- a/pp.c +++ b/pp.c @@ -1646,7 +1646,7 @@ PP(pp_repeat) if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) { dMARK; - static const char oom_list_extend[] = "Out of memory during list extend"; + static const char* const oom_list_extend = "Out of memory during list extend"; const I32 items = SP - MARK; const I32 max = items * count; @@ -1698,7 +1698,7 @@ PP(pp_repeat) SV * const tmpstr = POPs; STRLEN len; bool isutf; - static const char oom_string_extend[] = + static const char* const oom_string_extend = "Out of memory during string extend"; if (TARG != tmpstr) @@ -5380,7 +5380,7 @@ PP(pp_split) orig = s; if (skipwhite) { if (do_utf8) { - while (*s == ' ' || is_utf8_space((U8*)s)) + while (isSPACE_utf8(s)) s += UTF8SKIP(s); } else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { @@ -5405,9 +5405,9 @@ PP(pp_split) m = s; /* this one uses 'm' and is a negative test */ if (do_utf8) { - while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) { + while (m < strend && ! isSPACE_utf8(m) ) { const int t = UTF8SKIP(m); - /* is_utf8_space returns FALSE for malform utf8 */ + /* isSPACE_utf8 returns FALSE for malform utf8 */ if (strend - m < t) m = strend; else @@ -5444,7 +5444,7 @@ PP(pp_split) /* this one uses 's' and is a positive test */ if (do_utf8) { - while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) )) + while (s < strend && isSPACE_utf8(s) ) s += UTF8SKIP(s); } else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { diff --git a/pp_ctl.c b/pp_ctl.c index 0256070..cd5033f 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -2692,7 +2692,7 @@ S_dofindlabel(pTHX_ OP *o, const char *label, STRLEN len, U32 flags, OP **opstac { dVAR; OP **ops = opstack; - static const char too_deep[] = "Target of goto is too deeply nested"; + static const char* const too_deep = "Target of goto is too deeply nested"; PERL_ARGS_ASSERT_DOFINDLABEL; @@ -2764,7 +2764,7 @@ PP(pp_goto) STRLEN label_len = 0; U32 label_flags = 0; const bool do_dump = (PL_op->op_type == OP_DUMP); - static const char must_have_label[] = "goto must have label"; + static const char* const must_have_label = "goto must have label"; if (PL_op->op_flags & OPf_STACKED) { SV * const sv = POPs; diff --git a/pp_pack.c b/pp_pack.c index a2a5c68..321a47d 100644 --- a/pp_pack.c +++ b/pp_pack.c @@ -1470,7 +1470,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) { for (ptr = s+len-1; ptr >= s; ptr--) if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) && - !is_utf8_space((U8 *) ptr)) break; + !isSPACE_utf8(ptr)) break; if (ptr >= s) ptr += UTF8SKIP(ptr); else ptr++; if (ptr > s+len) diff --git a/proto.h b/proto.h index 2351b32..1a29b2f 100644 --- a/proto.h +++ b/proto.h @@ -1701,6 +1701,10 @@ PERL_CALLCONV bool Perl_is_uni_blank(pTHX_ UV c) __attribute__warn_unused_result__ __attribute__pure__; +PERL_CALLCONV bool Perl_is_uni_blank_lc(pTHX_ UV c) + __attribute__warn_unused_result__ + __attribute__pure__; + PERL_CALLCONV bool Perl_is_uni_cntrl(pTHX_ UV c) __attribute__warn_unused_result__ __attribute__pure__; diff --git a/regcomp.c b/regcomp.c index 4736bbd..979dcae 100644 --- a/regcomp.c +++ b/regcomp.c @@ -12630,7 +12630,7 @@ parseit: /* If the highest code point is within Latin1, we can use the * compiled-in Alphas list, and not have to go out to disk. This - * yields two false positives, the masculine and feminine oridinal + * yields two false positives, the masculine and feminine ordinal * indicators, which are weeded out below using the * IS_IN_SOME_FOLD_L1() macro */ if (invlist_highest(cp_list) < 256) { @@ -12672,7 +12672,7 @@ parseit: assert(PL_utf8_tofold); /* Verify that worked */ } PL_utf8_foldclosures = - _swash_inversion_hash(PL_utf8_tofold); + _swash_inversion_hash(PL_utf8_tofold); } } @@ -13822,12 +13822,12 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */ static const char * const anyofs[] = { - "\\w", - "\\W", - "\\s", - "\\S", - "\\d", - "\\D", + "[\\w]", + "[\\W]", + "[\\s]", + "[\\S]", + "[\\d]", + "[\\D]", "[:alnum:]", "[:^alnum:]", "[:alpha:]", @@ -13851,7 +13851,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) "[:space:]", "[:^space:]", "[:blank:]", - "[:^blank:]" + "[:^blank:]", + "[\\v]", + "[\\V]" }; RXi_GET_DECL(prog,progi); GET_RE_DEBUG_FLAGS_DECL; diff --git a/regexec.c b/regexec.c index 3b2f012..d235cde 100644 --- a/regexec.c +++ b/regexec.c @@ -4455,7 +4455,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) if (nextchr == '\r' /* And if it was CR, and the next is LF, match the LF */ && locinput < PL_regeol - && UCHARAT(locinput) == '\n') locinput++; + && UCHARAT(locinput) == '\n') + { + locinput++; + } } else { @@ -4473,7 +4476,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) char *starting = locinput; /* In case have to backtrack the last prepend */ - char *previous_prepend = 0; + char *previous_prepend = NULL; LOAD_UTF8_CHARCLASS_GCB(); diff --git a/t/re/re_tests b/t/re/re_tests index fff9e4c..5321da6 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1716,4 +1716,7 @@ ab[c\\\](??{"x"})]{3}d ab\\](d y - - /^\S{11}/a \x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF} n - - /^\W{11}/a \x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF} n - - +# [ perl #114272] +\Vn \xFFn/ yT $& \xFFn + # vim: softtabstop=0 noexpandtab diff --git a/toke.c b/toke.c index 16f359e..628a5da 100644 --- a/toke.c +++ b/toke.c @@ -110,7 +110,7 @@ Individual members of C<PL_parser> have their own documentation. # define PL_nextval (PL_parser->nextval) #endif -static const char ident_too_long[] = "Identifier too long"; +static const char* const ident_too_long = "Identifier too long"; #ifdef PERL_MAD # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; } @@ -10646,7 +10646,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) SV *sv = NULL; /* place to put the converted number */ bool floatit; /* boolean: int or float? */ const char *lastub = NULL; /* position of last underbar */ - static char const number_too_long[] = "Number too long"; + static const char* const number_too_long = "Number too long"; PERL_ARGS_ASSERT_SCAN_NUM; diff --git a/utf8.c b/utf8.c index 70620af..418f0d8 100644 --- a/utf8.c +++ b/utf8.c @@ -3924,6 +3924,8 @@ Perl__swash_to_invlist(pTHX_ SV* const swash) SV* invlist; + PERL_ARGS_ASSERT__SWASH_TO_INVLIST; + /* If not a hash, it must be the swash's inversion list instead */ if (SvTYPE(hv) != SVt_PVHV) { return (SV*) hv; @@ -3940,8 +3942,6 @@ Perl__swash_to_invlist(pTHX_ SV* const swash) bits = SvUV(*bitssvp); octets = bits >> 3; /* if bits == 1, then octets == 0 */ - PERL_ARGS_ASSERT__SWASH_TO_INVLIST; - /* read $swash->{LIST} */ if (SvPOK(*listsvp)) { l = (U8*)SvPV(*listsvp, lcur); @@ -4055,8 +4055,7 @@ Perl__swash_to_invlist(pTHX_ SV* const swash) _invlist_union(invlist, other, &invlist); break; case '!': - _invlist_invert(other); - _invlist_union(invlist, other, &invlist); + _invlist_union_maybe_complement_2nd(invlist, other, TRUE, &invlist); break; case '-': _invlist_subtract(invlist, other, &invlist); -- Perl5 Master Repository
