In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/8ce2ba821761a7ada1e1def512c0374977759cf7?hp=71622e40793536aa4f2ace7ffc704cc78151fd26>
- Log ----------------------------------------------------------------- commit 8ce2ba821761a7ada1e1def512c0374977759cf7 Author: Alex Vandiver <[email protected]> Date: Sun Mar 22 23:08:24 2015 -0400 Fix "...without parentheses is ambiguous" warning for UTF-8 function names While isWORDCHAR_lazy_if is UTF-8 aware, checking advanced byte-by-byte. This led to errors of the form: Passing malformed UTF-8 to "XPosixWord" is deprecated Malformed UTF-8 character (unexpected continuation byte 0x9d, with no preceding start byte) Warning: Use of "�" without parentheses is ambiguous Use UTF8SKIP to advance character-by-character, not byte-by-byte. M t/lib/warnings/toke M toke.c commit 6e59c8626d31f697a2b7b36cf8a200b36d93eac2 Author: Alex Vandiver <[email protected]> Date: Sun Mar 22 22:45:54 2015 -0400 Allow unquoted UTF-8 HERE-document terminators When not explicitly quoted, tokenization of the HERE-document terminator dealt improperly with multi-byte characters, advancing one byte at a time instead of one character at a time. This led to incomprehensible-to-the-user errors of the form: Passing malformed UTF-8 to "XPosixWord" is deprecated Malformed UTF-8 character (unexpected continuation byte 0xa7, with no preceding start byte) Can't find string terminator "EnFra�" anywhere before EOF If enclosed in single or double quotes, parsing was correctly effected, as delimcpy advances byte-by-byte, but looks only for the single-byte ending character. When doing a \w+ match looking for the end of the word, advance character-by-character instead of byte-by-byte, ensuring that the size does not extend past the available size in PL_tokenbuf. 
M t/lib/warnings/toke M toke.c commit b3089e964c0afaf7eb8d54aa5a912e4eb2e6c176 Author: Alex Vandiver <[email protected]> Date: Sun Mar 22 22:39:23 2015 -0400 [perl #124113] Make check for multi-dimensional arrays be UTF8-aware During parsing, toke.c checks if the user is attempting to provide multiple indexes to an array index: $a[ $foo, $bar ]; However, while checking for word characters in variable names is aware of multi-byte characters if "use utf8" is enabled, the loop is only advanced one byte at a time, not one character at a time. As such, multibyte variables in array indexes incorrectly yield warnings: Passing malformed UTF-8 to "XPosixWord" is deprecated Malformed UTF-8 character (unexpected continuation byte 0x9d, with no preceding start byte) Switch the loop to advance character-by-character if UTF-8 semantics are in use. M t/lib/warnings/toke M toke.c commit d655d9a2c4d4884d0edf5364a3aafbc8b0b8de38 Author: Father Chrysostomos <[email protected]> Date: Fri Mar 27 12:39:54 2015 -0700 [perl #124099] Wrong CvOUTSIDE in find_lexical_cv Instead of following the chain of CvOUTSIDE pointers, I had it always looking at the CvOUTSIDE pointer of the current PL_compcv. So any time it had to dig down more than one level, it had a chance of crashing or looping. 
M op.c M t/op/lexsub.t ----------------------------------------------------------------------- Summary of changes: op.c | 2 +- t/lib/warnings/toke | 31 +++++++++++++++++++++++++++++++ t/op/lexsub.t | 6 ++++++ toke.c | 14 +++++++++----- 4 files changed, 47 insertions(+), 6 deletions(-) diff --git a/op.c b/op.c index 89bf436..3000c44 100644 --- a/op.c +++ b/op.c @@ -11238,7 +11238,7 @@ Perl_find_lexical_cv(pTHX_ PADOFFSET off) CV *compcv = PL_compcv; while (PadnameOUTER(name)) { assert(PARENT_PAD_INDEX(name)); - compcv = CvOUTSIDE(PL_compcv); + compcv = CvOUTSIDE(compcv); name = PadlistNAMESARRAY(CvPADLIST(compcv)) [off = PARENT_PAD_INDEX(name)]; } diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke index 5d31104..6cbce2e 100644 --- a/t/lib/warnings/toke +++ b/t/lib/warnings/toke @@ -1521,3 +1521,34 @@ Use of literal control characters in variable names is deprecated at (eval 2) li -a; ;-a; EXPECT +######## +# toke.c +# [perl #124113] Compile-time warning with UTF8 variable in array index +use warnings; +use utf8; +my $ð = 0; +my @array = (0); +my $v = $array[ 0 + $ð ]; + $v = $array[ $ð + 0 ]; +EXPECT +######## +# toke.c +# Allow Unicode here doc boundaries +use warnings; +use utf8; +my $v = <<EnFraçais; +Comme ca! +EnFraçais +print $v; +EXPECT +Comme ca! +######## +# toke.c +# Fix 'Use of "..." without parentheses is ambiguous' warning for +# Unicode function names +use utf8; +use warnings; +sub ð(;$) { return 0; } +my $v = ð - 5; +EXPECT +Warning: Use of "ð" without parentheses is ambiguous at - line 7. diff --git a/t/op/lexsub.t b/t/op/lexsub.t index b1b76e8..adccf4c 100644 --- a/t/op/lexsub.t +++ b/t/op/lexsub.t @@ -961,3 +961,9 @@ like runperl( @AutoloadTest::ISA = AutoloadTestSuper::; AutoloadTest->blah; } + +# This used to crash because op.c:find_lexical_cv was looking at the wrong +# CV’s OUTSIDE pointer. 
[perl #124099] +{ + my sub h; sub{my $x; sub{h}} +} diff --git a/toke.c b/toke.c index ddc2431..f974b1c 100644 --- a/toke.c +++ b/toke.c @@ -1841,7 +1841,7 @@ S_check_uni(pTHX) PL_last_uni++; s = PL_last_uni; while (isWORDCHAR_lazy_if(s,UTF) || *s == '-') - s++; + s += UTF ? UTF8SKIP(s) : 1; if ((t = strchr(s, '(')) && t < PL_bufptr) return; @@ -6049,7 +6049,7 @@ Perl_yylex(pTHX) char *t = s+1; while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$') - t++; + t += UTF ? UTF8SKIP(t) : 1; if (*t++ == ',') { PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */ while (t < PL_bufend && *t != ']') @@ -9210,10 +9210,14 @@ S_scan_heredoc(pTHX_ char *s) term = '"'; if (!isWORDCHAR_lazy_if(s,UTF)) deprecate("bare << to mean <<\"\""); - for (; isWORDCHAR_lazy_if(s,UTF); s++) { - if (d < e) - *d++ = *s; + peek = s; + while (isWORDCHAR_lazy_if(peek,UTF)) { + peek += UTF ? UTF8SKIP(peek) : 1; } + len = (peek - s >= e - d) ? (e - d) : (peek - s); + Copy(s, d, len, char); + s += len; + d += len; } if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1) Perl_croak(aTHX_ "Delimiter for here document is too long"); -- Perl5 Master Repository
