In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/8ce2ba821761a7ada1e1def512c0374977759cf7?hp=71622e40793536aa4f2ace7ffc704cc78151fd26>

- Log -----------------------------------------------------------------
commit 8ce2ba821761a7ada1e1def512c0374977759cf7
Author: Alex Vandiver <[email protected]>
Date:   Sun Mar 22 23:08:24 2015 -0400

    Fix "...without parentheses is ambiguous" warning for UTF-8 function names
    
    While isWORDCHAR_lazy_if is UTF-8 aware, the check advanced byte-by-byte.
    This led to errors of the form:
    
       Passing malformed UTF-8 to "XPosixWord" is deprecated
       Malformed UTF-8 character (unexpected continuation byte 0x9d, with
         no preceding start byte)
       Warning: Use of "�" without parentheses is ambiguous
    
    Use UTF8SKIP to advance character-by-character, not byte-by-byte.

M       t/lib/warnings/toke
M       toke.c

commit 6e59c8626d31f697a2b7b36cf8a200b36d93eac2
Author: Alex Vandiver <[email protected]>
Date:   Sun Mar 22 22:45:54 2015 -0400

    Allow unquoted UTF-8 HERE-document terminators
    
    When not explicitly quoted, tokenization of the HERE-document terminator
    dealt improperly with multi-byte characters, advancing one byte at a
    time instead of one character at a time.  This led to
    incomprehensible-to-the-user errors of the form:
    
        Passing malformed UTF-8 to "XPosixWord" is deprecated
        Malformed UTF-8 character (unexpected continuation byte 0xa7, with
          no preceding start byte)
        Can't find string terminator "EnFra�" anywhere before EOF
    
    If enclosed in single or double quotes, parsing was correctly effected,
    as delimcpy advances byte-by-byte, but looks only for the single-byte
    ending character.
    
    When doing a \w+ match looking for the end of the word, advance
    character-by-character instead of byte-by-byte, ensuring that the size
    does not extend past the available size in PL_tokenbuf.

M       t/lib/warnings/toke
M       toke.c

commit b3089e964c0afaf7eb8d54aa5a912e4eb2e6c176
Author: Alex Vandiver <[email protected]>
Date:   Sun Mar 22 22:39:23 2015 -0400

    [perl #124113] Make check for multi-dimensional arrays be UTF8-aware
    
    During parsing, toke.c checks if the user is attempting to provide
    multiple indexes in an array subscript:
    
        $a[ $foo, $bar ];
    
    However, while checking for word characters in variable names is aware
    of multi-byte characters if "use utf8" is enabled, the loop is only
    advanced one byte at a time, not one character at a time.  As such,
    multibyte variables in array indexes incorrectly yield warnings:
    
        Passing malformed UTF-8 to "XPosixWord" is deprecated
        Malformed UTF-8 character (unexpected continuation byte 0x9d, with
          no preceding start byte)
    
    Switch the loop to advance character-by-character if UTF-8 semantics are
    in use.

M       t/lib/warnings/toke
M       toke.c

commit d655d9a2c4d4884d0edf5364a3aafbc8b0b8de38
Author: Father Chrysostomos <[email protected]>
Date:   Fri Mar 27 12:39:54 2015 -0700

    [perl #124099] Wrong CvOUTSIDE in find_lexical_cv
    
    Instead of following the chain of CvOUTSIDE pointers, I had it always
    looking at the CvOUTSIDE pointer of the current PL_compcv.  So any
    time it had to dig down more than one level, it had a chance of crash-
    ing or looping.

M       op.c
M       t/op/lexsub.t
-----------------------------------------------------------------------

Summary of changes:
 op.c                |  2 +-
 t/lib/warnings/toke | 31 +++++++++++++++++++++++++++++++
 t/op/lexsub.t       |  6 ++++++
 toke.c              | 14 +++++++++-----
 4 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/op.c b/op.c
index 89bf436..3000c44 100644
--- a/op.c
+++ b/op.c
@@ -11238,7 +11238,7 @@ Perl_find_lexical_cv(pTHX_ PADOFFSET off)
     CV *compcv = PL_compcv;
     while (PadnameOUTER(name)) {
        assert(PARENT_PAD_INDEX(name));
-       compcv = CvOUTSIDE(PL_compcv);
+       compcv = CvOUTSIDE(compcv);
        name = PadlistNAMESARRAY(CvPADLIST(compcv))
                [off = PARENT_PAD_INDEX(name)];
     }
diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke
index 5d31104..6cbce2e 100644
--- a/t/lib/warnings/toke
+++ b/t/lib/warnings/toke
@@ -1521,3 +1521,34 @@ Use of literal control characters in variable names is 
deprecated at (eval 2) li
 -a;
 ;-a;
 EXPECT
+########
+# toke.c
+# [perl #124113] Compile-time warning with UTF8 variable in array index
+use warnings;
+use utf8;
+my $𝛃 = 0;
+my @array = (0);
+my $v = $array[ 0 + $𝛃 ];
+   $v = $array[ $𝛃 + 0 ];
+EXPECT
+########
+# toke.c
+# Allow Unicode here doc boundaries
+use warnings;
+use utf8;
+my $v = <<EnFraçais;
+Comme ca!
+EnFraçais
+print $v;
+EXPECT
+Comme ca!
+########
+# toke.c
+# Fix 'Use of "..." without parentheses is ambiguous' warning for
+# Unicode function names
+use utf8;
+use warnings;
+sub 𝛃(;$) { return 0; }
+my $v = 𝛃 - 5;
+EXPECT
+Warning: Use of "𝛃" without parentheses is ambiguous at - line 7.
diff --git a/t/op/lexsub.t b/t/op/lexsub.t
index b1b76e8..adccf4c 100644
--- a/t/op/lexsub.t
+++ b/t/op/lexsub.t
@@ -961,3 +961,9 @@ like runperl(
   @AutoloadTest::ISA = AutoloadTestSuper::;
   AutoloadTest->blah;
 }
+
+# This used to crash because op.c:find_lexical_cv was looking at the wrong
+# CV’s OUTSIDE pointer.  [perl #124099]
+{
+  my sub h; sub{my $x; sub{h}}
+}
diff --git a/toke.c b/toke.c
index ddc2431..f974b1c 100644
--- a/toke.c
+++ b/toke.c
@@ -1841,7 +1841,7 @@ S_check_uni(pTHX)
        PL_last_uni++;
     s = PL_last_uni;
     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
-       s++;
+       s += UTF ? UTF8SKIP(s) : 1;
     if ((t = strchr(s, '(')) && t < PL_bufptr)
        return;
 
@@ -6049,7 +6049,7 @@ Perl_yylex(pTHX)
                        char *t = s+1;
 
                        while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t 
== '$')
-                           t++;
+                           t += UTF ? UTF8SKIP(t) : 1;
                        if (*t++ == ',') {
                            PL_bufptr = skipspace(PL_bufptr); /* XXX can 
realloc */
                            while (t < PL_bufend && *t != ']')
@@ -9210,10 +9210,14 @@ S_scan_heredoc(pTHX_ char *s)
            term = '"';
        if (!isWORDCHAR_lazy_if(s,UTF))
            deprecate("bare << to mean <<\"\"");
-       for (; isWORDCHAR_lazy_if(s,UTF); s++) {
-           if (d < e)
-               *d++ = *s;
+       peek = s;
+       while (isWORDCHAR_lazy_if(peek,UTF)) {
+           peek += UTF ? UTF8SKIP(peek) : 1;
        }
+       len = (peek - s >= e - d) ? (e - d) : (peek - s);
+       Copy(s, d, len, char);
+       s += len;
+       d += len;
     }
     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
        Perl_croak(aTHX_ "Delimiter for here document is too long");

--
Perl5 Master Repository

Reply via email to