In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/cb6501357462af503ffcfa2608d54a8336e1f244?hp=ef269bf5f55cf5087c6190ddbf34459c60a69622>

- Log -----------------------------------------------------------------
commit cb6501357462af503ffcfa2608d54a8336e1f244
Author: Father Chrysostomos <[email protected]>
Date:   Tue Jul 26 01:46:23 2016 -0700

    [perl #128701] Fix err msg for Unicode delimiters
    
    The output of
    
        perl -CS -e 'use utf8; q«'
    
    is now correctly:
    
        Can't find string terminator "«" anywhere before EOF at -e line 1.
    
    Previously, the first byte of the delimiter (as encoded in UTF-8)
    would be used instead:
    
        Can't find string terminator "Â" anywhere before EOF at -e line 1.

M       t/lib/croak/toke
M       toke.c

commit 6745174b5616843ee57f0b733bd056bfab42f30f
Author: Father Chrysostomos <[email protected]>
Date:   Tue Jul 26 00:47:16 2016 -0700

    parser.h: Use UV for string delims
    
    We will need to store characters > 255 in here.
    
    Also, cast accordingly in toke.c.

M       parser.h
M       toke.c
-----------------------------------------------------------------------

Summary of changes:
 parser.h         |  4 ++--
 t/lib/croak/toke | 14 ++++++++++++++
 toke.c           | 33 +++++++++++++++++++++++----------
 3 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/parser.h b/parser.h
index 96ab4f5..35f172e 100644
--- a/parser.h
+++ b/parser.h
@@ -68,8 +68,8 @@ typedef struct yy_parser {
     SV         *lex_stuff;     /* runtime pattern from m// or s/// */
     I32                multi_start;    /* 1st line of multi-line string */
     I32                multi_end;      /* last line of multi-line string */
-    char       multi_open;     /* delimiter of said string */
-    char       multi_close;    /* delimiter of said string */
+    UV         multi_open;     /* delimiter of said string */
+    UV         multi_close;    /* delimiter of said string */
     bool       preambled;
     bool        lex_re_reparsing; /* we're doing G_RE_REPARSING */
     I32                lex_allbrackets;/* (), [], {}, ?: bracket count */
diff --git a/t/lib/croak/toke b/t/lib/croak/toke
index 1c6e4a2..cda6ffd 100644
--- a/t/lib/croak/toke
+++ b/t/lib/croak/toke
@@ -85,6 +85,20 @@ Can't find string terminator "/" anywhere before EOF at - 
line 1.
 EXPECT
 Can't find string terminator "'" anywhere before EOF at - line 1.
 ########
+# NAME Unterminated q// with non-ASCII delimiter, under utf8
+BEGIN { binmode STDERR, ":utf8" }
+use utf8;
+q«
+EXPECT
+Can't find string terminator "«" anywhere before EOF at - line 3.
+########
+# NAME Unterminated q// with non-Latin-1 delimiter
+BEGIN { binmode STDERR, ":utf8" }
+use utf8;
+q 옷
+EXPECT
+Can't find string terminator "옷" anywhere before EOF at - line 3.
+########
 # NAME /\N{/
 /\N{/
 EXPECT
diff --git a/toke.c b/toke.c
index c42d037..13d8c3e 100644
--- a/toke.c
+++ b/toke.c
@@ -555,26 +555,38 @@ S_no_op(pTHX_ const char *const what, char *s)
 STATIC void
 S_missingterm(pTHX_ char *s)
 {
-    char tmpbuf[3];
+    char tmpbuf[UTF8_MAXBYTES];
     char q;
+    bool uni = FALSE;
+    SV *sv;
     if (s) {
        char * const nl = strrchr(s,'\n');
        if (nl)
            *nl = '\0';
     }
-    else if ((U8) PL_multi_close < 32) {
+    else if (PL_multi_close < 32) {
        *tmpbuf = '^';
        tmpbuf[1] = (char)toCTRL(PL_multi_close);
        tmpbuf[2] = '\0';
        s = tmpbuf;
     }
     else {
-       *tmpbuf = (char)PL_multi_close;
-       tmpbuf[1] = '\0';
+       if (LIKELY(PL_multi_close < 256)) {
+           *tmpbuf = (char)PL_multi_close;
+           tmpbuf[1] = '\0';
+       }
+       else {
+           uni = TRUE;
+           *uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close) = 0;
+       }
        s = tmpbuf;
     }
     q = strchr(s,'"') ? '\'' : '"';
-    Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before 
EOF",q,s,q);
+    sv = sv_2mortal(newSVpv(s,0));
+    if (uni)
+       SvUTF8_on(sv);
+    Perl_croak(aTHX_ "Can't find string terminator %c%"SVf
+                    "%c anywhere before EOF",q,SVfARG(sv),q);
 }
 
 #include "feature.h"
@@ -9947,14 +9959,14 @@ S_scan_str(pTHX_ char *start, int 
keep_bracketed_quoted, int keep_delims, int re
 
     /* mark where we are */
     PL_multi_start = CopLINE(PL_curcop);
-    PL_multi_open = term;
+    PL_multi_open = termcode;
     herelines = PL_parser->herelines;
 
     /* find corresponding closing delimiter */
     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
        termcode = termstr[0] = term = tmps[5];
 
-    PL_multi_close = term;
+    PL_multi_close = termcode;
 
     if (PL_multi_open == PL_multi_close) {
         keep_bracketed_quoted = FALSE;
@@ -10020,7 +10032,8 @@ S_scan_str(pTHX_ char *start, int 
keep_bracketed_quoted, int keep_delims, int re
                /* backslashes can escape the open or closing characters */
                if (*s == '\\' && s+1 < PL_bufend) {
                    if (!keep_bracketed_quoted
-                       && ((s[1] == PL_multi_open) || (s[1] == 
PL_multi_close)))
+                       && ( ((UV)s[1] == PL_multi_open)
+                         || ((UV)s[1] == PL_multi_close) ))
                     {
                        s++;
                     }
@@ -10028,9 +10041,9 @@ S_scan_str(pTHX_ char *start, int 
keep_bracketed_quoted, int keep_delims, int re
                        *to++ = *s++;
                 }
                /* allow nested opens and closes */
-               else if (*s == PL_multi_close && --brackets <= 0)
+               else if ((UV)*s == PL_multi_close && --brackets <= 0)
                    break;
-               else if (*s == PL_multi_open)
+               else if ((UV)*s == PL_multi_open)
                    brackets++;
                else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
                    has_utf8 = TRUE;

--
Perl5 Master Repository

Reply via email to