In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/cb6501357462af503ffcfa2608d54a8336e1f244?hp=ef269bf5f55cf5087c6190ddbf34459c60a69622>
- Log ----------------------------------------------------------------- commit cb6501357462af503ffcfa2608d54a8336e1f244 Author: Father Chrysostomos <[email protected]> Date: Tue Jul 26 01:46:23 2016 -0700 [perl #128701] Fix err msg for Unicode delimiters The output of perl -CS -e 'use utf8; q«' is now correctly: Can't find string terminator "«" anywhere before EOF at -e line 1. Previously, the first byte of the delimiter (as encoded in UTF-8) would be used instead: Can't find string terminator "Ã" anywhere before EOF at -e line 1. M t/lib/croak/toke M toke.c commit 6745174b5616843ee57f0b733bd056bfab42f30f Author: Father Chrysostomos <[email protected]> Date: Tue Jul 26 00:47:16 2016 -0700 parser.h: Use UV for string delims We will need to store characters > 255 in here. Also, cast accordingly in toke.c. M parser.h M toke.c ----------------------------------------------------------------------- Summary of changes: parser.h | 4 ++-- t/lib/croak/toke | 14 ++++++++++++++ toke.c | 33 +++++++++++++++++++++++---------- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/parser.h b/parser.h index 96ab4f5..35f172e 100644 --- a/parser.h +++ b/parser.h @@ -68,8 +68,8 @@ typedef struct yy_parser { SV *lex_stuff; /* runtime pattern from m// or s/// */ I32 multi_start; /* 1st line of multi-line string */ I32 multi_end; /* last line of multi-line string */ - char multi_open; /* delimiter of said string */ - char multi_close; /* delimiter of said string */ + UV multi_open; /* delimiter of said string */ + UV multi_close; /* delimiter of said string */ bool preambled; bool lex_re_reparsing; /* we're doing G_RE_REPARSING */ I32 lex_allbrackets;/* (), [], {}, ?: bracket count */ diff --git a/t/lib/croak/toke b/t/lib/croak/toke index 1c6e4a2..cda6ffd 100644 --- a/t/lib/croak/toke +++ b/t/lib/croak/toke @@ -85,6 +85,20 @@ Can't find string terminator "/" anywhere before EOF at - line 1. EXPECT Can't find string terminator "'" anywhere before EOF at - line 1. ######## +# NAME Unterminated q// with non-ASCII delimiter, under utf8 +BEGIN { binmode STDERR, ":utf8" } +use utf8; +q« +EXPECT +Can't find string terminator "«" anywhere before EOF at - line 3. +######## +# NAME Unterminated q// with non-Latin-1 delimiter +BEGIN { binmode STDERR, ":utf8" } +use utf8; +q ì· +EXPECT +Can't find string terminator "ì·" anywhere before EOF at - line 3. +######## # NAME /\N{/ /\N{/ EXPECT diff --git a/toke.c b/toke.c index c42d037..13d8c3e 100644 --- a/toke.c +++ b/toke.c @@ -555,26 +555,38 @@ S_no_op(pTHX_ const char *const what, char *s) STATIC void S_missingterm(pTHX_ char *s) { - char tmpbuf[3]; + char tmpbuf[UTF8_MAXBYTES]; char q; + bool uni = FALSE; + SV *sv; if (s) { char * const nl = strrchr(s,'\n'); if (nl) *nl = '\0'; } - else if ((U8) PL_multi_close < 32) { + else if (PL_multi_close < 32) { *tmpbuf = '^'; tmpbuf[1] = (char)toCTRL(PL_multi_close); tmpbuf[2] = '\0'; s = tmpbuf; } else { - *tmpbuf = (char)PL_multi_close; - tmpbuf[1] = '\0'; + if (LIKELY(PL_multi_close < 256)) { + *tmpbuf = (char)PL_multi_close; + tmpbuf[1] = '\0'; + } + else { + uni = TRUE; + *uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close) = 0; + } s = tmpbuf; } q = strchr(s,'"') ? '\'' : '"'; - Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q); + sv = sv_2mortal(newSVpv(s,0)); + if (uni) + SvUTF8_on(sv); + Perl_croak(aTHX_ "Can't find string terminator %c%"SVf + "%c anywhere before EOF",q,SVfARG(sv),q); } #include "feature.h" @@ -9947,14 +9959,14 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re /* mark where we are */ PL_multi_start = CopLINE(PL_curcop); - PL_multi_open = term; + PL_multi_open = termcode; herelines = PL_parser->herelines; /* find corresponding closing delimiter */ if (term && (tmps = strchr("([{< )]}> )]}>",term))) termcode = termstr[0] = term = tmps[5]; - PL_multi_close = term; + PL_multi_close = termcode; if (PL_multi_open == PL_multi_close) { keep_bracketed_quoted = FALSE; @@ -10020,7 +10032,8 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re /* backslashes can escape the open or closing characters */ if (*s == '\\' && s+1 < PL_bufend) { if (!keep_bracketed_quoted - && ((s[1] == PL_multi_open) || (s[1] == PL_multi_close))) + && ( ((UV)s[1] == PL_multi_open) + || ((UV)s[1] == PL_multi_close) )) { s++; } @@ -10028,9 +10041,9 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re *to++ = *s++; } /* allow nested opens and closes */ - else if (*s == PL_multi_close && --brackets <= 0) + else if ((UV)*s == PL_multi_close && --brackets <= 0) break; - else if (*s == PL_multi_open) + else if ((UV)*s == PL_multi_open) brackets++; else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) has_utf8 = TRUE; -- Perl5 Master Repository
