In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/6b9660cae39cc0ce59738753e42153cb3be530c7?hp=fe2ba0a2de216bca4582bfb493b196d2eb4c94ae>
- Log ----------------------------------------------------------------- commit 6b9660cae39cc0ce59738753e42153cb3be530c7 Author: Karl Williamson <[email protected]> Date: Tue Jan 31 14:17:14 2017 -0700 PATCH: [perl #130655] Unrecognized UTF-8 char The root cause of this was code like this if (a) b which got changed into if (a) c b thus causing 'b' to be executed unconditionally. The solution is just to add braces if (a) { c b } This is why I always use braces even if not required at the moment. It was the coding standard at $work. It turns out that #130567 doesn't even come up with this fix in place. ----------------------------------------------------------------------- Summary of changes: t/lib/warnings/toke | 8 +++++--- toke.c | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke index 2774f08dd1..8ed6177c39 100644 --- a/t/lib/warnings/toke +++ b/t/lib/warnings/toke @@ -1634,9 +1634,6 @@ s//\3000/; s//"\x{180};;s\221(*$@$`\241\275";/gee; s//"s\221\302\302\302\302\302\302\302$@\241\275";/gee; EXPECT -OPTION fatal -Malformed UTF-8 character: \xc3\x20 (unexpected non-continuation byte 0x20, immediately after start byte 0xc3; need 2 bytes, got 1) in eval "string" at - line 11. -Malformed UTF-8 character (fatal) at - line 11. ######## # NAME [perl $130666] Assertion failure no warnings "uninitialized"; @@ -1649,3 +1646,8 @@ EXPECT OPTION fatal syntax error at - line 1, at EOF Execution of - aborted due to compilation errors. +######## +# NAME [perl #130655] +use utf8; +qwâfoo â ⥠barâ +EXPECT diff --git a/toke.c b/toke.c index 9972b97418..b9096b033f 100644 --- a/toke.c +++ b/toke.c @@ -10549,6 +10549,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re if (termlen == 1) break; if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) + { if ( check_grapheme && UNLIKELY(! 
_is_grapheme((U8 *) start, (U8 *) s, @@ -10559,6 +10560,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re "%s", non_grapheme_msg); } break; + } } else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) has_utf8 = TRUE; -- Perl5 Master Repository
