In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/4a49355342c9c82bb3d19edbf938addfe2f43639?hp=01dbcb1bde5c332d4c6aa16261b174729d6e8e1b>
- Log ----------------------------------------------------------------- commit 4a49355342c9c82bb3d19edbf938addfe2f43639 Author: Karl Williamson <[email protected]> Date: Wed Dec 21 21:31:06 2016 -0700 t/uni/variables.t: Test what it purports to test One of the tests wasn't testing what it thought it was, since evalbytes downgrades the input if it is UTF-8 encoded. Therefore, this needs to use unicode_eval, as the other places in the .t that do similar things use. M t/uni/variables.t commit 4b110bc5ab230d1278d1c8388d3a3a8802da380f Author: Karl Williamson <[email protected]> Date: Tue Dec 20 13:07:23 2016 -0700 toke.c: Simplify finding mirror-image close delimiter This is the code that figures out what the closing delimiter is for a given opening string delimiter. For most, it is the same character, but for a few, it is a mirror-image character. I have had to figure out multiple times how these couple of lines of code work. This time, as I started to comment it, so I wouldn't have to figure it out again, I realized that its cleverness wasn't really saving anything, and might slow things down. So split into two parallel strings, with one string containing the opening delimiters which have mirror image closing ones, and the other containing those closing delimiters, in the same order. So we find the offset into the first string of the opening delimiter. If it isn't in that string, it isn't mirrored, but if it is, the corresponding closing delimiter is found at the same offset in the other string. M toke.c commit 17bd04ce227a02e9d1db91c9f7bbdc4e3e2ea06f Author: Karl Williamson <[email protected]> Date: Tue Dec 20 11:43:08 2016 -0700 toke.c: Skip some work for UTF-8 invariant Since these chars are the same when encoded in UTF-8 as when not, no need to do the extra UTF-8 work. 
M toke.c commit 841bfb4809ad79718eaf7af40cb5383fef81a4d2 Author: Karl Williamson <[email protected]> Date: Tue Dec 20 14:37:11 2016 -0700 pod/perlop: Note that need space between op and \w delim You can't say qqXfooX because it thinks it is all one bareword. Note this, and that qq XfooX works. M pod/perlop.pod ----------------------------------------------------------------------- Summary of changes: pod/perlop.pod | 9 ++++++++- t/uni/variables.t | 2 +- toke.c | 15 +++++++++------ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/pod/perlop.pod b/pod/perlop.pod index 8fefc4e6f9..2640b5173d 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -1389,7 +1389,8 @@ Note, however, that this does not always work for quoting Perl code: is a syntax error. The C<L<Text::Balanced>> module (standard as of v5.8, and from CPAN before then) is able to do this properly. -There can be whitespace between the operator and the quoting +There can (and in some cases, must) be whitespace between the operator +and the quoting characters, except when C<#> is being used as the quoting character. C<q#foo#> is parsed as the string C<foo>, while S<C<q #foo#>> is the operator C<q> followed by a comment. Its argument will be taken @@ -1398,6 +1399,12 @@ from the next line. This allows you to write: s {foo} # Replace foo {bar} # with bar. +The cases where whitespace must be used are when the quoting character +is a word character (meaning it matches C</\w/>): + + q XfooX # Works: means the string 'foo' + qXfooX # WRONG! + The following escape sequences are available in constructs that interpolate, and in transliterations: X<\t> X<\n> X<\r> X<\f> X<\b> X<\a> X<\e> X<\x> X<\0> X<\c> X<\N> X<\N{}> diff --git a/t/uni/variables.t b/t/uni/variables.t index edeebf45ff..a1f7cc2d00 100644 --- a/t/uni/variables.t +++ b/t/uni/variables.t @@ -130,7 +130,7 @@ for ( 0x0 .. 
0xff ) { "$name as a length-1 variable generates a syntax error"); $tests++; utf8::upgrade($chr); - evalbytes "no strict; use utf8; \$$chr = 4;", + eval "no strict; \$$chr = 4;", like($@, qr/ syntax\ error | Unrecognized\ character /x, " ... and the same under 'use utf8'"); $tests++; diff --git a/toke.c b/toke.c index cd01255444..f695265665 100644 --- a/toke.c +++ b/toke.c @@ -10192,6 +10192,10 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re STRLEN termlen; /* length of terminating string */ line_t herelines; + /* The delimiters that have a mirror-image closing one */ + const char * opening_delims = "([{<"; + const char * closing_delims = ")]}>"; + PERL_ARGS_ASSERT_SCAN_STR; /* skip space before the delimiter */ @@ -10204,15 +10208,13 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re /* after skipping whitespace, the next character is the terminator */ term = *s; - if (!UTF) { + if (!UTF || UTF8_IS_INVARIANT(term)) { termcode = termstr[0] = term; termlen = 1; } else { termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen); Copy(s, termstr, termlen, U8); - if (!UTF8_IS_INVARIANT(term)) - has_utf8 = TRUE; } /* mark where we are */ @@ -10220,9 +10222,10 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re PL_multi_open = termcode; herelines = PL_parser->herelines; - /* find corresponding closing delimiter */ - if (term && (tmps = strchr("([{< )]}> )]}>",term))) - termcode = termstr[0] = term = tmps[5]; + /* If the delimiter has a mirror-image closing one, get it */ + if (term && (tmps = strchr(opening_delims, term))) { + termcode = termstr[0] = term = closing_delims[tmps - opening_delims]; + } PL_multi_close = termcode; -- Perl5 Master Repository
