In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/8c6b0c7d731fcf4b323b159e772b5fee09f791f3?hp=7d897bd0d938ac3c489af290b9289d016bf9fbbe>
- Log ----------------------------------------------------------------- commit 8c6b0c7d731fcf4b323b159e772b5fee09f791f3 Author: Father Chrysostomos <spr...@cpan.org> Date: Thu Aug 18 22:27:42 2016 -0700 toke.c: Introduce peekspace() This should make the sites that use LEX_NO_INCLINE a bit less arcane. This has nothing to do with the erstwhile PEEKSPACE macro that existed for MADnessâ sake. M toke.c commit 71fff7cb10b725e79df67426713d410d321f773b Author: Father Chrysostomos <spr...@cpan.org> Date: Thu Aug 18 22:23:45 2016 -0700 toke.c: Note retval of S_skipspace M toke.c commit 3218e2237b0e2ac0e334fa1f98624a63a02bf9a4 Author: Father Chrysostomos <spr...@cpan.org> Date: Thu Aug 18 22:22:25 2016 -0700 toke.c: Move skipspace closer to S_skipspace It was added back in 21791330a when we still had MADness. Back then there were about four skipspace functions, some of them before S_update_debugger_info and some after, and I just put the #define before all of them. But now the only skipspace function left is after S_update_debugger_info, so having the #define before it just makes it harder to see whatâs what. M toke.c commit bf8a9a15ea4a7b7ebcde5ba48aafe397c549eff2 Author: Father Chrysostomos <spr...@cpan.org> Date: Wed Aug 17 22:32:23 2016 -0700 [perl #128951] Fix ASan error with @{\327 By \327 I mean character number 327 in octal. Without memory tools like ASan, it produces garbled output. The added test fails like this: # Failed test 18 - @ { \327 \n - used to garble output (or fail asan) [perl \#128951] at ./test.pl line 1058 # got "Unrecognized character \\xD7; marked by <-- HERE after \x{a0}\x{f6}@3\x{a8}\x{7f}\000\000@{<-- HERE near column -1 at - line 1." # expected "Unrecognized character \\xD7; marked by <-- HERE after @{<-- HERE near column 3 at - line 1." Dave Mitchellâs explanation from the RT ticket: > The src code contains the bytes: > > @ { \327 \n > > after seeing "@{" the lexer calls scan_ident(), which sees the \327 as an > ident, then calls S_skipspace_flags() to skip the spaces following the > ident. This moves the current cursor position to the \n, and since that's > a line boundary, its updates PL_linestart and PL_bufptr to point to \n > too. > > When it finds that the next char isn't a '}', it does this: > > /* Didn't find the closing } at the point we expected, so restore > state such that the next thing to process is the opening { and */ > s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */ > > i.e. it moves s back to the "{\317" then continues. > > However, PL_linestart doesn't get reset, so later when the parser > encounters the \327 and tries to croak with "Unrecognized character %s ...", > when it prints out the section of src code in error, since s < PL_linestr, > negative string lengths and ASAN errors ensue. This commit fixes it by passing the LEX_NO_INCLINE flag (added by 21791330a), which specifies that we are not trying to read past the newline but simply peek ahead. In that case lex_read_space does not reset PL_linestart. But that does cause problems with code like: ${; #line 3 } because we end up jumping ahead via skipspace without updating the line number. So we need to do a skipspace_flags(..., LEX_NO_INCLINE) first (i.e., peek ahead), and then when we know we donât need to go back again we can skipspace(...) for real. M t/op/lex.t M toke.c ----------------------------------------------------------------------- Summary of changes: t/op/lex.t | 11 +++++++++-- toke.c | 31 ++++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/t/op/lex.t b/t/op/lex.t index c0f94c0..e68fab4 100644 --- a/t/op/lex.t +++ b/t/op/lex.t @@ -7,7 +7,7 @@ use warnings; BEGIN { chdir 't' if -d 't'; require './test.pl'; } -plan(tests => 27); +plan(tests => 28); { no warnings 'deprecated'; @@ -129,7 +129,7 @@ fresh_perl_is( '* <null> ident' ); SKIP: { - skip "Different output on EBCDIC (presumably)", 2 if $::IS_EBCDIC; + skip "Different output on EBCDIC (presumably)", 3 if $::IS_EBCDIC; fresh_perl_is( qq'"ab}"ax;&\0z\x8Ao}\x82x;', <<gibberish, Bareword found where operator expected at - line 1, near ""ab}"ax" @@ -150,6 +150,13 @@ gibberish { stderr => 1 }, 'gibberish containing &{+z} - used to crash [perl #123753]' ); + fresh_perl_is( + "\@{\327\n", <<\gibberisi, +Unrecognized character \xD7; marked by <-- HERE after @{<-- HERE near column 3 at - line 1. +gibberisi + { stderr => 1 }, + '@ { \327 \n - used to garble output (or fail asan) [perl #128951]' + ); } fresh_perl_is( diff --git a/toke.c b/toke.c index 2504911..2da8366 100644 --- a/toke.c +++ b/toke.c @@ -1779,9 +1779,6 @@ S_incline(pTHX_ const char *s) CopLINE_set(PL_curcop, line_num); } -#define skipspace(s) skipspace_flags(s, 0) - - STATIC void S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) { @@ -1808,11 +1805,19 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) } /* - * S_skipspace + * skipspace * Called to gobble the appropriate amount and type of whitespace. * Skips comments as well. + * Returns the next character after the whitespace that is skipped. + * + * peekspace + * Same thing, but look ahead without incrementing line numbers or + * adjusting PL_linestart. */ +#define skipspace(s) skipspace_flags(s, 0) +#define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE) + STATIC char * S_skipspace_flags(pTHX_ char *s, U32 flags) { @@ -6896,7 +6901,7 @@ Perl_yylex(pTHX) bool arrow; STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); STRLEN soff = s - SvPVX(PL_linestr); - s = skipspace_flags(s, LEX_NO_INCLINE); + s = peekspace(s); arrow = *s == '=' && s[1] == '>'; PL_bufptr = SvPVX(PL_linestr) + bufoff; s = SvPVX(PL_linestr) + soff; @@ -9074,6 +9079,8 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) else if (ck_uni && bracket == -1) check_uni(); if (bracket != -1) { + bool skip; + char *s2; /* If we were processing {...} notation then... */ if (isIDFIRST_lazy_if(d,is_utf8)) { /* if it starts as a valid identifier, assume that it is one. @@ -9122,13 +9129,19 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) if ( !tmp_copline ) tmp_copline = CopLINE(PL_curcop); - if (s < PL_bufend && isSPACE(*s)) { - s = skipspace(s); - } + if ((skip = s < PL_bufend && isSPACE(*s))) + /* Avoid incrementing line numbers or resetting PL_linestart, + in case we have to back up. */ + s2 = peekspace(s); + else + s2 = s; /* Expect to find a closing } after consuming any trailing whitespace. */ - if (*s == '}') { + if (*s2 == '}') { + /* Now increment line numbers if applicable. */ + if (skip) + s = skipspace(s); s++; if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { PL_lex_state = LEX_INTERPEND; -- Perl5 Master Repository