[perl.git] branch blead, updated. v5.25.3-305-g8c6b0c7

Father Chrysostomos Thu, 18 Aug 2016 22:29:32 -0700

In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/8c6b0c7d731fcf4b323b159e772b5fee09f791f3?hp=7d897bd0d938ac3c489af290b9289d016bf9fbbe>


- Log -----------------------------------------------------------------
commit 8c6b0c7d731fcf4b323b159e772b5fee09f791f3
Author: Father Chrysostomos <spr...@cpan.org>
Date:   Thu Aug 18 22:27:42 2016 -0700

    toke.c: Introduce peekspace()
    
    This should make the sites that use LEX_NO_INCLINE a bit less arcane.
    This has nothing to do with the erstwhile PEEKSPACE macro that existed
    for MADnessâ sake.

M       toke.c

commit 71fff7cb10b725e79df67426713d410d321f773b
Author: Father Chrysostomos <spr...@cpan.org>
Date:   Thu Aug 18 22:23:45 2016 -0700

    toke.c: Note retval of S_skipspace

M       toke.c

commit 3218e2237b0e2ac0e334fa1f98624a63a02bf9a4
Author: Father Chrysostomos <spr...@cpan.org>
Date:   Thu Aug 18 22:22:25 2016 -0700

    toke.c: Move skipspace closer to S_skipspace
    
    It was added back in 21791330a when we still had MADness.  Back then
    there were about four skipspace functions, some of them before
    S_update_debugger_info and some after, and I just put the #define
    before all of them.  But now the only skipspace function left is
    after S_update_debugger_info, so having the #define before it just
    makes it harder to see whatâs what.

M       toke.c

commit bf8a9a15ea4a7b7ebcde5ba48aafe397c549eff2
Author: Father Chrysostomos <spr...@cpan.org>
Date:   Wed Aug 17 22:32:23 2016 -0700

    [perl #128951] Fix ASan error with @{\327
    
    By \327 I mean character number 327 in octal.
    
    Without memory tools like ASan, it produces garbled output.  The added
    test fails like this:
    
    # Failed test 18 - @ { \327 \n - used to garble output (or fail asan) [perl 
\#128951] at ./test.pl line 1058
    #      got "Unrecognized character \\xD7; marked by <-- HERE after 
\x{a0}\x{f6}@3\x{a8}\x{7f}\000\000@{<-- HERE near column -1 at - line 1."
    # expected "Unrecognized character \\xD7; marked by <-- HERE after @{<-- 
HERE near column 3 at - line 1."
    
    Dave Mitchellâs explanation from the RT ticket:
    > The src code contains the bytes:
    >
    >     @ { \327 \n
    >
    > after seeing "@{" the lexer calls scan_ident(), which sees the \327 as an
    > ident, then calls S_skipspace_flags() to skip the spaces following the
    > ident.  This moves the current cursor position to the \n, and since that's
    > a line boundary, its updates PL_linestart and PL_bufptr to point to \n
    > too.
    >
    > When it finds that the next char isn't a '}', it does this:
    >
    >             /* Didn't find the closing } at the point we expected, so 
restore
    >                state such that the next thing to process is the opening { 
and */
    >           s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
    >
    > i.e. it moves s back to the "{\317" then continues.
    >
    > However, PL_linestart doesn't get reset, so later when the parser
    > encounters the \327 and tries to croak with "Unrecognized character %s 
...",
    > when it prints out the section of src code in error, since s < PL_linestr,
    > negative string lengths and ASAN errors ensue.
    
    This commit fixes it by passing the LEX_NO_INCLINE flag (added by
    21791330a), which specifies that we are not trying to read past the
    newline but simply peek ahead.  In that case lex_read_space does not
    reset PL_linestart.
    
    But that does cause problems with code like:
    
    ${;
    #line 3
    
    }
    
    because we end up jumping ahead via skipspace without updating the
    line number.  So we need to do a skipspace_flags(..., LEX_NO_INCLINE)
    first (i.e., peek ahead), and then when we know we donât need to go
    back again we can skipspace(...) for real.

M       t/op/lex.t
M       toke.c
-----------------------------------------------------------------------

Summary of changes:
 t/op/lex.t | 11 +++++++++--
 toke.c     | 31 ++++++++++++++++++++++---------
 2 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/t/op/lex.t b/t/op/lex.t
index c0f94c0..e68fab4 100644
--- a/t/op/lex.t
+++ b/t/op/lex.t
@@ -7,7 +7,7 @@ use warnings;
 
 BEGIN { chdir 't' if -d 't'; require './test.pl'; }
 
-plan(tests => 27);
+plan(tests => 28);
 
 {
     no warnings 'deprecated';
@@ -129,7 +129,7 @@ fresh_perl_is(
   '* <null> ident'
 );
 SKIP: {
-    skip "Different output on EBCDIC (presumably)", 2 if $::IS_EBCDIC;
+    skip "Different output on EBCDIC (presumably)", 3 if $::IS_EBCDIC;
     fresh_perl_is(
       qq'"ab}"ax;&\0z\x8Ao}\x82x;', <<gibberish,
 Bareword found where operator expected at - line 1, near ""ab}"ax"
@@ -150,6 +150,13 @@ gibberish
        { stderr => 1 },
       'gibberish containing &{+z} - used to crash [perl #123753]'
     );
+    fresh_perl_is(
+      "\@{\327\n", <<\gibberisi,
+Unrecognized character \xD7; marked by <-- HERE after @{<-- HERE near column 3 
at - line 1.
+gibberisi
+       { stderr => 1 },
+      '@ { \327 \n - used to garble output (or fail asan) [perl #128951]'
+    );
 }
 
 fresh_perl_is(
diff --git a/toke.c b/toke.c
index 2504911..2da8366 100644
--- a/toke.c
+++ b/toke.c
@@ -1779,9 +1779,6 @@ S_incline(pTHX_ const char *s)
     CopLINE_set(PL_curcop, line_num);
 }
 
-#define skipspace(s) skipspace_flags(s, 0)
-
-
 STATIC void
 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
 {
@@ -1808,11 +1805,19 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char 
*const buf, STRLEN len)
 }
 
 /*
- * S_skipspace
+ * skipspace
  * Called to gobble the appropriate amount and type of whitespace.
  * Skips comments as well.
+ * Returns the next character after the whitespace that is skipped.
+ *
+ * peekspace
+ * Same thing, but look ahead without incrementing line numbers or
+ * adjusting PL_linestart.
  */
 
+#define skipspace(s) skipspace_flags(s, 0)
+#define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
+
 STATIC char *
 S_skipspace_flags(pTHX_ char *s, U32 flags)
 {
@@ -6896,7 +6901,7 @@ Perl_yylex(pTHX)
            bool arrow;
            STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
            STRLEN   soff = s         - SvPVX(PL_linestr);
-           s = skipspace_flags(s, LEX_NO_INCLINE);
+           s = peekspace(s);
            arrow = *s == '=' && s[1] == '>';
            PL_bufptr = SvPVX(PL_linestr) + bufoff;
            s         = SvPVX(PL_linestr) +   soff;
@@ -9074,6 +9079,8 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, 
I32 ck_uni)
     else if (ck_uni && bracket == -1)
        check_uni();
     if (bracket != -1) {
+        bool skip;
+        char *s2;
         /* If we were processing {...} notation then...  */
        if (isIDFIRST_lazy_if(d,is_utf8)) {
             /* if it starts as a valid identifier, assume that it is one.
@@ -9122,13 +9129,19 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, 
I32 ck_uni)
 
         if ( !tmp_copline )
             tmp_copline = CopLINE(PL_curcop);
-        if (s < PL_bufend && isSPACE(*s)) {
-            s = skipspace(s);
-        }
+        if ((skip = s < PL_bufend && isSPACE(*s)))
+            /* Avoid incrementing line numbers or resetting PL_linestart,
+               in case we have to back up.  */
+            s2 = peekspace(s);
+        else
+            s2 = s;
            
         /* Expect to find a closing } after consuming any trailing whitespace.
          */
-       if (*s == '}') {
+        if (*s2 == '}') {
+            /* Now increment line numbers if applicable.  */
+            if (skip)
+                s = skipspace(s);
            s++;
            if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
                PL_lex_state = LEX_INTERPEND;

--
Perl5 Master Repository

[perl.git] branch blead, updated. v5.25.3-305-g8c6b0c7

Reply via email to