In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/b813f4458249da44eff5ac0843dc28de22112299?hp=d52196e1e0cc54a6dcfbb4b4bc58bf6f939156ad>
- Log ----------------------------------------------------------------- commit b813f4458249da44eff5ac0843dc28de22112299 Merge: d52196e 87ebe74 Author: Father Chrysostomos <[email protected]> Date: Wed Dec 3 17:45:31 2014 -0800 [Merge] Deparse regexp code blocks The actual ops that make up the code blocks are now deparsed. B::Deparse no longer uses the stringified form stored in the regexp. This fixes a few bugs. See the individual commits for details. commit 87ebe743f4db193eb0e52185efa8e36aa490e557 Author: Father Chrysostomos <[email protected]> Date: Wed Dec 3 12:52:13 2014 -0800 Deparse s/// with code blocks Before, s/$a(?{die})// would deparse like this: s/${a}do { die }(?{die})//; Now it deparses correctly. M lib/B/Deparse.pm M lib/B/Deparse.t commit 3e18cd1cfb38ceda2370a98ef2cab9f0d853455d Author: Father Chrysostomos <[email protected]> Date: Tue Dec 2 22:25:59 2014 -0800 Deparse.pm: Fold some logic into sub code_list Both callers were doing $op->first->sibling, so just have code_list do that itself. M lib/B/Deparse.pm commit 59d42aa0af9be77aa7bcfc0acefe62a6e1c2b5aa Author: Father Chrysostomos <[email protected]> Date: Tue Dec 2 22:22:25 2014 -0800 Deparse qr// and m// with code blocks and vars Before, this: /$a(?{ die $b; })/; qr/$a(?{ die $b; })/; would deparse as this: /${a}do { die $b }(?{ die $b; })/; qr/sub : lvalue { $a, do { die $b }, '(?{ die $b; })' } ->()/; Now it deparses correctly. M lib/B/Deparse-subclass.t M lib/B/Deparse.pm commit f6b6ee634827de2f95592fbfe6e1b2c9776e255a Author: Father Chrysostomos <[email protected]> Date: Tue Dec 2 18:10:35 2014 -0800 Deparse regexp code blocks in m// and split // The blocks themselves are now deparsed, rather than the original strings being emitted. This fixes problems with newlines turning into \n and here-docs missing their bodies. It only applies to compile-time patterns. Run-time patterns (with variables interpolated outside the code blocks) are still unfixed and deparse with do{...} embedded in the pattern. M lib/B/Deparse.pm M lib/B/Deparse.t commit 7ca9974d087306762c6eb761d7f77247d62616be Author: Father Chrysostomos <[email protected]> Date: Tue Dec 2 14:53:32 2014 -0800 Deparse.t: Put a line break after the test code The line break gets removed when it is extracted from the __DATA__ section, and then it gets wrapped in sub{$input}. That breaks here-docs. M lib/B/Deparse.t commit 061bc525741807da007a6d4f9af2e4d3ef537cc3 Author: Father Chrysostomos <[email protected]> Date: Tue Dec 2 09:33:38 2014 -0800 Deparse qr/(?{code_blocks})/ with no interpolation This is a preliminary patch that only applies when there are no varia- bles interpolated into the pattern outside of the code blocks. The code blocks are now actually deparsed, instead of the stringified form just being reproduced. This means the \n bug is gone: Before: $ ./perl -Ilib -mO=Deparse -e '{ qr/aaaaa\\\\\\(?{;' -e '$y})' -e '/}' { qr/aaaaa\\\\\\(?{;\n\$y})\n/; } -e syntax OK After: $ ./perl -Ilib -mO=Deparse -e '{ qr/aaaaa\\\\\\(?{;' -e '$y})' -e '/}' { qr/aaaaa\\\\\\(?{ $y; })\n/; } -e syntax OK You can see the \n translation now happens only outside of the block. It also means here-docs work: Before: $ ./perl -Ilib -mO=Deparse -e 'qr/(??{<<END})/' -efoo -eEND qr/(??{<<END})/; -e syntax OK (The output is a syntax error.) After: $ ./perl -Ilib -mO=Deparse -e 'qr/(??{<<END})/' -efoo -eEND qr/(??{ "foo\n"; })/; -e syntax OK M lib/B/Deparse.pm commit c9fa6ae98a68d7c92849ef1f5abf7984768a289c Author: Father Chrysostomos <[email protected]> Date: Mon Dec 1 22:32:41 2014 -0800 To-do tests for deparsing regexp code blocks Currently we have various bugs: ⢠Line breaks often come out as \n, changing the meaning. ⢠Some blocks are doubled up with do{...} for the first instance. ⢠qr/sub { .... }/ madness M lib/B/Deparse-subclass.t M lib/B/Deparse.t ----------------------------------------------------------------------- Summary of changes: lib/B/Deparse-subclass.t | 10 ++++-- lib/B/Deparse.pm | 79 ++++++++++++++++++++++++++++++++++++++++++++++-- lib/B/Deparse.t | 45 +++++++++++++++++++++++++-- 3 files changed, 128 insertions(+), 6 deletions(-) diff --git a/lib/B/Deparse-subclass.t b/lib/B/Deparse-subclass.t index 3bcb7be..c1484aa 100644 --- a/lib/B/Deparse-subclass.t +++ b/lib/B/Deparse-subclass.t @@ -4,7 +4,7 @@ # publicise an API for subclassing B::Deparse they can prevent us from # gratuitously breaking conventions that CPAN modules already use. -use Test::More tests => 1; +use Test::More tests => 2; use B::Deparse; @@ -13,6 +13,12 @@ package B::Deparse::NameMangler { sub padname { SUPER::padname{@_} . '_groovy' } } -like 'B::Deparse::NameMangler'->new->coderef2text(sub { my($a, $b, $c) }), +my $nm = 'B::Deparse::NameMangler'->new; + +like $nm->coderef2text(sub { my($a, $b, $c) }), qr/\$a_groovy, \$b_groovy, \$c_groovy/, 'overriding padname works for renaming lexicals'; + +like $nm->coderef2text(sub { my $c; /(??{ $c })/; }), + qr/\Q(??{\E \$c_groovy/, + 'overriding padname works for renaming lexicals in regexp blocks'; diff --git a/lib/B/Deparse.pm b/lib/B/Deparse.pm index 6c83d94..07b986f 100644 --- a/lib/B/Deparse.pm +++ b/lib/B/Deparse.pm @@ -5043,6 +5043,38 @@ sub pure_string { return 1; } +sub code_list { + my ($self,$op,$extended,$cv) = @_; + + # localise stuff relating to the current sub + $cv and + local($self->{'curcv'}) = $cv, + local($self->{'curcvlex'}), + local(@$self{qw'curstash warnings hints hinthash curcop'}) + = @$self{qw'curstash warnings hints hinthash curcop'}; + + my $re; + for ($op = $op->first->sibling; !null($op); $op = $op->sibling) { + if ($op->name eq 'null' and $op->flags & OPf_SPECIAL) { + my $scope = $op->first; + # 0 context (last arg to scopeop) means statement context, so + # the contents of the block will not be wrapped in do{...}. + my $block = scopeop($scope->first->name eq "enter", $self, + $scope, 0); + # next op is the source code of the block + $op = $op->sibling; + $re .= ($self->const_sv($op)->PV =~ m|^(\(\?\??\{)|)[0]; + my $multiline = $block =~ /\n/; + $re .= $multiline ? "\n\t" : ' '; + $re .= $block; + $re .= $multiline ? "\n\b})" : " })"; + } else { + $re = re_dq_disambiguate($re, $self->re_dq($op, $extended)); + } + } + $re; +} + sub regcomp { my $self = shift; my($op, $cx, $extended) = @_; @@ -5112,6 +5144,22 @@ map($matchwords{join "", sort split //, $_} = $_, 'cig', 'cog', 'cos', 'cogs', 'cox', 'go', 'is', 'ism', 'iso', 'mig', 'mix', 'osmic', 'ox', 'sic', 'sig', 'six', 'smog', 'so', 'soc', 'sog', 'xi'); +# When deparsing a regular expression with code blocks, we have to look in +# various places to find the blocks. +# +# For qr/(?{...})/ without interpolation, the CV is under $qr->qr_anoncv +# and the code list (list of blocks and constants, maybe vars) is under +# $cv->ROOT->first->code_list: +# ./perl -Ilib -MB -e 'use O "Concise", B::svref_2object(sub {qr/(?{die})/})->ROOT->first->first->sibling->pmregexp->qr_anoncv->object_2svref' +# +# For qr/$a(?{...})/ with interpolation, the code list is more accessible, +# under $pmop->code_list, but the $cv is something you have to dig for in +# the regcomp opâs kids: +# ./perl -Ilib -mO=Concise -e 'qr/$a(?{die})/' +# +# For m// and split //, things are much simpler. There is no CV. The code +# list is under $pmop->code_list. + sub matchop { my $self = shift; my($op, $cx, $name, $delim) = @_; @@ -5131,7 +5179,30 @@ sub matchop { my $pmflags = $op->pmflags; my $extended = ($pmflags & PMf_EXTENDED); my $rhs_bound_to_defsv; - if (null $kid) { + my ($cv, $bregexp); + my $have_kid = !null $kid; + # Check for code blocks first + if (not null my $code_list = $op->code_list) { + $re = $self->code_list($code_list, $extended, + $op->name eq 'qr' + ? $self->padval( + $kid->first # ex-list + ->first # pushmark + ->sibling # entersub + ->first # ex-list + ->first # pushmark + ->sibling # srefgen + ->first # ex-list + ->first # anoncode + ->targ + ) + : undef); + } elsif (${$bregexp = $op->pmregexp} && ${$cv = $bregexp->qr_anoncv}) { + my $patop = $cv->ROOT # leavesub + ->first # qr + ->code_list;# list + $re = $self->code_list($patop, $extended, $cv); + } elsif (!$have_kid) { my $unbacked = re_unback($op->precomp); if ($extended) { $re = re_uninterp_extended(escape_extended_re($unbacked)); @@ -5142,6 +5213,8 @@ sub matchop { carp("found ".$kid->name." where regcomp expected"); } else { ($re, $quote) = $self->regcomp($kid, 21, $extended); + } + if ($have_kid and $kid->name eq 'regcomp') { my $matchop = $kid->first; if ($matchop->name eq 'regcrest') { $matchop = $matchop->first; @@ -5291,7 +5364,9 @@ sub pp_subst { } } my $extended = ($pmflags & PMf_EXTENDED); - if (null $kid) { + if (not null my $code_list = $op->code_list) { + $re = $self->code_list($code_list, $extended); + } elsif (null $kid) { my $unbacked = re_unback($op->precomp); if ($extended) { $re = re_uninterp_extended(escape_extended_re($unbacked)); diff --git a/lib/B/Deparse.t b/lib/B/Deparse.t index 9d2dd46..02a3c6d 100644 --- a/lib/B/Deparse.t +++ b/lib/B/Deparse.t @@ -63,7 +63,7 @@ while (<DATA>) { new B::Deparse split /,/, $meta{options} : $deparse; - my $coderef = eval "$meta{context};\n" . <<'EOC' . "sub {$input}"; + my $coderef = eval "$meta{context};\n" . <<'EOC' . "sub {$input\n}"; # Tell B::Deparse about our ambient pragmas my ($hint_bits, $warning_bits, $hinthash); BEGIN { @@ -76,6 +76,7 @@ $deparse->ambient_pragmas ( ); EOC + local $::TODO = $meta{todo}; if ($@) { is($@, "", "compilation of $desc"); } @@ -86,7 +87,6 @@ EOC $regex =~ s/\s+/\\s+/g; $regex = '^\{\s*' . $regex . '\s*\}$'; - local $::TODO = $meta{todo}; like($deparsed, qr/$regex/, $desc); } } @@ -1144,6 +1144,47 @@ print /a/u, s/b/c/u; # [perl #119807] s//\(3)/ge should not warn when deparsed (\3 warns) s/foo/\(3);/eg; #### +# [perl #115256] +"" =~ /a(?{ print q| +|})/; +>>>> +'' =~ /a(?{ print "\n"; })/; +#### +# [perl #123217] +$_ = qr/(??{<<END})/ +f.o +b.r +END +>>>> +$_ = qr/(??{ "f.o\nb.r\n"; })/; +#### +# More regexp code block madness +my($b, @a); +/(?{ die $b; })/; +/a(?{ die $b; })a/; +/$a(?{ die $b; })/; +/@a(?{ die $b; })/; +/(??{ die $b; })/; +/a(??{ die $b; })a/; +/$a(??{ die $b; })/; +/@a(??{ die $b; })/; +qr/(?{ die $b; })/; +qr/a(?{ die $b; })a/; +qr/$a(?{ die $b; })/; +qr/@a(?{ die $b; })/; +qr/(??{ die $b; })/; +qr/a(??{ die $b; })a/; +qr/$a(??{ die $b; })/; +qr/@a(??{ die $b; })/; +s/(?{ die $b; })//; +s/a(?{ die $b; })a//; +s/$a(?{ die $b; })//; +s/@a(?{ die $b; })//; +s/(??{ die $b; })//; +s/a(??{ die $b; })a//; +s/$a(??{ die $b; })//; +s/@a(??{ die $b; })//; +#### # y///r tr/a/b/r + $a =~ tr/p/q/r; #### -- Perl5 Master Repository
