# New Ticket Created by Bruce Keeler # Please include the string: [perl #73862] # in the subject line of all future correspondence about this issue. # <URL: http://rt.perl.org/rt3/Ticket/Display.html?id=73862 >
The attached patch adds support for variable and block-result interpolation into regexes. It does so by means of a new PAST::Regex node pasttype 'interpolator'. The following syntaxes are supported by this patch: / $var / -- Interpolates as literal string, unless it's a Regex object / @foo / -- Interpolated as ||-style alternations of literal strings or Regex objects / <$var> / -- compiled into a Regex (unless it's already one), then interpolated / <@foo> / -- A list of ||-style alternations of things to be compiled into Regexes (unless they already are) / <{ ... }> / -- Result of capture is interpolated as a Regex, compiling if necessary / <?{ ... }> / -- Unchanged / { ... } / -- Capture is merely executed, but not interpolated. (Unchanged)
From 29c116e35d95e29b211d396a0b48137172ae0f28 Mon Sep 17 00:00:00 2001 From: Bruce Keeler <br...@drangle.com> Date: Sun, 28 Mar 2010 21:41:53 -0700 Subject: [PATCH] Variable interpolation in regexes --- build/Makefile.in | 1 + src/Perl6/Actions.pm | 64 ++++++++++--- src/Perl6/Grammar.pm | 20 ++++ src/cheats/parrot/past-compiler-regex.pir | 153 +++++++++++++++++++++++++++++ t/spectest.data | 1 + 5 files changed, 226 insertions(+), 13 deletions(-) create mode 100644 src/cheats/parrot/past-compiler-regex.pir diff --git a/build/Makefile.in b/build/Makefile.in index 743a81a..f9431bd 100644 --- a/build/Makefile.in +++ b/build/Makefile.in @@ -154,6 +154,7 @@ CHEATS_PIR = \ src/cheats/want-caller-side-callsig.pir \ src/cheats/parrot-role-quirks.pir \ src/cheats/parrot/attriter.pir \ + src/cheats/parrot/past-compiler-regex.pir \ src/cheats/parrot/Protoobject.pir \ src/cheats/parrot/P6role.pir \ src/gen/uprop-cheats.pir \ diff --git a/src/Perl6/Actions.pm b/src/Perl6/Actions.pm index 97fd6e5..197d8db 100644 --- a/src/Perl6/Actions.pm +++ b/src/Perl6/Actions.pm @@ -2358,10 +2358,51 @@ class Perl6::RegexActions is Regex::P6Regex::Actions { my $past := $<statement>.ast; make PAST::Regex.new( $past, :pasttype('pastnode') ); } + method metachar:sym<var>($/) { + my $past; + my $name := $<pos> ?? +$<pos> !! ~$<name>; + if $<quantified_atom> { + if $<var> { + $/.CURSOR.panic('"$var = " syntax not yet supported in regexes'); + } + $past := $<quantified_atom>[0].ast; + if $past.pasttype eq 'quant' && $past[0].pasttype eq 'subrule' { + Regex::P6Regex::Actions::subrule_alias($past[0], $name); + } + elsif $past.pasttype eq 'subrule' { Regex::P6Regex::Actions::subrule_alias($past, $name); } + else { + $past := PAST::Regex.new( $past, :name($name), :pasttype('subcapture'), :node($/) ); + } + } + else { + if $<var> { + my @MODIFIERS := Q:PIR { + %r = get_hll_global ['Regex';'P6Regex';'Actions'], '@MODIFIERS' + }; + my $subtype := @MODIFIERS[0]<i> ?? 'literal_i' !! 'literal'; + $past := PAST::Regex.new( $<var>.ast, :pasttype('interpolator'), + :subtype($subtype), :node($/) ); + } else { + $past := PAST::Regex.new( '!BACKREF', $name, :pasttype('subrule'), + :subtype('method'), :node($/) ); + } + } + make $past; + } - method metachar:sym<{ }>($/) { make $<codeblock>.ast; } + method assertion:sym<var>($/) { + make PAST::Regex.new( $<var>.ast, :pasttype('interpolator'), + :subtype('compile_regex'), :node($/) ); + } + + method metachar:sym<{ }>($/) { + make PAST::Regex.new(:node($/), :pasttype('pastnode'), $<codeblock>.ast); + } - method assertion:sym<{ }>($/) { make $<codeblock>.ast; } + method assertion:sym<{ }>($/) { + make PAST::Regex.new( :node($/), :pasttype('interpolator'), :subtype('compile_regex'), + $<codeblock>.ast ); + } method codeblock($/) { my $block := $<block>.ast; @@ -2376,20 +2417,17 @@ class Perl6::RegexActions is Regex::P6Regex::Actions { } sub bindmatch($past) { - PAST::Regex.new( - PAST::Stmts.new( + PAST::Stmts.new( + PAST::Op.new( + PAST::Var.new( :name('$/') ), PAST::Op.new( - PAST::Var.new( :name('$/') ), - PAST::Op.new( - PAST::Var.new( :name('$¢') ), - :name('MATCH'), - :pasttype('callmethod') - ), - :pasttype('bind') + PAST::Var.new( :name('$¢') ), + :name('MATCH'), + :pasttype('callmethod') ), - $past + :pasttype('bind') ), - :pasttype('pastnode') + $past, ); } } diff --git a/src/Perl6/Grammar.pm b/src/Perl6/Grammar.pm index ef77a7f..0539c48 100644 --- a/src/Perl6/Grammar.pm +++ b/src/Perl6/Grammar.pm @@ -1428,6 +1428,26 @@ grammar Perl6::Regex is Regex::P6Regex::Grammar { token metachar:sym<:my> { ':' <?before 'my'> <statement=.LANG('MAIN', 'statement')> <.ws> ';' } + + token metachar:sym<$> { + <sym> <!before \w> + } + + token metachar:sym<var> { + [ + | '$<' $<name>=[<-[>]>+] '>' + | '$' $<pos>=[\d+] + | <?before <[...@]> \w> <var=.LANG('MAIN', 'variable')> + | <?before '%' \w> <.panic: "Use of hash variable in patterns is reserved"> + ] + + [ <.ws> '=' <.ws> <quantified_atom> ]? + } + + token assertion:sym<var> { + | <?before <[...@]> \w> <var=.LANG('MAIN', 'variable')> + | <?before '%' \w> <.panic: "Use of hash variable in patterns is reserved"> + } token metachar:sym<{ }> { <?[{]> <codeblock> diff --git a/src/cheats/parrot/past-compiler-regex.pir b/src/cheats/parrot/past-compiler-regex.pir new file mode 100644 index 0000000..de0a22e --- /dev/null +++ b/src/cheats/parrot/past-compiler-regex.pir @@ -0,0 +1,153 @@ +=head1 NAME + +past-compiler-regex.pir - Compiler for the PAST::Regex interpolator type + +=head1 DESCRIPTION + +Implements the interpolator pasttype of PAST::Regex node. This has to be in Rakudo rather than +NQP-RX as it calls the Rakudo regex compiler. + +Scalar values are interpolated as literal strings or regexes, depending on the subtype. Array values +(or any Iterable) are interpolated as ||-type alternations. + +Subtype can be any of: + +=over 4 + +=item zerowidth + +Only test for truthiness and fail or not. No interpolation. + +=item compile_regex + +String values should be compiled into regexes and then interpolated. + +=item literal + +String values should be treated as literals. + +=item literal_i + +String values should be treated as literals and matched case-insensitively. + +=back + +=head2 Methods + +=over 4 + +=item interpolator(PAST::Regex node) + +=cut + +.HLL 'parrot' + +.namespace ['PAST';'Compiler'] + +.const int CURSOR_FAIL = -1 + +.sub 'interpolator' :method :multi(_, ['PAST'; 'Regex']) + .param pmc node + .local pmc cur, pos, fail, ops, eos, off, tgt + (cur, pos, eos, off, tgt, fail) = self.'!rxregs'('cur pos eos off tgt fail') + ops = self.'post_new'('Ops', 'node'=>node, 'result'=>cur) + + .local pmc zerowidth, negate, testop, subtype + subtype = node.'subtype'() + + ops.'push_pirop'('inline', subtype, 'inline'=>' # rx interp subtype=%1') + .local pmc cpast, cpost + cpast = node[0] + cpost = self.'as_post'(cpast, 'rtype'=>'P') + + self.'!cursorop'(ops, '!cursor_pos', 0, pos) + ops.'push'(cpost) + + # If this is just a zerowidth assertion, we don't actually interpolate anything. Just evaluate + # and fail or not. + if subtype == 'zerowidth' goto zerowidth_test + + .local string prefix + prefix = self.'unique'('rxinterp_') + .local pmc precompiled_label, done_label, loop_label, not_a_list_label, iterator_reg, label_reg + $S0 = concat prefix, '_precompiled' + precompiled_label = self.'post_new'('Label', 'result'=>$S0) + $S0 = concat prefix, '_done' + done_label = self.'post_new'('Label', 'result'=>$S0) + $S0 = concat prefix, '_loop' + loop_label = self.'post_new'('Label', 'result'=>$S0) + $S0 = concat prefix, '_not_a_list' + not_a_list_label = self.'post_new'('Label', 'result'=>$S0) + iterator_reg = self.'uniquereg'("P") + label_reg = self.'uniquereg'("I") + + ops.'push_pirop'('descalarref', '$P10', cpost) + ops.'push_pirop'('get_hll_global', '$P11', "'Iterable'") + ops.'push_pirop'('callmethod', "'isa'", '$P10', '$P11', 'result'=>'$P11') + ops.'push_pirop'('unless', '$P11', not_a_list_label) + + ops.'push_pirop'('callmethod', "'iterator'", '$P10', 'result'=>iterator_reg) + ops.'push_pirop'('set_addr', label_reg, loop_label) + ops.'push'(loop_label) + ops.'push_pirop'('callmethod', "'get'", iterator_reg, 'result'=>'$P10') + ops.'push_pirop'('isa', '$I10', '$P10', "['EMPTY']") + ops.'push_pirop'('if', '$I10', fail) + self.'!cursorop'(ops, '!mark_push', 0, 0, pos, label_reg) + + ops.'push'(not_a_list_label) + # Check if it isa Regex, and call it as a method if so + ops.'push_pirop'('isa', '$I10', '$P10', "['Regex']") + ops.'push_pirop'('if', '$I10', precompiled_label) + ops.'push_pirop'('set', '$S10', '$P10') + ne subtype, 'compile_regex', literal + + # Kinda cheesy, but the compiler can't be entered anywhere but TOP for now + ops.'push_pirop'('split', '$P9', "'/'", '$S10') + ops.'push_pirop'('join', '$S10', "'\\/'", '$P9') + ops.'push_pirop'('concat', '$S10', "'rx/'", '$S10') + ops.'push_pirop'('concat', '$S10', '$S10', "'/'") + ops.'push_pirop'('compreg', '$P10', '"perl6"') + ops.'push_pirop'('getinterp', '$P9') + ops.'push_pirop'('set', '$P9', "$P9['context';0]") + ops.'push_pirop'('callmethod', '"compile"', '$P10', '$S10', "'outer_ctx'=>$P9", 'result'=>'$P10') + ops.'push_pirop'('set', '$P8', '$P10[0]') + ops.'push_pirop'('getattribute', '$P9', '$P9', '"current_sub"') + ops.'push_pirop'('callmethod', '"set_outer"', '$P8', '$P9') + ops.'push_pirop'('call', '$P10', 'result'=>'$P10') + + goto have_compiled_regex + + literal: + ops.'push_pirop'('length', '$I10', '$S10') + ops.'push_pirop'('add', '$I11', pos, '$I10') + ops.'push_pirop'('gt', '$I11', eos, fail) + ops.'push_pirop'('sub', '$I11', pos, off) + ops.'push_pirop'('substr', '$S11', tgt, '$I11', '$I10') + ne subtype, 'literal_i', dont_downcase + ops.'push_pirop'('downcase', '$S10', '$S10') + ops.'push_pirop'('downcase', '$S11', '$S11') + dont_downcase: + ops.'push_pirop'('ne', '$S11', '$S10', fail) + ops.'push_pirop'('add', pos, '$I10') + ops.'push_pirop'('goto', done_label) + + have_compiled_regex: + ops.'push'(precompiled_label) + ops.'push_pirop'('callmethod', '$P10', cur, 'result'=>'$P10') + ops.'push_pirop'('unless', '$P10', fail) + self.'!cursorop'(ops, '!mark_push', 0, 0, CURSOR_FAIL, 0, '$P10') + ops.'push_pirop'('callmethod', '"pos"', '$P10', 'result'=>pos) + + ops.'push'(done_label) + + goto done + + zerowidth_test: + negate = node.'negate'() + testop = self.'??!!'(negate, 'if', 'unless') + ops.'push_pirop'(testop, cpost, fail) + done: + .return (ops) + + +.end diff --git a/t/spectest.data b/t/spectest.data index 8b0b091..8acf9b5 100644 --- a/t/spectest.data +++ b/t/spectest.data @@ -243,6 +243,7 @@ S05-grammar/protoregex.t S05-grammar/protos.t S05-grammar/signatures.t # S05-grammar/ws.t +S05-interpolation/regex-in-variable.t # S05-mass/named-chars.t # icu # S05-mass/properties-block.t # icu # S05-mass/properties-derived.t # icu -- 1.7.0