# New Ticket Created by Bruce Keeler
# Please include the string: [perl #73862]
# in the subject line of all future correspondence about this issue.
# <URL: http://rt.perl.org/rt3/Ticket/Display.html?id=73862 >
The attached patch adds support for variable and block-result
interpolation into regexes.
It does so by means of a new PAST::Regex node pasttype 'interpolator'.
The following syntaxes are supported by this patch:
/ $var / -- Interpolates as literal string, unless it's a Regex object
/ @foo / -- Interpolated as ||-style alternations of literal strings
or Regex objects
/ <$var> / -- compiled into a Regex (unless it's already one), then
interpolated
/ <@foo> / -- A list of ||-style alternations of things to be
compiled into Regexes (unless they already are)
/ <{ ... }> / -- Result of capture is interpolated as a Regex,
compiling if necessary
/ <?{ ... }> / -- Unchanged
/ { ... } / -- Capture is merely executed, but not interpolated.
(Unchanged)
From 29c116e35d95e29b211d396a0b48137172ae0f28 Mon Sep 17 00:00:00 2001
From: Bruce Keeler <[email protected]>
Date: Sun, 28 Mar 2010 21:41:53 -0700
Subject: [PATCH] Variable interpolation in regexes
---
build/Makefile.in | 1 +
src/Perl6/Actions.pm | 64 ++++++++++---
src/Perl6/Grammar.pm | 20 ++++
src/cheats/parrot/past-compiler-regex.pir | 153 +++++++++++++++++++++++++++++
t/spectest.data | 1 +
5 files changed, 226 insertions(+), 13 deletions(-)
create mode 100644 src/cheats/parrot/past-compiler-regex.pir
diff --git a/build/Makefile.in b/build/Makefile.in
index 743a81a..f9431bd 100644
--- a/build/Makefile.in
+++ b/build/Makefile.in
@@ -154,6 +154,7 @@ CHEATS_PIR = \
src/cheats/want-caller-side-callsig.pir \
src/cheats/parrot-role-quirks.pir \
src/cheats/parrot/attriter.pir \
+ src/cheats/parrot/past-compiler-regex.pir \
src/cheats/parrot/Protoobject.pir \
src/cheats/parrot/P6role.pir \
src/gen/uprop-cheats.pir \
diff --git a/src/Perl6/Actions.pm b/src/Perl6/Actions.pm
index 97fd6e5..197d8db 100644
--- a/src/Perl6/Actions.pm
+++ b/src/Perl6/Actions.pm
@@ -2358,10 +2358,51 @@ class Perl6::RegexActions is Regex::P6Regex::Actions {
my $past := $<statement>.ast;
make PAST::Regex.new( $past, :pasttype('pastnode') );
}
+ method metachar:sym<var>($/) {
+ my $past;
+ my $name := $<pos> ?? +$<pos> !! ~$<name>;
+ if $<quantified_atom> {
+ if $<var> {
+ $/.CURSOR.panic('"$var = " syntax not yet supported in
regexes');
+ }
+ $past := $<quantified_atom>[0].ast;
+ if $past.pasttype eq 'quant' && $past[0].pasttype eq 'subrule' {
+ Regex::P6Regex::Actions::subrule_alias($past[0], $name);
+ }
+ elsif $past.pasttype eq 'subrule' {
Regex::P6Regex::Actions::subrule_alias($past, $name); }
+ else {
+ $past := PAST::Regex.new( $past, :name($name),
:pasttype('subcapture'), :node($/) );
+ }
+ }
+ else {
+ if $<var> {
+ my @MODIFIERS := Q:PIR {
+ %r = get_hll_global ['Regex';'P6Regex';'Actions'],
'@MODIFIERS'
+ };
+ my $subtype := @MODIFIERS[0]<i> ?? 'literal_i' !! 'literal';
+ $past := PAST::Regex.new( $<var>.ast,
:pasttype('interpolator'),
+ :subtype($subtype), :node($/) );
+ } else {
+ $past := PAST::Regex.new( '!BACKREF', $name,
:pasttype('subrule'),
+ :subtype('method'), :node($/) );
+ }
+ }
+ make $past;
+ }
- method metachar:sym<{ }>($/) { make $<codeblock>.ast; }
+ method assertion:sym<var>($/) {
+ make PAST::Regex.new( $<var>.ast, :pasttype('interpolator'),
+ :subtype('compile_regex'), :node($/) );
+ }
+
+ method metachar:sym<{ }>($/) {
+ make PAST::Regex.new(:node($/), :pasttype('pastnode'),
$<codeblock>.ast);
+ }
- method assertion:sym<{ }>($/) { make $<codeblock>.ast; }
+ method assertion:sym<{ }>($/) {
+ make PAST::Regex.new( :node($/), :pasttype('interpolator'),
:subtype('compile_regex'),
+ $<codeblock>.ast );
+ }
method codeblock($/) {
my $block := $<block>.ast;
@@ -2376,20 +2417,17 @@ class Perl6::RegexActions is Regex::P6Regex::Actions {
}
sub bindmatch($past) {
- PAST::Regex.new(
- PAST::Stmts.new(
+ PAST::Stmts.new(
+ PAST::Op.new(
+ PAST::Var.new( :name('$/') ),
PAST::Op.new(
- PAST::Var.new( :name('$/') ),
- PAST::Op.new(
- PAST::Var.new( :name('$¢') ),
- :name('MATCH'),
- :pasttype('callmethod')
- ),
- :pasttype('bind')
+ PAST::Var.new( :name('$¢') ),
+ :name('MATCH'),
+ :pasttype('callmethod')
),
- $past
+ :pasttype('bind')
),
- :pasttype('pastnode')
+ $past,
);
}
}
diff --git a/src/Perl6/Grammar.pm b/src/Perl6/Grammar.pm
index ef77a7f..0539c48 100644
--- a/src/Perl6/Grammar.pm
+++ b/src/Perl6/Grammar.pm
@@ -1428,6 +1428,26 @@ grammar Perl6::Regex is Regex::P6Regex::Grammar {
token metachar:sym<:my> {
':' <?before 'my'> <statement=.LANG('MAIN', 'statement')> <.ws> ';'
}
+
+ token metachar:sym<$> {
+ <sym> <!before \w>
+ }
+
+ token metachar:sym<var> {
+ [
+ | '$<' $<name>=[<-[>]>+] '>'
+ | '$' $<pos>=[\d+]
+ | <?before <[...@]> \w> <var=.LANG('MAIN', 'variable')>
+ | <?before '%' \w> <.panic: "Use of hash variable in patterns is
reserved">
+ ]
+
+ [ <.ws> '=' <.ws> <quantified_atom> ]?
+ }
+
+ token assertion:sym<var> {
+ | <?before <[...@]> \w> <var=.LANG('MAIN', 'variable')>
+ | <?before '%' \w> <.panic: "Use of hash variable in patterns is
reserved">
+ }
token metachar:sym<{ }> {
<?[{]> <codeblock>
diff --git a/src/cheats/parrot/past-compiler-regex.pir
b/src/cheats/parrot/past-compiler-regex.pir
new file mode 100644
index 0000000..de0a22e
--- /dev/null
+++ b/src/cheats/parrot/past-compiler-regex.pir
@@ -0,0 +1,153 @@
+=head1 NAME
+
+past-compiler-regex.pir - Compiler for the PAST::Regex interpolator type
+
+=head1 DESCRIPTION
+
+Implements the interpolator pasttype of PAST::Regex node. This has to be in
Rakudo rather than
+NQP-RX as it calls the Rakudo regex compiler.
+
+Scalar values are interpolated as literal strings or regexes, depending on the
subtype. Array values
+(or any Iterable) are interpolated as ||-type alternations.
+
+Subtype can be any of:
+
+=over 4
+
+=item zerowidth
+
+Only test for truthiness and fail or not. No interpolation.
+
+=item compile_regex
+
+String values should be compiled into regexes and then interpolated.
+
+=item literal
+
+String values should be treated as literals.
+
+=item literal_i
+
+String values should be treated as literals and matched case-insensitively.
+
+=back
+
+=head2 Methods
+
+=over 4
+
+=item interpolator(PAST::Regex node)
+
+=cut
+
+.HLL 'parrot'
+
+.namespace ['PAST';'Compiler']
+
+.const int CURSOR_FAIL = -1
+
+.sub 'interpolator' :method :multi(_, ['PAST'; 'Regex'])
+ .param pmc node
+ .local pmc cur, pos, fail, ops, eos, off, tgt
+ (cur, pos, eos, off, tgt, fail) = self.'!rxregs'('cur pos eos off tgt
fail')
+ ops = self.'post_new'('Ops', 'node'=>node, 'result'=>cur)
+
+ .local pmc zerowidth, negate, testop, subtype
+ subtype = node.'subtype'()
+
+ ops.'push_pirop'('inline', subtype, 'inline'=>' # rx interp subtype=%1')
+ .local pmc cpast, cpost
+ cpast = node[0]
+ cpost = self.'as_post'(cpast, 'rtype'=>'P')
+
+ self.'!cursorop'(ops, '!cursor_pos', 0, pos)
+ ops.'push'(cpost)
+
+ # If this is just a zerowidth assertion, we don't actually interpolate
anything. Just evaluate
+ # and fail or not.
+ if subtype == 'zerowidth' goto zerowidth_test
+
+ .local string prefix
+ prefix = self.'unique'('rxinterp_')
+ .local pmc precompiled_label, done_label, loop_label, not_a_list_label,
iterator_reg, label_reg
+ $S0 = concat prefix, '_precompiled'
+ precompiled_label = self.'post_new'('Label', 'result'=>$S0)
+ $S0 = concat prefix, '_done'
+ done_label = self.'post_new'('Label', 'result'=>$S0)
+ $S0 = concat prefix, '_loop'
+ loop_label = self.'post_new'('Label', 'result'=>$S0)
+ $S0 = concat prefix, '_not_a_list'
+ not_a_list_label = self.'post_new'('Label', 'result'=>$S0)
+ iterator_reg = self.'uniquereg'("P")
+ label_reg = self.'uniquereg'("I")
+
+ ops.'push_pirop'('descalarref', '$P10', cpost)
+ ops.'push_pirop'('get_hll_global', '$P11', "'Iterable'")
+ ops.'push_pirop'('callmethod', "'isa'", '$P10', '$P11', 'result'=>'$P11')
+ ops.'push_pirop'('unless', '$P11', not_a_list_label)
+
+ ops.'push_pirop'('callmethod', "'iterator'", '$P10',
'result'=>iterator_reg)
+ ops.'push_pirop'('set_addr', label_reg, loop_label)
+ ops.'push'(loop_label)
+ ops.'push_pirop'('callmethod', "'get'", iterator_reg, 'result'=>'$P10')
+ ops.'push_pirop'('isa', '$I10', '$P10', "['EMPTY']")
+ ops.'push_pirop'('if', '$I10', fail)
+ self.'!cursorop'(ops, '!mark_push', 0, 0, pos, label_reg)
+
+ ops.'push'(not_a_list_label)
+ # Check if it isa Regex, and call it as a method if so
+ ops.'push_pirop'('isa', '$I10', '$P10', "['Regex']")
+ ops.'push_pirop'('if', '$I10', precompiled_label)
+ ops.'push_pirop'('set', '$S10', '$P10')
+ ne subtype, 'compile_regex', literal
+
+ # Kinda cheesy, but the compiler can't be entered anywhere but TOP for now
+ ops.'push_pirop'('split', '$P9', "'/'", '$S10')
+ ops.'push_pirop'('join', '$S10', "'\\/'", '$P9')
+ ops.'push_pirop'('concat', '$S10', "'rx/'", '$S10')
+ ops.'push_pirop'('concat', '$S10', '$S10', "'/'")
+ ops.'push_pirop'('compreg', '$P10', '"perl6"')
+ ops.'push_pirop'('getinterp', '$P9')
+ ops.'push_pirop'('set', '$P9', "$P9['context';0]")
+ ops.'push_pirop'('callmethod', '"compile"', '$P10', '$S10',
"'outer_ctx'=>$P9", 'result'=>'$P10')
+ ops.'push_pirop'('set', '$P8', '$P10[0]')
+ ops.'push_pirop'('getattribute', '$P9', '$P9', '"current_sub"')
+ ops.'push_pirop'('callmethod', '"set_outer"', '$P8', '$P9')
+ ops.'push_pirop'('call', '$P10', 'result'=>'$P10')
+
+ goto have_compiled_regex
+
+ literal:
+ ops.'push_pirop'('length', '$I10', '$S10')
+ ops.'push_pirop'('add', '$I11', pos, '$I10')
+ ops.'push_pirop'('gt', '$I11', eos, fail)
+ ops.'push_pirop'('sub', '$I11', pos, off)
+ ops.'push_pirop'('substr', '$S11', tgt, '$I11', '$I10')
+ ne subtype, 'literal_i', dont_downcase
+ ops.'push_pirop'('downcase', '$S10', '$S10')
+ ops.'push_pirop'('downcase', '$S11', '$S11')
+ dont_downcase:
+ ops.'push_pirop'('ne', '$S11', '$S10', fail)
+ ops.'push_pirop'('add', pos, '$I10')
+ ops.'push_pirop'('goto', done_label)
+
+ have_compiled_regex:
+ ops.'push'(precompiled_label)
+ ops.'push_pirop'('callmethod', '$P10', cur, 'result'=>'$P10')
+ ops.'push_pirop'('unless', '$P10', fail)
+ self.'!cursorop'(ops, '!mark_push', 0, 0, CURSOR_FAIL, 0, '$P10')
+ ops.'push_pirop'('callmethod', '"pos"', '$P10', 'result'=>pos)
+
+ ops.'push'(done_label)
+
+ goto done
+
+ zerowidth_test:
+ negate = node.'negate'()
+ testop = self.'??!!'(negate, 'if', 'unless')
+ ops.'push_pirop'(testop, cpost, fail)
+ done:
+ .return (ops)
+
+
+.end
diff --git a/t/spectest.data b/t/spectest.data
index 8b0b091..8acf9b5 100644
--- a/t/spectest.data
+++ b/t/spectest.data
@@ -243,6 +243,7 @@ S05-grammar/protoregex.t
S05-grammar/protos.t
S05-grammar/signatures.t
# S05-grammar/ws.t
+S05-interpolation/regex-in-variable.t
# S05-mass/named-chars.t # icu
# S05-mass/properties-block.t # icu
# S05-mass/properties-derived.t # icu
--
1.7.0