# New Ticket Created by  Bruce Keeler 
# Please include the string:  [perl #73862]
# in the subject line of all future correspondence about this issue. 
# <URL: http://rt.perl.org/rt3/Ticket/Display.html?id=73862 >


The attached patch adds support for variable and block-result 
interpolation into regexes.

It does so by means of a new PAST::Regex node pasttype 'interpolator'.  
The following syntaxes are supported by this patch:

    / $var /  -- Interpolates as literal string, unless it's a Regex object
    / @foo / -- Interpolated as ||-style alternations of literal strings 
or Regex objects
    / <$var> / -- compiled into a Regex (unless it's already one), then 
interpolated
    / <@foo> / -- A list of ||-style alternations of things to be 
compiled into Regexes (unless they already are)
    / <{ ... }> / -- Result of capture is interpolated as a Regex, 
compiling if necessary
    / <?{ ... }> / -- Unchanged
    / { ... } / -- Capture is merely executed, but not interpolated.  
(Unchanged)
From 29c116e35d95e29b211d396a0b48137172ae0f28 Mon Sep 17 00:00:00 2001
From: Bruce Keeler <br...@drangle.com>
Date: Sun, 28 Mar 2010 21:41:53 -0700
Subject: [PATCH] Variable interpolation in regexes

---
 build/Makefile.in                         |    1 +
 src/Perl6/Actions.pm                      |   64 ++++++++++---
 src/Perl6/Grammar.pm                      |   20 ++++
 src/cheats/parrot/past-compiler-regex.pir |  153 +++++++++++++++++++++++++++++
 t/spectest.data                           |    1 +
 5 files changed, 226 insertions(+), 13 deletions(-)
 create mode 100644 src/cheats/parrot/past-compiler-regex.pir

diff --git a/build/Makefile.in b/build/Makefile.in
index 743a81a..f9431bd 100644
--- a/build/Makefile.in
+++ b/build/Makefile.in
@@ -154,6 +154,7 @@ CHEATS_PIR = \
   src/cheats/want-caller-side-callsig.pir \
   src/cheats/parrot-role-quirks.pir \
   src/cheats/parrot/attriter.pir \
+  src/cheats/parrot/past-compiler-regex.pir \
   src/cheats/parrot/Protoobject.pir \
   src/cheats/parrot/P6role.pir \
   src/gen/uprop-cheats.pir \
diff --git a/src/Perl6/Actions.pm b/src/Perl6/Actions.pm
index 97fd6e5..197d8db 100644
--- a/src/Perl6/Actions.pm
+++ b/src/Perl6/Actions.pm
@@ -2358,10 +2358,51 @@ class Perl6::RegexActions is Regex::P6Regex::Actions {
         my $past := $<statement>.ast;
         make PAST::Regex.new( $past, :pasttype('pastnode') );
     }
+    method metachar:sym<var>($/) {
+        my $past;
+        my $name := $<pos> ?? +$<pos> !! ~$<name>;
+        if $<quantified_atom> {
+            if $<var> {
+                $/.CURSOR.panic('"$var = " syntax not yet supported in 
regexes');
+            }
+            $past := $<quantified_atom>[0].ast;
+            if $past.pasttype eq 'quant' && $past[0].pasttype eq 'subrule' {
+                Regex::P6Regex::Actions::subrule_alias($past[0], $name);
+            }
+            elsif $past.pasttype eq 'subrule' { 
Regex::P6Regex::Actions::subrule_alias($past, $name); }
+            else {
+                $past := PAST::Regex.new( $past, :name($name), 
:pasttype('subcapture'), :node($/) );
+            }
+        }
+        else {
+            if $<var> {
+                my @MODIFIERS := Q:PIR {
+                    %r = get_hll_global ['Regex';'P6Regex';'Actions'], 
'@MODIFIERS'
+                };
+                my $subtype := @MODIFIERS[0]<i> ?? 'literal_i' !! 'literal';
+                $past := PAST::Regex.new( $<var>.ast, 
:pasttype('interpolator'),
+                                          :subtype($subtype), :node($/) );
+            } else {
+                $past := PAST::Regex.new( '!BACKREF', $name, 
:pasttype('subrule'),
+                                          :subtype('method'), :node($/) );
+            }
+        }
+        make $past;
+    }
 
-    method metachar:sym<{ }>($/) { make $<codeblock>.ast; }
+    method assertion:sym<var>($/) {
+        make PAST::Regex.new( $<var>.ast, :pasttype('interpolator'),
+                              :subtype('compile_regex'), :node($/) );
+    }
+
+    method metachar:sym<{ }>($/) { 
+        make PAST::Regex.new(:node($/), :pasttype('pastnode'), 
$<codeblock>.ast); 
+    }
 
-    method assertion:sym<{ }>($/) { make $<codeblock>.ast; }
+    method assertion:sym<{ }>($/) { 
+        make PAST::Regex.new( :node($/), :pasttype('interpolator'), 
:subtype('compile_regex'),
+                              $<codeblock>.ast );
+   }
 
     method codeblock($/) {
         my $block := $<block>.ast;
@@ -2376,20 +2417,17 @@ class Perl6::RegexActions is Regex::P6Regex::Actions {
     }
 
     sub bindmatch($past) {
-        PAST::Regex.new(
-            PAST::Stmts.new(
+        PAST::Stmts.new(
+            PAST::Op.new(
+                PAST::Var.new( :name('$/') ),
                 PAST::Op.new(
-                    PAST::Var.new( :name('$/') ),
-                    PAST::Op.new(
-                        PAST::Var.new( :name('$¢') ),
-                        :name('MATCH'),
-                        :pasttype('callmethod')
-                    ),
-                    :pasttype('bind')
+                    PAST::Var.new( :name('$¢') ),
+                    :name('MATCH'),
+                    :pasttype('callmethod')
                 ),
-                $past
+                :pasttype('bind')
             ),
-            :pasttype('pastnode')
+            $past,
         );
     }
 }
diff --git a/src/Perl6/Grammar.pm b/src/Perl6/Grammar.pm
index ef77a7f..0539c48 100644
--- a/src/Perl6/Grammar.pm
+++ b/src/Perl6/Grammar.pm
@@ -1428,6 +1428,26 @@ grammar Perl6::Regex is Regex::P6Regex::Grammar {
     token metachar:sym<:my> {
         ':' <?before 'my'> <statement=.LANG('MAIN', 'statement')> <.ws> ';'
     }
+ 
+    token metachar:sym<$> {
+        <sym> <!before \w>
+    }
+
+    token metachar:sym<var> {
+        [
+        | '$<' $<name>=[<-[>]>+] '>'
+        | '$' $<pos>=[\d+]
+        | <?before <[...@]> \w> <var=.LANG('MAIN', 'variable')>
+        | <?before '%' \w> <.panic: "Use of hash variable in patterns is 
reserved">
+        ]
+ 
+        [ <.ws> '=' <.ws> <quantified_atom> ]?
+    }
+
+    token assertion:sym<var> {
+        | <?before <[...@]> \w> <var=.LANG('MAIN', 'variable')>
+        | <?before '%' \w> <.panic: "Use of hash variable in patterns is 
reserved">
+    }
 
     token metachar:sym<{ }> {
         <?[{]> <codeblock>
diff --git a/src/cheats/parrot/past-compiler-regex.pir 
b/src/cheats/parrot/past-compiler-regex.pir
new file mode 100644
index 0000000..de0a22e
--- /dev/null
+++ b/src/cheats/parrot/past-compiler-regex.pir
@@ -0,0 +1,153 @@
+=head1 NAME
+
+past-compiler-regex.pir - Compiler for the PAST::Regex interpolator type
+
+=head1 DESCRIPTION
+
+Implements the interpolator pasttype of PAST::Regex node.  This has to be in 
Rakudo rather than
+NQP-RX as it calls the Rakudo regex compiler.
+
+Scalar values are interpolated as literal strings or regexes, depending on the 
subtype.  Array values
+(or any Iterable) are interpolated as ||-type alternations.
+
+Subtype can be any of:
+
+=over 4
+
+=item zerowidth
+
+Only test for truthiness and fail or not.  No interpolation.
+
+=item compile_regex
+
+String values should be compiled into regexes and then interpolated.
+
+=item literal
+
+String values should be treated as literals.
+
+=item literal_i
+
+String values should be treated as literals and matched case-insensitively.
+
+=back
+
+=head2 Methods
+
+=over 4
+
+=item interpolator(PAST::Regex node)
+
+=cut
+
+.HLL 'parrot'
+
+.namespace ['PAST';'Compiler']
+
+.const int CURSOR_FAIL = -1
+
+.sub 'interpolator' :method :multi(_, ['PAST'; 'Regex'])
+    .param pmc node
+    .local pmc cur, pos, fail, ops, eos, off, tgt
+    (cur, pos, eos, off, tgt, fail) = self.'!rxregs'('cur pos eos off tgt 
fail')
+    ops = self.'post_new'('Ops', 'node'=>node, 'result'=>cur)
+ 
+    .local pmc zerowidth, negate, testop, subtype
+    subtype = node.'subtype'()
+
+    ops.'push_pirop'('inline', subtype, 'inline'=>'  # rx interp subtype=%1')
+    .local pmc cpast, cpost
+    cpast = node[0]
+    cpost = self.'as_post'(cpast, 'rtype'=>'P')
+ 
+    self.'!cursorop'(ops, '!cursor_pos', 0, pos)
+    ops.'push'(cpost)
+
+    # If this is just a zerowidth assertion, we don't actually interpolate 
anything.  Just evaluate
+    # and fail or not. 
+    if subtype == 'zerowidth' goto zerowidth_test
+
+    .local string prefix
+    prefix = self.'unique'('rxinterp_')
+    .local pmc precompiled_label, done_label, loop_label, not_a_list_label, 
iterator_reg, label_reg
+    $S0 =  concat prefix, '_precompiled'
+    precompiled_label = self.'post_new'('Label', 'result'=>$S0)
+    $S0 =  concat prefix, '_done'
+    done_label = self.'post_new'('Label', 'result'=>$S0)
+    $S0 =  concat prefix, '_loop'
+    loop_label = self.'post_new'('Label', 'result'=>$S0)
+    $S0 =  concat prefix, '_not_a_list'
+    not_a_list_label = self.'post_new'('Label', 'result'=>$S0)
+    iterator_reg = self.'uniquereg'("P")
+    label_reg = self.'uniquereg'("I")
+
+    ops.'push_pirop'('descalarref', '$P10', cpost)
+    ops.'push_pirop'('get_hll_global', '$P11', "'Iterable'")
+    ops.'push_pirop'('callmethod', "'isa'", '$P10', '$P11', 'result'=>'$P11')
+    ops.'push_pirop'('unless', '$P11', not_a_list_label)
+
+    ops.'push_pirop'('callmethod', "'iterator'", '$P10', 
'result'=>iterator_reg)
+    ops.'push_pirop'('set_addr', label_reg, loop_label)
+    ops.'push'(loop_label)
+    ops.'push_pirop'('callmethod', "'get'", iterator_reg, 'result'=>'$P10')
+    ops.'push_pirop'('isa', '$I10', '$P10', "['EMPTY']")
+    ops.'push_pirop'('if', '$I10', fail)
+    self.'!cursorop'(ops, '!mark_push', 0, 0, pos, label_reg)
+
+    ops.'push'(not_a_list_label)
+    # Check if it isa Regex, and call it as a method if so
+    ops.'push_pirop'('isa', '$I10', '$P10', "['Regex']")
+    ops.'push_pirop'('if', '$I10', precompiled_label)
+    ops.'push_pirop'('set', '$S10', '$P10')
+    ne subtype, 'compile_regex', literal
+
+    # Kinda cheesy, but the compiler can't be entered anywhere but TOP for now
+    ops.'push_pirop'('split', '$P9', "'/'", '$S10')
+    ops.'push_pirop'('join', '$S10', "'\\/'", '$P9')
+    ops.'push_pirop'('concat', '$S10', "'rx/'", '$S10')
+    ops.'push_pirop'('concat', '$S10', '$S10', "'/'")
+    ops.'push_pirop'('compreg', '$P10', '"perl6"')
+    ops.'push_pirop'('getinterp', '$P9')
+    ops.'push_pirop'('set', '$P9', "$P9['context';0]")
+    ops.'push_pirop'('callmethod', '"compile"', '$P10', '$S10', 
"'outer_ctx'=>$P9", 'result'=>'$P10')
+    ops.'push_pirop'('set', '$P8', '$P10[0]')
+    ops.'push_pirop'('getattribute', '$P9', '$P9', '"current_sub"')
+    ops.'push_pirop'('callmethod', '"set_outer"', '$P8', '$P9')
+    ops.'push_pirop'('call', '$P10', 'result'=>'$P10')
+
+    goto have_compiled_regex
+
+  literal:
+    ops.'push_pirop'('length', '$I10', '$S10')
+    ops.'push_pirop'('add', '$I11', pos, '$I10')
+    ops.'push_pirop'('gt', '$I11', eos, fail)
+    ops.'push_pirop'('sub', '$I11', pos, off)
+    ops.'push_pirop'('substr', '$S11', tgt, '$I11', '$I10')
+    ne subtype, 'literal_i', dont_downcase
+    ops.'push_pirop'('downcase', '$S10', '$S10')
+    ops.'push_pirop'('downcase', '$S11', '$S11')
+  dont_downcase:
+    ops.'push_pirop'('ne', '$S11', '$S10', fail)
+    ops.'push_pirop'('add', pos, '$I10')
+    ops.'push_pirop'('goto', done_label)
+
+  have_compiled_regex:
+    ops.'push'(precompiled_label)
+    ops.'push_pirop'('callmethod', '$P10', cur, 'result'=>'$P10')
+    ops.'push_pirop'('unless', '$P10', fail)
+    self.'!cursorop'(ops, '!mark_push', 0, 0, CURSOR_FAIL, 0, '$P10')
+    ops.'push_pirop'('callmethod', '"pos"', '$P10', 'result'=>pos)
+    
+    ops.'push'(done_label)
+
+    goto done
+
+  zerowidth_test:
+    negate = node.'negate'()
+    testop = self.'??!!'(negate, 'if', 'unless')
+    ops.'push_pirop'(testop, cpost, fail)
+  done:
+    .return (ops)
+
+
+.end
diff --git a/t/spectest.data b/t/spectest.data
index 8b0b091..8acf9b5 100644
--- a/t/spectest.data
+++ b/t/spectest.data
@@ -243,6 +243,7 @@ S05-grammar/protoregex.t
 S05-grammar/protos.t
 S05-grammar/signatures.t
 # S05-grammar/ws.t
+S05-interpolation/regex-in-variable.t
 # S05-mass/named-chars.t                                     # icu
 # S05-mass/properties-block.t                                # icu
 # S05-mass/properties-derived.t                              # icu
-- 
1.7.0

Reply via email to