Author: pmichaud
Date: Mon Oct 17 13:17:55 2005
New Revision: 9504

Added:
   trunk/runtime/parrot/library/PGE/Text.pir
   trunk/t/p6rules/text_brk.t
Removed:
   trunk/runtime/parrot/library/PGE/Class.pir
   trunk/runtime/parrot/library/PGE/RegCounter.pir
Modified:
   trunk/MANIFEST
   trunk/compilers/pge/demo.pir
   trunk/config/gen/makefiles/root.in
   trunk/lib/Parrot/Test/PGE.pm
   trunk/t/library/pge.t
Log:
Added PGE::Text::bracketed, which allows parsing of nested
bracketed expressions (see Perl 5's Text::Balanced for prior art).
Cleaned up some unused files from the library/PGE directory.


Modified: trunk/MANIFEST
==============================================================================
--- trunk/MANIFEST      (original)
+++ trunk/MANIFEST      Mon Oct 17 13:17:55 2005
@@ -1637,11 +1637,10 @@ runtime/parrot/library/Data/Dumper.imc  
 runtime/parrot/library/Data/Dumper/Base.imc       [library]
 runtime/parrot/library/Data/Dumper/Default.imc    [library]
 runtime/parrot/library/Digest/MD5.pir             [library]
-runtime/parrot/library/PGE/Class.pir              [library]
 runtime/parrot/library/PGE/Dumper.pir             [library]
 runtime/parrot/library/PGE/Glob.pir               [library]
-runtime/parrot/library/PGE/RegCounter.pir         [library]
 runtime/parrot/library/PGE/Hs.pir                 [library]
+runtime/parrot/library/PGE/Text.pir               [library]
 runtime/parrot/library/SDL.imc                    [library]
 runtime/parrot/library/SDL/App.imc                [library]
 runtime/parrot/library/SDL/Button.imc             [library]
@@ -1859,6 +1858,7 @@ t/p6rules/capture.t                     
 t/p6rules/cclass.t                                []
 t/p6rules/escape.t                                []
 t/p6rules/subrules.t                              []
+t/p6rules/text_brk.t                              []
 t/p6rules/ws.t                                    []
 t/perl/Parrot_Distribution.t                      [devel]
 t/perl/Parrot_Docs.t                              [devel]

Modified: trunk/compilers/pge/demo.pir
==============================================================================
--- trunk/compilers/pge/demo.pir        (original)
+++ trunk/compilers/pge/demo.pir        Mon Oct 17 13:17:55 2005
@@ -16,6 +16,7 @@
     load_bytecode "PGE.pbc"
     load_bytecode "PGE/Dumper.pir"
     load_bytecode "PGE/Glob.pir"
+    load_bytecode "PGE/Text.pir"
     find_global p6rule_compile, "PGE", "p6rule"
     find_global glob_compile, "PGE", "glob"
     istrace = 0

Modified: trunk/config/gen/makefiles/root.in
==============================================================================
--- trunk/config/gen/makefiles/root.in  (original)
+++ trunk/config/gen/makefiles/root.in  Mon Oct 17 13:17:55 2005
@@ -253,10 +253,9 @@ GEN_LIBRARY = \
     $(LIBRARY_DIR)/ncurses.pbc \
     $(LIBRARY_DIR)/parrotlib.pbc \
     $(LIBRARY_DIR)/pcre.pbc \
-    $(LIBRARY_DIR)/PGE/Class.pbc \
     $(LIBRARY_DIR)/PGE/Dumper.pbc \
     $(LIBRARY_DIR)/PGE/Glob.pbc \
-    $(LIBRARY_DIR)/PGE/RegCounter.pbc \
+    $(LIBRARY_DIR)/PGE/Text.pbc \
     $(LIBRARY_DIR)/Stream/Base.pbc \
     $(LIBRARY_DIR)/Stream/Combiner.pbc \
     $(LIBRARY_DIR)/Stream/Coroutine.pbc \

Modified: trunk/lib/Parrot/Test/PGE.pm
==============================================================================
--- trunk/lib/Parrot/Test/PGE.pm        (original)
+++ trunk/lib/Parrot/Test/PGE.pm        Mon Oct 17 13:17:55 2005
@@ -147,6 +147,7 @@ sub _generate_pir_for {
         .sub _PGE_Test
             .local pmc p6rule_compile
             load_bytecode "PGE.pbc"
+            load_bytecode "PGE/Text.pir"
             find_global p6rule_compile, "PGE", "p6rule"
 
             .local string target

Added: trunk/runtime/parrot/library/PGE/Text.pir
==============================================================================
--- (empty file)
+++ trunk/runtime/parrot/library/PGE/Text.pir   Mon Oct 17 13:17:55 2005
@@ -0,0 +1,130 @@
+=head1 TITLE
+
+PGE::Rule::Text - rules for extracting delimited text sequences from strings
+
+=head1 DESCRIPTION
+
+The various rules in this module may be used to extract delimited
+strings from within PGE rules.  (They can of course be called directly,
+also.)
+
+=cut
+
+.namespace [ "PGE::Text" ]
+
+.include "cclass.pasm"
+
+.sub "__onload" @LOAD
+    .local pmc base
+    base = getclass "PGE::Rule"
+    $P0 = subclass base, "PGE::Text"
+.end
+
+=head2 Available rules
+
+=item C<bracketed(PMC target, string delim)>
+
+Extracts a balanced-bracket-delimited substring from the
+current position of C<target> using the delimiters specified
+by C<delim>, and returns a C<Match> object containing the result
+of the extraction.  
+
+=cut
+
+.sub "bracketed"
+    .param pmc tgt                                 # target to match
+    .param string delim        :optional           # optional delimiters
+    .param int has_delim       :opt_flag
+    .local pmc mob                                 # return match object
+    .local string target                           # target as string
+    .local string bal, bra, ket                    # balanced brackets
+    .local string delim_bra, delim_ket             # delims for this match
+    .local string lookket                          # closing bracket char
+    .local int pos                                 # current match position
+    .local int balanced                            # in balanced match
+
+    $P0 = find_global "PGE::Match", "newfrom"
+    mob = $P0(tgt, 0)
+    $P0 = getattribute mob, "PGE::Match\x0$:target"
+    target = $P0
+    $P0 = getattribute mob, "PGE::Match\x0$:from"
+    pos = $P0
+
+    if has_delim goto mkdelims
+    delim = "{}()[]<>"
+
+  mkdelims:                                        # set up delimiters
+    delim_bra = ''                                 # list of open delims
+    delim_ket = ''                                 # list of close delims
+    bal = '{}()[]<>'                               # list of balance delims
+    bra = '{{(([[<<'                               # balanced openers
+    ket = '}}))]]>>'                               # balanced closers
+    $I0 = length delim                             # length of delim string
+  mkdelims_1:
+    dec $I0                          
+    if $I0 < 0 goto extract
+    $S0 = substr delim, $I0, 1
+    $I1 = index bal, $S0
+    if $I1 < 0 goto mkdelims_2
+    $S1 = substr bra, $I1, 1
+    delim_bra .= $S1
+    $S1 = substr ket, $I1, 1
+    delim_ket .= $S1
+    goto mkdelims_1
+  mkdelims_2:
+    delim_bra .= $S0
+    delim_ket .= $S0
+    goto mkdelims_1
+
+  extract:
+    $S0 = substr target, pos, 1                    
+    if $S0 == "\\" goto end                        # leading escape fails
+    $I0 = index delim_bra, $S0
+    if $I0 < 0 goto end                            # no leading delim fails
+    lookket = ''
+    balanced = 1
+  next:                                       
+    $S0 = substr target, pos, 1                    # check current pos
+    if $S0 == '' goto fail                         # end of string -> fail
+    if $S0 == "\\" goto escape                     # skip escaped pos
+    if $S0 == lookket goto close                   # end of current nest
+    if balanced < 0 goto skip                      # skip to next char
+    $I0 = index delim_bra, $S0                     # open new nest?
+    if $I0 >= 0 goto open
+    $I0 = index delim_ket, $S0                     # unbalanced nest?>
+    if $I0 >= 0 goto fail
+  skip:                                       
+    inc pos                                        # move to next char
+    goto next                                      # try next
+  escape:
+    pos += 2                                       # skip escape + char
+    goto next                                      # try next
+  open:                                            # open new nesting
+    save lookket                                   # save current nest
+    lookket = substr delim_ket, $I0, 1             # search to end of nest
+    balanced = index bra, $S0                      # is this a balanced nest?
+    inc pos                                        # skip open char
+    goto next                                      # continue scanning
+  close:                                           # close current nesting
+    restore lookket                                # restore previous nest
+    balanced = 1                                   # we're balancing again
+    inc pos                                        # skip close char
+    if lookket != '' goto next                     # still nested?
+    $P0 = getattribute mob, "PGE::Match\x0$:pos"   # we have a match!
+    $P0 = pos
+    goto end
+  fail:                                            # fail match
+    if lookket == '' goto end                      # clean up restore stack
+    restore lookket
+    goto fail
+  end:
+    .return (mob)
+.end
+
+=head1 AUTHOR
+
+Patrick Michaud ([EMAIL PROTECTED]) is the author and maintainer.
+Patches and suggestions should be sent to the Perl 6 compiler list
+([email protected]).
+
+=cut

Modified: trunk/t/library/pge.t
==============================================================================
--- trunk/t/library/pge.t       (original)
+++ trunk/t/library/pge.t       Mon Oct 17 13:17:55 2005
@@ -14,143 +14,9 @@ t/library/pge.t - Grammar Engine tests
 
 use strict;
 
-use Parrot::Test tests => 6;
+use Parrot::Test tests => 3;
 
 # 1
-pir_output_is(<<'CODE', <<'OUT', "character class membership");
-
-.sub _main
-    load_bytecode "library/PGE/Class.pir"
-
-    .local pmc vowels
-    .local pmc new_sub
-
-    find_global new_sub, "PGE::Class::Discrete", "new"
-    vowels = new_sub("aeiou")
-
-    $I1 = vowels["o"]
-    if $I1 goto OK1
-    print "not "
-OK1:
-    print "ok 1\n"
-
-    $I2 = vowels["q"]
-    unless $I2 goto OK2
-    print "not "
-OK2:
-    print "ok 2\n"
-
-    end
-.end
-CODE
-ok 1
-ok 2
-OUT
-
-# 2
-pir_output_is(<<'CODE', <<'OUT', "character class membership: method form");
-
-.sub _main
-    load_bytecode "library/PGE/Class.pir"
-
-    .local pmc vowels
-    .local pmc new_sub
-    find_global new_sub, "PGE::Class::Discrete", "new"
-    vowels = new_sub("aeiou")
-
-    $I1 = vowels.matches("o")
-    if $I1 goto OK1
-    print "not "
-OK1:
-    print "ok 1\n"
-
-    $I2 = vowels.matches("q")
-    unless $I2 goto OK2
-    print "not "
-OK2:
-    print "ok 2\n"
-
-    end
-.end
-CODE
-ok 1
-ok 2
-OUT
-
-# 3
-pir_output_is(<<'CODE', <<'OUT', "RegCounter");
-
-.sub _main
-    load_bytecode "library/PGE/RegCounter.pir"
-    
-    .local pmc rc
-    $P0 = find_global "PGE::RegCounter", "new"
-    rc = $P0()
-
-    rc.declare("foo", "$I")
-    rc.declare("bar", "$P")
-    rc.declare("baz", "$I")
-
-    $S0 = rc["foo"]
-    print $S0
-    print "\n"
-
-    $S0 = rc["bar"]
-    print $S0
-    print "\n"
-
-    $S0 = rc["baz"]
-    print $S0
-    print "\n"
-
-    $S0 = rc["bar"]
-    print $S0
-    print "\n"
-
-    .local pmc ch
-    ch = rc.next()
-
-    ch.declare("bar", "$P")
-    ch.declare("baz", "$P")
-    ch.declare("quux", "_L")
-
-    $S0 = ch["bar"]
-    print $S0
-    print "\n"
-
-    $S0 = ch["baz"]
-    print $S0
-    print "\n"
-
-    $S0 = ch["quux"]
-    print $S0
-    print "\n"
-
-    $S0 = ch["bar"]
-    print $S0
-    print "\n"
-
-    $S0 = rc["bar"]
-    print $S0
-    print "\n"
-
-    $S0 = rc["baz"]
-    print $S0
-    print "\n"
-.end
-CODE
-$I0
-$P1
-$I2
-$P1
-$P3
-$P4
-_L5
-$P3
-$P1
-$I2
-OUT
-
 pir_output_is(<<'CODE', <<'OUT', "Glob, wildcards");
 
 .sub _main
@@ -217,6 +83,7 @@ ok6
 ok7
 OUT
 
+# 2
 pir_output_is(<<'CODE', <<'OUT', "Glob, character classes");
 
 .sub _main
@@ -310,6 +177,7 @@ ok10
 ok11
 OUT
 
+# 3
 pir_output_is(<<'CODE', <<'OUT', "Glob, alternate");
 
 .sub _main

Added: trunk/t/p6rules/text_brk.t
==============================================================================
--- (empty file)
+++ trunk/t/p6rules/text_brk.t  Mon Oct 17 13:17:55 2005
@@ -0,0 +1,71 @@
+use Parrot::Test tests => 5;
+use Parrot::Test::PGE;
+
+## First, test direct calls to PGE::Text::bracketed
+##
+pir_output_is(<<'CODE', <<'OUT', "bracketed");
+.sub main :main
+    .local pmc bracketed
+    
+    load_bytecode 'PGE.pbc'
+    load_bytecode 'PGE/Text.pir'
+    
+    bracketed = find_global "PGE::Text", "bracketed"
+    
+    $S0 = "{ nested { and } okay, () and <>,  escaped \\}'s } okay"
+    $P0 = bracketed($S0)
+    bsr result
+    $S0 = "{ nested \n{ and } okay, \n() and <>, escaped \\}'s } okay"
+    $P0 = bracketed($S0) 
+    bsr result
+    $S0 = "{ nested { and } okay,  unbalanced ( and < , escaped \\}'s } okay"
+    $P0 = bracketed($S0, "{}")
+    bsr result
+    $S0 = "{ unmatched nested { not okay, nor ( and < } not okay"
+    $P0 = bracketed($S0, "{}")
+    bsr result
+    $S0 = "{ unbalanced nested [ even with } and ] to match not okay"
+    $P0 = bracketed($S0, "{}[]")
+    bsr result
+    $S0 = "<a quoted \">\" unbalanced right bracket> okay"
+    $P0 = bracketed($S0, "<\">")
+    bsr result
+    $S0 = "<quoted \">\" unbalanced of two quotes (`>>>\"\"\">>>>`)> okay"
+    $P0 = bracketed($S0, "<\"`>")
+    bsr result
+    $S0 = "<a misquoted '>' bracket ends string>"
+    $P0 = bracketed($S0, "<\"`>")
+    bsr result
+    .return ()
+
+  result:
+    if $P0 goto succeed
+    print "failed\n"
+    ret
+  succeed:
+    $I0 = $P0."to"()
+    $S1 = substr $S0, $I0
+    print $S1
+    print "\n"
+    ret
+.end
+CODE
+ okay
+ okay
+ okay
+failed
+failed
+ okay
+ okay
+' bracket ends string>
+OUT
+
+## Now, test calls as subrules
+##
+$PTB = "^<PGE::Text::bracketed>\$";
+p6rule_is  ("{ nested { and } okay, () and <> pairs okay }", $PTB);
+p6rule_is  ("{ nested { and } okay, escaped \\}'s okay }", $PTB);
+p6rule_isnt("{ unmatched nested { not okay }", $PTB);
+p6rule_isnt("{ unmatched nested ( not okay }", $PTB);
+
+# Don't forget to change the number of tests!

Reply via email to