Author: pmichaud
Date: Mon Oct 17 13:17:55 2005
New Revision: 9504
Added:
trunk/runtime/parrot/library/PGE/Text.pir
trunk/t/p6rules/text_brk.t
Removed:
trunk/runtime/parrot/library/PGE/Class.pir
trunk/runtime/parrot/library/PGE/RegCounter.pir
Modified:
trunk/MANIFEST
trunk/compilers/pge/demo.pir
trunk/config/gen/makefiles/root.in
trunk/lib/Parrot/Test/PGE.pm
trunk/t/library/pge.t
Log:
Added PGE::Text::bracketed, which allows parsing of nested
bracketed expressions (see Perl 5's Text::Balanced for prior art).
Cleaned up some unused files from the library/PGE directory.
Modified: trunk/MANIFEST
==============================================================================
--- trunk/MANIFEST (original)
+++ trunk/MANIFEST Mon Oct 17 13:17:55 2005
@@ -1637,11 +1637,10 @@ runtime/parrot/library/Data/Dumper.imc
runtime/parrot/library/Data/Dumper/Base.imc [library]
runtime/parrot/library/Data/Dumper/Default.imc [library]
runtime/parrot/library/Digest/MD5.pir [library]
-runtime/parrot/library/PGE/Class.pir [library]
runtime/parrot/library/PGE/Dumper.pir [library]
runtime/parrot/library/PGE/Glob.pir [library]
-runtime/parrot/library/PGE/RegCounter.pir [library]
runtime/parrot/library/PGE/Hs.pir [library]
+runtime/parrot/library/PGE/Text.pir [library]
runtime/parrot/library/SDL.imc [library]
runtime/parrot/library/SDL/App.imc [library]
runtime/parrot/library/SDL/Button.imc [library]
@@ -1859,6 +1858,7 @@ t/p6rules/capture.t
t/p6rules/cclass.t []
t/p6rules/escape.t []
t/p6rules/subrules.t []
+t/p6rules/text_brk.t []
t/p6rules/ws.t []
t/perl/Parrot_Distribution.t [devel]
t/perl/Parrot_Docs.t [devel]
Modified: trunk/compilers/pge/demo.pir
==============================================================================
--- trunk/compilers/pge/demo.pir (original)
+++ trunk/compilers/pge/demo.pir Mon Oct 17 13:17:55 2005
@@ -16,6 +16,7 @@
load_bytecode "PGE.pbc"
load_bytecode "PGE/Dumper.pir"
load_bytecode "PGE/Glob.pir"
+ load_bytecode "PGE/Text.pir"
find_global p6rule_compile, "PGE", "p6rule"
find_global glob_compile, "PGE", "glob"
istrace = 0
Modified: trunk/config/gen/makefiles/root.in
==============================================================================
--- trunk/config/gen/makefiles/root.in (original)
+++ trunk/config/gen/makefiles/root.in Mon Oct 17 13:17:55 2005
@@ -253,10 +253,9 @@ GEN_LIBRARY = \
$(LIBRARY_DIR)/ncurses.pbc \
$(LIBRARY_DIR)/parrotlib.pbc \
$(LIBRARY_DIR)/pcre.pbc \
- $(LIBRARY_DIR)/PGE/Class.pbc \
$(LIBRARY_DIR)/PGE/Dumper.pbc \
$(LIBRARY_DIR)/PGE/Glob.pbc \
- $(LIBRARY_DIR)/PGE/RegCounter.pbc \
+ $(LIBRARY_DIR)/PGE/Text.pbc \
$(LIBRARY_DIR)/Stream/Base.pbc \
$(LIBRARY_DIR)/Stream/Combiner.pbc \
$(LIBRARY_DIR)/Stream/Coroutine.pbc \
Modified: trunk/lib/Parrot/Test/PGE.pm
==============================================================================
--- trunk/lib/Parrot/Test/PGE.pm (original)
+++ trunk/lib/Parrot/Test/PGE.pm Mon Oct 17 13:17:55 2005
@@ -147,6 +147,7 @@ sub _generate_pir_for {
.sub _PGE_Test
.local pmc p6rule_compile
load_bytecode "PGE.pbc"
+ load_bytecode "PGE/Text.pir"
find_global p6rule_compile, "PGE", "p6rule"
.local string target
Added: trunk/runtime/parrot/library/PGE/Text.pir
==============================================================================
--- (empty file)
+++ trunk/runtime/parrot/library/PGE/Text.pir Mon Oct 17 13:17:55 2005
@@ -0,0 +1,130 @@
+=head1 TITLE
+
+PGE::Rule::Text - rules for extracting delimited text sequences from strings
+
+=head1 DESCRIPTION
+
+The various rules in this module may be used to extract delimited
+strings from within PGE rules. (They can of course be called directly,
+also.)
+
+=cut
+
+.namespace [ "PGE::Text" ]
+
+.include "cclass.pasm"
+
+.sub "__onload" @LOAD
+ .local pmc base
+ base = getclass "PGE::Rule"
+ $P0 = subclass base, "PGE::Text"
+.end
+
+=head2 Available rules
+
+=item C<bracketed(PMC target, string delim)>
+
+Extracts a balanced-bracket-delimited substring from the
+current position of C<target> using the delimiters specified
+by C<delim>, and returns a C<Match> object containing the result
+of the extraction.
+
+=cut
+
+.sub "bracketed"
+ .param pmc tgt # target to match
+ .param string delim :optional # optional delimiters
+ .param int has_delim :opt_flag
+ .local pmc mob # return match object
+ .local string target # target as string
+ .local string bal, bra, ket # balanced brackets
+ .local string delim_bra, delim_ket # delims for this match
+ .local string lookket # closing bracket char
+ .local int pos # current match position
+ .local int balanced # in balanced match
+
+ $P0 = find_global "PGE::Match", "newfrom"
+ mob = $P0(tgt, 0)
+ $P0 = getattribute mob, "PGE::Match\x0$:target"
+ target = $P0
+ $P0 = getattribute mob, "PGE::Match\x0$:from"
+ pos = $P0
+
+ if has_delim goto mkdelims
+ delim = "{}()[]<>"
+
+ mkdelims: # set up delimiters
+ delim_bra = '' # list of open delims
+ delim_ket = '' # list of close delims
+ bal = '{}()[]<>' # list of balance delims
+ bra = '{{(([[<<' # balanced openers
+ ket = '}}))]]>>' # balanced closers
+ $I0 = length delim # length of delim string
+ mkdelims_1:
+ dec $I0
+ if $I0 < 0 goto extract
+ $S0 = substr delim, $I0, 1
+ $I1 = index bal, $S0
+ if $I1 < 0 goto mkdelims_2
+ $S1 = substr bra, $I1, 1
+ delim_bra .= $S1
+ $S1 = substr ket, $I1, 1
+ delim_ket .= $S1
+ goto mkdelims_1
+ mkdelims_2:
+ delim_bra .= $S0
+ delim_ket .= $S0
+ goto mkdelims_1
+
+ extract:
+ $S0 = substr target, pos, 1
+ if $S0 == "\\" goto end # leading escape fails
+ $I0 = index delim_bra, $S0
+ if $I0 < 0 goto end # no leading delim fails
+ lookket = ''
+ balanced = 1
+ next:
+ $S0 = substr target, pos, 1 # check current pos
+ if $S0 == '' goto fail # end of string -> fail
+ if $S0 == "\\" goto escape # skip escaped pos
+ if $S0 == lookket goto close # end of current nest
+ if balanced < 0 goto skip # skip to next char
+ $I0 = index delim_bra, $S0 # open new nest?
+ if $I0 >= 0 goto open
+ $I0 = index delim_ket, $S0 # unbalanced nest?>
+ if $I0 >= 0 goto fail
+ skip:
+ inc pos # move to next char
+ goto next # try next
+ escape:
+ pos += 2 # skip escape + char
+ goto next # try next
+ open: # open new nesting
+ save lookket # save current nest
+ lookket = substr delim_ket, $I0, 1 # search to end of nest
+ balanced = index bra, $S0 # is this a balanced nest?
+ inc pos # skip open char
+ goto next # continue scanning
+ close: # close current nesting
+ restore lookket # restore previous nest
+ balanced = 1 # we're balancing again
+ inc pos # skip close char
+ if lookket != '' goto next # still nested?
+ $P0 = getattribute mob, "PGE::Match\x0$:pos" # we have a match!
+ $P0 = pos
+ goto end
+ fail: # fail match
+ if lookket == '' goto end # clean up restore stack
+ restore lookket
+ goto fail
+ end:
+ .return (mob)
+.end
+
+=head1 AUTHOR
+
+Patrick Michaud ([EMAIL PROTECTED]) is the author and maintainer.
+Patches and suggestions should be sent to the Perl 6 compiler list
+([email protected]).
+
+=cut
Modified: trunk/t/library/pge.t
==============================================================================
--- trunk/t/library/pge.t (original)
+++ trunk/t/library/pge.t Mon Oct 17 13:17:55 2005
@@ -14,143 +14,9 @@ t/library/pge.t - Grammar Engine tests
use strict;
-use Parrot::Test tests => 6;
+use Parrot::Test tests => 3;
# 1
-pir_output_is(<<'CODE', <<'OUT', "character class membership");
-
-.sub _main
- load_bytecode "library/PGE/Class.pir"
-
- .local pmc vowels
- .local pmc new_sub
-
- find_global new_sub, "PGE::Class::Discrete", "new"
- vowels = new_sub("aeiou")
-
- $I1 = vowels["o"]
- if $I1 goto OK1
- print "not "
-OK1:
- print "ok 1\n"
-
- $I2 = vowels["q"]
- unless $I2 goto OK2
- print "not "
-OK2:
- print "ok 2\n"
-
- end
-.end
-CODE
-ok 1
-ok 2
-OUT
-
-# 2
-pir_output_is(<<'CODE', <<'OUT', "character class membership: method form");
-
-.sub _main
- load_bytecode "library/PGE/Class.pir"
-
- .local pmc vowels
- .local pmc new_sub
- find_global new_sub, "PGE::Class::Discrete", "new"
- vowels = new_sub("aeiou")
-
- $I1 = vowels.matches("o")
- if $I1 goto OK1
- print "not "
-OK1:
- print "ok 1\n"
-
- $I2 = vowels.matches("q")
- unless $I2 goto OK2
- print "not "
-OK2:
- print "ok 2\n"
-
- end
-.end
-CODE
-ok 1
-ok 2
-OUT
-
-# 3
-pir_output_is(<<'CODE', <<'OUT', "RegCounter");
-
-.sub _main
- load_bytecode "library/PGE/RegCounter.pir"
-
- .local pmc rc
- $P0 = find_global "PGE::RegCounter", "new"
- rc = $P0()
-
- rc.declare("foo", "$I")
- rc.declare("bar", "$P")
- rc.declare("baz", "$I")
-
- $S0 = rc["foo"]
- print $S0
- print "\n"
-
- $S0 = rc["bar"]
- print $S0
- print "\n"
-
- $S0 = rc["baz"]
- print $S0
- print "\n"
-
- $S0 = rc["bar"]
- print $S0
- print "\n"
-
- .local pmc ch
- ch = rc.next()
-
- ch.declare("bar", "$P")
- ch.declare("baz", "$P")
- ch.declare("quux", "_L")
-
- $S0 = ch["bar"]
- print $S0
- print "\n"
-
- $S0 = ch["baz"]
- print $S0
- print "\n"
-
- $S0 = ch["quux"]
- print $S0
- print "\n"
-
- $S0 = ch["bar"]
- print $S0
- print "\n"
-
- $S0 = rc["bar"]
- print $S0
- print "\n"
-
- $S0 = rc["baz"]
- print $S0
- print "\n"
-.end
-CODE
-$I0
-$P1
-$I2
-$P1
-$P3
-$P4
-_L5
-$P3
-$P1
-$I2
-OUT
-
pir_output_is(<<'CODE', <<'OUT', "Glob, wildcards");
.sub _main
@@ -217,6 +83,7 @@ ok6
ok7
OUT
+# 2
pir_output_is(<<'CODE', <<'OUT', "Glob, character classes");
.sub _main
@@ -310,6 +177,7 @@ ok10
ok11
OUT
+# 3
pir_output_is(<<'CODE', <<'OUT', "Glob, alternate");
.sub _main
Added: trunk/t/p6rules/text_brk.t
==============================================================================
--- (empty file)
+++ trunk/t/p6rules/text_brk.t Mon Oct 17 13:17:55 2005
@@ -0,0 +1,71 @@
+use Parrot::Test tests => 5;
+use Parrot::Test::PGE;
+
+## First, test direct calls to PGE::Text::bracketed
+##
+pir_output_is(<<'CODE', <<'OUT', "bracketed");
+.sub main :main
+ .local pmc bracketed
+
+ load_bytecode 'PGE.pbc'
+ load_bytecode 'PGE/Text.pir'
+
+ bracketed = find_global "PGE::Text", "bracketed"
+
+ $S0 = "{ nested { and } okay, () and <>, escaped \\}'s } okay"
+ $P0 = bracketed($S0)
+ bsr result
+ $S0 = "{ nested \n{ and } okay, \n() and <>, escaped \\}'s } okay"
+ $P0 = bracketed($S0)
+ bsr result
+ $S0 = "{ nested { and } okay, unbalanced ( and < , escaped \\}'s } okay"
+ $P0 = bracketed($S0, "{}")
+ bsr result
+ $S0 = "{ unmatched nested { not okay, nor ( and < } not okay"
+ $P0 = bracketed($S0, "{}")
+ bsr result
+ $S0 = "{ unbalanced nested [ even with } and ] to match not okay"
+ $P0 = bracketed($S0, "{}[]")
+ bsr result
+ $S0 = "<a quoted \">\" unbalanced right bracket> okay"
+ $P0 = bracketed($S0, "<\">")
+ bsr result
+ $S0 = "<quoted \">\" unbalanced of two quotes (`>>>\"\"\">>>>`)> okay"
+ $P0 = bracketed($S0, "<\"`>")
+ bsr result
+ $S0 = "<a misquoted '>' bracket ends string>"
+ $P0 = bracketed($S0, "<\"`>")
+ bsr result
+ .return ()
+
+ result:
+ if $P0 goto succeed
+ print "failed\n"
+ ret
+ succeed:
+ $I0 = $P0."to"()
+ $S1 = substr $S0, $I0
+ print $S1
+ print "\n"
+ ret
+.end
+CODE
+ okay
+ okay
+ okay
+failed
+failed
+ okay
+ okay
+' bracket ends string>
+OUT
+
+## Now, test calls as subrules
+##
+$PTB = "^<PGE::Text::bracketed>\$";
+p6rule_is ("{ nested { and } okay, () and <> pairs okay }", $PTB);
+p6rule_is ("{ nested { and } okay, escaped \\}'s okay }", $PTB);
+p6rule_isnt("{ unmatched nested { not okay }", $PTB);
+p6rule_isnt("{ unmatched nested ( not okay }", $PTB);
+
+# Don't forget to change the number of tests!