cvsuser 04/09/17 20:17:00
Modified: languages/regex .cvsignore Makefile README regex.pl
languages/regex/lib/Regex Rewrite.pm
Added: languages/regex 01_basic.imc 02_date.imc regex-compiler.imc
Log:
Some simple examples of using this compiler as an embedded Parrot
compiler.
Revision Changes Path
1.2 +1 -0 parrot/languages/regex/.cvsignore
Index: .cvsignore
===================================================================
RCS file: /cvs/public/parrot/languages/regex/.cvsignore,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -w -r1.1 -r1.2
--- .cvsignore 14 Sep 2003 20:15:44 -0000 1.1
+++ .cvsignore 18 Sep 2004 03:16:57 -0000 1.2
@@ -1,2 +1,3 @@
test.imc
test.pbc
+regex-compiler.pbc
1.7 +11 -2 parrot/languages/regex/Makefile
Index: Makefile
===================================================================
RCS file: /cvs/public/parrot/languages/regex/Makefile,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -w -r1.6 -r1.7
--- Makefile 25 Aug 2002 08:45:24 -0000 1.6
+++ Makefile 18 Sep 2004 03:16:57 -0000 1.7
@@ -1,16 +1,19 @@
YAPP = yapp
+PARROT = ../../parrot
-all : lib/Regex/Grammar.pm
+all : lib/Regex/Grammar.pm regex-compiler.pbc
lib/Regex/Grammar.pm : lib/Regex/Grammar.y
$(YAPP) -s -m Regex::Grammar -o $@ lib/Regex/Grammar.y
+regex-compiler.pbc : regex-compiler.imc
+ $(PARROT) -o regex-compiler.pbc regex-compiler.imc
+
# Note: These tend to be in the order of the latest thing I've been
# working on to the oldest, so that I see the failures quicker.
.PHONY: test
test :
perl test.pl t/backopt.t
-# perl test.pl t/infinite.t
perl test.pl t/quantindex.t
perl test.pl t/literal.t
perl test.pl t/optional.t
@@ -23,3 +26,9 @@
perl test.pl t/staralt.t
perl test.pl t/plus.t
perl test.pl t/example.t
+
+# This is a TODO test -- currently, it loops forever. It will require
+# adding a check to the matcher that it is making progress. Or
+# something. I need to look at this again.
+#
+# perl test.pl t/infinite.t
1.10 +12 -7 parrot/languages/regex/README
Index: README
===================================================================
RCS file: /cvs/public/parrot/languages/regex/README,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -w -r1.9 -r1.10
--- README 14 Sep 2003 20:15:45 -0000 1.9
+++ README 18 Sep 2004 03:16:57 -0000 1.10
@@ -34,6 +34,12 @@
All of this really ought to be in a usage message.
+New stuff: now you can use the compiler as an embedded Parrot compiler.
+Run 'make regex-compiler.pbc' to generate regex-compiler.pbc. Then run
+../../parrot 01_basic.imc, ../../parrot 02_date.imc, etc. These will
+dynamically load in the regex-compiler as the compiler for the "regex"
+language, and then use it to compile some code and execute it.
+
STATUS
======
@@ -106,14 +112,13 @@
exactly how things are translated with this compiler, read Rewrite.pm
and Rewrite/Stackless.pm.
-Register Usage:
- I0 - temporary, not preserved between tree op rewrites
+"Local" variables:
+ * One integer temporary, not preserved between tree op rewrites
(it's just a very short-term temp register)
- I1 - current position within the input string
- I2 - length of the input string
- P0 - array of start indices for parenthesis matches
- P1 - array of end indices for parenthesis matches
- I3.. - callee-saved temporary registers
+ * The current position within the input string
+ * The length of the input string
+ * A Match PMC, for holding start and stop positions for matched groups
+ * Other stuff, but this list is hopelessly out of date at the moment
Optimizations implemented (notation: parentheses here are non-capturing):
1.12 +3 -3 parrot/languages/regex/regex.pl
Index: regex.pl
===================================================================
RCS file: /cvs/public/parrot/languages/regex/regex.pl,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -w -r1.11 -r1.12
--- regex.pl 18 Sep 2004 02:53:36 -0000 1.11
+++ regex.pl 18 Sep 2004 03:16:57 -0000 1.12
@@ -45,7 +45,8 @@
$options{'no-list-optimize'} = 1 if ! $list_opt;
$options{'DEBUG'} = 1 if $debug;
-my $tree = Regex::expr_to_tree($expr, %options);
+my $ctx = { };
+my $tree = Regex::expr_to_tree($expr, $ctx, %options);
if ($operation eq 'unparse' || $operation eq 'render') {
print $tree->render(), "\n";
@@ -55,11 +56,10 @@
exit;
}
-my $ctx = { };
my $code = Regex::tree_to_list($tree, $ctx, 'regex_done', 'regex_done',
%options);
-my @asm = Regex::list_to_pasm($code, %options);
+my @asm = Regex::list_to_pasm($code, $ctx, %options);
local *OUTPUT;
if (! defined $output || $output eq '-') {
1.1 parrot/languages/regex/01_basic.imc
Index: 01_basic.imc
===================================================================
# Basic example of using an embedded regex compiler. Requires
# "regex-compiler.pbc", which can be generated by running
#
# parrot -o regex-compiler.pbc regex-compiler.imc
#
.sub main @MAIN
# Demo driver: compiles the patterns "a*" and "aa" with the embedded
# "regex" compiler and prints the outcome of three match attempts.
.local pmc match
.local pmc compile_regex
.local pmc regex1
.local pmc regex2
# Load the compiler bytecode, then look up the compiler it provides
# under the name "regex".
load_bytecode "regex-compiler.pbc"
compile_regex = compreg "regex"
# Attempt 1: /a*/ against "aaaargh".
regex1 = compile_regex("a*")
match = regex1("aaaargh")
print "regex match \"aaaargh\" =~ /a*/: "
if match goto goodness
print "failed\n"
# Attempt 2: /aa/ against "aaaargh".  Reached by fall-through when
# attempt 1 fails, or by the jump at the end of "goodness" when it succeeds.
match2:
regex2 = compile_regex("aa")
match = regex2("aaaargh")
print "regex match \"aaaargh\" =~ /aa/: "
if match goto goodness2
print "failed\n"
goto match3
# Success report for attempt 1.  Keys ["0";0] and ["0";1] are printed as the
# two ends of the matching range (presumably start and end of group 0 --
# confirm against the Match PMC).
goodness:
print "succeeded!\n"
print "Matching range: "
$I0 = match["0";0]
print $I0
print ".."
$I0 = match["0";1]
print $I0
print "\n"
goto match2
# Success report for attempt 2; falls through into attempt 3.
goodness2:
print "succeeded!\n"
print "Matching range: "
$I0 = match["0";0]
print $I0
print ".."
$I0 = match["0";1]
print $I0
print "\n"
# Attempt 3: reuse the compiled /aa/ against "blah"; a failed match is the
# expected result here.
match3:
match=regex2("blah")
print "regex match \"blah\" =~ /aa/: "
if match goto goodness3
print "Failed (as it should)\n"
end
# Only reached if /aa/ unexpectedly matches "blah".
goodness3:
print "succeeded?! (should not have)\n"
end
.end
1.1 parrot/languages/regex/02_date.imc
Index: 02_date.imc
===================================================================
# Basic example of using an embedded regex compiler to parse out the
# components of a simple date string. Requires "regex-compiler.pbc",
# which can be generated by running
#
# parrot -o regex-compiler.pbc regex-compiler.imc
#
.sub main @MAIN
# Demo driver: compiles a date-like pattern with the embedded "regex"
# compiler, matches it against a sample timestamp, and prints the overall
# matching range followed by the three captured groups.
.local pmc match
.local pmc compile_regex
.local pmc regex1
# Load the compiler bytecode, then look up the compiler it provides
# under the name "regex".
load_bytecode "regex-compiler.pbc"
compile_regex = compreg "regex"
# Group 1: three-letter weekday, group 2: month name, group 3: day number.
# The quantifier sits inside group 3 so that the whole run of digits is
# captured; with "([0-9])+" the group itself only ever spans one digit.
regex1 = compile_regex("([a-zA-Z]{3}) ([a-zA-Z]+) *([0-9]+)")
match = regex1("Mon Sep 6 22:45:48 PDT 2004")
print "regex match today =~ /<?weekday> <?month> *<?day>/ (sorta): "
if match goto goodness
print "failed\n"
end
# Success path.  Keys ["0";0] and ["0";1] are printed as the two ends of the
# matching range (presumably start and end of group 0 -- confirm against
# the Match PMC).
goodness:
print "succeeded!\n"
print "Matching range: "
$I0 = match["0";0]
print $I0
print ".."
$I0 = match["0";1]
print $I0
print "\n"
# Fetching a group by its number alone into a string register is used here
# to obtain the text shown for that group.
print "Weekday: "
$S0 = match["1"]
print $S0
print "\nMonth: "
$S0 = match["2"]
print $S0
print "\nDay: "
$S0 = match["3"]
print $S0
print "\n"
end
.end
1.1 parrot/languages/regex/regex-compiler.imc
Index: regex-compiler.imc
===================================================================
# Load-time hook (marked @LOAD): registers _compile_regex as the compiler
# for the language name "regex".
.sub __init_compiler @LOAD
# Load the "match_group" library.  The handle placed in $P0 is not used
# afterwards; $P0 is overwritten on the next line.
loadlib $P0, "match_group"
# Wrap _compile_regex in a Sub PMC and register it under the name "regex".
newsub $P0, .Sub, _compile_regex
compreg "regex", $P0
# Return nothing to the caller.
.pcc_begin_return
.pcc_end_return
.end
# Compile a regex source string into a callable sub by shelling out to
# regex.pl, compiling the PIR it writes, and returning the global "_regex".
.sub _compile_regex
.local string code
# The pattern text is taken directly from register S5 (presumably where the
# calling convention places the first string argument -- TODO confirm).
code = S5
.local int pid
.local string imcfile
.local string pir_data
.local string cmdline
# Build a per-process output file name: /tmp/regex-<pid>.imc
# NOTE(review): the name is predictable and nothing in this sub deletes the
# file afterwards.
pid = getpid
imcfile = "/tmp/regex-"
$S0 = pid
imcfile = concat $S0
imcfile = concat ".imc"
# Assemble: regex.pl --sub-name=_regex -o <imcfile> '<code>'
# NOTE(review): the pattern is wrapped in single quotes without any
# escaping, so a pattern containing a single quote will produce a different
# command line than intended -- confirm how spawnw interprets the string.
cmdline = "regex.pl --sub-name=_regex -o "
cmdline = concat imcfile
cmdline = concat " '"
cmdline = concat code
cmdline = concat "'"
# Run the command; a zero result is treated as success.
$I0 = spawnw cmdline
unless $I0 goto imc_to_pbc
print "Failed to compile to .imc file.\n"
# Failure path: invokes P1 directly (presumably the caller's return
# continuation -- TODO confirm); no result value is set up first.
invoke P1
imc_to_pbc:
# Read the generated file back and feed its text to the "PIR" compiler.
pir_data = _readfile(imcfile)
$P0 = compreg "PIR"
$P1 = compile $P0, pir_data
# $P0 = compreg "FILE"
# $P1 = $P0(imcfile)
# $P1 = compile $P0, imcfile
# The value produced by "compile" above is discarded; the sub handed back
# is whatever is stored under the global name "_regex" (the name requested
# via --sub-name on the command line).
$P1 = find_global "_regex"
.pcc_begin_return
.return $P1
.pcc_end_return
.end
# Read the entire contents of a file into a single string.
#
# Parameters:
#   filename - path of the file to read
# Returns:
#   the concatenation of every chunk read from the file, in order
.sub _readfile
.param string filename
.local pmc file
.local string result
.local string buffer
result = ""
file = open filename, "<"
# Read in chunks of up to 65536 bytes; a zero-length read ends the loop.
loop: buffer = read file, 65536
$I0 = length buffer
le $I0, 0, done
result = concat buffer
goto loop
# Close the handle before returning so that each compiled regex does not
# leave an open file behind.
done: close file
.pcc_begin_return
.return result
.pcc_end_return
.end
1.18 +1 -12 parrot/languages/regex/lib/Regex/Rewrite.pm
Index: Rewrite.pm
===================================================================
RCS file: /cvs/public/parrot/languages/regex/lib/Regex/Rewrite.pm,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -w -r1.17 -r1.18
--- Rewrite.pm 18 Sep 2004 02:52:09 -0000 1.17
+++ Rewrite.pm 18 Sep 2004 03:17:00 -0000 1.18
@@ -49,12 +49,7 @@
my ($self, $name) = @_;
my $NUM_REGISTERS = 32; # ?
$name ||= "_temp_int_" . $self->{_temp_int_count};
- my $register = "I" . $self->{_temp_int_count};
-
- if (++$self->{_temp_int_count} > $NUM_REGISTERS) {
- # Time to switch to IMCC.
- die "Too many temporaries requested! Implement register spilling!";
- }
+ my $register = "\$I" . $self->{_temp_int_count};
$self->{_temps}{$name} = $register;
return $register;
@@ -1006,12 +1001,6 @@
# Glue them together
@ops = (@pre_ops, @ops, @post_ops);
- foreach my $temp_reg (values %{ $self->{_temps} }) {
- warn "temp register $temp_reg: I'm not sure this is supported anymore";
- unshift @ops, aop('push_reg', [ $temp_reg ]);
- push @ops, aop('pop_reg', [ $temp_reg ]);
- }
-
return { lastback => $back, code => \@ops };
}