cvsuser 03/06/06 09:27:00
Modified: . core.ops io.ops rx.ops
Log:
PMC-data-3: ops files; rx.ops reformatted
Revision Changes Path
1.277 +7 -6 parrot/core.ops
Index: core.ops
===================================================================
RCS file: /cvs/public/parrot/core.ops,v
retrieving revision 1.276
retrieving revision 1.277
diff -u -w -r1.276 -r1.277
--- core.ops 6 Jun 2003 12:01:35 -0000 1.276
+++ core.ops 6 Jun 2003 16:27:00 -0000 1.277
@@ -4317,12 +4317,12 @@
op newinterp(out PMC, in INT) {
struct Parrot_Interp *new_interp;
struct PMC *new_pmc;
+ new_pmc = new_pmc_header(interpreter);
+ $1 = new_pmc;
new_interp = make_interpreter((Interp_flags)$2);
new_interp->parent_interpreter = interpreter;
- new_pmc = new_pmc_header(interpreter);
- new_pmc->data = new_interp;
+ PMC_data(new_pmc) = new_interp;
new_pmc->vtable = YOU_LOSE_VTABLE;
- $1 = new_pmc;
goto NEXT();
}
@@ -4334,7 +4334,8 @@
=cut
op runinterp(inout PMC, in INT) {
- struct Parrot_Interp * new_interp = (struct Parrot_Interp *)$1->data;
+ struct Parrot_Interp * new_interp =
+ (struct Parrot_Interp *)PMC_data($1);
Interp_flags_SET(new_interp, PARROT_EXTERN_CODE_FLAG);
new_interp->code = interpreter->code;
runops(new_interp, REL_PC + $2);
@@ -4655,7 +4656,7 @@
PANIC("Failed to load native library");
}
pmc = new_pmc_header(interpreter);
- pmc->data = (void *)p;
+ PMC_data(pmc) = (void *)p;
$1 = pmc;
goto NEXT();
}
@@ -4664,7 +4665,7 @@
char * name = string_to_cstring(interpreter, ($3));
PMC *nci;
- Parrot_csub_t p = (Parrot_csub_t)D2FPTR(Parrot_dlsym(($2)->data, name));
+ Parrot_csub_t p = (Parrot_csub_t)D2FPTR(Parrot_dlsym(PMC_data($2), name));
free(name);
if(p == NULL) {
const char * err = Parrot_dlerror();
1.20 +12 -11 parrot/io.ops
Index: io.ops
===================================================================
RCS file: /cvs/public/parrot/io.ops,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -w -r1.19 -r1.20
--- io.ops 27 May 2003 19:05:25 -0000 1.19
+++ io.ops 6 Jun 2003 16:27:00 -0000 1.20
@@ -36,7 +36,7 @@
=cut
inline op close(inout PMC) {
- PIO_close(interpreter, (ParrotIO*)(($1)->data));
+ PIO_close(interpreter, (ParrotIO*)(PMC_data($1)));
goto NEXT();
}
@@ -258,7 +258,7 @@
op print(in PMC, in STR) {
ParrotIO * io;
- io = (ParrotIO*)($1->data);
+ io = (ParrotIO*)(PMC_data($1));
if ($2 && io) {
PIO_write(interpreter, io, ($2)->strstart, string_length($2));
}
@@ -366,7 +366,8 @@
n = $3;
$1 = string_make(interpreter, NULL, n, NULL, 0, NULL);
memset(($1)->strstart, 0, n);
- nr = PIO_read(interpreter, (ParrotIO*)($2->data), ($1)->strstart, (size_t)n);
+ nr = PIO_read(interpreter, (ParrotIO*)(PMC_data($2)),
+ ($1)->strstart, (size_t)n);
if(nr > 0)
($1)->strlen = ($1)->bufused = nr;
else
@@ -416,7 +417,7 @@
op seek(out INT, in PMC, in INT, in INT) {
ParrotIO * io;
- io = (ParrotIO*)($2->data);
+ io = (ParrotIO*)(PMC_data($2));
if (io) {
$1 = (INTVAL)PIO_seek(interpreter, io, 0, $3, $4);
}
@@ -425,7 +426,7 @@
op seek(out INT, in PMC, in INT, in INT, in INT) {
ParrotIO * io;
- io = (ParrotIO*)($2->data);
+ io = (ParrotIO*)(PMC_data($2));
if (io) {
$1 = (INTVAL)PIO_seek(interpreter, io, $3, $4, $5);
}
1.35 +217 -193 parrot/rx.ops
Index: rx.ops
===================================================================
RCS file: /cvs/public/parrot/rx.ops,v
retrieving revision 1.34
retrieving revision 1.35
diff -u -w -r1.34 -r1.35
--- rx.ops 7 Apr 2003 17:48:19 -0000 1.34
+++ rx.ops 6 Jun 2003 16:27:00 -0000 1.35
@@ -54,26 +54,30 @@
=head1 DESCRIPTION
-The Perl 5 regular expression engine was state-of-the-art. It was the fastest and
-most featureful implementation available. Everybody used Perl 5's regular expression
-syntax wherever possible.
+The Perl 5 regular expression engine was state-of-the-art. It was the
+fastest and most featureful implementation available. Everybody used
+Perl 5's regular expression syntax wherever possible.
The Perl 5 regular expression engine was also a mess.
-The engine was like a separate interpreter unto itself. Few understood its dark magic,
-and fewer worked on its baroque source. It was a black box, sealed off from the outside
-world with only a couple opcodes to show in other files. It was the slowest part of Perl
-to adapt to new features--it was one of the last to get threadsafety and full Unicode
-support--because so few people understood it. Larry Wall once said that three people
-understood the regex engine, give or take four.
-
-Because of these issues, the design documents for Parrot called for regular expression
-opcodes to be built in to the interpreter. This group of opcodes, called the Parrot
-Regular Expression Engine version 4.0 (or simply Rx4), is the result.
+The engine was like a separate interpreter unto itself. Few
+understood its dark magic, and fewer worked on its baroque source. It
+was a black box, sealed off from the outside world with only a couple
+opcodes to show in other files. It was the slowest part of Perl to
+adapt to new features--it was one of the last to get threadsafety and
+full Unicode support--because so few people understood it. Larry Wall
+once said that three people understood the regex engine, give or take
+four.
+
+Because of these issues, the design documents for Parrot called for
+regular expression opcodes to be built in to the interpreter. This
+group of opcodes, called the Parrot Regular Expression Engine version
+4.0 (or simply Rx4), is the result.
=head2 Basic Concepts
-Perl 5 had one opcode for each operation in the regular expression. For example:
+Perl 5 had one opcode for each operation in the regular expression.
+For example:
>perl -mre=debug -e '/ab+[cd]/'
Compiling REx `ab+[cd]'
@@ -86,9 +90,10 @@
anchored `ab' at 0 floating `b' at 1..2147483647 (checking anchored) minlen 3
Freeing REx: `ab+[cd]'
-(The C<re> pragma with the 'debug' switch displays the compiled version of the regex.
-The numbers in parenthesis represent where to jump to on success; 0 is a
-special value meaning "this part of the regex is done".)
+(The C<re> pragma with the 'debug' switch displays the compiled
+version of the regex. The numbers in parenthesis represent where to
+jump to on success; 0 is a special value meaning "this part of the
+regex is done".)
In Rx4, that regular expression would be something like:
@@ -108,42 +113,49 @@
rx_oneof S0, I1, "cd", $backtrack
branch $success
-(In Rx4, the last parameter is a label to branch to on I<failure>, not success.)
+(In Rx4, the last parameter is a label to branch to on I<failure>, not
+success.)
-10 operations in Rx4 to 5 in Perl 5. I can already hear the cynicism: "how could
-that be BETTER?!?" Well, there's several reasons.
+10 operations in Rx4 to 5 in Perl 5. I can already hear the cynicism:
+"how could that be BETTER?!?" Well, there's several reasons.
-The first is that it frees us to use normal ops, and in fact they're used all the
-time. C<branch> is a normal op; so is C<bsr>, the normal way to call a subrule.
-Things like C<(?{CODE})> can be implemented with relative ease--simply put the
-normal opcodes in the appropriate place in the regex. If you're debugging
-a regex, you can simply sprinkle output messages liberally throughout the regex.
-
-The second is opcode dispatch. Parrot has very fast opcode dispatch, and we can use
-that to our advantage.
-
-Finally, there's the matter of optimizations. As an example, take C</a+bc+/>. The
-most efficient way to look for that is probably to look for the constant string 'abc'
-and expand outwards from there--especially if you use Boyer-Moore or another fast
-search algorithm. It means that the code generator can decide whether to optimize
-for success or failure, for compilation or execution speed. You get the idea.
-
-Bottom line is, Rx4 lays out exactly what's going on. This is a feature. It gives the
-regex compiler total control over what's going on.
+The first is that it frees us to use normal ops, and in fact they're
+used all the time. C<branch> is a normal op; so is C<bsr>, the normal
+way to call a subrule. Things like C<(?{CODE})> can be implemented
+with relative ease--simply put the normal opcodes in the appropriate
+place in the regex. If you're debugging a regex, you can simply
+sprinkle output messages liberally throughout the regex.
+
+The second is opcode dispatch. Parrot has very fast opcode dispatch,
+and we can use that to our advantage.
+
+Finally, there's the matter of optimizations. As an example, take
+C</a+bc+/>. The most efficient way to look for that is probably to
+look for the constant string 'abc' and expand outwards from
+there--especially if you use Boyer-Moore or another fast search
+algorithm. It means that the code generator can decide whether to
+optimize for success or failure, for compilation or execution speed.
+You get the idea.
+
+Bottom line is, Rx4 lays out exactly what's going on. This is a
+feature. It gives the regex compiler total control over what's going
+on.
=head2 The Opcodes
There are two basic rules to how the opcodes operate.
-The first rule is that the first argument to each opcode is the string we are
-matching against, and the second one is the current index in the string.
-
-The second rule pertains to the ops that have an integer constant as their last parameter.
-For the most part, these ops will branch to that parameter if they 'fail'. For most ops,
-'fail' means 'fail to match'.
+The first rule is that the first argument to each opcode is the string
+we are matching against, and the second one is the current index in
+the string.
+
+The second rule pertains to the ops that have an integer constant as
+their last parameter. For the most part, these ops will branch to
+that parameter if they 'fail'. For most ops, 'fail' means 'fail to
+match'.
-If the documentation for an op doesn't specifically mention the first or last parameter,
-that's what they are.
+If the documentation for an op doesn't specifically mention the first
+or last parameter, that's what they are.
The documentation for each opcode follows.
@@ -162,9 +174,10 @@
=item C<rx_compile>(out str, in str, in str)
-Provides a built-in regular expression compiler. The first parameter is set to the
-address of the newly-compiled regex, which can then be C<jsr>'ed to; the second
-parameter is the regex itself; and the third parameter is the modifiers on the regex.
+Provides a built-in regular expression compiler. The first parameter
+is set to the address of the newly-compiled regex, which can then be
+C<jsr>'ed to; the second parameter is the regex itself; and the third
+parameter is the modifiers on the regex.
B<XXX> Currently this op has not been implemented.
@@ -213,8 +226,9 @@
=item C<rx_pushmark>()
-Pushes a 'mark' onto the stack contained in the info structure. Marks are used
-to indicate where one operation's backtrack information ends and another's begins.
+Pushes a 'mark' onto the stack contained in the info structure. Marks
+are used to indicate where one operation's backtrack information ends
+and another's begins.
=cut
@@ -228,8 +242,8 @@
=item C<rx_popindex>(out int, inconst int)
-Pops an index off the stack. If it pops a mark off instead, it branches to the
-second parameter.
+Pops an index off the stack. If it pops a mark off instead, it
+branches to the second parameter.
=cut
@@ -265,8 +279,8 @@
=item C<rx_advance>(in str, inout int, inconst int)
-Increments the start index one character. Branches to the third parameter
-if it goes past the end of the string.
+Increments the start index one character. Branches to the third
+parameter if it goes past the end of the string.
$2 is the current value of start_index.
@@ -405,10 +419,11 @@
=item C<rx_oneof>(in str, inout int, in pmc, inconst int)
-Matches if the current character is one of the characters in the third parameter.
+Matches if the current character is one of the characters in the third
+parameter.
-This op requires that its input be sorted for efficiency. Further, it requires that all
-ranges (C<a-z>) be expanded by the regex compiler.
+This op requires that its input be sorted for efficiency. Further, it
+requires that all ranges (C<a-z>) be expanded by the regex compiler.
=cut
@@ -437,8 +452,8 @@
=item C<rx_oneof_bmp>(in str, inout int, in pmc, inconst int)
-This op has the exact same behavior as C<rx_oneof>, except that the third parameter
-is a Pointer to a bitmap generated by C<rx_makebmp>.
+This op has the exact same behavior as C<rx_oneof>, except that the
+third parameter is a Pointer to a bitmap generated by C<rx_makebmp>.
=cut
@@ -449,7 +464,7 @@
}
- if(bitmap_match($3->data, string_index($1,$2) )) {
+ if(bitmap_match(PMC_data($3), string_index($1,$2) )) {
$2++;
goto NEXT();
}
@@ -463,8 +478,8 @@
=item C<rx_dot>(in str, inout int, inconst int)
-Matches any character. This currently works exactly like rx_advance, but we
-leave it here in case they have to diverge in the future.
+Matches any character. This currently works exactly like rx_advance,
+but we leave it here in case they have to diverge in the future.
=cut
@@ -479,8 +494,8 @@
=item C<rx_zwa_boundary>(in str, in int, inconst int)
-Matches if the one of the previous character and the next character is a word
-character, and the other one is not (usually C<\b>).
+Matches if the one of the previous character and the next character is
+a word character, and the other one is not (usually C<\b>).
=cut
@@ -542,8 +557,9 @@
=item C<rx_search>(in str, out int, inout int, in str, inconst in)
-Searches for the literal $4 on the string $1 starting at $3. Sets $2 to the current
-index in the string (after the literal), and $3 to start_index.
+Searches for the literal $4 on the string $1 starting at $3. Sets $2
+to the current index in the string (after the literal), and $3 to
+start_index.
Branches to $5 if the literal is not found.
@@ -597,8 +613,8 @@
=item C<rx_search_char> (in str, out int, inout int, in str, inconst in)
-Searches for the char $4 on the string $1 starting at $3. Sets $2 to the current
-index in the string (after the char)
+Searches for the char $4 on the string $1 starting at $3. Sets $2 to
+the current index in the string (after the char)
Branches to $5 if the char is not found.
@@ -650,9 +666,11 @@
Matches greedily the repetition of the literal passed in the third
parameter.
-It never fails, and it doesn't save the intermediate points in the stack.
+It never fails, and it doesn't save the intermediate points in the
+stack.
-If you need to backtrack over rx_literal_all, you should manage it manually:
+If you need to backtrack over rx_literal_all, you should manage it
+manually:
set I2, I1 # save the start point
rx_literal_all S0, I1, "lit" # lit *
@@ -749,7 +767,7 @@
str_length = string_length($1);
idx = $2;
- bmp = $3->data;
+ bmp = PMC_data($3);
while (idx < str_length) {
if(! bitmap_match(bmp, string_index($1,idx) ) ) {
@@ -777,18 +795,19 @@
=item C<rx_makebmp>(out pmc, in str)
-This op pre-generates bitmaps to be used with C<rx_oneof_bmp>, increasing performance.
-The first parameter will be set to a Pointer to the bitmap; the second parameter
-is the string to be bitmapped.
-
-Note that bitmaps are currently NOT compatible with characters above 255 (as defined by
-whatever character set you're using). This may change in the future.
+This op pre-generates bitmaps to be used with C<rx_oneof_bmp>,
+increasing performance. The first parameter will be set to a Pointer
+to the bitmap; the second parameter is the string to be bitmapped.
+
+Note that bitmaps are currently NOT compatible with characters above
+255 (as defined by whatever character set you're using). This may
+change in the future.
=cut
op rx_makebmp(out pmc, in str) {
$1=pmc_new(interpreter, enum_class_Pointer);
- $1->data=(void*)bitmap_make(interpreter, $2);
+ PMC_data($1)=(void*)bitmap_make(interpreter, $2);
goto NEXT();
}
@@ -803,20 +822,21 @@
=head3 Tutorial
-Let's see how simple regexes using the Rx4 engine. This examples will show inlined
-regexes (i.e., regexes that appear on the middle of perl code, of the kind that was
-so popular in the old perl5 days).
-
-We won't deal then with named regular expressions (also known as rules) and the
-conventions used to call them (we expect that some form of the standard calling
-conventions will be used).
-
-First of all, let's explain the concept behind the Rx4 ops. During the life-time
-of a match, we keep the state of the match in concrete, well-known (by the
-compiler) registers. There are at least three registers needed to save the state
-of match, which we will call S0, I0, I1 (there is no particular reason to use
-this three registers, the compiler can choose any registers of the right type that
-are free during the match, but we will use this ones in all our examples).
+Let's see how simple regexes work using the Rx4 engine. These examples
+will show inlined regexes (i.e., regexes that appear in the middle of
+perl code, of the kind that was so popular in the old perl5 days).
+
+We won't deal then with named regular expressions (also known as
+rules) and the conventions used to call them (we expect that some form
+of the standard calling conventions will be used).
+
+First of all, let's explain the concept behind the Rx4 ops. During the
+life-time of a match, we keep the state of the match in concrete,
+well-known (by the compiler) registers. There are at least three
+registers needed to save the state of match, which we will call S0,
+I0, I1 (there is no particular reason to use this three registers, the
+compiler can choose any registers of the right type that are free
+during the match, but we will use this ones in all our examples).
The purpose of this registers is:
@@ -837,19 +857,20 @@
=back
-As we will see, most of the rx opcodes read or modify at least one of this
-registers. Sometimes, the compiler can decide to use some other registers, to
-save temporary information about the match (like the position of the begining
-of a group, for example).
+As we will see, most of the rx opcodes read or modify at least one of
+these registers. Sometimes, the compiler can decide to use some other
+registers, to save temporary information about the match (like the
+position of the beginning of a group, for example).
-Let's start with a really simple regex. Imagine that we want to compile the code
+Let's start with a really simple regex. Imagine that we want to
+compile the code
if (/^foobar/) { print 1 };
-Now, this can done in a very simple way. Assuming that we have managed to put the
-string contents of $_ into S0, the code would be:
+Now, this can be done in a very simple way. Assuming that we have managed
+to put the string contents of $_ into S0, the code would be:
$start:
set I0, 0
@@ -874,9 +895,10 @@
-This was a very simple kind of pattern. Imagine now that we wanted to search for "foobar"
-in any point of the string. We would need to add some form of loop that iterates over
-the characters of the string, looking for the match "foobar". This example shows how:
+This was a very simple kind of pattern. Imagine now that we wanted to
+search for "foobar" in any point of the string. We would need to add
+some form of loop that iterates over the characters of the string,
+looking for the match "foobar". This example shows how:
set I0, 0
$start:
@@ -935,8 +957,8 @@
The list below gives simple templates for common quantifiers operations.
-(This templates could be heavily optimized in the particular case that "x" is
-a literal. But that's not the point here.)
+(This templates could be heavily optimized in the particular case that
+"x" is a literal. But that's not the point here.)
=over 4
@@ -1005,7 +1027,8 @@
=item C<x??>
$start:
- set I2, 0 #I2 used to make sure we haven't backtracked before
+ set I2, 0 #I2 used to make sure we haven't
+ # backtracked before
branch $next
$back:
if I2, $lastback
@@ -1016,8 +1039,8 @@
=item C<x|y|z>
$start:
- set I2, I1 #I2 is used to save the begining of the group
- set I3, -6 #I3 is used to store the next alternation in the group, expressed
+ set I2, I1 #I2 the begining of the group
+ set I3, -6 #I3 next alternation in the group, expressed
#as an offset from the branch point
rx_literal S0, I1, "x", $alt2
@@ -1068,8 +1091,8 @@
=item *
-This code currently requires everything to be in an eight-bit encoding compatible
-with ASCII.
+This code currently requires everything to be in an eight-bit encoding
+compatible with ASCII.
=item *
@@ -1081,8 +1104,8 @@
=item *
-The implementation of perl6 advanced features such as subrules, or hypothetical
-variables has not been resolved.
+The implementation of perl6 advanced features such as subrules, or
+hypothetical variables has not been resolved.
=item *
@@ -1094,9 +1117,10 @@
Copyright (C) 2001-2002 The Parrot Team <[EMAIL PROTECTED]>.
-Initial version by Brent Dax <[EMAIL PROTECTED]>; special thanks to Angel
-Faus <[EMAIL PROTECTED]> and Jeff 'japhy' Pinyan <[EMAIL PROTECTED]> for major help,
-especially with decisions on the architecture of the engine.
+Initial version by Brent Dax <[EMAIL PROTECTED]>; special thanks to
+Angel Faus <[EMAIL PROTECTED]> and Jeff 'japhy' Pinyan
+<[EMAIL PROTECTED]> for major help, especially with decisions on the
+architecture of the engine.
=cut