cvs commit: parrot core.ops io.ops rx.ops

Leopold Toetsch Fri, 06 Jun 2003 09:27:25 -0700

cvsuser     03/06/06 09:27:00


  Modified:    .        core.ops io.ops rx.ops
  Log:
  PMC-data-3: ops files; rx.ops reformatted
  
  Revision  Changes    Path
  1.277     +7 -6      parrot/core.ops
  
  Index: core.ops
  ===================================================================
  RCS file: /cvs/public/parrot/core.ops,v
  retrieving revision 1.276
  retrieving revision 1.277
  diff -u -w -r1.276 -r1.277
  --- core.ops  6 Jun 2003 12:01:35 -0000       1.276
  +++ core.ops  6 Jun 2003 16:27:00 -0000       1.277
  @@ -4317,12 +4317,12 @@
   op newinterp(out PMC, in INT) {
     struct Parrot_Interp *new_interp;
     struct PMC *new_pmc;
  +  new_pmc = new_pmc_header(interpreter);
  +  $1 = new_pmc;
     new_interp = make_interpreter((Interp_flags)$2);
     new_interp->parent_interpreter = interpreter;
  -  new_pmc = new_pmc_header(interpreter);
  -  new_pmc->data = new_interp;
  +  PMC_data(new_pmc) = new_interp;
     new_pmc->vtable = YOU_LOSE_VTABLE;
  -  $1 = new_pmc;
     goto NEXT();
   }
   
  @@ -4334,7 +4334,8 @@
   =cut
   
   op runinterp(inout PMC, in INT) {
  -  struct Parrot_Interp * new_interp = (struct Parrot_Interp *)$1->data;
  +  struct Parrot_Interp * new_interp =
  +      (struct Parrot_Interp *)PMC_data($1);
     Interp_flags_SET(new_interp, PARROT_EXTERN_CODE_FLAG);
     new_interp->code = interpreter->code;
     runops(new_interp, REL_PC + $2);
  @@ -4655,7 +4656,7 @@
        PANIC("Failed to load native library");
     }
     pmc = new_pmc_header(interpreter);
  -  pmc->data = (void *)p;
  +  PMC_data(pmc) = (void *)p;
     $1 = pmc;
     goto NEXT();
   }
  @@ -4664,7 +4665,7 @@
     char * name = string_to_cstring(interpreter, ($3));
     PMC *nci;
   
  -  Parrot_csub_t p = (Parrot_csub_t)D2FPTR(Parrot_dlsym(($2)->data, name));
  +  Parrot_csub_t p = (Parrot_csub_t)D2FPTR(Parrot_dlsym(PMC_data($2), name));
     free(name);
     if(p == NULL) {
        const char * err = Parrot_dlerror();
  
  
  
  1.20      +12 -11    parrot/io.ops
  
  Index: io.ops
  ===================================================================
  RCS file: /cvs/public/parrot/io.ops,v
  retrieving revision 1.19
  retrieving revision 1.20
  diff -u -w -r1.19 -r1.20
  --- io.ops    27 May 2003 19:05:25 -0000      1.19
  +++ io.ops    6 Jun 2003 16:27:00 -0000       1.20
  @@ -36,7 +36,7 @@
   =cut
   
   inline op close(inout PMC) {
  -     PIO_close(interpreter, (ParrotIO*)(($1)->data));
  +     PIO_close(interpreter, (ParrotIO*)(PMC_data($1)));
        goto NEXT();
   }
   
  @@ -258,7 +258,7 @@
   
   op print(in PMC, in STR) {
     ParrotIO * io;
  -  io = (ParrotIO*)($1->data);
  +  io = (ParrotIO*)(PMC_data($1));
     if ($2 && io) {
       PIO_write(interpreter, io, ($2)->strstart, string_length($2));
     }
  @@ -366,7 +366,8 @@
       n = $3; 
     $1 = string_make(interpreter, NULL, n, NULL, 0, NULL);
     memset(($1)->strstart, 0, n);
  -  nr = PIO_read(interpreter, (ParrotIO*)($2->data), ($1)->strstart, (size_t)n);
  +  nr = PIO_read(interpreter, (ParrotIO*)(PMC_data($2)),
  +    ($1)->strstart, (size_t)n);
     if(nr > 0)
       ($1)->strlen = ($1)->bufused = nr;
     else
  @@ -416,7 +417,7 @@
   
   op seek(out INT, in PMC, in INT, in INT) {
     ParrotIO * io;
  -  io = (ParrotIO*)($2->data);
  +  io = (ParrotIO*)(PMC_data($2));
     if (io) {
       $1 = (INTVAL)PIO_seek(interpreter, io, 0, $3, $4);
     }
  @@ -425,7 +426,7 @@
   
   op seek(out INT, in PMC, in INT, in INT, in INT) {
     ParrotIO * io;
  -  io = (ParrotIO*)($2->data);
  +  io = (ParrotIO*)(PMC_data($2));
     if (io) {
       $1 = (INTVAL)PIO_seek(interpreter, io, $3, $4, $5);
     }
  
  
  
  1.35      +217 -193  parrot/rx.ops
  
  Index: rx.ops
  ===================================================================
  RCS file: /cvs/public/parrot/rx.ops,v
  retrieving revision 1.34
  retrieving revision 1.35
  diff -u -w -r1.34 -r1.35
  --- rx.ops    7 Apr 2003 17:48:19 -0000       1.34
  +++ rx.ops    6 Jun 2003 16:27:00 -0000       1.35
  @@ -54,26 +54,30 @@
   
   =head1 DESCRIPTION
   
  -The Perl 5 regular expression engine was state-of-the-art.  It was the fastest and 
  -most featureful implementation available.  Everybody used Perl 5's regular expression
  -syntax wherever possible.
  +The Perl 5 regular expression engine was state-of-the-art.  It was the
  +fastest and most featureful implementation available.  Everybody used
  +Perl 5's regular expression syntax wherever possible.
   
   The Perl 5 regular expression engine was also a mess.
   
  -The engine was like a separate interpreter unto itself.  Few understood its dark magic, 
  -and fewer worked on its baroque source.  It was a black box, sealed off from the outside 
  -world with only a couple opcodes to show in other files.  It was the slowest part of Perl 
  -to adapt to new features--it was one of the last to get threadsafety and full Unicode 
  -support--because so few people understood it.  Larry Wall once said that three people 
  -understood the regex engine, give or take four.
  -
  -Because of these issues, the design documents for Parrot called for regular expression 
  -opcodes to be built in to the interpreter.  This group of opcodes, called the Parrot 
  -Regular Expression Engine version 4.0 (or simply Rx4), is the result.
  +The engine was like a separate interpreter unto itself.  Few
  +understood its dark magic, and fewer worked on its baroque source.  It
  +was a black box, sealed off from the outside world with only a couple
  +opcodes to show in other files.  It was the slowest part of Perl to
  +adapt to new features--it was one of the last to get threadsafety and
  +full Unicode support--because so few people understood it.  Larry Wall
  +once said that three people understood the regex engine, give or take
  +four.
  +
  +Because of these issues, the design documents for Parrot called for
  +regular expression opcodes to be built in to the interpreter.  This
  +group of opcodes, called the Parrot Regular Expression Engine version
  +4.0 (or simply Rx4), is the result.
   
   =head2 Basic Concepts
   
  -Perl 5 had one opcode for each operation in the regular expression.  For example:
  +Perl 5 had one opcode for each operation in the regular expression.
  +For example:
   
        >perl -mre=debug -e '/ab+[cd]/'
        Compiling REx `ab+[cd]'
  @@ -86,9 +90,10 @@
        anchored `ab' at 0 floating `b' at 1..2147483647 (checking anchored) minlen 3
        Freeing REx: `ab+[cd]'
   
  -(The C<re> pragma with the 'debug' switch displays the compiled version of the regex.
  -The numbers in parenthesis represent where to jump to on success; 0 is a
  -special value meaning "this part of the regex is done".)
  +(The C<re> pragma with the 'debug' switch displays the compiled
  +version of the regex.  The numbers in parenthesis represent where to
  +jump to on success; 0 is a special value meaning "this part of the
  +regex is done".)
   
   In Rx4, that regular expression would be something like:
   
  @@ -108,42 +113,49 @@
                rx_oneof S0, I1, "cd", $backtrack
                branch $success
   
  -(In Rx4, the last parameter is a label to branch to on I<failure>, not success.)
  +(In Rx4, the last parameter is a label to branch to on I<failure>, not
  +success.)
   
  -10 operations in Rx4 to 5 in Perl 5.  I can already hear the cynicism: "how could
  -that be BETTER?!?"  Well, there's several reasons.
  +10 operations in Rx4 to 5 in Perl 5.  I can already hear the cynicism:
  +"how could that be BETTER?!?"  Well, there's several reasons.
   
  -The first is that it frees us to use normal ops, and in fact they're used all the 
  -time.  C<branch> is a normal op; so is C<bsr>, the normal way to call a subrule. 
  -Things like C<(?{CODE})> can be implemented with relative ease--simply put the 
  -normal opcodes in the appropriate place in the regex.  If you're debugging
  -a regex, you can simply sprinkle output messages liberally throughout the regex.
  -
  -The second is opcode dispatch.  Parrot has very fast opcode dispatch, and we can use
  -that to our advantage.
  -
  -Finally, there's the matter of optimizations.  As an example, take C</a+bc+/>.  The
  -most efficient way to look for that is probably to look for the constant string 'abc'
  -and expand outwards from there--especially if you use Boyer-Moore or another fast
  -search algorithm.  It means that the code generator can decide whether to optimize
  -for success or failure, for compilation or execution speed.  You get the idea.
  -
  -Bottom line is, Rx4 lays out exactly what's going on.  This is a feature.  It gives the
  -regex compiler total control over what's going on.
  +The first is that it frees us to use normal ops, and in fact they're
  +used all the time.  C<branch> is a normal op; so is C<bsr>, the normal
  +way to call a subrule.  Things like C<(?{CODE})> can be implemented
  +with relative ease--simply put the normal opcodes in the appropriate
  +place in the regex.  If you're debugging a regex, you can simply
  +sprinkle output messages liberally throughout the regex.
  +
  +The second is opcode dispatch.  Parrot has very fast opcode dispatch,
  +and we can use that to our advantage.
  +
  +Finally, there's the matter of optimizations.  As an example, take
  +C</a+bc+/>.  The most efficient way to look for that is probably to
  +look for the constant string 'abc' and expand outwards from
  +there--especially if you use Boyer-Moore or another fast search
  +algorithm.  It means that the code generator can decide whether to
  +optimize for success or failure, for compilation or execution speed.
  +You get the idea.
  +
  +Bottom line is, Rx4 lays out exactly what's going on.  This is a
  +feature.  It gives the regex compiler total control over what's going
  +on.
   
   =head2 The Opcodes
   
   There are two basic rules to how the opcodes operate.
   
  -The first rule is that the first argument to each opcode is the string we are 
  -matching against, and the second one is the current index in the string.
  -
  -The second rule pertains to the ops that have an integer constant as their last parameter.
  -For the most part, these ops will branch to that parameter if they 'fail'.  For most ops, 
  -'fail' means 'fail to match'.
  +The first rule is that the first argument to each opcode is the string
  +we are matching against, and the second one is the current index in
  +the string.
  +
  +The second rule pertains to the ops that have an integer constant as
  +their last parameter.  For the most part, these ops will branch to
  +that parameter if they 'fail'.  For most ops, 'fail' means 'fail to
  +match'.
   
  -If the documentation for an op doesn't specifically mention the first or last parameter, 
  -that's what they are.
  +If the documentation for an op doesn't specifically mention the first
  +or last parameter, that's what they are.
   
   The documentation for each opcode follows.
   
  @@ -162,9 +174,10 @@
   
   =item C<rx_compile>(out str, in str, in str)
   
  -Provides a built-in regular expression compiler.  The first parameter is set to the
  -address of the newly-compiled regex, which can then be C<jsr>'ed to; the second 
  -parameter is the regex itself; and the third parameter is the modifiers on the regex.
  +Provides a built-in regular expression compiler.  The first parameter
  +is set to the address of the newly-compiled regex, which can then be
  +C<jsr>'ed to; the second parameter is the regex itself; and the third
  +parameter is the modifiers on the regex.
   
   B<XXX> Currently this op has not been implemented.
   
  @@ -213,8 +226,9 @@
   
   =item C<rx_pushmark>()
   
  -Pushes a 'mark' onto the stack contained in the info structure.  Marks are used
  -to indicate where one operation's backtrack information ends and another's begins.
  +Pushes a 'mark' onto the stack contained in the info structure.  Marks
  +are used to indicate where one operation's backtrack information ends
  +and another's begins.
   
   =cut
   
  @@ -228,8 +242,8 @@
   
   =item C<rx_popindex>(out int, inconst int)
   
  -Pops an index off the stack.  If it pops a mark off instead, it branches to the 
  -second parameter.
  +Pops an index off the stack.  If it pops a mark off instead, it
  +branches to the second parameter.
   
   =cut
   
  @@ -265,8 +279,8 @@
   
   =item C<rx_advance>(in str, inout int, inconst int)
   
  -Increments the start index one character.  Branches to the third parameter
  -if it goes past the end of the string.
  +Increments the start index one character.  Branches to the third
  +parameter if it goes past the end of the string.
   
   $2 is the current value of start_index. 
   
  @@ -405,10 +419,11 @@
   
   =item C<rx_oneof>(in str, inout int, in pmc, inconst int)
   
  -Matches if the current character is one of the characters in the third parameter.
  +Matches if the current character is one of the characters in the third
  +parameter.
   
  -This op requires that its input be sorted for efficiency.  Further, it requires that all
  -ranges (C<a-z>) be expanded by the regex compiler.
  +This op requires that its input be sorted for efficiency.  Further, it
  +requires that all ranges (C<a-z>) be expanded by the regex compiler.
   
   =cut
   
  @@ -437,8 +452,8 @@
   
   =item C<rx_oneof_bmp>(in str, inout int, in pmc, inconst int)
   
  -This op has the exact same behavior as C<rx_oneof>, except that the third parameter 
  -is a Pointer to a bitmap generated by C<rx_makebmp>.
  +This op has the exact same behavior as C<rx_oneof>, except that the
  +third parameter is a Pointer to a bitmap generated by C<rx_makebmp>.
   
   =cut
   
  @@ -449,7 +464,7 @@
        }
        
        
  -     if(bitmap_match($3->data, string_index($1,$2) )) { 
  +     if(bitmap_match(PMC_data($3), string_index($1,$2) )) {
                $2++;
                goto NEXT();
        }
  @@ -463,8 +478,8 @@
   
   =item C<rx_dot>(in str, inout int, inconst int)
   
  -Matches any character. This currently works exactly like rx_advance, but we 
  -leave it here in case they have to diverge in the future.
  +Matches any character. This currently works exactly like rx_advance,
  +but we leave it here in case they have to diverge in the future.
   
   =cut
   
  @@ -479,8 +494,8 @@
   
   =item C<rx_zwa_boundary>(in str, in int, inconst int)
   
  -Matches if the one of the previous character and the next character is a word
  -character, and the other one is not (usually C<\b>).
  +Matches if the one of the previous character and the next character is
  +a word character, and the other one is not (usually C<\b>).
   
   =cut
   
  @@ -542,8 +557,9 @@
   
   =item C<rx_search>(in str, out int, inout int, in str, inconst in) 
   
  -Searches for the literal $4 on the string $1 starting at $3. Sets $2 to the current
  -index in the string (after the literal), and $3 to start_index.
  +Searches for the literal $4 on the string $1 starting at $3. Sets $2
  +to the current index in the string (after the literal), and $3 to
  +start_index.
   
   Branches to $5 if the literal is not found.
   
  @@ -597,8 +613,8 @@
   
   =item C<rx_search_char> (in str, out int, inout int, in str, inconst in) 
   
  -Searches for the char $4 on the string $1 starting at $3. Sets $2 to the current
  -index in the string (after the char)
  +Searches for the char $4 on the string $1 starting at $3. Sets $2 to
  +the current index in the string (after the char)
   
   Branches to $5 if the char is not found.
   
  @@ -650,9 +666,11 @@
   Matches greedily the repetition of the literal passed in the third 
   parameter. 
   
  -It never fails, and it doesn't save the intermediate points in the stack.
  +It never fails, and it doesn't save the intermediate points in the
  +stack.
   
  -If you need to backtrack over rx_literal_all, you should manage it manually:
  +If you need to backtrack over rx_literal_all, you should manage it
  +manually:
   
                set I2, I1                     # save the start point
                rx_literal_all S0, I1, "lit"   # lit *
  @@ -749,7 +767,7 @@
   
        str_length = string_length($1);
        idx = $2;
  -     bmp = $3->data;
  +     bmp = PMC_data($3);
   
        while (idx < str_length) { 
                if(! bitmap_match(bmp, string_index($1,idx) ) ) { 
  @@ -777,18 +795,19 @@
   
   =item C<rx_makebmp>(out pmc, in str)
   
  -This op pre-generates bitmaps to be used with C<rx_oneof_bmp>, increasing performance.
  -The first parameter will be set to a Pointer to the bitmap; the second parameter
  -is the string to be bitmapped.
  -
  -Note that bitmaps are currently NOT compatible with characters above 255 (as defined by
  -whatever character set you're using).  This may change in the future.
  +This op pre-generates bitmaps to be used with C<rx_oneof_bmp>,
  +increasing performance.  The first parameter will be set to a Pointer
  +to the bitmap; the second parameter is the string to be bitmapped.
  +
  +Note that bitmaps are currently NOT compatible with characters above
  +255 (as defined by whatever character set you're using).  This may
  +change in the future.
   
   =cut
   
   op rx_makebmp(out pmc, in str) {
        $1=pmc_new(interpreter, enum_class_Pointer);
  -     $1->data=(void*)bitmap_make(interpreter, $2);
  +     PMC_data($1)=(void*)bitmap_make(interpreter, $2);
        
        goto NEXT();
   }
  @@ -803,20 +822,21 @@
   
   =head3 Tutorial
   
  -Let's see how simple regexes using the Rx4 engine. This examples will show inlined 
  -regexes (i.e., regexes that appear on the middle of perl code, of the kind that was
  -so popular in the old perl5 days).
  -
  -We won't deal then with named regular expressions (also known as rules) and the 
  -conventions used to call them (we expect that some form of the standard calling
  -conventions will be used).
  -
  -First of all, let's explain the concept behind the Rx4 ops. During the life-time
  -of a match, we keep the state of the match in concrete, well-known (by the 
  -compiler) registers. There are at least three registers needed to save the state 
  -of match, which we will call S0, I0, I1 (there is no particular reason to use 
  -this three registers, the compiler can choose any registers of the right type that 
  -are free during the match, but we will use this ones in all our examples).
  +Let's see how simple regexes using the Rx4 engine. This examples will
  +show inlined regexes (i.e., regexes that appear on the middle of perl
  +code, of the kind that was so popular in the old perl5 days).
  +
  +We won't deal then with named regular expressions (also known as
  +rules) and the conventions used to call them (we expect that some form
  +of the standard calling conventions will be used).
  +
  +First of all, let's explain the concept behind the Rx4 ops. During the
  +life-time of a match, we keep the state of the match in concrete,
  +well-known (by the compiler) registers. There are at least three
  +registers needed to save the state of match, which we will call S0,
  +I0, I1 (there is no particular reason to use this three registers, the
  +compiler can choose any registers of the right type that are free
  +during the match, but we will use this ones in all our examples).
   
   The purpose of this registers is:
   
  @@ -837,19 +857,20 @@
   
   =back
   
  -As we will see, most of the rx opcodes read or modify at least one of this 
  -registers. Sometimes, the compiler can decide to use some other registers, to
  -save temporary information about the match (like the position of the begining
  -of a group, for example).
  +As we will see, most of the rx opcodes read or modify at least one of
  +this registers. Sometimes, the compiler can decide to use some other
  +registers, to save temporary information about the match (like the
  +position of the begining of a group, for example).
   
  -Let's start with a really simple regex. Imagine that we want to compile the code
  +Let's start with a really simple regex. Imagine that we want to
  +compile the code
   
     if (/^foobar/) { print 1 };
    
   
   
  -Now, this can done in a very simple way. Assuming that we have managed to put the
  -string contents of $_ into S0, the code would be:
  +Now, this can done in a very simple way. Assuming that we have managed
  +to put the string contents of $_ into S0, the code would be:
   
        $start:
                set I0, 0
  @@ -874,9 +895,10 @@
   
   
   
  -This was a very simple kind of pattern. Imagine now that we wanted to search for "foobar"
  -in any point of the string. We would need to add some form of loop that iterates over 
  -the characters of the string, looking for the match "foobar". This example shows how:
  +This was a very simple kind of pattern. Imagine now that we wanted to
  +search for "foobar" in any point of the string. We would need to add
  +some form of loop that iterates over the characters of the string,
  +looking for the match "foobar". This example shows how:
   
                set I0, 0
        $start:
  @@ -935,8 +957,8 @@
   
   The list below gives simple templates for common quantifiers operations.
   
  -(This templates could be heavily optimized in the particular case that "x" is 
  -a literal. But that's not the point here.)
  +(This templates could be heavily optimized in the particular case that
  +"x" is a literal. But that's not the point here.)
   
   =over 4
   
  @@ -1005,7 +1027,8 @@
   =item C<x??>
   
        $start:
  -             set I2, 0       #I2 used to make sure we haven't backtracked before
  +             set I2, 0       #I2 used to make sure we haven't
  +                             # backtracked before
                branch $next
        $back:
                if I2, $lastback
  @@ -1016,8 +1039,8 @@
   =item C<x|y|z>
   
        $start:
  -             set I2, I1       #I2 is used to save the begining of the group
  -             set I3, -6       #I3 is used to store the next alternation in the group, expressed
  +             set I2, I1       #I2  the begining of the group
  +             set I3, -6       #I3  next alternation in the group, expressed
                                 #as an offset from the branch point
                                 
                rx_literal S0, I1, "x", $alt2
  @@ -1068,8 +1091,8 @@
   
   =item *
   
  -This code currently requires everything to be in an eight-bit encoding compatible 
  -with ASCII.
  +This code currently requires everything to be in an eight-bit encoding
  +compatible with ASCII.
   
   =item *
   
  @@ -1081,8 +1104,8 @@
   
   =item *
   
  -The implementation of perl6 advanced features such as subrules, or hypothetical
  -variables has not been resolved.
  +The implementation of perl6 advanced features such as subrules, or
  +hypothetical variables has not been resolved.
   
   =item *
   
  @@ -1094,9 +1117,10 @@
   
   Copyright (C) 2001-2002 The Parrot Team <[EMAIL PROTECTED]>.
   
  -Initial version by Brent Dax <[EMAIL PROTECTED]>; special thanks to Angel
  -Faus <[EMAIL PROTECTED]> and Jeff 'japhy' Pinyan <[EMAIL PROTECTED]> for major help, 
  -especially with decisions on the architecture of the engine.
  +Initial version by Brent Dax <[EMAIL PROTECTED]>; special thanks to
  +Angel Faus <[EMAIL PROTECTED]> and Jeff 'japhy' Pinyan
  +<[EMAIL PROTECTED]> for major help, especially with decisions on the
  +architecture of the engine.
   
   =cut

cvs commit: parrot core.ops io.ops rx.ops

Reply via email to