[EMAIL PROTECTED] (Jeff) writes:
> The subject pretty much says it all. The format pretty much corresponds
> to the upcoming Exegesis. Major changes were to the modifiers, and a few
> syntax changes in the depths.

I've started rewriting my Shishi P6 RE module since it was becoming
way too cluttered.

Here's the P::RD it uses, in case anyone is interested. It covers
rather more of A4 than Jeff's version, and is a bit easier to turn
into real Shishi nodes.


package Shishi::Perl6RE;
use Parse::RecDescent;
use Data::Dumper;

undef $/;
my $g = <DATA>;

my $re = new Parse::RecDescent($g);

print Dumper($re->expr(shift));

__DATA__

expr        : term altlist(?)
                { $return = bless { 
                       mainline => $item[1], 
                       @{$item[2]} ? (
                        alternation => [@{$item[2][0]}] 
                       ) : () 
                  }, "P6RE::Expression" 
                }

altlist     : '|' term altlist
                { $return = [ $item[2], @{$item[3]} ] }
            | '|' <commit> term
                { $return = [ $item[3] ] }

term        : factor factor(s?)
                { bless [ $item[1], $item[2] && @{$item[2]} ], 
                    "P6RE::Term" 
                }

factor      : bit repspec
                { $return = { 
                    type => "repeat", 
                    repspec => $item[2], 
                    stuff=> $item[1] 
                  } 
                } 
            | bit
            | terminal_dollar

bit         : brackets
            | colons
            | hypo
            | interp_scalar
            | "\\" <commit> backwhack
            | '.' { $return = { type => "any" } }
            | ordinary_string

hypo        : /[\$\@\%]\w+/ ':=' term # Very ugly.
                { $return = { type => "hypo", 
                              target => $item[1], 
                              expr => $item[3] } 
                }
              
interp_scalar: '$' /\w+/ 
                { $return = { 
                    type => "text", 
                    target => "\$$item[2]", 
                    interp => 1 
                  }
                }

brackets    : '<' anglestuff '>' 
                { $return = $item[2] }
            | '(' <commit> expr ')'
                { $return = [
                        { type => "start" }, 
                        $item[3], 
                        { type => "stop" }
                  ]
                }
            | '[' <commit> expr ']'
                { $return = $item[3] } # Brackets are merely syntactic
            | { 
                if ($return = extract_codeblock($text, '{')) {
                  $return = { type => "code", code => $return }
                }
              }

repspec : /[+?*]\??/
        | /<!?\d*,\d*>\??/
        | /<!?\$\w+,\$\w+\??/

colons      : /:{1,4}/ { $return = {type => "backtrack", severity => length $item[0] } 
}

anglestuff  : /!?(\d*,\d*|\$\w+,\$\w+)>\??/ <reject> 
              # I'm actually a repetition specifier, not a directive

            | # <'foo'>
       { 
        if ($return = extract_delimited($text, "'")) { 
            $return =~ s/^.//; chop $return;
            $return = { type => "text", target => $return }
        }
       }
            | 'commit' 
               { $return = { type => "backtrack", severity => 4 } }
            | 'sp'
               { $return = { type => "char", target => " " } 
                 # XXX Not Unicode aware
               }
            | /!?\s*(before|after) expr/
               { die "Unsure how to implement this" }
            | /[\&\$\@\%]\w+/
               { $return = { type => "delayed", target => $item[1], interp=>1 } }
            | charclass
               { $return = { type => "class", target => $item[1] } }
            | '-' '<' charclass '>'
               { $return = { type => "negclass", target => $item[3] } }
            | /\w+/ 
               { $return = { type => "subrule", target => $item[1] } }
            | { if ($return = extract_bracketed($text, '()')) {
                $return = { type => "code", code => $return, assertion => 1 }
                }
              }
            | { if ($return = extract_codeblock($text, '{}')) {
                $return = { type => "delayed", code => $return, interp => 1 }
                }
              }
            | { if ($return = extract_bracketed($text, '[]')) {
                chop $return; $return =~ s/.//; 
                $return = { type => "anyof", target => $return }
                }
              }
            | '-' { if ($return = extract_bracketed($text, '[]')) {
                chop $return; $return =~ s/.//; 
                $return = { type => "neganyof", target => $return }
                }
              }
            | '.' { $return = { type=>"anyuni" } }

charclass : 'alpha' | 'ws' # add more here

backwhack   : /[dswDSW]/ 
                { $return = { type => "class", target => $item[1] } }
            | /htvrfn/
                { $return = { type => "metachar", target => $item[1] } }
            | /HTVRFN/
                { $return = { type => "neganyofmetachar", target => $item[1] } }
            | /x[0-9a-fA-F]+/
                { $return = { type => "char", target => chr(hex(substr($item[1],1))) } 
}
            | /X[0-9a-fA-F]+/
                { $return = { type => "neganyof", target => 
chr(hex(substr($item[1],1))) } }
            | /[LUQ]/ { extract_bracketed($text,'[]') }
                { die "Um, not sure what to do with these yet" }

ordinary_string: /[^\$@%()|{<]+(?![\+\?\*])/
                { $return = { type => "text", target => $item[1] } }
            | /\w/ 
                { $return = { type => "char", target => $item[1] } }

terminal_dollar : '$' { $return = { type => "end" } }

Reply via email to