[EMAIL PROTECTED] (Jeff) writes: > The subject pretty much says it all. The format pretty much corresponds > to the upcoming Exegesis. Major changes were to the modifiers, and a few > syntax changes in the depths.
I've started rewriting my Shishi P6 RE module since it was becoming way too cluttered. Here's the P::RD it uses, in case anyone is interested. It covers rather more of A4 than Jeff's version, and is a bit easier to turn into real Shishi nodes. package Shishi::Perl6RE; use Parse::RecDescent; use Data::Dumper; undef $/; my $g = <DATA>; my $re = new Parse::RecDescent($g); print Dumper($re->expr(shift)); __DATA__ expr : term altlist(?) { $return = bless { mainline => $item[1], @{$item[2]} ? ( alternation => [@{$item[2][0]}] ) : () }, "P6RE::Expression" } altlist : '|' term altlist { $return = [ $item[2], @{$item[3]} ] } | '|' <commit> term { $return = [ $item[3] ] } term : factor factor(s?) { bless [ $item[1], $item[2] && @{$item[2]} ], "P6RE::Term" } factor : bit repspec { $return = { type => "repeat", repspec => $item[2], stuff=> $item[1] } } | bit | terminal_dollar bit : brackets | colons | hypo | interp_scalar | "\\" <commit> backwhack | '.' { $return = { type => "any" } } | ordinary_string hypo : /[\$\@\%]\w+/ ':=' term # Very ugly. { $return = { type => "hypo", target => $item[1], expr => $item[3] } } interp_scalar: '$' /\w+/ { $return = { type => "text", target => "\$$item[2]", interp => 1 } } brackets : '<' anglestuff '>' { $return = $item[2] } | '(' <commit> expr ')' { $return = [ { type => "start" }, $item[3], { type => "stop" } ] } | '[' <commit> expr ']' { $return = $item[3] } # Brackets are merely syntactic | { if ($return = extract_codeblock($text, '{')) { $return = { type => "code", code => $return } } } repspec : /[+?*]\??/ | /<!?\d*,\d*>\??/ | /<!?\$\w+,\$\w+\??/ colons : /:{1,4}/ { $return = {type => "backtrack", severity => length $item[0] } } anglestuff : /!?(\d*,\d*|\$\w+,\$\w+)>\??/ <reject> # I'm actually a repetition specifier, not a directive | # <'foo'> { if ($return = extract_delimited($text, "'")) { $return =~ s/^.//; chop $return; $return = { type => "text", target => $return } } } | 'commit' { $return = { type => "backtrack", severity => 4 } } | 'sp' { $return = { type => "char", target => " " } # XXX Not Unicode aware } | /!?\s*(before|after) expr/ { die "Unsure how to implement this" } | /[\&\$\@\%]\w+/ { $return = { type => "delayed", target => $item[1], interp=>1 } } | charclass { $return = { type => "class", target => $item[1] } } | '-' '<' charclass '>' { $return = { type => "negclass", target => $item[3] } } | /\w+/ { $return = { type => "subrule", target => $item[1] } } | { if ($return = extract_bracketed($text, '()')) { $return = { type => "code", code => $return, assertion => 1 } } } | { if ($return = extract_codeblock($text, '{}')) { $return = { type => "delayed", code => $return, interp => 1 } } } | { if ($return = extract_bracketed($text, '[]')) { chop $return; $return =~ s/.//; $return = { type => "anyof", target => $return } } } | '-' { if ($return = extract_bracketed($text, '[]')) { chop $return; $return =~ s/.//; $return = { type => "neganyof", target => $return } } } | '.' { $return = { type=>"anyuni" } } charclass : 'alpha' | 'ws' # add more here backwhack : /[dswDSW]/ { $return = { type => "class", target => $item[1] } } | /htvrfn/ { $return = { type => "metachar", target => $item[1] } } | /HTVRFN/ { $return = { type => "neganyofmetachar", target => $item[1] } } | /x[0-9a-fA-F]+/ { $return = { type => "char", target => chr(hex(substr($item[1],1))) } } | /X[0-9a-fA-F]+/ { $return = { type => "neganyof", target => chr(hex(substr($item[1],1))) } } | /[LUQ]/ { extract_bracketed($text,'[]') } { die "Um, not sure what to do with these yet" } ordinary_string: /[^\$@%()|{<]+(?![\+\?\*])/ { $return = { type => "text", target => $item[1] } } | /\w/ { $return = { type => "char", target => $item[1] } } terminal_dollar : '$' { $return = { type => "end" } }