I keep expecting Damian or Larry or someone to step in with The True Grammar and make this obsolete -- does such a thing exist?
Changes in this version: - A bit more speed (though nowhere near enough). It comes from a combination of improving rule ordering, inlining some rules, moving most parse tree munging into post-processing functions, and (optionally) tweaking Parse::RecDescent to inline a couple frequently-called functions. - 'err' and '//' are included, and have the right precedence (cf Exegesis 4). - Dereferencing and indexing the current topic (".{blah}") now works. - Output is slightly less ugly. - Implicit currying variables ($^a etc) are in. I thought I had read somewhere they were gone in favor of closure args, but people seem to be using them, and they're not hard to put in. - loop(;;) { ... } - labels. Notably absent: - Distinguishing lvalues from rvalues. - anything from Apocalypse 5. Feedback and improvements welcome. /s
use Data::Dumper; use Getopt::Long; use strict; $Data::Dumper::Terse = 1; $Data::Dumper::Indent = 1; use Term::ReadLine; ###################################################################### # Argument context for functions and control structures ###################################################################### %::WANT = (); ############################## # Functions (list operators): # XXX: many of these need their own special want_* rules my $FUNCTION_ARGS = 'maybe_comma'; my @builtin_funcs = qw(crypt index pack rindex sprintf substr join unpack split push unshift splice warn die print printf read select syscall sysread sysseek syswrite truncate write vec chmod chown fcntl ioctl link open opendir rename symlink sysopen unlink return fail not); @::WANT{@builtin_funcs} = ($FUNCTION_ARGS) x @builtin_funcs; sub ::add_function { my $fname = shift->[1]; $::WANT{$fname} = shift || $FUNCTION_ARGS; 1; } ############################## # Loop control my @loop_control = qw(redo last next continue); @::WANT{@loop_control} = ('maybe_namepart') x @loop_control; ############################## # Unary operators # XXX: need to handle default $_ my @unary_ops = qw(chop chomp chr hex lc lcfirst length ord reverse uc ucfirst abs atan2 cos exp hex int log oct rand sin sqrt srand pop shift delete each exists keys values defined undef chdir chroot glob mkdir rmdir stat umask close); @::WANT{@unary_ops} = ('prefix') x @unary_ops; ############################## # Control operators my @control = qw(for given when default if elsif else grep map); @::WANT{@control} = map { "want_for_$_" } @control; ############################## # Named blocks my @special_blocks = qw(CATCH BEGIN END INIT AUTOLOAD PRE POST NEXT LAST FIRST try do); @::WANT{@special_blocks} = ('closure') x @special_blocks; ############################## # Classes (builtin and otherwise) %::CLASSES = (); my @builtin_types = qw(int real str HASH ARRAY SCALAR true false); # XXX: these are really constants @::CLASSES{@builtin_types} = @builtin_types; sub ::add_class { # seen class. my $c = shift->[1]; $::CLASSES{$c} = $c; 1; } # HACK to distinguish between "my ($a, $b) ..." and "foo ($a, $b)". # Don't need all keywords in here, but only the ones that cause # problems. %::KEYWORDS = (); @::KEYWORDS{qw(my our temp)} = 1; # (see Parse::RecDescent::Consumer) sub ::consumer { my $t = shift; my $old_len = length $t; return sub { my $len = length($_[0]); return substr($t, 0, ($old_len - $len)); }; } my %since; sub ::check_end { my ($type, $text) = @_; if ($since{$type}) { local $_ = $since{$type}->($text); return m/\A[\s\n]+\z/ || undef; } return undef; } sub ::mark_end { my ($type, $text) = @_; $since{$type} = ::consumer($text); } ###################################################################### my $literals = <<'END'; sv_literal: /(?:\d+(?:\.\d+)?|\.\d+)(?:[Ee]-?\d+)?/ | '{' <commit> hv_seq '}' | '[' <commit> av_seq(?) ']' | <perl_quotelike> av_seq: semi /[;,]?/ av_literal: '(' av_seq(?) ')' hv_seq: <leftop: pair ',' pair> /,?/ hv_literal: '(' hv_seq ')' END ###################################################################### $::NAMEPART = qr/[a-zA-Z_][\w_]*/; my $variables = <<'END'; variable: sigil <skip:''> varname sigil: /[\@\%\$\&]/ varname: name | /\d+/ | /[\!_]/ | '^' <commit> namepart | ('*')(?) '{' <skip:'\s*'> (scalar_expr | name) '}' name: /(?:::|\.|\*)?$::NAMEPART(::$::NAMEPART)*/o namepart: /$::NAMEPART/o END ###################################################################### $::COMPARE = qr{cmp | eq | [gnl]e | [gl]t | <=> | [<>=!]= | < | > }x; $::CONTEXT = qr{[\%\@\$\&*_?] | \+(?!\+)}x; $::MULDIV = qr{[\%*x] | /(?!/)}x; $::PREFIX = qr{[!~\\] | -(?![->])}x; $::ADDSUB = qr{[-+_]}; $::LOG_OR = qr{x?or | err}x; $::LOGOR = qr{\|\| | ~~ | //}x; $::FILETEST = qr{-[rwxoRWXOezsfdlpSbctugkTBMAC]+}; $::ASSIGN = qr{(?: ! | : # != and := | // # defined | &&? | \|\|? | ~~? # Logical and bitwise operators | << | >> # bitshifts | $::ADDSUB | $::MULDIV | \*\* # pow )? =}x; my $operators = <<'END'; hype: '^' <commit> <skip:''> <matchrule:$arg[0]> | <matchrule:$arg[0]> maybe_namepart: namepart | maybe_comma: comma[$arg[0]] | hv_indices: /[\w_]+/ | comma arglist: '(' comma(?) ')' subscript: <skip:''> '{' <commit> <skip:$item[1]> hv_indices '}' | <skip:''> '[' <commit> <skip:$item[1]> av_seq ']' | '(' comma(?) ')' subscriptable: name <commit> { exists($::KEYWORDS{$item[1][1]}) ? undef : 1 } arglist | '.' <commit> <skip:''> namepart(?) | '(' <commit> av_seq(?) ')' | variable context: /$::CONTEXT/o # context: '%' | '@' | '$' | '&' | '*' | '_' | '?' # | /\+(?!\+)/ # numeric context... term: '<' <commit> expr(?) '>' | subscriptable <commit> subscript(s?) | /$::CONTEXT/o <commit> term | sv_literal | class | closure apply_rhs: namepart <commit> subscript(s?) | subscript(s) apply: <leftop: term hype['apply_op'] apply_rhs> apply_op: '.' incr: hype['incr_op'] <commit> apply | apply hype['incr_op'](?) incr_op: '++' | '--' pow: <leftop: incr hype['pow_op'] prefix> pow_op: '**' prefix: filetest_op <commit> prefix | hype['prefix_op'] <commit> prefix | name { $::WANT{$item[1][1]} } <matchrule:$item[2]> | pow # prefix_op: '!' | '~' | '\\' | /-(?![->])/ prefix_op: /$::PREFIX/o filetest_op: /$::FILETEST/o pair: namepart '=>' <commit> prefix | prefix '=>' prefix maybe_pair: namepart '=>' <commit> prefix | prefix ('=>' prefix)(?) match: <leftop: maybe_pair hype['match_op'] maybe_pair> match_op: '=~' | '!~' muldiv: <leftop: match hype['muldiv_op'] match> # muldiv_op: '*' | '/' | '%' | 'x' muldiv_op: /$::MULDIV/o addsub: <leftop: muldiv hype['addsub_op'] muldiv> # addsub_op: '+' | '-' | '_' addsub_op: /$::ADDSUB/o bitshift: <leftop: addsub hype['bitshift_op'] addsub> bitshift_op: '<<' | '>>' compare: <leftop: bitshift hype['compare_op'] bitshift> compare_op: /$::COMPARE/o # compare_op: '<=>' | '<=' | '==' | '>=' | '<' | '>' | '!=' # | 'eq' | 'ge' | 'ne' | 'le' | 'lt' | 'gt' | 'cmp' bitand: <leftop: compare hype['bitand_op'] compare> bitand_op: '&' bitor: <leftop: bitand hype['bitor_op'] bitand> bitor_op: '|' | '~' logand: <leftop: bitor hype['logand_op'] bitor> logand_op: '&&' logor: <leftop: logand hype['logor_op'] logand> # logor_op: '||' | '~~' | '//' logor_op: /$::LOGOR/o range: logor (range_op logor)(?) range_op: '..' ternary: range ('??' ternary '::' ternary)(?) scope: 'my' | 'temp' | 'our' class: name { $::CLASSES{$item[1][1]} } { bless ['class', $item[1]], 'Perl6::class' } scope_class: scope <commit> class(?) | class property: name { ($item[1][1] ne $arg[0]) || undef } arglist(?) and_prop: "$arg[0]" <commit> property[$arg[0]] | property[$arg[0]] props: "$arg[0]" <commit> property and_prop[$arg[0]](s?) | # nothing decl: '(' <commit> <leftop: variable ',' variable> ')' props['are'] | variable props['is'] assign: assign_lhs assign_rhs(s?) assign_lhs: scope_class decl | ternary assign_rhs: hype['assign_op'] scalar_expr assign_op: /$::ASSIGN/o # assign_op: /[!:]?=/ <commit> # | assignable_op <skip:''> '=' # assignable_op: '//' # | logand_op | logor_op # | bitand_op | bitor_op | bitshift_op # | addsub_op | muldiv_op | pow_op scalar_expr: assign but(s?) but: 'but' assign comma: <leftop: <matchrule:@{[$arg[0] || 'scalar_expr']}> comma_op <matchrule:@{[$arg[0] || 'scalar_expr']}> > comma_op: ',' semi: <leftop: comma semi_op comma> semi_op: ';' adverb: scalar_expr adv_clause(?) adv_clause: /:(?!:)/ comma['scalar_expr'] log_AND: <leftop: adverb hype['log_AND_op'] adverb> log_AND_op: 'and' log_OR: <leftop: log_AND hype['log_OR_op'] log_AND> # log_OR_op: 'or' | 'xor' | 'err' log_OR_op: /$::LOG_OR/o expr: log_OR END ###################################################################### my $declarations = <<'END'; params: '(' (_params ',')(?) '*' <commit> '@' namepart ')' | '(' <commit> _params(?) (';' _params)(?) ')' | # nothing _params: <leftop: _param ',' _param> _param: scope_class(?) variable props['is'] initializer(?) initializer: hype['assign_op'] expr END ###################################################################### my $statements = <<'END'; prog: /\A/ stmts /\z/ | <error> stmts: <leftop: stmt stmt_sep stmt> stmt_sep(?) | # nothing stmt_sep: ';' | { ::check_end('block', $text) } | { ::check_end('label', $text) } stmt: namepart ':' { ::mark_end('label', $text);1 } '' | directive <commit> name comma(?) | 'method' <commit> name params props['is'] block | 'loop' <commit> '(' scalar_expr ';' scalar_expr ';' scalar_expr ')' block | scope(?) 'sub' <commit> name { ::add_function($item[4]);1 } params props['is'] block | scope(?) 'class' <commit> name { ::add_class($item[4]);1 } props['is'] block | expr guard(?) directive: 'package' | 'module' | 'use' guard: ('if' | 'unless' | 'while') <commit> scalar_expr | 'for' expr block: start_block '...' <commit> '}' { ::mark_end('block', $text);1; } '' | start_block stmts '}' { ::mark_end('block', $text);1; } '' start_block: <skip:''> /\s*(?<![^\n\s]){\s*/m closure: '->' '(' <commit> _closure_args(?) ')' block | '->' <commit> _closure_args(?) block | block _closure_args: <leftop: comma['variable'](?) ';' comma['variable']> END ###################################################################### my $wants = <<'END'; want_for_for: av_seq closure want_for_given: scalar_expr closure want_for_when: comma closure want_for_default: closure want_for_if: scalar_expr closure elsif(s?) else(?) elsif: 'elsif' scalar_expr closure else: 'else' closure want_for_grep: scalar_expr comma want_for_map: scalar_expr comma END ###################################################################### # Parse tree simplification: sub preorder(&$) { # walk parse tree pre-order my $f = shift; $f->($_[0]); if (UNIVERSAL::isa($_[0], 'ARRAY')) { preorder($f, $_) for @{$_[0]}; } } sub postorder(&$) { # walk post-order my $f = shift; if (UNIVERSAL::isa($_[0], 'ARRAY')) { postorder($f, $_) for @{$_[0]}; } $f->($_[0]); } # postorder_filter BLOCK $tree, \@classes # # call BLOCK on each node whose class is in \@classes, or on # every node if \@classes is not given. # sub postorder_filter(&$;$) { my ($f, $tree, $filter) = @_; my %doit; my $nofilter; if (ref $filter) { @doit{map { 'Perl6::'.$_ } @$filter} = (1)x@$filter; } else { $nofilter = 1; } postorder { if ($nofilter || exists $doit{ref($_[0])}) { $f->($_[0]); } } $tree; } # remove_if BLOCK $tree, \@classes # # call BLOCK on each node whose class is in \@classes, or on # every node if \@classes is not given. If BLOCK returns true, # remove the node. # sub remove_if(&$;$) { my ($f, $tree, $filter) = @_; postorder_filter { my $x = $_[0]; if (UNIVERSAL::isa($x, 'ARRAY')) { my $i = 0; while ($i <= $#{$x}) { if ($f->($x->[$i])) { splice @$x, $i, 1; } else { ++$i; } } } } $tree, $filter; } sub remove_names { # remove first item from each node. preorder { if(ref($_[0]) =~ /^Perl6::/) { shift @{$_[0]}; } } $_[0]; } # A couple of specialized pruning functions: sub Perl6::block::tidy { my $x = $_[0]; my $y = $x->[2]; splice @$x, 1, scalar(@$x), $y; } sub Perl6::and_prop::tidy { if (@{$_[0]} == 4) { my $y = $_[0][-1]; splice @{$_[0]}, 1, scalar(@{$_[0]}) - 1, $y; } } sub prune { my $tree = $_[0]; # Custom filtering first: preorder { $_[0]->tidy if UNIVERSAL::can($_[0], 'tidy') } $tree; # Remove redundant names: remove_names $tree; # Elements we never care about: remove_if { (UNIVERSAL::isa($_[0], 'Perl6::stmt_sep') || $_[0] eq '\s*' || UNIVERSAL::isa($_[0], 'ARRAY') && @{$_[0]} == 0) } $tree; # Uninteresting literals: remove_if { !ref($_[0]) } $tree, [qw(arglist prog closure _param props)]; # Ones from successful directives: remove_if { $_[0] == 1 } $tree, [qw(stmt stmts term class property scope_class incr param params prefix subscript)]; # flatten infix operators with single operands: postorder_filter { if (@{$_[0][0]} == 1) { $_[0] = $_[0][0][0]; } else { splice @{$_[0]}, 0, 1, @{$_[0][0]}; } } $tree, [qw(hv_seq apply pow match muldiv addsub bitshift compare bitand bitor logand logor comma semi log_AND log_OR _params stmts _closure_args)]; # Flatten final altnernative: postorder_filter { my $y = $_[0][-1]; if (@{$_[0]} > 1 && UNIVERSAL::isa($y, 'ARRAY')) { splice @{$_[0]}, -1, 1, @$y; } } $tree, [qw(assign props property _param scope_class)]; # Flatten items that add no information: postorder_filter { $_[0] = $_[0][0] if @{$_[0]} == 1 } $tree, [qw(sigil name namepart maybe_comma maybe_namepart varname subscriptable term incr prefix maybe_pair expr range ternary assign_lhs assign scalar_expr adverb and_prop hype prog stmt)]; } ###################################################################### # Interaction my %o; (GetOptions(\%o, qw(dumper rule=s batch help cache trace silent no-hitem)) && !$o{help}) || die <<END; Usage: $0 [options] --batch read batch on STDIN, write to STDOUT --dumper use Data::Dumper to generate output --rule NAME start with rule NAME (default = 'prog') --cache use precompiled grammar --silent don't do any output (not too useful) --trace set \$::RD_TRACE --no-hitem don't keep track of \%item hash In interactive mode, output is terminated by a blank line. END $::RD_TRACE = $o{trace}; $::RD_NO_HITEM = $o{"no-hitem"}; $::rule = $o{rule} || 'prog'; my $parser; my $gname = 'Perl6grammar'; $::RD_AUTOACTION = q { bless [@item], 'Perl6::'.$item[0] }; if ($o{cache} && eval("require $gname")) { $parser = eval "new $gname" or die "$gname: $@"; } else { print STDERR "Constructing parser..."; use Parse::RecDescent; my $grammar = <<'END'; { $SIG{__DIE__} = sub { use Carp 'confess'; confess @_ }; } END $grammar .= $variables .$literals .$operators .$declarations .$statements .$wants; if ($o{cache}) { Parse::RecDescent->Precompile($grammar, $gname); eval "require $gname" or die $@; $parser = new $gname; } else { $parser = new Parse::RecDescent($grammar); } print STDERR "done\n"; } sub as_sexp { my $x = shift; if (UNIVERSAL::isa($x, 'ARRAY')) { (my $t = ref($x)) =~ s/.*:://; return '('.join(' ', $t, map { as_sexp($_) } @$x).')'; } else { return $x; } } sub pretty { my $x = shift; prune($x); if ($o{dumper}) { Dumper($x); } else { as_sexp($x); } } my $in = ''; if ($o{batch}) { local $/ = undef; $in = <STDIN>; my $result = $parser->$::rule($in); print STDERR "done\n"; print pretty($result) unless $o{silent}; exit; } my $term = new Term::ReadLine; my $prompt = '> '; while (defined(my $l = $term->readline($prompt))) { if ($in =~ /^:(.*)/) { print eval $1, "\n"; $in = ''; next; } unless ($l =~ /^$/) { $in .= "$l\n"; $prompt = '? '; next; } print "as $::rule:\n"; my $result = $parser->$::rule($in); print STDERR "done\n"; if ($result) { print pretty($result); } else { print "parse error\n"; } print "\n"; $in = ''; $prompt = '> '; }