This is an automated email from the git hooks/post-receive script. js pushed a commit to tag PEVANS in repository libparser-mgc-perl.
commit 79aa1b271436ee7b06bc0d1c61c142ca2d5d3fb7 Author: Paul Evans <leon...@leonerd.org.uk> Date: Thu Mar 17 17:52:48 2011 +0000 Import of PEVANS/Parser-MGC-0.06 from CPAN. gitpan-cpan-distribution: Parser-MGC gitpan-cpan-version: 0.06 gitpan-cpan-path: PEVANS/Parser-MGC-0.06.tar.gz gitpan-cpan-author: PEVANS gitpan-cpan-maturity: released --- Changes | 10 ++ MANIFEST | 10 +- META.yml | 4 +- README | 116 +++++++++++++++++++++-- examples/eval-expr.pl | 41 +++++---- examples/parse-dict.pl | 54 +++++++++++ examples/parse-pod.pl | 39 ++++++++ examples/synopsis.pl | 9 +- lib/Parser/MGC.pm | 215 ++++++++++++++++++++++++++++++++++++++----- t/02expect.t | 6 +- t/06substring.t | 27 ++++++ t/12token_string.t | 17 +++- t/21scope_of.t | 26 +++++- t/{24one_of.t => 24any_of.t} | 2 +- t/30commit.t | 2 +- t/31scope_level.t | 2 +- t/90ex_dict.t | 32 +++++++ t/90ex_expr.t | 33 +++++++ t/90ex_pod.t | 36 ++++++++ t/90ex_synopsis.t | 21 +++++ t/98backcompat.t | 25 +++++ 21 files changed, 666 insertions(+), 61 deletions(-) diff --git a/Changes b/Changes index 5b7312c..0259a77 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,15 @@ Revision history for Parser-MGC +0.06 CHANGES: + * Renamed ->one_of to ->any_of + * Added ->substring_before + * Allow ->scope_of to not take a start pattern + * Recognise the usual set of character escapes in ->token_string + * Added more example scripts to demonstrate: + + the use ->substring_before to parse POD-like notation + + accumulator variables instead of structural return + * Unit-test the example scripts + 0.05 CHANGES: * Added ->scope_level * Added ->from_reader as a new potential source of string input diff --git a/MANIFEST b/MANIFEST index d2bcc68..33ae696 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,6 +1,8 @@ Build.PL Changes examples/eval-expr.pl +examples/parse-dict.pl +examples/parse-pod.pl examples/synopsis.pl lib/Parser/MGC.pm LICENSE @@ -14,6 +16,7 @@ t/02expect.t t/03reader.t t/04where.t t/05comment.t +t/06substring.t t/10token_int.t t/11token_float.t t/12token_string.t @@ -23,7 +26,12 @@ t/20maybe.t t/21scope_of.t t/22list_of.t t/23sequence_of.t -t/24one_of.t +t/24any_of.t t/30commit.t t/31scope_level.t +t/90ex_dict.t +t/90ex_expr.t +t/90ex_pod.t +t/90ex_synopsis.t +t/98backcompat.t t/99pod.t diff --git a/META.yml b/META.yml index 4a88122..41e1e73 100644 --- a/META.yml +++ b/META.yml @@ -15,9 +15,9 @@ name: Parser-MGC provides: Parser::MGC: file: lib/Parser/MGC.pm - version: 0.05 + version: 0.06 requires: File::Slurp: 0 resources: license: http://dev.perl.org/licenses/ -version: 0.05 +version: 0.06 diff --git a/README b/README index 6b211da..efe5982 100644 --- a/README +++ b/README @@ -10,7 +10,7 @@ SYNOPSIS my $self = shift; $self->sequence_of( sub { - $self->one_of( + $self->any_of( sub { $self->token_int }, sub { $self->token_string }, sub { \$self->token_ident }, @@ -171,6 +171,20 @@ STRUCTURE-FORMING METHODS $self->scope_of( "{", sub { $self->parse_statements }, "}" ); } + If the $start pattern is undefined, it is presumed the caller has + already checked for this. This is useful when the stop pattern needs to + be calculated based on the start pattern. + + sub parse_bracketed + { + my $self = shift; + + my $delim = $self->expect( qr/[\(\[\<\{]/ ); + $delim =~ tr/([<{/)]>}/; + + $self->enter_scope( undef, sub { $self->parse_body }, $delim ); + } + $ret = $parser->list_of( $sep, $code ) Expects to find a list of instances of something parsed by $code, separated by the $sep pattern. Returns an ARRAY ref containing a list of @@ -202,7 +216,7 @@ STRUCTURE-FORMING METHODS $self->sequence_of( sub { $self->parse_statement } ); } - $ret = $parser->one_of( @codes ) + $ret = $parser->any_of( @codes ) Expects that one of the given code references can parse something from the input, returning what it returned. Each code reference may indicate a failure to parse by calling the "fail" method. @@ -214,18 +228,23 @@ STRUCTURE-FORMING METHODS { my $self = shift; - $self->one_of( + $self->any_of( sub { $self->parse_declaration; $self->expect(";") }, sub { $self->parse_expression; $self->expect(";") }, sub { $self->parse_block }, ); } + Note: This method used to be called "one_of", but was renamed for + clarity. Currently this method is provided also as an alias by the old + name. Code using the old name should be rewritten to "any_of" instead, + as this backward-compatibility alias may be removed in a later version. + $parser->commit Calling this method will cancel the backtracking behaviour of the - innermost "maybe" or "one_of" structure forming method. That is, if + innermost "maybe" or "any_of" structure forming method. That is, if later code then calls "fail", the exception will be propagated out of - "maybe", and no further code blocks will be attempted by "one_of". + "maybe", and no further code blocks will be attempted by "any_of". Typically this will be called once the grammatical structure of an alternation has been determined, ensuring that any further failures are @@ -235,7 +254,7 @@ STRUCTURE-FORMING METHODS { my $self = shift; - $self->one_of( + $self->any_of( ... sub { $self->scope_of( "{", @@ -249,11 +268,39 @@ TOKEN PARSING METHODS The following methods attempt to consume some part of the input string, to be used as part of the parsing process. - $parser->expect( $string ) - $parser->expect( qr/pattern/ ) + $str = $parser->expect( $literal ) + $str = $parser->expect( qr/pattern/ ) Expects to find a literal string or regexp pattern match, and consumes it. This method returns the string that was captured. + $str = $parser->substring_before( $literal ) + $str = $parser->substring_before( qr/pattern/ ) + Expects to possibly find a literal string or regexp pattern match. If it + finds such, consume all the input text before but excluding this match, + and return it. If it fails to find a match before the end of the current + scope, consumes all the input text until the end of scope and return it. + + This method does not consume the part of input that matches, only the + text before it. It is not considered a failure if the substring before + this match is empty. If a non-empty match is required, use the "fail" + method: + + sub token_nonempty_part + { + my $self = shift; + + my $str = $parser->substring_before( "," ); + length $str or $self->fail( "Expected a string fragment before ," ); + + return $str; + } + + Note that unlike most of the other token parsing methods, this method + does not consume either leading or trailing whitespace around the + substring. It is expected that this method would be used as part a + parser to read quoted strings, or similar cases where whitespace should + be preserved. + $int = $parser->token_int Expects to find an integer in decimal, octal or hexadecimal notation, and consumes it. Negative integers, preceeded by "-", are also @@ -268,6 +315,24 @@ TOKEN PARSING METHODS Expects to find a quoted string, and consumes it. The string should be quoted using """ or "'" quote marks. + The content of the quoted string can contain character escapes similar + to those accepted by C or Perl. Specifically, the following forms are + recognised: + + \a Bell ("alert") + \b Backspace + \e Escape + \f Form feed + \n Newline + \r Return + \t Horizontal Tab + \0, \012 Octal character + \x34, \x{5678} Hexadecimal character + + C's "\v" for vertical tab is not supported as it is rarely used in + practice and it collides with Perl's "\v" regexp escape. Perl's "\c" for + forming other control characters is also not supported. + $ident = $parser->token_ident Expects to find an identifier, and consumes it. @@ -275,8 +340,41 @@ TOKEN PARSING METHODS Expects to find a keyword, and consumes it. A keyword is defined as an identifier which is exactly one of the literal values passed in. +EXAMPLES + Accumulating Results Using Variables + Although the structure-forming methods all return a value, obtained from + their nested parsing code, it can sometimes be more convenient to use a + variable to accumulate a result in instead. For example, consider the + following parser method, designed to parse a set of "name: "value"" + assignments, such as might be found in a configuration file, or + YAML/JSON-style mapping value. + + sub parse_dict + { + my $self = shift; + + my %ret; + $self->list_of( ",", sub { + my $key = $self->token_ident; + exists $ret{$key} and $self->fail( "Already have a mapping for '$key'" ); + + $self->expect( ":" ); + + $ret{$key} = $self->parse_value; + } ); + + return \%ret + } + + Instead of using the return value from "list_of", this method + accumulates values in the %ret hash, eventually returning a reference to + it as its result. Because of this, it can perform some error checking + while it parses; namely, rejecting duplicate keys. + TODO - * Unescaping of string constants; customisable + * Make unescaping of string constants more customisable. Possibly + consider instead a "parse_string_generic" using a loop over + "substring_before". * Easy ability for subclasses to define more token types diff --git a/examples/eval-expr.pl b/examples/eval-expr.pl index 71c2ecb..ab9a1f2 100755 --- a/examples/eval-expr.pl +++ b/examples/eval-expr.pl @@ -3,6 +3,7 @@ use strict; use warnings; +package ExprParser; use base qw( Parser::MGC ); sub parse @@ -16,33 +17,37 @@ sub parse_term { my $self = shift; - my $lhs = $self->parse_factor; + my $val = $self->parse_factor; - $self->one_of( - sub { $self->expect( "+" ); $self->commit; $lhs + $self->parse_term }, - sub { $self->expect( "-" ); $self->commit; $lhs - $self->parse_term }, - sub { $lhs } + 1 while $self->any_of( + sub { $self->expect( "+" ); $self->commit; $val += $self->parse_factor; 1 }, + sub { $self->expect( "-" ); $self->commit; $val -= $self->parse_factor; 1 }, + sub { 0 }, ); + + return $val; } sub parse_factor { my $self = shift; - my $lhs = $self->parse_atom; + my $val = $self->parse_atom; - $self->one_of( - sub { $self->expect( "*" ); $self->commit; $lhs * $self->parse_term }, - sub { $self->expect( "/" ); $self->commit; $lhs / $self->parse_term }, - sub { $lhs } + 1 while $self->any_of( + sub { $self->expect( "*" ); $self->commit; $val *= $self->parse_atom; 1 }, + sub { $self->expect( "/" ); $self->commit; $val /= $self->parse_atom; 1 }, + sub { 0 }, ); + + return $val; } sub parse_atom { my $self = shift; - $self->one_of( + $self->any_of( sub { $self->scope_of( "(", sub { $self->commit; $self->parse }, ")" ) }, sub { $self->token_int }, ); @@ -50,11 +55,15 @@ sub parse_atom use Data::Dump qw( pp ); -my $parser = __PACKAGE__->new; +if( !caller ) { + my $parser = __PACKAGE__->new; -while( defined( my $line = <STDIN> ) ) { - my $ret = eval { $parser->from_string( $line ) }; - print $@ and next if $@; + while( defined( my $line = <STDIN> ) ) { + my $ret = eval { $parser->from_string( $line ) }; + print $@ and next if $@; - print pp( $ret ) . "\n"; + print pp( $ret ) . "\n"; + } } + +1; diff --git a/examples/parse-dict.pl b/examples/parse-dict.pl new file mode 100644 index 0000000..178132d --- /dev/null +++ b/examples/parse-dict.pl @@ -0,0 +1,54 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +package DictParser; +use base qw( Parser::MGC ); + +sub parse +{ + my $self = shift; + + $self->any_of( + sub { $self->token_int }, + + sub { $self->token_string }, + + sub { $self->scope_of( "{", + sub { $self->commit; $self->parse_dict }, + "}" ); + }, + ); +} + +sub parse_dict +{ + my $self = shift; + + my %ret; + $self->list_of( ",", sub { + my $key = $self->token_ident; + + $self->expect( ":" ); + + $ret{$key} = $self->parse; + } ); + + return \%ret +} + +use Data::Dump qw( pp ); + +if( !caller ) { + my $parser = __PACKAGE__->new; + + while( defined( my $line = <STDIN> ) ) { + my $ret = eval { $parser->from_string( $line ) }; + print $@ and next if $@; + + print pp( $ret ) . "\n"; + } +} + +1; diff --git a/examples/parse-pod.pl b/examples/parse-pod.pl new file mode 100755 index 0000000..918a6c7 --- /dev/null +++ b/examples/parse-pod.pl @@ -0,0 +1,39 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +package PodParser; +use base qw( Parser::MGC ); + +sub parse +{ + my $self = shift; + + $self->sequence_of( + sub { $self->any_of( + + sub { my $tag = $self->expect( qr/[A-Z](?=<)/ ); + $self->commit; + my $delim = $self->expect( qr/<+/ ); + +{ $tag => $self->scope_of( undef, \&parse, ">" x length $delim ) }; }, + + sub { $self->substring_before( qr/[A-Z]</ ) }, + ) }, + ); +} + +use Data::Dump qw( pp ); + +if( !caller ) { + my $parser = __PACKAGE__->new; + + while( defined( my $line = <STDIN> ) ) { + my $ret = eval { $parser->from_string( $line ) }; + print $@ and next if $@; + + print pp( $ret ) . "\n"; + } +} + +1; diff --git a/examples/synopsis.pl b/examples/synopsis.pl old mode 100644 new mode 100755 index 4c1cbb4..51a8006 --- a/examples/synopsis.pl +++ b/examples/synopsis.pl @@ -11,7 +11,7 @@ sub parse my $self = shift; $self->sequence_of( sub { - $self->one_of( + $self->any_of( sub { $self->token_int }, sub { $self->token_string }, sub { \$self->token_ident }, @@ -23,4 +23,9 @@ sub parse my $parser = LispParser->new; use Data::Dump qw( pp ); -print pp( $parser->from_file( $ARGV[0] ) ); + +if( !caller ) { + print pp( $parser->from_file( $ARGV[0] ) ); +} + +1; diff --git a/lib/Parser/MGC.pm b/lib/Parser/MGC.pm index 235ec8d..b58897f 100644 --- a/lib/Parser/MGC.pm +++ b/lib/Parser/MGC.pm @@ -8,7 +8,7 @@ package Parser::MGC; use strict; use warnings; -our $VERSION = '0.05'; +our $VERSION = '0.06'; use Carp; @@ -28,7 +28,7 @@ C<Parser::MGC> - build simple recursive-descent parsers my $self = shift; $self->sequence_of( sub { - $self->one_of( + $self->any_of( sub { $self->token_int }, sub { $self->token_string }, sub { \$self->token_ident }, @@ -283,8 +283,6 @@ sub fail my $self = shift; my ( $message ) = @_; - my ( $lineno, $col, $line ) = $self->where; - die Parser::MGC::Failure->new( $message, $self->where ); } @@ -298,16 +296,22 @@ sub at_eos { my $self = shift; - $self->skip_ws; - my $pos = pos $self->{str}; - return 1 if defined $pos and $pos >= length $self->{str}; + $self->skip_ws; - return 0 unless defined $self->{endofscope}; + my $at_eos; + if( pos( $self->{str} ) >= length $self->{str} ) { + $at_eos = 1; + } + elsif( defined $self->{endofscope} ) { + $at_eos = $self->{str} =~ m/\G$self->{endofscope}/; + } + else { + $at_eos = 0; + } - # No /g so we won't actually alter pos() - my $at_eos = $self->{str} =~ m/\G$self->{endofscope}/; + pos( $self->{str} ) = $pos; return $at_eos; } @@ -393,6 +397,20 @@ failure if called at the end of a scope. $self->scope_of( "{", sub { $self->parse_statements }, "}" ); } +If the C<$start> pattern is undefined, it is presumed the caller has already +checked for this. This is useful when the stop pattern needs to be calculated +based on the start pattern. + + sub parse_bracketed + { + my $self = shift; + + my $delim = $self->expect( qr/[\(\[\<\{]/ ); + $delim =~ tr/([<{/)]>}/; + + $self->enter_scope( undef, sub { $self->parse_body }, $delim ); + } + =cut sub scope_of @@ -402,7 +420,8 @@ sub scope_of ref $stop or $stop = qr/\Q$stop/; - $self->expect( $start ); + $self->expect( $start ) if defined $start; + local $self->{endofscope} = $stop; local $self->{scope_level} = $self->{scope_level} + 1; @@ -441,7 +460,7 @@ sub list_of my @ret; while( !$self->at_eos ) { - push @ret, scalar $code->( $self ); + push @ret, $code->( $self ); $self->skip_ws; $self->{str} =~ m/\G$sep/gc or last; @@ -472,10 +491,16 @@ sub sequence_of my $self = shift; my ( $code ) = @_; - return $self->list_of( "", $code ); + my @ret; + + while( !$self->at_eos ) { + push @ret, $code->( $self ); + } + + return \@ret; } -=head2 $ret = $parser->one_of( @codes ) +=head2 $ret = $parser->any_of( @codes ) Expects that one of the given code references can parse something from the input, returning what it returned. Each code reference may indicate a failure @@ -488,16 +513,21 @@ alternations of possible parse trees. { my $self = shift; - $self->one_of( + $self->any_of( sub { $self->parse_declaration; $self->expect(";") }, sub { $self->parse_expression; $self->expect(";") }, sub { $self->parse_block }, ); } +Note: This method used to be called C<one_of>, but was renamed for clarity. +Currently this method is provided also as an alias by the old name. Code +using the old name should be rewritten to C<any_of> instead, as this +backward-compatibility alias may be removed in a later version. + =cut -sub one_of +sub any_of { my $self = shift; @@ -519,12 +549,14 @@ sub one_of $self->fail( "Found nothing parseable" ); } +*one_of = \&any_of; + =head2 $parser->commit Calling this method will cancel the backtracking behaviour of the innermost -C<maybe> or C<one_of> structure forming method. That is, if later code then +C<maybe> or C<any_of> structure forming method. That is, if later code then calls C<fail>, the exception will be propagated out of C<maybe>, and no -further code blocks will be attempted by C<one_of>. +further code blocks will be attempted by C<any_of>. Typically this will be called once the grammatical structure of an alternation has been determined, ensuring that any further failures are raised @@ -534,7 +566,7 @@ as real exceptions, rather than by attempting other alternatives. { my $self = shift; - $self->one_of( + $self->any_of( ... sub { $self->scope_of( "{", @@ -593,9 +625,9 @@ sub skip_ws } } -=head2 $parser->expect( $string ) +=head2 $str = $parser->expect( $literal ) -=head2 $parser->expect( qr/pattern/ ) +=head2 $str = $parser->expect( qr/pattern/ ) Expects to find a literal string or regexp pattern match, and consumes it. This method returns the string that was captured. @@ -616,6 +648,62 @@ sub expect return $1; } +=head2 $str = $parser->substring_before( $literal ) + +=head2 $str = $parser->substring_before( qr/pattern/ ) + +Expects to possibly find a literal string or regexp pattern match. If it finds +such, consume all the input text before but excluding this match, and return +it. If it fails to find a match before the end of the current scope, consumes +all the input text until the end of scope and return it. + +This method does not consume the part of input that matches, only the text +before it. It is not considered a failure if the substring before this match +is empty. If a non-empty match is required, use the C<fail> method: + + sub token_nonempty_part + { + my $self = shift; + + my $str = $parser->substring_before( "," ); + length $str or $self->fail( "Expected a string fragment before ," ); + + return $str; + } + +Note that unlike most of the other token parsing methods, this method does not +consume either leading or trailing whitespace around the substring. It is +expected that this method would be used as part a parser to read quoted +strings, or similar cases where whitespace should be preserved. + +=cut + +sub substring_before +{ + my $self = shift; + my ( $expect ) = @_; + + ref $expect or $expect = qr/\Q$expect/; + + my $endre = ( defined $self->{endofscope} ) ? + qr/$expect|$self->{endofscope}/ : + $expect; + + # NO skip_ws + + my $start = pos $self->{str}; + my $end; + if( $self->{str} =~ m/\G(?s:.*?)($endre)/ ) { + $end = $-[1]; + } + else { + $end = length $self->{str}; + } + + pos( $self->{str} ) = $end; + return substr( $self->{str}, $start, $end - $start ); +} + =head2 $int = $parser->token_int Expects to find an integer in decimal, octal or hexadecimal notation, and @@ -629,6 +717,7 @@ sub token_int $self->fail( "Expected integer" ) if $self->at_eos; + $self->skip_ws; $self->{str} =~ m/\G(-?)($self->{patterns}{int})/gc or $self->fail( "Expected integer" ); @@ -654,6 +743,7 @@ sub token_float $self->fail( "Expected float" ) if $self->at_eos; + $self->skip_ws; $self->{str} =~ m/\G(-?(?:\d*\.\d+|\d+\.)(?:e-?\d+)?|-?\d+e-?\d+)/gci or $self->fail( "Expected float" ); @@ -665,8 +755,35 @@ sub token_float Expects to find a quoted string, and consumes it. The string should be quoted using C<"> or C<'> quote marks. +The content of the quoted string can contain character escapes similar to +those accepted by C or Perl. Specifically, the following forms are recognised: + + \a Bell ("alert") + \b Backspace + \e Escape + \f Form feed + \n Newline + \r Return + \t Horizontal Tab + \0, \012 Octal character + \x34, \x{5678} Hexadecimal character + +C's C<\v> for vertical tab is not supported as it is rarely used in practice +and it collides with Perl's C<\v> regexp escape. Perl's C<\c> for forming other +control characters is also not supported. + =cut +my %escapes = ( + a => "\a", + b => "\b", + e => "\e", + f => "\f", + n => "\n", + r => "\r", + t => "\t", +); + sub token_string { my $self = shift; @@ -675,17 +792,31 @@ sub token_string my $pos = pos $self->{str}; + $self->skip_ws; $self->{str} =~ m/\G($self->{patterns}{string_delim})/gc or $self->fail( "Expected string delimiter" ); my $delim = $1; - $self->{str} =~ m/\G((?:\\.|[^\\])*?)$delim/gc or - pos($self->{str}) = $pos, $self->fail( "Expected contents of string" ); + $self->{str} =~ m/ + \G( + (?: + \\[0-7]{1,3} # octal escape + |\\x[0-9A-F]{2} # 2-digit hex escape + |\\x\{[0-9A-F]+\} # {}-delimited hex escape + |\\. # symbolic escape + |[^\\$delim]+ # plain chunk + )*? + )$delim/gcix or + pos($self->{str}) = $pos, $self->fail( "Expected contents of string" ); my $string = $1; - # TODO: Unescape stuff like \\ and \n and whatnot + $string =~ s<\\(?:([0-7]{1,3})|x([0-9A-F]{2})|x\{([0-9A-F]+)\}|(.))> + [defined $1 ? chr oct $1 : + defined $2 ? chr hex $2 : + defined $3 ? chr hex $3 : + exists $escapes{$4} ? $escapes{$4} : $4]egi; return $string; } @@ -702,6 +833,7 @@ sub token_ident $self->fail( "Expected identifier" ) if $self->at_eos; + $self->skip_ws; $self->{str} =~ m/\G($self->{patterns}{ident})/gc or $self->fail( "Expected identifier" ); @@ -764,13 +896,46 @@ sub STRING # Provide fallback operators for cmp, eq, etc... use overload fallback => 1; +=head1 EXAMPLES + +=head2 Accumulating Results Using Variables + +Although the structure-forming methods all return a value, obtained from their +nested parsing code, it can sometimes be more convenient to use a variable to +accumulate a result in instead. For example, consider the following parser +method, designed to parse a set of C<name: "value"> assignments, such as might +be found in a configuration file, or YAML/JSON-style mapping value. + + sub parse_dict + { + my $self = shift; + + my %ret; + $self->list_of( ",", sub { + my $key = $self->token_ident; + exists $ret{$key} and $self->fail( "Already have a mapping for '$key'" ); + + $self->expect( ":" ); + + $ret{$key} = $self->parse_value; + } ); + + return \%ret + } + +Instead of using the return value from C<list_of>, this method accumulates +values in the C<%ret> hash, eventually returning a reference to it as its +result. Because of this, it can perform some error checking while it parses; +namely, rejecting duplicate keys. + =head1 TODO =over 4 =item * -Unescaping of string constants; customisable +Make unescaping of string constants more customisable. Possibly consider +instead a C<parse_string_generic> using a loop over C<substring_before>. =item * diff --git a/t/02expect.t b/t/02expect.t index a6382e2..6826c73 100644 --- a/t/02expect.t +++ b/t/02expect.t @@ -2,7 +2,7 @@ use strict; -use Test::More tests => 3; +use Test::More tests => 4; package TestParser; use base qw( Parser::MGC ); @@ -22,6 +22,10 @@ is_deeply( $parser->from_string( "hello world" ), [ "hello", "world" ], '"hello world"' ); +is_deeply( $parser->from_string( " hello world " ), + [ "hello", "world" ], + '" hello world "' ); + # Perl 5.13.6 changed the regexp form # Accept both old and new-style stringification my $modifiers = (qr/foobar/ =~ /\Q(?^/) ? '^' : '-xism'; diff --git a/t/06substring.t b/t/06substring.t new file mode 100644 index 0000000..8d76371 --- /dev/null +++ b/t/06substring.t @@ -0,0 +1,27 @@ +#!/usr/bin/perl -w + +use strict; + +use Test::More tests => 2; + +package TestParser; +use base qw( Parser::MGC ); + +sub parse +{ + my $self = shift; + + [ $self->substring_before( "!" ), $self->expect( "!" ) ]; +} + +package main; + +my $parser = TestParser->new; + +is_deeply( $parser->from_string( "Hello, world!" ), + [ "Hello, world", "!" ], + '"Hello, world!"' ); + +is_deeply( $parser->from_string( "!" ), + [ "", "!" ], + '"Hello, world!"' ); diff --git a/t/12token_string.t b/t/12token_string.t index f57b560..39c3eba 100644 --- a/t/12token_string.t +++ b/t/12token_string.t @@ -2,7 +2,7 @@ use strict; -use Test::More tests => 7; +use Test::More tests => 18; package TestParser; use base qw( Parser::MGC ); @@ -24,6 +24,21 @@ is( $parser->from_string( q["double"] ), "double", 'Double quoted string' ); is( $parser->from_string( q["foo 'bar'"] ), "foo 'bar'", 'Double quoted string containing single substr' ); is( $parser->from_string( q['foo "bar"'] ), 'foo "bar"', 'Single quoted string containing double substr' ); +is( $parser->from_string( q["tab \t"] ), "tab \t", '\t' ); +is( $parser->from_string( q["newline \n"] ), "newline \n", '\n' ); +is( $parser->from_string( q["return \r"] ), "return \r", '\r' ); +is( $parser->from_string( q["form feed \f"] ), "form feed \f", '\f' ); +is( $parser->from_string( q["backspace \b"] ), "backspace \b", '\b' ); +is( $parser->from_string( q["bell \a"] ), "bell \a", '\a' ); +is( $parser->from_string( q["escape \e"] ), "escape \e", '\e' ); + +# ord('A') == 65 == 0101 == 0x41 +# TODO: This is ASCII dependent. If anyone on EBCDIC cares, do let me know... +is( $parser->from_string( q["null \0"] ), "null \0", 'Octal null' ); +is( $parser->from_string( q["octal \101BC"] ), "octal ABC", 'Octal' ); +is( $parser->from_string( q["hex \x41BC"] ), "hex ABC", 'Hexadecimal' ); +is( $parser->from_string( q["unihex \x{263a}"] ), "unihex \x{263a}", 'Unicode hex' ); + $parser = TestParser->new( patterns => { string_delim => qr/"/ } ); diff --git a/t/21scope_of.t b/t/21scope_of.t index fcb7da9..1ebf552 100644 --- a/t/21scope_of.t +++ b/t/21scope_of.t @@ -2,7 +2,7 @@ use strict; -use Test::More tests => 3; +use Test::More tests => 6; package TestParser; use base qw( Parser::MGC ); @@ -18,6 +18,23 @@ sub parse ); } +package DynamicDelimParser; +use base qw( Parser::MGC ); + +sub parse +{ + my $self = shift; + + my $delim = $self->expect( qr/[\(\[]/ ); + $delim =~ tr/([/)]/; + + $self->scope_of( + undef, + sub { return $self->token_int }, + $delim, + ); +} + package main; my $parser = TestParser->new; @@ -26,3 +43,10 @@ is( $parser->from_string( "(123)" ), 123, '"(123)"' ); ok( !eval { $parser->from_string( "(abc)" ) }, '"(abc)"' ); ok( !eval { $parser->from_string( "456" ) }, '"456"' ); + +$parser = DynamicDelimParser->new; + +is( $parser->from_string( "(45)" ), 45, '"(45)"' ); +is( $parser->from_string( "[45]" ), 45, '"[45]"' ); + +ok( !eval { $parser->from_string( "(45]" ) }, '"(45]" fails' ); diff --git a/t/24one_of.t b/t/24any_of.t similarity index 97% rename from t/24one_of.t rename to t/24any_of.t index 665e291..aec0559 100644 --- a/t/24one_of.t +++ b/t/24any_of.t @@ -11,7 +11,7 @@ sub parse { my $self = shift; - $self->one_of( + $self->any_of( sub { [ int => $self->token_int ] }, sub { [ str => $self->token_string ] }, sub { [ ident => $self->token_ident ] }, diff --git a/t/30commit.t b/t/30commit.t index b9080bb..77fe010 100644 --- a/t/30commit.t +++ b/t/30commit.t @@ -11,7 +11,7 @@ sub parse { my $self = shift; - $self->one_of( + $self->any_of( sub { $self->token_int }, sub { $self->scope_of( "(", diff --git a/t/31scope_level.t b/t/31scope_level.t index 06e87ac..50e581f 100644 --- a/t/31scope_level.t +++ b/t/31scope_level.t @@ -13,7 +13,7 @@ sub parse $self->sequence_of( sub { - $self->one_of( + $self->any_of( sub { $self->expect( qr/[a-z]+/ ) . "/" . $self->scope_level }, sub { $self->scope_of( "(", \&parse, ")" ) }, ); diff --git a/t/90ex_dict.t b/t/90ex_dict.t new file mode 100644 index 0000000..efdd78d --- /dev/null +++ b/t/90ex_dict.t @@ -0,0 +1,32 @@ +#!/usr/bin/perl -w + +use strict; + +use Test::More tests => 4; + +require "examples/parse-dict.pl"; + +my $parser = DictParser->new; + +sub test +{ + my ( $str, $expect, $name ) = @_; + + is_deeply( $parser->from_string( $str ), $expect, $name ); +} + +test q[123], + 123, + "Number"; + +test q["Hello"], + "Hello", + "String"; + +test q[{one: 1, two: 2}], + { one => 1, two => 2 }, + "Flat dict"; + +test q[{numbers: {three: 3, four: 4}}], + { numbers => { three => 3, four => 4 } }, + "Nested dict"; diff --git a/t/90ex_expr.t b/t/90ex_expr.t new file mode 100644 index 0000000..c3df649 --- /dev/null +++ b/t/90ex_expr.t @@ -0,0 +1,33 @@ +#!/usr/bin/perl -w + +use strict; + +use Test::More; + +require "examples/eval-expr.pl"; + +my $parser = ExprParser->new; + +while( <DATA> ) { + chomp; + my ( $str, $expect ) = split m/=/; + + is( $parser->from_string( $str ), $expect, $str ); +} + +done_testing; + +__DATA__ +1+2=3 + 1 + 2 =3 +1+2+3=6 +10-4=6 +10-2-2=6 +3*4=12 +3*4*5=60 +20/4=5 +20/5/2=2 +3+4*5=23 +4*5+3=23 +(3+4)*5=35 +4*(5+3)=32 diff --git a/t/90ex_pod.t b/t/90ex_pod.t new file mode 100644 index 0000000..1dbb98f --- /dev/null +++ b/t/90ex_pod.t @@ -0,0 +1,36 @@ +#!/usr/bin/perl -w + +use strict; + +use Test::More tests => 5; + +require "examples/parse-pod.pl"; + +my $parser = PodParser->new; + +sub test +{ + my ( $str, $expect, $name ) = @_; + + is_deeply( $parser->from_string( $str ), $expect, $name ); +} + +test "Plain text", + [ "Plain text" ], + "plain"; + +test "B<bold>", + [ { B => [ "bold" ] } ], + "B<>"; + +test "Text with I<italic> text", + [ "Text with ", { I => [ "italic" ] }, " text" ], + "I<> surrounded"; + +test "Nested B<I<tags>>", + [ "Nested ", { B => [ { I => [ "tags" ] } ] } ], + "Nested"; + +test "Double C<< Class->method >> tags", + [ "Double ", { C => [ " Class->method " ] }, " tags" ], + "Double tags"; diff --git a/t/90ex_synopsis.t b/t/90ex_synopsis.t new file mode 100644 index 0000000..097c4ca --- /dev/null +++ b/t/90ex_synopsis.t @@ -0,0 +1,21 @@ +#!/usr/bin/perl -w + +use strict; + +use Test::More tests => 4; + +require "examples/synopsis.pl"; + +my $parser = LispParser->new; + +sub test +{ + my ( $str, $expect ) = @_; + + is_deeply( $parser->from_string( $str ), [ $expect ], qq("$str") ); +} + +test "123", 123; +test "'hello'", 'hello'; +test "(123 456)", [ 123, 456 ]; +test "(+ 1 (* 2 3))", [ \'+', 1, [ \'*', 2, 3 ] ]; diff --git a/t/98backcompat.t b/t/98backcompat.t new file mode 100644 index 0000000..a4d9f1c --- /dev/null +++ b/t/98backcompat.t @@ -0,0 +1,25 @@ +#!/usr/bin/perl -w + +use strict; + +use Test::More tests => 2; + +package OneOfParser; +use base qw( Parser::MGC ); + +sub parse +{ + my $self = shift; + + $self->one_of( + sub { [ int => $self->token_int ] }, + sub { [ str => $self->token_string ] }, + ); +} + +package main; + +my $parser = OneOfParser->new; + +is_deeply( $parser->from_string( "123" ), [ int => 123 ], 'one_of integer' ); +is_deeply( $parser->from_string( q["hi"] ), [ str => "hi" ], 'one_of string' ); -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libparser-mgc-perl.git _______________________________________________ Pkg-perl-cvs-commits mailing list Pkg-perl-cvs-commits@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-perl-cvs-commits