I am writing a some parsers to extract some basic information from
VB.NET source files and turn them into restructured text that I can
later transform into API docs with Sphinx.
My first experiments with parsing modular level functions and
subroutines worked out pretty well, so I thought I would try to parse
class members (subs, functions, and properties). However, I have run
into an ambiguity that I believe I have tracked down to a shift/reduce
conflict in my parser.out file.
In the 2 samples below, you can see the first function fails, and the
second succeeds:
Input:
Public Function Baz(ByVal obj As Frotz.Plugh, ByVal a As Integer()) As
Muppet.Fozzy.Bear
Lexing...
LexToken(PUBLIC,'Public',1,0)
LexToken(FUNCTION,'Function',1,7)
LexToken(IDENTIFIER,'Baz',1,16)
LexToken(LPAREN,'(',1,19)
LexToken(BYVAL,'ByVal',1,20)
LexToken(IDENTIFIER,'obj',1,26)
LexToken(AS,'As',1,30)
LexToken(IDENTIFIER,'Frotz',1,33)
LexToken(PERIOD,'.',1,38)
LexToken(IDENTIFIER,'Plugh',1,39)
LexToken(COMMA,',',1,44)
LexToken(BYVAL,'ByVal',1,46)
LexToken(IDENTIFIER,'a',1,52)
LexToken(AS,'As',1,54)
LexToken(IDENTIFIER,'Integer',1,57)
LexToken(LPAREN,'(',1,64)
LexToken(RPAREN,')',1,65)
LexToken(RPAREN,')',1,66)
LexToken(AS,'As',1,68)
LexToken(IDENTIFIER,'Muppet',1,71)
LexToken(PERIOD,'.',1,77)
LexToken(IDENTIFIER,'Fozzy',1,78)
LexToken(PERIOD,'.',1,83)
LexToken(IDENTIFIER,'Bear',1,84)
Parsing...
Error token is: LexToken(BYVAL,'ByVal',1,20)
Input:
Public Function Frotz(of T)(ByVal magic As Zork.XYZZY, Optional ByVal
lantern as Zork.Light = Nothing) As Zork.Random
Lexing...
LexToken(PUBLIC,'Public',1,0)
LexToken(FUNCTION,'Function',1,7)
LexToken(IDENTIFIER,'Frotz',1,16)
LexToken(LPAREN,'(',1,21)
LexToken(OF,'of',1,22)
LexToken(IDENTIFIER,'T',1,25)
LexToken(RPAREN,')',1,26)
LexToken(LPAREN,'(',1,27)
LexToken(BYVAL,'ByVal',1,28)
LexToken(IDENTIFIER,'magic',1,34)
LexToken(AS,'As',1,40)
LexToken(IDENTIFIER,'Zork',1,43)
LexToken(PERIOD,'.',1,47)
LexToken(IDENTIFIER,'XYZZY',1,48)
LexToken(COMMA,',',1,53)
LexToken(OPTIONAL,'Optional',1,55)
LexToken(BYVAL,'ByVal',1,64)
LexToken(IDENTIFIER,'lantern',1,70)
LexToken(AS,'as',1,78)
LexToken(IDENTIFIER,'Zork',1,81)
LexToken(PERIOD,'.',1,85)
LexToken(IDENTIFIER,'Light',1,86)
LexToken(EQUALS,'=',1,92)
LexToken(NOTHING,'Nothing',1,94)
LexToken(RPAREN,')',1,101)
LexToken(AS,'As',1,103)
LexToken(IDENTIFIER,'Zork',1,106)
LexToken(PERIOD,'.',1,110)
LexToken(IDENTIFIER,'Random',1,111)
Parsing...
VBFunction: Public Function Frotz(Of T) As Zork.Random:
VBArgument: ByVal magic As Zork.XYZZY
VBArgument: Optional ByVal lantern As Zork.Light = Nothing
I think it is because the second function takes a type parameter and
the first function does not (it stops at the BYVAL token).
My grammar rules follow:
def p_member(p):
"""
member : function
| sub
| property
"""
p[0] = p[1]
def p_function(p):
"""
function : access_spec override_spec FUNCTION func_name
generic_spec LPAREN arg_list RPAREN AS return_type
"""
p[0] = VBFunction("%s%s" % (p[4], p[5]), p[7], p[10], RT_VBFUNC)
def p_sub(p):
"""
sub : access_spec override_spec SUB
func_name generic_spec LPAREN arg_list RPAREN
"""
p[0] = VBFunction("%s%s" % (p[4], p[5]), p[7], None)
def p_property(p):
"""
property : access_spec readonly_spec
override_spec PROPERTY func_name LPAREN arg_list RPAREN AS return_type
"""
p[0] = VBFunction(p[5], p[7], p[10], p[13], RT_VBPROP)
def p_access_spec(p):
"""
access_spec : PUBLIC
| PROTECTED
| FRIEND
"""
def p_readonly_spec(p):
"""
readonly_spec : READONLY
|
"""
if len(p) == 1:
p[0] = ''
else:
p[0] = p[1]
def p_override_spec(p):
"""
override_spec : OVERRIDES
| OVERRIDABLE
| MUSTOVERRIDE
| SHARED
|
"""
if len(p) == 1:
p[0] = ''
else:
p[0] = p[1]
def p_generic_spec(p):
"""
generic_spec : LPAREN OF type_param_spec_list
RPAREN
|
"""
if len(p) == 1:
p[0] = ''
else:
p[0] = "(Of %s)" % p[3]
def p_type_param_spec_list(p):
"""
type_param_spec_list : type_param_spec
| type_param_spec_list COMMA
type_param_spec
"""
if len(p) == 2:
p[0] = p[1]
else:
p[0] = '%s, %s' % (p[1], p[3])
def p_type_param_spec(p):
"""
type_param_spec : IDENTIFIER
| IDENTIFIER AS type_constraint_spec
"""
if len(p) == 2:
p[0] = p[1]
else:
p[0] = '%s As %s' % (p[1], p[3])
def p_type_constraint_spec(p):
"""
type_constraint_spec : type_constraint_spec_item
| LBRACE type_constraint_spec_list
RBRACE
"""
if len(p) == 2:
p[0] = p[1]
else:
p[0] = '{%s}' % p[2]
def p_type_constraint_spec_list(p):
"""
type_constraint_spec_list : type_constraint_spec_item
| type_constraint_spec_list COMMA
type_constraint_spec_item
"""
if len(p) == 2:
p[0] = p[1]
else:
p[0] = '%s, %s' % (p[1], p[3])
def p_func_name(p):
"""
func_name : IDENTIFIER
"""
p[0] = p[1]
def p_return_type(p):
"""
return_type : dt
"""
p[0] = p[1]
def p_arg_list(p):
"""
arg_list : populated_arg_list
|
"""
if len(p) == 1:
p[0] = ''
else:
p[0] = p[1]
def p_populated_arg_list(p):
"""
populated_arg_list : arg
| populated_arg_list COMMA arg
| BYVAL PARAMARRAY IDENTIFIER LPAREN RPAREN AS
dt_singular
"""
if len(p) == 2:
p[0] = [p[1]]
elif len(p) == 4:
p[0] = p[1] + [p[3]]
else:
p[0] = [VBArgument('ByVal', p[3], p[7], True)]
def p_arg(p):
"""
arg : required_arg
| optional_arg
"""
p[0] = p[1]
def p_required_arg(p):
"""
required_arg : pass_method IDENTIFIER AS dt
"""
p[0] = VBArgument(p[1], p[2], p[4])
def p_optional_arg(p):
"""
optional_arg : OPTIONAL pass_method IDENTIFIER AS dt EQUALS
expression
"""
p[0] = VBArgument(p[2], p[3], p[5], optional_value = p[7])
def p_expression(p):
"""
expression : QUOTE QUOTE
| QUOTE non_quotes QUOTE
| number_literal
| NOTHING
| TRUE
| FALSE
"""
p[0] = ''.join(p[1:])
def p_non_quotes(p):
"""
non_quotes : CHARS
| ESCAPED_QUOTE
| non_quotes CHARS
| non_quotes ESCAPED_QUOTE
"""
p[0] = ''.join(p[1:])
def p_number_literal(p):
"""
number_literal : INT_LITERAL
| FLOAT_LITERAL
"""
p[0] = p[1]
def p_pass_method(p):
"""
pass_method : BYVAL
| BYREF
"""
p[0] = p[1]
def p_dt(p):
"""
dt : dt_singular
| dt_generic
| dt_array
"""
p[0] = p[1]
def p_dt_singular(p):
"""
dt_singular : IDENTIFIER
| dt_singular PERIOD IDENTIFIER
"""
p[0] = ''.join(p[1:])
def p_dt_generic(p):
"""
dt_generic : dt_singular LPAREN OF dt RPAREN
"""
p[0] = "%s(Of %s)" % (p[1], p[4])
def p_dt_array(p):
"""
dt_array : dt_singular LPAREN RPAREN
"""
p[0] = ''.join(p[1:])
def p_type_constraint_spec_item(p):
"""
type_constraint_spec_item : dt_singular
| dt_generic
"""
p[0] = p[1]
I think the issue is that generic_spec needs to be reduced to an empty
production at some point, but instead the parser shifts BYVAL. I
looked into assigning a precedence, but the documentation for PLY
seems to indicate that precedence can only be assigned to tokens, and
not grammar rules as such. The grammar rule has the precedence of the
right most token, but in this case, the grammar rule has no terminals!
Is there a better way to write my grammar, or is there some trick to
using precedence that could correct this issue.
Any help or direction you could provide would be appreciated.
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups
"ply-hack" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/ply-hack?hl=en
-~----------~----~----~----~------~----~------~--~---