changeset 9bc3e4611009 in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=9bc3e4611009
description:
isa_parser: Turn the ISA Parser into a subclass of Grammar.
This is to prepare for a future cleanup in which SCons will create a
separate grammar class for each ISA.
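
For orientation: the Grammar base class pulled in by the new import is not
part of this diff. The sketch below is a guess at the general shape of such
a PLY wrapper (the lazy properties and their names are assumptions, not the
actual m5.util.grammar code). Subclasses like the new ISAParser supply the
'tokens' list plus the 't_*' and 'p_*' rules, and PLY discovers them on the
instance because it is passed as module=self:

    from ply import lex, yacc

    class Grammar(object):
        def __init__(self, **kwargs):
            # Lexer and parser are built lazily, on first use.
            self._lexer = None
            self._parser = None

        @property
        def lexer(self):
            if self._lexer is None:
                # module=self: PLY scans this instance for t_* rules
                self._lexer = lex.lex(module=self)
            return self._lexer

        @property
        def parser(self):
            if self._parser is None:
                # module=self: PLY scans this instance for p_* rules
                self._parser = yacc.yacc(module=self, write_tables=False)
            return self._parser

        def parse(self, text, **kwargs):
            return self.parser.parse(text, lexer=self.lexer, **kwargs)
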
diffstat:
src/arch/isa_parser.py | 1202 ++++++++++++++++++++++++------------------------
1 file changed, 604 insertions(+), 598 deletions(-)
diffs (truncated from 1337 to 300 lines):
diff -r a886774d5ae1 -r 9bc3e4611009 src/arch/isa_parser.py
--- a/src/arch/isa_parser.py Wed Sep 23 18:28:29 2009 -0700
+++ b/src/arch/isa_parser.py Wed Sep 23 18:28:29 2009 -0700
@@ -34,697 +34,699 @@
# get type names
from types import *
-from ply import lex
-from ply import yacc
+from m5.util.grammar import Grammar
-#####################################################################
-#
-# Lexer
-#
-# The PLY lexer module takes two things as input:
-# - A list of token names (the string list 'tokens')
-# - A regular expression describing a match for each token. The
-# regexp for token FOO can be provided in two ways:
-# - as a string variable named t_FOO
-# - as the doc string for a function named t_FOO. In this case,
-# the function is also executed, allowing an action to be
-# associated with each token match.
-#
-#####################################################################
+class ISAParser(Grammar):
+ def __init__(self, *args, **kwargs):
+ super(ISAParser, self).__init__(*args, **kwargs)
+ self.templateMap = {}
-# Reserved words. These are listed separately as they are matched
-# using the same regexp as generic IDs, but distinguished in the
-# t_ID() function. The PLY documentation suggests this approach.
-reserved = (
- 'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
- 'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
- 'OUTPUT', 'SIGNED', 'TEMPLATE'
+ #####################################################################
+ #
+ # Lexer
+ #
+ # The PLY lexer module takes two things as input:
+ # - A list of token names (the string list 'tokens')
+ # - A regular expression describing a match for each token. The
+ # regexp for token FOO can be provided in two ways:
+ # - as a string variable named t_FOO
+ # - as the doc string for a function named t_FOO. In this case,
+ # the function is also executed, allowing an action to be
+ # associated with each token match.
+ #
+ #####################################################################
+
+ # Reserved words. These are listed separately as they are matched
+ # using the same regexp as generic IDs, but distinguished in the
+ # t_ID() function. The PLY documentation suggests this approach.
+ reserved = (
+ 'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
+ 'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
+ 'OUTPUT', 'SIGNED', 'TEMPLATE'
+ )
+
+ # List of tokens. The lex module requires this.
+ tokens = reserved + (
+ # identifier
+ 'ID',
+
+ # integer literal
+ 'INTLIT',
+
+ # string literal
+ 'STRLIT',
+
+ # code literal
+ 'CODELIT',
+
+    # ( ) [ ] { } < > = , ; . : :: *
+ 'LPAREN', 'RPAREN',
+ 'LBRACKET', 'RBRACKET',
+ 'LBRACE', 'RBRACE',
+ 'LESS', 'GREATER', 'EQUALS',
+ 'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
+ 'ASTERISK',
+
+ # C preprocessor directives
+ 'CPPDIRECTIVE'
+
+    # The following are matched but never returned. Commented out to
+    # suppress a PLY warning.
+ # newfile directive
+ # 'NEWFILE',
+
+ # endfile directive
+ # 'ENDFILE'
)
-# List of tokens. The lex module requires this.
-tokens = reserved + (
- # identifier
- 'ID',
+ # Regular expressions for token matching
+ t_LPAREN = r'\('
+ t_RPAREN = r'\)'
+ t_LBRACKET = r'\['
+ t_RBRACKET = r'\]'
+ t_LBRACE = r'\{'
+ t_RBRACE = r'\}'
+ t_LESS = r'\<'
+ t_GREATER = r'\>'
+ t_EQUALS = r'='
+ t_COMMA = r','
+ t_SEMI = r';'
+ t_DOT = r'\.'
+ t_COLON = r':'
+ t_DBLCOLON = r'::'
+ t_ASTERISK = r'\*'
- # integer literal
- 'INTLIT',
+ # Identifiers and reserved words
+ reserved_map = { }
+ for r in reserved:
+ reserved_map[r.lower()] = r
- # string literal
- 'STRLIT',
+ def t_ID(self, t):
+ r'[A-Za-z_]\w*'
+ t.type = self.reserved_map.get(t.value, 'ID')
+ return t
- # code literal
- 'CODELIT',
+ # Integer literal
+ def t_INTLIT(self, t):
+ r'(0x[\da-fA-F]+)|\d+'
+ try:
+ t.value = int(t.value,0)
+ except ValueError:
+ error(t.lexer.lineno, 'Integer value "%s" too large' % t.value)
+ t.value = 0
+ return t
- # ( ) [ ] { } < > , ; . : :: *
- 'LPAREN', 'RPAREN',
- 'LBRACKET', 'RBRACKET',
- 'LBRACE', 'RBRACE',
- 'LESS', 'GREATER', 'EQUALS',
- 'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
- 'ASTERISK',
+ # String literal. Note that these use only single quotes, and
+ # can span multiple lines.
+ def t_STRLIT(self, t):
+ r"(?m)'([^'])+'"
+ # strip off quotes
+ t.value = t.value[1:-1]
+ t.lexer.lineno += t.value.count('\n')
+ return t
- # C preprocessor directives
- 'CPPDIRECTIVE'
-# The following are matched but never returned. commented out to
-# suppress PLY warning
- # newfile directive
-# 'NEWFILE',
+ # "Code literal"... like a string literal, but delimiters are
+ # '{{' and '}}' so they get formatted nicely under emacs c-mode
+ def t_CODELIT(self, t):
+ r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
+ # strip off {{ & }}
+ t.value = t.value[2:-2]
+ t.lexer.lineno += t.value.count('\n')
+ return t
- # endfile directive
-# 'ENDFILE'
-)
+ def t_CPPDIRECTIVE(self, t):
+ r'^\#[^\#].*\n'
+ t.lexer.lineno += t.value.count('\n')
+ return t
-# Regular expressions for token matching
-t_LPAREN = r'\('
-t_RPAREN = r'\)'
-t_LBRACKET = r'\['
-t_RBRACKET = r'\]'
-t_LBRACE = r'\{'
-t_RBRACE = r'\}'
-t_LESS = r'\<'
-t_GREATER = r'\>'
-t_EQUALS = r'='
-t_COMMA = r','
-t_SEMI = r';'
-t_DOT = r'\.'
-t_COLON = r':'
-t_DBLCOLON = r'::'
-t_ASTERISK = r'\*'
+ def t_NEWFILE(self, t):
+ r'^\#\#newfile\s+"[\w/.-]*"'
+ fileNameStack.push((t.value[11:-1], t.lexer.lineno))
+ t.lexer.lineno = 0
-# Identifiers and reserved words
-reserved_map = { }
-for r in reserved:
- reserved_map[r.lower()] = r
+ def t_ENDFILE(self, t):
+ r'^\#\#endfile'
+ (old_filename, t.lexer.lineno) = fileNameStack.pop()
-def t_ID(t):
- r'[A-Za-z_]\w*'
- t.type = reserved_map.get(t.value,'ID')
- return t
+ #
+ # The functions t_NEWLINE, t_ignore, and t_error are
+ # special for the lex module.
+ #
-# Integer literal
-def t_INTLIT(t):
- r'(0x[\da-fA-F]+)|\d+'
- try:
- t.value = int(t.value,0)
- except ValueError:
- error(t.lexer.lineno, 'Integer value "%s" too large' % t.value)
- t.value = 0
- return t
+ # Newlines
+ def t_NEWLINE(self, t):
+ r'\n+'
+ t.lexer.lineno += t.value.count('\n')
-# String literal. Note that these use only single quotes, and
-# can span multiple lines.
-def t_STRLIT(t):
- r"(?m)'([^'])+'"
- # strip off quotes
- t.value = t.value[1:-1]
- t.lexer.lineno += t.value.count('\n')
- return t
+ # Comments
+ def t_comment(self, t):
+ r'//.*'
+ # Completely ignored characters
+ t_ignore = ' \t\x0c'
-# "Code literal"... like a string literal, but delimiters are
-# '{{' and '}}' so they get formatted nicely under emacs c-mode
-def t_CODELIT(t):
- r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
- # strip off {{ & }}
- t.value = t.value[2:-2]
- t.lexer.lineno += t.value.count('\n')
- return t
+ # Error handler
+ def t_error(self, t):
+ error(t.lexer.lineno, "illegal character '%s'" % t.value[0])
+        t.lexer.skip(1)
-def t_CPPDIRECTIVE(t):
- r'^\#[^\#].*\n'
- t.lexer.lineno += t.value.count('\n')
- return t
+ #####################################################################
+ #
+ # Parser
+ #
+ # Every function whose name starts with 'p_' defines a grammar
+ # rule. The rule is encoded in the function's doc string, while
+ # the function body provides the action taken when the rule is
+ # matched. The argument to each function is a list of the values
+ # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
+ # symbols on the RHS. For tokens, the value is copied from the
+ # t.value attribute provided by the lexer. For non-terminals, the
+ # value is assigned by the producing rule; i.e., the job of the
+ # grammar rule function is to set the value for the non-terminal
+ # on the LHS (by assigning to t[0]).
+ #####################################################################
-def t_NEWFILE(t):
- r'^\#\#newfile\s+"[\w/.-]*"'
- fileNameStack.push((t.value[11:-1], t.lexer.lineno))
- t.lexer.lineno = 0
-
-def t_ENDFILE(t):
- r'^\#\#endfile'
- (old_filename, t.lexer.lineno) = fileNameStack.pop()
-
-#
-# The functions t_NEWLINE, t_ignore, and t_error are
-# special for the lex module.
-#
-
-# Newlines
-def t_NEWLINE(t):
- r'\n+'
- t.lexer.lineno += t.value.count('\n')
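
The diff is truncated here; the remainder presumably continues with the
p_* grammar rules that the parser comment block above introduces. What
makes the whole refactoring work is that PLY accepts an arbitrary object
through its module= argument and scans it for t_* and p_* methods, so the
lexer and parser rules can live on a class instance instead of at module
scope. A self-contained toy example of that pattern (illustrative only,
not code from this changeset):

    from ply import lex, yacc

    class CalcGrammar(object):
        tokens = ('INTLIT', 'PLUS')

        t_ignore = ' \t'
        t_PLUS = r'\+'

        def t_INTLIT(self, t):
            r'\d+'
            t.value = int(t.value)
            return t

        def t_error(self, t):
            print("illegal character %r" % t.value[0])
            t.lexer.skip(1)

        # p_* rules: the docstring is the production; the body sets
        # t[0] (the LHS value) from t[1..n] (the RHS symbol values).
        def p_expr_plus(self, t):
            'expr : expr PLUS INTLIT'
            t[0] = t[1] + t[3]

        def p_expr_single(self, t):
            'expr : INTLIT'
            t[0] = t[1]

        def p_error(self, t):
            print("syntax error at %r" % (t,))

        def parse(self, text):
            lexer = lex.lex(module=self)
            parser = yacc.yacc(module=self, write_tables=False, debug=False)
            return parser.parse(text, lexer=lexer)

    # CalcGrammar().parse('1 + 2 + 3') evaluates to 6
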
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev