changeset 9bc3e4611009 in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=9bc3e4611009
description:
        isa_parser: Turn the ISA Parser into a subclass of Grammar.
        This prepares for future cleanup in which SCons will create a
        separate grammar class for each ISA.
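
For context, PLY can build a lexer and parser from the attributes of an
object rather than from module scope, which is what makes this
refactoring possible.  A minimal sketch of the pattern (assuming the
Grammar base class simply hands the instance to PLY; the real
implementation in m5.util.grammar may differ in detail):

    from ply import lex, yacc

    class Grammar(object):
        def __init__(self):
            # module=self makes PLY search this instance, rather than
            # the enclosing module, for the t_* and p_* rule definitions.
            self.lexer = lex.lex(module=self)
            self.parser = yacc.yacc(module=self, write_tables=False)

        def parse(self, data):
            return self.parser.parse(data, lexer=self.lexer)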

diffstat:

src/arch/isa_parser.py | 1202 ++++++++++++++++++++++++------------------------
1 file changed, 604 insertions(+), 598 deletions(-)

diffs (truncated from 1337 to 300 lines):

diff -r a886774d5ae1 -r 9bc3e4611009 src/arch/isa_parser.py
--- a/src/arch/isa_parser.py    Wed Sep 23 18:28:29 2009 -0700
+++ b/src/arch/isa_parser.py    Wed Sep 23 18:28:29 2009 -0700
@@ -34,697 +34,699 @@
 # get type names
 from types import *
 
-from ply import lex
-from ply import yacc
+from m5.util.grammar import Grammar
 
-#####################################################################
-#
-#                                Lexer
-#
-# The PLY lexer module takes two things as input:
-# - A list of token names (the string list 'tokens')
-# - A regular expression describing a match for each token.  The
-#   regexp for token FOO can be provided in two ways:
-#   - as a string variable named t_FOO
-#   - as the doc string for a function named t_FOO.  In this case,
-#     the function is also executed, allowing an action to be
-#     associated with each token match.
-#
-#####################################################################
+class ISAParser(Grammar):
+    def __init__(self, *args, **kwargs):
+        super(ISAParser, self).__init__(*args, **kwargs)
+        self.templateMap = {}
 
-# Reserved words.  These are listed separately as they are matched
-# using the same regexp as generic IDs, but distinguished in the
-# t_ID() function.  The PLY documentation suggests this approach.
-reserved = (
-    'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
-    'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
-    'OUTPUT', 'SIGNED', 'TEMPLATE'
+    #####################################################################
+    #
+    #                                Lexer
+    #
+    # The PLY lexer module takes two things as input:
+    # - A list of token names (the string list 'tokens')
+    # - A regular expression describing a match for each token.  The
+    #   regexp for token FOO can be provided in two ways:
+    #   - as a string variable named t_FOO
+    #   - as the doc string for a function named t_FOO.  In this case,
+    #     the function is also executed, allowing an action to be
+    #     associated with each token match.
+    #
+    #####################################################################
+
+    # Reserved words.  These are listed separately as they are matched
+    # using the same regexp as generic IDs, but distinguished in the
+    # t_ID() function.  The PLY documentation suggests this approach.
+    reserved = (
+        'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
+        'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
+        'OUTPUT', 'SIGNED', 'TEMPLATE'
+        )
+
+    # List of tokens.  The lex module requires this.
+    tokens = reserved + (
+        # identifier
+        'ID',
+
+        # integer literal
+        'INTLIT',
+
+        # string literal
+        'STRLIT',
+
+        # code literal
+        'CODELIT',
+
+        # ( ) [ ] { } < > , ; . : :: *
+        'LPAREN', 'RPAREN',
+        'LBRACKET', 'RBRACKET',
+        'LBRACE', 'RBRACE',
+        'LESS', 'GREATER', 'EQUALS',
+        'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
+        'ASTERISK',
+
+        # C preprocessor directives
+        'CPPDIRECTIVE'
+
+    # The following are matched but never returned.  Commented out to
+    # suppress the PLY warning.
+        # newfile directive
+    #    'NEWFILE',
+
+        # endfile directive
+    #    'ENDFILE'
     )
 
-# List of tokens.  The lex module requires this.
-tokens = reserved + (
-    # identifier
-    'ID',
+    # Regular expressions for token matching
+    t_LPAREN           = r'\('
+    t_RPAREN           = r'\)'
+    t_LBRACKET         = r'\['
+    t_RBRACKET         = r'\]'
+    t_LBRACE           = r'\{'
+    t_RBRACE           = r'\}'
+    t_LESS             = r'\<'
+    t_GREATER          = r'\>'
+    t_EQUALS           = r'='
+    t_COMMA            = r','
+    t_SEMI             = r';'
+    t_DOT              = r'\.'
+    t_COLON            = r':'
+    t_DBLCOLON         = r'::'
+    t_ASTERISK         = r'\*'
 
-    # integer literal
-    'INTLIT',
+    # Identifiers and reserved words
+    reserved_map = { }
+    for r in reserved:
+        reserved_map[r.lower()] = r
 
-    # string literal
-    'STRLIT',
+    def t_ID(self, t):
+        r'[A-Za-z_]\w*'
+        t.type = self.reserved_map.get(t.value, 'ID')
+        return t
 
-    # code literal
-    'CODELIT',
+    # Integer literal
+    def t_INTLIT(self, t):
+        r'(0x[\da-fA-F]+)|\d+'
+        try:
+            t.value = int(t.value,0)
+        except ValueError:
+            error(t.lexer.lineno, 'Integer value "%s" too large' % t.value)
+            t.value = 0
+        return t
 
-    # ( ) [ ] { } < > , ; . : :: *
-    'LPAREN', 'RPAREN',
-    'LBRACKET', 'RBRACKET',
-    'LBRACE', 'RBRACE',
-    'LESS', 'GREATER', 'EQUALS',
-    'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
-    'ASTERISK',
+    # String literal.  Note that these use only single quotes, and
+    # can span multiple lines.
+    def t_STRLIT(self, t):
+        r"(?m)'([^'])+'"
+        # strip off quotes
+        t.value = t.value[1:-1]
+        t.lexer.lineno += t.value.count('\n')
+        return t
 
-    # C preprocessor directives
-    'CPPDIRECTIVE'
 
-# The following are matched but never returned. commented out to
-# suppress PLY warning
-    # newfile directive
-#    'NEWFILE',
+    # "Code literal"... like a string literal, but delimiters are
+    # '{{' and '}}' so they get formatted nicely under emacs c-mode
+    def t_CODELIT(self, t):
+        r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
+        # strip off {{ & }}
+        t.value = t.value[2:-2]
+        t.lexer.lineno += t.value.count('\n')
+        return t
 
-    # endfile directive
-#    'ENDFILE'
-)
+    def t_CPPDIRECTIVE(self, t):
+        r'^\#[^\#].*\n'
+        t.lexer.lineno += t.value.count('\n')
+        return t
 
-# Regular expressions for token matching
-t_LPAREN           = r'\('
-t_RPAREN           = r'\)'
-t_LBRACKET         = r'\['
-t_RBRACKET         = r'\]'
-t_LBRACE           = r'\{'
-t_RBRACE           = r'\}'
-t_LESS             = r'\<'
-t_GREATER          = r'\>'
-t_EQUALS           = r'='
-t_COMMA            = r','
-t_SEMI             = r';'
-t_DOT              = r'\.'
-t_COLON            = r':'
-t_DBLCOLON         = r'::'
-t_ASTERISK         = r'\*'
+    def t_NEWFILE(self, t):
+        r'^\#\#newfile\s+"[\w/.-]*"'
+        fileNameStack.push((t.value[11:-1], t.lexer.lineno))
+        t.lexer.lineno = 0
 
-# Identifiers and reserved words
-reserved_map = { }
-for r in reserved:
-    reserved_map[r.lower()] = r
+    def t_ENDFILE(self, t):
+        r'^\#\#endfile'
+        (old_filename, t.lexer.lineno) = fileNameStack.pop()
 
-def t_ID(t):
-    r'[A-Za-z_]\w*'
-    t.type = reserved_map.get(t.value,'ID')
-    return t
+    #
+    # The functions t_NEWLINE, t_ignore, and t_error are
+    # special for the lex module.
+    #
 
-# Integer literal
-def t_INTLIT(t):
-    r'(0x[\da-fA-F]+)|\d+'
-    try:
-        t.value = int(t.value,0)
-    except ValueError:
-        error(t.lexer.lineno, 'Integer value "%s" too large' % t.value)
-        t.value = 0
-    return t
+    # Newlines
+    def t_NEWLINE(self, t):
+        r'\n+'
+        t.lexer.lineno += t.value.count('\n')
 
-# String literal.  Note that these use only single quotes, and
-# can span multiple lines.
-def t_STRLIT(t):
-    r"(?m)'([^'])+'"
-    # strip off quotes
-    t.value = t.value[1:-1]
-    t.lexer.lineno += t.value.count('\n')
-    return t
+    # Comments
+    def t_comment(self, t):
+        r'//.*'
 
+    # Completely ignored characters
+    t_ignore = ' \t\x0c'
 
-# "Code literal"... like a string literal, but delimiters are
-# '{{' and '}}' so they get formatted nicely under emacs c-mode
-def t_CODELIT(t):
-    r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
-    # strip off {{ & }}
-    t.value = t.value[2:-2]
-    t.lexer.lineno += t.value.count('\n')
-    return t
+    # Error handler
+    def t_error(self, t):
+        error(t.lexer.lineno, "illegal character '%s'" % t.value[0])
+        t.skip(1)
 
-def t_CPPDIRECTIVE(t):
-    r'^\#[^\#].*\n'
-    t.lexer.lineno += t.value.count('\n')
-    return t
+    #####################################################################
+    #
+    #                                Parser
+    #
+    # Every function whose name starts with 'p_' defines a grammar
+    # rule.  The rule is encoded in the function's doc string, while
+    # the function body provides the action taken when the rule is
+    # matched.  The argument to each function is a list of the values
+    # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
+    # symbols on the RHS.  For tokens, the value is copied from the
+    # t.value attribute provided by the lexer.  For non-terminals, the
+    # value is assigned by the producing rule; i.e., the job of the
+    # grammar rule function is to set the value for the non-terminal
+    # on the LHS (by assigning to t[0]).
+    #####################################################################
 
-def t_NEWFILE(t):
-    r'^\#\#newfile\s+"[\w/.-]*"'
-    fileNameStack.push((t.value[11:-1], t.lexer.lineno))
-    t.lexer.lineno = 0
-
-def t_ENDFILE(t):
-    r'^\#\#endfile'
-    (old_filename, t.lexer.lineno) = fileNameStack.pop()
-
-#
-# The functions t_NEWLINE, t_ignore, and t_error are
-# special for the lex module.
-#
-
-# Newlines
-def t_NEWLINE(t):
-    r'\n+'
-    t.lexer.lineno += t.value.count('\n')
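
(The diff is truncated before the parser rules themselves, but the
comment block above describes the convention they follow.  As an
illustration only -- not code from this changeset -- a grammar rule
written as a method looks like:

    def p_expr_paren(self, t):
        'expr : LPAREN expr RPAREN'
        # t[1]..t[3] hold the values of the RHS symbols; assigning
        # t[0] sets the value of the 'expr' non-terminal on the LHS.
        t[0] = t[2]

PLY reads the production from the docstring and calls the method each
time that rule is reduced.)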