[m5-dev] changeset in m5: isa_parser: move code around to prepare for put...

Nathan Binkert Sat, 27 Feb 2010 08:37:26 -0800

changeset e37de92468f1 in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=e37de92468f1
description:
        isa_parser: move code around to prepare for putting more stuff in the class


diffstat:

1 file changed, 921 insertions(+), 923 deletions(-)
src/arch/isa_parser.py | 1844 +++++++++++++++++++++++-------------------------

diffs (truncated from 1901 to 300 lines):

diff -r 102004662eda -r e37de92468f1 src/arch/isa_parser.py
--- a/src/arch/isa_parser.py    Fri Feb 26 18:14:48 2010 -0800
+++ b/src/arch/isa_parser.py    Fri Feb 26 18:14:48 2010 -0800
@@ -36,702 +36,221 @@
 
 from m5.util.grammar import Grammar
 
-class ISAParser(Grammar):
-    def __init__(self, *args, **kwargs):
-        super(ISAParser, self).__init__(*args, **kwargs)
-        self.templateMap = {}
+###################
+# Utility functions
 
-    #####################################################################
-    #
-    #                                Lexer
-    #
-    # The PLY lexer module takes two things as input:
-    # - A list of token names (the string list 'tokens')
-    # - A regular expression describing a match for each token.  The
-    #   regexp for token FOO can be provided in two ways:
-    #   - as a string variable named t_FOO
-    #   - as the doc string for a function named t_FOO.  In this case,
-    #     the function is also executed, allowing an action to be
-    #     associated with each token match.
-    #
-    #####################################################################
+#
+# Indent every line in string 's' by two spaces
+# (except preprocessor directives).
+# Used to make nested code blocks look pretty.
+#
+def indent(s):
+    return re.sub(r'(?m)^(?!#)', '  ', s)
 
-    # Reserved words.  These are listed separately as they are matched
-    # using the same regexp as generic IDs, but distinguished in the
-    # t_ID() function.  The PLY documentation suggests this approach.
-    reserved = (
-        'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
-        'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
-        'OUTPUT', 'SIGNED', 'TEMPLATE'
-        )
+#
+# Munge a somewhat arbitrarily formatted piece of Python code
+# (e.g. from a format 'let' block) into something whose indentation
+# will get by the Python parser.
+#
+# The two keys here are that Python will give a syntax error if
+# there's any whitespace at the beginning of the first line, and that
+# all lines at the same lexical nesting level must have identical
+# indentation.  Unfortunately the way code literals work, an entire
+# let block tends to have some initial indentation.  Rather than
+# trying to figure out what that is and strip it off, we prepend 'if
+# 1:' to make the let code the nested block inside the if (and have
+# the parser automatically deal with the indentation for us).
+#
+# We don't want to do this if (1) the code block is empty or (2) the
+# first line of the block doesn't have any whitespace at the front.
 
-    # List of tokens.  The lex module requires this.
-    tokens = reserved + (
-        # identifier
-        'ID',
+def fixPythonIndentation(s):
+    # get rid of blank lines first
+    s = re.sub(r'(?m)^\s*\n', '', s);
+    if (s != '' and re.match(r'[ \t]', s[0])):
+        s = 'if 1:\n' + s
+    return s
 
-        # integer literal
-        'INTLIT',
+# Error handler.  Just call exit.  Output formatted to work under
+# Emacs compile-mode.  Optional 'print_traceback' arg, if set to True,
+# prints a Python stack backtrace too (can be handy when trying to
+# debug the parser itself).
+def error(lineno, string, print_traceback = False):
+    spaces = ""
+    for (filename, line) in fileNameStack[0:-1]:
+        print spaces + "In file included from " + filename + ":"
+        spaces += "  "
+    # Print a Python stack backtrace if requested.
+    if (print_traceback):
+        traceback.print_exc()
+    if lineno != 0:
+        line_str = "%d:" % lineno
+    else:
+        line_str = ""
+    sys.exit(spaces + "%s:%s %s" % (fileNameStack[-1][0], line_str, string))
 
-        # string literal
-        'STRLIT',
+####################
+# Template objects.
+#
+# Template objects are format strings that allow substitution from
+# the attribute spaces of other objects (e.g. InstObjParams instances).
 
-        # code literal
-        'CODELIT',
+labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')
 
-        # ( ) [ ] { } < > , ; . : :: *
-        'LPAREN', 'RPAREN',
-        'LBRACKET', 'RBRACKET',
-        'LBRACE', 'RBRACE',
-        'LESS', 'GREATER', 'EQUALS',
-        'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
-        'ASTERISK',
+class Template(object):
+    def __init__(self, t):
+        self.template = t
 
-        # C preprocessor directives
-        'CPPDIRECTIVE'
+    def subst(self, d):
+        myDict = None
 
-    # The following are matched but never returned. commented out to
-    # suppress PLY warning
-        # newfile directive
-    #    'NEWFILE',
+        # Protect non-Python-dict substitutions (e.g. if there's a printf
+        # in the templated C++ code)
+        template = protect_non_subst_percents(self.template)
+        # CPU-model-specific substitutions are handled later (in GenCode).
+        template = protect_cpu_symbols(template)
 
-        # endfile directive
-    #    'ENDFILE'
-    )
+        # Build a dict ('myDict') to use for the template substitution.
+        # Start with the template namespace.  Make a copy since we're
+        # going to modify it.
+        myDict = parser.templateMap.copy()
 
-    # Regular expressions for token matching
-    t_LPAREN           = r'\('
-    t_RPAREN           = r'\)'
-    t_LBRACKET         = r'\['
-    t_RBRACKET         = r'\]'
-    t_LBRACE           = r'\{'
-    t_RBRACE           = r'\}'
-    t_LESS             = r'\<'
-    t_GREATER          = r'\>'
-    t_EQUALS           = r'='
-    t_COMMA            = r','
-    t_SEMI             = r';'
-    t_DOT              = r'\.'
-    t_COLON            = r':'
-    t_DBLCOLON         = r'::'
-    t_ASTERISK         = r'\*'
+        if isinstance(d, InstObjParams):
+            # If we're dealing with an InstObjParams object, we need
+            # to be a little more sophisticated.  The instruction-wide
+            # parameters are already formed, but the parameters which
+            # are only function wide still need to be generated.
+            compositeCode = ''
 
-    # Identifiers and reserved words
-    reserved_map = { }
-    for r in reserved:
-        reserved_map[r.lower()] = r
+            myDict.update(d.__dict__)
+            # The "operands" and "snippets" attributes of the InstObjParams
+            # objects are for internal use and not substitution.
+            del myDict['operands']
+            del myDict['snippets']
 
-    def t_ID(self, t):
-        r'[A-Za-z_]\w*'
-        t.type = self.reserved_map.get(t.value, 'ID')
-        return t
+            snippetLabels = [l for l in labelRE.findall(template)
+                             if d.snippets.has_key(l)]
 
-    # Integer literal
-    def t_INTLIT(self, t):
-        r'-?(0x[\da-fA-F]+)|\d+'
+            snippets = dict([(s, mungeSnippet(d.snippets[s]))
+                             for s in snippetLabels])
+
+            myDict.update(snippets)
+
+            compositeCode = ' '.join(map(str, snippets.values()))
+
+            # Add in template itself in case it references any
+            # operands explicitly (like Mem)
+            compositeCode += ' ' + template
+
+            operands = SubOperandList(compositeCode, d.operands)
+
+            myDict['op_decl'] = operands.concatAttrStrings('op_decl')
+
+            is_src = lambda op: op.is_src
+            is_dest = lambda op: op.is_dest
+
+            myDict['op_src_decl'] = \
+                      operands.concatSomeAttrStrings(is_src, 'op_src_decl')
+            myDict['op_dest_decl'] = \
+                      operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')
+
+            myDict['op_rd'] = operands.concatAttrStrings('op_rd')
+            myDict['op_wb'] = operands.concatAttrStrings('op_wb')
+
+            if d.operands.memOperand:
+                myDict['mem_acc_size'] = d.operands.memOperand.mem_acc_size
+                myDict['mem_acc_type'] = d.operands.memOperand.mem_acc_type
+
+        elif isinstance(d, dict):
+            # if the argument is a dictionary, we just use it.
+            myDict.update(d)
+        elif hasattr(d, '__dict__'):
+            # if the argument is an object, we use its attribute map.
+            myDict.update(d.__dict__)
+        else:
+            raise TypeError, "Template.subst() arg must be or have dictionary"
+        return template % myDict
+
+    # Convert to string.  This handles the case when a template with a
+    # CPU-specific term gets interpolated into another template or into
+    # an output block.
+    def __str__(self):
+        return expand_cpu_symbols_to_string(self.template)
+
+################
+# Format object.
+#
+# A format object encapsulates an instruction format.  It must provide
+# a defineInst() method that generates the code for an instruction
+# definition.
+
+exportContextSymbols = ('InstObjParams', 'makeList', 're', 'string')
+
+exportContext = {}
+
+def updateExportContext():
+    exportContext.update(exportDict(*exportContextSymbols))
+    exportContext.update(parser.templateMap)
+
+def exportDict(*symNames):
+    return dict([(s, eval(s)) for s in symNames])
+
+
+class Format(object):
+    def __init__(self, id, params, code):
+        # constructor: just save away arguments
+        self.id = id
+        self.params = params
+        label = 'def format ' + id
+        self.user_code = compile(fixPythonIndentation(code), label, 'exec')
+        param_list = string.join(params, ", ")
+        f = '''def defInst(_code, _context, %s):
+                my_locals = vars().copy()
+                exec _code in _context, my_locals
+                return my_locals\n''' % param_list
+        c = compile(f, label + ' wrapper', 'exec')
+        exec c
+        self.func = defInst
+
+    def defineInst(self, name, args, lineno):
+        context = {}
+        updateExportContext()
+        context.update(exportContext)
+        if len(name):
+            Name = name[0].upper()
+            if len(name) > 1:
+                Name += name[1:]
+        context.update({ 'name': name, 'Name': Name })
         try:
-            t.value = int(t.value,0)
-        except ValueError:
-            error(t.lexer.lineno, 'Integer value "%s" too large' % t.value)
-            t.value = 0
-        return t
+            vars = self.func(self.user_code, context, *args[0], **args[1])
+        except Exception, exc:
+            error(lineno, 'error defining "%s": %s.' % (name, exc))
+        for k in vars.keys():
+            if k not in ('header_output', 'decoder_output',
+                         'exec_output', 'decode_block'):
+                del vars[k]
+        return GenCode(**vars)
 
-    # String literal.  Note that these use only single quotes, and
-    # can span multiple lines.
-    def t_STRLIT(self, t):
-        r"(?m)'([^'])+'"
-        # strip off quotes
-        t.value = t.value[1:-1]
-        t.lexer.lineno += t.value.count('\n')
-        return t
+# Special null format to catch an implicit-format instruction
+# definition outside of any format block.
+class NoFormat(object):
+    def __init__(self):
+        self.defaultInst = ''
 
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

[m5-dev] changeset in m5: isa_parser: move code around to prepare for put...

Reply via email to