changeset e37de92468f1 in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=e37de92468f1
description:
isa_parser: move code around to prepare for putting more stuff in the
class
diffstat:
1 file changed, 921 insertions(+), 923 deletions(-)
src/arch/isa_parser.py | 1844 +++++++++++++++++++++++-------------------------
diffs (truncated from 1901 to 300 lines):
diff -r 102004662eda -r e37de92468f1 src/arch/isa_parser.py
--- a/src/arch/isa_parser.py Fri Feb 26 18:14:48 2010 -0800
+++ b/src/arch/isa_parser.py Fri Feb 26 18:14:48 2010 -0800
@@ -36,702 +36,221 @@
from m5.util.grammar import Grammar
-class ISAParser(Grammar):
- def __init__(self, *args, **kwargs):
- super(ISAParser, self).__init__(*args, **kwargs)
- self.templateMap = {}
+###################
+# Utility functions
- #####################################################################
- #
- # Lexer
- #
- # The PLY lexer module takes two things as input:
- # - A list of token names (the string list 'tokens')
- # - A regular expression describing a match for each token. The
- # regexp for token FOO can be provided in two ways:
- # - as a string variable named t_FOO
- # - as the doc string for a function named t_FOO. In this case,
- # the function is also executed, allowing an action to be
- # associated with each token match.
- #
- #####################################################################
+#
+# Indent every line in string 's' by two spaces
+# (except preprocessor directives).
+# Used to make nested code blocks look pretty.
+#
+def indent(s):
+ return re.sub(r'(?m)^(?!#)', ' ', s)
- # Reserved words. These are listed separately as they are matched
- # using the same regexp as generic IDs, but distinguished in the
- # t_ID() function. The PLY documentation suggests this approach.
- reserved = (
- 'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
- 'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
- 'OUTPUT', 'SIGNED', 'TEMPLATE'
- )
+#
+# Munge a somewhat arbitrarily formatted piece of Python code
+# (e.g. from a format 'let' block) into something whose indentation
+# will get by the Python parser.
+#
+# The two keys here are that Python will give a syntax error if
+# there's any whitespace at the beginning of the first line, and that
+# all lines at the same lexical nesting level must have identical
+# indentation. Unfortunately the way code literals work, an entire
+# let block tends to have some initial indentation. Rather than
+# trying to figure out what that is and strip it off, we prepend 'if
+# 1:' to make the let code the nested block inside the if (and have
+# the parser automatically deal with the indentation for us).
+#
+# We don't want to do this if (1) the code block is empty or (2) the
+# first line of the block doesn't have any whitespace at the front.
- # List of tokens. The lex module requires this.
- tokens = reserved + (
- # identifier
- 'ID',
+def fixPythonIndentation(s):
+ # get rid of blank lines first
+ s = re.sub(r'(?m)^\s*\n', '', s);
+ if (s != '' and re.match(r'[ \t]', s[0])):
+ s = 'if 1:\n' + s
+ return s
- # integer literal
- 'INTLIT',
+# Error handler. Just call exit. Output formatted to work under
+# Emacs compile-mode. Optional 'print_traceback' arg, if set to True,
+# prints a Python stack backtrace too (can be handy when trying to
+# debug the parser itself).
+def error(lineno, string, print_traceback = False):
+ spaces = ""
+ for (filename, line) in fileNameStack[0:-1]:
+ print spaces + "In file included from " + filename + ":"
+ spaces += " "
+ # Print a Python stack backtrace if requested.
+ if (print_traceback):
+ traceback.print_exc()
+ if lineno != 0:
+ line_str = "%d:" % lineno
+ else:
+ line_str = ""
+ sys.exit(spaces + "%s:%s %s" % (fileNameStack[-1][0], line_str, string))
- # string literal
- 'STRLIT',
+####################
+# Template objects.
+#
+# Template objects are format strings that allow substitution from
+# the attribute spaces of other objects (e.g. InstObjParams instances).
- # code literal
- 'CODELIT',
+labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')
- # ( ) [ ] { } < > , ; . : :: *
- 'LPAREN', 'RPAREN',
- 'LBRACKET', 'RBRACKET',
- 'LBRACE', 'RBRACE',
- 'LESS', 'GREATER', 'EQUALS',
- 'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
- 'ASTERISK',
+class Template(object):
+ def __init__(self, t):
+ self.template = t
- # C preprocessor directives
- 'CPPDIRECTIVE'
+ def subst(self, d):
+ myDict = None
- # The following are matched but never returned. commented out to
- # suppress PLY warning
- # newfile directive
- # 'NEWFILE',
+ # Protect non-Python-dict substitutions (e.g. if there's a printf
+ # in the templated C++ code)
+ template = protect_non_subst_percents(self.template)
+ # CPU-model-specific substitutions are handled later (in GenCode).
+ template = protect_cpu_symbols(template)
- # endfile directive
- # 'ENDFILE'
- )
+ # Build a dict ('myDict') to use for the template substitution.
+ # Start with the template namespace. Make a copy since we're
+ # going to modify it.
+ myDict = parser.templateMap.copy()
- # Regular expressions for token matching
- t_LPAREN = r'\('
- t_RPAREN = r'\)'
- t_LBRACKET = r'\['
- t_RBRACKET = r'\]'
- t_LBRACE = r'\{'
- t_RBRACE = r'\}'
- t_LESS = r'\<'
- t_GREATER = r'\>'
- t_EQUALS = r'='
- t_COMMA = r','
- t_SEMI = r';'
- t_DOT = r'\.'
- t_COLON = r':'
- t_DBLCOLON = r'::'
- t_ASTERISK = r'\*'
+ if isinstance(d, InstObjParams):
+ # If we're dealing with an InstObjParams object, we need
+ # to be a little more sophisticated. The instruction-wide
+ # parameters are already formed, but the parameters which
+ # are only function-wide still need to be generated.
+ compositeCode = ''
- # Identifiers and reserved words
- reserved_map = { }
- for r in reserved:
- reserved_map[r.lower()] = r
+ myDict.update(d.__dict__)
+ # The "operands" and "snippets" attributes of the InstObjParams
+ # objects are for internal use and not substitution.
+ del myDict['operands']
+ del myDict['snippets']
- def t_ID(self, t):
- r'[A-Za-z_]\w*'
- t.type = self.reserved_map.get(t.value, 'ID')
- return t
+ snippetLabels = [l for l in labelRE.findall(template)
+ if d.snippets.has_key(l)]
- # Integer literal
- def t_INTLIT(self, t):
- r'-?(0x[\da-fA-F]+)|\d+'
+ snippets = dict([(s, mungeSnippet(d.snippets[s]))
+ for s in snippetLabels])
+
+ myDict.update(snippets)
+
+ compositeCode = ' '.join(map(str, snippets.values()))
+
+ # Add in template itself in case it references any
+ # operands explicitly (like Mem)
+ compositeCode += ' ' + template
+
+ operands = SubOperandList(compositeCode, d.operands)
+
+ myDict['op_decl'] = operands.concatAttrStrings('op_decl')
+
+ is_src = lambda op: op.is_src
+ is_dest = lambda op: op.is_dest
+
+ myDict['op_src_decl'] = \
+ operands.concatSomeAttrStrings(is_src, 'op_src_decl')
+ myDict['op_dest_decl'] = \
+ operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')
+
+ myDict['op_rd'] = operands.concatAttrStrings('op_rd')
+ myDict['op_wb'] = operands.concatAttrStrings('op_wb')
+
+ if d.operands.memOperand:
+ myDict['mem_acc_size'] = d.operands.memOperand.mem_acc_size
+ myDict['mem_acc_type'] = d.operands.memOperand.mem_acc_type
+
+ elif isinstance(d, dict):
+ # if the argument is a dictionary, we just use it.
+ myDict.update(d)
+ elif hasattr(d, '__dict__'):
+ # if the argument is an object, we use its attribute map.
+ myDict.update(d.__dict__)
+ else:
+ raise TypeError, "Template.subst() arg must be or have dictionary"
+ return template % myDict
+
+ # Convert to string. This handles the case when a template with a
+ # CPU-specific term gets interpolated into another template or into
+ # an output block.
+ def __str__(self):
+ return expand_cpu_symbols_to_string(self.template)
+
+################
+# Format object.
+#
+# A format object encapsulates an instruction format. It must provide
+# a defineInst() method that generates the code for an instruction
+# definition.
+
+exportContextSymbols = ('InstObjParams', 'makeList', 're', 'string')
+
+exportContext = {}
+
+def updateExportContext():
+ exportContext.update(exportDict(*exportContextSymbols))
+ exportContext.update(parser.templateMap)
+
+def exportDict(*symNames):
+ return dict([(s, eval(s)) for s in symNames])
+
+
+class Format(object):
+ def __init__(self, id, params, code):
+ # constructor: just save away arguments
+ self.id = id
+ self.params = params
+ label = 'def format ' + id
+ self.user_code = compile(fixPythonIndentation(code), label, 'exec')
+ param_list = string.join(params, ", ")
+ f = '''def defInst(_code, _context, %s):
+ my_locals = vars().copy()
+ exec _code in _context, my_locals
+ return my_locals\n''' % param_list
+ c = compile(f, label + ' wrapper', 'exec')
+ exec c
+ self.func = defInst
+
+ def defineInst(self, name, args, lineno):
+ context = {}
+ updateExportContext()
+ context.update(exportContext)
+ if len(name):
+ Name = name[0].upper()
+ if len(name) > 1:
+ Name += name[1:]
+ context.update({ 'name': name, 'Name': Name })
try:
- t.value = int(t.value,0)
- except ValueError:
- error(t.lexer.lineno, 'Integer value "%s" too large' % t.value)
- t.value = 0
- return t
+ vars = self.func(self.user_code, context, *args[0], **args[1])
+ except Exception, exc:
+ error(lineno, 'error defining "%s": %s.' % (name, exc))
+ for k in vars.keys():
+ if k not in ('header_output', 'decoder_output',
+ 'exec_output', 'decode_block'):
+ del vars[k]
+ return GenCode(**vars)
- # String literal. Note that these use only single quotes, and
- # can span multiple lines.
- def t_STRLIT(self, t):
- r"(?m)'([^'])+'"
- # strip off quotes
- t.value = t.value[1:-1]
- t.lexer.lineno += t.value.count('\n')
- return t
+# Special null format to catch an implicit-format instruction
+# definition outside of any format block.
+class NoFormat(object):
+ def __init__(self):
+ self.defaultInst = ''
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev