Gabe Black has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/56335 )

Change subject: arch: Encapsulate the ucode asm into a class.
......................................................................

arch: Encapsulate the ucode asm into a class.

Store the assembler state in this class, rather than tacking it onto the
parser. This will also pave the way to splitting the parser into a top
level parser, and an assembly block parser which follows different
rules.

Change-Id: Id4498a00ebc276940bc6a2f140a43460664d47da
---
M src/arch/micro_asm.py
1 file changed, 272 insertions(+), 261 deletions(-)



diff --git a/src/arch/micro_asm.py b/src/arch/micro_asm.py
index da52a61..6e3a3a0 100644
--- a/src/arch/micro_asm.py
+++ b/src/arch/micro_asm.py
@@ -103,14 +103,14 @@
         super().__init__(name)
         self.params = params

-    def handle(self, parser, container):
-        microop = parser.microops.get(self.name, None)
+    def handle(self, assembler, container):
+        microop = assembler.microops.get(self.name, None)
         if not microop:
             raise Exception(f'Unrecongized mnemonic: "{self.name}"')

         try:
             microop = eval(f'_cls({self.params})',
-                    {'_cls': microop}, parser.symbols)
+                    {'_cls': microop}, assembler.symbols)
         except:
             print_error(f'Error instantiating microop "{self.name}"')
             raise
@@ -122,13 +122,14 @@
         super().__init__(name, True)
         self.params = params

-    def handle(self, parser, container):
+    def handle(self, assembler, container):
         directive = container.directives.get(self.name, None)
         if not directive:
             raise Exception(f'Unrecognized directive: "{self.name}"')
         local = {'_dir': directive}
         try:
-            eval(f'_dir({self.params})', {'_dir': directive}, parser.symbols)
+            eval(f'_dir({self.params})',
+                    {'_dir': directive}, assembler.symbols)
         except:
             print_error(f'Error executing directive "{self.name}"')
             raise
@@ -138,7 +139,7 @@
         super().__init__(name)
         self.is_extern = is_extern

-    def handle(self, parser, container):
+    def handle(self, assembler, container):
         container.add_label(self)

 ##########################################################################
@@ -152,306 +153,302 @@
     print("*** %s" % message)
     print()

-##########################################################################
-#
-# Lexer specification
-#
-##########################################################################
+class MicroAssembler:
+    ######################################################################
+    #
+    # Lexer specification
+    #
+    ######################################################################

-# Error handler.  Just call exit.  Output formatted to work under
-# Emacs compile-mode.  Optional 'print_traceback' arg, if set to True,
-# prints a Python stack backtrace too (can be handy when trying to
-# debug the parser itself).
-def error(lineno, string, print_traceback = False):
-    # Print a Python stack backtrace if requested.
-    if (print_traceback):
-        traceback.print_exc()
-    if lineno != 0:
-        line_str = "%d:" % lineno
-    else:
-        line_str = ""
-    sys.exit("%s %s" % (line_str, string))
+    # Error handler.  Just call exit.  Output formatted to work under
+    # Emacs compile-mode.  Optional 'print_traceback' arg, if set to True,
+    # prints a Python stack backtrace too (can be handy when trying to
+    # debug the parser itself).
+    def error(self, lineno, string, print_traceback = False):
+        # Print a Python stack backtrace if requested.
+        if (print_traceback):
+            traceback.print_exc()
+        if lineno != 0:
+            line_str = "%d:" % lineno
+        else:
+            line_str = ""
+        sys.exit("%s %s" % (line_str, string))

-reserved = ('DEF', 'MACROOP', 'ROM', 'EXTERN')
+    reserved = ('DEF', 'MACROOP', 'ROM', 'EXTERN')

-tokens = reserved + (
-        # identifier
-        'ID',
-        # arguments for microops and directives
-        'PARAMS',
+    tokens = reserved + (
+            # identifier
+            'ID',
+            # arguments for microops and directives
+            'PARAMS',

-        'LPAREN', 'RPAREN',
-        'LBRACE', 'RBRACE',
-        'COLON', 'SEMI', 'DOT',
-        'NEWLINE'
-        )
+            'LPAREN', 'RPAREN',
+            'LBRACE', 'RBRACE',
+            'COLON', 'SEMI', 'DOT',
+            'NEWLINE'
+            )

-# New lines are ignored at the top level, but they end statements in the
-# assembler
-states = (
-    ('asm', 'exclusive'),
-    ('params', 'exclusive'),
-    ('header', 'exclusive'),
-)
+    # New lines are ignored at the top level, but they end statements in the
+    # assembler
+    states = (
+        ('asm', 'exclusive'),
+        ('params', 'exclusive'),
+        ('header', 'exclusive'),
+    )

-reserved_map = { }
-for r in reserved:
-    reserved_map[r.lower()] = r
+    reserved_map = { }
+    for r in reserved:
+        reserved_map[r.lower()] = r

-# Ignore comments
-def t_ANY_COMMENT(t):
-    r'\#[^\n]*(?=\n)'
+    # Ignore comments
+    def t_ANY_COMMENT(self, t):
+        r'\#[^\n]*(?=\n)'

-def t_ANY_MULTILINECOMMENT(t):
-    r'/\*([^/]|((?<!\*)/))*\*/'
+    def t_ANY_MULTILINECOMMENT(self, t):
+        r'/\*([^/]|((?<!\*)/))*\*/'

-# A colon marks the end of a label. It should follow an ID which will
-# put the lexer in the "params" state. Seeing the colon will put it back
-# in the "asm" state since it knows it saw a label and not a mnemonic.
-def t_params_COLON(t):
-    r':'
-    t.lexer.pop_state()
-    return t
+    # A colon marks the end of a label. It should follow an ID which will
+    # put the lexer in the "params" state. Seeing the colon will put it back
+    # in the "asm" state since it knows it saw a label and not a mnemonic.
+    def t_params_COLON(self, t):
+        r':'
+        t.lexer.pop_state()
+        return t

-# Parameters are a string of text which don't contain an unescaped statement
-# statement terminator, ie a newline or semi colon.
-def t_params_PARAMS(t):
-    r'([^\n;\\]|(\\[\n;\\]))+'
-    t.lineno += t.value.count('\n')
-    unescapeParamsRE = re.compile(r'(\\[\n;\\])')
-    def unescapeParams(mo):
-        val = mo.group(0)
-        return val[1]
-    t.value = unescapeParamsRE.sub(unescapeParams, t.value)
-    t.lexer.pop_state()
-    return t
+    # Parameters are a string of text which don't contain an unescaped
+    # statement terminator, ie a newline or semi colon.
+    def t_params_PARAMS(self, t):
+        r'([^\n;\\]|(\\[\n;\\]))+'
+        t.lineno += t.value.count('\n')
+        unescapeParamsRE = re.compile(r'(\\[\n;\\])')
+        def unescapeParams(mo):
+            val = mo.group(0)
+            return val[1]
+        t.value = unescapeParamsRE.sub(unescapeParams, t.value)
+        t.lexer.pop_state()
+        return t

-# An "ID" in the micro assembler is either a label, directive, or mnemonic
-# If it's either a directive or a mnemonic, it will be optionally followed by
-# parameters. If it's a label, the following colon will make the lexer stop
-# looking for parameters.
-def t_asm_ID(t):
-    r'[A-Za-z_]\w*'
-    t.type = reserved_map.get(t.value, 'ID')
-    # If the ID is really "extern", we shouldn't start looking for parameters
-    # yet. The real ID, the label itself, is coming up.
-    if t.type != 'EXTERN':
-        t.lexer.push_state('params')
-    return t
+    # An "ID" in the micro assembler is either a label, directive, or mnemonic
+    # If it's either a directive or a mnemonic, it will be optionally followed
+    # by parameters. If it's a label, the following colon will make the lexer
+    # stop looking for parameters.
+    def t_asm_ID(self, t):
+        r'[A-Za-z_]\w*'
+        t.type = self.reserved_map.get(t.value, 'ID')
+        # If the ID is really "extern", we shouldn't start looking for
+        # parameters yet. The real ID, the label itself, is coming up.
+        if t.type != 'EXTERN':
+            t.lexer.push_state('params')
+        return t

-def t_header_ID(t):
-    r'[A-Za-z_]\w*'
-    return t
+    def t_header_ID(self, t):
+        r'[A-Za-z_]\w*'
+        return t

-# If there is a label and you're -not- in the assembler (which would be caught
-# above), don't start looking for parameters.
-def t_ANY_ID(t):
-    r'[A-Za-z_]\w*'
-    t.type = reserved_map.get(t.value, 'ID')
-    if t.type == 'MACROOP':
-        t.lexer.push_state('asm')
-        t.lexer.push_state('header')
-    elif t.type == 'ROM':
-        t.lexer.push_state('asm')
-        t.lexer.push_state('header')
-    return t
+    # If there is a label and you're -not- in the assembler (which would be
+    # caught above), don't start looking for parameters.
+    def t_ANY_ID(self, t):
+        r'[A-Za-z_]\w*'
+        t.type = self.reserved_map.get(t.value, 'ID')
+        if t.type == 'MACROOP':
+            t.lexer.push_state('asm')
+            t.lexer.push_state('header')
+        elif t.type == 'ROM':
+            t.lexer.push_state('asm')
+            t.lexer.push_state('header')
+        return t

-# Braces enter and exit micro assembly
-def t_header_LBRACE(t):
-    r'\{'
-    t.lexer.pop_state()
-    return t
+    # Braces enter and exit micro assembly
+    def t_header_LBRACE(self, t):
+        r'\{'
+        t.lexer.pop_state()
+        return t

-def t_asm_RBRACE(t):
-    r'\}'
-    t.lexer.pop_state()
-    return t
+    def t_asm_RBRACE(self, t):
+        r'\}'
+        t.lexer.pop_state()
+        return t

-# In the micro assembler, do line counting but also return a token. The
-# token is needed by the parser to detect the end of a statement.
-def t_asm_NEWLINE(t):
-    r'\n+'
-    t.lineno += t.value.count('\n')
-    return t
+    # In the micro assembler, do line counting but also return a token. The
+    # token is needed by the parser to detect the end of a statement.
+    def t_asm_NEWLINE(self, t):
+        r'\n+'
+        t.lineno += t.value.count('\n')
+        return t

-# A newline or semi colon when looking for params signals that the statement
-# is over and the lexer should go back to looking for regular assembly.
-def t_params_NEWLINE(t):
-    r'\n+'
-    t.lineno += t.value.count('\n')
-    t.lexer.pop_state()
-    return t
+    # A newline or semi colon when looking for params signals that the
+    # statement is over and the lexer should go back to looking for regular
+    # assembly.
+    def t_params_NEWLINE(self, t):
+        r'\n+'
+        t.lineno += t.value.count('\n')
+        t.lexer.pop_state()
+        return t

-def t_params_SEMI(t):
-    r';'
-    t.lexer.pop_state()
-    return t
+    def t_params_SEMI(self, t):
+        r';'
+        t.lexer.pop_state()
+        return t

-# Unless handled specially above, track newlines only for line counting.
-def t_ANY_NEWLINE(t):
-    r'\n+'
-    t.lineno += t.value.count('\n')
+    # Unless handled specially above, track newlines only for line counting.
+    def t_ANY_NEWLINE(self, t):
+        r'\n+'
+        t.lineno += t.value.count('\n')

-# Basic regular expressions to pick out simple tokens
-t_ANY_LPAREN = r'\('
-t_ANY_RPAREN = r'\)'
-t_ANY_SEMI   = r';'
-t_ANY_DOT    = r'\.'
+    # Basic regular expressions to pick out simple tokens
+    t_ANY_LPAREN = r'\('
+    t_ANY_RPAREN = r'\)'
+    t_ANY_SEMI   = r';'
+    t_ANY_DOT    = r'\.'

-t_ANY_ignore = ' \t\x0c'
+    t_ANY_ignore = ' \t\x0c'

-def t_ANY_error(t):
-    error(t.lineno, "illegal character '%s'" % t.value[0])
-    t.skip(1)
+    def t_ANY_error(self, t):
+        error(t.lineno, "illegal character '%s'" % t.value[0])
+        t.skip(1)

-##########################################################################
-#
-# Parser specification
-#
-##########################################################################
+    ######################################################################
+    #
+    # Parser specification
+    #
+    ######################################################################

-# Start symbol for a file which may have more than one "object" defined in it.
-def p_file(t):
-    'file : opt_objects'
+    # A file which may have one or more "object" defined in it.
+    def p_file(self, t):
+        'file : opt_objects'

-# The objects are optional.
-def p_opt_objects(t):
-    '''opt_objects : objects
-                   |'''
+    # The objects are optional.
+    def p_opt_objects(self, t):
+        '''opt_objects : objects
+                       |'''

-# One or more objects.
-def p_objects(t):
-    '''objects : objects object
-               | object'''
+    # One or more objects.
+    def p_objects(self, t):
+        '''objects : objects object
+                   | object'''

-# Objects can be of various types.
-def p_object(t):
-    '''object : rom_block
-              | macroop_def'''
+    # Objects can be of various types.
+    def p_object(self, t):
+        '''object : rom_block
+                  | macroop_def'''

-# Defines a section of microcode that should go in the current ROM.
-def p_rom_block(t):
-    'rom_block : DEF ROM block SEMI'
-    if not t.parser.rom:
-        print_error("Rom block found, but no Rom object specified.")
-        raise TypeError("Rom block found, but no Rom object was specified.")
-    for statement in t[3].statements:
-        statement.handle(t.parser, t.parser.rom)
-    t[0] = t.parser.rom
+    # Defines a section of microcode that should go in the current ROM.
+    def p_rom_block(self, t):
+        'rom_block : DEF ROM block SEMI'
+        if not self.rom:
+            print_error("Rom block found, but no Rom object specified.")
+            raise TypeError("Rom block found, but nowhere to put it.")
+        for statement in t[3].statements:
+            statement.handle(self, self.rom)
+        t[0] = self.rom

-# Defines a macroop that jumps to an external label in the ROM.
-def p_macroop_jump(t):
-    'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
-    if not t.parser.rom_macroop_type:
-        print_error("ROM based macroop found, but no ROM macroop " +
-            "class was specified.")
-        raise TypeError("ROM based macroop found, but no ROM macroop " +
-            "class was specified.")
-    macroop = t.parser.rom_macroop_type(t[3], t[5])
-    t.parser.macroops[t[3]] = macroop
+    # Defines a macroop that jumps to an external label in the ROM.
+    def p_macroop_jump(self, t):
+        'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
+        if not self.rom_macroop_type:
+            print_error("ROM based macroop found, but no ROM macroop " +
+                "class was specified.")
+            raise TypeError("ROM based macroop found, but no ROM macroop " +
+                "class was specified.")
+        macroop = self.rom_macroop_type(t[3], t[5])
+        self.macroops[t[3]] = macroop


-# Defines a macroop that is combinationally generated.
-def p_macroop_def(t):
-    'macroop_def : DEF MACROOP ID block SEMI'
-    try:
-        curop = t.parser.macro_type(t[3])
-    except TypeError:
-        print_error("Error creating macroop object.")
-        raise
-    for statement in t[4].statements:
-        statement.handle(t.parser, curop)
-    t.parser.macroops[t[3]] = curop
+    # Defines a macroop that is combinationally generated.
+    def p_macroop_def(self, t):
+        'macroop_def : DEF MACROOP ID block SEMI'
+        try:
+            curop = self.macro_type(t[3])
+        except TypeError:
+            print_error("Error creating macroop object.")
+            raise
+        for statement in t[4].statements:
+            statement.handle(self, curop)
+        self.macroops[t[3]] = curop

-# A block of statements
-def p_block(t):
-    'block : LBRACE statements RBRACE'
-    block = Block()
-    block.statements = t[2]
-    t[0] = block
+    # A block of statements
+    def p_block(self, t):
+        'block : LBRACE statements RBRACE'
+        block = Block()
+        block.statements = t[2]
+        t[0] = block

-# One or more statements
-def p_statements_0(t):
-    'statements : statement'
-    t[0] = [t[1]] if t[1] else []
+    # One or more statements
+    def p_statements_0(self, t):
+        'statements : statement'
+        t[0] = [t[1]] if t[1] else []

-def p_statements_1(t):
-    'statements : statements statement'
-    if t[2]:
-        t[1].append(t[2])
-    t[0] = t[1]
+    def p_statements_1(self, t):
+        'statements : statements statement'
+        if t[2]:
+            t[1].append(t[2])
+        t[0] = t[1]

-# A statement can be of various types.
-def p_statement(t):
-    '''statement : empty_statement
-                 | label
-                 | microop
-                 | directive'''
-    t[0] = t[1]
+    # A statement can be of various types.
+    def p_statement(self, t):
+        '''statement : empty_statement
+                     | label
+                     | microop
+                     | directive'''
+        t[0] = t[1]

-# Parameters are optional.
-def p_opt_params_0(t):
-    'opt_params : PARAMS'
-    t[0] = t[1]
+    # Parameters are optional.
+    def p_opt_params_0(self, t):
+        'opt_params : PARAMS'
+        t[0] = t[1]

-def p_opt_params_1(t):
-    'opt_params :'
-    t[0] = ""
+    def p_opt_params_1(self, t):
+        'opt_params :'
+        t[0] = ""

-# Statements are often ended by newlines or a semi colon.
-def p_end_of_statement(t):
-    '''end_of_statement : NEWLINE
-                        | SEMI'''
-    pass
+    # Statements are often ended by newlines or a semi colon.
+    def p_end_of_statement(self, t):
+        '''end_of_statement : NEWLINE
+                            | SEMI'''
+        pass

-# Ignore empty statements.
-def p_empty_statement(t):
-    'empty_statement : end_of_statement'
-    pass
+    # Ignore empty statements.
+    def p_empty_statement(self, t):
+        'empty_statement : end_of_statement'
+        pass

-# A label in the microcode.
-def p_label_0(t):
-    'label : EXTERN ID COLON'
-    t[0] = Label(t[2], True)
+    # A label in the microcode.
+    def p_label_0(self, t):
+        'label : EXTERN ID COLON'
+        t[0] = Label(t[2], True)

-def p_label_1(t):
-    'label : ID COLON'
-    t[0] = Label(t[1], False)
+    def p_label_1(self, t):
+        'label : ID COLON'
+        t[0] = Label(t[1], False)

-# A microop with optional parameters.
-def p_microop(t):
-    'microop : ID opt_params end_of_statement'
-    t[0] = Microop(t[1], t[2])
+    # A microop with optional parameters.
+    def p_microop(self, t):
+        'microop : ID opt_params end_of_statement'
+        t[0] = Microop(t[1], t[2])

-# Directives for the macroop.
-def p_directive(t):
-    'directive : DOT ID opt_params end_of_statement'
-    t[0] = Directive(t[2], t[3])
+    # Directives for the macroop.
+    def p_directive(self, t):
+        'directive : DOT ID opt_params end_of_statement'
+        t[0] = Directive(t[2], t[3])

-# Parse error handler.  Note that the argument here is the offending
-# *token*, not a grammar symbol (hence the need to use t.value)
-def p_error(t):
-    if t:
-        error(t.lineno, "syntax error at '%s'" % t.value)
-    else:
-        error(0, "unknown syntax error", True)
+    # Parse error handler.  Note that the argument here is the offending
+    # *token*, not a grammar symbol (hence the need to use t.value)
+    def p_error(self, t):
+        if t:
+            error(t.lineno, "syntax error at '%s'" % t.value)
+        else:
+            error(0, "unknown syntax error", True)

-class MicroAssembler(object):
-
-    def __init__(self, macro_type, microops,
-            rom = None, rom_macroop_type = None):
-        self.lexer = lex.lex()
-        self.parser = yacc.yacc()
-        self.parser.macro_type = macro_type
-        self.parser.macroops = {}
-        self.parser.microops = microops
-        self.parser.rom = rom
-        self.parser.rom_macroop_type = rom_macroop_type
-        self.parser.symbols = {}
-        self.symbols = self.parser.symbols
+    def __init__(self, macro_type, microops, rom=None, rom_macroop_type=None):
+        self.lexer = lex.lex(object=self)
+        self.parser = yacc.yacc(module=self)
+        self.macro_type = macro_type
+        self.macroops = {}
+        self.microops = microops
+        self.rom = rom
+        self.rom_macroop_type = rom_macroop_type
+        self.symbols = {}

     def assemble(self, asm):
         self.parser.parse(asm, lexer=self.lexer)
-        macroops = self.parser.macroops
-        self.parser.macroops = {}
-        return macroops
+        return self.macroops

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/56335
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Id4498a00ebc276940bc6a2f140a43460664d47da
Gerrit-Change-Number: 56335
Gerrit-PatchSet: 1
Gerrit-Owner: Gabe Black <gabe.bl...@gmail.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to