Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de> Branch: py3.6-wordcode Changeset: r94588:f0d5731c5251 Date: 2018-05-14 23:25 +0200 http://bitbucket.org/pypy/pypy/changeset/f0d5731c5251/
Log: fix code.dump() diff --git a/pypy/interpreter/test/test_pycode.py b/pypy/interpreter/test/test_pycode.py --- a/pypy/interpreter/test/test_pycode.py +++ b/pypy/interpreter/test/test_pycode.py @@ -16,4 +16,4 @@ print '>>>\n' + output + '\n<<<' assert ' 1 (7)' in output assert ' 4 (None)' in output - assert ' 19 RETURN_VALUE ' in output + assert ' 16 RETURN_VALUE' in output diff --git a/pypy/tool/dis3.py b/pypy/tool/dis3.py --- a/pypy/tool/dis3.py +++ b/pypy/tool/dis3.py @@ -1,20 +1,40 @@ -"""Disassembler of Python byte code into mnemonics. -Python 3 dis.py partly backported to Python 2""" +"""Disassembler of Python byte code into mnemonics.""" + +from __future__ import print_function import sys import types +import collections +import io from opcode3 import * from opcode3 import __all__ as _opcodes_all -__all__ = ["dis", "disassemble", "distb", "disco", - "findlinestarts", "findlabels"] + _opcodes_all +__all__ = ["code_info", "dis", "disassemble", "distb", "disco", + "findlinestarts", "findlabels", "show_code", + "get_instructions", "Instruction", "Bytecode"] + _opcodes_all del _opcodes_all -_have_code = (types.MethodType, types.FunctionType, types.CodeType, type) +_have_code = (types.MethodType, types.FunctionType, types.CodeType, + classmethod, staticmethod, type) + +FORMAT_VALUE = opmap['FORMAT_VALUE'] + +def _try_compile(source, name): + """Attempts to compile the given source, first as an expression and + then as a statement if the first approach fails. + + Utility function to accept strings in functions that otherwise + expect code objects + """ + try: + c = compile(source, name, 'eval') + except SyntaxError: + c = compile(source, name, 'exec') + return c def dis(x=None): - """Disassemble classes, methods, functions, or code. + """Disassemble classes, methods, functions, generators, or code. With no argument, disassemble the last traceback. @@ -22,30 +42,31 @@ if x is None: distb() return - if isinstance(x, types.InstanceType): - x = x.__class__ - if hasattr(x, 'im_func'): - x = x.im_func - if hasattr(x, 'func_code'): - x = x.func_code - if hasattr(x, 'co_code'): # PyCode needs co_code before __dict__ + if hasattr(x, '__func__'): # Method + x = x.__func__ + if hasattr(x, '__code__'): # Function + x = x.__code__ + if hasattr(x, 'gi_code'): # Generator + x = x.gi_code + if hasattr(x, 'co_code'): # Code object disassemble(x) - elif hasattr(x, '__dict__'): - items = x.__dict__.items() - items.sort() + elif hasattr(x, '__dict__'): # Class or module + items = sorted(x.__dict__.items()) for name, x1 in items: if isinstance(x1, _have_code): - print "Disassembly of %s:" % name + print("Disassembly of %s:" % name) try: dis(x1) except TypeError as msg: - print "Sorry:", msg - print - elif isinstance(x, str): - disassemble_string(x) + print("Sorry:", msg) + print() + elif isinstance(x, (bytes, bytearray)): # Raw bytecode + _disassemble_bytes(x) + elif isinstance(x, str): # Source code + _disassemble_str(x) else: - raise TypeError("don't know how to disassemble %s objects" % \ - type(x).__name__) + raise TypeError("don't know how to disassemble %s objects" % + type(x).__name__) def distb(tb=None): """Disassemble a traceback (default: last traceback).""" @@ -57,104 +78,290 @@ while tb.tb_next: tb = tb.tb_next disassemble(tb.tb_frame.f_code, tb.tb_lasti) +# The inspect module interrogates this dictionary to build its +# list of CO_* constants. It is also used by pretty_flags to +# turn the co_flags field into a human readable list. +COMPILER_FLAG_NAMES = { + 1: "OPTIMIZED", + 2: "NEWLOCALS", + 4: "VARARGS", + 8: "VARKEYWORDS", + 16: "NESTED", + 32: "GENERATOR", + 64: "NOFREE", + 128: "COROUTINE", + 256: "ITERABLE_COROUTINE", + 512: "ASYNC_GENERATOR", +} + +def pretty_flags(flags): + """Return pretty representation of code flags.""" + names = [] + for i in range(32): + flag = 1<<i + if flags & flag: + names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag))) + flags ^= flag + if not flags: + break + else: + names.append(hex(flags)) + return ", ".join(names) + +def _get_code_object(x): + """Helper to handle methods, functions, generators, strings and raw code objects""" + if hasattr(x, '__func__'): # Method + x = x.__func__ + if hasattr(x, '__code__'): # Function + x = x.__code__ + if hasattr(x, 'gi_code'): # Generator + x = x.gi_code + if isinstance(x, str): # Source code + x = _try_compile(x, "<disassembly>") + if hasattr(x, 'co_code'): # Code object + return x + raise TypeError("don't know how to disassemble %s objects" % + type(x).__name__) + +def code_info(x): + """Formatted details of methods, functions, or code.""" + return _format_code_info(_get_code_object(x)) + +def _format_code_info(co): + lines = [] + lines.append("Name: %s" % co.co_name) + lines.append("Filename: %s" % co.co_filename) + lines.append("Argument count: %s" % co.co_argcount) + lines.append("Kw-only arguments: %s" % co.co_kwonlyargcount) + lines.append("Number of locals: %s" % co.co_nlocals) + lines.append("Stack size: %s" % co.co_stacksize) + lines.append("Flags: %s" % pretty_flags(co.co_flags)) + if co.co_consts: + lines.append("Constants:") + for i_c in enumerate(co.co_consts): + lines.append("%4d: %r" % i_c) + if co.co_names: + lines.append("Names:") + for i_n in enumerate(co.co_names): + lines.append("%4d: %s" % i_n) + if co.co_varnames: + lines.append("Variable names:") + for i_n in enumerate(co.co_varnames): + lines.append("%4d: %s" % i_n) + if co.co_freevars: + lines.append("Free variables:") + for i_n in enumerate(co.co_freevars): + lines.append("%4d: %s" % i_n) + if co.co_cellvars: + lines.append("Cell variables:") + for i_n in enumerate(co.co_cellvars): + lines.append("%4d: %s" % i_n) + return "\n".join(lines) + +def show_code(co, file=None): + """Print details of methods, functions, or code to *file*. + + If *file* is not provided, the output is printed on stdout. + """ + print(code_info(co)) + +_Instruction = collections.namedtuple("_Instruction", + "opname opcode arg argval argrepr offset starts_line is_jump_target") + +class Instruction(_Instruction): + """Details for a bytecode operation + + Defined fields: + opname - human readable name for operation + opcode - numeric code for operation + arg - numeric argument to operation (if any), otherwise None + argval - resolved arg value (if known), otherwise same as arg + argrepr - human readable description of operation argument + offset - start index of operation within bytecode sequence + starts_line - line started by this opcode (if any), otherwise None + is_jump_target - True if other code jumps to here, otherwise False + """ + + def _disassemble(self, lineno_width=3, mark_as_current=False): + """Format instruction details for inclusion in disassembly output + + *lineno_width* sets the width of the line number field (0 omits it) + *mark_as_current* inserts a '-->' marker arrow as part of the line + """ + fields = [] + # Column: Source code line number + if lineno_width: + if self.starts_line is not None: + lineno_fmt = "%%%dd" % lineno_width + fields.append(lineno_fmt % self.starts_line) + else: + fields.append(' ' * lineno_width) + # Column: Current instruction indicator + if mark_as_current: + fields.append('-->') + else: + fields.append(' ') + # Column: Jump target marker + if self.is_jump_target: + fields.append('>>') + else: + fields.append(' ') + # Column: Instruction offset from start of code sequence + fields.append(repr(self.offset).rjust(4)) + # Column: Opcode name + fields.append(self.opname.ljust(20)) + # Column: Opcode argument + if self.arg is not None: + fields.append(repr(self.arg).rjust(5)) + # Column: Opcode argument details + if self.argrepr: + fields.append('(' + self.argrepr + ')') + return ' '.join(fields).rstrip() + + +def get_instructions(x, first_line=None): + """Iterator for the opcodes in methods, functions or code + + Generates a series of Instruction named tuples giving the details of + each operations in the supplied code. + + If *first_line* is not None, it indicates the line number that should + be reported for the first source line in the disassembled code. + Otherwise, the source line information (if any) is taken directly from + the disassembled code object. + """ + co = _get_code_object(x) + cell_names = co.co_cellvars + co.co_freevars + linestarts = dict(findlinestarts(co)) + if first_line is not None: + line_offset = first_line - co.co_firstlineno + else: + line_offset = 0 + return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names, + co.co_consts, cell_names, linestarts, + line_offset) + +def _get_const_info(const_index, const_list): + """Helper to get optional details about const references + + Returns the dereferenced constant and its repr if the constant + list is defined. + Otherwise returns the constant index and its repr(). + """ + argval = const_index + if const_list is not None: + argval = const_list[const_index] + return argval, repr(argval) + +def _get_name_info(name_index, name_list): + """Helper to get optional details about named references + + Returns the dereferenced name as both value and repr if the name + list is defined. + Otherwise returns the name index and its repr(). + """ + argval = name_index + if name_list is not None: + argval = name_list[name_index] + argrepr = argval + else: + argrepr = repr(argval) + return argval, argrepr + + +def _get_instructions_bytes(code, varnames=None, names=None, constants=None, + cells=None, linestarts=None, line_offset=0): + """Iterate over the instructions in a bytecode string. + + Generates a sequence of Instruction namedtuples giving the details of each + opcode. Additional information about the code's runtime environment + (e.g. variable names, constants) can be specified using optional + arguments. + + """ + labels = findlabels(code) + starts_line = None + for offset, op, arg in _unpack_opargs(code): + if linestarts is not None: + starts_line = linestarts.get(offset, None) + if starts_line is not None: + starts_line += line_offset + is_jump_target = offset in labels + argval = None + argrepr = '' + if arg is not None: + # Set argval to the dereferenced value of the argument when + # available, and argrepr to the string representation of argval. + # _disassemble_bytes needs the string repr of the + # raw name index for LOAD_GLOBAL, LOAD_CONST, etc. + argval = arg + if op in hasconst: + argval, argrepr = _get_const_info(arg, constants) + elif op in hasname: + argval, argrepr = _get_name_info(arg, names) + elif op in hasjrel: + argval = offset + 2 + arg + argrepr = "to " + repr(argval) + elif op in haslocal: + argval, argrepr = _get_name_info(arg, varnames) + elif op in hascompare: + argval = cmp_op[arg] + argrepr = argval + elif op in hasfree: + argval, argrepr = _get_name_info(arg, cells) + elif op == FORMAT_VALUE: + argval = ((None, str, repr, ascii)[arg & 0x3], bool(arg & 0x4)) + argrepr = ('', 'str', 'repr', 'ascii')[arg & 0x3] + if argval[1]: + if argrepr: + argrepr += ', ' + argrepr += 'with format' + yield Instruction(opname[op], op, + arg, argval, argrepr, + offset, starts_line, is_jump_target) + def disassemble(co, lasti=-1): """Disassemble a code object.""" - XXX - code = co.co_code - labels = findlabels(code) + cell_names = co.co_cellvars + co.co_freevars linestarts = dict(findlinestarts(co)) - n = len(code) - i = 0 - extended_arg = 0 - free = None - while i < n: - c = code[i] - op = ord(c) - if i in linestarts: - if i > 0: - print - print "%3d" % linestarts[i], - else: - print ' ', + _disassemble_bytes(co.co_code, lasti, co.co_varnames, co.co_names, + co.co_consts, cell_names, linestarts) - if i == lasti: print '-->', - else: print ' ', - if i in labels: print '>>', - else: print ' ', - print repr(i).rjust(4), - print opname[op].ljust(20), - i = i+1 - if op >= HAVE_ARGUMENT: - oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg - extended_arg = 0 - i = i+2 - if op == EXTENDED_ARG: - extended_arg = oparg*65536L - print repr(oparg).rjust(5), - if op in hasconst: - print '(' + repr(co.co_consts[oparg]) + ')', - elif op in hasname: - print '(' + co.co_names[oparg] + ')', - elif op in hasjrel: - print '(to ' + repr(i + oparg) + ')', - elif op in haslocal: - print '(' + co.co_varnames[oparg] + ')', - elif op in hascompare: - print '(' + cmp_op[oparg] + ')', - elif op in hasfree: - if free is None: - free = co.co_cellvars + co.co_freevars - print '(' + free[oparg] + ')', - elif op in hasnargs: - print '(%d positional, %d keyword pair)' % \ - (ord(code[i-2]), ord(code[i-1])), - print +def _disassemble_bytes(code, lasti=-1, varnames=None, names=None, + constants=None, cells=None, linestarts=None, + line_offset=0): + # Omit the line number column entirely if we have no line number info + show_lineno = linestarts is not None + # TODO?: Adjust width upwards if max(linestarts.values()) >= 1000? + lineno_width = 3 if show_lineno else 0 + for instr in _get_instructions_bytes(code, varnames, names, + constants, cells, linestarts, + line_offset=line_offset): + new_source_line = (show_lineno and + instr.starts_line is not None and + instr.offset > 0) + if new_source_line: + print() + is_current_instr = instr.offset == lasti + print(instr._disassemble(lineno_width, is_current_instr)) -def disassemble_string(code, lasti=-1, varnames=None, names=None, - constants=None): - labels = findlabels(code) - n = len(code) - i = 0 - while i < n: - c = code[i] - op = ord(c) - if i == lasti: print '-->', - else: print ' ', - if i in labels: print '>>', - else: print ' ', - print repr(i).rjust(4), - print opname[op].ljust(15), - i = i+1 - if op >= HAVE_ARGUMENT: - oparg = ord(code[i]) + ord(code[i+1])*256 - i = i+2 - print repr(oparg).rjust(5), - if op in hasconst: - if constants: - print '(' + repr(constants[oparg]) + ')', - else: - print '(%d)'%oparg, - elif op in hasname: - if names is not None: - print '(' + names[oparg] + ')', - else: - print '(%d)'%oparg, - elif op in hasjrel: - print '(to ' + repr(i + oparg) + ')', - elif op in haslocal: - if varnames: - print '(' + varnames[oparg] + ')', - else: - print '(%d)' % oparg, - elif op in hascompare: - print '(' + cmp_op[oparg] + ')', - elif op in hasnargs: - print '(%d positional, %d keyword pair)' % \ - (ord(code[i-2]), ord(code[i-1])), - print +def _disassemble_str(source): + """Compile the source string, then disassemble the code object.""" + disassemble(_try_compile(source, '<dis>')) disco = disassemble # XXX For backwards compatibility +def _unpack_opargs(code): + extended_arg = 0 + for i in range(0, len(code), 2): + op = ord(code[i]) + if op >= HAVE_ARGUMENT: + arg = ord(code[i+1]) | extended_arg + extended_arg = (arg << 8) if op == EXTENDED_ARG else 0 + else: + arg = None + yield (i, op, arg) + def findlabels(code): """Detect all offsets in a byte code which are jump targets. @@ -162,23 +369,16 @@ """ labels = [] - n = len(code) - i = 0 - while i < n: - c = code[i] - op = ord(c) - i = i+1 - if op >= HAVE_ARGUMENT: - oparg = ord(code[i]) + ord(code[i+1])*256 - i = i+2 - label = -1 + for offset, op, arg in _unpack_opargs(code): + if arg is not None: if op in hasjrel: - label = i+oparg + label = offset + 2 + arg elif op in hasjabs: - label = oparg - if label >= 0: - if label not in labels: - labels.append(label) + label = arg + else: + continue + if label not in labels: + labels.append(label) return labels def findlinestarts(code): @@ -187,8 +387,8 @@ Generate pairs (offset, lineno) as described in Python/compile.c. """ - byte_increments = [ord(c) for c in code.co_lnotab[0::2]] - line_increments = [ord(c) for c in code.co_lnotab[1::2]] + byte_increments = code.co_lnotab[0::2] + line_increments = code.co_lnotab[1::2] lastlineno = None lineno = code.co_firstlineno @@ -206,27 +406,77 @@ if lineno != lastlineno: yield (addr, lineno) +class Bytecode: + """The bytecode operations of a piece of code + + Instantiate this with a function, method, string of code, or a code object + (as returned by compile()). + + Iterating over this yields the bytecode operations as Instruction instances. + """ + def __init__(self, x, first_line=None, current_offset=None): + self.codeobj = co = _get_code_object(x) + if first_line is None: + self.first_line = co.co_firstlineno + self._line_offset = 0 + else: + self.first_line = first_line + self._line_offset = first_line - co.co_firstlineno + self._cell_names = co.co_cellvars + co.co_freevars + self._linestarts = dict(findlinestarts(co)) + self._original_object = x + self.current_offset = current_offset + + def __iter__(self): + co = self.codeobj + return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names, + co.co_consts, self._cell_names, + self._linestarts, + line_offset=self._line_offset) + + def __repr__(self): + return "{}({!r})".format(self.__class__.__name__, + self._original_object) + + @classmethod + def from_traceback(cls, tb): + """ Construct a Bytecode from the given traceback """ + while tb.tb_next: + tb = tb.tb_next + return cls(tb.tb_frame.f_code, current_offset=tb.tb_lasti) + + def info(self): + """Return formatted information about the code object.""" + return _format_code_info(self.codeobj) + + def dis(self): + """Return a formatted view of the bytecode operations.""" + co = self.codeobj + if self.current_offset is not None: + offset = self.current_offset + else: + offset = -1 + with io.StringIO() as output: + _disassemble_bytes(co.co_code, varnames=co.co_varnames, + names=co.co_names, constants=co.co_consts, + cells=self._cell_names, + linestarts=self._linestarts, + line_offset=self._line_offset, + file=output, + lasti=offset) + return output.getvalue() + + def _test(): """Simple test program to disassemble a file.""" - if sys.argv[1:]: - if sys.argv[2:]: - sys.stderr.write("usage: python dis.py [-|file]\n") - sys.exit(2) - fn = sys.argv[1] - if not fn or fn == "-": - fn = None - else: - fn = None - if fn is None: - f = sys.stdin - else: - f = open(fn) - source = f.read() - if fn is not None: - f.close() - else: - fn = "<stdin>" - code = compile(source, fn, "exec") + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument('infile', type=argparse.FileType(), nargs='?', default='-') + args = parser.parse_args() + with args.infile as infile: + source = infile.read() + code = compile(source, args.infile.name, "exec") dis(code) if __name__ == "__main__": diff --git a/pypy/tool/opcode3.py b/pypy/tool/opcode3.py --- a/pypy/tool/opcode3.py +++ b/pypy/tool/opcode3.py @@ -2,10 +2,8 @@ """ opcode module - potentially shared between dis and other modules which operate on bytecodes (e.g. peephole optimizers). -"Backported" from Python 3 to Python 2 land - an excact copy of lib-python/3/opcode.py """ - __all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs", "haslocal", "hascompare", "hasfree", "opname", "opmap", "HAVE_ARGUMENT", "EXTENDED_ARG", "hasnargs"] @@ -33,10 +31,12 @@ haslocal = [] hascompare = [] hasfree = [] -hasnargs = [] # unused +hasnargs = [] opmap = {} -opname = ['<%r>' % (op,) for op in range(256)] +opname = [''] * 256 +for op in range(256): opname[op] = '<%r>' % (op,) +del op def def_op(name, op): opname[op] = name @@ -174,9 +174,11 @@ name_op('STORE_ANNOTATION', 127) # Index in name list def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3) -def_op('CALL_FUNCTION', 131) # #args -def_op('MAKE_FUNCTION', 132) # Flags +def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8) +hasnargs.append(131) +def_op('MAKE_FUNCTION', 132) # Number of args with default values def_op('BUILD_SLICE', 133) # Number of items +def_op('MAKE_CLOSURE', 134) def_op('LOAD_CLOSURE', 135) hasfree.append(135) def_op('LOAD_DEREF', 136) @@ -186,8 +188,12 @@ def_op('DELETE_DEREF', 138) hasfree.append(138) -def_op('CALL_FUNCTION_KW', 141) # #args + #kwargs -def_op('CALL_FUNCTION_EX', 142) # Flags +def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8) +hasnargs.append(140) +def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8) +hasnargs.append(141) +def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8) +hasnargs.append(142) jrel_op('SETUP_WITH', 143) @@ -198,6 +204,8 @@ def_op('LOAD_CLASSDEREF', 148) hasfree.append(148) +jrel_op('SETUP_ASYNC_WITH', 154) + def_op('EXTENDED_ARG', 144) EXTENDED_ARG = 144 @@ -207,12 +215,8 @@ def_op('BUILD_TUPLE_UNPACK', 152) def_op('BUILD_SET_UNPACK', 153) -jrel_op('SETUP_ASYNC_WITH', 154) - -def_op('FORMAT_VALUE', 155) -def_op('BUILD_CONST_KEY_MAP', 156) -def_op('BUILD_STRING', 157) -def_op('BUILD_TUPLE_UNPACK_WITH_CALL', 158) +def_op('FORMAT_VALUE', 155) # in CPython 3.6, but available in PyPy from 3.5 +def_op('BUILD_STRING', 157) # in CPython 3.6, but available in PyPy from 3.5 # pypy modification, experimental bytecode def_op('LOOKUP_METHOD', 201) # Index in name list _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit