Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de> Branch: py3.6 Changeset: r94739:a2790e033e07 Date: 2018-06-08 15:50 +0200 http://bitbucket.org/pypy/pypy/changeset/a2790e033e07/
Log: merge py3.5 diff too long, truncating to 2000 out of 9430 lines diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -33,7 +33,12 @@ 050d84dd78997f021acf0e133934275d63547cc0 release-pypy2.7-v5.4.1 050d84dd78997f021acf0e133934275d63547cc0 release-pypy2.7-v5.4.1 0e2d9a73f5a1818d0245d75daccdbe21b2d5c3ef release-pypy2.7-v5.4.1 +4909c06daf41ce88f87dc01c57959cadad4df4a8 RevDB-pypy2.7-v5.4.1 +4909c06daf41ce88f87dc01c57959cadad4df4a8 RevDB-pypy2.7-v5.4.1 +d7724c0a5700b895a47de44074cdf5fd659a988f RevDB-pypy2.7-v5.4.1 aff251e543859ce4508159dd9f1a82a2f553de00 release-pypy2.7-v5.6.0 +e90317857d27917bf840caf675832292ee070510 RevDB-pypy2.7-v5.6.1 +a24d6c7000c8099c73d3660857f7e3cee5ac045c RevDB-pypy2.7-v5.6.2 fa3249d55d15b9829e1be69cdf45b5a44cec902d release-pypy2.7-v5.7.0 b16a4363e930f6401bceb499b9520955504c6cb0 release-pypy3.5-v5.7.0 1aa2d8e03cdfab54b7121e93fda7e98ea88a30bf release-pypy2.7-v5.7.1 diff --git a/lib-python/3/opcode.py b/lib-python/3/opcode.py --- a/lib-python/3/opcode.py +++ b/lib-python/3/opcode.py @@ -224,5 +224,6 @@ def_op('CALL_METHOD', 202) # #args not including 'self' def_op('BUILD_LIST_FROM_ARG', 203) jrel_op('JUMP_IF_NOT_DEBUG', 204) # jump over assert statements +def_op('LOAD_REVDB_VAR', 205) # reverse debugger (syntax example: $5) del def_op, name_op, jrel_op, jabs_op diff --git a/lib_pypy/_sysconfigdata.py b/lib_pypy/_sysconfigdata.py --- a/lib_pypy/_sysconfigdata.py +++ b/lib_pypy/_sysconfigdata.py @@ -24,6 +24,15 @@ 'VERSION': sys.version[:3] } +if find_executable("gcc"): + build_time_vars.update({ + "CC": "gcc -pthread", + "GNULD": "yes", + "LDSHARED": "gcc -pthread -shared", + }) + if find_executable("g++"): + build_time_vars["CXX"] = "g++ -pthread" + if sys.platform[:6] == "darwin": import platform if platform.machine() == 'i386': @@ -36,12 +45,6 @@ arch = platform.machine() build_time_vars['LDSHARED'] += ' -undefined dynamic_lookup' build_time_vars['CC'] += ' -arch %s' % (arch,) + if "CXX" in build_time_vars: + build_time_vars['CXX'] += ' -arch %s' % (arch,) -if find_executable("gcc"): - build_time_vars.update({ - "CC": "gcc -pthread", - "GNULD": "yes", - "LDSHARED": "gcc -pthread -shared", - }) - if find_executable("g++"): - build_time_vars["CXX"] = "g++ -pthread" diff --git a/lib_pypy/grp.py b/lib_pypy/grp.py --- a/lib_pypy/grp.py +++ b/lib_pypy/grp.py @@ -5,6 +5,8 @@ import os from _pwdgrp_cffi import ffi, lib import _structseq +import thread +_lock = thread.allocate_lock() try: from __pypy__ import builtinify except ImportError: builtinify = lambda f: f @@ -33,37 +35,39 @@ @builtinify def getgrgid(gid): - try: - res = lib.getgrgid(gid) - except TypeError: - gid = int(gid) - res = lib.getgrgid(gid) - import warnings - warnings.warn("group id must be int", DeprecationWarning) - if not res: - # XXX maybe check error eventually - raise KeyError(gid) - return _group_from_gstruct(res) - + with _lock: + try: + res = lib.getgrgid(gid) + except TypeError: + gid = int(gid) + res = lib.getgrgid(gid) + import warnings + warnings.warn("group id must be int", DeprecationWarning) + if not res: + # XXX maybe check error eventually + raise KeyError(gid) + return _group_from_gstruct(res) @builtinify def getgrnam(name): if not isinstance(name, str): raise TypeError("expected string") - res = lib.getgrnam(os.fsencode(name)) - if not res: - raise KeyError("'getgrnam(): name not found: %s'" % name) - return _group_from_gstruct(res) + with _lock: + res = lib.getgrnam(os.fsencode(name)) + if not res: + raise KeyError("'getgrnam(): name not found: %s'" % name) + return _group_from_gstruct(res) @builtinify def getgrall(): - lib.setgrent() lst = [] - while 1: - p = lib.getgrent() - if not p: - break - lst.append(_group_from_gstruct(p)) - lib.endgrent() + with _lock: + lib.setgrent() + while 1: + p = lib.getgrent() + if not p: + break + lst.append(_group_from_gstruct(p)) + lib.endgrent() return lst __all__ = ('struct_group', 'getgrgid', 'getgrnam', 'getgrall') diff --git a/lib_pypy/pwd.py b/lib_pypy/pwd.py --- a/lib_pypy/pwd.py +++ b/lib_pypy/pwd.py @@ -12,6 +12,8 @@ from _pwdgrp_cffi import ffi, lib import _structseq +import thread +_lock = thread.allocate_lock() try: from __pypy__ import builtinify except ImportError: builtinify = lambda f: f @@ -54,10 +56,11 @@ Return the password database entry for the given numeric user ID. See pwd.__doc__ for more on password database entries. """ - pw = lib.getpwuid(uid) - if not pw: - raise KeyError("getpwuid(): uid not found: %s" % uid) - return _mkpwent(pw) + with _lock: + pw = lib.getpwuid(uid) + if not pw: + raise KeyError("getpwuid(): uid not found: %s" % uid) + return _mkpwent(pw) @builtinify def getpwnam(name): @@ -70,10 +73,11 @@ if not isinstance(name, basestring): raise TypeError("expected string") name = str(name) - pw = lib.getpwnam(name) - if not pw: - raise KeyError("getpwname(): name not found: %s" % name) - return _mkpwent(pw) + with _lock: + pw = lib.getpwnam(name) + if not pw: + raise KeyError("getpwname(): name not found: %s" % name) + return _mkpwent(pw) @builtinify def getpwall(): @@ -83,13 +87,14 @@ See pwd.__doc__ for more on password database entries. """ users = [] - lib.setpwent() - while True: - pw = lib.getpwent() - if not pw: - break - users.append(_mkpwent(pw)) - lib.endpwent() + with _lock: + lib.setpwent() + while True: + pw = lib.getpwent() + if not pw: + break + users.append(_mkpwent(pw)) + lib.endpwent() return users __all__ = ('struct_passwd', 'getpwuid', 'getpwnam', 'getpwall') diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -61,6 +61,11 @@ "termios", "_minimal_curses", ]) +reverse_debugger_disable_modules = set([ + "_continuation", "_vmprof", "_multiprocessing", + "micronumpy", + ]) + # XXX this should move somewhere else, maybe to platform ("is this posixish" # check or something) if sys.platform == "win32": @@ -297,6 +302,9 @@ modules = working_modules.copy() if config.translation.sandbox: modules = default_modules + if config.translation.reverse_debugger: + for mod in reverse_debugger_disable_modules: + setattr(config.objspace.usemodules, mod, False) # ignore names from 'essential_modules', notably 'exceptions', which # may not be present in config.objspace.usemodules at all modules = [name for name in modules if name not in essential_modules] diff --git a/pypy/doc/install.rst b/pypy/doc/install.rst --- a/pypy/doc/install.rst +++ b/pypy/doc/install.rst @@ -20,7 +20,7 @@ OS and architecture. You may be able to use either use the `most recent release`_ or one of our `development nightly build`_. These builds depend on dynamically linked libraries that may not be available on your -OS. See the section about `Linux binaries` for more info and alternatives that +OS. See the section about `Linux binaries`_ for more info and alternatives that may work on your system. Please note that the nightly builds are not diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -18,3 +18,17 @@ .. branch: crypt_h Include crypt.h for crypt() on Linux + +.. branch: gc-more-logging + +Log additional gc-minor and gc-collect-step info in the PYPYLOG + +.. branch: reverse-debugger + +The reverse-debugger branch has been merged. For more information, see +https://bitbucket.org/pypy/revdb + + +.. branch: pyparser-improvements-3 + +Small refactorings in the Python parser. diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -13,6 +13,13 @@ Use implementation-specific site directories in sysconfig like in Python2 + .. branch: alex_gaynor/remove-an-unneeded-call-into-openssl-th-1526429141011 Remove an unneeded call into OpenSSL, from cpython https://github.com/python/cpython/pull/6887 + + +.. branch: py3.5-reverse-debugger + +The reverse-debugger branch has been merged. For more information, see +https://bitbucket.org/pypy/revdb diff --git a/pypy/interpreter/app_main.py b/pypy/interpreter/app_main.py --- a/pypy/interpreter/app_main.py +++ b/pypy/interpreter/app_main.py @@ -88,11 +88,24 @@ run_protected() handles details like forwarding exceptions to sys.excepthook(), catching SystemExit, etc. """ + # don't use try:except: here, otherwise the exception remains + # visible in user code. Make sure revdb_stop is a callable, so + # that we can call it immediately after finally: below. Doing + # so minimizes the number of "blind" lines that we need to go + # back from, with "bstep", after we do "continue" in revdb. + if '__pypy__' in sys.builtin_module_names: + from __pypy__ import revdb_stop + else: + revdb_stop = None + if revdb_stop is None: + revdb_stop = lambda: None + try: # run it try: f(*fargs, **fkwds) finally: + revdb_stop() sys.settrace(None) sys.setprofile(None) except SystemExit as e: diff --git a/pypy/interpreter/astcompiler/assemble.py b/pypy/interpreter/astcompiler/assemble.py --- a/pypy/interpreter/astcompiler/assemble.py +++ b/pypy/interpreter/astcompiler/assemble.py @@ -695,6 +695,7 @@ # TODO ops.BUILD_LIST_FROM_ARG: 1, + ops.LOAD_REVDB_VAR: 1, ops.LOAD_CLASSDEREF: 1, } diff --git a/pypy/interpreter/astcompiler/ast.py b/pypy/interpreter/astcompiler/ast.py --- a/pypy/interpreter/astcompiler/ast.py +++ b/pypy/interpreter/astcompiler/ast.py @@ -1801,6 +1801,8 @@ return Num.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_Str): return Str.from_object(space, w_node) + if space.isinstance_w(w_node, get(space).w_RevDBMetaVar): + return RevDBMetaVar.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_FormattedValue): return FormattedValue.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_JoinedStr): @@ -2711,6 +2713,41 @@ State.ast_type('Str', 'expr', ['s']) +class RevDBMetaVar(expr): + + def __init__(self, metavar, lineno, col_offset): + self.metavar = metavar + expr.__init__(self, lineno, col_offset) + + def walkabout(self, visitor): + visitor.visit_RevDBMetaVar(self) + + def mutate_over(self, visitor): + return visitor.visit_RevDBMetaVar(self) + + def to_object(self, space): + w_node = space.call_function(get(space).w_RevDBMetaVar) + w_metavar = space.newint(self.metavar) # int + space.setattr(w_node, space.newtext('metavar'), w_metavar) + w_lineno = space.newint(self.lineno) # int + space.setattr(w_node, space.newtext('lineno'), w_lineno) + w_col_offset = space.newint(self.col_offset) # int + space.setattr(w_node, space.newtext('col_offset'), w_col_offset) + return w_node + + @staticmethod + def from_object(space, w_node): + w_metavar = get_field(space, w_node, 'metavar', False) + w_lineno = get_field(space, w_node, 'lineno', False) + w_col_offset = get_field(space, w_node, 'col_offset', False) + _metavar = space.int_w(w_metavar) + _lineno = space.int_w(w_lineno) + _col_offset = space.int_w(w_col_offset) + return RevDBMetaVar(_metavar, _lineno, _col_offset) + +State.ast_type('RevDBMetaVar', 'expr', ['metavar']) + + class FormattedValue(expr): def __init__(self, value, conversion, format_spec, lineno, col_offset): @@ -4205,6 +4242,8 @@ return self.default_visitor(node) def visit_Str(self, node): return self.default_visitor(node) + def visit_RevDBMetaVar(self, node): + return self.default_visitor(node) def visit_FormattedValue(self, node): return self.default_visitor(node) def visit_JoinedStr(self, node): @@ -4444,6 +4483,9 @@ def visit_Str(self, node): pass + def visit_RevDBMetaVar(self, node): + pass + def visit_FormattedValue(self, node): node.value.walkabout(self) if node.format_spec: diff --git a/pypy/interpreter/astcompiler/astbuilder.py b/pypy/interpreter/astcompiler/astbuilder.py --- a/pypy/interpreter/astcompiler/astbuilder.py +++ b/pypy/interpreter/astcompiler/astbuilder.py @@ -1303,6 +1303,11 @@ else: # a dictionary display return self.handle_dictdisplay(maker, atom_node) + elif first_child_type == tokens.REVDBMETAVAR: + string = atom_node.get_child(0).get_value() + return ast.RevDBMetaVar(int(string[1:]), + atom_node.get_lineno(), + atom_node.get_column()) else: raise AssertionError("unknown atom") diff --git a/pypy/interpreter/astcompiler/codegen.py b/pypy/interpreter/astcompiler/codegen.py --- a/pypy/interpreter/astcompiler/codegen.py +++ b/pypy/interpreter/astcompiler/codegen.py @@ -1677,6 +1677,20 @@ fmt.format_spec.walkabout(self) self.emit_op_arg(ops.FORMAT_VALUE, arg) + def _revdb_metavar(self, node): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import dbstate + if not dbstate.standard_code: + self.emit_op_arg(ops.LOAD_REVDB_VAR, node.metavar) + return True + return False + + def visit_RevDBMetaVar(self, node): + if self.space.reverse_debugging and self._revdb_metavar(node): + return + self.error("Unknown character ('$NUM' is only valid in the " + "reverse-debugger)", node) + class TopLevelCodeGenerator(PythonCodeGenerator): diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py --- a/pypy/interpreter/astcompiler/test/test_astbuilder.py +++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py @@ -1168,7 +1168,7 @@ assert isinstance(s, ast.Bytes) assert space.eq_w(s.s, space.newbytes("hi implicitly extra")) raises(SyntaxError, self.get_first_expr, "b'hello' 'world'") - sentence = u"Die Männer ärgen sich!" + sentence = u"Die Männer ärgern sich!" source = u"# coding: utf-7\nstuff = '%s'" % (sentence,) info = pyparse.CompileInfo("<test>", "exec") tree = self.parser.parse_source(source.encode("utf-7"), info) diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py --- a/pypy/interpreter/astcompiler/test/test_compiler.py +++ b/pypy/interpreter/astcompiler/test/test_compiler.py @@ -31,7 +31,7 @@ generator._resolve_block_targets(blocks) return generator, blocks -class TestCompiler: +class BaseTestCompiler: """These tests compile snippets of code and check them by running them with our own interpreter. These are thus not completely *unit* tests, but given that our interpreter is @@ -97,6 +97,9 @@ def error_test(self, source, exc_type): py.test.raises(exc_type, self.simple_test, source, None, None) + +class TestCompiler(BaseTestCompiler): + def test_issue_713(self): func = "def f(_=2): return (_ if _ else _) if False else _" yield self.st, func, "f()", 2 @@ -1247,6 +1250,24 @@ yield self.st, src, 'z', 0xd8 +class TestCompilerRevDB(BaseTestCompiler): + spaceconfig = {"translation.reverse_debugger": True} + + def test_revdb_metavar(self): + from pypy.interpreter.reverse_debugging import dbstate, setup_revdb + self.space.config.translation.reverse_debugger = True + self.space.reverse_debugging = True + try: + setup_revdb(self.space) + dbstate.standard_code = False + dbstate.metavars = [self.space.wrap(6)] + self.simple_test("x = 7*$0", "x", 42) + dbstate.standard_code = True + self.error_test("x = 7*$0", SyntaxError) + finally: + self.space.reverse_debugging = False + + class AppTestCompiler: def setup_class(cls): diff --git a/pypy/interpreter/astcompiler/tools/Python.asdl b/pypy/interpreter/astcompiler/tools/Python.asdl --- a/pypy/interpreter/astcompiler/tools/Python.asdl +++ b/pypy/interpreter/astcompiler/tools/Python.asdl @@ -77,6 +77,7 @@ | Call(expr func, expr* args, keyword* keywords) | Num(object n) -- a number as a PyObject. | Str(string s) -- need to specify raw, unicode, etc? + | RevDBMetaVar(int metavar) | FormattedValue(expr value, int? conversion, expr? format_spec) | JoinedStr(expr* values) | Bytes(bytes s) diff --git a/pypy/interpreter/astcompiler/validate.py b/pypy/interpreter/astcompiler/validate.py --- a/pypy/interpreter/astcompiler/validate.py +++ b/pypy/interpreter/astcompiler/validate.py @@ -461,6 +461,9 @@ node.slice.walkabout(self) self._validate_expr(node.value) + def visit_RevDBMetaVar(self, node): + pass + # Subscripts def visit_Slice(self, node): if node.lower: diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -405,6 +405,8 @@ """Base class for the interpreter-level implementations of object spaces. http://pypy.readthedocs.org/en/latest/objspace.html""" + reverse_debugging = False + @not_rpython def __init__(self, config=None): "Basic initialization of objects." @@ -416,6 +418,7 @@ from pypy.config.pypyoption import get_pypy_config config = get_pypy_config(translating=False) self.config = config + self.reverse_debugging = config.translation.reverse_debugger self.builtin_modules = {} self.reloading_modules = {} @@ -433,6 +436,9 @@ def startup(self): # To be called before using the space + if self.reverse_debugging: + self._revdb_startup() + self.threadlocals.enter_thread(self) # Initialize already imported builtin modules @@ -823,7 +829,8 @@ w_u1 = self.interned_strings.get(u) if w_u1 is None: w_u1 = w_u - self.interned_strings.set(u, w_u1) + if self._side_effects_ok(): + self.interned_strings.set(u, w_u1) return w_u1 def new_interned_str(self, s): @@ -835,9 +842,39 @@ w_s1 = self.interned_strings.get(u) if w_s1 is None: w_s1 = self.newunicode(u) - self.interned_strings.set(u, w_s1) + if self._side_effects_ok(): + self.interned_strings.set(u, w_s1) return w_s1 + def _revdb_startup(self): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import setup_revdb + setup_revdb(self) + + def _revdb_standard_code(self): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import dbstate + return dbstate.standard_code + + def _side_effects_ok(self): + # For the reverse debugger: we run compiled watchpoint + # expressions in a fast way that will crash if they have + # side-effects. The obvious Python code with side-effects is + # documented "don't do that"; but some non-obvious side + # effects are also common, like interning strings (from + # unmarshalling the code object containing the watchpoint + # expression) to the two attribute caches in mapdict.py and + # typeobject.py. For now, we have to identify such places + # that are not acceptable for "reasonable" read-only + # watchpoint expressions, and write: + # + # if not space._side_effects_ok(): + # don't cache. + # + if self.reverse_debugging: + return self._revdb_standard_code() + return True + def get_interned_str(self, s): """Assumes an identifier (utf-8 encoded str). Returns None if the identifier is not interned, or not a valid utf-8 string at all. diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -71,6 +71,8 @@ return frame def enter(self, frame): + if self.space.reverse_debugging: + self._revdb_enter(frame) frame.f_backref = self.topframeref self.topframeref = jit.virtual_ref(frame) @@ -91,6 +93,8 @@ # be accessed also later frame_vref() jit.virtual_ref_finish(frame_vref, frame) + if self.space.reverse_debugging: + self._revdb_leave(got_exception) # ________________________________________________________________ @@ -160,6 +164,8 @@ Like bytecode_trace() but doesn't invoke any other events besides the trace function. """ + if self.space.reverse_debugging: + self._revdb_potential_stop_point(frame) if (frame.get_w_f_trace() is None or self.is_tracing or self.gettrace() is None): return @@ -373,6 +379,21 @@ if self.space.check_signal_action is not None: self.space.check_signal_action.perform(self, None) + def _revdb_enter(self, frame): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import enter_call + enter_call(self.topframeref(), frame) + + def _revdb_leave(self, got_exception): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import leave_call + leave_call(self.topframeref(), got_exception) + + def _revdb_potential_stop_point(self, frame): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import potential_stop_point + potential_stop_point(frame) + def _freeze_(self): raise Exception("ExecutionContext instances should not be seen during" " translation. Now is a good time to inspect the" diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py --- a/pypy/interpreter/pyframe.py +++ b/pypy/interpreter/pyframe.py @@ -783,9 +783,11 @@ def fget_f_builtins(self, space): return self.get_builtin().getdict(space) + def get_f_back(self): + return ExecutionContext.getnextframe_nohidden(self) + def fget_f_back(self, space): - f_back = ExecutionContext.getnextframe_nohidden(self) - return f_back + return self.get_f_back() def fget_f_lasti(self, space): return self.space.newint(self.last_instr) diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py --- a/pypy/interpreter/pyopcode.py +++ b/pypy/interpreter/pyopcode.py @@ -442,6 +442,8 @@ self.FORMAT_VALUE(oparg, next_instr) elif opcode == opcodedesc.BUILD_STRING.index: self.BUILD_STRING(oparg, next_instr) + elif opcode == opcodedesc.LOAD_REVDB_VAR.index: + self.LOAD_REVDB_VAR(oparg, next_instr) else: self.MISSING_OPCODE(oparg, next_instr) @@ -1141,9 +1143,16 @@ # final result and returns. In that case, we can just continue # with the next bytecode. + def _revdb_jump_backward(self, jumpto): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import jump_backward + jump_backward(self, jumpto) + def jump_absolute(self, jumpto, ec): # this function is overridden by pypy.module.pypyjit.interp_jit check_nonneg(jumpto) + if self.space.reverse_debugging: + self._revdb_jump_backward(jumpto) return jumpto def JUMP_FORWARD(self, jumpby, next_instr): @@ -1451,21 +1460,12 @@ @jit.unroll_safe def BUILD_SET_UNPACK(self, itemcount, next_instr): space = self.space - w_sum = space.newset() + w_set = space.newset() for i in range(itemcount, 0, -1): w_item = self.peekvalue(i-1) - # cannot use w_sum.update, w_item might not be a set - iterator = space.iter(w_item) - while True: - try: - w_value = space.next(iterator) - except OperationError: - break - w_sum.add(w_value) - while itemcount != 0: - self.popvalue() - itemcount -= 1 - self.pushvalue(w_sum) + space.call_method(w_set, "update", w_item) + self.popvalues(itemcount) + self.pushvalue(w_set) @jit.unroll_safe def list_unpack_helper(frame, itemcount): @@ -1474,9 +1474,7 @@ for i in range(itemcount, 0, -1): w_item = frame.peekvalue(i-1) w_sum.extend(w_item) - while itemcount != 0: - frame.popvalue() - itemcount -= 1 + frame.popvalues(itemcount) return w_sum @jit.unroll_safe @@ -1516,9 +1514,7 @@ space.call_method(w_dict, 'update', w_item) if with_call and space.len_w(w_dict) < expected_length: self._build_map_unpack_error(itemcount) - while itemcount > 0: - self.popvalue() - itemcount -= 1 + self.popvalues(itemcount) self.pushvalue(w_dict) @jit.dont_look_inside @@ -1686,6 +1682,19 @@ w_res = space.newunicode(u''.join(lst)) self.pushvalue(w_res) + def _revdb_load_var(self, oparg): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import load_metavar + w_var = load_metavar(oparg) + self.pushvalue(w_var) + + def LOAD_REVDB_VAR(self, oparg, next_instr): + if self.space.reverse_debugging: + self._revdb_load_var(oparg) + else: + self.MISSING_OPCODE(oparg, next_instr) + + ### ____________________________________________________________ ### class ExitFrame(Exception): diff --git a/pypy/interpreter/pyparser/data/Grammar2.7 b/pypy/interpreter/pyparser/data/Grammar2.7 --- a/pypy/interpreter/pyparser/data/Grammar2.7 +++ b/pypy/interpreter/pyparser/data/Grammar2.7 @@ -104,7 +104,7 @@ '[' [listmaker] ']' | '{' [dictorsetmaker] '}' | '`' testlist1 '`' | - NAME | NUMBER | STRING+) + NAME | NUMBER | STRING+ | '$NUM') listmaker: test ( list_for | (',' test)* [','] ) testlist_comp: test ( comp_for | (',' test)* [','] ) lambdef: 'lambda' [varargslist] ':' test diff --git a/pypy/interpreter/pyparser/data/Grammar3.2 b/pypy/interpreter/pyparser/data/Grammar3.2 --- a/pypy/interpreter/pyparser/data/Grammar3.2 +++ b/pypy/interpreter/pyparser/data/Grammar3.2 @@ -103,7 +103,7 @@ atom: ('(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | - NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') + NAME | NUMBER | STRING+ | '$NUM' | '...' | 'None' | 'True' | 'False') testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME subscriptlist: subscript (',' subscript)* [','] diff --git a/pypy/interpreter/pyparser/data/Grammar3.3 b/pypy/interpreter/pyparser/data/Grammar3.3 --- a/pypy/interpreter/pyparser/data/Grammar3.3 +++ b/pypy/interpreter/pyparser/data/Grammar3.3 @@ -103,7 +103,7 @@ atom: ('(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | - NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') + NAME | NUMBER | STRING+ | '$NUM' | '...' | 'None' | 'True' | 'False') testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME subscriptlist: subscript (',' subscript)* [','] diff --git a/pypy/interpreter/pyparser/data/Grammar3.5 b/pypy/interpreter/pyparser/data/Grammar3.5 --- a/pypy/interpreter/pyparser/data/Grammar3.5 +++ b/pypy/interpreter/pyparser/data/Grammar3.5 @@ -108,7 +108,7 @@ atom: ('(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | - NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') + NAME | NUMBER | STRING+ | '$NUM' | '...' | 'None' | 'True' | 'False') testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME subscriptlist: subscript (',' subscript)* [','] diff --git a/pypy/interpreter/pyparser/data/Grammar3.6 b/pypy/interpreter/pyparser/data/Grammar3.6 --- a/pypy/interpreter/pyparser/data/Grammar3.6 +++ b/pypy/interpreter/pyparser/data/Grammar3.6 @@ -107,7 +107,7 @@ atom: ('(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | - NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') + NAME | NUMBER | STRING+ | '$NUM' | '...' | 'None' | 'True' | 'False') testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME subscriptlist: subscript (',' subscript)* [','] diff --git a/pypy/interpreter/pyparser/dfa_generated.py b/pypy/interpreter/pyparser/dfa_generated.py --- a/pypy/interpreter/pyparser/dfa_generated.py +++ b/pypy/interpreter/pyparser/dfa_generated.py @@ -6,42 +6,43 @@ from pypy.interpreter.pyparser import automata accepts = [True, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, - True, False, False, False, True, False, False, - False, True, False, False, True, False, False, - True, False, False, True, False, False, True, - False, False, True, False, True, False, True, - False, True, False, False, False, False, True, True, False, False, False, False, True, False, - True, False, True, False, True, False, True, True, - False, True, False, True, False, False, True, - True, True, True, True] + False, False, True, False, False, True, False, + False, True, False, True, False, True, False, + False, True, False, False, True, False, True, + False, True, False, True, False, False, False, + False, True, True, False, False, False, False, + True, False, True, False, True, False, True, + False, True, True, False, True, False, True, + False, False, True, True, True, True, True] states = [ # 0 {'\t': 0, '\n': 15, '\x0c': 0, - '\r': 16, ' ': 0, '!': 11, '"': 18, - '#': 20, '%': 14, '&': 14, "'": 17, - '(': 15, ')': 15, '*': 8, '+': 14, - ',': 15, '-': 12, '.': 7, '/': 13, - '0': 5, '1': 6, '2': 6, '3': 6, - '4': 6, '5': 6, '6': 6, '7': 6, - '8': 6, '9': 6, ':': 15, ';': 15, - '<': 10, '=': 14, '>': 9, '@': 14, - 'A': 1, 'B': 2, 'C': 1, 'D': 1, - 'E': 1, 'F': 2, 'G': 1, 'H': 1, - 'I': 1, 'J': 1, 'K': 1, 'L': 1, - 'M': 1, 'N': 1, 'O': 1, 'P': 1, - 'Q': 1, 'R': 3, 'S': 1, 'T': 1, - 'U': 4, 'V': 1, 'W': 1, 'X': 1, - 'Y': 1, 'Z': 1, '[': 15, '\\': 19, - ']': 15, '^': 14, '_': 1, '`': 15, - 'a': 1, 'b': 2, 'c': 1, 'd': 1, - 'e': 1, 'f': 2, 'g': 1, 'h': 1, - 'i': 1, 'j': 1, 'k': 1, 'l': 1, - 'm': 1, 'n': 1, 'o': 1, 'p': 1, - 'q': 1, 'r': 3, 's': 1, 't': 1, - 'u': 4, 'v': 1, 'w': 1, 'x': 1, - 'y': 1, 'z': 1, '{': 15, '|': 14, - '}': 15, '~': 15, '\x80': 1}, + '\r': 16, ' ': 0, '!': 11, '"': 19, + '#': 21, '$': 17, '%': 14, '&': 14, + "'": 18, '(': 15, ')': 15, '*': 8, + '+': 14, ',': 15, '-': 12, '.': 7, + '/': 13, '0': 5, '1': 6, '2': 6, + '3': 6, '4': 6, '5': 6, '6': 6, + '7': 6, '8': 6, '9': 6, ':': 15, + ';': 15, '<': 10, '=': 14, '>': 9, + '@': 14, 'A': 1, 'B': 2, 'C': 1, + 'D': 1, 'E': 1, 'F': 2, 'G': 1, + 'H': 1, 'I': 1, 'J': 1, 'K': 1, + 'L': 1, 'M': 1, 'N': 1, 'O': 1, + 'P': 1, 'Q': 1, 'R': 3, 'S': 1, + 'T': 1, 'U': 4, 'V': 1, 'W': 1, + 'X': 1, 'Y': 1, 'Z': 1, '[': 15, + '\\': 20, ']': 15, '^': 14, '_': 1, + '`': 15, 'a': 1, 'b': 2, 'c': 1, + 'd': 1, 'e': 1, 'f': 2, 'g': 1, + 'h': 1, 'i': 1, 'j': 1, 'k': 1, + 'l': 1, 'm': 1, 'n': 1, 'o': 1, + 'p': 1, 'q': 1, 'r': 3, 's': 1, + 't': 1, 'u': 4, 'v': 1, 'w': 1, + 'x': 1, 'y': 1, 'z': 1, '{': 15, + '|': 14, '}': 15, '~': 15, + '\x80': 1}, # 1 {'0': 1, '1': 1, '2': 1, '3': 1, '4': 1, '5': 1, '6': 1, '7': 1, @@ -60,7 +61,7 @@ 't': 1, 'u': 1, 'v': 1, 'w': 1, 'x': 1, 'y': 1, 'z': 1, '\x80': 1}, # 2 - {'"': 18, "'": 17, '0': 1, '1': 1, + {'"': 19, "'": 18, '0': 1, '1': 1, '2': 1, '3': 1, '4': 1, '5': 1, '6': 1, '7': 1, '8': 1, '9': 1, 'A': 1, 'B': 1, 'C': 1, 'D': 1, @@ -78,7 +79,7 @@ 'v': 1, 'w': 1, 'x': 1, 'y': 1, 'z': 1, '\x80': 1}, # 3 - {'"': 18, "'": 17, '0': 1, '1': 1, + {'"': 19, "'": 18, '0': 1, '1': 1, '2': 1, '3': 1, '4': 1, '5': 1, '6': 1, '7': 1, '8': 1, '9': 1, 'A': 1, 'B': 4, 'C': 1, 'D': 1, @@ -96,7 +97,7 @@ 'v': 1, 'w': 1, 'x': 1, 'y': 1, 'z': 1, '\x80': 1}, # 4 - {'"': 18, "'": 17, '0': 1, '1': 1, + {'"': 19, "'": 18, '0': 1, '1': 1, '2': 1, '3': 1, '4': 1, '5': 1, '6': 1, '7': 1, '8': 1, '9': 1, 'A': 1, 'B': 1, 'C': 1, 'D': 1, @@ -114,21 +115,21 @@ 'v': 1, 'w': 1, 'x': 1, 'y': 1, 'z': 1, '\x80': 1}, # 5 - {'.': 27, '0': 24, '1': 26, '2': 26, - '3': 26, '4': 26, '5': 26, '6': 26, - '7': 26, '8': 26, '9': 26, 'B': 23, - 'E': 28, 'J': 15, 'O': 22, 'X': 21, - '_': 25, 'b': 23, 'e': 28, 'j': 15, - 'o': 22, 'x': 21}, + {'.': 28, '0': 25, '1': 27, '2': 27, + '3': 27, '4': 27, '5': 27, '6': 27, + '7': 27, '8': 27, '9': 27, 'B': 24, + 'E': 29, 'J': 15, 'O': 23, 'X': 22, + '_': 26, 'b': 24, 'e': 29, 'j': 15, + 'o': 23, 'x': 22}, # 6 - {'.': 27, '0': 6, '1': 6, '2': 6, + {'.': 28, '0': 6, '1': 6, '2': 6, '3': 6, '4': 6, '5': 6, '6': 6, - '7': 6, '8': 6, '9': 6, 'E': 28, - 'J': 15, '_': 29, 'e': 28, 'j': 15}, + '7': 6, '8': 6, '9': 6, 'E': 29, + 'J': 15, '_': 30, 'e': 29, 'j': 15}, # 7 - {'.': 31, '0': 30, '1': 30, '2': 30, - '3': 30, '4': 30, '5': 30, '6': 30, - '7': 30, '8': 30, '9': 30}, + {'.': 32, '0': 31, '1': 31, '2': 31, + '3': 31, '4': 31, '5': 31, '6': 31, + '7': 31, '8': 31, '9': 31}, # 8 {'*': 14, '=': 15}, # 9 @@ -148,239 +149,247 @@ # 16 {'\n': 15}, # 17 - {automata.DEFAULT: 35, '\n': 32, - '\r': 32, "'": 33, '\\': 34}, + {'0': 33, '1': 33, '2': 33, '3': 33, + '4': 33, '5': 33, '6': 33, '7': 33, + '8': 33, '9': 33}, # 18 - {automata.DEFAULT: 38, '\n': 32, - '\r': 32, '"': 36, '\\': 37}, + {automata.DEFAULT: 37, '\n': 34, + '\r': 34, "'": 35, '\\': 36}, # 19 + {automata.DEFAULT: 40, '\n': 34, + '\r': 34, '"': 38, '\\': 39}, + # 20 {'\n': 15, '\r': 16}, - # 20 - {automata.DEFAULT: 20, '\n': 32, '\r': 32}, # 21 - {'0': 39, '1': 39, '2': 39, '3': 39, - '4': 39, '5': 39, '6': 39, '7': 39, - '8': 39, '9': 39, 'A': 39, 'B': 39, - 'C': 39, 'D': 39, 'E': 39, 'F': 39, - '_': 40, 'a': 39, 'b': 39, 'c': 39, - 'd': 39, 'e': 39, 'f': 39}, + {automata.DEFAULT: 21, '\n': 34, '\r': 34}, # 22 {'0': 41, '1': 41, '2': 41, '3': 41, '4': 41, '5': 41, '6': 41, '7': 41, - '_': 42}, + '8': 41, '9': 41, 'A': 41, 'B': 41, + 'C': 41, 'D': 41, 'E': 41, 'F': 41, + '_': 42, 'a': 41, 'b': 41, 'c': 41, + 'd': 41, 'e': 41, 'f': 41}, # 23 - {'0': 43, '1': 43, '_': 44}, + {'0': 43, '1': 43, '2': 43, '3': 43, + '4': 43, '5': 43, '6': 43, '7': 43, + '_': 44}, # 24 - {'.': 27, '0': 24, '1': 26, '2': 26, - '3': 26, '4': 26, '5': 26, '6': 26, - '7': 26, '8': 26, '9': 26, 'E': 28, - 'J': 15, '_': 25, 'e': 28, 'j': 15}, + {'0': 45, '1': 45, '_': 46}, # 25 - {'0': 45, '1': 46, '2': 46, '3': 46, - '4': 46, '5': 46, '6': 46, '7': 46, - '8': 46, '9': 46}, + {'.': 28, '0': 25, '1': 27, '2': 27, + '3': 27, '4': 27, '5': 27, '6': 27, + '7': 27, '8': 27, '9': 27, 'E': 29, + 'J': 15, '_': 26, 'e': 29, 'j': 15}, # 26 - {'.': 27, '0': 26, '1': 26, '2': 26, - '3': 26, '4': 26, '5': 26, '6': 26, - '7': 26, '8': 26, '9': 26, 'E': 28, - 'J': 15, '_': 47, 'e': 28, 'j': 15}, + {'0': 47, '1': 48, '2': 48, '3': 48, + '4': 48, '5': 48, '6': 48, '7': 48, + '8': 48, '9': 48}, # 27 - {'0': 27, '1': 27, '2': 27, '3': 27, - '4': 27, '5': 27, '6': 27, '7': 27, - '8': 27, '9': 27, 'E': 48, 'J': 15, - 'e': 48, 'j': 15}, + {'.': 28, '0': 27, '1': 27, '2': 27, + '3': 27, '4': 27, '5': 27, '6': 27, + '7': 27, '8': 27, '9': 27, 'E': 29, + 'J': 15, '_': 49, 'e': 29, 'j': 15}, # 28 - {'+': 49, '-': 49, '0': 50, '1': 50, - '2': 50, '3': 50, '4': 50, '5': 50, - '6': 50, '7': 50, '8': 50, '9': 50}, + {'0': 28, '1': 28, '2': 28, '3': 28, + '4': 28, '5': 28, '6': 28, '7': 28, + '8': 28, '9': 28, 'E': 50, 'J': 15, + 'e': 50, 'j': 15}, # 29 - {'0': 51, '1': 51, '2': 51, '3': 51, - '4': 51, '5': 51, '6': 51, '7': 51, - '8': 51, '9': 51}, + {'+': 51, '-': 51, '0': 52, '1': 52, + '2': 52, '3': 52, '4': 52, '5': 52, + '6': 52, '7': 52, '8': 52, '9': 52}, # 30 - {'0': 30, '1': 30, '2': 30, '3': 30, - '4': 30, '5': 30, '6': 30, '7': 30, - '8': 30, '9': 30, 'E': 48, 'J': 15, - '_': 52, 'e': 48, 'j': 15}, + {'0': 53, '1': 53, '2': 53, '3': 53, + '4': 53, '5': 53, '6': 53, '7': 53, + '8': 53, '9': 53}, # 31 + {'0': 31, '1': 31, '2': 31, '3': 31, + '4': 31, '5': 31, '6': 31, '7': 31, + '8': 31, '9': 31, 'E': 50, 'J': 15, + '_': 54, 'e': 50, 'j': 15}, + # 32 {'.': 15}, - # 32 + # 33 + {'0': 33, '1': 33, '2': 33, '3': 33, + '4': 33, '5': 33, '6': 33, '7': 33, + '8': 33, '9': 33}, + # 34 {}, - # 33 + # 35 {"'": 15}, - # 34 - {automata.DEFAULT: 53, '\n': 15, '\r': 16}, - # 35 - {automata.DEFAULT: 35, '\n': 32, - '\r': 32, "'": 15, '\\': 34}, # 36 + {automata.DEFAULT: 55, '\n': 15, '\r': 16}, + # 37 + {automata.DEFAULT: 37, '\n': 34, + '\r': 34, "'": 15, '\\': 36}, + # 38 {'"': 15}, - # 37 - {automata.DEFAULT: 54, '\n': 15, '\r': 16}, - # 38 - {automata.DEFAULT: 38, '\n': 32, - '\r': 32, '"': 15, '\\': 37}, # 39 - {'0': 39, '1': 39, '2': 39, '3': 39, - '4': 39, '5': 39, '6': 39, '7': 39, - '8': 39, '9': 39, 'A': 39, 'B': 39, - 'C': 39, 'D': 39, 'E': 39, 'F': 39, - '_': 55, 'a': 39, 'b': 39, 'c': 39, - 'd': 39, 'e': 39, 'f': 39}, + {automata.DEFAULT: 56, '\n': 15, '\r': 16}, # 40 - {'0': 56, '1': 56, '2': 56, '3': 56, - '4': 56, '5': 56, '6': 56, '7': 56, - '8': 56, '9': 56, 'A': 56, 'B': 56, - 'C': 56, 'D': 56, 'E': 56, 'F': 56, - 'a': 56, 'b': 56, 'c': 56, 'd': 56, - 'e': 56, 'f': 56}, + {automata.DEFAULT: 40, '\n': 34, + '\r': 34, '"': 15, '\\': 39}, # 41 {'0': 41, '1': 41, '2': 41, '3': 41, '4': 41, '5': 41, '6': 41, '7': 41, - '_': 57}, + '8': 41, '9': 41, 'A': 41, 'B': 41, + 'C': 41, 'D': 41, 'E': 41, 'F': 41, + '_': 57, 'a': 41, 'b': 41, 'c': 41, + 'd': 41, 'e': 41, 'f': 41}, # 42 {'0': 58, '1': 58, '2': 58, '3': 58, - '4': 58, '5': 58, '6': 58, '7': 58}, + '4': 58, '5': 58, '6': 58, '7': 58, + '8': 58, '9': 58, 'A': 58, 'B': 58, + 'C': 58, 'D': 58, 'E': 58, 'F': 58, + 'a': 58, 'b': 58, 'c': 58, 'd': 58, + 'e': 58, 'f': 58}, # 43 - {'0': 43, '1': 43, '_': 59}, + {'0': 43, '1': 43, '2': 43, '3': 43, + '4': 43, '5': 43, '6': 43, '7': 43, + '_': 59}, # 44 - {'0': 60, '1': 60}, + {'0': 60, '1': 60, '2': 60, '3': 60, + '4': 60, '5': 60, '6': 60, '7': 60}, # 45 - {'.': 27, '0': 45, '1': 46, '2': 46, - '3': 46, '4': 46, '5': 46, '6': 46, - '7': 46, '8': 46, '9': 46, 'E': 28, - 'J': 15, '_': 25, 'e': 28, 'j': 15}, + {'0': 45, '1': 45, '_': 61}, # 46 - {'.': 27, '0': 46, '1': 46, '2': 46, - '3': 46, '4': 46, '5': 46, '6': 46, - '7': 46, '8': 46, '9': 46, 'E': 28, - 'J': 15, '_': 47, 'e': 28, 'j': 15}, + {'0': 62, '1': 62}, # 47 - {'0': 46, '1': 46, '2': 46, '3': 46, - '4': 46, '5': 46, '6': 46, '7': 46, - '8': 46, '9': 46}, + {'.': 28, '0': 47, '1': 48, '2': 48, + '3': 48, '4': 48, '5': 48, '6': 48, + '7': 48, '8': 48, '9': 48, 'E': 29, + 'J': 15, '_': 26, 'e': 29, 'j': 15}, # 48 - {'+': 61, '-': 61, '0': 62, '1': 62, - '2': 62, '3': 62, '4': 62, '5': 62, - '6': 62, '7': 62, '8': 62, '9': 62}, + {'.': 28, '0': 48, '1': 48, '2': 48, + '3': 48, '4': 48, '5': 48, '6': 48, + '7': 48, '8': 48, '9': 48, 'E': 29, + 'J': 15, '_': 49, 'e': 29, 'j': 15}, # 49 - {'0': 50, '1': 50, '2': 50, '3': 50, - '4': 50, '5': 50, '6': 50, '7': 50, - '8': 50, '9': 50}, + {'0': 48, '1': 48, '2': 48, '3': 48, + '4': 48, '5': 48, '6': 48, '7': 48, + '8': 48, '9': 48}, # 50 - {'0': 50, '1': 50, '2': 50, '3': 50, - '4': 50, '5': 50, '6': 50, '7': 50, - '8': 50, '9': 50, 'J': 15, '_': 63, + {'+': 63, '-': 63, '0': 64, '1': 64, + '2': 64, '3': 64, '4': 64, '5': 64, + '6': 64, '7': 64, '8': 64, '9': 64}, + # 51 + {'0': 52, '1': 52, '2': 52, '3': 52, + '4': 52, '5': 52, '6': 52, '7': 52, + '8': 52, '9': 52}, + # 52 + {'0': 52, '1': 52, '2': 52, '3': 52, + '4': 52, '5': 52, '6': 52, '7': 52, + '8': 52, '9': 52, 'J': 15, '_': 65, 'j': 15}, - # 51 - {'.': 27, '0': 51, '1': 51, '2': 51, - '3': 51, '4': 51, '5': 51, '6': 51, - '7': 51, '8': 51, '9': 51, 'E': 28, - 'J': 15, '_': 29, 'e': 28, 'j': 15}, - # 52 + # 53 + {'.': 28, '0': 53, '1': 53, '2': 53, + '3': 53, '4': 53, '5': 53, '6': 53, + '7': 53, '8': 53, '9': 53, 'E': 29, + 'J': 15, '_': 30, 'e': 29, 'j': 15}, + # 54 + {'0': 66, '1': 66, '2': 66, '3': 66, + '4': 66, '5': 66, '6': 66, '7': 66, + '8': 66, '9': 66}, + # 55 + {automata.DEFAULT: 55, '\n': 34, + '\r': 34, "'": 15, '\\': 36}, + # 56 + {automata.DEFAULT: 56, '\n': 34, + '\r': 34, '"': 15, '\\': 39}, + # 57 + {'0': 67, '1': 67, '2': 67, '3': 67, + '4': 67, '5': 67, '6': 67, '7': 67, + '8': 67, '9': 67, 'A': 67, 'B': 67, + 'C': 67, 'D': 67, 'E': 67, 'F': 67, + 'a': 67, 'b': 67, 'c': 67, 'd': 67, + 'e': 67, 'f': 67}, + # 58 + {'0': 58, '1': 58, '2': 58, '3': 58, + '4': 58, '5': 58, '6': 58, '7': 58, + '8': 58, '9': 58, 'A': 58, 'B': 58, + 'C': 58, 'D': 58, 'E': 58, 'F': 58, + '_': 68, 'a': 58, 'b': 58, 'c': 58, + 'd': 58, 'e': 58, 'f': 58}, + # 59 + {'0': 69, '1': 69, '2': 69, '3': 69, + '4': 69, '5': 69, '6': 69, '7': 69}, + # 60 + {'0': 60, '1': 60, '2': 60, '3': 60, + '4': 60, '5': 60, '6': 60, '7': 60, + '_': 70}, + # 61 + {'0': 71, '1': 71}, + # 62 + {'0': 62, '1': 62, '_': 72}, + # 63 {'0': 64, '1': 64, '2': 64, '3': 64, '4': 64, '5': 64, '6': 64, '7': 64, '8': 64, '9': 64}, - # 53 - {automata.DEFAULT: 53, '\n': 32, - '\r': 32, "'": 15, '\\': 34}, - # 54 - {automata.DEFAULT: 54, '\n': 32, - '\r': 32, '"': 15, '\\': 37}, - # 55 - {'0': 65, '1': 65, '2': 65, '3': 65, - '4': 65, '5': 65, '6': 65, '7': 65, - '8': 65, '9': 65, 'A': 65, 'B': 65, - 'C': 65, 'D': 65, 'E': 65, 'F': 65, - 'a': 65, 'b': 65, 'c': 65, 'd': 65, - 'e': 65, 'f': 65}, - # 56 - {'0': 56, '1': 56, '2': 56, '3': 56, - '4': 56, '5': 56, '6': 56, '7': 56, - '8': 56, '9': 56, 'A': 56, 'B': 56, - 'C': 56, 'D': 56, 'E': 56, 'F': 56, - '_': 66, 'a': 56, 'b': 56, 'c': 56, - 'd': 56, 'e': 56, 'f': 56}, - # 57 - {'0': 67, '1': 67, '2': 67, '3': 67, - '4': 67, '5': 67, '6': 67, '7': 67}, - # 58 - {'0': 58, '1': 58, '2': 58, '3': 58, - '4': 58, '5': 58, '6': 58, '7': 58, - '_': 68}, - # 59 - {'0': 69, '1': 69}, - # 60 - {'0': 60, '1': 60, '_': 70}, - # 61 - {'0': 62, '1': 62, '2': 62, '3': 62, - '4': 62, '5': 62, '6': 62, '7': 62, - '8': 62, '9': 62}, - # 62 - {'0': 62, '1': 62, '2': 62, '3': 62, - '4': 62, '5': 62, '6': 62, '7': 62, - '8': 62, '9': 62, 'J': 15, '_': 71, - 'j': 15}, - # 63 - {'0': 72, '1': 72, '2': 72, '3': 72, - '4': 72, '5': 72, '6': 72, '7': 72, - '8': 72, '9': 72}, # 64 {'0': 64, '1': 64, '2': 64, '3': 64, '4': 64, '5': 64, '6': 64, '7': 64, - '8': 64, '9': 64, 'E': 48, 'J': 15, - '_': 52, 'e': 48, 'j': 15}, + '8': 64, '9': 64, 'J': 15, '_': 73, + 'j': 15}, # 65 - {'0': 65, '1': 65, '2': 65, '3': 65, - '4': 65, '5': 65, '6': 65, '7': 65, - '8': 65, '9': 65, 'A': 65, 'B': 65, - 'C': 65, 'D': 65, 'E': 65, 'F': 65, - '_': 55, 'a': 65, 'b': 65, 'c': 65, - 'd': 65, 'e': 65, 'f': 65}, + {'0': 74, '1': 74, '2': 74, '3': 74, + '4': 74, '5': 74, '6': 74, '7': 74, + '8': 74, '9': 74}, # 66 - {'0': 73, '1': 73, '2': 73, '3': 73, - '4': 73, '5': 73, '6': 73, '7': 73, - '8': 73, '9': 73, 'A': 73, 'B': 73, - 'C': 73, 'D': 73, 'E': 73, 'F': 73, - 'a': 73, 'b': 73, 'c': 73, 'd': 73, - 'e': 73, 'f': 73}, + {'0': 66, '1': 66, '2': 66, '3': 66, + '4': 66, '5': 66, '6': 66, '7': 66, + '8': 66, '9': 66, 'E': 50, 'J': 15, + '_': 54, 'e': 50, 'j': 15}, # 67 {'0': 67, '1': 67, '2': 67, '3': 67, '4': 67, '5': 67, '6': 67, '7': 67, - '_': 57}, + '8': 67, '9': 67, 'A': 67, 'B': 67, + 'C': 67, 'D': 67, 'E': 67, 'F': 67, + '_': 57, 'a': 67, 'b': 67, 'c': 67, + 'd': 67, 'e': 67, 'f': 67}, # 68 - {'0': 74, '1': 74, '2': 74, '3': 74, - '4': 74, '5': 74, '6': 74, '7': 74}, + {'0': 75, '1': 75, '2': 75, '3': 75, + '4': 75, '5': 75, '6': 75, '7': 75, + '8': 75, '9': 75, 'A': 75, 'B': 75, + 'C': 75, 'D': 75, 'E': 75, 'F': 75, + 'a': 75, 'b': 75, 'c': 75, 'd': 75, + 'e': 75, 'f': 75}, # 69 - {'0': 69, '1': 69, '_': 59}, + {'0': 69, '1': 69, '2': 69, '3': 69, + '4': 69, '5': 69, '6': 69, '7': 69, + '_': 59}, # 70 - {'0': 75, '1': 75}, + {'0': 76, '1': 76, '2': 76, '3': 76, + '4': 76, '5': 76, '6': 76, '7': 76}, # 71 - {'0': 76, '1': 76, '2': 76, '3': 76, - '4': 76, '5': 76, '6': 76, '7': 76, - '8': 76, '9': 76}, + {'0': 71, '1': 71, '_': 61}, # 72 - {'0': 72, '1': 72, '2': 72, '3': 72, - '4': 72, '5': 72, '6': 72, '7': 72, - '8': 72, '9': 72, 'J': 15, '_': 63, - 'j': 15}, + {'0': 77, '1': 77}, # 73 - {'0': 73, '1': 73, '2': 73, '3': 73, - '4': 73, '5': 73, '6': 73, '7': 73, - '8': 73, '9': 73, 'A': 73, 'B': 73, - 'C': 73, 'D': 73, 'E': 73, 'F': 73, - '_': 66, 'a': 73, 'b': 73, 'c': 73, - 'd': 73, 'e': 73, 'f': 73}, + {'0': 78, '1': 78, '2': 78, '3': 78, + '4': 78, '5': 78, '6': 78, '7': 78, + '8': 78, '9': 78}, # 74 {'0': 74, '1': 74, '2': 74, '3': 74, '4': 74, '5': 74, '6': 74, '7': 74, - '_': 68}, + '8': 74, '9': 74, 'J': 15, '_': 65, + 'j': 15}, # 75 - {'0': 75, '1': 75, '_': 70}, + {'0': 75, '1': 75, '2': 75, '3': 75, + '4': 75, '5': 75, '6': 75, '7': 75, + '8': 75, '9': 75, 'A': 75, 'B': 75, + 'C': 75, 'D': 75, 'E': 75, 'F': 75, + '_': 68, 'a': 75, 'b': 75, 'c': 75, + 'd': 75, 'e': 75, 'f': 75}, # 76 {'0': 76, '1': 76, '2': 76, '3': 76, '4': 76, '5': 76, '6': 76, '7': 76, - '8': 76, '9': 76, 'J': 15, '_': 71, + '_': 70}, + # 77 + {'0': 77, '1': 77, '_': 72}, + # 78 + {'0': 78, '1': 78, '2': 78, '3': 78, + '4': 78, '5': 78, '6': 78, '7': 78, + '8': 78, '9': 78, 'J': 15, '_': 73, 'j': 15}, ] pseudoDFA = automata.DFA(states, accepts) diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -45,7 +45,7 @@ self.tok = self.tokens[index] def skip(self, n): - if self.tok[0] == n: + if self.tok.token_type == n: self.next() return True else: @@ -53,7 +53,7 @@ def skip_name(self, name): from pypy.interpreter.pyparser import pygram - if self.tok[0] == pygram.tokens.NAME and self.tok[1] == name: + if self.tok.token_type == pygram.tokens.NAME and self.tok.value == name: self.next() return True else: @@ -61,8 +61,8 @@ def next_feature_name(self): from pypy.interpreter.pyparser import pygram - if self.tok[0] == pygram.tokens.NAME: - name = self.tok[1] + if self.tok.token_type == pygram.tokens.NAME: + name = self.tok.value self.next() if self.skip_name("as"): self.skip(pygram.tokens.NAME) @@ -99,7 +99,7 @@ # somewhere inside the last __future__ import statement # (at the start would be fine too, but it's easier to grab a # random position inside) - last_position = (it.tok[2], it.tok[3]) + last_position = (it.tok.lineno, it.tok.column) result |= future_flags.get_compiler_feature(it.next_feature_name()) while it.skip(pygram.tokens.COMMA): result |= future_flags.get_compiler_feature(it.next_feature_name()) diff --git a/pypy/interpreter/pyparser/gendfa.py b/pypy/interpreter/pyparser/gendfa.py old mode 100755 new mode 100644 --- a/pypy/interpreter/pyparser/gendfa.py +++ b/pypy/interpreter/pyparser/gendfa.py @@ -166,7 +166,10 @@ makeEOL(), chainStr(states, "..."), groupStr(states, "@:;.,`")) - funny = group(states, operator, bracket, special) + revdb_metavar = chain(states, + groupStr(states, "$"), + atleastonce(states, makeDigits())) + funny = group(states, operator, bracket, special, revdb_metavar) # ____________________________________________________________ def makeStrPrefix (): return group(states, diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py --- a/pypy/interpreter/pyparser/parser.py +++ b/pypy/interpreter/pyparser/parser.py @@ -28,11 +28,24 @@ new.symbol_ids = self.symbol_ids new.symbols_names = self.symbol_names new.keyword_ids = self.keyword_ids + new.token_to_error_string = self.token_to_error_string new.dfas = self.dfas new.labels = self.labels new.token_ids = self.token_ids return new + + def classify(self, token): + """Find the label for a token.""" + if token.token_type == self.KEYWORD_TOKEN: + label_index = self.keyword_ids.get(token.value, -1) + if label_index != -1: + return label_index + label_index = self.token_ids.get(token.token_type, -1) + if label_index == -1: + raise ParseError("invalid token", token) + return label_index + def _freeze_(self): # Remove some attributes not used in parsing. try: @@ -65,6 +78,33 @@ b[pos] |= bit return str(b) + +class Token(object): + def __init__(self, token_type, value, lineno, column, line): + self.token_type = token_type + self.value = value + self.lineno = lineno + # 0-based offset + self.column = column + self.line = line + + def __repr__(self): + return "Token(%s, %s)" % (self.token_type, self.value) + + def __eq__(self, other): + # for tests + return ( + self.token_type == other.token_type and + self.value == other.value and + self.lineno == other.lineno and + self.column == other.column and + self.line == other.line + ) + + def __ne__(self, other): + return not self == other + + class Node(object): __slots__ = ("type", ) @@ -105,6 +145,11 @@ self.lineno = lineno self.column = column + @staticmethod + def fromtoken(token): + return Terminal( + token.token_type, token.value, token.lineno, token.column) + def __repr__(self): return "Terminal(type=%s, value=%r)" % (self.type, self.value) @@ -193,20 +238,14 @@ class ParseError(Exception): - def __init__(self, msg, token_type, value, lineno, column, line, - expected=-1, expected_str=None): + def __init__(self, msg, token, expected=-1, expected_str=None): self.msg = msg - self.token_type = token_type - self.value = value - self.lineno = lineno - # this is a 0-based index - self.column = column - self.line = line + self.token = token self.expected = expected self.expected_str = expected_str def __str__(self): - return "ParserError(%s, %r)" % (self.token_type, self.value) + return "ParserError(%s)" % (self.token, ) class StackEntry(object): @@ -249,8 +288,8 @@ self.root = None self.stack = StackEntry(None, self.grammar.dfas[start - 256], 0) - def add_token(self, token_type, value, lineno, column, line): - label_index = self.classify(token_type, value, lineno, column, line) + def add_token(self, token): + label_index = self.grammar.classify(token) sym_id = 0 # for the annotator while True: dfa = self.stack.dfa @@ -261,7 +300,7 @@ sym_id = self.grammar.labels[i] if label_index == i: # We matched a non-terminal. - self.shift(next_state, token_type, value, lineno, column) + self.shift(next_state, token) state = states[next_state] # While the only possible action is to accept, pop nodes off # the stack. @@ -278,8 +317,7 @@ sub_node_dfa = self.grammar.dfas[sym_id - 256] # Check if this token can start a child node. if sub_node_dfa.could_match_token(label_index): - self.push(sub_node_dfa, next_state, sym_id, lineno, - column) + self.push(sub_node_dfa, next_state, sym_id) break else: # We failed to find any arcs to another state, so unless this @@ -287,8 +325,7 @@ if is_accepting: self.pop() if self.stack is None: - raise ParseError("too much input", token_type, value, - lineno, column, line) + raise ParseError("too much input", token) else: # If only one possible input would satisfy, attach it to the # error. @@ -299,28 +336,16 @@ else: expected = -1 expected_str = None - raise ParseError("bad input", token_type, value, lineno, - column, line, expected, expected_str) + raise ParseError("bad input", token, expected, expected_str) - def classify(self, token_type, value, lineno, column, line): - """Find the label for a token.""" - if token_type == self.grammar.KEYWORD_TOKEN: - label_index = self.grammar.keyword_ids.get(value, -1) - if label_index != -1: - return label_index - label_index = self.grammar.token_ids.get(token_type, -1) - if label_index == -1: - raise ParseError("invalid token", token_type, value, lineno, column, - line) - return label_index - def shift(self, next_state, token_type, value, lineno, column): + def shift(self, next_state, token): """Shift a non-terminal and prepare for the next state.""" - new_node = Terminal(token_type, value, lineno, column) + new_node = Terminal.fromtoken(token) self.stack.node_append_child(new_node) self.stack.state = next_state - def push(self, next_dfa, next_state, node_type, lineno, column): + def push(self, next_dfa, next_state, node_type): """Push a terminal and adjust the current state.""" self.stack.state = next_state self.stack = self.stack.push(next_dfa, 0) diff --git a/pypy/interpreter/pyparser/pygram.py b/pypy/interpreter/pyparser/pygram.py --- a/pypy/interpreter/pyparser/pygram.py +++ b/pypy/interpreter/pyparser/pygram.py @@ -20,6 +20,13 @@ python_grammar = _get_python_grammar() +python_grammar_revdb = python_grammar.shared_copy() +copied_token_ids = python_grammar.token_ids.copy() +python_grammar_revdb.token_ids = copied_token_ids + +metavar_token_id = pytoken.python_tokens['REVDBMETAVAR'] +del python_grammar.token_ids[metavar_token_id] + class _Tokens(object): pass for tok_name, idx in pytoken.python_tokens.iteritems(): @@ -36,3 +43,11 @@ syms._rev_lookup = rev_lookup # for debugging del _get_python_grammar, _Tokens, tok_name, sym_name, idx + +def choose_grammar(print_function, revdb): + assert print_function + if revdb: + return python_grammar_revdb + else: + return python_grammar + diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py --- a/pypy/interpreter/pyparser/pyparse.py +++ b/pypy/interpreter/pyparser/pyparse.py @@ -163,48 +163,57 @@ flags &= ~consts.PyCF_DONT_IMPLY_DEDENT self.prepare(_targets[compile_info.mode]) - tp = 0 try: - last_value_seen = None - next_value_seen = None + last_token_seen = None + next_token_seen = None try: # Note: we no longer pass the CO_FUTURE_* to the tokenizer, # which is expected to work independently of them. It's # certainly the case for all futures in Python <= 2.7. tokens = pytokenizer.generate_tokens(source_lines, flags) + except error.TokenError as e: + e.filename = compile_info.filename + raise + except error.TokenIndentationError as e: + e.filename = compile_info.filename + raise - newflags, last_future_import = ( - future.add_future_flags(self.future_flags, tokens)) - compile_info.last_future_import = last_future_import - compile_info.flags |= newflags - self.grammar = pygram.python_grammar + newflags, last_future_import = ( + future.add_future_flags(self.future_flags, tokens)) + compile_info.last_future_import = last_future_import + compile_info.flags |= newflags + + self.grammar = pygram.choose_grammar( + print_function=True, + revdb=self.space.config.translation.reverse_debugger) + try: tokens_stream = iter(tokens) - for tp, value, lineno, column, line in tokens_stream: - next_value_seen = value - if self.add_token(tp, value, lineno, column, line): + for token in tokens_stream: + next_token_seen = token + if self.add_token(token): break - last_value_seen = value - last_value_seen = None - next_value_seen = None + last_token_seen = token + last_token_seen = None + next_token_seen = None if compile_info.mode == 'single': - for tp, value, lineno, column, line in tokens_stream: - if tp == pygram.tokens.ENDMARKER: + for token in tokens_stream: + if token.token_type == pygram.tokens.ENDMARKER: break - if tp == pygram.tokens.NEWLINE: + if token.token_type == pygram.tokens.NEWLINE: continue - if tp == pygram.tokens.COMMENT: - for tp, _, _, _, _ in tokens_stream: - if tp == pygram.tokens.NEWLINE: + if token.token_type == pygram.tokens.COMMENT: + for token in tokens_stream: + if token.token_type == pygram.tokens.NEWLINE: break else: new_err = error.SyntaxError msg = ("multiple statements found while " "compiling a single statement") - raise new_err(msg, lineno, column, - line, compile_info.filename) + raise new_err(msg, token.lineno, token.column, + token.line, compile_info.filename) except error.TokenError as e: e.filename = compile_info.filename @@ -216,17 +225,18 @@ # Catch parse errors, pretty them up and reraise them as a # SyntaxError. new_err = error.IndentationError - if tp == pygram.tokens.INDENT: + if token.token_type == pygram.tokens.INDENT: msg = "unexpected indent" elif e.expected == pygram.tokens.INDENT: msg = "expected an indented block" else: new_err = error.SyntaxError - if (last_value_seen in ('print', 'exec') and - bool(next_value_seen) and - next_value_seen != '('): + if (last_token_seen is not None and + last_token_seen.value in ('print', 'exec') and + next_token_seen is not None and + next_token_seen.value != '('): msg = "Missing parentheses in call to '%s'" % ( - last_value_seen,) + last_token_seen,) else: msg = "invalid syntax" if e.expected_str is not None: @@ -234,7 +244,7 @@ # parser.ParseError(...).column is 0-based, but the offsets in the # exceptions in the error module are 1-based, hence the '+ 1' - raise new_err(msg, e.lineno, e.column + 1, e.line, + raise new_err(msg, e.token.lineno, e.token.column + 1, e.token.line, compile_info.filename) else: tree = self.root diff --git a/pypy/interpreter/pyparser/pytoken.py b/pypy/interpreter/pyparser/pytoken.py --- a/pypy/interpreter/pyparser/pytoken.py +++ b/pypy/interpreter/pyparser/pytoken.py @@ -72,5 +72,6 @@ # extra PyPy-specific tokens _add_tok("COMMENT") _add_tok("NL") +_add_tok("REVDBMETAVAR", "$NUM") del _add_tok diff --git a/pypy/interpreter/pyparser/pytokenize.py b/pypy/interpreter/pyparser/pytokenize.py --- a/pypy/interpreter/pyparser/pytokenize.py +++ b/pypy/interpreter/pyparser/pytokenize.py @@ -1,9 +1,6 @@ # ______________________________________________________________________ """Module pytokenize -THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED -TO BE ANNOTABLE (Mainly made lists homogeneous) - This is a modified version of Ka-Ping Yee's tokenize module found in the Python standard library. @@ -12,7 +9,6 @@ expressions have been replaced with hand built DFA's using the basil.util.automata module. -$Id: pytokenize.py,v 1.3 2003/10/03 16:31:53 jriehl Exp $ """ # ______________________________________________________________________ @@ -87,22 +83,3 @@ tabsize = 8 alttabsize = 1 - -# PYPY MODIFICATION: removed TokenError class as it's not needed here - -# PYPY MODIFICATION: removed StopTokenizing class as it's not needed here - -# PYPY MODIFICATION: removed printtoken() as it's not needed here - -# PYPY MODIFICATION: removed tokenize() as it's not needed here - -# PYPY MODIFICATION: removed tokenize_loop() as it's not needed here - -# PYPY MODIFICATION: removed generate_tokens() as it was copied / modified -# in pythonlexer.py - -# PYPY MODIFICATION: removed main() as it's not needed here - -# ______________________________________________________________________ -# End of pytokenize.py - diff --git a/pypy/interpreter/pyparser/pytokenizer.py b/pypy/interpreter/pyparser/pytokenizer.py --- a/pypy/interpreter/pyparser/pytokenizer.py +++ b/pypy/interpreter/pyparser/pytokenizer.py @@ -1,4 +1,5 @@ from pypy.interpreter.pyparser import automata +from pypy.interpreter.pyparser.parser import Token from pypy.interpreter.pyparser.pygram import tokens from pypy.interpreter.pyparser.pytoken import python_opmap from pypy.interpreter.pyparser.error import TokenError, TokenIndentationError, TabError @@ -144,7 +145,7 @@ endmatch = endDFA.recognize(line) if endmatch >= 0: pos = end = endmatch - tok = (tokens.STRING, contstr + line[:end], strstart[0], + tok = Token(tokens.STRING, contstr + line[:end], strstart[0], strstart[1], line) token_list.append(tok) last_comment = '' @@ -152,7 +153,7 @@ contline = None elif (needcont and not line.endswith('\\\n') and not line.endswith('\\\r\n')): - tok = (tokens.ERRORTOKEN, contstr + line, strstart[0], + tok = Token(tokens.ERRORTOKEN, contstr + line, strstart[0], strstart[1], line) token_list.append(tok) last_comment = '' @@ -200,13 +201,13 @@ raise TabError(lnum, pos, line) indents.append(column) altindents.append(altcolumn) - token_list.append((tokens.INDENT, line[:pos], lnum, 0, line)) + token_list.append(Token(tokens.INDENT, line[:pos], lnum, 0, line)) last_comment = '' else: while column < indents[-1]: indents.pop() altindents.pop() - token_list.append((tokens.DEDENT, '', lnum, pos, line)) + token_list.append(Token(tokens.DEDENT, '', lnum, pos, line)) last_comment = '' if column != indents[-1]: err = "unindent does not match any outer indentation level" @@ -246,13 +247,13 @@ if (initial in numchars or \ (initial == '.' and token != '.' and token != '...')): # ordinary number - token_list.append((tokens.NUMBER, token, lnum, start, line)) + token_list.append(Token(tokens.NUMBER, token, lnum, start, line)) last_comment = '' elif initial in '\r\n': if not parenstack: if async_def: async_def_nl = True - tok = (tokens.NEWLINE, last_comment, lnum, start, line) + tok = Token(tokens.NEWLINE, last_comment, lnum, start, line) token_list.append(tok) last_comment = '' elif initial == '#': @@ -267,7 +268,7 @@ if endmatch >= 0: # all on one line pos = endmatch token = line[start:pos] - tok = (tokens.STRING, token, lnum, start, line) + tok = Token(tokens.STRING, token, lnum, start, line) token_list.append(tok) last_comment = '' else: @@ -286,7 +287,7 @@ contline = line break else: # ordinary string - tok = (tokens.STRING, token, lnum, start, line) + tok = Token(tokens.STRING, token, lnum, start, line) token_list.append(tok) last_comment = '' elif (initial in namechars or # ordinary name @@ -303,11 +304,11 @@ if async_def: # inside 'async def' function if token == 'async': - token_list.append((tokens.ASYNC, token, lnum, start, line)) + token_list.append(Token(tokens.ASYNC, token, lnum, start, line)) elif token == 'await': - token_list.append((tokens.AWAIT, token, lnum, start, line)) + token_list.append(Token(tokens.AWAIT, token, lnum, start, line)) else: - token_list.append((tokens.NAME, token, lnum, start, line)) + token_list.append(Token(tokens.NAME, token, lnum, start, line)) elif token == 'async': # async token, look ahead #ahead token if pos < max: @@ -319,16 +320,20 @@ if ahead_token == 'def': async_def = True async_def_indent = indents[-1] - token_list.append((tokens.ASYNC, token, lnum, start, line)) + token_list.append(Token(tokens.ASYNC, token, lnum, start, line)) else: - token_list.append((tokens.NAME, token, lnum, start, line)) + token_list.append(Token(tokens.NAME, token, lnum, start, line)) else: - token_list.append((tokens.NAME, token, lnum, start, line)) + token_list.append(Token(tokens.NAME, token, lnum, start, line)) else: - token_list.append((tokens.NAME, token, lnum, start, line)) + token_list.append(Token(tokens.NAME, token, lnum, start, line)) last_comment = '' elif initial == '\\': # continued stmt continued = 1 + elif initial == '$': + token_list.append(Token(tokens.REVDBMETAVAR, token, + lnum, start, line)) + last_comment = '' else: if initial in '([{': parenstack.append((initial, lnum, start, line)) @@ -351,7 +356,7 @@ punct = python_opmap[token] else: punct = tokens.OP - token_list.append((punct, token, lnum, start, line)) + token_list.append(Token(punct, token, lnum, start, line)) last_comment = '' else: start = whiteSpaceDFA.recognize(line, pos) @@ -360,22 +365,22 @@ if start<max and line[start] in single_quoted: raise TokenError("end of line (EOL) while scanning string literal", line, lnum, start+1, token_list) - tok = (tokens.ERRORTOKEN, line[pos], lnum, pos, line) + tok = Token(tokens.ERRORTOKEN, line[pos], lnum, pos, line) token_list.append(tok) last_comment = '' pos = pos + 1 lnum -= 1 if not (flags & consts.PyCF_DONT_IMPLY_DEDENT): - if token_list and token_list[-1][0] != tokens.NEWLINE: - tok = (tokens.NEWLINE, '', lnum, 0, '\n') + if token_list and token_list[-1].token_type != tokens.NEWLINE: + tok = Token(tokens.NEWLINE, '', lnum, 0, '\n') token_list.append(tok) for indent in indents[1:]: # pop remaining indent levels - token_list.append((tokens.DEDENT, '', lnum, pos, line)) - tok = (tokens.NEWLINE, '', lnum, 0, '\n') + token_list.append(Token(tokens.DEDENT, '', lnum, pos, line)) + tok = Token(tokens.NEWLINE, '', lnum, 0, '\n') token_list.append(tok) - token_list.append((tokens.ENDMARKER, '', lnum, pos, line)) + token_list.append(Token(tokens.ENDMARKER, '', lnum, pos, line)) return token_list diff --git a/pypy/interpreter/pyparser/test/test_automata.py b/pypy/interpreter/pyparser/test/test_automata.py --- a/pypy/interpreter/pyparser/test/test_automata.py +++ b/pypy/interpreter/pyparser/test/test_automata.py @@ -1,4 +1,4 @@ -from pypy.interpreter.pyparser.automata import DFA, DEFAULT +from pypy.interpreter.pyparser.automata import DFA, NonGreedyDFA, DEFAULT def test_states(): d = DFA([{"\x00": 1}, {"\x01": 0}], [False, True]) @@ -10,3 +10,20 @@ assert d.states == "\x01\x00" assert d.defaults == "\xff\x00" assert d.max_char == 1 + +def test_recognize(): + d = DFA([{"a": 1}, {"b": 0}], [False, True]) + assert d.recognize("ababab") == 5 + assert d.recognize("c") == -1 + + d = DFA([{"a": 1}, {DEFAULT: 0}], [False, True]) + assert d.recognize("a,a?ab") == 5 + assert d.recognize("c") == -1 + + d = NonGreedyDFA([{"a": 1}, {"b": 0}], [False, True]) + assert d.recognize("ababab") == 1 + assert d.recognize("c") == -1 + + d = NonGreedyDFA([{"a": 1}, {DEFAULT: 0}], [False, True]) + assert d.recognize("a,a?ab") == 1 + assert d.recognize("c") == -1 diff --git a/pypy/interpreter/pyparser/test/test_parser.py b/pypy/interpreter/pyparser/test/test_parser.py --- a/pypy/interpreter/pyparser/test/test_parser.py +++ b/pypy/interpreter/pyparser/test/test_parser.py @@ -20,7 +20,7 @@ rl = StringIO.StringIO(input + "\n").readline gen = tokenize.generate_tokens(rl) for tp, value, begin, end, line in gen: - if self.add_token(tp, value, begin[0], begin[1], line): + if self.add_token(parser.Token(tp, value, begin[0], begin[1], line)): py.test.raises(StopIteration, gen.next) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit