Author: Matti Picus <matti.pi...@gmail.com> Branch: unicode-utf8 Changeset: r94458:d4baff192be4 Date: 2018-04-29 21:18 +0300 http://bitbucket.org/pypy/pypy/changeset/d4baff192be4/
Log: merge default into branch diff too long, truncating to 2000 out of 11084 lines diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -51,3 +51,5 @@ 0000000000000000000000000000000000000000 release-pypy3.5-v5.10.0 09f9160b643e3f02ccb8c843b2fbb4e5cbf54082 release-pypy3.5-v5.10.0 3f6eaa010fce78cc7973bdc1dfdb95970f08fed2 release-pypy3.5-v5.10.1 +ab0b9caf307db6592905a80b8faffd69b39005b8 release-pypy2.7-v6.0.0 +fdd60ed87e941677e8ea11acf9f1819466521bf2 release-pypy3.5-v6.0.0 diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -6,36 +6,36 @@ Except when otherwise stated (look for LICENSE files in directories or information at the beginning of each file) all software and documentation in the 'rpython', 'pypy', 'ctype_configure', 'dotviewer', 'demo', 'lib_pypy', -'py', and '_pytest' directories is licensed as follows: +'py', and '_pytest' directories is licensed as follows: The MIT License - Permission is hereby granted, free of charge, to any person - obtaining a copy of this software and associated documentation - files (the "Software"), to deal in the Software without - restriction, including without limitation the rights to use, - copy, modify, merge, publish, distribute, sublicense, and/or - sell copies of the Software, and to permit persons to whom the + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, + copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. PyPy Copyright holders 2003-2018 ------------------------------------ +-------------------------------- Except when otherwise stated (look for LICENSE files or information at the beginning of each file) the files in the 'pypy' directory are each -copyrighted by one or more of the following people and organizations: +copyrighted by one or more of the following people and organizations: Armin Rigo Maciej Fijalkowski @@ -89,13 +89,13 @@ Niko Matsakis Alexander Hesse Ludovic Aubry + stian Jacob Hallen Jason Creighton Mark Young Alex Martelli Spenser Bauman Michal Bendowski - stian Jan de Mooij Tyler Wade Vincent Legoll @@ -123,10 +123,10 @@ Wenzhu Man Konstantin Lopuhin John Witulski + Jeremy Thurgood Greg Price Ivan Sichmann Freitas Dario Bertini - Jeremy Thurgood Mark Pearse Simon Cross Tobias Pape @@ -145,18 +145,19 @@ Adrian Kuhn tav Georg Brandl + Joannah Nanjekye Bert Freudenberg Stian Andreassen Wanja Saatkamp Mike Blume - Joannah Nanjekye Gerald Klix Oscar Nierstrasz Rami Chowdhury Stefan H. 
Muller + Dodan Mihai Tim Felgentreff Eugene Oden - Dodan Mihai + Colin Valliant Jeff Terrace Henry Mason Vasily Kuznetsov @@ -225,12 +226,14 @@ Vaibhav Sood Reuben Cummings Attila Gobi + Floris Bruynooghe Christopher Pope Tristan Arthur Christian Tismer Dan Stromberg Carl Meyer Florin Papa + Arianna Avanzini Jens-Uwe Mager Valentina Mukhamedzhanova Stefano Parmesan @@ -244,15 +247,18 @@ Lukas Vacek Omer Katz Jacek Generowicz + Tomasz Dziopa Sylvain Thenault Jakub Stasiak Andrew Dalke Alejandro J. Cura Vladimir Kryachko Gabriel + Thomas Hisch Mark Williams Kunal Grover Nathan Taylor + Barry Hart Travis Francis Athougies Yasir Suhail Sergey Kishchenko @@ -260,6 +266,7 @@ Lutz Paelike Ian Foote Philipp Rustemeuer + Logan Chien Catalin Gabriel Manciu Jacob Oscarson Ryan Gonzalez @@ -295,19 +302,20 @@ Akira Li Gustavo Niemeyer Rafał Gałczyński - Logan Chien Lucas Stadler roberto@goyle Matt Bogosian Yury V. Zaytsev florinpapa Anders Sigfridsson + Matt Jackson Nikolay Zinov rafalgalczyn...@gmail.com Joshua Gilbert Anna Katrina Dominguez Kim Jin Su Amber Brown + Miro Hrončok Anthony Sottile Nate Bragg Ben Darnell @@ -315,7 +323,6 @@ Godefroid Chappelle Julian Berman Michael Hudson-Doyle - Floris Bruynooghe Stephan Busemann Dan Colish timo @@ -357,6 +364,7 @@ Michael Chermside Anna Ravencroft remarkablerocket + Pauli Virtanen Petre Vijiac Berker Peksag Christian Muirhead @@ -381,6 +389,7 @@ Graham Markall Dan Loewenherz werat + Andrew Stepanov Niclas Olofsson Chris Pressey Tobias Diaz @@ -395,14 +404,14 @@ m...@funkyhat.org Stefan Marr - Heinrich-Heine University, Germany + Heinrich-Heine University, Germany Open End AB (formerly AB Strakt), Sweden - merlinux GmbH, Germany - tismerysoft GmbH, Germany - Logilab Paris, France - DFKI GmbH, Germany + merlinux GmbH, Germany + tismerysoft GmbH, Germany + Logilab Paris, France + DFKI GmbH, Germany Impara, Germany - Change Maker, Sweden + Change Maker, Sweden University of California Berkeley, USA Google Inc. 
King's College London @@ -410,14 +419,14 @@ The PyPy Logo as used by http://speed.pypy.org and others was created by Samuel Reis and is distributed on terms of Creative Commons Share Alike License. - -License for 'lib-python/2.7' -============================ + +License for 'lib-python/2.7, lib-python/3' +========================================== Except when otherwise stated (look for LICENSE files or copyright/license -information at the beginning of each file) the files in the 'lib-python/2.7' +information at the beginning of each file) the files in the 'lib-python' directory are all copyrighted by the Python Software Foundation and licensed -under the terms that you can find here: https://docs.python.org/2/license.html +under the terms that you can find here: https://docs.python.org/3/license.html License for 'pypy/module/unicodedata/' ====================================== @@ -441,9 +450,9 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - + http://www.apache.org/licenses/LICENSE-2.0 - + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/dotviewer/font/NOTICE b/dotviewer/font/COPYING.txt rename from dotviewer/font/NOTICE rename to dotviewer/font/COPYING.txt diff --git a/lib-python/2.7/re.py b/lib-python/2.7/re.py --- a/lib-python/2.7/re.py +++ b/lib-python/2.7/re.py @@ -225,7 +225,7 @@ _pattern_type = type(sre_compile.compile("", 0)) -_MAXCACHE = 100 +_MAXCACHE = 1000 def _compile(*key): # internal: compile pattern diff --git a/lib-python/2.7/test/test_eof.py b/lib-python/2.7/test/test_eof.py --- a/lib-python/2.7/test/test_eof.py +++ b/lib-python/2.7/test/test_eof.py @@ -5,7 +5,7 @@ class EOFTestCase(unittest.TestCase): def test_EOFC(self): - expect = "EOL while scanning string literal (<string>, line 1)" + expect = "end of line (EOL) while scanning string literal (<string>, line 1)" try: eval("""'this is a test\ """) @@ -15,7 +15,7 @@ raise test_support.TestFailed def test_EOFS(self): - expect = ("EOF while scanning triple-quoted string literal " + expect = ("end of file (EOF) while scanning triple-quoted string literal " "(<string>, line 1)") try: eval("""'''this is a test""") diff --git a/lib-python/2.7/test/test_generators.py b/lib-python/2.7/test/test_generators.py --- a/lib-python/2.7/test/test_generators.py +++ b/lib-python/2.7/test/test_generators.py @@ -398,7 +398,10 @@ 0 >>> type(i.gi_frame) <type 'frame'> ->>> i.gi_running = 42 + +PyPy prints "readonly attribute 'gi_running'" so ignore the exception detail + +>>> i.gi_running = 42 # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... TypeError: readonly attribute diff --git a/lib-python/2.7/test/test_genexps.py b/lib-python/2.7/test/test_genexps.py --- a/lib-python/2.7/test/test_genexps.py +++ b/lib-python/2.7/test/test_genexps.py @@ -87,7 +87,7 @@ >>> dict(a = i for i in xrange(10)) Traceback (most recent call last): ... 
- SyntaxError: invalid syntax + SyntaxError: invalid syntax (expected ')') Verify that parenthesis are required when used as a keyword argument value diff --git a/lib-python/2.7/test/test_traceback.py b/lib-python/2.7/test/test_traceback.py --- a/lib-python/2.7/test/test_traceback.py +++ b/lib-python/2.7/test/test_traceback.py @@ -123,10 +123,7 @@ self.assertEqual(len(err), 4) self.assertEqual(err[1].strip(), "print(2)") self.assertIn("^", err[2]) - if check_impl_detail(): - self.assertEqual(err[1].find("p"), err[2].find("^")) - if check_impl_detail(pypy=True): - self.assertEqual(err[1].find("2)") + 1, err[2].find("^")) + self.assertEqual(err[1].find("p"), err[2].find("^")) def test_base_exception(self): # Test that exceptions derived from BaseException are formatted right diff --git a/lib-python/2.7/threading.py b/lib-python/2.7/threading.py --- a/lib-python/2.7/threading.py +++ b/lib-python/2.7/threading.py @@ -351,6 +351,21 @@ # forward-compatibility reasons we do the same. waiter.acquire() gotit = True + except AttributeError: + # someone patched the 'waiter' class, probably. + # Fall back to the standard CPython logic. + # See the CPython lib for the comments about it... 
+ endtime = _time() + timeout + delay = 0.0005 # 500 us -> initial delay of 1 ms + while True: + gotit = waiter.acquire(0) + if gotit: + break + remaining = endtime - _time() + if remaining <= 0: + break + delay = min(delay * 2, remaining, .05) + _sleep(delay) else: gotit = waiter.acquire(False) if not gotit: diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -82,8 +82,11 @@ def _CData_output(self, resarray, base=None, index=-1): from _rawffi.alt import types # If a char_p or unichar_p is received, skip the string interpretation - if base._ffiargtype != types.Pointer(types.char_p) and \ - base._ffiargtype != types.Pointer(types.unichar_p): + try: + deref = type(base)._deref_ffiargtype() + except AttributeError: + deref = None + if deref != types.char_p and deref != types.unichar_p: # this seems to be a string if we're array of char, surprise! from ctypes import c_char, c_wchar if self._type_ is c_char: @@ -120,6 +123,12 @@ value = self(*value) return _CDataMeta.from_param(self, value) + def _build_ffiargtype(self): + return _ffi.types.Pointer(self._type_.get_ffi_argtype()) + + def _deref_ffiargtype(self): + return self._type_.get_ffi_argtype() + def array_get_slice_params(self, index): if hasattr(self, '_length_'): start, stop, step = index.indices(self._length_) @@ -248,6 +257,5 @@ _type_ = base ) cls = ArrayMeta(name, (Array,), tpdict) - cls._ffiargtype = _ffi.types.Pointer(base.get_ffi_argtype()) ARRAY_CACHE[key] = cls return cls diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -49,10 +49,13 @@ else: return self.from_param(as_parameter) + def _build_ffiargtype(self): + return _shape_to_ffi_type(self._ffiargshape_) + def get_ffi_argtype(self): if self._ffiargtype: return self._ffiargtype - self._ffiargtype = _shape_to_ffi_type(self._ffiargshape_) + self._ffiargtype = self._build_ffiargtype() 
return self._ffiargtype def _CData_output(self, resbuffer, base=None, index=-1): diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -70,7 +70,12 @@ self._ffiarray = ffiarray self.__init__ = __init__ self._type_ = TP - self._ffiargtype = _ffi.types.Pointer(TP.get_ffi_argtype()) + + def _build_ffiargtype(self): + return _ffi.types.Pointer(self._type_.get_ffi_argtype()) + + def _deref_ffiargtype(self): + return self._type_.get_ffi_argtype() from_address = cdata_from_address diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -160,6 +160,10 @@ raise AttributeError("_fields_ is final") if self in [f[1] for f in value]: raise AttributeError("Structure or union cannot contain itself") + if self._ffiargtype is not None: + raise NotImplementedError("Too late to set _fields_: we already " + "said to libffi that the structure type %s is opaque" + % (self,)) names_and_fields( self, value, self.__bases__[0], diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst --- a/pypy/doc/contributor.rst +++ b/pypy/doc/contributor.rst @@ -56,13 +56,13 @@ Niko Matsakis Alexander Hesse Ludovic Aubry + stian Jacob Hallen Jason Creighton Mark Young Alex Martelli Spenser Bauman Michal Bendowski - stian Jan de Mooij Tyler Wade Vincent Legoll @@ -90,10 +90,10 @@ Wenzhu Man Konstantin Lopuhin John Witulski + Jeremy Thurgood Greg Price Ivan Sichmann Freitas Dario Bertini - Jeremy Thurgood Mark Pearse Simon Cross Tobias Pape @@ -112,18 +112,19 @@ Adrian Kuhn tav Georg Brandl + Joannah Nanjekye Bert Freudenberg Stian Andreassen Wanja Saatkamp Mike Blume - Joannah Nanjekye Gerald Klix Oscar Nierstrasz Rami Chowdhury Stefan H. 
Muller + Dodan Mihai Tim Felgentreff Eugene Oden - Dodan Mihai + Colin Valliant Jeff Terrace Henry Mason Vasily Kuznetsov @@ -192,12 +193,14 @@ Vaibhav Sood Reuben Cummings Attila Gobi + Floris Bruynooghe Christopher Pope Tristan Arthur Christian Tismer Dan Stromberg Carl Meyer Florin Papa + Arianna Avanzini Jens-Uwe Mager Valentina Mukhamedzhanova Stefano Parmesan @@ -211,6 +214,7 @@ Lukas Vacek Omer Katz Jacek Generowicz + Tomasz Dziopa Sylvain Thenault Jakub Stasiak Andrew Dalke @@ -221,6 +225,7 @@ Mark Williams Kunal Grover Nathan Taylor + Barry Hart Travis Francis Athougies Yasir Suhail Sergey Kishchenko @@ -228,6 +233,7 @@ Lutz Paelike Ian Foote Philipp Rustemeuer + Logan Chien Catalin Gabriel Manciu Jacob Oscarson Ryan Gonzalez @@ -263,19 +269,20 @@ Akira Li Gustavo Niemeyer Rafał Gałczyński - Logan Chien Lucas Stadler roberto@goyle Matt Bogosian Yury V. Zaytsev florinpapa Anders Sigfridsson + Matt Jackson Nikolay Zinov rafalgalczyn...@gmail.com Joshua Gilbert Anna Katrina Dominguez Kim Jin Su Amber Brown + Miro Hrončok Anthony Sottile Nate Bragg Ben Darnell @@ -283,7 +290,6 @@ Godefroid Chappelle Julian Berman Michael Hudson-Doyle - Floris Bruynooghe Stephan Busemann Dan Colish timo @@ -325,6 +331,7 @@ Michael Chermside Anna Ravencroft remarkablerocket + Pauli Virtanen Petre Vijiac Berker Peksag Christian Muirhead @@ -349,6 +356,7 @@ Graham Markall Dan Loewenherz werat + Andrew Stepanov Niclas Olofsson Chris Pressey Tobias Diaz diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -10,89 +10,6 @@ PyPy. -.. 
_extension-modules: - -Extension modules ------------------ - -List of extension modules that we support: - -* Supported as built-in modules (in :source:`pypy/module/`): - - __builtin__ - :doc:`__pypy__ <__pypy__-module>` - _ast - _codecs - _collections - :doc:`_continuation <stackless>` - :doc:`_ffi <discussion/ctypes-implementation>` - _hashlib - _io - _locale - _lsprof - _md5 - :doc:`_minimal_curses <config/objspace.usemodules._minimal_curses>` - _multiprocessing - _random - :doc:`_rawffi <discussion/ctypes-implementation>` - _sha - _socket - _sre - _ssl - _warnings - _weakref - _winreg - array - binascii - bz2 - cStringIO - cmath - `cpyext`_ - crypt - errno - exceptions - fcntl - gc - imp - itertools - marshal - math - mmap - operator - parser - posix - pyexpat - select - signal - struct - symbol - sys - termios - thread - time - token - unicodedata - zipimport - zlib - - When translated on Windows, a few Unix-only modules are skipped, - and the following module is built instead: - - _winreg - -* Supported by being rewritten in pure Python (possibly using ``cffi``): - see the :source:`lib_pypy/` directory. Examples of modules that we - support this way: ``ctypes``, ``cPickle``, ``cmath``, ``dbm``, ``datetime``... - Note that some modules are both in there and in the list above; - by default, the built-in module is used (but can be disabled - at translation time). - -The extension modules (i.e. modules written in C, in the standard CPython) -that are neither mentioned above nor in :source:`lib_pypy/` are not available in PyPy. -(You may have a chance to use them anyway with `cpyext`_.) - -.. _cpyext: http://morepypy.blogspot.com/2010/04/using-cpython-extension-modules-with.html - Differences related to garbage collection strategies ---------------------------------------------------- @@ -559,7 +476,96 @@ environment variable. CPython searches for ``vcvarsall.bat`` somewhere **above** that value. 
+* SyntaxError_ s try harder to give details about the cause of the failure, so + the error messages are not the same as in CPython + + +.. _extension-modules: + +Extension modules +----------------- + +List of extension modules that we support: + +* Supported as built-in modules (in :source:`pypy/module/`): + + __builtin__ + :doc:`__pypy__ <__pypy__-module>` + _ast + _codecs + _collections + :doc:`_continuation <stackless>` + :doc:`_ffi <discussion/ctypes-implementation>` + _hashlib + _io + _locale + _lsprof + _md5 + :doc:`_minimal_curses <config/objspace.usemodules._minimal_curses>` + _multiprocessing + _random + :doc:`_rawffi <discussion/ctypes-implementation>` + _sha + _socket + _sre + _ssl + _warnings + _weakref + _winreg + array + binascii + bz2 + cStringIO + cmath + `cpyext`_ + crypt + errno + exceptions + fcntl + gc + imp + itertools + marshal + math + mmap + operator + parser + posix + pyexpat + select + signal + struct + symbol + sys + termios + thread + time + token + unicodedata + zipimport + zlib + + When translated on Windows, a few Unix-only modules are skipped, + and the following module is built instead: + + _winreg + +* Supported by being rewritten in pure Python (possibly using ``cffi``): + see the :source:`lib_pypy/` directory. Examples of modules that we + support this way: ``ctypes``, ``cPickle``, ``cmath``, ``dbm``, ``datetime``... + Note that some modules are both in there and in the list above; + by default, the built-in module is used (but can be disabled + at translation time). + +The extension modules (i.e. modules written in C, in the standard CPython) +that are neither mentioned above nor in :source:`lib_pypy/` are not available in PyPy. +(You may have a chance to use them anyway with `cpyext`_.) + +.. _cpyext: http://morepypy.blogspot.com/2010/04/using-cpython-extension-modules-with.html + + .. _`is ignored in PyPy`: http://bugs.python.org/issue14621 .. 
_`little point`: http://events.ccc.de/congress/2012/Fahrplan/events/5152.en.html .. _`#2072`: https://bitbucket.org/pypy/pypy/issue/2072/ .. _`issue #2653`: https://bitbucket.org/pypy/pypy/issues/2653/ +.. _SyntaxError: https://morepypy.blogspot.co.il/2018/04/improving-syntaxerror-in-pypy.html diff --git a/pypy/doc/gc_info.rst b/pypy/doc/gc_info.rst --- a/pypy/doc/gc_info.rst +++ b/pypy/doc/gc_info.rst @@ -121,6 +121,166 @@ alive by GC objects, but not accounted in the GC +GC Hooks +-------- + +GC hooks are user-defined functions which are called whenever a specific GC +event occurs, and can be used to monitor GC activity and pauses. You can +install the hooks by setting the following attributes: + +``gc.hooks.on_gc_minor`` + Called whenever a minor collection occurs. It corresponds to + ``gc-minor`` sections inside ``PYPYLOG``. + +``gc.hooks.on_gc_collect_step`` + Called whenever an incremental step of a major collection occurs. It + corresponds to ``gc-collect-step`` sections inside ``PYPYLOG``. + +``gc.hooks.on_gc_collect`` + Called after the last incremental step, when a major collection is fully + done. It corresponds to ``gc-collect-done`` sections inside ``PYPYLOG``. + +To uninstall a hook, simply set the corresponding attribute to ``None``. To +install all hooks at once, you can call ``gc.hooks.set(obj)``, which will look +for methods ``on_gc_*`` on ``obj``. To uninstall all the hooks at once, you +can call ``gc.hooks.reset()``. + +The functions called by the hooks receive a single ``stats`` argument, which +contains various statistics about the event. + +Note that PyPy cannot call the hooks immediately after a GC event, but it has +to wait until it reaches a point in which the interpreter is in a known state +and calling user-defined code is harmless. It might happen that multiple +events occur before the hook is invoked: in this case, you can inspect the +value ``stats.count`` to know how many times the event occurred since the last +time the hook was called.
Similarly, ``stats.duration`` contains the +**total** time spent by the GC for this specific event since the last time the +hook was called. + +On the other hand, all the other fields of the ``stats`` object are relative +only to the **last** event of the series. + +The attributes for ``GcMinorStats`` are: + +``count`` + The number of minor collections that occurred since the last hook call. + +``duration`` + The total time spent inside minor collections since the last hook + call. See below for more information on the unit. + +``duration_min`` + The duration of the fastest minor collection since the last hook call. + +``duration_max`` + The duration of the slowest minor collection since the last hook call. + +``total_memory_used`` + The amount of memory used at the end of the minor collection, in + bytes. This includes the memory used in arenas (for GC-managed memory) and + raw-malloced memory (e.g., the content of numpy arrays). + +``pinned_objects`` + The number of pinned objects. + + +The attributes for ``GcCollectStepStats`` are: + +``count``, ``duration``, ``duration_min``, ``duration_max`` + See above. + +``oldstate``, ``newstate`` + Integers which indicate the state of the GC before and after the step. + +The value of ``oldstate`` and ``newstate`` is one of these constants, defined +inside ``gc.GcCollectStepStats``: ``STATE_SCANNING``, ``STATE_MARKING``, +``STATE_SWEEPING``, ``STATE_FINALIZING``. It is possible to get a string +representation of it by indexing the ``GC_STATES`` tuple. + + +The attributes for ``GcCollectStats`` are: + +``count`` + See above. + +``num_major_collects`` + The total number of major collections which have been done since the + start. Contrary to ``count``, this is an always-growing counter and it's + not reset between invocations. + +``arenas_count_before``, ``arenas_count_after`` + Number of arenas used before and after the major collection. + +``arenas_bytes`` + Total number of bytes used by GC-managed objects.
+ +``rawmalloc_bytes_before``, ``rawmalloc_bytes_after`` + Total number of bytes used by raw-malloced objects, before and after the + major collection. + +Note that ``GcCollectStats`` has **not** got a ``duration`` field. This is +because all the GC work is done inside ``gc-collect-step``: +``gc-collect-done`` is used only to give additional stats, but doesn't do any +actual work. + +A note about the ``duration`` field: depending on the architecture and +operating system, PyPy uses different ways to read timestamps, so ``duration`` +is expressed in varying units. It is possible to know which by calling +``__pypy__.debug_get_timestamp_unit()``, which can be one of the following +values: + +``tsc`` + The default on ``x86`` machines: timestamps are expressed in CPU ticks, as + read by the `Time Stamp Counter`_. + +``ns`` + Timestamps are expressed in nanoseconds. + +``QueryPerformanceCounter`` + On Windows, in case for some reason ``tsc`` is not available: timestamps + are read using the win API ``QueryPerformanceCounter()``. + + +Unfortunately, there does not seem to be a reliable standard way for +converting ``tsc`` ticks into nanoseconds, although in practice on modern CPUs +it is enough to divide the ticks by the maximum nominal frequency of the CPU. +For this reason, PyPy gives the raw value, and leaves the job of doing the +conversion to external libraries. 
+ +Here is an example of GC hooks in use:: + + import sys + import gc + + class MyHooks(object): + done = False + + def on_gc_minor(self, stats): + print 'gc-minor: count = %02d, duration = %d' % (stats.count, + stats.duration) + + def on_gc_collect_step(self, stats): + old = gc.GcCollectStepStats.GC_STATES[stats.oldstate] + new = gc.GcCollectStepStats.GC_STATES[stats.newstate] + print 'gc-collect-step: %s --> %s' % (old, new) + print ' count = %02d, duration = %d' % (stats.count, + stats.duration) + + def on_gc_collect(self, stats): + print 'gc-collect-done: count = %02d' % stats.count + self.done = True + + hooks = MyHooks() + gc.hooks.set(hooks) + + # simulate some GC activity + lst = [] + while not hooks.done: + lst = [lst, 1, 2, 3] + + +.. _`Time Stamp Counter`: https://en.wikipedia.org/wiki/Time_Stamp_Counter + .. _minimark-environment-variables: Environment variables diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst --- a/pypy/doc/how-to-release.rst +++ b/pypy/doc/how-to-release.rst @@ -40,6 +40,8 @@ sure things are ported back to the trunk and to the branch as necessary. +* Make sure the RPython builds on the buildbot pass with no failures + * Maybe bump the SOABI number in module/imp/importing. This has many implications, so make sure the PyPy community agrees to the change. diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst --- a/pypy/doc/index-of-release-notes.rst +++ b/pypy/doc/index-of-release-notes.rst @@ -6,6 +6,7 @@ .. toctree:: + release-v6.0.0.rst release-v5.10.1.rst release-v5.10.0.rst release-v5.9.0.rst diff --git a/pypy/doc/index-of-whatsnew.rst b/pypy/doc/index-of-whatsnew.rst --- a/pypy/doc/index-of-whatsnew.rst +++ b/pypy/doc/index-of-whatsnew.rst @@ -7,6 +7,8 @@ .. 
toctree:: whatsnew-head.rst + whatsnew-pypy2-6.0.0.rst + whatsnew-pypy2-5.10.0.rst whatsnew-pypy2-5.10.0.rst whatsnew-pypy2-5.9.0.rst whatsnew-pypy2-5.8.0.rst diff --git a/pypy/doc/release-v6.0.0.rst b/pypy/doc/release-v6.0.0.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/release-v6.0.0.rst @@ -0,0 +1,123 @@ +====================================== +PyPy2.7 and PyPy3.5 v6.0 dual release +====================================== + +The PyPy team is proud to release both PyPy2.7 v6.0 (an interpreter supporting +Python 2.7 syntax), and a PyPy3.5 v6.0 (an interpreter supporting Python +3.5 syntax). The two releases are both based on much the same codebase, thus +the dual release. + +This release is a feature release following our previous 5.10 incremental +release in late December 2017. Our C-API compatibility layer ``cpyext`` is +now much faster (see the `blog post`_) as well as more complete. We have made +many other improvements in speed and CPython compatibility. Since the changes +affect the included python development header files, all c-extension modules must +be recompiled for this version. + +Until we can work with downstream providers to distribute builds with PyPy, we +have made packages for some common packages `available as wheels`_. You may +compile yourself using ``pip install --no-build-isolation <package>``, the +``no-build-isolation`` is currently needed for pip v10. + +First-time python users are often stumped by silly typos and omissions when +getting started writing code. We have improved our parser to emit more friendly +`syntax errors`_, making PyPy not only faster but more friendly. + +The GC now has `hooks`_ to gain more insights into its performance + +The Matplotlib TkAgg backend now works with PyPy, as do pygame and pygobject_. + +We updated the `cffi`_ module included in PyPy to version 1.11.5, and the +`cppyy`_ backend to 0.6.0. Please use these to wrap your C and C++ code, +respectively, for a JIT friendly experience. 
+ +As always, this release is 100% compatible with the previous one and fixed +several issues and bugs raised by the growing community of PyPy users. +We strongly recommend updating. + +The Windows PyPy3.5 release is still considered beta-quality. There are open +issues with unicode handling especially around system calls and c-extensions. + +The utf8 branch that changes internal representation of unicode to utf8 did not +make it into the release, so there is still more goodness coming. We also +began working on a Python3.6 implementation, help is welcome. + +You can download the v6.0 releases here: + + http://pypy.org/download.html + +We would like to thank our donors for the continued support of the PyPy +project. If PyPy is not quite good enough for your needs, we are available for +direct consulting work. + +We would also like to thank our contributors and encourage new people to join +the project. PyPy has many layers and we need help with all of them: `PyPy`_ +and `RPython`_ documentation improvements, tweaking popular `modules`_ to run +on pypy, or general `help`_ with making RPython's JIT even better. + +.. _`PyPy`: index.html +.. _`RPython`: https://rpython.readthedocs.org +.. _`modules`: project-ideas.html#make-more-python-modules-pypy-friendly +.. _`help`: project-ideas.html +.. _`blog post`: https://morepypy.blogspot.it/2017/10/cape-of-good-hope-for-pypy-hello-from.html +.. _pygobject: https://lazka.github.io/posts/2018-04_pypy-pygobject/index.html +.. _`syntax errors`: https://morepypy.blogspot.com/2018/04/improving-syntaxerror-in-pypy.html +.. _`hooks`: gc_info.html#gc-hooks +.. _`cffi`: http://cffi.readthedocs.io +.. _`cppyy`: https://cppyy.readthedocs.io +.. _`available as wheels`: https://github.com/antocuni/pypy-wheels + +What is PyPy? +============= + +PyPy is a very compliant Python interpreter, almost a drop-in replacement for +CPython 2.7 and CPython 3.5. 
It's fast (`PyPy and CPython 2.7.x`_ performance comparison) +due to its integrated tracing JIT compiler. + +We also welcome developers of other `dynamic languages`_ to see what RPython +can do for them. + +The PyPy release supports: + + * **x86** machines on most common operating systems + (Linux 32/64 bits, Mac OS X 64 bits, Windows 32 bits, OpenBSD, FreeBSD) + + * newer **ARM** hardware (ARMv6 or ARMv7, with VFPv3) running Linux, + + * big- and little-endian variants of **PPC64** running Linux, + + * **s390x** running Linux + +.. _`PyPy and CPython 2.7.x`: http://speed.pypy.org +.. _`dynamic languages`: http://rpython.readthedocs.io/en/latest/examples.html + +Changelog +========= + +* Speed up C-API method calls, and make most Py*_Check calls C macros +* Speed up C-API slot method calls +* Enable TkAgg backend support for matplotlib +* support ``hastzinfo`` and ``tzinfo`` in the C-API ``PyDateTime*`` structures +* datetime.h is now more similar to CPython +* We now support ``PyUnicode_AsUTF{16,32}String``, ``_PyLong_AsByteArray``, + ``_PyLong_AsByteArrayO``, +* PyPy3.5 on Windows is compiled with the Microsoft Visual Compiler v14, like + CPython +* Fix performance of attribute lookup when more than 80 attributes are used +* Improve performance on passing built-in types to C-API C code +* Improve the performance of datetime and timedelta by skipping the consistency + checks of the datetime values (they are correct by construction) +* Improve handling of ``bigint`` s, including fixing ``int_divmod`` +* Improve reporting of GC statistics +* Accept unicode filenames in ``dbm.open()`` +* Improve RPython support for half-floats +* Added missing attributes to C-API ``instancemethod`` on pypy3 +* Store error state in thread-local storage for C-API. 
+* Fix JIT bugs exposed in the sre module +* Improve speed of Python parser, improve ParseError messages and SyntaxError +* Handle JIT hooks more efficiently +* Fix a rare GC bug exposed by intensive use of cpyext ``Buffer`` s + +We also refactored many parts of the JIT bridge optimizations, as well as cpyext +internals, and together with new contributors fixed issues, added new +documentation, and cleaned up the codebase. diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -1,73 +1,12 @@ -=========================== -What's new in PyPy2.7 5.10+ -=========================== +========================== +What's new in PyPy2.7 6.0+ +========================== -.. this is a revision shortly after release-pypy2.7-v5.10.0 -.. startrev: 6b024edd9d12 +.. this is a revision shortly after release-pypy-6.0.0 +.. startrev: e50e11af23f1 -.. branch: cpyext-avoid-roundtrip -Big refactoring of some cpyext code, which avoids a lot of nonsense when -calling C from Python and vice-versa: the result is a big speedup in -function/method calls, up to 6 times faster. -.. branch: cpyext-datetime2 - -Support ``tzinfo`` field on C-API datetime objects, fixes latest pandas HEAD - - -.. branch: mapdict-size-limit - -Fix a corner case of mapdict: When an instance is used like a dict (using -``setattr`` and ``getattr``, or ``.__dict__``) and a lot of attributes are -added, then the performance using mapdict is linear in the number of -attributes. This is now fixed (by switching to a regular dict after 80 -attributes). - - -.. branch: cpyext-faster-arg-passing - -When using cpyext, improve the speed of passing certain objects from PyPy to C -code, most notably None, True, False, types, all instances of C-defined types. -Before, a dict lookup was needed every time such an object crossed over, now it -is just a field read. - - -.. 
branch: 2634_datetime_timedelta_performance - -Improve datetime + timedelta performance. - -.. branch: memory-accounting - -Improve way to describe memory - -.. branch: msvc14 - -Allow compilaiton with Visual Studio 2017 compiler suite on windows - -.. branch: refactor-slots - -Refactor cpyext slots. - - -.. branch: call-loopinvariant-into-bridges - -Speed up branchy code that does a lot of function inlining by saving one call -to read the TLS in most bridges. - -.. branch: rpython-sprint - -Refactor in rpython signatures - -.. branch: cpyext-tls-operror2 - -Store error state thread-locally in executioncontext, fixes issue #2764 - -.. branch: cpyext-fast-typecheck - -Optimize `Py*_Check` for `Bool`, `Float`, `Set`. Also refactor and simplify -`W_PyCWrapperObject` which is used to call slots from the C-API, greatly -improving microbenchmarks in https://github.com/antocuni/cpyext-benchmarks .. branch: unicode-utf8-re .. branch: utf8-io diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-pypy2-6.0.0.rst copy from pypy/doc/whatsnew-head.rst copy to pypy/doc/whatsnew-pypy2-6.0.0.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-pypy2-6.0.0.rst @@ -69,8 +69,60 @@ `W_PyCWrapperObject` which is used to call slots from the C-API, greatly improving microbenchmarks in https://github.com/antocuni/cpyext-benchmarks -.. branch: unicode-utf8-re -.. branch: utf8-io -Utf8 handling for unicode +.. branch: fix-sre-problems +Fix two (unrelated) JIT bugs manifesting in the re module: + +- green fields are broken and were thus disabled, plus their usage removed from + the _sre implementation + +- in rare "trace is too long" situations, the JIT could break behaviour + arbitrarily. + +.. branch: jit-hooks-can-be-disabled + +Be more efficient about JIT hooks. Make it possible for the frontend to declare +that jit hooks are currently not enabled at all. in that case, the list of ops +does not have to be created in the case of the on_abort hook (which is +expensive). 
+ + +.. branch: pyparser-improvements + +Improve speed of Python parser, improve ParseError messages slightly. + +.. branch: ioctl-arg-size + +Work around possible bugs in upstream ioctl users: like CPython, allocate at +least 1024 bytes for the arg in calls to ``ioctl(fd, request, arg)``. Fixes +issue #2776 + +.. branch: cpyext-subclass-setattr + +Fix for python-level classes that inherit from C-API types: previously the +`w_obj` was not necessarily preserved throughout the lifetime of the `pyobj`, +which led to cases where instance attributes were lost. Fixes issue #2793 + + +.. branch: pyparser-improvements-2 + +Improve line offsets that are reported by SyntaxError. Improve error messages +for a few situations, including mismatched parentheses. + +.. branch: issue2752 + +Fix a rare GC bug that was introduced more than one year ago, but was +not diagnosed before issue #2752. + +.. branch: gc-hooks + +Introduce GC hooks, as documented in doc/gc_info.rst + +.. branch: gc-hook-better-timestamp + +Improve GC hooks + +.. 
branch: cppyy-packaging + +Update backend to 0.6.0 and support exceptions through wrappers diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py --- a/pypy/goal/targetpypystandalone.py +++ b/pypy/goal/targetpypystandalone.py @@ -215,6 +215,7 @@ usage = SUPPRESS_USAGE take_options = True + space = None def opt_parser(self, config): parser = to_optparse(config, useoptions=["objspace.*"], @@ -364,15 +365,21 @@ from pypy.module.pypyjit.hooks import pypy_hooks return PyPyJitPolicy(pypy_hooks) + def get_gchooks(self): + from pypy.module.gc.hook import LowLevelGcHooks + if self.space is None: + raise Exception("get_gchooks must be called afeter get_entry_point") + return self.space.fromcache(LowLevelGcHooks) + def get_entry_point(self, config): - space = make_objspace(config) + self.space = make_objspace(config) # manually imports app_main.py filename = os.path.join(pypydir, 'interpreter', 'app_main.py') app = gateway.applevel(open(filename).read(), 'app_main.py', 'app_main') app.hidden_applevel = False - w_dict = app.getwdict(space) - entry_point, _ = create_entry_point(space, w_dict) + w_dict = app.getwdict(self.space) + entry_point, _ = create_entry_point(self.space, w_dict) return entry_point, None, PyPyAnnotatorPolicy() @@ -381,7 +388,7 @@ 'jitpolicy', 'get_entry_point', 'get_additional_config_options']: ns[name] = getattr(self, name) - + ns['get_gchooks'] = self.get_gchooks PyPyTarget().interface(globals()) diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -404,7 +404,7 @@ self._periodic_actions = [] self._nonperiodic_actions = [] self.has_bytecode_counter = False - self.fired_actions = None + self._fired_actions_reset() # the default value is not 100, unlike CPython 2.7, but a much # larger value, because we use a technique that not only allows # but actually *forces* another thread to run whenever the counter 
@@ -416,13 +416,28 @@ """Request for the action to be run before the next opcode.""" if not action._fired: action._fired = True - if self.fired_actions is None: - self.fired_actions = [] - self.fired_actions.append(action) + self._fired_actions_append(action) # set the ticker to -1 in order to force action_dispatcher() # to run at the next possible bytecode self.reset_ticker(-1) + def _fired_actions_reset(self): + # linked list of actions. We cannot use a normal RPython list because + # we want AsyncAction.fire() to be marked as @rgc.collect: this way, + # we can call it from e.g. GcHooks or cpyext's dealloc_trigger. + self._fired_actions_first = None + self._fired_actions_last = None + + @rgc.no_collect + def _fired_actions_append(self, action): + assert action._next is None + if self._fired_actions_first is None: + self._fired_actions_first = action + self._fired_actions_last = action + else: + self._fired_actions_last._next = action + self._fired_actions_last = action + @not_rpython def register_periodic_action(self, action, use_bytecode_counter): """ @@ -467,19 +482,26 @@ action.perform(ec, frame) # nonperiodic actions - list = self.fired_actions - if list is not None: - self.fired_actions = None + action = self._fired_actions_first + if action: + self._fired_actions_reset() # NB. in case there are several actions, we reset each # 'action._fired' to false only when we're about to call # 'action.perform()'. This means that if # 'action.fire()' happens to be called any time before # the corresponding perform(), the fire() has no # effect---which is the effect we want, because - # perform() will be called anyway. - for action in list: + # perform() will be called anyway. All such pending + # actions with _fired == True are still inside the old + # chained list. As soon as we reset _fired to False, + # we also reset _next to None and we are ready for + # another fire(). 
+ while action is not None: + next_action = action._next + action._next = None action._fired = False action.perform(ec, frame) + action = next_action self.action_dispatcher = action_dispatcher @@ -512,10 +534,12 @@ to occur between two opcodes, not at a completely random time. """ _fired = False + _next = None def __init__(self, space): self.space = space + @rgc.no_collect def fire(self): """Request for the action to be run before the next opcode. The action must have been registered at space initalization time.""" diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py --- a/pypy/interpreter/pyparser/error.py +++ b/pypy/interpreter/pyparser/error.py @@ -6,6 +6,7 @@ lastlineno=0): self.msg = msg self.lineno = lineno + # NB: offset is a 1-based index! self.offset = offset self.text = text self.filename = filename diff --git a/pypy/interpreter/pyparser/metaparser.py b/pypy/interpreter/pyparser/metaparser.py --- a/pypy/interpreter/pyparser/metaparser.py +++ b/pypy/interpreter/pyparser/metaparser.py @@ -147,8 +147,10 @@ for label, next in state.arcs.iteritems(): arcs.append((self.make_label(gram, label), dfa.index(next))) states.append((arcs, state.is_final)) - gram.dfas.append((states, self.make_first(gram, name))) - assert len(gram.dfas) - 1 == gram.symbol_ids[name] - 256 + symbol_id = gram.symbol_ids[name] + dfa = parser.DFA(symbol_id, states, self.make_first(gram, name)) + gram.dfas.append(dfa) + assert len(gram.dfas) - 1 == symbol_id - 256 gram.start = gram.symbol_ids[self.start_symbol] return gram @@ -162,6 +164,13 @@ else: gram.labels.append(gram.symbol_ids[label]) gram.symbol_to_label[label] = label_index + first = self.first[label] + if len(first) == 1: + first, = first + if not first[0].isupper(): + first = first.strip("\"'") + assert label_index not in gram.token_to_error_string + gram.token_to_error_string[label_index] = first return label_index elif label.isupper(): token_index = gram.TOKENS[label] @@ -183,7 +192,7 @@ else: 
gram.labels.append(gram.KEYWORD_TOKEN) gram.keyword_ids[value] = label_index - return label_index + result = label_index else: try: token_index = gram.OPERATOR_MAP[value] @@ -194,7 +203,10 @@ else: gram.labels.append(token_index) gram.token_ids[token_index] = label_index - return label_index + result = label_index + assert result not in gram.token_to_error_string + gram.token_to_error_string[result] = value + return result def make_first(self, gram, name): original_firsts = self.first[name] diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py --- a/pypy/interpreter/pyparser/parser.py +++ b/pypy/interpreter/pyparser/parser.py @@ -1,6 +1,7 @@ """ A CPython inspired RPython parser. """ +from rpython.rlib.objectmodel import not_rpython class Grammar(object): @@ -16,6 +17,7 @@ self.symbol_names = {} self.symbol_to_label = {} self.keyword_ids = {} + self.token_to_error_string = {} self.dfas = [] self.labels = [0] self.token_ids = {} @@ -41,6 +43,27 @@ pass return True +class DFA(object): + def __init__(self, symbol_id, states, first): + self.symbol_id = symbol_id + self.states = states + self.first = self._first_to_string(first) + + def could_match_token(self, label_index): + pos = label_index >> 3 + bit = 1 << (label_index & 0b111) + return bool(ord(self.first[label_index >> 3]) & bit) + + @staticmethod + @not_rpython + def _first_to_string(first): + l = sorted(first.keys()) + b = bytearray(32) + for label_index in l: + pos = label_index >> 3 + bit = 1 << (label_index & 0b111) + b[pos] |= bit + return str(b) class Node(object): @@ -127,14 +150,17 @@ class Nonterminal(AbstractNonterminal): __slots__ = ("_children", ) - def __init__(self, type, children): + def __init__(self, type, children=None): Node.__init__(self, type) + if children is None: + children = [] self._children = children def __repr__(self): return "Nonterminal(type=%s, children=%r)" % (self.type, self._children) def get_child(self, i): + assert self._children is not None 
return self._children[i] def num_children(self): @@ -168,25 +194,50 @@ class ParseError(Exception): def __init__(self, msg, token_type, value, lineno, column, line, - expected=-1): + expected=-1, expected_str=None): self.msg = msg self.token_type = token_type self.value = value self.lineno = lineno + # this is a 0-based index self.column = column self.line = line self.expected = expected + self.expected_str = expected_str def __str__(self): return "ParserError(%s, %r)" % (self.token_type, self.value) +class StackEntry(object): + def __init__(self, next, dfa, state): + self.next = next + self.dfa = dfa + self.state = state + self.node = None + + def push(self, dfa, state): + return StackEntry(self, dfa, state) + + def pop(self): + return self.next + + def node_append_child(self, child): + node = self.node + if node is None: + self.node = Nonterminal1(self.dfa.symbol_id, child) + elif isinstance(node, Nonterminal1): + newnode = self.node = Nonterminal( + self.dfa.symbol_id, [node._child, child]) + else: + self.node.append_child(child) + + class Parser(object): def __init__(self, grammar): self.grammar = grammar self.root = None - self.stack = None def prepare(self, start=-1): """Setup the parser for parsing. @@ -196,16 +247,15 @@ if start == -1: start = self.grammar.start self.root = None - current_node = Nonterminal(start, []) - self.stack = [] - self.stack.append((self.grammar.dfas[start - 256], 0, current_node)) + self.stack = StackEntry(None, self.grammar.dfas[start - 256], 0) def add_token(self, token_type, value, lineno, column, line): label_index = self.classify(token_type, value, lineno, column, line) sym_id = 0 # for the annotator while True: - dfa, state_index, node = self.stack[-1] - states, first = dfa + dfa = self.stack.dfa + state_index = self.stack.state + states = dfa.states arcs, is_accepting = states[state_index] for i, next_state in arcs: sym_id = self.grammar.labels[i] @@ -217,16 +267,17 @@ # the stack. 
while state[1] and not state[0]: self.pop() - if not self.stack: + if self.stack is None: # Parsing is done. return True - dfa, state_index, node = self.stack[-1] - state = dfa[0][state_index] + dfa = self.stack.dfa + state_index = self.stack.state + state = dfa.states[state_index] return False elif sym_id >= 256: sub_node_dfa = self.grammar.dfas[sym_id - 256] # Check if this token can start a child node. - if label_index in sub_node_dfa[1]: + if sub_node_dfa.could_match_token(label_index): self.push(sub_node_dfa, next_state, sym_id, lineno, column) break @@ -235,7 +286,7 @@ # state is accepting, it's invalid input. if is_accepting: self.pop() - if not self.stack: + if self.stack is None: raise ParseError("too much input", token_type, value, lineno, column, line) else: @@ -243,10 +294,13 @@ # error. if len(arcs) == 1: expected = sym_id + expected_str = self.grammar.token_to_error_string.get( + arcs[0][0], None) else: expected = -1 + expected_str = None raise ParseError("bad input", token_type, value, lineno, - column, line, expected) + column, line, expected, expected_str) def classify(self, token_type, value, lineno, column, line): """Find the label for a token.""" @@ -262,26 +316,22 @@ def shift(self, next_state, token_type, value, lineno, column): """Shift a non-terminal and prepare for the next state.""" - dfa, state, node = self.stack[-1] new_node = Terminal(token_type, value, lineno, column) - node.append_child(new_node) - self.stack[-1] = (dfa, next_state, node) + self.stack.node_append_child(new_node) + self.stack.state = next_state def push(self, next_dfa, next_state, node_type, lineno, column): """Push a terminal and adjust the current state.""" - dfa, state, node = self.stack[-1] - new_node = Nonterminal(node_type, []) - self.stack[-1] = (dfa, next_state, node) - self.stack.append((next_dfa, 0, new_node)) + self.stack.state = next_state + self.stack = self.stack.push(next_dfa, 0) def pop(self): """Pop an entry off the stack and make its node a child of 
the last.""" - dfa, state, node = self.stack.pop() + top = self.stack + self.stack = top.pop() + node = top.node + assert node is not None if self.stack: - # we are now done with node, so we can store it more efficiently if - # it has just one child - if node.num_children() == 1: - node = Nonterminal1(node.type, node.get_child(0)) - self.stack[-1][2].append_child(node) + self.stack.node_append_child(node) else: self.root = node diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py --- a/pypy/interpreter/pyparser/pyparse.py +++ b/pypy/interpreter/pyparser/pyparse.py @@ -132,7 +132,11 @@ w_message = space.str(e.get_w_value(space)) raise error.SyntaxError(space.text_w(w_message)) raise + if enc is not None: + compile_info.encoding = enc + return self._parse(textsrc, compile_info) + def _parse(self, textsrc, compile_info): flags = compile_info.flags # The tokenizer is very picky about how it wants its input. @@ -181,13 +185,16 @@ else: new_err = error.SyntaxError msg = "invalid syntax" - raise new_err(msg, e.lineno, e.column, e.line, + if e.expected_str is not None: + msg += " (expected '%s')" % e.expected_str + + # parser.ParseError(...).column is 0-based, but the offsets in the + # exceptions in the error module are 1-based, hence the '+ 1' + raise new_err(msg, e.lineno, e.column + 1, e.line, compile_info.filename) else: tree = self.root finally: # Avoid hanging onto the tree. self.root = None - if enc is not None: - compile_info.encoding = enc return tree diff --git a/pypy/interpreter/pyparser/pytokenizer.py b/pypy/interpreter/pyparser/pytokenizer.py --- a/pypy/interpreter/pyparser/pytokenizer.py +++ b/pypy/interpreter/pyparser/pytokenizer.py @@ -73,14 +73,14 @@ logical line; continuation lines are included. 
""" token_list = [] - lnum = parenlev = continued = 0 + lnum = continued = 0 namechars = NAMECHARS numchars = NUMCHARS contstr, needcont = '', 0 contline = None indents = [0] last_comment = '' - parenlevstart = (0, 0, "") + parenstack = [] # make the annotator happy endDFA = DUMMY_DFA @@ -97,7 +97,7 @@ if contstr: if not line: raise TokenError( - "EOF while scanning triple-quoted string literal", + "end of file (EOF) while scanning triple-quoted string literal", strstart[2], strstart[0], strstart[1]+1, token_list, lnum-1) endmatch = endDFA.recognize(line) @@ -123,7 +123,7 @@ contline = contline + line continue - elif parenlev == 0 and not continued: # new statement + elif not parenstack and not continued: # new statement if not line: break column = 0 while pos < max: # measure leading whitespace @@ -143,21 +143,21 @@ token_list.append((tokens.INDENT, line[:pos], lnum, 0, line)) last_comment = '' while column < indents[-1]: - indents = indents[:-1] + indents.pop() token_list.append((tokens.DEDENT, '', lnum, pos, line)) last_comment = '' if column != indents[-1]: err = "unindent does not match any outer indentation level" - raise TokenIndentationError(err, line, lnum, 0, token_list) + raise TokenIndentationError(err, line, lnum, column+1, token_list) else: # continued statement if not line: - if parenlev > 0: - lnum1, start1, line1 = parenlevstart + if parenstack: + _, lnum1, start1, line1 = parenstack[0] raise TokenError("parenthesis is never closed", line1, lnum1, start1 + 1, token_list, lnum) - raise TokenError("EOF in multi-line statement", line, - lnum, 0, token_list) + raise TokenError("end of file (EOF) in multi-line statement", line, + lnum, 0, token_list) # XXX why is the offset 0 here? 
continued = 0 while pos < max: @@ -180,7 +180,7 @@ token_list.append((tokens.NUMBER, token, lnum, start, line)) last_comment = '' elif initial in '\r\n': - if parenlev <= 0: + if not parenstack: tok = (tokens.NEWLINE, last_comment, lnum, start, line) token_list.append(tok) last_comment = '' @@ -222,14 +222,22 @@ continued = 1 else: if initial in '([{': - if parenlev == 0: - parenlevstart = (lnum, start, line) - parenlev = parenlev + 1 + parenstack.append((initial, lnum, start, line)) elif initial in ')]}': - parenlev = parenlev - 1 - if parenlev < 0: + if not parenstack: raise TokenError("unmatched '%s'" % initial, line, lnum, start + 1, token_list) + opening, lnum1, start1, line1 = parenstack.pop() + if not ((opening == "(" and initial == ")") or + (opening == "[" and initial == "]") or + (opening == "{" and initial == "}")): + msg = "closing parenthesis '%s' does not match opening parenthesis '%s'" % ( + initial, opening) + + if lnum1 != lnum: + msg += " on line " + str(lnum1) + raise TokenError( + msg, line, lnum, start + 1, token_list) if token in python_opmap: punct = python_opmap[token] else: @@ -241,7 +249,7 @@ if start < 0: start = pos if start<max and line[start] in single_quoted: - raise TokenError("EOL while scanning string literal", + raise TokenError("end of line (EOL) while scanning string literal", line, lnum, start+1, token_list) tok = (tokens.ERRORTOKEN, line[pos], lnum, pos, line) token_list.append(tok) diff --git a/pypy/interpreter/pyparser/test/targetparse.py b/pypy/interpreter/pyparser/test/targetparse.py new file mode 100644 --- /dev/null +++ b/pypy/interpreter/pyparser/test/targetparse.py @@ -0,0 +1,50 @@ +import sys +import os +ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) +print ROOT +sys.path.insert(0, str(ROOT)) +import time +from pypy.interpreter.pyparser import pyparse + + + +class FakeSpace(object): + pass + +fakespace = FakeSpace() + +def bench(fn, s): + a = time.clock() + info = 
pyparse.CompileInfo("<string>", "exec") + parser = pyparse.PythonParser(fakespace) + tree = parser._parse(s, info) + b = time.clock() + print fn, (b-a) + + +def entry_point(argv): + if len(argv) == 2: + fn = argv[1] + else: + fn = "../../../../rpython/rlib/unicodedata/unicodedb_5_2_0.py" + fd = os.open(fn, os.O_RDONLY, 0777) + res = [] + while True: + s = os.read(fd, 4096) + if not s: + break + res.append(s) + os.close(fd) + s = "".join(res) + print len(s) + bench(fn, s) + + return 0 + +# _____ Define and setup target ___ + +def target(*args): + return entry_point, None + +if __name__ == '__main__': + entry_point(sys.argv) diff --git a/pypy/interpreter/pyparser/test/test_metaparser.py b/pypy/interpreter/pyparser/test/test_metaparser.py --- a/pypy/interpreter/pyparser/test/test_metaparser.py +++ b/pypy/interpreter/pyparser/test/test_metaparser.py @@ -34,8 +34,8 @@ assert len(g.dfas) == 1 eval_sym = g.symbol_ids["eval"] assert g.start == eval_sym - states, first = g.dfas[eval_sym - 256] - assert states == [([(1, 1)], False), ([], True)] + dfa = g.dfas[eval_sym - 256] + assert dfa.states == [([(1, 1)], False), ([], True)] assert g.labels[0] == 0 def test_load_python_grammars(self): @@ -51,7 +51,7 @@ def test_items(self): g = self.gram_for("foo: NAME STRING OP '+'") assert len(g.dfas) == 1 - states = g.dfas[g.symbol_ids["foo"] - 256][0] + states = g.dfas[g.symbol_ids["foo"] - 256].states last = states[0][0][0][1] for state in states[1:-1]: assert last < state[0][0][1] diff --git a/pypy/interpreter/pyparser/test/test_parser.py b/pypy/interpreter/pyparser/test/test_parser.py --- a/pypy/interpreter/pyparser/test/test_parser.py +++ b/pypy/interpreter/pyparser/test/test_parser.py @@ -7,6 +7,12 @@ from pypy.interpreter.pyparser.test.test_metaparser import MyGrammar +def test_char_set(): + first = {5: None, 9: None, 100: None, 255:None} + p = parser.DFA(None, None, first) + for i in range(256): + assert p.could_match_token(i) == (i in first) + class 
SimpleParser(parser.Parser): def parse(self, input): @@ -55,8 +61,7 @@ n = parser.Terminal(tp, value, 0, 0) else: tp = gram.symbol_ids[data[0]] - children = [] - n = parser.Nonterminal(tp, children) + n = parser.Nonterminal(tp) new_indent = count_indent(line) if new_indent >= last_indent: if new_indent == last_indent and node_stack: @@ -291,3 +296,37 @@ NEWLINE ENDMARKER""" assert tree_from_string(expected, gram) == p.parse("hi 42 end") + + + def test_optimized_terminal(self): + gram = """foo: bar baz 'end' NEWLINE ENDMARKER +bar: NAME +baz: NUMBER +""" + p, gram = self.parser_for(gram, False) + expected = """ + foo + bar + NAME "a_name" + baz + NUMBER "42" + NAME "end" + NEWLINE + ENDMARKER""" + input = "a_name 42 end" + tree = p.parse(input) + assert tree_from_string(expected, gram) == tree + assert isinstance(tree, parser.Nonterminal) + assert isinstance(tree.get_child(0), parser.Nonterminal1) + assert isinstance(tree.get_child(1), parser.Nonterminal1) + + + def test_error_string(self): + p, gram = self.parser_for( + "foo: 'if' NUMBER '+' NUMBER" + ) + info = py.test.raises(parser.ParseError, p.parse, "if 42") + info.value.expected_str is None + info = py.test.raises(parser.ParseError, p.parse, "if 42 42") + info.value.expected_str == '+' + diff --git a/pypy/interpreter/pyparser/test/test_pyparse.py b/pypy/interpreter/pyparser/test/test_pyparse.py --- a/pypy/interpreter/pyparser/test/test_pyparse.py +++ b/pypy/interpreter/pyparser/test/test_pyparse.py @@ -76,14 +76,14 @@ exc = py.test.raises(SyntaxError, parse, "name another for").value assert exc.msg == "invalid syntax" assert exc.lineno == 1 - assert exc.offset == 5 + assert exc.offset == 6 assert exc.text.startswith("name another for") exc = py.test.raises(SyntaxError, parse, "x = \"blah\n\n\n").value - assert exc.msg == "EOL while scanning string literal" + assert exc.msg == "end of line (EOL) while scanning string literal" assert exc.lineno == 1 assert exc.offset == 5 exc = py.test.raises(SyntaxError, 
parse, "x = '''\n\n\n").value - assert exc.msg == "EOF while scanning triple-quoted string literal" + assert exc.msg == "end of file (EOF) while scanning triple-quoted string literal" assert exc.lineno == 1 assert exc.offset == 5 assert exc.lastlineno == 3 @@ -112,7 +112,7 @@ assert exc.msg == "expected an indented block" assert exc.lineno == 3 assert exc.text.startswith("pass") - assert exc.offset == 0 + assert exc.offset == 1 input = "hi\n indented" exc = py.test.raises(IndentationError, parse, input).value assert exc.msg == "unexpected indent" @@ -120,6 +120,7 @@ exc = py.test.raises(IndentationError, parse, input).value assert exc.msg == "unindent does not match any outer indentation level" assert exc.lineno == 3 + assert exc.offset == 3 def test_mac_newline(self): self.parse("this_is\ra_mac\rfile") _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit