[pypy-commit] pypy unicode-utf8: merge default into branch

mattip Mon, 30 Apr 2018 09:28:31 -0700

Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8
Changeset: r94458:d4baff192be4
Date: 2018-04-29 21:18 +0300
http://bitbucket.org/pypy/pypy/changeset/d4baff192be4/


Log:    merge default into branch

diff too long, truncating to 2000 out of 11084 lines

diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -51,3 +51,5 @@
 0000000000000000000000000000000000000000 release-pypy3.5-v5.10.0
 09f9160b643e3f02ccb8c843b2fbb4e5cbf54082 release-pypy3.5-v5.10.0
 3f6eaa010fce78cc7973bdc1dfdb95970f08fed2 release-pypy3.5-v5.10.1
+ab0b9caf307db6592905a80b8faffd69b39005b8 release-pypy2.7-v6.0.0
+fdd60ed87e941677e8ea11acf9f1819466521bf2 release-pypy3.5-v6.0.0
diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -6,36 +6,36 @@
 Except when otherwise stated (look for LICENSE files in directories or
 information at the beginning of each file) all software and documentation in
 the 'rpython', 'pypy', 'ctype_configure', 'dotviewer', 'demo', 'lib_pypy',
-'py', and '_pytest' directories is licensed as follows: 
+'py', and '_pytest' directories is licensed as follows:
 
     The MIT License
 
-    Permission is hereby granted, free of charge, to any person 
-    obtaining a copy of this software and associated documentation 
-    files (the "Software"), to deal in the Software without 
-    restriction, including without limitation the rights to use, 
-    copy, modify, merge, publish, distribute, sublicense, and/or 
-    sell copies of the Software, and to permit persons to whom the 
+    Permission is hereby granted, free of charge, to any person
+    obtaining a copy of this software and associated documentation
+    files (the "Software"), to deal in the Software without
+    restriction, including without limitation the rights to use,
+    copy, modify, merge, publish, distribute, sublicense, and/or
+    sell copies of the Software, and to permit persons to whom the
     Software is furnished to do so, subject to the following conditions:
 
-    The above copyright notice and this permission notice shall be included 
+    The above copyright notice and this permission notice shall be included
     in all copies or substantial portions of the Software.
 
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
-    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     DEALINGS IN THE SOFTWARE.
 
 
 PyPy Copyright holders 2003-2018
------------------------------------ 
+--------------------------------
 
 Except when otherwise stated (look for LICENSE files or information at
 the beginning of each file) the files in the 'pypy' directory are each
-copyrighted by one or more of the following people and organizations:    
+copyrighted by one or more of the following people and organizations:
 
   Armin Rigo
   Maciej Fijalkowski
@@ -89,13 +89,13 @@
   Niko Matsakis
   Alexander Hesse
   Ludovic Aubry
+  stian
   Jacob Hallen
   Jason Creighton
   Mark Young
   Alex Martelli
   Spenser Bauman
   Michal Bendowski
-  stian
   Jan de Mooij
   Tyler Wade
   Vincent Legoll
@@ -123,10 +123,10 @@
   Wenzhu Man
   Konstantin Lopuhin
   John Witulski
+  Jeremy Thurgood
   Greg Price
   Ivan Sichmann Freitas
   Dario Bertini
-  Jeremy Thurgood
   Mark Pearse
   Simon Cross
   Tobias Pape
@@ -145,18 +145,19 @@
   Adrian Kuhn
   tav
   Georg Brandl
+  Joannah Nanjekye
   Bert Freudenberg
   Stian Andreassen
   Wanja Saatkamp
   Mike Blume
-  Joannah Nanjekye
   Gerald Klix
   Oscar Nierstrasz
   Rami Chowdhury
   Stefan H. Muller
+  Dodan Mihai
   Tim Felgentreff
   Eugene Oden
-  Dodan Mihai
+  Colin Valliant
   Jeff Terrace
   Henry Mason
   Vasily Kuznetsov
@@ -225,12 +226,14 @@
   Vaibhav Sood
   Reuben Cummings
   Attila Gobi
+  Floris Bruynooghe
   Christopher Pope
   Tristan Arthur
   Christian Tismer 
   Dan Stromberg
   Carl Meyer
   Florin Papa
+  Arianna Avanzini
   Jens-Uwe Mager
   Valentina Mukhamedzhanova
   Stefano Parmesan
@@ -244,15 +247,18 @@
   Lukas Vacek
   Omer Katz
   Jacek Generowicz
+  Tomasz Dziopa
   Sylvain Thenault
   Jakub Stasiak
   Andrew Dalke
   Alejandro J. Cura
   Vladimir Kryachko
   Gabriel
+  Thomas Hisch
   Mark Williams
   Kunal Grover
   Nathan Taylor
+  Barry Hart
   Travis Francis Athougies
   Yasir Suhail
   Sergey Kishchenko
@@ -260,6 +266,7 @@
   Lutz Paelike
   Ian Foote
   Philipp Rustemeuer
+  Logan Chien
   Catalin Gabriel Manciu
   Jacob Oscarson
   Ryan Gonzalez
@@ -295,19 +302,20 @@
   Akira Li
   Gustavo Niemeyer
   Rafa&#322; Ga&#322;czy&#324;ski
-  Logan Chien
   Lucas Stadler
   roberto@goyle
   Matt Bogosian
   Yury V. Zaytsev
   florinpapa
   Anders Sigfridsson
+  Matt Jackson
   Nikolay Zinov
   rafalgalczyn...@gmail.com
   Joshua Gilbert
   Anna Katrina Dominguez
   Kim Jin Su
   Amber Brown
+  Miro Hron&#269;ok
   Anthony Sottile
   Nate Bragg
   Ben Darnell
@@ -315,7 +323,6 @@
   Godefroid Chappelle
   Julian Berman
   Michael Hudson-Doyle
-  Floris Bruynooghe
   Stephan Busemann
   Dan Colish
   timo
@@ -357,6 +364,7 @@
   Michael Chermside
   Anna Ravencroft
   remarkablerocket
+  Pauli Virtanen
   Petre Vijiac
   Berker Peksag
   Christian Muirhead
@@ -381,6 +389,7 @@
   Graham Markall
   Dan Loewenherz
   werat
+  Andrew Stepanov
   Niclas Olofsson
   Chris Pressey
   Tobias Diaz
@@ -395,14 +404,14 @@
   m...@funkyhat.org
   Stefan Marr
 
-  Heinrich-Heine University, Germany 
+  Heinrich-Heine University, Germany
   Open End AB (formerly AB Strakt), Sweden
-  merlinux GmbH, Germany 
-  tismerysoft GmbH, Germany 
-  Logilab Paris, France 
-  DFKI GmbH, Germany 
+  merlinux GmbH, Germany
+  tismerysoft GmbH, Germany
+  Logilab Paris, France
+  DFKI GmbH, Germany
   Impara, Germany
-  Change Maker, Sweden 
+  Change Maker, Sweden
   University of California Berkeley, USA
   Google Inc.
   King's College London
@@ -410,14 +419,14 @@
 The PyPy Logo as used by http://speed.pypy.org and others was created
 by Samuel Reis and is distributed on terms of Creative Commons Share Alike
 License.
- 
-License for 'lib-python/2.7'
-============================
+
+License for 'lib-python/2.7, lib-python/3'
+==========================================
 
 Except when otherwise stated (look for LICENSE files or copyright/license
-information at the beginning of each file) the files in the 'lib-python/2.7'
+information at the beginning of each file) the files in the 'lib-python'
 directory are all copyrighted by the Python Software Foundation and licensed
-under the terms that you can find here: https://docs.python.org/2/license.html
+under the terms that you can find here: https://docs.python.org/3/license.html
 
 License for 'pypy/module/unicodedata/'
 ======================================
@@ -441,9 +450,9 @@
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-  
+
      http://www.apache.org/licenses/LICENSE-2.0
-  
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
diff --git a/dotviewer/font/NOTICE b/dotviewer/font/COPYING.txt
rename from dotviewer/font/NOTICE
rename to dotviewer/font/COPYING.txt
diff --git a/lib-python/2.7/re.py b/lib-python/2.7/re.py
--- a/lib-python/2.7/re.py
+++ b/lib-python/2.7/re.py
@@ -225,7 +225,7 @@
 
 _pattern_type = type(sre_compile.compile("", 0))
 
-_MAXCACHE = 100
+_MAXCACHE = 1000
 
 def _compile(*key):
     # internal: compile pattern
diff --git a/lib-python/2.7/test/test_eof.py b/lib-python/2.7/test/test_eof.py
--- a/lib-python/2.7/test/test_eof.py
+++ b/lib-python/2.7/test/test_eof.py
@@ -5,7 +5,7 @@
 
 class EOFTestCase(unittest.TestCase):
     def test_EOFC(self):
-        expect = "EOL while scanning string literal (<string>, line 1)"
+        expect = "end of line (EOL) while scanning string literal (<string>, line 1)"
         try:
             eval("""'this is a test\
             """)
@@ -15,7 +15,7 @@
             raise test_support.TestFailed
 
     def test_EOFS(self):
-        expect = ("EOF while scanning triple-quoted string literal "
+        expect = ("end of file (EOF) while scanning triple-quoted string literal "
                   "(<string>, line 1)")
         try:
             eval("""'''this is a test""")
diff --git a/lib-python/2.7/test/test_generators.py 
b/lib-python/2.7/test/test_generators.py
--- a/lib-python/2.7/test/test_generators.py
+++ b/lib-python/2.7/test/test_generators.py
@@ -398,7 +398,10 @@
 0
 >>> type(i.gi_frame)
 <type 'frame'>
->>> i.gi_running = 42
+
+PyPy prints "readonly attribute 'gi_running'" so ignore the exception detail
+
+>>> i.gi_running = 42 # doctest: +IGNORE_EXCEPTION_DETAIL
 Traceback (most recent call last):
   ...
 TypeError: readonly attribute
diff --git a/lib-python/2.7/test/test_genexps.py 
b/lib-python/2.7/test/test_genexps.py
--- a/lib-python/2.7/test/test_genexps.py
+++ b/lib-python/2.7/test/test_genexps.py
@@ -87,7 +87,7 @@
     >>> dict(a = i for i in xrange(10))
     Traceback (most recent call last):
        ...
-    SyntaxError: invalid syntax
+    SyntaxError: invalid syntax (expected ')')
 
 Verify that parenthesis are required when used as a keyword argument value
 
diff --git a/lib-python/2.7/test/test_traceback.py 
b/lib-python/2.7/test/test_traceback.py
--- a/lib-python/2.7/test/test_traceback.py
+++ b/lib-python/2.7/test/test_traceback.py
@@ -123,10 +123,7 @@
         self.assertEqual(len(err), 4)
         self.assertEqual(err[1].strip(), "print(2)")
         self.assertIn("^", err[2])
-        if check_impl_detail():
-            self.assertEqual(err[1].find("p"), err[2].find("^"))
-        if check_impl_detail(pypy=True):
-            self.assertEqual(err[1].find("2)") + 1, err[2].find("^"))
+        self.assertEqual(err[1].find("p"), err[2].find("^"))
 
     def test_base_exception(self):
         # Test that exceptions derived from BaseException are formatted right
diff --git a/lib-python/2.7/threading.py b/lib-python/2.7/threading.py
--- a/lib-python/2.7/threading.py
+++ b/lib-python/2.7/threading.py
@@ -351,6 +351,21 @@
                         # forward-compatibility reasons we do the same.
                         waiter.acquire()
                         gotit = True
+                    except AttributeError:
+                        # someone patched the 'waiter' class, probably.
+                        # Fall back to the standard CPython logic.
+                        # See the CPython lib for the comments about it...
+                        endtime = _time() + timeout
+                        delay = 0.0005 # 500 us -> initial delay of 1 ms
+                        while True:
+                            gotit = waiter.acquire(0)
+                            if gotit:
+                                break
+                            remaining = endtime - _time()
+                            if remaining <= 0:
+                                break
+                            delay = min(delay * 2, remaining, .05)
+                            _sleep(delay)
                 else:
                     gotit = waiter.acquire(False)
                 if not gotit:
diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py
--- a/lib_pypy/_ctypes/array.py
+++ b/lib_pypy/_ctypes/array.py
@@ -82,8 +82,11 @@
     def _CData_output(self, resarray, base=None, index=-1):
         from _rawffi.alt import types
         # If a char_p or unichar_p is received, skip the string interpretation
-        if base._ffiargtype != types.Pointer(types.char_p) and \
-           base._ffiargtype != types.Pointer(types.unichar_p):
+        try:
+            deref = type(base)._deref_ffiargtype()
+        except AttributeError:
+            deref = None
+        if deref != types.char_p and deref != types.unichar_p:
             # this seems to be a string if we're array of char, surprise!
             from ctypes import c_char, c_wchar
             if self._type_ is c_char:
@@ -120,6 +123,12 @@
                 value = self(*value)
         return _CDataMeta.from_param(self, value)
 
+    def _build_ffiargtype(self):
+        return _ffi.types.Pointer(self._type_.get_ffi_argtype())
+
+    def _deref_ffiargtype(self):
+        return self._type_.get_ffi_argtype()
+
 def array_get_slice_params(self, index):
     if hasattr(self, '_length_'):
         start, stop, step = index.indices(self._length_)
@@ -248,6 +257,5 @@
             _type_ = base
         )
         cls = ArrayMeta(name, (Array,), tpdict)
-        cls._ffiargtype = _ffi.types.Pointer(base.get_ffi_argtype())
         ARRAY_CACHE[key] = cls
         return cls
diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py
--- a/lib_pypy/_ctypes/basics.py
+++ b/lib_pypy/_ctypes/basics.py
@@ -49,10 +49,13 @@
         else:
             return self.from_param(as_parameter)
 
+    def _build_ffiargtype(self):
+        return _shape_to_ffi_type(self._ffiargshape_)
+
     def get_ffi_argtype(self):
         if self._ffiargtype:
             return self._ffiargtype
-        self._ffiargtype = _shape_to_ffi_type(self._ffiargshape_)
+        self._ffiargtype = self._build_ffiargtype()
         return self._ffiargtype
 
     def _CData_output(self, resbuffer, base=None, index=-1):
diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py
--- a/lib_pypy/_ctypes/pointer.py
+++ b/lib_pypy/_ctypes/pointer.py
@@ -70,7 +70,12 @@
         self._ffiarray = ffiarray
         self.__init__ = __init__
         self._type_ = TP
-        self._ffiargtype = _ffi.types.Pointer(TP.get_ffi_argtype())
+
+    def _build_ffiargtype(self):
+        return _ffi.types.Pointer(self._type_.get_ffi_argtype())
+
+    def _deref_ffiargtype(self):
+        return self._type_.get_ffi_argtype()
 
     from_address = cdata_from_address
 
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -160,6 +160,10 @@
             raise AttributeError("_fields_ is final")
         if self in [f[1] for f in value]:
             raise AttributeError("Structure or union cannot contain itself")
+        if self._ffiargtype is not None:
+            raise NotImplementedError("Too late to set _fields_: we already "
+                        "said to libffi that the structure type %s is opaque"
+                        % (self,))
         names_and_fields(
             self,
             value, self.__bases__[0],
diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst
--- a/pypy/doc/contributor.rst
+++ b/pypy/doc/contributor.rst
@@ -56,13 +56,13 @@
   Niko Matsakis
   Alexander Hesse
   Ludovic Aubry
+  stian
   Jacob Hallen
   Jason Creighton
   Mark Young
   Alex Martelli
   Spenser Bauman
   Michal Bendowski
-  stian
   Jan de Mooij
   Tyler Wade
   Vincent Legoll
@@ -90,10 +90,10 @@
   Wenzhu Man
   Konstantin Lopuhin
   John Witulski
+  Jeremy Thurgood
   Greg Price
   Ivan Sichmann Freitas
   Dario Bertini
-  Jeremy Thurgood
   Mark Pearse
   Simon Cross
   Tobias Pape
@@ -112,18 +112,19 @@
   Adrian Kuhn
   tav
   Georg Brandl
+  Joannah Nanjekye
   Bert Freudenberg
   Stian Andreassen
   Wanja Saatkamp
   Mike Blume
-  Joannah Nanjekye
   Gerald Klix
   Oscar Nierstrasz
   Rami Chowdhury
   Stefan H. Muller
+  Dodan Mihai
   Tim Felgentreff
   Eugene Oden
-  Dodan Mihai
+  Colin Valliant
   Jeff Terrace
   Henry Mason
   Vasily Kuznetsov
@@ -192,12 +193,14 @@
   Vaibhav Sood
   Reuben Cummings
   Attila Gobi
+  Floris Bruynooghe
   Christopher Pope
   Tristan Arthur
   Christian Tismer 
   Dan Stromberg
   Carl Meyer
   Florin Papa
+  Arianna Avanzini
   Jens-Uwe Mager
   Valentina Mukhamedzhanova
   Stefano Parmesan
@@ -211,6 +214,7 @@
   Lukas Vacek
   Omer Katz
   Jacek Generowicz
+  Tomasz Dziopa
   Sylvain Thenault
   Jakub Stasiak
   Andrew Dalke
@@ -221,6 +225,7 @@
   Mark Williams
   Kunal Grover
   Nathan Taylor
+  Barry Hart
   Travis Francis Athougies
   Yasir Suhail
   Sergey Kishchenko
@@ -228,6 +233,7 @@
   Lutz Paelike
   Ian Foote
   Philipp Rustemeuer
+  Logan Chien
   Catalin Gabriel Manciu
   Jacob Oscarson
   Ryan Gonzalez
@@ -263,19 +269,20 @@
   Akira Li
   Gustavo Niemeyer
   Rafa&#322; Ga&#322;czy&#324;ski
-  Logan Chien
   Lucas Stadler
   roberto@goyle
   Matt Bogosian
   Yury V. Zaytsev
   florinpapa
   Anders Sigfridsson
+  Matt Jackson
   Nikolay Zinov
   rafalgalczyn...@gmail.com
   Joshua Gilbert
   Anna Katrina Dominguez
   Kim Jin Su
   Amber Brown
+  Miro Hron&#269;ok
   Anthony Sottile
   Nate Bragg
   Ben Darnell
@@ -283,7 +290,6 @@
   Godefroid Chappelle
   Julian Berman
   Michael Hudson-Doyle
-  Floris Bruynooghe
   Stephan Busemann
   Dan Colish
   timo
@@ -325,6 +331,7 @@
   Michael Chermside
   Anna Ravencroft
   remarkablerocket
+  Pauli Virtanen
   Petre Vijiac
   Berker Peksag
   Christian Muirhead
@@ -349,6 +356,7 @@
   Graham Markall
   Dan Loewenherz
   werat
+  Andrew Stepanov
   Niclas Olofsson
   Chris Pressey
   Tobias Diaz
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -10,89 +10,6 @@
 PyPy.
 
 
-.. _extension-modules:
-
-Extension modules
------------------
-
-List of extension modules that we support:
-
-* Supported as built-in modules (in :source:`pypy/module/`):
-
-    __builtin__
-    :doc:`__pypy__ <__pypy__-module>`
-    _ast
-    _codecs
-    _collections
-    :doc:`_continuation <stackless>`
-    :doc:`_ffi <discussion/ctypes-implementation>`
-    _hashlib
-    _io
-    _locale
-    _lsprof
-    _md5
-    :doc:`_minimal_curses <config/objspace.usemodules._minimal_curses>`
-    _multiprocessing
-    _random
-    :doc:`_rawffi <discussion/ctypes-implementation>`
-    _sha
-    _socket
-    _sre
-    _ssl
-    _warnings
-    _weakref
-    _winreg
-    array
-    binascii
-    bz2
-    cStringIO
-    cmath
-    `cpyext`_
-    crypt
-    errno
-    exceptions
-    fcntl
-    gc
-    imp
-    itertools
-    marshal
-    math
-    mmap
-    operator
-    parser
-    posix
-    pyexpat
-    select
-    signal
-    struct
-    symbol
-    sys
-    termios
-    thread
-    time
-    token
-    unicodedata
-    zipimport
-    zlib
-
-  When translated on Windows, a few Unix-only modules are skipped,
-  and the following module is built instead:
-
-    _winreg
-
-* Supported by being rewritten in pure Python (possibly using ``cffi``):
-  see the :source:`lib_pypy/` directory.  Examples of modules that we
-  support this way: ``ctypes``, ``cPickle``, ``cmath``, ``dbm``, 
``datetime``...
-  Note that some modules are both in there and in the list above;
-  by default, the built-in module is used (but can be disabled
-  at translation time).
-
-The extension modules (i.e. modules written in C, in the standard CPython)
-that are neither mentioned above nor in :source:`lib_pypy/` are not available 
in PyPy.
-(You may have a chance to use them anyway with `cpyext`_.)
-
-.. _cpyext: 
http://morepypy.blogspot.com/2010/04/using-cpython-extension-modules-with.html
-
 
 Differences related to garbage collection strategies
 ----------------------------------------------------
@@ -559,7 +476,96 @@
   environment variable. CPython searches for ``vcvarsall.bat`` somewhere 
**above**
   that value.
 
+* SyntaxError_ s try harder to give details about the cause of the failure, so
+  the error messages are not the same as in CPython
+
+
+.. _extension-modules:
+
+Extension modules
+-----------------
+
+List of extension modules that we support:
+
+* Supported as built-in modules (in :source:`pypy/module/`):
+
+    __builtin__
+    :doc:`__pypy__ <__pypy__-module>`
+    _ast
+    _codecs
+    _collections
+    :doc:`_continuation <stackless>`
+    :doc:`_ffi <discussion/ctypes-implementation>`
+    _hashlib
+    _io
+    _locale
+    _lsprof
+    _md5
+    :doc:`_minimal_curses <config/objspace.usemodules._minimal_curses>`
+    _multiprocessing
+    _random
+    :doc:`_rawffi <discussion/ctypes-implementation>`
+    _sha
+    _socket
+    _sre
+    _ssl
+    _warnings
+    _weakref
+    _winreg
+    array
+    binascii
+    bz2
+    cStringIO
+    cmath
+    `cpyext`_
+    crypt
+    errno
+    exceptions
+    fcntl
+    gc
+    imp
+    itertools
+    marshal
+    math
+    mmap
+    operator
+    parser
+    posix
+    pyexpat
+    select
+    signal
+    struct
+    symbol
+    sys
+    termios
+    thread
+    time
+    token
+    unicodedata
+    zipimport
+    zlib
+
+  When translated on Windows, a few Unix-only modules are skipped,
+  and the following module is built instead:
+
+    _winreg
+
+* Supported by being rewritten in pure Python (possibly using ``cffi``):
+  see the :source:`lib_pypy/` directory.  Examples of modules that we
+  support this way: ``ctypes``, ``cPickle``, ``cmath``, ``dbm``, 
``datetime``...
+  Note that some modules are both in there and in the list above;
+  by default, the built-in module is used (but can be disabled
+  at translation time).
+
+The extension modules (i.e. modules written in C, in the standard CPython)
+that are neither mentioned above nor in :source:`lib_pypy/` are not available 
in PyPy.
+(You may have a chance to use them anyway with `cpyext`_.)
+
+.. _cpyext: 
http://morepypy.blogspot.com/2010/04/using-cpython-extension-modules-with.html
+
+
 .. _`is ignored in PyPy`: http://bugs.python.org/issue14621
 .. _`little point`: 
http://events.ccc.de/congress/2012/Fahrplan/events/5152.en.html
 .. _`#2072`: https://bitbucket.org/pypy/pypy/issue/2072/
 .. _`issue #2653`: https://bitbucket.org/pypy/pypy/issues/2653/
+.. _SyntaxError: 
https://morepypy.blogspot.co.il/2018/04/improving-syntaxerror-in-pypy.html
diff --git a/pypy/doc/gc_info.rst b/pypy/doc/gc_info.rst
--- a/pypy/doc/gc_info.rst
+++ b/pypy/doc/gc_info.rst
@@ -121,6 +121,166 @@
   alive by GC objects, but not accounted in the GC
 
 
+GC Hooks
+--------
+
+GC hooks are user-defined functions which are called whenever a specific GC
+event occurs, and can be used to monitor GC activity and pauses.  You can
+install the hooks by setting the following attributes:
+
+``gc.hooks.on_gc_minor``
+    Called whenever a minor collection occurs. It corresponds to
+    ``gc-minor`` sections inside ``PYPYLOG``.
+
+``gc.hooks.on_gc_collect_step``
+    Called whenever an incremental step of a major collection occurs. It
+    corresponds to ``gc-collect-step`` sections inside ``PYPYLOG``.
+
+``gc.hooks.on_gc_collect``
+    Called after the last incremental step, when a major collection is fully
+    done. It corresponds to ``gc-collect-done`` sections inside ``PYPYLOG``.
+
+To uninstall a hook, simply set the corresponding attribute to ``None``.  To
+install all hooks at once, you can call ``gc.hooks.set(obj)``, which will look
+for methods ``on_gc_*`` on ``obj``.  To uninstall all the hooks at once, you
+can call ``gc.hooks.reset()``.
+
+The functions called by the hooks receive a single ``stats`` argument, which
+contains various statistics about the event.
+
+Note that PyPy cannot call the hooks immediately after a GC event, but it has
+to wait until it reaches a point in which the interpreter is in a known state
+and calling user-defined code is harmless.  It might happen that multiple
+events occur before the hook is invoked: in this case, you can inspect the
+value ``stats.count`` to know how many times the event occurred since the last
+time the hook was called.  Similarly, ``stats.duration`` contains the
+**total** time spent by the GC for this specific event since the last time the
+hook was called.
+
+On the other hand, all the other fields of the ``stats`` object are relative
+only to the **last** event of the series.
+
+The attributes for ``GcMinorStats`` are:
+
+``count``
+    The number of minor collections that occurred since the last hook call.
+
+``duration``
+    The total time spent inside minor collections since the last hook
+    call. See below for more information on the unit.
+
+``duration_min``
+    The duration of the fastest minor collection since the last hook call.
+    
+``duration_max``
+    The duration of the slowest minor collection since the last hook call.
+
+``total_memory_used``
+    The amount of memory used at the end of the minor collection, in
+    bytes. This includes the memory used in arenas (for GC-managed memory) and
+    raw-malloced memory (e.g., the content of numpy arrays).
+
+``pinned_objects``
+    The number of pinned objects.
+
+
+The attributes for ``GcCollectStepStats`` are:
+
+``count``, ``duration``, ``duration_min``, ``duration_max``
+    See above.
+
+``oldstate``, ``newstate``
+    Integers which indicate the state of the GC before and after the step.
+
+The value of ``oldstate`` and ``newstate`` is one of these constants, defined
+inside ``gc.GcCollectStepStats``: ``STATE_SCANNING``, ``STATE_MARKING``,
+``STATE_SWEEPING``, ``STATE_FINALIZING``.  It is possible to get a string
+representation of it by indexing the ``GC_STATES`` tuple.
+
+
+The attributes for ``GcCollectStats`` are:
+
+``count``
+    See above.
+
+``num_major_collects``
+    The total number of major collections which have been done since the
+    start. Contrary to ``count``, this is an always-growing counter and it's
+    not reset between invocations.
+
+``arenas_count_before``, ``arenas_count_after``
+    Number of arenas used before and after the major collection.
+
+``arenas_bytes``
+    Total number of bytes used by GC-managed objects.
+
+``rawmalloc_bytes_before``, ``rawmalloc_bytes_after``
+    Total number of bytes used by raw-malloced objects, before and after the
+    major collection.
+
+Note that ``GcCollectStats`` has **not** got a ``duration`` field. This is
+because all the GC work is done inside ``gc-collect-step``:
+``gc-collect-done`` is used only to give additional stats, but doesn't do any
+actual work.
+
+A note about the ``duration`` field: depending on the architecture and
+operating system, PyPy uses different ways to read timestamps, so ``duration``
+is expressed in varying units. It is possible to know which by calling
+``__pypy__.debug_get_timestamp_unit()``, which can be one of the following
+values:
+
+``tsc``
+    The default on ``x86`` machines: timestamps are expressed in CPU ticks, as
+    read by the `Time Stamp Counter`_.
+
+``ns``
+    Timestamps are expressed in nanoseconds.
+
+``QueryPerformanceCounter``
+    On Windows, in case for some reason ``tsc`` is not available: timestamps
+    are read using the win API ``QueryPerformanceCounter()``.
+
+
+Unfortunately, there does not seem to be a reliable standard way for
+converting ``tsc`` ticks into nanoseconds, although in practice on modern CPUs
+it is enough to divide the ticks by the maximum nominal frequency of the CPU.
+For this reason, PyPy gives the raw value, and leaves the job of doing the
+conversion to external libraries.
+
+Here is an example of GC hooks in use::
+
+    import sys
+    import gc
+
+    class MyHooks(object):
+        done = False
+
+        def on_gc_minor(self, stats):
+            print 'gc-minor:        count = %02d, duration = %d' % (stats.count,
+                                                                    stats.duration)
+
+        def on_gc_collect_step(self, stats):
+            old = gc.GcCollectStepStats.GC_STATES[stats.oldstate]
+            new = gc.GcCollectStepStats.GC_STATES[stats.newstate]
+            print 'gc-collect-step: %s --> %s' % (old, new)
+            print '                 count = %02d, duration = %d' % (stats.count,
+                                                                    stats.duration)
+
+        def on_gc_collect(self, stats):
+            print 'gc-collect-done: count = %02d' % stats.count
+            self.done = True
+
+    hooks = MyHooks()
+    gc.hooks.set(hooks)
+
+    # simulate some GC activity
+    lst = []
+    while not hooks.done:
+        lst = [lst, 1, 2, 3]
+
+
+.. _`Time Stamp Counter`: https://en.wikipedia.org/wiki/Time_Stamp_Counter    
+    
 .. _minimark-environment-variables:
 
 Environment variables
diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst
--- a/pypy/doc/how-to-release.rst
+++ b/pypy/doc/how-to-release.rst
@@ -40,6 +40,8 @@
   sure things are ported back to the trunk and to the branch as
   necessary.
 
+* Make sure the RPython builds on the buildbot pass with no failures
+
 * Maybe bump the SOABI number in module/imp/importing. This has many
   implications, so make sure the PyPy community agrees to the change.
 
diff --git a/pypy/doc/index-of-release-notes.rst 
b/pypy/doc/index-of-release-notes.rst
--- a/pypy/doc/index-of-release-notes.rst
+++ b/pypy/doc/index-of-release-notes.rst
@@ -6,6 +6,7 @@
 
 .. toctree::
 
+   release-v6.0.0.rst
    release-v5.10.1.rst
    release-v5.10.0.rst
    release-v5.9.0.rst
diff --git a/pypy/doc/index-of-whatsnew.rst b/pypy/doc/index-of-whatsnew.rst
--- a/pypy/doc/index-of-whatsnew.rst
+++ b/pypy/doc/index-of-whatsnew.rst
@@ -7,6 +7,8 @@
 .. toctree::
 
    whatsnew-head.rst
+   whatsnew-pypy2-6.0.0.rst
+   whatsnew-pypy2-5.10.0.rst
    whatsnew-pypy2-5.10.0.rst
    whatsnew-pypy2-5.9.0.rst
    whatsnew-pypy2-5.8.0.rst
diff --git a/pypy/doc/release-v6.0.0.rst b/pypy/doc/release-v6.0.0.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/release-v6.0.0.rst
@@ -0,0 +1,123 @@
+======================================
+PyPy2.7 and PyPy3.5 v6.0 dual release
+======================================
+
+The PyPy team is proud to release both PyPy2.7 v6.0 (an interpreter supporting
+Python 2.7 syntax), and a PyPy3.5 v6.0 (an interpreter supporting Python
+3.5 syntax). The two releases are both based on much the same codebase, thus
+the dual release.
+
+This release is a feature release following our previous 5.10 incremental
+release in late December 2017. Our C-API compatibility layer ``cpyext`` is
+now much faster (see the `blog post`_) as well as more complete. We have made
+many other improvements in speed and CPython compatibility. Since the changes
+affect the included python development header files, all c-extension modules must
+be recompiled for this version.
+
+Until we can work with downstream providers to distribute builds with PyPy, we
+have made packages for some common packages `available as wheels`_. You may
+compile yourself using ``pip install --no-build-isolation <package>``; the
+``--no-build-isolation`` flag is currently needed for pip v10.
+
+First-time python users are often stumped by silly typos and omissions when
+getting started writing code. We have improved our parser to emit more friendly
+`syntax errors`_, making PyPy not only faster but more friendly.
+
+The GC now has `hooks`_ to gain more insights into its performance.
+
+The Matplotlib TkAgg backend now works with PyPy, as do pygame and pygobject_.
+
+We updated the `cffi`_ module included in PyPy to version 1.11.5, and the
+`cppyy`_ backend to 0.6.0. Please use these to wrap your C and C++ code,
+respectively, for a JIT friendly experience.
+
+As always, this release is 100% compatible with the previous one and fixed
+several issues and bugs raised by the growing community of PyPy users.
+We strongly recommend updating.
+
+The Windows PyPy3.5 release is still considered beta-quality. There are open
+issues with unicode handling especially around system calls and c-extensions.
+
+The utf8 branch that changes internal representation of unicode to utf8 did not
+make it into the release, so there is still more goodness coming. We also
+began working on a Python3.6 implementation; help is welcome.
+
+You can download the v6.0 releases here:
+
+    http://pypy.org/download.html
+
+We would like to thank our donors for the continued support of the PyPy
+project. If PyPy is not quite good enough for your needs, we are available for
+direct consulting work.
+
+We would also like to thank our contributors and encourage new people to join
+the project. PyPy has many layers and we need help with all of them: `PyPy`_
+and `RPython`_ documentation improvements, tweaking popular `modules`_ to run
+on pypy, or general `help`_ with making RPython's JIT even better.
+
+.. _`PyPy`: index.html
+.. _`RPython`: https://rpython.readthedocs.org
+.. _`modules`: project-ideas.html#make-more-python-modules-pypy-friendly
+.. _`help`: project-ideas.html
+.. _`blog post`: https://morepypy.blogspot.it/2017/10/cape-of-good-hope-for-pypy-hello-from.html
+.. _pygobject: https://lazka.github.io/posts/2018-04_pypy-pygobject/index.html
+.. _`syntax errors`: https://morepypy.blogspot.com/2018/04/improving-syntaxerror-in-pypy.html
+.. _`hooks`: gc_info.html#gc-hooks
+.. _`cffi`: http://cffi.readthedocs.io
+.. _`cppyy`: https://cppyy.readthedocs.io
+.. _`available as wheels`: https://github.com/antocuni/pypy-wheels
+
+What is PyPy?
+=============
+
+PyPy is a very compliant Python interpreter, almost a drop-in replacement for
+CPython 2.7 and CPython 3.5. It's fast (`PyPy and CPython 2.7.x`_ performance comparison)
+due to its integrated tracing JIT compiler.
+
+We also welcome developers of other `dynamic languages`_ to see what RPython
+can do for them.
+
+The PyPy release supports:
+
+  * **x86** machines on most common operating systems
+    (Linux 32/64 bits, Mac OS X 64 bits, Windows 32 bits, OpenBSD, FreeBSD)
+
+  * newer **ARM** hardware (ARMv6 or ARMv7, with VFPv3) running Linux,
+
+  * big- and little-endian variants of **PPC64** running Linux,
+
+  * **s390x** running Linux
+
+.. _`PyPy and CPython 2.7.x`: http://speed.pypy.org
+.. _`dynamic languages`: http://rpython.readthedocs.io/en/latest/examples.html
+
+Changelog
+=========
+
+* Speed up C-API method calls, and make most Py*_Check calls C macros
+* Speed up C-API slot method calls
+* Enable TkAgg backend support for matplotlib
+* support ``hastzinfo`` and ``tzinfo`` in the C-API ``PyDateTime*`` structures
+* datetime.h is now more similar to CPython
+* We now support ``PyUnicode_AsUTF{16,32}String``, ``_PyLong_AsByteArray``,
+  ``_PyLong_AsByteArrayO``
+* PyPy3.5 on Windows is compiled with the Microsoft Visual Compiler v14, like
+  CPython
+* Fix performance of attribute lookup when more than 80 attributes are used
+* Improve performance on passing built-in types to C-API C code
+* Improve the performance of datetime and timedelta by skipping the consistency
+  checks of the datetime values (they are correct by construction)
+* Improve handling of ``bigint`` s, including fixing ``int_divmod``
+* Improve reporting of GC statistics
+* Accept unicode filenames in ``dbm.open()``
+* Improve RPython support for half-floats
+* Added missing attributes to C-API ``instancemethod`` on pypy3
+* Store error state in thread-local storage for C-API.
+* Fix JIT bugs exposed in the sre module
+* Improve speed of Python parser, improve ParseError messages and SyntaxError
+* Handle JIT hooks more efficiently
+* Fix a rare GC bug exposed by intensive use of cpyext ``Buffer`` s
+
+We also refactored many parts of the JIT bridge optimizations, as well as cpyext
+internals, and together with new contributors fixed issues, added new
+documentation, and cleaned up the codebase.
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -1,73 +1,12 @@
-===========================
-What's new in PyPy2.7 5.10+
-===========================
+==========================
+What's new in PyPy2.7 6.0+
+==========================
 
-.. this is a revision shortly after release-pypy2.7-v5.10.0
-.. startrev: 6b024edd9d12
+.. this is a revision shortly after release-pypy-6.0.0
+.. startrev: e50e11af23f1
 
-.. branch: cpyext-avoid-roundtrip
 
-Big refactoring of some cpyext code, which avoids a lot of nonsense when
-calling C from Python and vice-versa: the result is a big speedup in
-function/method calls, up to 6 times faster.
 
-.. branch: cpyext-datetime2
-
-Support ``tzinfo`` field on C-API datetime objects, fixes latest pandas HEAD
-
-
-.. branch: mapdict-size-limit
-
-Fix a corner case of mapdict: When an instance is used like a dict (using
-``setattr`` and ``getattr``, or ``.__dict__``) and a lot of attributes are
-added, then the performance using mapdict is linear in the number of
-attributes. This is now fixed (by switching to a regular dict after 80
-attributes).
-
-
-.. branch: cpyext-faster-arg-passing
-
-When using cpyext, improve the speed of passing certain objects from PyPy to C
-code, most notably None, True, False, types, all instances of C-defined types.
-Before, a dict lookup was needed every time such an object crossed over, now it
-is just a field read.
-
-
-.. branch: 2634_datetime_timedelta_performance
-
-Improve datetime + timedelta performance.
-
-.. branch: memory-accounting
-
-Improve way to describe memory
-
-.. branch: msvc14
-
-Allow compilaiton with Visual Studio 2017 compiler suite on windows
-
-.. branch: refactor-slots
-
-Refactor cpyext slots.
-
-
-.. branch: call-loopinvariant-into-bridges
-
-Speed up branchy code that does a lot of function inlining by saving one call
-to read the TLS in most bridges.
-
-.. branch: rpython-sprint
-
-Refactor in rpython signatures
-
-.. branch: cpyext-tls-operror2
-
-Store error state thread-locally in executioncontext, fixes issue #2764
-
-.. branch: cpyext-fast-typecheck
-
-Optimize `Py*_Check` for `Bool`, `Float`, `Set`. Also refactor and simplify
-`W_PyCWrapperObject` which is used to call slots from the C-API, greatly
-improving microbenchmarks in https://github.com/antocuni/cpyext-benchmarks
 
 .. branch: unicode-utf8-re
 .. branch: utf8-io
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-pypy2-6.0.0.rst
copy from pypy/doc/whatsnew-head.rst
copy to pypy/doc/whatsnew-pypy2-6.0.0.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-pypy2-6.0.0.rst
@@ -69,8 +69,60 @@
 `W_PyCWrapperObject` which is used to call slots from the C-API, greatly
 improving microbenchmarks in https://github.com/antocuni/cpyext-benchmarks
 
-.. branch: unicode-utf8-re
-.. branch: utf8-io
-Utf8 handling for unicode
 
+.. branch: fix-sre-problems
 
+Fix two (unrelated) JIT bugs manifesting in the re module:
+
+- green fields are broken and were thus disabled, plus their usage removed from
+  the _sre implementation
+
+- in rare "trace is too long" situations, the JIT could break behaviour
+  arbitrarily.
+
+.. branch: jit-hooks-can-be-disabled
+
+Be more efficient about JIT hooks. Make it possible for the frontend to declare
+that jit hooks are currently not enabled at all. In that case, the list of ops
+does not have to be created in the case of the on_abort hook (which is
+expensive).
+
+
+.. branch: pyparser-improvements
+
+Improve speed of Python parser, improve ParseError messages slightly.
+
+.. branch: ioctl-arg-size
+
+Work around possible bugs in upstream ioctl users: like CPython, allocate at
+least 1024 bytes for the arg in calls to ``ioctl(fd, request, arg)``. Fixes
+issue #2776
+
+.. branch: cpyext-subclass-setattr
+
+Fix for python-level classes that inherit from C-API types, previously the
+`w_obj` was not necessarily preserved throughout the lifetime of the `pyobj`
+which led to cases where instance attributes were lost. Fixes issue #2793
+
+
+.. branch: pyparser-improvements-2
+
+Improve line offsets that are reported by SyntaxError. Improve error messages
+for a few situations, including mismatched parenthesis.
+
+.. branch: issue2752
+
+Fix a rare GC bug that was introduced more than one year ago, but was
+not diagnosed before issue #2752.
+
+.. branch: gc-hooks
+
+Introduce GC hooks, as documented in doc/gc_info.rst
+
+.. branch: gc-hook-better-timestamp
+
+Improve GC hooks
+
+.. branch: cppyy-packaging
+
+Update backend to 0.6.0 and support exceptions through wrappers
diff --git a/pypy/goal/targetpypystandalone.py 
b/pypy/goal/targetpypystandalone.py
--- a/pypy/goal/targetpypystandalone.py
+++ b/pypy/goal/targetpypystandalone.py
@@ -215,6 +215,7 @@
     usage = SUPPRESS_USAGE
 
     take_options = True
+    space = None
 
     def opt_parser(self, config):
         parser = to_optparse(config, useoptions=["objspace.*"],
@@ -364,15 +365,21 @@
         from pypy.module.pypyjit.hooks import pypy_hooks
         return PyPyJitPolicy(pypy_hooks)
 
+    def get_gchooks(self):
+        from pypy.module.gc.hook import LowLevelGcHooks
+        if self.space is None:
+            raise Exception("get_gchooks must be called afeter 
get_entry_point")
+        return self.space.fromcache(LowLevelGcHooks)
+
     def get_entry_point(self, config):
-        space = make_objspace(config)
+        self.space = make_objspace(config)
 
         # manually imports app_main.py
         filename = os.path.join(pypydir, 'interpreter', 'app_main.py')
         app = gateway.applevel(open(filename).read(), 'app_main.py', 
'app_main')
         app.hidden_applevel = False
-        w_dict = app.getwdict(space)
-        entry_point, _ = create_entry_point(space, w_dict)
+        w_dict = app.getwdict(self.space)
+        entry_point, _ = create_entry_point(self.space, w_dict)
 
         return entry_point, None, PyPyAnnotatorPolicy()
 
@@ -381,7 +388,7 @@
                      'jitpolicy', 'get_entry_point',
                      'get_additional_config_options']:
             ns[name] = getattr(self, name)
-
+        ns['get_gchooks'] = self.get_gchooks
 
 PyPyTarget().interface(globals())
 
diff --git a/pypy/interpreter/executioncontext.py 
b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -404,7 +404,7 @@
         self._periodic_actions = []
         self._nonperiodic_actions = []
         self.has_bytecode_counter = False
-        self.fired_actions = None
+        self._fired_actions_reset()
         # the default value is not 100, unlike CPython 2.7, but a much
         # larger value, because we use a technique that not only allows
         # but actually *forces* another thread to run whenever the counter
@@ -416,13 +416,28 @@
         """Request for the action to be run before the next opcode."""
         if not action._fired:
             action._fired = True
-            if self.fired_actions is None:
-                self.fired_actions = []
-            self.fired_actions.append(action)
+            self._fired_actions_append(action)
             # set the ticker to -1 in order to force action_dispatcher()
             # to run at the next possible bytecode
             self.reset_ticker(-1)
 
+    def _fired_actions_reset(self):
+        # linked list of actions. We cannot use a normal RPython list because
+        # we want AsyncAction.fire() to be marked as @rgc.collect: this way,
+        # we can call it from e.g. GcHooks or cpyext's dealloc_trigger.
+        self._fired_actions_first = None
+        self._fired_actions_last = None
+
+    @rgc.no_collect
+    def _fired_actions_append(self, action):
+        assert action._next is None
+        if self._fired_actions_first is None:
+            self._fired_actions_first = action
+            self._fired_actions_last = action
+        else:
+            self._fired_actions_last._next = action
+            self._fired_actions_last = action
+
     @not_rpython
     def register_periodic_action(self, action, use_bytecode_counter):
         """
@@ -467,19 +482,26 @@
                 action.perform(ec, frame)
 
             # nonperiodic actions
-            list = self.fired_actions
-            if list is not None:
-                self.fired_actions = None
+            action = self._fired_actions_first
+            if action:
+                self._fired_actions_reset()
                 # NB. in case there are several actions, we reset each
                 # 'action._fired' to false only when we're about to call
                 # 'action.perform()'.  This means that if
                 # 'action.fire()' happens to be called any time before
                 # the corresponding perform(), the fire() has no
                 # effect---which is the effect we want, because
-                # perform() will be called anyway.
-                for action in list:
+                # perform() will be called anyway.  All such pending
+                # actions with _fired == True are still inside the old
+                # chained list.  As soon as we reset _fired to False,
+                # we also reset _next to None and we are ready for
+                # another fire().
+                while action is not None:
+                    next_action = action._next
+                    action._next = None
                     action._fired = False
                     action.perform(ec, frame)
+                    action = next_action
 
         self.action_dispatcher = action_dispatcher
 
@@ -512,10 +534,12 @@
     to occur between two opcodes, not at a completely random time.
     """
     _fired = False
+    _next = None
 
     def __init__(self, space):
         self.space = space
 
+    @rgc.no_collect
     def fire(self):
         """Request for the action to be run before the next opcode.
         The action must have been registered at space initalization time."""
diff --git a/pypy/interpreter/pyparser/error.py 
b/pypy/interpreter/pyparser/error.py
--- a/pypy/interpreter/pyparser/error.py
+++ b/pypy/interpreter/pyparser/error.py
@@ -6,6 +6,7 @@
                  lastlineno=0):
         self.msg = msg
         self.lineno = lineno
+        # NB: offset is a 1-based index!
         self.offset = offset
         self.text = text
         self.filename = filename
diff --git a/pypy/interpreter/pyparser/metaparser.py 
b/pypy/interpreter/pyparser/metaparser.py
--- a/pypy/interpreter/pyparser/metaparser.py
+++ b/pypy/interpreter/pyparser/metaparser.py
@@ -147,8 +147,10 @@
                 for label, next in state.arcs.iteritems():
                     arcs.append((self.make_label(gram, label), 
dfa.index(next)))
                 states.append((arcs, state.is_final))
-            gram.dfas.append((states, self.make_first(gram, name)))
-            assert len(gram.dfas) - 1 == gram.symbol_ids[name] - 256
+            symbol_id = gram.symbol_ids[name]
+            dfa = parser.DFA(symbol_id, states, self.make_first(gram, name))
+            gram.dfas.append(dfa)
+            assert len(gram.dfas) - 1 == symbol_id - 256
         gram.start = gram.symbol_ids[self.start_symbol]
         return gram
 
@@ -162,6 +164,13 @@
                 else:
                     gram.labels.append(gram.symbol_ids[label])
                     gram.symbol_to_label[label] = label_index
+                    first = self.first[label]
+                    if len(first) == 1:
+                        first, = first
+                        if not first[0].isupper():
+                            first = first.strip("\"'")
+                            assert label_index not in 
gram.token_to_error_string
+                            gram.token_to_error_string[label_index] = first
                     return label_index
             elif label.isupper():
                 token_index = gram.TOKENS[label]
@@ -183,7 +192,7 @@
                 else:
                     gram.labels.append(gram.KEYWORD_TOKEN)
                     gram.keyword_ids[value] = label_index
-                    return label_index
+                    result = label_index
             else:
                 try:
                     token_index = gram.OPERATOR_MAP[value]
@@ -194,7 +203,10 @@
                 else:
                     gram.labels.append(token_index)
                     gram.token_ids[token_index] = label_index
-                    return label_index
+                    result = label_index
+            assert result not in gram.token_to_error_string
+            gram.token_to_error_string[result] = value
+            return result
 
     def make_first(self, gram, name):
         original_firsts = self.first[name]
diff --git a/pypy/interpreter/pyparser/parser.py 
b/pypy/interpreter/pyparser/parser.py
--- a/pypy/interpreter/pyparser/parser.py
+++ b/pypy/interpreter/pyparser/parser.py
@@ -1,6 +1,7 @@
 """
 A CPython inspired RPython parser.
 """
+from rpython.rlib.objectmodel import not_rpython
 
 
 class Grammar(object):
@@ -16,6 +17,7 @@
         self.symbol_names = {}
         self.symbol_to_label = {}
         self.keyword_ids = {}
+        self.token_to_error_string = {}
         self.dfas = []
         self.labels = [0]
         self.token_ids = {}
@@ -41,6 +43,27 @@
             pass
         return True
 
+class DFA(object):
+    def __init__(self, symbol_id, states, first):
+        self.symbol_id = symbol_id
+        self.states = states
+        self.first = self._first_to_string(first)
+
+    def could_match_token(self, label_index):
+        pos = label_index >> 3
+        bit = 1 << (label_index & 0b111)
+        return bool(ord(self.first[label_index >> 3]) & bit)
+
+    @staticmethod
+    @not_rpython
+    def _first_to_string(first):
+        l = sorted(first.keys())
+        b = bytearray(32)
+        for label_index in l:
+            pos = label_index >> 3
+            bit = 1 << (label_index & 0b111)
+            b[pos] |= bit
+        return str(b)
 
 class Node(object):
 
@@ -127,14 +150,17 @@
 
 class Nonterminal(AbstractNonterminal):
     __slots__ = ("_children", )
-    def __init__(self, type, children):
+    def __init__(self, type, children=None):
         Node.__init__(self, type)
+        if children is None:
+            children = []
         self._children = children
 
     def __repr__(self):
         return "Nonterminal(type=%s, children=%r)" % (self.type, 
self._children)
 
     def get_child(self, i):
+        assert self._children is not None
         return self._children[i]
 
     def num_children(self):
@@ -168,25 +194,50 @@
 class ParseError(Exception):
 
     def __init__(self, msg, token_type, value, lineno, column, line,
-                 expected=-1):
+                 expected=-1, expected_str=None):
         self.msg = msg
         self.token_type = token_type
         self.value = value
         self.lineno = lineno
+        # this is a 0-based index
         self.column = column
         self.line = line
         self.expected = expected
+        self.expected_str = expected_str
 
     def __str__(self):
         return "ParserError(%s, %r)" % (self.token_type, self.value)
 
 
+class StackEntry(object):
+    def __init__(self, next, dfa, state):
+        self.next = next
+        self.dfa = dfa
+        self.state = state
+        self.node = None
+
+    def push(self, dfa, state):
+        return StackEntry(self, dfa, state)
+
+    def pop(self):
+        return self.next
+
+    def node_append_child(self, child):
+        node = self.node
+        if node is None:
+            self.node = Nonterminal1(self.dfa.symbol_id, child)
+        elif isinstance(node, Nonterminal1):
+            newnode = self.node = Nonterminal(
+                    self.dfa.symbol_id, [node._child, child])
+        else:
+            self.node.append_child(child)
+
+
 class Parser(object):
 
     def __init__(self, grammar):
         self.grammar = grammar
         self.root = None
-        self.stack = None
 
     def prepare(self, start=-1):
         """Setup the parser for parsing.
@@ -196,16 +247,15 @@
         if start == -1:
             start = self.grammar.start
         self.root = None
-        current_node = Nonterminal(start, [])
-        self.stack = []
-        self.stack.append((self.grammar.dfas[start - 256], 0, current_node))
+        self.stack = StackEntry(None, self.grammar.dfas[start - 256], 0)
 
     def add_token(self, token_type, value, lineno, column, line):
         label_index = self.classify(token_type, value, lineno, column, line)
         sym_id = 0 # for the annotator
         while True:
-            dfa, state_index, node = self.stack[-1]
-            states, first = dfa
+            dfa = self.stack.dfa
+            state_index = self.stack.state
+            states = dfa.states
             arcs, is_accepting = states[state_index]
             for i, next_state in arcs:
                 sym_id = self.grammar.labels[i]
@@ -217,16 +267,17 @@
                     # the stack.
                     while state[1] and not state[0]:
                         self.pop()
-                        if not self.stack:
+                        if self.stack is None:
                             # Parsing is done.
                             return True
-                        dfa, state_index, node = self.stack[-1]
-                        state = dfa[0][state_index]
+                        dfa = self.stack.dfa
+                        state_index = self.stack.state
+                        state = dfa.states[state_index]
                     return False
                 elif sym_id >= 256:
                     sub_node_dfa = self.grammar.dfas[sym_id - 256]
                     # Check if this token can start a child node.
-                    if label_index in sub_node_dfa[1]:
+                    if sub_node_dfa.could_match_token(label_index):
                         self.push(sub_node_dfa, next_state, sym_id, lineno,
                                   column)
                         break
@@ -235,7 +286,7 @@
                 # state is accepting, it's invalid input.
                 if is_accepting:
                     self.pop()
-                    if not self.stack:
+                    if self.stack is None:
                         raise ParseError("too much input", token_type, value,
                                          lineno, column, line)
                 else:
@@ -243,10 +294,13 @@
                     # error.
                     if len(arcs) == 1:
                         expected = sym_id
+                        expected_str = self.grammar.token_to_error_string.get(
+                                arcs[0][0], None)
                     else:
                         expected = -1
+                        expected_str = None
                     raise ParseError("bad input", token_type, value, lineno,
-                                     column, line, expected)
+                                     column, line, expected, expected_str)
 
     def classify(self, token_type, value, lineno, column, line):
         """Find the label for a token."""
@@ -262,26 +316,22 @@
 
     def shift(self, next_state, token_type, value, lineno, column):
         """Shift a non-terminal and prepare for the next state."""
-        dfa, state, node = self.stack[-1]
         new_node = Terminal(token_type, value, lineno, column)
-        node.append_child(new_node)
-        self.stack[-1] = (dfa, next_state, node)
+        self.stack.node_append_child(new_node)
+        self.stack.state = next_state
 
     def push(self, next_dfa, next_state, node_type, lineno, column):
         """Push a terminal and adjust the current state."""
-        dfa, state, node = self.stack[-1]
-        new_node = Nonterminal(node_type, [])
-        self.stack[-1] = (dfa, next_state, node)
-        self.stack.append((next_dfa, 0, new_node))
+        self.stack.state = next_state
+        self.stack = self.stack.push(next_dfa, 0)
 
     def pop(self):
         """Pop an entry off the stack and make its node a child of the last."""
-        dfa, state, node = self.stack.pop()
+        top = self.stack
+        self.stack = top.pop()
+        node = top.node
+        assert node is not None
         if self.stack:
-            # we are now done with node, so we can store it more efficiently if
-            # it has just one child
-            if node.num_children() == 1:
-                node = Nonterminal1(node.type, node.get_child(0))
-            self.stack[-1][2].append_child(node)
+            self.stack.node_append_child(node)
         else:
             self.root = node
diff --git a/pypy/interpreter/pyparser/pyparse.py 
b/pypy/interpreter/pyparser/pyparse.py
--- a/pypy/interpreter/pyparser/pyparse.py
+++ b/pypy/interpreter/pyparser/pyparse.py
@@ -132,7 +132,11 @@
                         w_message = space.str(e.get_w_value(space))
                         raise error.SyntaxError(space.text_w(w_message))
                     raise
+        if enc is not None:
+            compile_info.encoding = enc
+        return self._parse(textsrc, compile_info)
 
+    def _parse(self, textsrc, compile_info):
         flags = compile_info.flags
 
         # The tokenizer is very picky about how it wants its input.
@@ -181,13 +185,16 @@
                 else:
                     new_err = error.SyntaxError
                     msg = "invalid syntax"
-                raise new_err(msg, e.lineno, e.column, e.line,
+                    if e.expected_str is not None:
+                        msg += " (expected '%s')" % e.expected_str
+
+                # parser.ParseError(...).column is 0-based, but the offsets in 
the
+                # exceptions in the error module are 1-based, hence the '+ 1'
+                raise new_err(msg, e.lineno, e.column + 1, e.line,
                               compile_info.filename)
             else:
                 tree = self.root
         finally:
             # Avoid hanging onto the tree.
             self.root = None
-        if enc is not None:
-            compile_info.encoding = enc
         return tree
diff --git a/pypy/interpreter/pyparser/pytokenizer.py 
b/pypy/interpreter/pyparser/pytokenizer.py
--- a/pypy/interpreter/pyparser/pytokenizer.py
+++ b/pypy/interpreter/pyparser/pytokenizer.py
@@ -73,14 +73,14 @@
         logical line; continuation lines are included.
     """
     token_list = []
-    lnum = parenlev = continued = 0
+    lnum = continued = 0
     namechars = NAMECHARS
     numchars = NUMCHARS
     contstr, needcont = '', 0
     contline = None
     indents = [0]
     last_comment = ''
-    parenlevstart = (0, 0, "")
+    parenstack = []
 
     # make the annotator happy
     endDFA = DUMMY_DFA
@@ -97,7 +97,7 @@
         if contstr:
             if not line:
                 raise TokenError(
-                    "EOF while scanning triple-quoted string literal",
+                    "end of file (EOF) while scanning triple-quoted string 
literal",
                     strstart[2], strstart[0], strstart[1]+1,
                     token_list, lnum-1)
             endmatch = endDFA.recognize(line)
@@ -123,7 +123,7 @@
                 contline = contline + line
                 continue
 
-        elif parenlev == 0 and not continued:  # new statement
+        elif not parenstack and not continued:  # new statement
             if not line: break
             column = 0
             while pos < max:                   # measure leading whitespace
@@ -143,21 +143,21 @@
                 token_list.append((tokens.INDENT, line[:pos], lnum, 0, line))
                 last_comment = ''
             while column < indents[-1]:
-                indents = indents[:-1]
+                indents.pop()
                 token_list.append((tokens.DEDENT, '', lnum, pos, line))
                 last_comment = ''
             if column != indents[-1]:
                 err = "unindent does not match any outer indentation level"
-                raise TokenIndentationError(err, line, lnum, 0, token_list)
+                raise TokenIndentationError(err, line, lnum, column+1, 
token_list)
 
         else:                                  # continued statement
             if not line:
-                if parenlev > 0:
-                    lnum1, start1, line1 = parenlevstart
+                if parenstack:
+                    _, lnum1, start1, line1 = parenstack[0]
                     raise TokenError("parenthesis is never closed", line1,
                                      lnum1, start1 + 1, token_list, lnum)
-                raise TokenError("EOF in multi-line statement", line,
-                                 lnum, 0, token_list)
+                raise TokenError("end of file (EOF) in multi-line statement", 
line,
+                                 lnum, 0, token_list) # XXX why is the offset 
0 here?
             continued = 0
 
         while pos < max:
@@ -180,7 +180,7 @@
                     token_list.append((tokens.NUMBER, token, lnum, start, 
line))
                     last_comment = ''
                 elif initial in '\r\n':
-                    if parenlev <= 0:
+                    if not parenstack:
                         tok = (tokens.NEWLINE, last_comment, lnum, start, line)
                         token_list.append(tok)
                     last_comment = ''
@@ -222,14 +222,22 @@
                     continued = 1
                 else:
                     if initial in '([{':
-                        if parenlev == 0:
-                            parenlevstart = (lnum, start, line)
-                        parenlev = parenlev + 1
+                        parenstack.append((initial, lnum, start, line))
                     elif initial in ')]}':
-                        parenlev = parenlev - 1
-                        if parenlev < 0:
+                        if not parenstack:
                             raise TokenError("unmatched '%s'" % initial, line,
                                              lnum, start + 1, token_list)
+                        opening, lnum1, start1, line1 = parenstack.pop()
+                        if not ((opening == "(" and initial == ")") or
+                                (opening == "[" and initial == "]") or
+                                (opening == "{" and initial == "}")):
+                            msg = "closing parenthesis '%s' does not match 
opening parenthesis '%s'" % (
+                                        initial, opening)
+
+                            if lnum1 != lnum:
+                                msg += " on line " + str(lnum1)
+                            raise TokenError(
+                                    msg, line, lnum, start + 1, token_list)
                     if token in python_opmap:
                         punct = python_opmap[token]
                     else:
@@ -241,7 +249,7 @@
                 if start < 0:
                     start = pos
                 if start<max and line[start] in single_quoted:
-                    raise TokenError("EOL while scanning string literal",
+                    raise TokenError("end of line (EOL) while scanning string literal",
                              line, lnum, start+1, token_list)
                 tok = (tokens.ERRORTOKEN, line[pos], lnum, pos, line)
                 token_list.append(tok)
diff --git a/pypy/interpreter/pyparser/test/targetparse.py b/pypy/interpreter/pyparser/test/targetparse.py
new file mode 100644
--- /dev/null
+++ b/pypy/interpreter/pyparser/test/targetparse.py
@@ -0,0 +1,50 @@
+import sys
+import os
+ROOT =  os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+print ROOT
+sys.path.insert(0, str(ROOT))
+import time
+from pypy.interpreter.pyparser import pyparse
+
+
+
+class FakeSpace(object):
+    pass
+
+fakespace = FakeSpace()
+
+def bench(fn, s):
+    a = time.clock()
+    info = pyparse.CompileInfo("<string>", "exec")
+    parser = pyparse.PythonParser(fakespace)
+    tree = parser._parse(s, info)
+    b = time.clock()
+    print fn, (b-a)
+
+
+def entry_point(argv):
+    if len(argv) == 2:
+        fn = argv[1]
+    else:
+        fn = "../../../../rpython/rlib/unicodedata/unicodedb_5_2_0.py"
+    fd = os.open(fn, os.O_RDONLY, 0777)
+    res = []
+    while True:
+        s = os.read(fd, 4096)
+        if not s:
+            break
+        res.append(s)
+    os.close(fd)
+    s = "".join(res)
+    print len(s)
+    bench(fn, s)
+
+    return 0
+
+# _____ Define and setup target ___
+
+def target(*args):
+    return entry_point, None
+
+if __name__ == '__main__':
+    entry_point(sys.argv)
diff --git a/pypy/interpreter/pyparser/test/test_metaparser.py b/pypy/interpreter/pyparser/test/test_metaparser.py
--- a/pypy/interpreter/pyparser/test/test_metaparser.py
+++ b/pypy/interpreter/pyparser/test/test_metaparser.py
@@ -34,8 +34,8 @@
         assert len(g.dfas) == 1
         eval_sym = g.symbol_ids["eval"]
         assert g.start == eval_sym
-        states, first = g.dfas[eval_sym - 256]
-        assert states == [([(1, 1)], False), ([], True)]
+        dfa = g.dfas[eval_sym - 256]
+        assert dfa.states == [([(1, 1)], False), ([], True)]
         assert g.labels[0] == 0
 
     def test_load_python_grammars(self):
@@ -51,7 +51,7 @@
     def test_items(self):
         g = self.gram_for("foo: NAME STRING OP '+'")
         assert len(g.dfas) == 1
-        states = g.dfas[g.symbol_ids["foo"] - 256][0]
+        states = g.dfas[g.symbol_ids["foo"] - 256].states
         last = states[0][0][0][1]
         for state in states[1:-1]:
             assert last < state[0][0][1]
diff --git a/pypy/interpreter/pyparser/test/test_parser.py b/pypy/interpreter/pyparser/test/test_parser.py
--- a/pypy/interpreter/pyparser/test/test_parser.py
+++ b/pypy/interpreter/pyparser/test/test_parser.py
@@ -7,6 +7,12 @@
 from pypy.interpreter.pyparser.test.test_metaparser import MyGrammar
 
 
+def test_char_set():
+    first = {5: None, 9: None, 100: None, 255:None}
+    p = parser.DFA(None, None, first)
+    for i in range(256):
+        assert p.could_match_token(i) == (i in first)
+
 class SimpleParser(parser.Parser):
 
     def parse(self, input):
@@ -55,8 +61,7 @@
             n = parser.Terminal(tp, value, 0, 0)
         else:
             tp = gram.symbol_ids[data[0]]
-            children = []
-            n = parser.Nonterminal(tp, children)
+            n = parser.Nonterminal(tp)
         new_indent = count_indent(line)
         if new_indent >= last_indent:
             if new_indent == last_indent and node_stack:
@@ -291,3 +296,37 @@
             NEWLINE
             ENDMARKER"""
         assert tree_from_string(expected, gram) == p.parse("hi 42 end")
+
+
+    def test_optimized_terminal(self):
+        gram = """foo: bar baz 'end' NEWLINE ENDMARKER
+bar: NAME
+baz: NUMBER
+"""
+        p, gram = self.parser_for(gram, False)
+        expected = """
+        foo
+            bar
+                NAME "a_name"
+            baz
+                NUMBER "42"
+            NAME "end"
+            NEWLINE
+            ENDMARKER"""
+        input = "a_name 42 end"
+        tree = p.parse(input)
+        assert tree_from_string(expected, gram) == tree
+        assert isinstance(tree, parser.Nonterminal)
+        assert isinstance(tree.get_child(0), parser.Nonterminal1)
+        assert isinstance(tree.get_child(1), parser.Nonterminal1)
+
+
+    def test_error_string(self):
+        p, gram = self.parser_for(
+            "foo: 'if' NUMBER '+' NUMBER"
+        )
+        info = py.test.raises(parser.ParseError, p.parse, "if 42")
+        info.value.expected_str is None
+        info = py.test.raises(parser.ParseError, p.parse, "if 42 42")
+        info.value.expected_str == '+'
+
diff --git a/pypy/interpreter/pyparser/test/test_pyparse.py b/pypy/interpreter/pyparser/test/test_pyparse.py
--- a/pypy/interpreter/pyparser/test/test_pyparse.py
+++ b/pypy/interpreter/pyparser/test/test_pyparse.py
@@ -76,14 +76,14 @@
         exc = py.test.raises(SyntaxError, parse, "name another for").value
         assert exc.msg == "invalid syntax"
         assert exc.lineno == 1
-        assert exc.offset == 5
+        assert exc.offset == 6
         assert exc.text.startswith("name another for")
         exc = py.test.raises(SyntaxError, parse, "x = \"blah\n\n\n").value
-        assert exc.msg == "EOL while scanning string literal"
+        assert exc.msg == "end of line (EOL) while scanning string literal"
         assert exc.lineno == 1
         assert exc.offset == 5
         exc = py.test.raises(SyntaxError, parse, "x = '''\n\n\n").value
-        assert exc.msg == "EOF while scanning triple-quoted string literal"
+        assert exc.msg == "end of file (EOF) while scanning triple-quoted string literal"
         assert exc.lineno == 1
         assert exc.offset == 5
         assert exc.lastlineno == 3
@@ -112,7 +112,7 @@
         assert exc.msg == "expected an indented block"
         assert exc.lineno == 3
         assert exc.text.startswith("pass")
-        assert exc.offset == 0
+        assert exc.offset == 1
         input = "hi\n    indented"
         exc = py.test.raises(IndentationError, parse, input).value
         assert exc.msg == "unexpected indent"
@@ -120,6 +120,7 @@
         exc = py.test.raises(IndentationError, parse, input).value
         assert exc.msg == "unindent does not match any outer indentation level"
         assert exc.lineno == 3
+        assert exc.offset == 3
 
     def test_mac_newline(self):
         self.parse("this_is\ra_mac\rfile")
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8: merge default into branch

Reply via email to