Author: Matti Picus <[email protected]>
Branch: release-pypy2.7-v7.x
Changeset: r96317:990cef41fe11
Date: 2019-03-14 17:19 +0200
http://bitbucket.org/pypy/pypy/changeset/990cef41fe11/

Log:    merge default into release

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -306,8 +306,8 @@
     return w_err_handler
 
 
-@unwrap_spec(errors='text')
-def encode(space, w_obj, w_encoding=None, errors='strict'):
+@unwrap_spec(encoding='text_or_none', errors='text_or_none')
+def encode(space, w_obj, encoding=None, errors=None):
     """encode(obj, [encoding[,errors]]) -> object
 
     Encodes obj using the codec registered for encoding. encoding defaults
@@ -317,13 +317,19 @@
     'xmlcharrefreplace' as well as any other name registered with
     codecs.register_error that can handle ValueErrors.
     """
-    if w_encoding is None:
+    if encoding is None:
         encoding = space.sys.defaultencoding
+    w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
+    if errors:
+        w_res = space.call_function(w_encoder, w_obj, space.newtext(errors))
     else:
-        encoding = space.text_w(w_encoding)
-    w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
-    w_res = space.call_function(w_encoder, w_obj, space.newtext(errors))
-    return space.getitem(w_res, space.newint(0))
+        w_res = space.call_function(w_encoder, w_obj)
+    w_retval = space.getitem(w_res, space.newint(0))
+    if not space.isinstance_w(w_retval, space.w_bytes):
+        raise oefmt(space.w_TypeError,
+                    "encoder did not return an string object (type '%T')",
+                    w_retval)
+    return w_retval
 
 @unwrap_spec(errors='text_or_none')
 def readbuffer_encode(space, w_data, errors='strict'):
@@ -335,8 +341,8 @@
     s = space.getarg_w('t#', w_data)
     return space.newtuple([space.newbytes(s), space.newint(len(s))])
 
-@unwrap_spec(errors='text')
-def decode(space, w_obj, w_encoding=None, errors='strict'):
+@unwrap_spec(encoding='text_or_none', errors='text_or_none')
+def decode(space, w_obj, encoding=None, errors=None):
     """decode(obj, [encoding[,errors]]) -> object
 
     Decodes obj using the codec registered for encoding. encoding defaults
@@ -346,19 +352,17 @@
     as well as any other name registered with codecs.register_error that is
     able to handle ValueErrors.
     """
-    if w_encoding is None:
+    if encoding is None:
         encoding = space.sys.defaultencoding
+    w_decoder = space.getitem(lookup_codec(space, encoding), space.newint(1))
+    if errors:
+        w_res = space.call_function(w_decoder, w_obj, space.newtext(errors))
     else:
-        encoding = space.text_w(w_encoding)
-    w_decoder = space.getitem(lookup_codec(space, encoding), space.newint(1))
-    if space.is_true(w_decoder):
-        w_res = space.call_function(w_decoder, w_obj, space.newtext(errors))
-            if (not space.isinstance_w(w_res, space.w_tuple) or space.len_w(w_res) != 2):
-            raise oefmt(space.w_TypeError,
-                        "encoder must return a tuple (object, integer)")
-        return space.getitem(w_res, space.newint(0))
-    else:
-        assert 0, "XXX, what to do here?"
+        w_res = space.call_function(w_decoder, w_obj)
+    if (not space.isinstance_w(w_res, space.w_tuple) or space.len_w(w_res) != 2):
+        raise oefmt(space.w_TypeError,
+                    "encoder must return a tuple (object, integer)")
+    return space.getitem(w_res, space.newint(0))
 
 @unwrap_spec(errors='text')
 def register_error(space, errors, w_handler):
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -468,8 +468,10 @@
                 return (encode_one, decode_one, None, None)
             return None
         _codecs.register(search_function)
-        assert u"hello".encode("onearg") == 'foo'
-        assert b"hello".decode("onearg") == 'foo'
+        assert u"hello".encode("onearg") == b'foo'
+        assert b"hello".decode("onearg") == u'foo'
+        assert _codecs.encode(u"hello", "onearg") == b'foo'
+        assert _codecs.decode(b"hello", "onearg") == u'foo'
 
     def test_cpytest_decode(self):
         import codecs
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -1188,7 +1188,9 @@
     state.C.get_pyos_inputhook = rffi.llexternal(
         '_PyPy_get_PyOS_InputHook', [], FUNCPTR,
         compilation_info=eci, _nowrapper=True)
-
+    state.C.tuple_new = rffi.llexternal(
+        'tuple_new', [PyTypeObjectPtr, PyObject, PyObject], PyObject,
+        compilation_info=eci, _nowrapper=True)
 
 def init_function(func):
     INIT_FUNCTIONS.append(func)
diff --git a/pypy/module/cpyext/include/tupleobject.h b/pypy/module/cpyext/include/tupleobject.h
--- a/pypy/module/cpyext/include/tupleobject.h
+++ b/pypy/module/cpyext/include/tupleobject.h
@@ -18,6 +18,7 @@
 
 PyAPI_FUNC(PyObject *) PyTuple_New(Py_ssize_t size);
 PyAPI_FUNC(void) _PyPy_tuple_dealloc(PyObject *);
+PyAPI_FUNC(PyObject *) tuple_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
 
 /* defined in varargswrapper.c */
 PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...);
diff --git a/pypy/module/cpyext/src/tupleobject.c b/pypy/module/cpyext/src/tupleobject.c
--- a/pypy/module/cpyext/src/tupleobject.c
+++ b/pypy/module/cpyext/src/tupleobject.c
@@ -89,3 +89,48 @@
 done:
     Py_TRASHCAN_SAFE_END(op)
 }
+
+static PyObject *
+tuple_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+
+PyObject *
+tuple_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PyObject *arg = NULL;
+    static char *kwlist[] = {"sequence", 0};
+
+    if (type != &PyTuple_Type)
+        return tuple_subtype_new(type, args, kwds);
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:tuple", kwlist, &arg))
+        return NULL;
+
+    if (arg == NULL)
+        return PyTuple_New(0);
+    else
+        return PySequence_Tuple(arg);
+}
+
+static PyObject *
+tuple_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PyObject *tmp, *newobj, *item;
+    Py_ssize_t i, n;
+
+    assert(PyType_IsSubtype(type, &PyTuple_Type));
+    tmp = tuple_new(&PyTuple_Type, args, kwds);
+    if (tmp == NULL)
+        return NULL;
+    assert(PyTuple_Check(tmp));
+    newobj = type->tp_alloc(type, n = PyTuple_GET_SIZE(tmp));
+    if (newobj == NULL)
+        return NULL;
+    for (i = 0; i < n; i++) {
+        item = PyTuple_GET_ITEM(tmp, i);
+        Py_INCREF(item);
+        PyTuple_SET_ITEM(newobj, i, item);
+    }
+    Py_DECREF(tmp);
+    return newobj;
+}
+
+
diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py
--- a/pypy/module/cpyext/test/test_tupleobject.py
+++ b/pypy/module/cpyext/test/test_tupleobject.py
@@ -226,3 +226,42 @@
             raises(SystemError, module.set_after_use, s)
         else:
             module.set_after_use(s)
+
+    def test_mp_length(self):
+        # issue 2968: creating a subclass of tuple in C led to recursion
+        # since the default tp_new needs to build a w_obj, but that needs
+        # to call space.len_w, which needs to call tp_new.
+        module = self.import_extension('foo', [
+            ("get_size", "METH_NOARGS",
+             """
+                return (PyObject*)&THPSizeType;
+             """),
+            ], prologue='''
+                #include "Python.h"
+
+                struct THPSize {
+                  PyTupleObject tuple;
+                } THPSize;
+
+                static PyMappingMethods THPSize_as_mapping = {
+                    0, //PyTuple_Type.tp_as_mapping->mp_length,
+                    0,
+                    0
+                };
+
+                PyTypeObject THPSizeType = {
+                  PyVarObject_HEAD_INIT(0, 0)
+                  "torch.Size",                          /* tp_name */
+                  sizeof(THPSize),                       /* tp_basicsize */
+                };
+            ''' , more_init = '''
+                THPSize_as_mapping.mp_length = PyTuple_Type.tp_as_mapping->mp_length;
+                THPSizeType.tp_base = &PyTuple_Type;
+                THPSizeType.tp_flags = Py_TPFLAGS_DEFAULT;
+                THPSizeType.tp_as_mapping = &THPSize_as_mapping;
+                THPSizeType.tp_new = PyTuple_Type.tp_new;
+                if (PyType_Ready(&THPSizeType) < 0) INITERROR;
+            ''')
+        SZ = module.get_size()
+        s = SZ((1, 2, 3))
+        assert len(s) == 3
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -796,6 +796,11 @@
         update_all_slots(space, w_type, pto)
     else:
         update_all_slots_builtin(space, w_type, pto)
+
+    # XXX generalize this pattern for various slot functions implemented in C
+    if space.is_w(w_type, space.w_tuple):
+        pto.c_tp_new = state.C.tuple_new
+
     if not pto.c_tp_new:
         base_object_pyo = make_ref(space, space.w_object)
         base_object_pto = rffi.cast(PyTypeObjectPtr, base_object_pyo)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1071,16 +1071,17 @@
     return encoding, errors
 
 
-def encode_object(space, w_object, encoding, errors):
-    w_encoder = None
+def encode_object(space, w_obj, encoding, errors):
+    from pypy.module._codecs.interp_codecs import encode
     if errors is None or errors == 'strict':
+        # fast path
         if ((encoding is None and space.sys.defaultencoding == 'ascii') or
              encoding == 'ascii'):
-            s = space.utf8_w(w_object)
+            s = space.utf8_w(w_obj)
             try:
                 rutf8.check_ascii(s)
             except rutf8.CheckError as a:
-                if space.isinstance_w(w_object, space.w_unicode):
+                if space.isinstance_w(w_obj, space.w_unicode):
                     eh = unicodehelper.encode_error_handler(space)
                 else:
                     # must be a bytes-like object. In order to encode it,
@@ -1093,32 +1094,17 @@
             return space.newbytes(s)
         if ((encoding is None and space.sys.defaultencoding == 'utf8') or
              encoding == 'utf-8' or encoding == 'utf8' or encoding == 'UTF-8'):
-            utf8 = space.utf8_w(w_object)
+            utf8 = space.utf8_w(w_obj)
             if rutf8.has_surrogates(utf8):
                 utf8 = rutf8.reencode_utf8_with_surrogates(utf8)
             return space.newbytes(utf8)
-    if encoding is None:
-        # Get the encoder functions as a wrapped object.
-        # This lookup is cached.
-        w_encoder = space.sys.get_w_default_encoder()
-    if w_encoder is None:
-        from pypy.module._codecs.interp_codecs import lookup_codec
-        w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
-    if errors is None:
-        w_restuple = space.call_function(w_encoder, w_object)
-    else:
-        w_errors = space.newtext(errors)
-        w_restuple = space.call_function(w_encoder, w_object, w_errors)
-    w_retval = space.getitem(w_restuple, space.newint(0))
-    if not space.isinstance_w(w_retval, space.w_bytes):
-        raise oefmt(space.w_TypeError,
-                    "encoder did not return an string object (type '%T')",
-                    w_retval)
-    return w_retval
+    return encode(space, w_obj, encoding, errors) 
 
 
 def decode_object(space, w_obj, encoding, errors):
+    from pypy.module._codecs.interp_codecs import lookup_codec, decode 
     if errors is None or errors == 'strict':
+        # fast paths
         if encoding is None:
             encoding = getdefaultencoding(space)
         if encoding == 'ascii':
@@ -1133,20 +1119,9 @@
                 s = space.charbuf_w(w_obj)
             lgt = unicodehelper.check_utf8_or_raise(space, s)
             return space.newutf8(s, lgt)
-    w_decoder = None
     if encoding is None:
-        # Get the decoder functions as a wrapped object.
-        # This lookup is cached.
-        w_decoder = space.sys.get_w_default_decoder()
-    if w_decoder is None:
-        from pypy.module._codecs.interp_codecs import lookup_codec
-        w_decoder = space.getitem(lookup_codec(space, encoding), space.newint(1))
-    if errors is None:
-        w_retval = space.call_function(w_decoder, w_obj)
-    else:
-        w_retval = space.call_function(w_decoder, w_obj, space.newtext(errors))
-    return space.getitem(w_retval, space.newint(0))
-
+        encoding = space.sys.defaultencoding
+    return decode(space, w_obj, encoding, errors)
 
 def unicode_from_encoded_object(space, w_obj, encoding, errors):
     # explicitly block bytearray on 2.7
diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py
--- a/rpython/rlib/rsre/rsre_utf8.py
+++ b/rpython/rlib/rsre/rsre_utf8.py
@@ -40,17 +40,23 @@
     prev_indirect = prev
 
     def next_n(self, position, n, end_position):
-        for i in range(n):
+        i = 0
+        # avoid range(n) since n can be quite large
+        while i < n:
             if position >= end_position:
                 raise EndOfString
             position = rutf8.next_codepoint_pos(self._utf8, position)
+            i += 1
         return position
 
     def prev_n(self, position, n, start_position):
-        for i in range(n):
+        i = 0
+        # avoid range(n) since n can be quite large
+        while i < n:
             if position <= start_position:
                 raise EndOfString
             position = rutf8.prev_codepoint_pos(self._utf8, position)
+            i += 1
         assert position >= 0
         return position
 
diff --git a/rpython/rlib/test/test_rawrefcount_boehm.py b/rpython/rlib/test/test_rawrefcount_boehm.py
--- a/rpython/rlib/test/test_rawrefcount_boehm.py
+++ b/rpython/rlib/test/test_rawrefcount_boehm.py
@@ -111,7 +111,7 @@
         pyobjs.append(varname)
         return varname
 
-    for op in draw(strategies.lists(operations, average_size=250)):
+    for op in draw(strategies.lists(operations)):
         if op == 'new_gcobj':
             new_gcobj()
         elif op == 'new_pyobj':
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to