Author: Matti Picus <[email protected]>
Branch: release-pypy2.7-v7.x
Changeset: r96317:990cef41fe11
Date: 2019-03-14 17:19 +0200
http://bitbucket.org/pypy/pypy/changeset/990cef41fe11/
Log: merge default into release
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -306,8 +306,8 @@
return w_err_handler
-@unwrap_spec(errors='text')
-def encode(space, w_obj, w_encoding=None, errors='strict'):
+@unwrap_spec(encoding='text_or_none', errors='text_or_none')
+def encode(space, w_obj, encoding=None, errors=None):
"""encode(obj, [encoding[,errors]]) -> object
Encodes obj using the codec registered for encoding. encoding defaults
@@ -317,13 +317,19 @@
'xmlcharrefreplace' as well as any other name registered with
codecs.register_error that can handle ValueErrors.
"""
- if w_encoding is None:
+ if encoding is None:
encoding = space.sys.defaultencoding
+ w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
+ if errors:
+ w_res = space.call_function(w_encoder, w_obj, space.newtext(errors))
else:
- encoding = space.text_w(w_encoding)
- w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
- w_res = space.call_function(w_encoder, w_obj, space.newtext(errors))
- return space.getitem(w_res, space.newint(0))
+ w_res = space.call_function(w_encoder, w_obj)
+ w_retval = space.getitem(w_res, space.newint(0))
+ if not space.isinstance_w(w_retval, space.w_bytes):
+ raise oefmt(space.w_TypeError,
+ "encoder did not return an string object (type '%T')",
+ w_retval)
+ return w_retval
@unwrap_spec(errors='text_or_none')
def readbuffer_encode(space, w_data, errors='strict'):
@@ -335,8 +341,8 @@
s = space.getarg_w('t#', w_data)
return space.newtuple([space.newbytes(s), space.newint(len(s))])
-@unwrap_spec(errors='text')
-def decode(space, w_obj, w_encoding=None, errors='strict'):
+@unwrap_spec(encoding='text_or_none', errors='text_or_none')
+def decode(space, w_obj, encoding=None, errors=None):
"""decode(obj, [encoding[,errors]]) -> object
Decodes obj using the codec registered for encoding. encoding defaults
@@ -346,19 +352,17 @@
as well as any other name registered with codecs.register_error that is
able to handle ValueErrors.
"""
- if w_encoding is None:
+ if encoding is None:
encoding = space.sys.defaultencoding
+ w_decoder = space.getitem(lookup_codec(space, encoding), space.newint(1))
+ if errors:
+ w_res = space.call_function(w_decoder, w_obj, space.newtext(errors))
else:
- encoding = space.text_w(w_encoding)
- w_decoder = space.getitem(lookup_codec(space, encoding), space.newint(1))
- if space.is_true(w_decoder):
- w_res = space.call_function(w_decoder, w_obj, space.newtext(errors))
- if (not space.isinstance_w(w_res, space.w_tuple) or space.len_w(w_res) != 2):
- raise oefmt(space.w_TypeError,
- "encoder must return a tuple (object, integer)")
- return space.getitem(w_res, space.newint(0))
- else:
- assert 0, "XXX, what to do here?"
+ w_res = space.call_function(w_decoder, w_obj)
+ if (not space.isinstance_w(w_res, space.w_tuple) or space.len_w(w_res) != 2):
+ raise oefmt(space.w_TypeError,
+ "encoder must return a tuple (object, integer)")
+ return space.getitem(w_res, space.newint(0))
@unwrap_spec(errors='text')
def register_error(space, errors, w_handler):
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -468,8 +468,10 @@
return (encode_one, decode_one, None, None)
return None
_codecs.register(search_function)
- assert u"hello".encode("onearg") == 'foo'
- assert b"hello".decode("onearg") == 'foo'
+ assert u"hello".encode("onearg") == b'foo'
+ assert b"hello".decode("onearg") == u'foo'
+ assert _codecs.encode(u"hello", "onearg") == b'foo'
+ assert _codecs.decode(b"hello", "onearg") == u'foo'
def test_cpytest_decode(self):
import codecs
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -1188,7 +1188,9 @@
state.C.get_pyos_inputhook = rffi.llexternal(
'_PyPy_get_PyOS_InputHook', [], FUNCPTR,
compilation_info=eci, _nowrapper=True)
-
+ state.C.tuple_new = rffi.llexternal(
+ 'tuple_new', [PyTypeObjectPtr, PyObject, PyObject], PyObject,
+ compilation_info=eci, _nowrapper=True)
def init_function(func):
INIT_FUNCTIONS.append(func)
diff --git a/pypy/module/cpyext/include/tupleobject.h b/pypy/module/cpyext/include/tupleobject.h
--- a/pypy/module/cpyext/include/tupleobject.h
+++ b/pypy/module/cpyext/include/tupleobject.h
@@ -18,6 +18,7 @@
PyAPI_FUNC(PyObject *) PyTuple_New(Py_ssize_t size);
PyAPI_FUNC(void) _PyPy_tuple_dealloc(PyObject *);
+PyAPI_FUNC(PyObject *) tuple_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
/* defined in varargswrapper.c */
PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...);
diff --git a/pypy/module/cpyext/src/tupleobject.c b/pypy/module/cpyext/src/tupleobject.c
--- a/pypy/module/cpyext/src/tupleobject.c
+++ b/pypy/module/cpyext/src/tupleobject.c
@@ -89,3 +89,48 @@
done:
Py_TRASHCAN_SAFE_END(op)
}
+
+static PyObject *
+tuple_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+
+PyObject *
+tuple_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+ PyObject *arg = NULL;
+ static char *kwlist[] = {"sequence", 0};
+
+ if (type != &PyTuple_Type)
+ return tuple_subtype_new(type, args, kwds);
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:tuple", kwlist, &arg))
+ return NULL;
+
+ if (arg == NULL)
+ return PyTuple_New(0);
+ else
+ return PySequence_Tuple(arg);
+}
+
+static PyObject *
+tuple_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+ PyObject *tmp, *newobj, *item;
+ Py_ssize_t i, n;
+
+ assert(PyType_IsSubtype(type, &PyTuple_Type));
+ tmp = tuple_new(&PyTuple_Type, args, kwds);
+ if (tmp == NULL)
+ return NULL;
+ assert(PyTuple_Check(tmp));
+ newobj = type->tp_alloc(type, n = PyTuple_GET_SIZE(tmp));
+ if (newobj == NULL)
+ return NULL;
+ for (i = 0; i < n; i++) {
+ item = PyTuple_GET_ITEM(tmp, i);
+ Py_INCREF(item);
+ PyTuple_SET_ITEM(newobj, i, item);
+ }
+ Py_DECREF(tmp);
+ return newobj;
+}
+
+
diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py
--- a/pypy/module/cpyext/test/test_tupleobject.py
+++ b/pypy/module/cpyext/test/test_tupleobject.py
@@ -226,3 +226,42 @@
raises(SystemError, module.set_after_use, s)
else:
module.set_after_use(s)
+
+ def test_mp_length(self):
+ # issue 2968: creating a subclass of tuple in C led to recursion
+ # since the default tp_new needs to build a w_obj, but that needs
+ # to call space.len_w, which needs to call tp_new.
+ module = self.import_extension('foo', [
+ ("get_size", "METH_NOARGS",
+ """
+ return (PyObject*)&THPSizeType;
+ """),
+ ], prologue='''
+ #include "Python.h"
+
+ struct THPSize {
+ PyTupleObject tuple;
+ } THPSize;
+
+ static PyMappingMethods THPSize_as_mapping = {
+ 0, //PyTuple_Type.tp_as_mapping->mp_length,
+ 0,
+ 0
+ };
+
+ PyTypeObject THPSizeType = {
+ PyVarObject_HEAD_INIT(0, 0)
+ "torch.Size", /* tp_name */
+ sizeof(THPSize), /* tp_basicsize */
+ };
+ ''' , more_init = '''
+ THPSize_as_mapping.mp_length = PyTuple_Type.tp_as_mapping->mp_length;
+ THPSizeType.tp_base = &PyTuple_Type;
+ THPSizeType.tp_flags = Py_TPFLAGS_DEFAULT;
+ THPSizeType.tp_as_mapping = &THPSize_as_mapping;
+ THPSizeType.tp_new = PyTuple_Type.tp_new;
+ if (PyType_Ready(&THPSizeType) < 0) INITERROR;
+ ''')
+ SZ = module.get_size()
+ s = SZ((1, 2, 3))
+ assert len(s) == 3
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -796,6 +796,11 @@
update_all_slots(space, w_type, pto)
else:
update_all_slots_builtin(space, w_type, pto)
+
+ # XXX generalize this pattern for various slot functions implemented in C
+ if space.is_w(w_type, space.w_tuple):
+ pto.c_tp_new = state.C.tuple_new
+
if not pto.c_tp_new:
base_object_pyo = make_ref(space, space.w_object)
base_object_pto = rffi.cast(PyTypeObjectPtr, base_object_pyo)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1071,16 +1071,17 @@
return encoding, errors
-def encode_object(space, w_object, encoding, errors):
- w_encoder = None
+def encode_object(space, w_obj, encoding, errors):
+ from pypy.module._codecs.interp_codecs import encode
if errors is None or errors == 'strict':
+ # fast path
if ((encoding is None and space.sys.defaultencoding == 'ascii') or
encoding == 'ascii'):
- s = space.utf8_w(w_object)
+ s = space.utf8_w(w_obj)
try:
rutf8.check_ascii(s)
except rutf8.CheckError as a:
- if space.isinstance_w(w_object, space.w_unicode):
+ if space.isinstance_w(w_obj, space.w_unicode):
eh = unicodehelper.encode_error_handler(space)
else:
# must be a bytes-like object. In order to encode it,
@@ -1093,32 +1094,17 @@
return space.newbytes(s)
if ((encoding is None and space.sys.defaultencoding == 'utf8') or
encoding == 'utf-8' or encoding == 'utf8' or encoding == 'UTF-8'):
- utf8 = space.utf8_w(w_object)
+ utf8 = space.utf8_w(w_obj)
if rutf8.has_surrogates(utf8):
utf8 = rutf8.reencode_utf8_with_surrogates(utf8)
return space.newbytes(utf8)
- if encoding is None:
- # Get the encoder functions as a wrapped object.
- # This lookup is cached.
- w_encoder = space.sys.get_w_default_encoder()
- if w_encoder is None:
- from pypy.module._codecs.interp_codecs import lookup_codec
- w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
- if errors is None:
- w_restuple = space.call_function(w_encoder, w_object)
- else:
- w_errors = space.newtext(errors)
- w_restuple = space.call_function(w_encoder, w_object, w_errors)
- w_retval = space.getitem(w_restuple, space.newint(0))
- if not space.isinstance_w(w_retval, space.w_bytes):
- raise oefmt(space.w_TypeError,
- "encoder did not return an string object (type '%T')",
- w_retval)
- return w_retval
+ return encode(space, w_obj, encoding, errors)
def decode_object(space, w_obj, encoding, errors):
+ from pypy.module._codecs.interp_codecs import lookup_codec, decode
if errors is None or errors == 'strict':
+ # fast paths
if encoding is None:
encoding = getdefaultencoding(space)
if encoding == 'ascii':
@@ -1133,20 +1119,9 @@
s = space.charbuf_w(w_obj)
lgt = unicodehelper.check_utf8_or_raise(space, s)
return space.newutf8(s, lgt)
- w_decoder = None
if encoding is None:
- # Get the decoder functions as a wrapped object.
- # This lookup is cached.
- w_decoder = space.sys.get_w_default_decoder()
- if w_decoder is None:
- from pypy.module._codecs.interp_codecs import lookup_codec
- w_decoder = space.getitem(lookup_codec(space, encoding), space.newint(1))
- if errors is None:
- w_retval = space.call_function(w_decoder, w_obj)
- else:
- w_retval = space.call_function(w_decoder, w_obj, space.newtext(errors))
- return space.getitem(w_retval, space.newint(0))
-
+ encoding = space.sys.defaultencoding
+ return decode(space, w_obj, encoding, errors)
def unicode_from_encoded_object(space, w_obj, encoding, errors):
# explicitly block bytearray on 2.7
diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py
--- a/rpython/rlib/rsre/rsre_utf8.py
+++ b/rpython/rlib/rsre/rsre_utf8.py
@@ -40,17 +40,23 @@
prev_indirect = prev
def next_n(self, position, n, end_position):
- for i in range(n):
+ i = 0
+ # avoid range(n) since n can be quite large
+ while i < n:
if position >= end_position:
raise EndOfString
position = rutf8.next_codepoint_pos(self._utf8, position)
+ i += 1
return position
def prev_n(self, position, n, start_position):
- for i in range(n):
+ i = 0
+ # avoid range(n) since n can be quite large
+ while i < n:
if position <= start_position:
raise EndOfString
position = rutf8.prev_codepoint_pos(self._utf8, position)
+ i += 1
assert position >= 0
return position
diff --git a/rpython/rlib/test/test_rawrefcount_boehm.py b/rpython/rlib/test/test_rawrefcount_boehm.py
--- a/rpython/rlib/test/test_rawrefcount_boehm.py
+++ b/rpython/rlib/test/test_rawrefcount_boehm.py
@@ -111,7 +111,7 @@
pyobjs.append(varname)
return varname
- for op in draw(strategies.lists(operations, average_size=250)):
+ for op in draw(strategies.lists(operations)):
if op == 'new_gcobj':
new_gcobj()
elif op == 'new_pyobj':
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit