Author: Matti Picus <[email protected]>
Branch: better-PyDict_Next
Changeset: r89024:d9b07fbc433c
Date: 2016-12-12 16:23 +0200
http://bitbucket.org/pypy/pypy/changeset/d9b07fbc433c/
Log: merge default into branch
diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py
new file mode 100644
--- /dev/null
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -0,0 +1,26 @@
+from pypy.interpreter.unicodehelper import encode_utf8, decode_utf8
+
+class FakeSpace:
+ pass
+
+def test_encode_utf8():
+ space = FakeSpace()
+ assert encode_utf8(space, u"abc") == "abc"
+ assert encode_utf8(space, u"\u1234") == "\xe1\x88\xb4"
+ assert encode_utf8(space, u"\ud800") == "\xed\xa0\x80"
+ assert encode_utf8(space, u"\udc00") == "\xed\xb0\x80"
+ # for the following test, go to lengths to avoid CPython's optimizer
+ # and .pyc file storage, which collapse the two surrogates into one
+ c = u"\udc00"
+ assert encode_utf8(space, u"\ud800" + c) == "\xf0\x90\x80\x80"
+
+def test_decode_utf8():
+ space = FakeSpace()
+ assert decode_utf8(space, "abc") == u"abc"
+ assert decode_utf8(space, "\xe1\x88\xb4") == u"\u1234"
+ assert decode_utf8(space, "\xed\xa0\x80") == u"\ud800"
+ assert decode_utf8(space, "\xed\xb0\x80") == u"\udc00"
+ got = decode_utf8(space, "\xed\xa0\x80\xed\xb0\x80")
+ assert map(ord, got) == [0xd800, 0xdc00]
+ got = decode_utf8(space, "\xf0\x90\x80\x80")
+ assert map(ord, got) == [0x10000]
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -51,6 +51,10 @@
return result
def decode_utf8(space, string):
+ # Surrogates are accepted and not treated specially at all.
+ # If there happen to be two 3-byte sequences encoding a pair of surrogates,
+ # you still get two surrogate unicode characters in the result.
+ # These are the Python2 rules; Python3 differs.
result, consumed = runicode.str_decode_utf_8(
string, len(string), "strict",
final=True, errorhandler=decode_error_handler(space),
@@ -59,8 +63,9 @@
def encode_utf8(space, uni):
# Note that this function never raises UnicodeEncodeError,
- # since surrogate pairs are allowed.
- # This is not the case with Python3.
+ # since surrogates are allowed, either paired or lone.
+ # A paired surrogate is considered like the non-BMP character
+ # it stands for. These are the Python2 rules; Python3 differs.
return runicode.unicode_encode_utf_8(
uni, len(uni), "strict",
errorhandler=raise_unicode_exception_encode,
diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py
--- a/pypy/module/cpyext/pyobject.py
+++ b/pypy/module/cpyext/pyobject.py
@@ -25,11 +25,9 @@
basestruct = PyObject.TO
W_BaseObject = W_ObjectObject
- def get_dealloc(self, space):
+ def get_dealloc(self):
from pypy.module.cpyext.typeobject import subtype_dealloc
- return llhelper(
- subtype_dealloc.api_func.functype,
- subtype_dealloc.api_func.get_wrapper(space))
+ return subtype_dealloc
def allocate(self, space, w_type, itemcount=0):
# similar to PyType_GenericAlloc?
@@ -109,10 +107,8 @@
return tp_alloc(space, w_type, itemcount)
if tp_dealloc:
- def get_dealloc(self, space):
- return llhelper(
- tp_dealloc.api_func.functype,
- tp_dealloc.api_func.get_wrapper(space))
+ def get_dealloc(self):
+ return tp_dealloc
if tp_attach:
def attach(self, space, pyobj, w_obj, w_userdata=None):
diff --git a/pypy/module/cpyext/slotdefs.py b/pypy/module/cpyext/slotdefs.py
--- a/pypy/module/cpyext/slotdefs.py
+++ b/pypy/module/cpyext/slotdefs.py
@@ -59,6 +59,9 @@
"expected %d-%d arguments, got %d",
low, high, space.len_w(w_ob))
+def llslot(space, func):
+ return llhelper(func.api_func.functype, func.api_func.get_wrapper(space))
+
def wrap_init(space, w_self, w_args, func, w_kwargs):
func_init = rffi.cast(initproc, func)
res = generic_cpy_call(space, func_init, w_self, w_args, w_kwargs)
@@ -106,7 +109,7 @@
args_w = space.fixedview(w_args)
arg3 = space.w_None
if len(args_w) > 1:
- arg3 = args_w[1]
+ arg3 = args_w[1]
return generic_cpy_call(space, func_ternary, w_self, args_w[0], arg3)
def wrap_ternaryfunc_r(space, w_self, w_args, func):
@@ -121,7 +124,7 @@
Py_DecRef(space, ref)
arg3 = space.w_None
if len(args_w) > 1:
- arg3 = args_w[1]
+ arg3 = args_w[1]
return generic_cpy_call(space, func_ternary, args_w[0], w_self, arg3)
@@ -322,7 +325,7 @@
self.strides = [1]
else:
self.strides = strides
- self.ndim = ndim
+ self.ndim = ndim
self.itemsize = itemsize
self.readonly = readonly
@@ -472,7 +475,6 @@
@func_renamer("cpyext_%s_%s" % (name.replace('.', '_'),
typedef.name))
def slot_func(space, w_self):
return space.call_function(slot_fn, w_self)
- api_func = slot_func.api_func
handled = True
# binary functions
@@ -499,7 +501,6 @@
@func_renamer("cpyext_%s_%s" % (name.replace('.', '_'),
typedef.name))
def slot_func(space, w_self, w_arg):
return space.call_function(slot_fn, w_self, w_arg)
- api_func = slot_func.api_func
handled = True
# binary-with-Py_ssize_t-type
@@ -517,7 +518,6 @@
@func_renamer("cpyext_%s_%s" % (name.replace('.', '_'),
typedef.name))
def slot_func(space, w_self, arg):
return space.call_function(slot_fn, w_self, space.wrap(arg))
- api_func = slot_func.api_func
handled = True
# ternary functions
@@ -532,7 +532,6 @@
@func_renamer("cpyext_%s_%s" % (name.replace('.', '_'),
typedef.name))
def slot_func(space, w_self, w_arg1, w_arg2):
return space.call_function(slot_fn, w_self, w_arg1, w_arg2)
- api_func = slot_func.api_func
handled = True
if handled:
@@ -552,7 +551,7 @@
else:
space.call_function(delattr_fn, w_self, w_name)
return 0
- api_func = slot_tp_setattro.api_func
+ slot_func = slot_tp_setattro
elif name == 'tp_getattro':
getattr_fn = w_type.getdictvalue(space, '__getattribute__')
if getattr_fn is None:
@@ -562,7 +561,7 @@
@func_renamer("cpyext_tp_getattro_%s" % (typedef.name,))
def slot_tp_getattro(space, w_self, w_name):
return space.call_function(getattr_fn, w_self, w_name)
- api_func = slot_tp_getattro.api_func
+ slot_func = slot_tp_getattro
elif name == 'tp_call':
call_fn = w_type.getdictvalue(space, '__call__')
if call_fn is None:
@@ -574,7 +573,7 @@
args = Arguments(space, [w_self],
w_stararg=w_args, w_starstararg=w_kwds)
return space.call_args(call_fn, args)
- api_func = slot_tp_call.api_func
+ slot_func = slot_tp_call
elif name == 'tp_iternext':
iternext_fn = w_type.getdictvalue(space, 'next')
@@ -590,7 +589,7 @@
if not e.match(space, space.w_StopIteration):
raise
return None
- api_func = slot_tp_iternext.api_func
+ slot_func = slot_tp_iternext
elif name == 'tp_init':
init_fn = w_type.getdictvalue(space, '__init__')
@@ -605,7 +604,7 @@
w_stararg=w_args, w_starstararg=w_kwds)
space.call_args(init_fn, args)
return 0
- api_func = slot_tp_init.api_func
+ slot_func = slot_tp_init
elif name == 'tp_new':
new_fn = w_type.getdictvalue(space, '__new__')
if new_fn is None:
@@ -617,12 +616,12 @@
args = Arguments(space, [w_self],
w_stararg=w_args, w_starstararg=w_kwds)
return space.call_args(space.get(new_fn, w_self), args)
- api_func = slot_tp_new.api_func
+ slot_func = slot_tp_new
elif name == 'tp_as_buffer.c_bf_getbuffer':
buff_fn = w_type.getdictvalue(space, '__buffer__')
if buff_fn is None:
return
- @cpython_api([PyObject, Py_bufferP, rffi.INT_real],
+ @cpython_api([PyObject, Py_bufferP, rffi.INT_real],
rffi.INT_real, header=None, error=-1)
@func_renamer("cpyext_%s_%s" % (name.replace('.', '_'), typedef.name))
def buff_w(space, w_self, view, flags):
@@ -646,14 +645,14 @@
return 0
# XXX remove this when it no longer crashes a translated PyPy
return
- api_func = buff_w.api_func
+ slot_func = buff_w
else:
# missing: tp_as_number.nb_nonzero, tp_as_number.nb_coerce
# tp_as_sequence.c_sq_contains, tp_as_sequence.c_sq_length
# richcmpfunc(s)
return
- return lambda: llhelper(api_func.functype, api_func.get_wrapper(space))
+ return slot_func
PyWrapperFlag_KEYWORDS = 1
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -3,7 +3,6 @@
from rpython.rlib import jit
from rpython.rlib.objectmodel import specialize
from rpython.rlib.rstring import rsplit
-from rpython.rtyper.annlowlevel import llhelper
from rpython.rtyper.lltypesystem import rffi, lltype
from pypy.interpreter.baseobjspace import W_Root, DescrMismatch
@@ -28,7 +27,8 @@
PyObject, make_ref, create_ref, from_ref, get_typedescr, make_typedescr,
track_reference, Py_DecRef, as_pyobj)
from pypy.module.cpyext.slotdefs import (
- slotdefs_for_tp_slots, slotdefs_for_wrappers, get_slot_tp_function)
+ slotdefs_for_tp_slots, slotdefs_for_wrappers, get_slot_tp_function,
+ llslot)
from pypy.module.cpyext.state import State
from pypy.module.cpyext.structmember import PyMember_GetOne, PyMember_SetOne
from pypy.module.cpyext.typeobjectdefs import (
@@ -273,21 +273,14 @@
# XXX special case iternext
continue
- slot_func_helper = None
-
if slot_func is None and typedef is not None:
- get_slot = get_slot_tp_function(space, typedef, slot_name)
- if get_slot:
- slot_func_helper = get_slot()
- elif slot_func:
- slot_func_helper = llhelper(slot_func.api_func.functype,
- slot_func.api_func.get_wrapper(space))
-
- if slot_func_helper is None:
+ slot_func = get_slot_tp_function(space, typedef, slot_name)
+ if not slot_func:
if WARN_ABOUT_MISSING_SLOT_FUNCTIONS:
os.write(2, "%s defined by %s but no slot function defined!\n"
% (
method_name, w_type.getname(space)))
continue
+ slot_func_helper = llslot(space, slot_func)
# XXX special case wrapper-functions and use a "specific" slot func
@@ -393,9 +386,8 @@
def setup_new_method_def(space):
ptr = get_new_method_def(space)
- ptr.c_ml_meth = rffi.cast(PyCFunction_typedef,
- llhelper(tp_new_wrapper.api_func.functype,
- tp_new_wrapper.api_func.get_wrapper(space)))
+ ptr.c_ml_meth = rffi.cast(
+ PyCFunction_typedef, llslot(space, tp_new_wrapper))
def add_tp_new_wrapper(space, dict_w, pto):
if "__new__" in dict_w:
@@ -518,8 +510,7 @@
def subtype_dealloc(space, obj):
pto = obj.c_ob_type
base = pto
- this_func_ptr = llhelper(subtype_dealloc.api_func.functype,
- subtype_dealloc.api_func.get_wrapper(space))
+ this_func_ptr = llslot(space, subtype_dealloc)
while base.c_tp_dealloc == this_func_ptr:
base = base.c_tp_base
assert base
@@ -621,46 +612,31 @@
return
c_buf = lltype.malloc(PyBufferProcs, flavor='raw', zero=True)
lltype.render_immortal(c_buf)
- c_buf.c_bf_getsegcount = llhelper(bf_segcount.api_func.functype,
- bf_segcount.api_func.get_wrapper(space))
+ c_buf.c_bf_getsegcount = llslot(space, bf_segcount)
if space.is_w(w_type, space.w_str):
# Special case: str doesn't support get_raw_address(), so we have a
# custom get*buffer that instead gives the address of the char* in the
# PyBytesObject*!
- c_buf.c_bf_getreadbuffer = llhelper(
- str_getreadbuffer.api_func.functype,
- str_getreadbuffer.api_func.get_wrapper(space))
- c_buf.c_bf_getcharbuffer = llhelper(
- str_getcharbuffer.api_func.functype,
- str_getcharbuffer.api_func.get_wrapper(space))
+ c_buf.c_bf_getreadbuffer = llslot(space, str_getreadbuffer)
+ c_buf.c_bf_getcharbuffer = llslot(space, str_getcharbuffer)
elif space.is_w(w_type, space.w_unicode):
# Special case: unicode doesn't support get_raw_address(), so we have a
# custom get*buffer that instead gives the address of the char* in the
# PyUnicodeObject*!
- c_buf.c_bf_getreadbuffer = llhelper(
- unicode_getreadbuffer.api_func.functype,
- unicode_getreadbuffer.api_func.get_wrapper(space))
+ c_buf.c_bf_getreadbuffer = llslot(space, unicode_getreadbuffer)
elif space.is_w(w_type, space.w_buffer):
# Special case: we store a permanent address on the cpyext wrapper,
# so we'll reuse that.
# Note: we could instead store a permanent address on the buffer object,
# and use get_raw_address()
- c_buf.c_bf_getreadbuffer = llhelper(
- buf_getreadbuffer.api_func.functype,
- buf_getreadbuffer.api_func.get_wrapper(space))
- c_buf.c_bf_getcharbuffer = llhelper(
- buf_getcharbuffer.api_func.functype,
- buf_getcharbuffer.api_func.get_wrapper(space))
+ c_buf.c_bf_getreadbuffer = llslot(space, buf_getreadbuffer)
+ c_buf.c_bf_getcharbuffer = llslot(space, buf_getcharbuffer)
else:
# use get_raw_address()
- c_buf.c_bf_getreadbuffer = llhelper(bf_getreadbuffer.api_func.functype,
- bf_getreadbuffer.api_func.get_wrapper(space))
- c_buf.c_bf_getcharbuffer = llhelper(bf_getcharbuffer.api_func.functype,
- bf_getcharbuffer.api_func.get_wrapper(space))
+ c_buf.c_bf_getreadbuffer = llslot(space, bf_getreadbuffer)
+ c_buf.c_bf_getcharbuffer = llslot(space, bf_getcharbuffer)
if bufspec == 'read-write':
- c_buf.c_bf_getwritebuffer = llhelper(
- bf_getwritebuffer.api_func.functype,
- bf_getwritebuffer.api_func.get_wrapper(space))
+ c_buf.c_bf_getwritebuffer = llslot(space, bf_getwritebuffer)
pto.c_tp_as_buffer = c_buf
pto.c_tp_flags |= Py_TPFLAGS_HAVE_GETCHARBUFFER
pto.c_tp_flags |= Py_TPFLAGS_HAVE_NEWBUFFER
@@ -721,12 +697,10 @@
# dealloc
if space.gettypeobject(w_type.layout.typedef) is w_type:
# only for the exact type, like 'space.w_tuple' or 'space.w_list'
- pto.c_tp_dealloc = typedescr.get_dealloc(space)
+ pto.c_tp_dealloc = llslot(space, typedescr.get_dealloc())
else:
# for all subtypes, use subtype_dealloc()
- pto.c_tp_dealloc = llhelper(
- subtype_dealloc.api_func.functype,
- subtype_dealloc.api_func.get_wrapper(space))
+ pto.c_tp_dealloc = llslot(space, subtype_dealloc)
if space.is_w(w_type, space.w_str):
pto.c_tp_itemsize = 1
elif space.is_w(w_type, space.w_tuple):
@@ -734,10 +708,8 @@
# buffer protocol
setup_buffer_procs(space, w_type, pto)
- pto.c_tp_free = llhelper(PyObject_Free.api_func.functype,
- PyObject_Free.api_func.get_wrapper(space))
- pto.c_tp_alloc = llhelper(PyType_GenericAlloc.api_func.functype,
- PyType_GenericAlloc.api_func.get_wrapper(space))
+ pto.c_tp_free = llslot(space, PyObject_Free)
+ pto.c_tp_alloc = llslot(space, PyType_GenericAlloc)
builder = space.fromcache(StaticObjectBuilder)
if ((pto.c_tp_flags & Py_TPFLAGS_HEAPTYPE) != 0
and builder.cpyext_type_init is None):
@@ -928,15 +900,11 @@
if not pto.c_tp_setattro:
from pypy.module.cpyext.object import PyObject_GenericSetAttr
- pto.c_tp_setattro = llhelper(
- PyObject_GenericSetAttr.api_func.functype,
- PyObject_GenericSetAttr.api_func.get_wrapper(space))
+ pto.c_tp_setattro = llslot(space, PyObject_GenericSetAttr)
if not pto.c_tp_getattro:
from pypy.module.cpyext.object import PyObject_GenericGetAttr
- pto.c_tp_getattro = llhelper(
- PyObject_GenericGetAttr.api_func.functype,
- PyObject_GenericGetAttr.api_func.get_wrapper(space))
+ pto.c_tp_getattro = llslot(space, PyObject_GenericGetAttr)
if w_obj.is_cpytype():
Py_DecRef(space, pto.c_tp_dict)
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -1066,7 +1066,6 @@
prepare_cond_call_value_r = prepare_cond_call_value_i
-
def notimplemented(self, op):
msg = '[PPC/regalloc] %s not implemented\n' % op.getopname()
if we_are_translated():
diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -374,10 +374,11 @@
_COND_CALL_SAVE_REGS = [r.r11, r.r2, r.r3, r.r4, r.r5]
def emit_cond_call(self, op, arglocs, regalloc):
+ resloc = arglocs[0]
+ arglocs = arglocs[1:]
fcond = self.guard_success_cc
self.guard_success_cc = c.cond_none
assert fcond.value != c.cond_none.value
- fcond = c.negate(fcond)
jmp_adr = self.mc.get_relative_pos()
self.mc.reserve_cond_jump() # patched later to a relative branch
@@ -411,6 +412,8 @@
self.mc.BASR(r.r14, r.r14)
# restoring the registers saved above, and doing pop_gcmap(), is left
# to the cond_call_slowpath helper. We never have any result value.
+ if resloc is not None:
+ self.mc.LGR(resloc, r.RES)
relative_target = self.mc.currpos() - jmp_adr
pmc = OverwritingBuilder(self.mc, jmp_adr, 1)
pmc.BRCL(fcond, l.imm(relative_target))
@@ -419,6 +422,9 @@
# guard_no_exception too
self.previous_cond_call_jcond = jmp_adr, fcond
+ emit_cond_call_value_i = emit_cond_call
+ emit_cond_call_value_r = emit_cond_call
+
class AllocOpAssembler(object):
_mixin_ = True
diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -1107,7 +1107,7 @@
def prepare_cond_call(self, op):
self.load_condition_into_cc(op.getarg(0))
- locs = []
+ locs = [None]
# support between 0 and 4 integer arguments
assert 2 <= op.numargs() <= 2 + 4
for i in range(1, op.numargs()):
@@ -1116,6 +1116,22 @@
locs.append(loc)
return locs
+ def prepare_cond_call_value_i(self, op):
+ x = self.ensure_reg(op.getarg(0))
+ self.load_condition_into_cc(op.getarg(0))
+ self.rm.force_allocate_reg(op, selected_reg=x) # spilled if survives
+ # ^^^ if arg0!=0, we jump over the next block of code (the call)
+ locs = [x]
+ # support between 0 and 4 integer arguments
+ assert 2 <= op.numargs() <= 2 + 4
+ for i in range(1, op.numargs()):
+ loc = self.loc(op.getarg(i))
+ assert loc.type != FLOAT
+ locs.append(loc)
+ return locs # [res, function, args...]
+
+ prepare_cond_call_value_r = prepare_cond_call_value_i
+
def prepare_cond_call_gc_wb(self, op):
arglocs = [self.ensure_reg(op.getarg(0))]
return arglocs
diff --git a/rpython/jit/codewriter/support.py b/rpython/jit/codewriter/support.py
--- a/rpython/jit/codewriter/support.py
+++ b/rpython/jit/codewriter/support.py
@@ -142,10 +142,14 @@
assert len(lst) == len(args_v), (
"not supported so far: 'greens' variables contain Void")
# a crash here means that you have to reorder the variable named in
- # the JitDriver. Indeed, greens and reds must both be sorted: first
- # all INTs, followed by all REFs, followed by all FLOATs.
+ # the JitDriver.
lst2 = sort_vars(lst)
- assert lst == lst2
+ assert lst == lst2, ("You have to reorder the variables named in "
+ "the JitDriver (both the 'greens' and 'reds' independently). "
+ "They must be sorted like this: first all the integer-like, "
+ "then all the pointer-like, and finally the floats.\n"
+ "Got: %r\n"
+ "Expected: %r" % (lst, lst2))
return lst
#
return (_sort(greens_v, True), _sort(reds_v, False))
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -327,6 +327,16 @@
def unicode_encode_utf_8(s, size, errors, errorhandler=None,
allow_surrogates=allow_surrogate_by_default):
+ # In this function, allow_surrogates can be:
+ #
+ # * True: surrogates are always allowed. A valid surrogate pair
+ # is replaced with the non-BMP unicode char it stands for,
+ # which is then encoded as 4 bytes.
+ #
+ # * False: surrogates are always forbidden.
+ #
+ # See also unicode_encode_utf8sp().
+ #
if errorhandler is None:
errorhandler = default_unicode_error_encode
return unicode_encode_utf_8_impl(s, size, errors, errorhandler,
@@ -391,6 +401,33 @@
_encodeUCS4(result, ch)
return result.build()
+def unicode_encode_utf8sp(s, size):
+ # Surrogate-preserving utf-8 encoding. Any surrogate character
+ # turns into its 3-byte encoding, whether it is paired or not.
+ # This should always be reversible, and the reverse is the regular
+ # str_decode_utf_8() with allow_surrogates=True.
+ assert(size >= 0)
+ result = StringBuilder(size)
+ pos = 0
+ while pos < size:
+ ch = ord(s[pos])
+ pos += 1
+ if ch < 0x80:
+ # Encode ASCII
+ result.append(chr(ch))
+ elif ch < 0x0800:
+ # Encode Latin-1
+ result.append(chr((0xc0 | (ch >> 6))))
+ result.append(chr((0x80 | (ch & 0x3f))))
+ elif ch < 0x10000:
+ # Encode UCS2 Unicode ordinals, and surrogates
+ result.append((chr((0xe0 | (ch >> 12)))))
+ result.append((chr((0x80 | ((ch >> 6) & 0x3f)))))
+ result.append((chr((0x80 | (ch & 0x3f)))))
+ else:
+ _encodeUCS4(result, ch)
+ return result.build()
+
# ____________________________________________________________
# utf-16
diff --git a/rpython/rlib/test/test_runicode.py b/rpython/rlib/test/test_runicode.py
--- a/rpython/rlib/test/test_runicode.py
+++ b/rpython/rlib/test/test_runicode.py
@@ -812,6 +812,21 @@
py.test.raises(UnicodeEncodeError, encoder, u' 12, \u1234 ', 7, None)
assert encoder(u'u\u1234', 2, 'replace') == 'u?'
+ def test_encode_utf8sp(self):
+ # for the following test, go to lengths to avoid CPython's optimizer
+ # and .pyc file storage, which collapse the two surrogates into one
+ c = u"\udc00"
+ for input, expected in [
+ (u"", ""),
+ (u"abc", "abc"),
+ (u"\u1234", "\xe1\x88\xb4"),
+ (u"\ud800", "\xed\xa0\x80"),
+ (u"\udc00", "\xed\xb0\x80"),
+ (u"\ud800" + c, "\xed\xa0\x80\xed\xb0\x80"),
+ ]:
+ got = runicode.unicode_encode_utf8sp(input, len(input))
+ assert got == expected
+
class TestTranslation(object):
def setup_class(cls):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit