Author: Richard Plangger <[email protected]>
Branch: strbuf-as-buffer
Changeset: r89038:b6a80f1a44e0
Date: 2016-12-13 11:45 +0100
http://bitbucket.org/pypy/pypy/changeset/b6a80f1a44e0/
Log: merged default
diff --git a/pypy/interpreter/test/test_unicodehelper.py
b/pypy/interpreter/test/test_unicodehelper.py
new file mode 100644
--- /dev/null
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -0,0 +1,26 @@
+from pypy.interpreter.unicodehelper import encode_utf8, decode_utf8
+
+class FakeSpace:
+ pass
+
+def test_encode_utf8():
+ space = FakeSpace()
+ assert encode_utf8(space, u"abc") == "abc"
+ assert encode_utf8(space, u"\u1234") == "\xe1\x88\xb4"
+ assert encode_utf8(space, u"\ud800") == "\xed\xa0\x80"
+ assert encode_utf8(space, u"\udc00") == "\xed\xb0\x80"
+ # for the following test, go to great lengths to avoid CPython's optimizer
+ # and .pyc file storage, which collapse the two surrogates into one
+ c = u"\udc00"
+ assert encode_utf8(space, u"\ud800" + c) == "\xf0\x90\x80\x80"
+
+def test_decode_utf8():
+ space = FakeSpace()
+ assert decode_utf8(space, "abc") == u"abc"
+ assert decode_utf8(space, "\xe1\x88\xb4") == u"\u1234"
+ assert decode_utf8(space, "\xed\xa0\x80") == u"\ud800"
+ assert decode_utf8(space, "\xed\xb0\x80") == u"\udc00"
+ got = decode_utf8(space, "\xed\xa0\x80\xed\xb0\x80")
+ assert map(ord, got) == [0xd800, 0xdc00]
+ got = decode_utf8(space, "\xf0\x90\x80\x80")
+ assert map(ord, got) == [0x10000]
diff --git a/pypy/interpreter/unicodehelper.py
b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -51,6 +51,10 @@
return result
def decode_utf8(space, string):
+ # Surrogates are accepted and not treated specially at all.
+ # If there happen to be two 3-byte sequences encoding a pair of surrogates,
+ # you still get two surrogate unicode characters in the result.
+ # These are the Python2 rules; Python3 differs.
result, consumed = runicode.str_decode_utf_8(
string, len(string), "strict",
final=True, errorhandler=decode_error_handler(space),
@@ -59,8 +63,9 @@
def encode_utf8(space, uni):
# Note that this function never raises UnicodeEncodeError,
- # since surrogate pairs are allowed.
- # This is not the case with Python3.
+ # since surrogates are allowed, either paired or lone.
+ # A surrogate pair is treated as the non-BMP character it
+ # stands for. These are the Python2 rules; Python3 differs.
return runicode.unicode_encode_utf_8(
uni, len(uni), "strict",
errorhandler=raise_unicode_exception_encode,
diff --git a/rpython/jit/backend/ppc/regalloc.py
b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -1066,7 +1066,6 @@
prepare_cond_call_value_r = prepare_cond_call_value_i
-
def notimplemented(self, op):
msg = '[PPC/regalloc] %s not implemented\n' % op.getopname()
if we_are_translated():
diff --git a/rpython/jit/backend/zarch/opassembler.py
b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -374,10 +374,11 @@
_COND_CALL_SAVE_REGS = [r.r11, r.r2, r.r3, r.r4, r.r5]
def emit_cond_call(self, op, arglocs, regalloc):
+ resloc = arglocs[0]
+ arglocs = arglocs[1:]
fcond = self.guard_success_cc
self.guard_success_cc = c.cond_none
assert fcond.value != c.cond_none.value
- fcond = c.negate(fcond)
jmp_adr = self.mc.get_relative_pos()
self.mc.reserve_cond_jump() # patched later to a relative branch
@@ -411,6 +412,8 @@
self.mc.BASR(r.r14, r.r14)
# restoring the registers saved above, and doing pop_gcmap(), is left
# to the cond_call_slowpath helper. We never have any result value.
+ if resloc is not None:
+ self.mc.LGR(resloc, r.RES)
relative_target = self.mc.currpos() - jmp_adr
pmc = OverwritingBuilder(self.mc, jmp_adr, 1)
pmc.BRCL(fcond, l.imm(relative_target))
@@ -419,6 +422,9 @@
# guard_no_exception too
self.previous_cond_call_jcond = jmp_adr, fcond
+ emit_cond_call_value_i = emit_cond_call
+ emit_cond_call_value_r = emit_cond_call
+
class AllocOpAssembler(object):
_mixin_ = True
diff --git a/rpython/jit/backend/zarch/regalloc.py
b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -1107,7 +1107,7 @@
def prepare_cond_call(self, op):
self.load_condition_into_cc(op.getarg(0))
- locs = []
+ locs = [None]
# support between 0 and 4 integer arguments
assert 2 <= op.numargs() <= 2 + 4
for i in range(1, op.numargs()):
@@ -1116,6 +1116,22 @@
locs.append(loc)
return locs
+ def prepare_cond_call_value_i(self, op):
+ x = self.ensure_reg(op.getarg(0))
+ self.load_condition_into_cc(op.getarg(0))
+ self.rm.force_allocate_reg(op, selected_reg=x) # spilled if survives
+ # ^^^ if arg0!=0, we jump over the next block of code (the call)
+ locs = [x]
+ # support between 0 and 4 integer arguments
+ assert 2 <= op.numargs() <= 2 + 4
+ for i in range(1, op.numargs()):
+ loc = self.loc(op.getarg(i))
+ assert loc.type != FLOAT
+ locs.append(loc)
+ return locs # [res, function, args...]
+
+ prepare_cond_call_value_r = prepare_cond_call_value_i
+
def prepare_cond_call_gc_wb(self, op):
arglocs = [self.ensure_reg(op.getarg(0))]
return arglocs
diff --git a/rpython/jit/codewriter/support.py
b/rpython/jit/codewriter/support.py
--- a/rpython/jit/codewriter/support.py
+++ b/rpython/jit/codewriter/support.py
@@ -142,10 +142,14 @@
assert len(lst) == len(args_v), (
"not supported so far: 'greens' variables contain Void")
# a crash here means that you have to reorder the variable named in
- # the JitDriver. Indeed, greens and reds must both be sorted: first
- # all INTs, followed by all REFs, followed by all FLOATs.
+ # the JitDriver.
lst2 = sort_vars(lst)
- assert lst == lst2
+ assert lst == lst2, ("You have to reorder the variables named in "
+ "the JitDriver (both the 'greens' and 'reds' independently). "
+ "They must be sorted like this: first all the integer-like, "
+ "then all the pointer-like, and finally the floats.\n"
+ "Got: %r\n"
+ "Expected: %r" % (lst, lst2))
return lst
#
return (_sort(greens_v, True), _sort(reds_v, False))
diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py
--- a/rpython/rlib/rposix.py
+++ b/rpython/rlib/rposix.py
@@ -1778,22 +1778,23 @@
finally:
lltype.free(l_utsbuf, flavor='raw')
-# These are actually macros on some/most systems
-c_makedev = external('makedev', [rffi.INT, rffi.INT], rffi.INT, macro=True)
-c_major = external('major', [rffi.INT], rffi.INT, macro=True)
-c_minor = external('minor', [rffi.INT], rffi.INT, macro=True)
+if sys.platform != 'win32':
+ # These are actually macros on some/most systems
+ c_makedev = external('makedev', [rffi.INT, rffi.INT], rffi.INT, macro=True)
+ c_major = external('major', [rffi.INT], rffi.INT, macro=True)
+ c_minor = external('minor', [rffi.INT], rffi.INT, macro=True)
-@replace_os_function('makedev')
-def makedev(maj, min):
- return c_makedev(maj, min)
+ @replace_os_function('makedev')
+ def makedev(maj, min):
+ return c_makedev(maj, min)
-@replace_os_function('major')
-def major(dev):
- return c_major(dev)
+ @replace_os_function('major')
+ def major(dev):
+ return c_major(dev)
-@replace_os_function('minor')
-def minor(dev):
- return c_minor(dev)
+ @replace_os_function('minor')
+ def minor(dev):
+ return c_minor(dev)
#___________________________________________________________________
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -327,6 +327,16 @@
def unicode_encode_utf_8(s, size, errors, errorhandler=None,
allow_surrogates=allow_surrogate_by_default):
+ # In this function, allow_surrogates can be:
+ #
+ # * True: surrogates are always allowed. A valid surrogate pair
+ # is replaced with the non-BMP unicode char it stands for,
+ # which is then encoded as 4 bytes.
+ #
+ # * False: surrogates are always forbidden.
+ #
+ # See also unicode_encode_utf8sp().
+ #
if errorhandler is None:
errorhandler = default_unicode_error_encode
return unicode_encode_utf_8_impl(s, size, errors, errorhandler,
@@ -391,6 +401,33 @@
_encodeUCS4(result, ch)
return result.build()
+def unicode_encode_utf8sp(s, size):
+ # Surrogate-preserving utf-8 encoding. Any surrogate character
+ # turns into its 3-byte encoding, whether it is paired or not.
+ # This should always be reversible, and the reverse is the regular
+ # str_decode_utf_8() with allow_surrogates=True.
+ assert(size >= 0)
+ result = StringBuilder(size)
+ pos = 0
+ while pos < size:
+ ch = ord(s[pos])
+ pos += 1
+ if ch < 0x80:
+ # Encode ASCII
+ result.append(chr(ch))
+ elif ch < 0x0800:
+ # Encode Latin-1
+ result.append(chr((0xc0 | (ch >> 6))))
+ result.append(chr((0x80 | (ch & 0x3f))))
+ elif ch < 0x10000:
+ # Encode UCS2 Unicode ordinals, and surrogates
+ result.append((chr((0xe0 | (ch >> 12)))))
+ result.append((chr((0x80 | ((ch >> 6) & 0x3f)))))
+ result.append((chr((0x80 | (ch & 0x3f)))))
+ else:
+ _encodeUCS4(result, ch)
+ return result.build()
+
# ____________________________________________________________
# utf-16
diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py
--- a/rpython/rlib/test/test_rposix.py
+++ b/rpython/rlib/test/test_rposix.py
@@ -281,6 +281,7 @@
def test_isatty(self):
assert rposix.isatty(-1) is False
+ @py.test.mark.skipif("not hasattr(rposix, 'makedev')")
def test_makedev(self):
dev = rposix.makedev(24, 7)
assert rposix.major(dev) == 24
diff --git a/rpython/rlib/test/test_runicode.py
b/rpython/rlib/test/test_runicode.py
--- a/rpython/rlib/test/test_runicode.py
+++ b/rpython/rlib/test/test_runicode.py
@@ -812,6 +812,21 @@
py.test.raises(UnicodeEncodeError, encoder, u' 12, \u1234 ', 7, None)
assert encoder(u'u\u1234', 2, 'replace') == 'u?'
+ def test_encode_utf8sp(self):
+ # for the following test, go to great lengths to avoid CPython's optimizer
+ # and .pyc file storage, which collapse the two surrogates into one
+ c = u"\udc00"
+ for input, expected in [
+ (u"", ""),
+ (u"abc", "abc"),
+ (u"\u1234", "\xe1\x88\xb4"),
+ (u"\ud800", "\xed\xa0\x80"),
+ (u"\udc00", "\xed\xb0\x80"),
+ (u"\ud800" + c, "\xed\xa0\x80\xed\xb0\x80"),
+ ]:
+ got = runicode.unicode_encode_utf8sp(input, len(input))
+ assert got == expected
+
class TestTranslation(object):
def setup_class(cls):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit