Author: Armin Rigo <[email protected]>
Branch:
Changeset: r88992:81769ca3299e
Date: 2016-12-10 15:09 +0100
http://bitbucket.org/pypy/pypy/changeset/81769ca3299e/
Log: Test for unicodehelper.{decode_utf8,encode_utf8}
diff --git a/pypy/interpreter/test/test_unicodehelper.py
b/pypy/interpreter/test/test_unicodehelper.py
new file mode 100644
--- /dev/null
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -0,0 +1,26 @@
+from pypy.interpreter.unicodehelper import encode_utf8, decode_utf8
+
+class FakeSpace:
+ pass
+
+def test_encode_utf8():
+ space = FakeSpace()
+ assert encode_utf8(space, u"abc") == "abc"
+ assert encode_utf8(space, u"\u1234") == "\xe1\x88\xb4"
+ assert encode_utf8(space, u"\ud800") == "\xed\xa0\x80"
+ assert encode_utf8(space, u"\udc00") == "\xed\xb0\x80"
+ # for the following test, go to some lengths to avoid CPython's optimizer
+ # and .pyc file storage, which would collapse the two surrogates into one char
+ c = u"\udc00"
+ assert encode_utf8(space, u"\ud800" + c) == "\xf0\x90\x80\x80"
+
+def test_decode_utf8():
+ space = FakeSpace()
+ assert decode_utf8(space, "abc") == u"abc"
+ assert decode_utf8(space, "\xe1\x88\xb4") == u"\u1234"
+ assert decode_utf8(space, "\xed\xa0\x80") == u"\ud800"
+ assert decode_utf8(space, "\xed\xb0\x80") == u"\udc00"
+ got = decode_utf8(space, "\xed\xa0\x80\xed\xb0\x80")
+ assert map(ord, got) == [0xd800, 0xdc00]
+ got = decode_utf8(space, "\xf0\x90\x80\x80")
+ assert map(ord, got) == [0x10000]
diff --git a/pypy/interpreter/unicodehelper.py
b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -61,6 +61,8 @@
# Note that this function never raises UnicodeEncodeError,
# since surrogate pairs are allowed.
# This is not the case with Python3.
+ # Also, note that the two characters \ud800\udc00 are treated as
+ # a surrogate pair, and turn into a single 4-byte utf8 char.
return runicode.unicode_encode_utf_8(
uni, len(uni), "strict",
errorhandler=raise_unicode_exception_encode,
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit