Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3.5
Changeset: r91155:1ca1708d0606
Date: 2017-04-19 15:28 +0200
http://bitbucket.org/pypy/pypy/changeset/1ca1708d0606/
Log: Disable lone surrogates in utf-16 and utf-32 encoders
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -629,6 +629,21 @@
wrap_encoder.func_name = rname
globals()[name] = wrap_encoder
+def make_utf_encoder_wrapper(name):
+ rname = "unicode_encode_%s" % (name.replace("_encode", ""), )
+ assert hasattr(runicode, rname)
+ @unwrap_spec(uni=unicode, errors='text_or_none')
+ def wrap_encoder(space, uni, errors="strict"):
+ if errors is None:
+ errors = 'strict'
+ state = space.fromcache(CodecState)
+ func = getattr(runicode, rname)
+ result = func(uni, len(uni), errors, state.encode_error_handler,
+ allow_surrogates=False)
+ return space.newtuple([space.newbytes(result), space.newint(len(uni))])
+ wrap_encoder.func_name = rname
+ globals()[name] = wrap_encoder
+
def make_decoder_wrapper(name):
rname = "str_decode_%s" % (name.replace("_decode", ""), )
assert hasattr(runicode, rname)
@@ -650,16 +665,20 @@
"ascii_encode",
"latin_1_encode",
"utf_7_encode",
+ "unicode_escape_encode",
+ "raw_unicode_escape_encode",
+ ]:
+ make_encoder_wrapper(encoder)
+
+for encoder in [
"utf_16_encode",
"utf_16_be_encode",
"utf_16_le_encode",
"utf_32_encode",
"utf_32_be_encode",
"utf_32_le_encode",
- "unicode_escape_encode",
- "raw_unicode_escape_encode",
]:
- make_encoder_wrapper(encoder)
+ make_utf_encoder_wrapper(encoder)
for decoder in [
"ascii_decode",
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -758,6 +758,11 @@
s = "\u5678".encode("latin-1", "test.bad_handler")
assert s == b'\xe9'
+ def test_lone_surrogates(self):
+ for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
+ 'utf-32', 'utf-32-le', 'utf-32-be'):
+ raises(UnicodeEncodeError, u'\ud800'.encode, encoding)
+
def test_charmap_encode(self):
assert 'xxx'.encode('charmap') == b'xxx'
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit