Author: Amaury Forgeot d'Arc <[email protected]>
Branch:
Changeset: r60630:fa5a0d941349
Date: 2013-01-28 20:21 +0100
http://bitbucket.org/pypy/pypy/changeset/fa5a0d941349/
Log: encode() should raise UnicodeEncodeError.
(questions for Antonio: why are lone surrogates not allowed? Do all
callers expect an RPython exception? And shouldn't RPython follow
Python 2 semantics?)
diff --git a/rpython/rtyper/rstr.py b/rpython/rtyper/rstr.py
--- a/rpython/rtyper/rstr.py
+++ b/rpython/rtyper/rstr.py
@@ -80,7 +80,7 @@
assert s is not None
bytes = self.runicode_encode_utf_8(
s, len(s), 'strict',
- errorhandler=self.ll_raise_unicode_exception_decode,
+ errorhandler=self.ll_raise_unicode_exception_encode,
allow_surrogates=False)
return self.ll.llstr(bytes)
diff --git a/rpython/rtyper/test/test_runicode.py
b/rpython/rtyper/test/test_runicode.py
--- a/rpython/rtyper/test/test_runicode.py
+++ b/rpython/rtyper/test/test_runicode.py
@@ -106,6 +106,12 @@
assert self.ll_to_string(self.interpret(f, [38])) == f(38)
+ def g(n):
+ x = u'\ud800' + unichr(n)
+ return x.encode('utf-8')
+
+ self.interpret_raises(UnicodeEncodeError, g, [38])
+
def test_utf_8_encoding_annotation(self):
from rpython.rlib.runicode import unicode_encode_utf_8
def errorhandler(errors, encoding, msg, u,
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit