Author: Amaury Forgeot d'Arc <[email protected]>
Branch: 
Changeset: r60630:fa5a0d941349
Date: 2013-01-28 20:21 +0100
http://bitbucket.org/pypy/pypy/changeset/fa5a0d941349/

Log:    encode() should raise UnicodeEncodeError.

        (questions for Antonio: why are lone surrogates not allowed? Do all
        callers expect an RPython exception? And shouldn't RPython follow
        Python 2 semantics?)

diff --git a/rpython/rtyper/rstr.py b/rpython/rtyper/rstr.py
--- a/rpython/rtyper/rstr.py
+++ b/rpython/rtyper/rstr.py
@@ -80,7 +80,7 @@
         assert s is not None
         bytes = self.runicode_encode_utf_8(
             s, len(s), 'strict',
-            errorhandler=self.ll_raise_unicode_exception_decode,
+            errorhandler=self.ll_raise_unicode_exception_encode,
             allow_surrogates=False)
         return self.ll.llstr(bytes)
 
diff --git a/rpython/rtyper/test/test_runicode.py b/rpython/rtyper/test/test_runicode.py
--- a/rpython/rtyper/test/test_runicode.py
+++ b/rpython/rtyper/test/test_runicode.py
@@ -106,6 +106,12 @@
 
         assert self.ll_to_string(self.interpret(f, [38])) == f(38)
 
+        def g(n):
+            x = u'\ud800' + unichr(n)
+            return x.encode('utf-8')
+
+        self.interpret_raises(UnicodeEncodeError, g, [38])
+
     def test_utf_8_encoding_annotation(self):
         from rpython.rlib.runicode import unicode_encode_utf_8
         def errorhandler(errors, encoding, msg, u,
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to