Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3k
Changeset: r57532:5ad25f0c04b3
Date: 2012-09-23 22:27 +0200
http://bitbucket.org/pypy/pypy/changeset/5ad25f0c04b3/

Log:    Now that unicode.encode('utf8') can fail, it's important to build
        the cached utf8 copy only when requested. Otherwise chr(0xd800)
        crashes the interpreter...

        This is difficult to test unfortunately, because the untranslated
        version does not fail.

diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -45,7 +45,7 @@
     def __init__(w_self, unistr):
         assert isinstance(unistr, unicode)
         w_self._value = unistr
-        w_self._utf8 = unistr.encode('utf-8')
+        w_self._utf8 = None
 
     def __repr__(w_self):
         """ representation for debugging purposes """
@@ -64,6 +64,13 @@
         return self._value
 
     def identifier_w(self, space):
+        if self._utf8 is None:
+            from pypy.objspace.std.unicodetype import encode_error_handler
+            from pypy.rlib.runicode import unicode_encode_utf_8
+            u = self._value
+            eh = encode_error_handler(space)
+            self._utf8 = unicode_encode_utf_8(u, len(u), None,
+                                              errorhandler=eh)
         return self._utf8
 
 W_UnicodeObject.EMPTY = W_UnicodeObject(u'')
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to