Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3k
Changeset: r57532:5ad25f0c04b3
Date: 2012-09-23 22:27 +0200
http://bitbucket.org/pypy/pypy/changeset/5ad25f0c04b3/
Log: Now that unicode.encode('utf8') can fail, it's important to build
the cached UTF-8 copy only when requested. Otherwise chr(0xd800) (a
lone surrogate, which cannot be encoded as UTF-8) crashes the
interpreter.
Unfortunately, this is difficult to test, because the untranslated
version does not fail.
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -45,7 +45,7 @@
def __init__(w_self, unistr):
assert isinstance(unistr, unicode)
w_self._value = unistr
- w_self._utf8 = unistr.encode('utf-8')
+ w_self._utf8 = None
def __repr__(w_self):
""" representation for debugging purposes """
@@ -64,6 +64,13 @@
return self._value
def identifier_w(self, space):
+ if self._utf8 is None:
+ from pypy.objspace.std.unicodetype import encode_error_handler
+ from pypy.rlib.runicode import unicode_encode_utf_8
+ u = self._value
+ eh = encode_error_handler(space)
+ self._utf8 = unicode_encode_utf_8(u, len(u), None,
+ errorhandler=eh)
return self._utf8
W_UnicodeObject.EMPTY = W_UnicodeObject(u'')
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit