Author: Matti Picus <matti.pi...@gmail.com> Branch: unicode-utf8-py3 Changeset: r94759:eee2a361ae13 Date: 2018-06-13 20:21 -0700 http://bitbucket.org/pypy/pypy/changeset/eee2a361ae13/
Log: fix interned strings, space.newtext, use of W_Unicode._value diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -424,7 +424,7 @@ self.builtin_modules = {} self.reloading_modules = {} - self.interned_strings = make_weak_value_dictionary(self, unicode, W_Root) + self.interned_strings = make_weak_value_dictionary(self, str, W_Root) self.actionflag = ActionFlag() # changed by the signal module self.check_signal_action = None # changed by the signal module make_finalizer_queue(W_Root, self) @@ -826,7 +826,7 @@ assert isinstance(w_u, W_Root) # and is not None u = self.unicode_w(w_u) if not we_are_translated(): - assert type(u) is unicode + assert type(u) is str w_u1 = self.interned_strings.get(u) if w_u1 is None: w_u1 = w_u @@ -839,12 +839,12 @@ # returns a "text" object (ie str in python2 and unicode in python3) if not we_are_translated(): assert type(s) is str - u = s.decode('utf-8') - w_s1 = self.interned_strings.get(u) + #u = s.decode('utf-8') + w_s1 = self.interned_strings.get(s) if w_s1 is None: - w_s1 = self.newunicode(u) + w_s1 = self.newtext(s) if self._side_effects_ok(): - self.interned_strings.set(u, w_s1) + self.interned_strings.set(s, w_s1) return w_s1 def _revdb_startup(self): @@ -1619,7 +1619,7 @@ an utf-8 encoded rpython string. 
""" assert w_obj is not None - return w_obj.text_w(self) + return w_obj.utf8_w(self) @not_rpython # tests only; should be replaced with bytes_w or text_w def str_w(self, w_obj): diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -11,7 +11,6 @@ class Hit(Exception): pass -from pypy.interpreter.unicodehelper import str_decode_utf8 from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii from pypy.interpreter import unicodehelper as uh from pypy.module._codecs.interp_codecs import CodecState diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -3,7 +3,7 @@ from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.objectmodel import specialize from rpython.rlib.rstring import StringBuilder -from rpython.rlib import rutf8 +from rpython.rlib import rutf8, runicode from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rtyper.lltypesystem import rffi from pypy.module.unicodedata import unicodedb diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -1184,7 +1184,7 @@ # we should implement the same shortcuts as we do for BytesDictStrategy def decodekey_str(self, key): - return str_decode_utf8(self.space, key, allow_surrogates=True) + return str_decode_utf8(key, "string", True, None, allow_surrogates=True)[0] def setitem_str(self, w_dict, key, w_value): assert key is not None diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -388,7 +388,8 @@ return W_BytearrayObject(l) def newtext(self, s): - return self.newtext(str_decode_utf8(self, s, 
allow_surrogates=True)) + lgt = rutf8.check_utf8(s, True) + return W_UnicodeObject(s, lgt) def newtext_or_none(self, s): if s is None: diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -1827,7 +1827,7 @@ def unicode_to_decimal_w(space, w_unistr, allow_surrogates=False): if not isinstance(w_unistr, W_UnicodeObject): raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr) - value = _rpy_unicode_to_decimal_w(space, w_unistr._value) + value = _rpy_unicode_to_decimal_w(space, w_unistr.utf8_w(space)) return unicodehelper.encode_utf8(space, value, allow_surrogates=allow_surrogates) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit