Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94759:eee2a361ae13
Date: 2018-06-13 20:21 -0700
http://bitbucket.org/pypy/pypy/changeset/eee2a361ae13/
Log: fix interned strings, space.newtext, use of W_Unicode._value
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -424,7 +424,7 @@
self.builtin_modules = {}
self.reloading_modules = {}
- self.interned_strings = make_weak_value_dictionary(self, unicode,
W_Root)
+ self.interned_strings = make_weak_value_dictionary(self, str, W_Root)
self.actionflag = ActionFlag() # changed by the signal module
self.check_signal_action = None # changed by the signal module
make_finalizer_queue(W_Root, self)
@@ -826,7 +826,7 @@
assert isinstance(w_u, W_Root) # and is not None
u = self.unicode_w(w_u)
if not we_are_translated():
- assert type(u) is unicode
+ assert type(u) is str
w_u1 = self.interned_strings.get(u)
if w_u1 is None:
w_u1 = w_u
@@ -839,12 +839,12 @@
# returns a "text" object (ie str in python2 and unicode in python3)
if not we_are_translated():
assert type(s) is str
- u = s.decode('utf-8')
- w_s1 = self.interned_strings.get(u)
+ #u = s.decode('utf-8')
+ w_s1 = self.interned_strings.get(s)
if w_s1 is None:
- w_s1 = self.newunicode(u)
+ w_s1 = self.newtext(s)
if self._side_effects_ok():
- self.interned_strings.set(u, w_s1)
+ self.interned_strings.set(s, w_s1)
return w_s1
def _revdb_startup(self):
@@ -1619,7 +1619,7 @@
an utf-8 encoded rpython string.
"""
assert w_obj is not None
- return w_obj.text_w(self)
+ return w_obj.utf8_w(self)
@not_rpython # tests only; should be replaced with bytes_w or text_w
def str_w(self, w_obj):
diff --git a/pypy/interpreter/test/test_unicodehelper.py
b/pypy/interpreter/test/test_unicodehelper.py
--- a/pypy/interpreter/test/test_unicodehelper.py
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -11,7 +11,6 @@
class Hit(Exception):
pass
-from pypy.interpreter.unicodehelper import str_decode_utf8
from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii
from pypy.interpreter import unicodehelper as uh
from pypy.module._codecs.interp_codecs import CodecState
diff --git a/pypy/interpreter/unicodehelper.py
b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -3,7 +3,7 @@
from pypy.interpreter.error import OperationError, oefmt
from rpython.rlib.objectmodel import specialize
from rpython.rlib.rstring import StringBuilder
-from rpython.rlib import rutf8
+from rpython.rlib import rutf8, runicode
from rpython.rlib.rarithmetic import r_uint, intmask
from rpython.rtyper.lltypesystem import rffi
from pypy.module.unicodedata import unicodedb
diff --git a/pypy/objspace/std/dictmultiobject.py
b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -1184,7 +1184,7 @@
# we should implement the same shortcuts as we do for BytesDictStrategy
def decodekey_str(self, key):
- return str_decode_utf8(self.space, key, allow_surrogates=True)
+ return str_decode_utf8(key, "string", True, None,
allow_surrogates=True)[0]
def setitem_str(self, w_dict, key, w_value):
assert key is not None
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -388,7 +388,8 @@
return W_BytearrayObject(l)
def newtext(self, s):
- return self.newtext(str_decode_utf8(self, s, allow_surrogates=True))
+ lgt = rutf8.check_utf8(s, True)
+ return W_UnicodeObject(s, lgt)
def newtext_or_none(self, s):
if s is None:
diff --git a/pypy/objspace/std/unicodeobject.py
b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1827,7 +1827,7 @@
def unicode_to_decimal_w(space, w_unistr, allow_surrogates=False):
if not isinstance(w_unistr, W_UnicodeObject):
raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr)
- value = _rpy_unicode_to_decimal_w(space, w_unistr._value)
+ value = _rpy_unicode_to_decimal_w(space, w_unistr.utf8_w(space))
return unicodehelper.encode_utf8(space, value,
allow_surrogates=allow_surrogates)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit