[pypy-commit] pypy unicode-utf8-py3: fix interned strings, space.newtext, use of W_Unicode._value

mattip Wed, 13 Jun 2018 22:43:35 -0700

Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r94759:eee2a361ae13
Date: 2018-06-13 20:21 -0700
http://bitbucket.org/pypy/pypy/changeset/eee2a361ae13/


Log:    fix interned strings, space.newtext, use of W_Unicode._value

diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -424,7 +424,7 @@
         self.builtin_modules = {}
         self.reloading_modules = {}
 
-        self.interned_strings = make_weak_value_dictionary(self, unicode, W_Root)
+        self.interned_strings = make_weak_value_dictionary(self, str, W_Root)
         self.actionflag = ActionFlag()    # changed by the signal module
         self.check_signal_action = None   # changed by the signal module
         make_finalizer_queue(W_Root, self)
@@ -826,7 +826,7 @@
         assert isinstance(w_u, W_Root)   # and is not None
         u = self.unicode_w(w_u)
         if not we_are_translated():
-            assert type(u) is unicode
+            assert type(u) is str
         w_u1 = self.interned_strings.get(u)
         if w_u1 is None:
             w_u1 = w_u
@@ -839,12 +839,12 @@
         # returns a "text" object (ie str in python2 and unicode in python3)
         if not we_are_translated():
             assert type(s) is str
-        u = s.decode('utf-8')
-        w_s1 = self.interned_strings.get(u)
+        #u = s.decode('utf-8')
+        w_s1 = self.interned_strings.get(s)
         if w_s1 is None:
-            w_s1 = self.newunicode(u)
+            w_s1 = self.newtext(s)
             if self._side_effects_ok():
-                self.interned_strings.set(u, w_s1)
+                self.interned_strings.set(s, w_s1)
         return w_s1
 
     def _revdb_startup(self):
@@ -1619,7 +1619,7 @@
             an utf-8 encoded rpython string.
         """
         assert w_obj is not None
-        return w_obj.text_w(self)
+        return w_obj.utf8_w(self)
 
     @not_rpython    # tests only; should be replaced with bytes_w or text_w
     def str_w(self, w_obj):
diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py
--- a/pypy/interpreter/test/test_unicodehelper.py
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -11,7 +11,6 @@
 class Hit(Exception):
     pass
 
-from pypy.interpreter.unicodehelper import str_decode_utf8
 from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii
 from pypy.interpreter import unicodehelper as uh
 from pypy.module._codecs.interp_codecs import CodecState
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -3,7 +3,7 @@
 from pypy.interpreter.error import OperationError, oefmt
 from rpython.rlib.objectmodel import specialize
 from rpython.rlib.rstring import StringBuilder
-from rpython.rlib import rutf8
+from rpython.rlib import rutf8, runicode
 from rpython.rlib.rarithmetic import r_uint, intmask
 from rpython.rtyper.lltypesystem import rffi
 from pypy.module.unicodedata import unicodedb
diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -1184,7 +1184,7 @@
     # we should implement the same shortcuts as we do for BytesDictStrategy
 
     def decodekey_str(self, key):
-        return str_decode_utf8(self.space, key, allow_surrogates=True)
+        return str_decode_utf8(key, "string", True, None, allow_surrogates=True)[0]
 
     def setitem_str(self, w_dict, key, w_value):
         assert key is not None
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -388,7 +388,8 @@
         return W_BytearrayObject(l)
 
     def newtext(self, s):
-        return self.newtext(str_decode_utf8(self, s, allow_surrogates=True))
+        lgt = rutf8.check_utf8(s, True)
+        return W_UnicodeObject(s, lgt)
 
     def newtext_or_none(self, s):
         if s is None:
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1827,7 +1827,7 @@
 def unicode_to_decimal_w(space, w_unistr, allow_surrogates=False):
     if not isinstance(w_unistr, W_UnicodeObject):
         raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr)
-    value = _rpy_unicode_to_decimal_w(space, w_unistr._value)
+    value = _rpy_unicode_to_decimal_w(space, w_unistr.utf8_w(space))
     return unicodehelper.encode_utf8(space, value,
                                      allow_surrogates=allow_surrogates)
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8-py3: fix interned strings, space.newtext, use of W_Unicode._value

Reply via email to