[pypy-commit] pypy unicode-utf8: remove remaining space.newunicode, continue fix in f8aaef6e3548, fix translation

mattip Mon, 19 Mar 2018 05:17:34 -0700

Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8
Changeset: r94000:1ea028ef8faa
Date: 2018-03-19 13:16 +0100
http://bitbucket.org/pypy/pypy/changeset/1ea028ef8faa/


Log:    remove remaining space.newunicode, continue fix in f8aaef6e3548, fix
        translation

diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1783,9 +1783,8 @@
 
     def unicode0_w(self, w_obj):
         "Like unicode_w, but rejects strings with NUL bytes."
-        xxxx
         from rpython.rlib import rstring
-        result = w_obj.unicode_w(self)
+        result = w_obj.utf8_w(self).decode('utf8')
         if u'\x00' in result:
             raise oefmt(self.w_TypeError,
                         "argument must be a unicode string without NUL "
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -224,6 +224,7 @@
 if sys.platform == 'win32':
     def utf8_encode_mbcs(s, errors, errorhandler):
         from rpython.rlib import runicode
+        s = s.decode('utf-8')
         slen = len(s)
         res = runicode.unicode_encode_mbcs(s, slen, errors, errorhandler)
         return res
@@ -512,7 +513,7 @@
             builder.append_char('\\')
             builder.append_code(ord(ch))
 
-    return builder.build(), pos, builder.get_length()
+    return builder.build(), pos, builder.getlength()
 
 def wcharpsize2utf8(space, wcharp, size):
     """Safe version of rffi.wcharpsize2utf8.
@@ -574,7 +575,7 @@
         pos = hexescape(builder, s, pos, digits,
                            "rawunicodeescape", errorhandler, message, errors)
 
-    return builder.build(), pos, builder.get_length()
+    return builder.build(), pos, builder.getlength()
 
 _utf8_encode_unicode_escape = rutf8.make_utf8_escape_function()
 
diff --git a/pypy/module/__pypy__/interp_builders.py b/pypy/module/__pypy__/interp_builders.py
--- a/pypy/module/__pypy__/interp_builders.py
+++ b/pypy/module/__pypy__/interp_builders.py
@@ -77,7 +77,7 @@
         self.builder.append_slice(w_unicode._utf8, byte_start, byte_end)
 
     def descr_build(self, space):
-        w_s = space.newutf8(self.builder.build(), self.builder.get_length())
+        w_s = space.newutf8(self.builder.build(), self.builder.getlength())
         # after build(), we can continue to append more strings
         # to the same builder.  This is supported since
         # 2ff5087aca28 in RPython.
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -703,7 +703,7 @@
             builder.append(data)
             remaining -= len(data)
 
-        return space.newutf8(builder.build(), builder.get_length())
+        return space.newutf8(builder.build(), builder.getlength())
 
     def _scan_line_ending(self, limit):
         if self.readuniversal:
diff --git a/pypy/module/_pypyjson/targetjson.py b/pypy/module/_pypyjson/targetjson.py
--- a/pypy/module/_pypyjson/targetjson.py
+++ b/pypy/module/_pypyjson/targetjson.py
@@ -93,9 +93,6 @@
         assert isinstance(key, W_Unicode)
         d.dictval[key.unival] = value
 
-    def newunicode(self, x):
-        return W_Unicode(x)
-
     def newtext(self, x):
         return W_String(x)
     newbytes = newtext
diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py
--- a/pypy/module/posix/interp_posix.py
+++ b/pypy/module/posix/interp_posix.py
@@ -97,6 +97,9 @@
                 return func(fname1, fname2, *args)
     return dispatch
 
+def u2utf8(space, u_str):
+    return space.newutf8(u_str.encode('utf-8'), len(u_str))
+
 @unwrap_spec(flag=c_int, mode=c_int)
 def open(space, w_fname, flag, mode=0777):
     """Open a file (for low level IO).
@@ -422,7 +425,7 @@
         if space.isinstance_w(w_path, space.w_unicode):
             path = FileEncoder(space, w_path)
             fullpath = rposix.getfullpathname(path)
-            w_fullpath = space.newunicode(fullpath)
+            w_fullpath = u2utf8(space, fullpath)
         else:
             path = space.bytes0_w(w_path)
             fullpath = rposix.getfullpathname(path)
@@ -449,7 +452,7 @@
         except OSError as e:
             raise wrap_oserror(space, e)
         else:
-            return space.newunicode(cur)
+            return u2utf8(space, cur)
 else:
     def getcwdu(space):
         """Return the current working directory as a unicode string."""
@@ -588,7 +591,7 @@
                             raise
                         w_res = w_bytes
                 elif isinstance(res, unicode):
-                    w_res = space.newunicode(res)
+                    w_res = u2utf8(space, res)
                 else:
                     assert False
                 result_w[i] = w_res
diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py
--- a/pypy/module/struct/formatiterator.py
+++ b/pypy/module/struct/formatiterator.py
@@ -186,7 +186,7 @@
         elif isinstance(value, str):
             w_value = self.space.newbytes(value)
         elif isinstance(value, unicode):
-            w_value = self.space.newunicode(value)
+            w_value = self.space.newutf8(value.decode('utf-8'), len(value))
         else:
             assert 0, "unreachable"
         self.result_w.append(w_value)
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -212,9 +212,6 @@
     def newutf8(self, x, l):
         return w_some_obj()
 
-    def newunicode(self, a):
-        return w_some_obj()
-
     newtext = newbytes
     newtext_or_none = newbytes
     newfilename = newbytes
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -50,7 +50,7 @@
     @staticmethod
     def from_utf8builder(builder):
         return W_UnicodeObject(
-            builder.build(), builder.get_length())
+            builder.build(), builder.getlength())
 
     def __repr__(self):
         """representation for debugging purposes"""
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -725,7 +725,7 @@
         return self._s.build()
 
     @always_inline
-    def get_length(self):
+    def getlength(self):
         return self._lgt
 
 class Utf8StringIterator(object):
diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py
--- a/rpython/rlib/test/test_rutf8.py
+++ b/rpython/rlib/test/test_rutf8.py
@@ -169,41 +169,41 @@
     s = rutf8.Utf8StringBuilder()
     s.append("foo")
     s.append_char("x")
-    assert s.get_length() == 4
+    assert s.getlength() == 4
     assert s.build() == "foox"
     s.append(u"\u1234".encode("utf8"))
-    assert s.get_length() == 5
+    assert s.getlength() == 5
     assert s.build().decode("utf8") == u"foox\u1234"
     s.append("foo")
     s.append_char("x")
-    assert s.get_length() == 9
+    assert s.getlength() == 9
     assert s.build().decode("utf8") == u"foox\u1234foox"
 
     s = rutf8.Utf8StringBuilder()
     s.append_code(0x1234)
     assert s.build().decode("utf8") == u"\u1234"
-    assert s.get_length() == 1
+    assert s.getlength() == 1
     s.append_code(0xD800)
-    assert s.get_length() == 2
+    assert s.getlength() == 2
 
     s = rutf8.Utf8StringBuilder()
     s.append_utf8("abc", 3)
-    assert s.get_length() == 3
+    assert s.getlength() == 3
     assert s.build().decode("utf8") == u"abc"
 
     s.append_utf8(u"\u1234".encode("utf8"), 1)
     assert s.build().decode("utf8") == u"abc\u1234"
-    assert s.get_length() == 4
+    assert s.getlength() == 4
 
     s.append_code(0xD800)
-    assert s.get_length() == 5
+    assert s.getlength() == 5
 
 def test_utf8_string_builder_bad_code():
     s = rutf8.Utf8StringBuilder()
     with pytest.raises(ValueError):
         s.append_code(0x110000)
     assert s.build() == ''
-    assert s.get_length() == 0
+    assert s.getlength() == 0
 
 @given(strategies.text())
 def test_utf8_iterator(arg):
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8: remove remaining space.newunicode, continue fix in f8aaef6e3548, fix translation

Reply via email to