Author: Matti Picus <matti.pi...@gmail.com> Branch: unicode-utf8 Changeset: r94000:1ea028ef8faa Date: 2018-03-19 13:16 +0100 http://bitbucket.org/pypy/pypy/changeset/1ea028ef8faa/
Log: remove remaining space.newunicode, continue fix in f8aaef6e3548, fix translation diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1783,9 +1783,8 @@ def unicode0_w(self, w_obj): "Like unicode_w, but rejects strings with NUL bytes." - xxxx from rpython.rlib import rstring - result = w_obj.unicode_w(self) + result = w_obj.utf8_w(self).decode('utf8') if u'\x00' in result: raise oefmt(self.w_TypeError, "argument must be a unicode string without NUL " diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -224,6 +224,7 @@ if sys.platform == 'win32': def utf8_encode_mbcs(s, errors, errorhandler): from rpython.rlib import runicode + s = s.decode('utf-8') slen = len(s) res = runicode.unicode_encode_mbcs(s, slen, errors, errorhandler) return res @@ -512,7 +513,7 @@ builder.append_char('\\') builder.append_code(ord(ch)) - return builder.build(), pos, builder.get_length() + return builder.build(), pos, builder.getlength() def wcharpsize2utf8(space, wcharp, size): """Safe version of rffi.wcharpsize2utf8. @@ -574,7 +575,7 @@ pos = hexescape(builder, s, pos, digits, "rawunicodeescape", errorhandler, message, errors) - return builder.build(), pos, builder.get_length() + return builder.build(), pos, builder.getlength() _utf8_encode_unicode_escape = rutf8.make_utf8_escape_function() diff --git a/pypy/module/__pypy__/interp_builders.py b/pypy/module/__pypy__/interp_builders.py --- a/pypy/module/__pypy__/interp_builders.py +++ b/pypy/module/__pypy__/interp_builders.py @@ -77,7 +77,7 @@ self.builder.append_slice(w_unicode._utf8, byte_start, byte_end) def descr_build(self, space): - w_s = space.newutf8(self.builder.build(), self.builder.get_length()) + w_s = space.newutf8(self.builder.build(), self.builder.getlength()) # after build(), we can continue to append more strings # to the same builder. This is supported since # 2ff5087aca28 in RPython. diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -703,7 +703,7 @@ builder.append(data) remaining -= len(data) - return space.newutf8(builder.build(), builder.get_length()) + return space.newutf8(builder.build(), builder.getlength()) def _scan_line_ending(self, limit): if self.readuniversal: diff --git a/pypy/module/_pypyjson/targetjson.py b/pypy/module/_pypyjson/targetjson.py --- a/pypy/module/_pypyjson/targetjson.py +++ b/pypy/module/_pypyjson/targetjson.py @@ -93,9 +93,6 @@ assert isinstance(key, W_Unicode) d.dictval[key.unival] = value - def newunicode(self, x): - return W_Unicode(x) - def newtext(self, x): return W_String(x) newbytes = newtext diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -97,6 +97,9 @@ return func(fname1, fname2, *args) return dispatch +def u2utf8(space, u_str): + return space.newutf8(u_str.encode('utf-8'), len(u_str)) + @unwrap_spec(flag=c_int, mode=c_int) def open(space, w_fname, flag, mode=0777): """Open a file (for low level IO). @@ -422,7 +425,7 @@ if space.isinstance_w(w_path, space.w_unicode): path = FileEncoder(space, w_path) fullpath = rposix.getfullpathname(path) - w_fullpath = space.newunicode(fullpath) + w_fullpath = u2utf8(space, fullpath) else: path = space.bytes0_w(w_path) fullpath = rposix.getfullpathname(path) @@ -449,7 +452,7 @@ except OSError as e: raise wrap_oserror(space, e) else: - return space.newunicode(cur) + return u2utf8(space, cur) else: def getcwdu(space): """Return the current working directory as a unicode string.""" @@ -588,7 +591,7 @@ raise w_res = w_bytes elif isinstance(res, unicode): - w_res = space.newunicode(res) + w_res = u2utf8(space, res) else: assert False result_w[i] = w_res diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py --- a/pypy/module/struct/formatiterator.py +++ b/pypy/module/struct/formatiterator.py @@ -186,7 +186,7 @@ elif isinstance(value, str): w_value = self.space.newbytes(value) elif isinstance(value, unicode): - w_value = self.space.newunicode(value) + w_value = self.space.newutf8(value.decode('utf-8'), len(value)) else: assert 0, "unreachable" self.result_w.append(w_value) diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -212,9 +212,6 @@ def newutf8(self, x, l): return w_some_obj() - def newunicode(self, a): - return w_some_obj() - newtext = newbytes newtext_or_none = newbytes newfilename = newbytes diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -50,7 +50,7 @@ @staticmethod def from_utf8builder(builder): return W_UnicodeObject( - builder.build(), builder.get_length()) + builder.build(), builder.getlength()) def __repr__(self): """representation for debugging purposes""" diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -725,7 +725,7 @@ return self._s.build() @always_inline - def get_length(self): + def getlength(self): return self._lgt class Utf8StringIterator(object): diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -169,41 +169,41 @@ s = rutf8.Utf8StringBuilder() s.append("foo") s.append_char("x") - assert s.get_length() == 4 + assert s.getlength() == 4 assert s.build() == "foox" s.append(u"\u1234".encode("utf8")) - assert s.get_length() == 5 + assert s.getlength() == 5 assert s.build().decode("utf8") == u"foox\u1234" s.append("foo") s.append_char("x") - assert s.get_length() == 9 + assert s.getlength() == 9 assert s.build().decode("utf8") == u"foox\u1234foox" s = rutf8.Utf8StringBuilder() s.append_code(0x1234) assert s.build().decode("utf8") == u"\u1234" - assert s.get_length() == 1 + assert s.getlength() == 1 s.append_code(0xD800) - assert s.get_length() == 2 + assert s.getlength() == 2 s = rutf8.Utf8StringBuilder() s.append_utf8("abc", 3) - assert s.get_length() == 3 + assert s.getlength() == 3 assert s.build().decode("utf8") == u"abc" s.append_utf8(u"\u1234".encode("utf8"), 1) assert s.build().decode("utf8") == u"abc\u1234" - assert s.get_length() == 4 + assert s.getlength() == 4 s.append_code(0xD800) - assert s.get_length() == 5 + assert s.getlength() == 5 def test_utf8_string_builder_bad_code(): s = rutf8.Utf8StringBuilder() with pytest.raises(ValueError): s.append_code(0x110000) assert s.build() == '' - assert s.get_length() == 0 + assert s.getlength() == 0 @given(strategies.text()) def test_utf8_iterator(arg): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit