Author: Matti Picus <[email protected]>
Branch: unicode-utf8
Changeset: r94000:1ea028ef8faa
Date: 2018-03-19 13:16 +0100
http://bitbucket.org/pypy/pypy/changeset/1ea028ef8faa/
Log: remove remaining space.newunicode, continue fix in f8aaef6e3548, fix
translation
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1783,9 +1783,8 @@
def unicode0_w(self, w_obj):
"Like unicode_w, but rejects strings with NUL bytes."
- xxxx
from rpython.rlib import rstring
- result = w_obj.unicode_w(self)
+ result = w_obj.utf8_w(self).decode('utf8')
if u'\x00' in result:
raise oefmt(self.w_TypeError,
"argument must be a unicode string without NUL "
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -224,6 +224,7 @@
if sys.platform == 'win32':
def utf8_encode_mbcs(s, errors, errorhandler):
from rpython.rlib import runicode
+ s = s.decode('utf-8')
slen = len(s)
res = runicode.unicode_encode_mbcs(s, slen, errors, errorhandler)
return res
@@ -512,7 +513,7 @@
builder.append_char('\\')
builder.append_code(ord(ch))
- return builder.build(), pos, builder.get_length()
+ return builder.build(), pos, builder.getlength()
def wcharpsize2utf8(space, wcharp, size):
"""Safe version of rffi.wcharpsize2utf8.
@@ -574,7 +575,7 @@
pos = hexescape(builder, s, pos, digits,
"rawunicodeescape", errorhandler, message, errors)
- return builder.build(), pos, builder.get_length()
+ return builder.build(), pos, builder.getlength()
_utf8_encode_unicode_escape = rutf8.make_utf8_escape_function()
diff --git a/pypy/module/__pypy__/interp_builders.py b/pypy/module/__pypy__/interp_builders.py
--- a/pypy/module/__pypy__/interp_builders.py
+++ b/pypy/module/__pypy__/interp_builders.py
@@ -77,7 +77,7 @@
self.builder.append_slice(w_unicode._utf8, byte_start, byte_end)
def descr_build(self, space):
- w_s = space.newutf8(self.builder.build(), self.builder.get_length())
+ w_s = space.newutf8(self.builder.build(), self.builder.getlength())
# after build(), we can continue to append more strings
# to the same builder. This is supported since
# 2ff5087aca28 in RPython.
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -703,7 +703,7 @@
builder.append(data)
remaining -= len(data)
- return space.newutf8(builder.build(), builder.get_length())
+ return space.newutf8(builder.build(), builder.getlength())
def _scan_line_ending(self, limit):
if self.readuniversal:
diff --git a/pypy/module/_pypyjson/targetjson.py b/pypy/module/_pypyjson/targetjson.py
--- a/pypy/module/_pypyjson/targetjson.py
+++ b/pypy/module/_pypyjson/targetjson.py
@@ -93,9 +93,6 @@
assert isinstance(key, W_Unicode)
d.dictval[key.unival] = value
- def newunicode(self, x):
- return W_Unicode(x)
-
def newtext(self, x):
return W_String(x)
newbytes = newtext
diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py
--- a/pypy/module/posix/interp_posix.py
+++ b/pypy/module/posix/interp_posix.py
@@ -97,6 +97,9 @@
return func(fname1, fname2, *args)
return dispatch
+def u2utf8(space, u_str):
+ return space.newutf8(u_str.encode('utf-8'), len(u_str))
+
@unwrap_spec(flag=c_int, mode=c_int)
def open(space, w_fname, flag, mode=0777):
"""Open a file (for low level IO).
@@ -422,7 +425,7 @@
if space.isinstance_w(w_path, space.w_unicode):
path = FileEncoder(space, w_path)
fullpath = rposix.getfullpathname(path)
- w_fullpath = space.newunicode(fullpath)
+ w_fullpath = u2utf8(space, fullpath)
else:
path = space.bytes0_w(w_path)
fullpath = rposix.getfullpathname(path)
@@ -449,7 +452,7 @@
except OSError as e:
raise wrap_oserror(space, e)
else:
- return space.newunicode(cur)
+ return u2utf8(space, cur)
else:
def getcwdu(space):
"""Return the current working directory as a unicode string."""
@@ -588,7 +591,7 @@
raise
w_res = w_bytes
elif isinstance(res, unicode):
- w_res = space.newunicode(res)
+ w_res = u2utf8(space, res)
else:
assert False
result_w[i] = w_res
diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py
--- a/pypy/module/struct/formatiterator.py
+++ b/pypy/module/struct/formatiterator.py
@@ -186,7 +186,7 @@
elif isinstance(value, str):
w_value = self.space.newbytes(value)
elif isinstance(value, unicode):
- w_value = self.space.newunicode(value)
+ w_value = self.space.newutf8(value.decode('utf-8'), len(value))
else:
assert 0, "unreachable"
self.result_w.append(w_value)
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -212,9 +212,6 @@
def newutf8(self, x, l):
return w_some_obj()
- def newunicode(self, a):
- return w_some_obj()
-
newtext = newbytes
newtext_or_none = newbytes
newfilename = newbytes
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -50,7 +50,7 @@
@staticmethod
def from_utf8builder(builder):
return W_UnicodeObject(
- builder.build(), builder.get_length())
+ builder.build(), builder.getlength())
def __repr__(self):
"""representation for debugging purposes"""
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -725,7 +725,7 @@
return self._s.build()
@always_inline
- def get_length(self):
+ def getlength(self):
return self._lgt
class Utf8StringIterator(object):
diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py
--- a/rpython/rlib/test/test_rutf8.py
+++ b/rpython/rlib/test/test_rutf8.py
@@ -169,41 +169,41 @@
s = rutf8.Utf8StringBuilder()
s.append("foo")
s.append_char("x")
- assert s.get_length() == 4
+ assert s.getlength() == 4
assert s.build() == "foox"
s.append(u"\u1234".encode("utf8"))
- assert s.get_length() == 5
+ assert s.getlength() == 5
assert s.build().decode("utf8") == u"foox\u1234"
s.append("foo")
s.append_char("x")
- assert s.get_length() == 9
+ assert s.getlength() == 9
assert s.build().decode("utf8") == u"foox\u1234foox"
s = rutf8.Utf8StringBuilder()
s.append_code(0x1234)
assert s.build().decode("utf8") == u"\u1234"
- assert s.get_length() == 1
+ assert s.getlength() == 1
s.append_code(0xD800)
- assert s.get_length() == 2
+ assert s.getlength() == 2
s = rutf8.Utf8StringBuilder()
s.append_utf8("abc", 3)
- assert s.get_length() == 3
+ assert s.getlength() == 3
assert s.build().decode("utf8") == u"abc"
s.append_utf8(u"\u1234".encode("utf8"), 1)
assert s.build().decode("utf8") == u"abc\u1234"
- assert s.get_length() == 4
+ assert s.getlength() == 4
s.append_code(0xD800)
- assert s.get_length() == 5
+ assert s.getlength() == 5
def test_utf8_string_builder_bad_code():
s = rutf8.Utf8StringBuilder()
with pytest.raises(ValueError):
s.append_code(0x110000)
assert s.build() == ''
- assert s.get_length() == 0
+ assert s.getlength() == 0
@given(strategies.text())
def test_utf8_iterator(arg):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit