Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r95399:5a6b7f57a324
Date: 2018-11-29 22:08 -0800
http://bitbucket.org/pypy/pypy/changeset/5a6b7f57a324/
Log: disallow tuple input to newtext, and also refactor some unicode/utf8 recoding diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -115,7 +115,7 @@ return W_FString(substr, rawmode) else: v = unicodehelper.str_decode_utf8(substr, 'strict', True, None) - return space.newtext(v) + return space.newtext(*v) v = PyString_DecodeEscape(space, substr, 'strict', encoding) return space.newbytes(v) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -771,7 +771,7 @@ self._check_closed(space) self._writeflush(space) limit = convert_size(space, w_limit) - return space.newtext(self._readline(space, limit)) + return space.newtext(*self._readline(space, limit)) def _readline(self, space, limit): # This is a separate function so that readline_w() can be jitted. 
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -1154,7 +1154,7 @@ raise oefmt(space.w_ValueError, "array contains a unicode character out of " "range(0x110000)") - return space.newtext(item) + return space.newtext(rutf8.unichr_as_utf8(ord(item)), 1) assert 0, "unreachable" # interface diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py --- a/pypy/module/time/interp_time.py +++ b/pypy/module/time/interp_time.py @@ -459,8 +459,8 @@ _set_module_object(space, "timezone", space.newint(timezone)) _set_module_object(space, 'daylight', space.newint(daylight)) - tzname_w = [space.newtext(tzname[0].decode('latin-1')), - space.newtext(tzname[1].decode('latin-1'))] + tzname_w = [space.newtext(tzname[0]), + space.newtext(tzname[1])] _set_module_object(space, 'tzname', space.newtuple(tzname_w)) _set_module_object(space, 'altzone', space.newint(altzone)) diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py --- a/pypy/objspace/std/marshal_impl.py +++ b/pypy/objspace/std/marshal_impl.py @@ -371,9 +371,9 @@ m.atom_str(TYPE_STRING, x.co_code) _marshal_tuple(space, x.co_consts_w, m) _marshal_tuple(space, x.co_names_w, m) # list of w_unicodes - co_varnames_w = [space.newtext(_decode_utf8(space, s)) for s in x.co_varnames] - co_freevars_w = [space.newtext(_decode_utf8(space, s)) for s in x.co_freevars] - co_cellvars_w = [space.newtext(_decode_utf8(space, s)) for s in x.co_cellvars] + co_varnames_w = [space.newtext(*_decode_utf8(space, s)) for s in x.co_varnames] + co_freevars_w = [space.newtext(*_decode_utf8(space, s)) for s in x.co_freevars] + co_cellvars_w = [space.newtext(*_decode_utf8(space, s)) for s in x.co_cellvars] _marshal_tuple(space, co_varnames_w, m) # more lists, now of w_unicodes _marshal_tuple(space, co_freevars_w, m) _marshal_tuple(space, co_cellvars_w, m) @@ -451,7 +451,7 @@ 
@unmarshaller(TYPE_UNICODE) def unmarshal_unicode(space, u, tc): uc = _decode_utf8(space, u.get_str()) - return space.newtext(uc) + return space.newtext(*uc) @unmarshaller(TYPE_INTERNED) def unmarshal_interned(space, u, tc): @@ -464,7 +464,7 @@ else: lng = u.get_lng() s = u.get(lng) - w_u = u.space.newtext(s.decode('latin-1')) + w_u = u.space.newtext(s) if interned: w_u = u.space.new_interned_w_str(w_u) return w_u diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -380,16 +380,16 @@ def newbytearray(self, l): return W_BytearrayObject(l) + # XXX TODO - remove this and force all users to call with utf8 @specialize.argtype(1) - def newtext(self, s, lgt=-1): + def newtext(self, s, lgt=-1, unused=-1): + # the unused argument can be from something like + # newtext(*decode_utf8sp(space, code)) if isinstance(s, unicode): s, lgt = s.encode('utf8'), len(s) - elif isinstance(s, str) and lgt < 0: + assert isinstance(s, str) + if lgt < 0: lgt = rutf8.codepoints_in_utf8(s) - elif isinstance(s, tuple): - # result of decode_utf8 - s, lgt, codepoints = s - assert isinstance(s, str) return W_UnicodeObject(s, lgt) def newtext_or_none(self, s, lgt=-1): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit