Author: fijal Branch: unicode-utf8 Changeset: r93308:7ffcfc6493e6 Date: 2017-12-08 10:38 +0200 http://bitbucket.org/pypy/pypy/changeset/7ffcfc6493e6/
Log: whack at _io module diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -1,3 +1,5 @@ +from rpython.rlib.rutf8 import get_utf8_length + from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.typedef import ( TypeDef, generic_new_descr, GetSetProperty) @@ -152,7 +154,7 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.new_from_utf8(self.readnl)) # YYY + w_readnl = space.str(space.newutf8(self.readnl, get_utf8_length(self.readnl))) # YYY return space.newtuple([ w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) @@ -215,7 +217,8 @@ if self.writenl: w_decoded = space.call_method( w_decoded, "replace", - space.newtext("\n"), space.new_from_utf8(self.writenl)) + space.newtext("\n"), space.newutf8(self.writenl, + get_utf8_length(self.writenl))) string = space.utf8_w(w_decoded) if string: self.buf.write(string) @@ -225,7 +228,9 @@ def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - return space.new_from_utf8(self.buf.read(size)) + v = self.buf.read(size) + lgt = get_utf8_length(v) + return space.newutf8(v, lgt) def readline_w(self, space, w_limit=None): self._check_closed(space) @@ -239,7 +244,8 @@ else: newline = self.readnl result = self.buf.readline(newline, limit) - return space.new_from_utf8(result) + resultlen = get_utf8_length(result) + return space.newutf8(result, resultlen) @unwrap_spec(pos=int, mode=int) @@ -276,7 +282,9 @@ def getvalue_w(self, space): self._check_closed(space) - return space.new_from_utf8(self.buf.getvalue()) + v = self.buf.getvalue() + lgt = get_utf8_length(v) + return space.newutf8(v, lgt) def readable_w(self, space): self._check_closed(space) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -12,7 +12,8 @@ from rpython.rlib.rbigint import rbigint from rpython.rlib.rstring import StringBuilder from rpython.rlib.rutf8 import (check_utf8, next_codepoint_pos, - codepoints_in_utf8) + codepoints_in_utf8, get_utf8_length, + Utf8StringBuilder) STATE_ZERO, STATE_OK, STATE_DETACHED = range(3) @@ -684,13 +685,15 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.new_from_utf8(self.decoded.get_chars(-1)) + chars = self.decoded.get_chars(-1) + lgt = get_utf8_length(chars) + w_result = space.newutf8(chars, lgt) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final remaining = size - builder = StringBuilder(size) + builder = Utf8StringBuilder(size) # Keep reading chunks until we have n characters to return while remaining > 0: @@ -700,7 +703,7 @@ builder.append(data) remaining -= len(data) - return space.new_from_utf8(builder.build()) + return space.newutf8(builder.build(), builder.get_length()) def _scan_line_ending(self, limit): if self.readuniversal: @@ -725,6 +728,7 @@ limit = convert_size(space, w_limit) remnant = None builder = StringBuilder() + # XXX maybe use Utf8StringBuilder instead? while True: # First, get some data if necessary has_data = self._ensure_data(space) @@ -771,7 +775,8 @@ self.decoded.reset() result = builder.build() - return space.new_from_utf8(result) + lgt = get_utf8_length(result) + return space.newutf8(result, lgt) # _____________________________________________________________ # write methods @@ -794,8 +799,8 @@ if text.find('\n') >= 0: haslf = True if haslf and self.writetranslate and self.writenl: - w_text = space.call_method(w_text, "replace", space.new_from_utf8('\n'), - space.new_from_utf8(self.writenl)) + w_text = space.call_method(w_text, "replace", space.newutf8('\n', 1), + space.newutf8(self.writenl, get_utf8_length(self.writenl))) text = space.utf8_w(w_text) needflush = False diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -212,9 +212,6 @@ def newutf8(self, x, l): return w_some_obj() - def new_from_utf8(self, a): - return w_some_obj() - def newunicode(self, a): return w_some_obj() _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit