Author: fijal Branch: unicode-utf8 Changeset: r90400:17031d8a78ec Date: 2017-02-27 15:02 +0100 http://bitbucket.org/pypy/pypy/changeset/17031d8a78ec/
Log: fixes diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -57,7 +57,6 @@ assert 0 <= ps <= q substr = s[ps:q] else: - xxx substr = decode_unicode_utf8(space, s, ps, q) if rawmode: v, length = unicodehelper.decode_raw_unicode_escape(space, substr) @@ -72,7 +71,8 @@ substr = s[ps : q] if rawmode or '\\' not in s[ps:]: if need_encoding: - w_u = space.newunicode(unicodehelper.decode_utf8(space, substr)) + utf, lgt = unicodehelper.decode_utf8(space, substr) + w_u = space.newutf8(utf, lgt) w_v = unicodehelper.encode(space, w_u, encoding) return w_v else: @@ -222,8 +222,8 @@ # while (s < end && *s != '\\') s++; */ /* inefficient for u".." while ps < end and ord(s[ps]) & 0x80: ps += 1 - u = unicodehelper.decode_utf8(space, s[pt:ps]) - return u, ps + utf, _ = unicodehelper.decode_utf8(space, s[pt:ps]) + return utf.decode('utf8'), ps def decode_utf8_recode(space, s, ps, end, recode_encoding): u, ps = decode_utf8(space, s, ps, end) diff --git a/pypy/interpreter/pyparser/test/test_parsestring.py b/pypy/interpreter/pyparser/test/test_parsestring.py --- a/pypy/interpreter/pyparser/test/test_parsestring.py +++ b/pypy/interpreter/pyparser/test/test_parsestring.py @@ -50,7 +50,7 @@ s = "u'\x81'" s = s.decode("koi8-u").encode("utf8") w_ret = parsestring.parsestr(self.space, 'koi8-u', s) - ret = space.unwrap(w_ret) + ret = w_ret._utf8.decode('utf8') assert ret == eval("# -*- coding: koi8-u -*-\nu'\x81'") def test_unicode_literals(self): diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -77,6 +77,13 @@ errorhandler=raise_unicode_exception_encode, allow_surrogates=True) +def decode_utf8(space, s): + u, _ = runicode.str_decode_utf_8(s, len(s), + "strict", final=True, + errorhandler=decode_error_handler(space), + allow_surrogates=True) + return u.encode('utf8'), len(u) + def utf8_encode_ascii(utf8, utf8len, errors, errorhandler): if len(utf8) == utf8len: return utf8 diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -2,7 +2,7 @@ from rpython.rlib.objectmodel import ( compute_hash, compute_unique_id, import_from_mixin, - enforceargs, newlist_hint) + enforceargs, newlist_hint, specialize) from rpython.rlib.buffer import StringBuffer from rpython.rlib.rstring import StringBuilder, split, rsplit, UnicodeBuilder from rpython.rlib.runicode import make_unicode_escape_function @@ -116,9 +116,8 @@ return W_UnicodeObject(value.encode('utf8'), len(value)) def _new_from_list(self, value): - xxx - return W_UnicodeObject(u''.join(value)) - + u = u''.join(value) + return W_UnicodeObject(u.encode('utf8'), len(u)) def _empty(self): return W_UnicodeObject.EMPTY @@ -154,12 +153,13 @@ def convert_to_w_unicode(self, space): return self + @specialize.argtype(1) def _chr(self, char): assert len(char) == 1 return char[0] def _multi_chr(self, unichar): - return unichar.encode('utf8') + return unichar _builder = UnicodeBuilder @@ -387,7 +387,7 @@ def descr_join(self, space, w_list): l = space.listview_unicode(w_list) if l is not None: - xxx + assert False, "unreachable" if len(l) == 1: return space.newunicode(l[0]) return space.newunicode(self._utf8).join(l) @@ -513,7 +513,7 @@ def descr_zfill(self, space, width): selfval = self._utf8 if len(selfval) == 0: - return W_UnicodeObject(self._multi_chr(self._chr('0')) * width, width) + return W_UnicodeObject(self._chr('0') * width, width) num_zeros = width - self._len() if num_zeros <= 0: # cannot return self, in case it is a subclass of str @@ -571,7 +571,7 @@ d = width - self._len() if d > 0: offset = d//2 + (d & width & 1) - fillchar = self._multi_chr(fillchar[0]) + fillchar = fillchar[0] centered = offset * fillchar + value + (d - offset) * fillchar else: centered = value _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit