Author: fijal
Branch: unicode-utf8
Changeset: r90400:17031d8a78ec
Date: 2017-02-27 15:02 +0100
http://bitbucket.org/pypy/pypy/changeset/17031d8a78ec/
Log: fixes
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -57,7 +57,6 @@
assert 0 <= ps <= q
substr = s[ps:q]
else:
- xxx
substr = decode_unicode_utf8(space, s, ps, q)
if rawmode:
v, length = unicodehelper.decode_raw_unicode_escape(space, substr)
@@ -72,7 +71,8 @@
substr = s[ps : q]
if rawmode or '\\' not in s[ps:]:
if need_encoding:
- w_u = space.newunicode(unicodehelper.decode_utf8(space, substr))
+ utf, lgt = unicodehelper.decode_utf8(space, substr)
+ w_u = space.newutf8(utf, lgt)
w_v = unicodehelper.encode(space, w_u, encoding)
return w_v
else:
@@ -222,8 +222,8 @@
# while (s < end && *s != '\\') s++; */ /* inefficient for u".."
while ps < end and ord(s[ps]) & 0x80:
ps += 1
- u = unicodehelper.decode_utf8(space, s[pt:ps])
- return u, ps
+ utf, _ = unicodehelper.decode_utf8(space, s[pt:ps])
+ return utf.decode('utf8'), ps
def decode_utf8_recode(space, s, ps, end, recode_encoding):
u, ps = decode_utf8(space, s, ps, end)
diff --git a/pypy/interpreter/pyparser/test/test_parsestring.py b/pypy/interpreter/pyparser/test/test_parsestring.py
--- a/pypy/interpreter/pyparser/test/test_parsestring.py
+++ b/pypy/interpreter/pyparser/test/test_parsestring.py
@@ -50,7 +50,7 @@
s = "u'\x81'"
s = s.decode("koi8-u").encode("utf8")
w_ret = parsestring.parsestr(self.space, 'koi8-u', s)
- ret = space.unwrap(w_ret)
+ ret = w_ret._utf8.decode('utf8')
assert ret == eval("# -*- coding: koi8-u -*-\nu'\x81'")
def test_unicode_literals(self):
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -77,6 +77,13 @@
errorhandler=raise_unicode_exception_encode,
allow_surrogates=True)
+def decode_utf8(space, s):
+ u, _ = runicode.str_decode_utf_8(s, len(s),
+ "strict", final=True,
+ errorhandler=decode_error_handler(space),
+ allow_surrogates=True)
+ return u.encode('utf8'), len(u)
+
def utf8_encode_ascii(utf8, utf8len, errors, errorhandler):
if len(utf8) == utf8len:
return utf8
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -2,7 +2,7 @@
from rpython.rlib.objectmodel import (
compute_hash, compute_unique_id, import_from_mixin,
- enforceargs, newlist_hint)
+ enforceargs, newlist_hint, specialize)
from rpython.rlib.buffer import StringBuffer
from rpython.rlib.rstring import StringBuilder, split, rsplit, UnicodeBuilder
from rpython.rlib.runicode import make_unicode_escape_function
@@ -116,9 +116,8 @@
return W_UnicodeObject(value.encode('utf8'), len(value))
def _new_from_list(self, value):
- xxx
- return W_UnicodeObject(u''.join(value))
-
+ u = u''.join(value)
+ return W_UnicodeObject(u.encode('utf8'), len(u))
def _empty(self):
return W_UnicodeObject.EMPTY
@@ -154,12 +153,13 @@
def convert_to_w_unicode(self, space):
return self
+ @specialize.argtype(1)
def _chr(self, char):
assert len(char) == 1
return char[0]
def _multi_chr(self, unichar):
- return unichar.encode('utf8')
+ return unichar
_builder = UnicodeBuilder
@@ -387,7 +387,7 @@
def descr_join(self, space, w_list):
l = space.listview_unicode(w_list)
if l is not None:
- xxx
+ assert False, "unreachable"
if len(l) == 1:
return space.newunicode(l[0])
return space.newunicode(self._utf8).join(l)
@@ -513,7 +513,7 @@
def descr_zfill(self, space, width):
selfval = self._utf8
if len(selfval) == 0:
- return W_UnicodeObject(self._multi_chr(self._chr('0')) * width, width)
+ return W_UnicodeObject(self._chr('0') * width, width)
num_zeros = width - self._len()
if num_zeros <= 0:
# cannot return self, in case it is a subclass of str
@@ -571,7 +571,7 @@
d = width - self._len()
if d > 0:
offset = d//2 + (d & width & 1)
- fillchar = self._multi_chr(fillchar[0])
+ fillchar = fillchar[0]
centered = offset * fillchar + value + (d - offset) * fillchar
else:
centered = value
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit