[pypy-commit] pypy unicode-utf8: fixes

fijal Mon, 27 Feb 2017 06:03:30 -0800

Author: fijal
Branch: unicode-utf8
Changeset: r90400:17031d8a78ec
Date: 2017-02-27 15:02 +0100
http://bitbucket.org/pypy/pypy/changeset/17031d8a78ec/


Log:    fixes

diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -57,7 +57,6 @@
             assert 0 <= ps <= q
             substr = s[ps:q]
         else:
-            xxx
             substr = decode_unicode_utf8(space, s, ps, q)
         if rawmode:
             v, length = unicodehelper.decode_raw_unicode_escape(space, substr)
@@ -72,7 +71,8 @@
     substr = s[ps : q]
     if rawmode or '\\' not in s[ps:]:
         if need_encoding:
-            w_u = space.newunicode(unicodehelper.decode_utf8(space, substr))
+            utf, lgt = unicodehelper.decode_utf8(space, substr)
+            w_u = space.newutf8(utf, lgt)
             w_v = unicodehelper.encode(space, w_u, encoding)
             return w_v
         else:
@@ -222,8 +222,8 @@
     # while (s < end && *s != '\\') s++; */ /* inefficient for u".."
     while ps < end and ord(s[ps]) & 0x80:
         ps += 1
-    u = unicodehelper.decode_utf8(space, s[pt:ps])
-    return u, ps
+    utf, _ = unicodehelper.decode_utf8(space, s[pt:ps])
+    return utf.decode('utf8'), ps
 
 def decode_utf8_recode(space, s, ps, end, recode_encoding):
     u, ps = decode_utf8(space, s, ps, end)
diff --git a/pypy/interpreter/pyparser/test/test_parsestring.py b/pypy/interpreter/pyparser/test/test_parsestring.py
--- a/pypy/interpreter/pyparser/test/test_parsestring.py
+++ b/pypy/interpreter/pyparser/test/test_parsestring.py
@@ -50,7 +50,7 @@
         s = "u'\x81'"
         s = s.decode("koi8-u").encode("utf8")
         w_ret = parsestring.parsestr(self.space, 'koi8-u', s)
-        ret = space.unwrap(w_ret)
+        ret = w_ret._utf8.decode('utf8')
         assert ret == eval("# -*- coding: koi8-u -*-\nu'\x81'")
 
     def test_unicode_literals(self):
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -77,6 +77,13 @@
         errorhandler=raise_unicode_exception_encode,
         allow_surrogates=True)
 
+def decode_utf8(space, s):
+    u, _ = runicode.str_decode_utf_8(s, len(s),
+        "strict", final=True,
+        errorhandler=decode_error_handler(space),
+        allow_surrogates=True)
+    return u.encode('utf8'), len(u)
+
 def utf8_encode_ascii(utf8, utf8len, errors, errorhandler):
     if len(utf8) == utf8len:
         return utf8
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -2,7 +2,7 @@
 
 from rpython.rlib.objectmodel import (
     compute_hash, compute_unique_id, import_from_mixin,
-    enforceargs, newlist_hint)
+    enforceargs, newlist_hint, specialize)
 from rpython.rlib.buffer import StringBuffer
 from rpython.rlib.rstring import StringBuilder, split, rsplit, UnicodeBuilder
 from rpython.rlib.runicode import make_unicode_escape_function
@@ -116,9 +116,8 @@
         return W_UnicodeObject(value.encode('utf8'), len(value))
 
     def _new_from_list(self, value):
-        xxx
-        return W_UnicodeObject(u''.join(value))
-
+        u = u''.join(value)
+        return W_UnicodeObject(u.encode('utf8'), len(u))
     def _empty(self):
         return W_UnicodeObject.EMPTY
 
@@ -154,12 +153,13 @@
     def convert_to_w_unicode(self, space):
         return self
 
+    @specialize.argtype(1)
     def _chr(self, char):
         assert len(char) == 1
         return char[0]
 
     def _multi_chr(self, unichar):
-        return unichar.encode('utf8')
+        return unichar
 
     _builder = UnicodeBuilder
 
@@ -387,7 +387,7 @@
     def descr_join(self, space, w_list):
         l = space.listview_unicode(w_list)
         if l is not None:
-            xxx
+            assert False, "unreachable"
             if len(l) == 1:
                 return space.newunicode(l[0])
             return space.newunicode(self._utf8).join(l)
@@ -513,7 +513,7 @@
     def descr_zfill(self, space, width):
         selfval = self._utf8
         if len(selfval) == 0:
-            return W_UnicodeObject(self._multi_chr(self._chr('0')) * width, width)
+            return W_UnicodeObject(self._chr('0') * width, width)
         num_zeros = width - self._len()
         if num_zeros <= 0:
             # cannot return self, in case it is a subclass of str
@@ -571,7 +571,7 @@
         d = width - self._len()
         if d > 0:
             offset = d//2 + (d & width & 1)
-            fillchar = self._multi_chr(fillchar[0])
+            fillchar = fillchar[0]
             centered = offset * fillchar + value + (d - offset) * fillchar
         else:
             centered = value
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8: fixes

Reply via email to