[pypy-commit] pypy unicode-utf8-py3: unicode/utf8 translation fixes

mattip Sun, 05 Aug 2018 06:35:39 -0700

Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94945:9c9e17b77c17
Date: 2018-08-04 23:51 -0700
http://bitbucket.org/pypy/pypy/changeset/9c9e17b77c17/


Log:    unicode/utf8 translation fixes

diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py
--- a/pypy/goal/targetpypystandalone.py
+++ b/pypy/goal/targetpypystandalone.py
@@ -83,7 +83,7 @@
             ##    con.interact()
             except OperationError as e:
                 debug("OperationError:")
-                debug(" operror-type: " + e.w_type.getname(space).encode('utf-8'))
+                debug(" operror-type: " + e.w_type.getname(space))
                 debug(" operror-value: " + space.text_w(space.str(e.get_w_value(space))))
                 return 1
         finally:
@@ -91,7 +91,7 @@
                 space.finish()
             except OperationError as e:
                 debug("OperationError:")
-                debug(" operror-type: " + e.w_type.getname(space).encode('utf-8'))
+                debug(" operror-type: " + e.w_type.getname(space))
                 debug(" operror-value: " + space.text_w(space.str(e.get_w_value(space))))
                 return 1
         return exitcode
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -515,7 +515,7 @@
                     elif fmt == 'T':
                         result = _decode_utf8(space.type(value).name)
                     elif fmt == 'N':
-                        result = value.getname(space)
+                        result = _decode_utf8(value.getname(space))
                     elif fmt == '8':
                         if isinstance(value, unicode):
                             result = value.encode('utf8')
diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py
--- a/pypy/interpreter/mixedmodule.py
+++ b/pypy/interpreter/mixedmodule.py
@@ -130,7 +130,7 @@
                     bltin.w_module = self.w_name
                     func._builtinversion_ = bltin
                     bltin.name = name
-                    bltin.qualname = bltin.name.decode('utf-8')
+                    bltin.qualname = bltin.name
                 w_value = bltin
             space.setitem(self.w_dict, w_name, w_value)
             return w_value
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -74,6 +74,10 @@
                 break
         return i
 
+    @specialize.arg(1)
+    def _raise(self, msg, *args):
+        raise oefmt(self.space.w_ValueError, msg, *args)
+
     def decode_any(self, i):
         i = self.skip_whitespace(i)
         ch = self.ll_chars[i]
diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py
--- a/pypy/module/_pypyjson/interp_encoder.py
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -1,7 +1,4 @@
 from rpython.rlib.rstring import StringBuilder
-from rpython.rlib import rutf8
-from pypy.interpreter import unicodehelper
-
 
 HEX = '0123456789abcdef'
 
@@ -17,7 +14,7 @@
 
 
 def raw_encode_basestring_ascii(space, w_unicode):
-    u = space.utf8_w(w_unicode).encode()
+    u = space.utf8_w(w_unicode)
     for i in range(len(u)):
         c = ord(u[i])
         if c < 32 or c > 126 or c == ord('\\') or c == ord('"'):
diff --git a/pypy/module/cpyext/classobject.py b/pypy/module/cpyext/classobject.py
--- a/pypy/module/cpyext/classobject.py
+++ b/pypy/module/cpyext/classobject.py
@@ -38,7 +38,7 @@
 
     def descr_repr(self, space):
         return self.getrepr(space, u'<instancemethod %s>' %
-                            (self.w_function.getname(space),))
+                            (self.w_function.getname(space).decode('utf8'),))
 
 InstanceMethod.typedef = TypeDef("instancemethod",
     __new__ = interp2app(InstanceMethod.descr_new),
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -72,7 +72,7 @@
 
 def unicode_attach(space, py_obj, w_obj, w_userdata=None):
     "Fills a newly allocated PyUnicodeObject with a unicode string"
-    value = space.utf8_w(w_obj).decode()
+    value = space.utf8_w(w_obj).decode('utf8')
     set_wsize(py_obj, len(value))
     set_wbuffer(py_obj, lltype.nullptr(rffi.CWCHARP.TO))
     _readify(space, py_obj, value)
@@ -353,7 +353,7 @@
     if not get_wbuffer(ref):
         # Copy unicode buffer
         w_unicode = from_ref(space, rffi.cast(PyObject, ref))
-        u = space.utf8_w(w_unicode).decode()
+        u = space.utf8_w(w_unicode).decode('utf8')
         set_wbuffer(ref, rffi.unicode2wcharp(u))
         set_wsize(ref, len(u))
     if psize:
@@ -943,7 +943,7 @@
     than, equal, and greater than, respectively. It is best to pass only
     ASCII-encoded strings, but the function interprets the input string as
     ISO-8859-1 if it contains non-ASCII characters."""
-    uni = space.utf8_w(w_uni).decode()
+    uni = space.utf8_w(w_uni).decode('utf8')
     i = 0
     # Compare Unicode string and source character set string
     while i < len(uni) and string[i] != '\0':
@@ -1054,7 +1054,7 @@
 
 @cpython_api([PyObject, Py_ssize_t, Py_ssize_t], PyObject)
 def PyUnicode_Substring(space, w_str, start, end):
-    usrc = space.utf8_w(w_str).decode()
+    usrc = space.utf8_w(w_str).decode('utf8')
     length = len(usrc)
     if start < 0 or end < 0:
         raise oefmt(space.w_IndexError, "string index out of range")
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -753,7 +753,7 @@
     def getfulltypename(self, w_obj):
         w_type = self.type(w_obj)
         if w_type.is_heaptype():
-            classname = w_type.getqualname(self)
+            classname = w_type.getqualname(self).decode('utf8')
             w_module = w_type.lookup("__module__")
             if w_module is not None:
                 try:
diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py
--- a/pypy/objspace/std/typeobject.py
+++ b/pypy/objspace/std/typeobject.py
@@ -579,7 +579,7 @@
                 result = self.name[dot+1:]
             else:
                 result = self.name
-        return result.decode('utf-8')
+        return result
 
     def getqualname(self, space):
         return self.qualname
@@ -792,7 +792,6 @@
         w_typetype = w_winner
 
     name = space.text_w(w_name) # NB. CPython forbids surrogates here
-    assert isinstance(name, str)
     if '\x00' in name:
         raise oefmt(space.w_ValueError, "type name must not contain null characters")
     dict_w = {}
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -377,6 +377,8 @@
                 codes = unicodedb.tolower_full(ch)
             elif unicodedb.islower(ch):
                 codes = unicodedb.toupper_full(ch)
+            else:
+                codes = [ch,]
             for c in codes:
                 builder.append_code(c)
         return self.from_utf8builder(builder)
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -28,7 +28,7 @@
 
 
 # we need a way to accept both r_uint and int(nonneg=True)
-#@signature(types.int_nonneg(), types.bool(), returns=types.str())
+@signature(types.int_nonneg(), types.bool(), returns=types.str())
 def unichr_as_utf8(code, allow_surrogates=False):
     """Encode code (numeric value) as utf8 encoded string
     """
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8-py3: unicode/utf8 translation fixes

Reply via email to