Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94945:9c9e17b77c17
Date: 2018-08-04 23:51 -0700
http://bitbucket.org/pypy/pypy/changeset/9c9e17b77c17/
Log: unicode/utf8 translation fixes
diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py
--- a/pypy/goal/targetpypystandalone.py
+++ b/pypy/goal/targetpypystandalone.py
@@ -83,7 +83,7 @@
## con.interact()
except OperationError as e:
debug("OperationError:")
- debug(" operror-type: " + e.w_type.getname(space).encode('utf-8'))
+ debug(" operror-type: " + e.w_type.getname(space))
debug(" operror-value: " + space.text_w(space.str(e.get_w_value(space))))
return 1
finally:
@@ -91,7 +91,7 @@
space.finish()
except OperationError as e:
debug("OperationError:")
- debug(" operror-type: " + e.w_type.getname(space).encode('utf-8'))
+ debug(" operror-type: " + e.w_type.getname(space))
debug(" operror-value: " + space.text_w(space.str(e.get_w_value(space))))
return 1
return exitcode
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -515,7 +515,7 @@
elif fmt == 'T':
result = _decode_utf8(space.type(value).name)
elif fmt == 'N':
- result = value.getname(space)
+ result = _decode_utf8(value.getname(space))
elif fmt == '8':
if isinstance(value, unicode):
result = value.encode('utf8')
diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py
--- a/pypy/interpreter/mixedmodule.py
+++ b/pypy/interpreter/mixedmodule.py
@@ -130,7 +130,7 @@
bltin.w_module = self.w_name
func._builtinversion_ = bltin
bltin.name = name
- bltin.qualname = bltin.name.decode('utf-8')
+ bltin.qualname = bltin.name
w_value = bltin
space.setitem(self.w_dict, w_name, w_value)
return w_value
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -74,6 +74,10 @@
break
return i
+ @specialize.arg(1)
+ def _raise(self, msg, *args):
+ raise oefmt(self.space.w_ValueError, msg, *args)
+
def decode_any(self, i):
i = self.skip_whitespace(i)
ch = self.ll_chars[i]
diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py
--- a/pypy/module/_pypyjson/interp_encoder.py
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -1,7 +1,4 @@
from rpython.rlib.rstring import StringBuilder
-from rpython.rlib import rutf8
-from pypy.interpreter import unicodehelper
-
HEX = '0123456789abcdef'
@@ -17,7 +14,7 @@
def raw_encode_basestring_ascii(space, w_unicode):
- u = space.utf8_w(w_unicode).encode()
+ u = space.utf8_w(w_unicode)
for i in range(len(u)):
c = ord(u[i])
if c < 32 or c > 126 or c == ord('\\') or c == ord('"'):
diff --git a/pypy/module/cpyext/classobject.py b/pypy/module/cpyext/classobject.py
--- a/pypy/module/cpyext/classobject.py
+++ b/pypy/module/cpyext/classobject.py
@@ -38,7 +38,7 @@
def descr_repr(self, space):
return self.getrepr(space, u'<instancemethod %s>' %
- (self.w_function.getname(space),))
+ (self.w_function.getname(space).decode('utf8'),))
InstanceMethod.typedef = TypeDef("instancemethod",
__new__ = interp2app(InstanceMethod.descr_new),
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -72,7 +72,7 @@
def unicode_attach(space, py_obj, w_obj, w_userdata=None):
"Fills a newly allocated PyUnicodeObject with a unicode string"
- value = space.utf8_w(w_obj).decode()
+ value = space.utf8_w(w_obj).decode('utf8')
set_wsize(py_obj, len(value))
set_wbuffer(py_obj, lltype.nullptr(rffi.CWCHARP.TO))
_readify(space, py_obj, value)
@@ -353,7 +353,7 @@
if not get_wbuffer(ref):
# Copy unicode buffer
w_unicode = from_ref(space, rffi.cast(PyObject, ref))
- u = space.utf8_w(w_unicode).decode()
+ u = space.utf8_w(w_unicode).decode('utf8')
set_wbuffer(ref, rffi.unicode2wcharp(u))
set_wsize(ref, len(u))
if psize:
@@ -943,7 +943,7 @@
than, equal, and greater than, respectively. It is best to pass only
ASCII-encoded strings, but the function interprets the input string as
ISO-8859-1 if it contains non-ASCII characters."""
- uni = space.utf8_w(w_uni).decode()
+ uni = space.utf8_w(w_uni).decode('utf8')
i = 0
# Compare Unicode string and source character set string
while i < len(uni) and string[i] != '\0':
@@ -1054,7 +1054,7 @@
@cpython_api([PyObject, Py_ssize_t, Py_ssize_t], PyObject)
def PyUnicode_Substring(space, w_str, start, end):
- usrc = space.utf8_w(w_str).decode()
+ usrc = space.utf8_w(w_str).decode('utf8')
length = len(usrc)
if start < 0 or end < 0:
raise oefmt(space.w_IndexError, "string index out of range")
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -753,7 +753,7 @@
def getfulltypename(self, w_obj):
w_type = self.type(w_obj)
if w_type.is_heaptype():
- classname = w_type.getqualname(self)
+ classname = w_type.getqualname(self).decode('utf8')
w_module = w_type.lookup("__module__")
if w_module is not None:
try:
diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py
--- a/pypy/objspace/std/typeobject.py
+++ b/pypy/objspace/std/typeobject.py
@@ -579,7 +579,7 @@
result = self.name[dot+1:]
else:
result = self.name
- return result.decode('utf-8')
+ return result
def getqualname(self, space):
return self.qualname
@@ -792,7 +792,6 @@
w_typetype = w_winner
name = space.text_w(w_name) # NB. CPython forbids surrogates here
- assert isinstance(name, str)
if '\x00' in name:
raise oefmt(space.w_ValueError, "type name must not contain null characters")
dict_w = {}
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -377,6 +377,8 @@
codes = unicodedb.tolower_full(ch)
elif unicodedb.islower(ch):
codes = unicodedb.toupper_full(ch)
+ else:
+ codes = [ch,]
for c in codes:
builder.append_code(c)
return self.from_utf8builder(builder)
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -28,7 +28,7 @@
# we need a way to accept both r_uint and int(nonneg=True)
-#@signature(types.int_nonneg(), types.bool(), returns=types.str())
+@signature(types.int_nonneg(), types.bool(), returns=types.str())
def unichr_as_utf8(code, allow_surrogates=False):
"""Encode code (numeric value) as utf8 encoded string
"""
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit