Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94837:2621c0f70d91
Date: 2018-07-09 05:11 -0700
http://bitbucket.org/pypy/pypy/changeset/2621c0f70d91/
Log: start to pass unicode around, try using decode_utf8sp
diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -602,8 +602,8 @@
def getmsg(self):
if self.num_kwds == 1:
- msg = "got an unexpected keyword argument '%s'" % (
- self.kwd_name)
+ msg = u"got an unexpected keyword argument '%s'" % (
+ self.kwd_name.decode('utf8'))
else:
msg = "got %d unexpected keyword arguments" % (
self.num_kwds)
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -472,9 +472,12 @@
assert len(formats) > 0, "unsupported: no % command found"
return tuple(parts), tuple(formats)
+@specialize.arg(1)
def _decode_utf8(string):
# when building the error message, don't crash if the byte string
# provided is not valid UTF-8
+ if isinstance(string, unicode):
+ return string
assert isinstance(string, str)
result, consumed = runicode.str_decode_utf_8(
string, len(string), "replace", final=True)
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py
--- a/pypy/interpreter/function.py
+++ b/pypy/interpreter/function.py
@@ -255,7 +255,7 @@
return self.call_args(__args__)
def descr_function_repr(self):
- return self.getrepr(self.space, u'function %s' % self.qualname)
+ return self.getrepr(self.space, u'function %s' % self.qualname.decode('utf8'))
def _cleanup_(self):
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -155,6 +155,7 @@
try:
rutf8.check_ascii(string)
except rutf8.CheckError as e:
+ print 'check_ascii_or_raise', string
decode_error_handler(space)('strict', 'ascii',
'ordinal not in range(128)', string,
e.pos, e.pos + 1)
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -638,7 +638,7 @@
state = space.fromcache(CodecState)
utf8len = w_arg._length
# XXX deal with func() returning length or not
- result = func(w_arg._utf8, errors, state.encode_error_handler)
+ result = func(w_arg._utf8.decode('utf8'), errors, state.encode_error_handler)
return space.newtuple([space.newbytes(result), space.newint(utf8len)])
wrap_encoder.__name__ = func.__name__
globals()[name] = wrap_encoder
diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py
--- a/pypy/module/time/interp_time.py
+++ b/pypy/module/time/interp_time.py
@@ -5,7 +5,7 @@
from pypy.interpreter.gateway import unwrap_spec
from pypy.interpreter.timeutils import (
SECS_TO_NS, MS_TO_NS, US_TO_NS, monotonic as _monotonic, timestamp_w)
-from pypy.interpreter.unicodehelper import str_decode_utf8
+from pypy.interpreter.unicodehelper import decode_utf8sp
from rpython.rtyper.lltypesystem import lltype
from rpython.rlib.rarithmetic import (
intmask, r_ulonglong, r_longfloat, widen, ovfcheck, ovfcheck_float_to_int)
@@ -554,8 +554,7 @@
if HAS_TM_ZONE:
# CPython calls PyUnicode_DecodeLocale here should we do the same?
- tm_zone = str_decode_utf8(rffi.charp2str(t.c_tm_zone),
- allow_surrogates=True)
+ tm_zone = decode_utf8sp(space, rffi.charp2str(t.c_tm_zone))
extra = [space.newtext(tm_zone),
space.newint(rffi.getintfield(t, 'c_tm_gmtoff'))]
w_time_tuple = space.newtuple(time_tuple + extra)
diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -12,7 +12,7 @@
from pypy.interpreter.mixedmodule import MixedModule
from pypy.interpreter.signature import Signature
from pypy.interpreter.typedef import TypeDef
-from pypy.interpreter.unicodehelper import str_decode_utf8
+from pypy.interpreter.unicodehelper import decode_utf8sp
from pypy.objspace.std.util import negate
@@ -1183,9 +1183,12 @@
# we should implement the same shortcuts as we do for BytesDictStrategy
+ def decodekey_str(self, key):
+ return decode_utf8sp(self.space, key)[0]
+
def setitem_str(self, w_dict, key, w_value):
assert key is not None
- self.unerase(w_dict.dstorage)[key] = w_value
+ self.unerase(w_dict.dstorage)[self.decodekey_str(key)] = w_value
def getitem(self, w_dict, w_key):
space = self.space
@@ -1197,7 +1200,7 @@
def getitem_str(self, w_dict, key):
assert key is not None
- return self.unerase(w_dict.dstorage).get(key, None)
+ return self.unerase(w_dict.dstorage).get(self.decodekey_str(key), None)
def listview_utf8(self, w_dict):
return self.unerase(w_dict.dstorage).keys()
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -4,7 +4,7 @@
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.function import Function, Method, FunctionWithFixedCode
from pypy.interpreter.typedef import get_unique_interplevel_subclass
-from pypy.interpreter.unicodehelper import str_decode_utf8
+from pypy.interpreter.unicodehelper import decode_utf8sp
from pypy.objspace.std import frame, transparent, callmethod
from pypy.objspace.descroperation import (
DescrOperation, get_attribute_name, raiseattrerror)
@@ -327,9 +327,7 @@
return W_ListObject.newlist_bytes(self, list_s)
def newlist_text(self, list_t):
- return self.newlist_utf8([
- str_decode_utf8(s, "string", True, None, allow_surrogates=True)[0]
- for s in list_t])
+ return self.newlist_utf8([decode_utf8sp(self, s) for s in list_t])
def newlist_utf8(self, list_u, is_ascii=True):
# TODO ignoring is_ascii, is that correct?
@@ -386,8 +384,7 @@
if isinstance(s, unicode):
s, lgt = s.encode('utf8'), len(s)
elif isinstance(s, str):
- s, uf8lgt, lgt = str_decode_utf8(s, "string", True, None,
- allow_surrogates=True)
+ s, uf8lgt, lgt = decode_utf8sp(self, s)
elif isinstance(s, tuple):
# result of decode_utf8
s, utf8lgt, lgt = s
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit