Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94998:e5867f1518c9
Date: 2018-08-12 14:13 -0700
http://bitbucket.org/pypy/pypy/changeset/e5867f1518c9/
Log: specify the errors argument explicitly; disallow space.text_w() on
non-unicode objects, but allow W_Bytes.text_w()
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1618,6 +1618,8 @@
an utf-8 encoded rpython string.
"""
assert w_obj is not None
+ if not self.isinstance_w(w_obj, self.w_unicode):
+ w_obj._typed_unwrap_error(self, "unicode")
return w_obj.text_w(self)
@not_rpython # tests only; should be replaced with bytes_w or text_w
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -422,6 +422,9 @@
raise oefmt(space.w_TypeError,
"unicode object expected, received bytes instead")
+ def text_w(self, space):
+ return self._value
+
def utf8_w(self, space):
return self._value
diff --git a/pypy/objspace/std/stringmethods.py
b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -195,6 +195,8 @@
w_errors)
if errors is None:
errors = 'strict'
+ if encoding is None:
+ encoding = 'utf8'
return decode_object(space, self, encoding, errors)
@unwrap_spec(tabsize=int)
diff --git a/pypy/objspace/std/unicodeobject.py
b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -670,7 +670,7 @@
def descr_add(self, space, w_other):
try:
- w_other = self.convert_arg_to_w_unicode(space, w_other,
strict=True)
+ w_other = self.convert_arg_to_w_unicode(space, w_other,
strict='__add__')
except OperationError as e:
if e.match(space, space.w_TypeError):
return space.w_NotImplemented
@@ -1285,7 +1285,7 @@
# repr is guaranteed to be unicode
w_repr = space.repr(w_obj)
w_encoded = encode_object(space, w_repr, 'ascii', 'backslashreplace')
- return decode_object(space, w_encoded, 'ascii', None)
+ return decode_object(space, w_encoded, 'ascii', 'strict')
def unicode_from_string(space, w_bytes):
# this is a performance and bootstrapping hack
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit