Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94996:2b95af3762f9
Date: 2018-08-11 23:38 -0700
http://bitbucket.org/pypy/pypy/changeset/2b95af3762f9/
Log: change default values
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -193,6 +193,8 @@
_get_encoding_and_errors, decode_object)
encoding, errors = _get_encoding_and_errors(space, w_encoding,
w_errors)
+ if errors is None:
+ errors = 'strict'
return decode_object(space, self, encoding, errors)
@unwrap_spec(tabsize=int)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -515,7 +515,7 @@
def descr_encode(self, space, w_encoding=None, w_errors=None):
encoding, errors = _get_encoding_and_errors(space, w_encoding,
w_errors)
- return encode_object(space, self, encoding, errors, allow_surrogates=True)
+ return encode_object(space, self, encoding, errors, allow_surrogates=False)
@unwrap_spec(tabsize=int)
def descr_expandtabs(self, space, tabsize=8):
@@ -670,7 +670,7 @@
def descr_add(self, space, w_other):
try:
- w_other = self.convert_arg_to_w_unicode(space, w_other)
+ w_other = self.convert_arg_to_w_unicode(space, w_other, strict=True)
except OperationError as e:
if e.match(space, space.w_TypeError):
return space.w_NotImplemented
@@ -1191,13 +1191,19 @@
utf8 = space.utf8_w(w_object)
# TODO: refactor unnatrual use of error hanlders here,
# we should make a single pass over the utf8 str
+ from pypy.module._codecs.interp_codecs import encode_text, CodecState
if not allow_surrogates:
+ if errors is None:
+ errors = 'strict'
pos = rutf8.surrogate_in_utf8(utf8)
if pos >= 0:
- eh = unicodehelper.encode_error_handler(space)
- eh(None, "utf8", "surrogates not allowed", utf8,
+ state = space.fromcache(CodecState)
+ eh = state.encode_error_handler
+ start = utf8[:pos]
+ ru, pos = eh(errors, "utf8", "surrogates not allowed", utf8,
pos, pos + 1)
- assert False, "always raises"
+ end = utf8[pos+1:]
+ utf8 = start + ru + end
if errors is None or errors == 'strict':
if encoding is None or encoding == 'utf-8':
#if rutf8.has_surrogates(utf8):
@@ -1213,7 +1219,6 @@
assert False, "always raises"
return space.newbytes(utf8)
- from pypy.module._codecs.interp_codecs import encode_text
if encoding is None:
encoding = space.sys.defaultencoding
w_retval = encode_text(space, w_object, encoding, errors)
@@ -1228,8 +1233,7 @@
def decode_object(space, w_obj, encoding, errors='strict'):
assert errors is not None
- if encoding is None:
- encoding = getdefaultencoding(space)
+ assert encoding is not None
if errors == 'surrogateescape':
s = space.charbuf_w(w_obj)
s, lgt, pos = unicodehelper.str_decode_utf8(s, errors, True,
@@ -1256,6 +1260,10 @@
def unicode_from_encoded_object(space, w_obj, encoding, errors):
+ if errors is None:
+ errors = 'strict'
+ if encoding is None:
+ encoding = getdefaultencoding(space)
w_retval = decode_object(space, w_obj, encoding, errors)
if not space.isinstance_w(w_retval, space.w_unicode):
raise oefmt(space.w_TypeError,
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit