Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3.5
Changeset: r88135:ff527b1b35a4
Date: 2016-11-04 00:49 +0100
http://bitbucket.org/pypy/pypy/changeset/ff527b1b35a4/
Log: Exceptions raised by a codec are "wrapped" to indicate which codec
is used (like a 'raise X from Y'). But since the new exception has
the same type as the original, this only works for simple exceptions
which consist of a single message...
Also remove code duplication in unicodeobject.py
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -374,7 +374,7 @@
_fmtcache = {}
_fmtcache2 = {}
-_FMTS = tuple('8NRTds')
+_FMTS = tuple('8NRSTds')
def decompose_valuefmt(valuefmt):
"""Returns a tuple of string parts extracted from valuefmt,
@@ -425,6 +425,8 @@
result = str(value).decode('ascii')
elif fmt == 'R':
result = space.unicode_w(space.repr(value))
+ elif fmt == 'S':
+ result = space.unicode_w(space.str(value))
elif fmt == 'T':
result = space.type(value).name.decode('utf-8')
elif fmt == 'N':
@@ -468,6 +470,7 @@
%8 - The result of arg.decode('utf-8')
%N - The result of w_arg.getname(space)
%R - The result of space.unicode_w(space.repr(w_arg))
+ %S - The result of space.unicode_w(space.str(w_arg))
%T - The result of space.type(w_arg).name
"""
diff --git a/pypy/module/_codecs/interp_codecs.py
b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -415,6 +415,34 @@
state.codec_error_registry[error] =
space.wrap(interp2app(globals()[name]))
+# A simplified version of the incredibly complex CPython function
+# _PyErr_TrySetFromCause, which returns a new exception with another
+# error message. Subclasses of UnicodeErrors are returned unchanged,
+# but this is only a side-effect: they cannot be constructed with a
+# simple message.
+def _wrap_codec_error(space, operr, action, encoding):
+ w_exc = operr.get_w_value(space)
+ try:
+ new_operr = oefmt(space.type(w_exc),
+ "%s with '%s' codec failed (%T: %S)",
+ action, encoding, w_exc, w_exc)
+ new_operr.w_cause = w_exc
+ new_operr.normalize_exception(space)
+ except OperationError:
+ # Return the original error
+ return operr
+ return new_operr
+
+def _call_codec(space, w_decoder, w_obj, action, encoding, errors):
+ try:
+ w_res = space.call_function(w_decoder, w_obj, space.wrap(errors))
+ except OperationError as operr:
+ raise _wrap_codec_error(space, operr, action, encoding)
+ if (not space.isinstance_w(w_res, space.w_tuple) or space.len_w(w_res) !=
2):
+ raise oefmt(space.w_TypeError,
+ "encoder must return a tuple (object, integer)")
+ return space.getitem(w_res, space.wrap(0))
+
@unwrap_spec(errors=str)
def lookup_error(space, errors):
"""lookup_error(errors) -> handler
@@ -448,8 +476,7 @@
else:
encoding = space.str_w(w_encoding)
w_encoder = space.getitem(lookup_codec(space, encoding), space.wrap(0))
- w_res = space.call_function(w_encoder, w_obj, space.wrap(errors))
- return space.getitem(w_res, space.wrap(0))
+ return _call_codec(space, w_encoder, w_obj, "encoding", encoding, errors)
@unwrap_spec(errors='str_or_None')
def readbuffer_encode(space, w_data, errors='strict'):
@@ -472,14 +499,7 @@
else:
encoding = space.str_w(w_encoding)
w_decoder = space.getitem(lookup_codec(space, encoding), space.wrap(1))
- if space.is_true(w_decoder):
- w_res = space.call_function(w_decoder, w_obj, space.wrap(errors))
- if (not space.isinstance_w(w_res, space.w_tuple) or space.len_w(w_res)
!= 2):
- raise oefmt(space.w_TypeError,
- "encoder must return a tuple (object, integer)")
- return space.getitem(w_res, space.wrap(0))
- else:
- assert 0, "XXX, what to do here?"
+ return _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors)
@unwrap_spec(errors=str)
def register_error(space, errors, w_handler):
@@ -498,6 +518,38 @@
raise oefmt(space.w_TypeError, "handler must be callable")
# ____________________________________________________________
+# Helpers for unicode.encode() and bytes.decode()
+def lookup_text_codec(space, action, encoding):
+ codec_info = lookup_codec(space, encoding)
+ try:
+ is_text_encoding = space.is_true(
+ space.getattr(codec_info, space.wrap('_is_text_encoding')))
+ except OperationError as e:
+ if e.match(space, space.w_AttributeError):
+ is_text_encoding = True
+ else:
+ raise
+ if not is_text_encoding:
+ raise oefmt(space.w_LookupError,
+ "'%s' is not a text encoding; "
+ "use %s to handle arbitrary codecs", encoding, action)
+ return codec_info
+
+def encode_text(space, w_obj, encoding, errors):
+ if errors is None:
+ errors = 'strict'
+ w_encoder = space.getitem(
+ lookup_text_codec(space, "codecs.encode()", encoding), space.wrap(0))
+ return _call_codec(space, w_encoder, w_obj, "encoding", encoding, errors)
+
+def decode_text(space, w_obj, encoding, errors):
+ if errors is None:
+ errors = 'strict'
+ w_decoder = space.getitem(
+ lookup_text_codec(space, "codecs.decode()", encoding), space.wrap(1))
+ return _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors)
+
+# ____________________________________________________________
# delegation to runicode
from rpython.rlib import runicode
diff --git a/pypy/module/_codecs/test/test_codecs.py
b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -376,12 +376,30 @@
raises(TypeError, b"hello".decode, "test.mytestenc")
raises(TypeError, "hello".encode, "test.mytestenc")
+ def test_codec_wrapped_exception(self):
+ import _codecs
+ def search_function(encoding):
+ def f(input, errors="strict"):
+ raise RuntimeError('should be wrapped')
+ if encoding == 'test.failingenc':
+ return (f, f, None, None)
+ return None
+ _codecs.register(search_function)
+ exc = raises(RuntimeError, b"hello".decode, "test.failingenc")
+ assert str(exc.value) == (
+ "decoding with 'test.failingenc' codec failed "
+ "(RuntimeError: should be wrapped)")
+ exc = raises(RuntimeError, u"hello".encode, "test.failingenc")
+ assert str(exc.value) == (
+ "encoding with 'test.failingenc' codec failed "
+ "(RuntimeError: should be wrapped)")
+
def test_cpytest_decode(self):
import codecs
assert codecs.decode(b'\xe4\xf6\xfc', 'latin-1') == '\xe4\xf6\xfc'
raises(TypeError, codecs.decode)
assert codecs.decode(b'abc') == 'abc'
- raises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii')
+ exc = raises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii')
def test_bad_errorhandler_return(self):
import codecs
diff --git a/pypy/objspace/std/unicodeobject.py
b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -577,27 +577,8 @@
except unicodehelper.RUnicodeEncodeError as ue:
raise wrap_encode_error(space, ue)
- from pypy.module._codecs.interp_codecs import lookup_codec
- codec_info = lookup_codec(space, encoding)
- try:
- is_text_encoding = space.is_true(
- space.getattr(codec_info, space.wrap('_is_text_encoding')))
- except OperationError as e:
- if e.match(space, space.w_AttributeError):
- is_text_encoding = True
- else:
- raise
- if not is_text_encoding:
- raise oefmt(space.w_LookupError,
- "'%s' is not a text encoding; "
- "use codecs.encode() to handle arbitrary codecs", encoding)
- w_encoder = space.getitem(codec_info, space.wrap(0))
- if errors is None:
- w_errors = space.wrap('strict')
- else:
- w_errors = space.wrap(errors)
- w_restuple = space.call_function(w_encoder, w_object, w_errors)
- w_retval = space.getitem(w_restuple, space.wrap(0))
+ from pypy.module._codecs.interp_codecs import encode_text
+ w_retval = encode_text(space, w_object, encoding, errors)
if not space.isinstance_w(w_retval, space.w_bytes):
raise oefmt(space.w_TypeError,
"encoder did not return a bytes object (type '%T')",
@@ -635,27 +616,8 @@
return space.wrap(str_decode_utf_8(
s, len(s), None, final=True, errorhandler=eh)[0])
- from pypy.module._codecs.interp_codecs import lookup_codec
- codec_info = lookup_codec(space, encoding)
- try:
- is_text_encoding = space.is_true(
- space.getattr(codec_info, space.wrap('_is_text_encoding')))
- except OperationError as e:
- if e.match(space, space.w_AttributeError):
- is_text_encoding = True
- else:
- raise
- if not is_text_encoding:
- raise oefmt(space.w_LookupError,
- "'%s' is not a text encoding; "
- "use codecs.decode() to handle arbitrary codecs", encoding)
- w_decoder = space.getitem(codec_info, space.wrap(1))
- if errors is None:
- w_errors = space.wrap('strict')
- else:
- w_errors = space.wrap(errors)
- w_restuple = space.call_function(w_decoder, w_obj, w_errors)
- w_retval = space.getitem(w_restuple, space.wrap(0))
+ from pypy.module._codecs.interp_codecs import decode_text
+ w_retval = decode_text(space, w_obj, encoding, errors)
if not space.isinstance_w(w_retval, space.w_unicode):
raise oefmt(space.w_TypeError,
"decoder did not return a bytes object (type '%T')",
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit