Author: Matti Picus <[email protected]>
Branch: py3.6
Changeset: r96098:6890d4e0c302
Date: 2019-02-19 21:21 +0200
http://bitbucket.org/pypy/pypy/changeset/6890d4e0c302/
Log: use rutf8.OutOfRange error
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -212,6 +212,11 @@
i = end
return res.build(), len(s), len(s)
+class ErrorHandlerError(Exception):
+ def __init__(self, new, old):
+ self.new = new
+ self.old = old
+
def utf8_encode_utf_8(s, errors, errorhandler, allow_surrogates=False):
size = len(s)
if size == 0:
@@ -255,9 +260,7 @@
for ch in res:
result.append(ch)
if newindex <= upos:
- raise IndexError(
- "position %d from error handler invalid, already encoded %d",
- newindex, upos)
+ raise ErrorHandlerError(newindex, upos)
upos = newindex
pos = rutf8._pos_at_index(s, upos)
return result.build()
@@ -521,7 +524,7 @@
try:
builder.append_code(chr)
pos += digits
- except ValueError:
+ except rutf8.OutOfRange:
message = "illegal Unicode character"
r, pos, rettype = errorhandler(
errors, encoding, message, s, pos - 2, pos + digits)
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -735,8 +735,11 @@
try:
result = unicodehelper.utf8_encode_utf_8(utf8, errors,
state.encode_error_handler, allow_surrogates=False)
- except IndexError as e:
- raise oefmt(space.w_IndexError, '%s' % e.args[0])
+ except unicodehelper.ErrorHandlerError as e:
+ raise oefmt(space.w_IndexError,
+ "position %d from error handler invalid, already encoded %d",
+ e.new,e.old)
+
return space.newtuple([space.newbytes(result), space.newint(lgt)])
@unwrap_spec(string='bufferstr', errors='text_or_none',
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -93,8 +93,9 @@
lgt = get_wsize(py_obj)
try:
s_utf8 = rffi.wcharpsize2utf8(get_wbuffer(py_obj), lgt)
- except ValueError as e:
- raise oefmt(space.w_ValueError, '%s' % e.args[0])
+ except rutf8.OutOfRange as e:
+ raise oefmt(space.w_ValueError,
+ 'character U+%x is not in range [U+0000; U+10ffff]' % e.code)
w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type))
w_obj = space.allocate_instance(unicodeobject.W_UnicodeObject, w_type)
w_obj.__init__(s_utf8, lgt)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit