Author: Matti Picus <[email protected]>
Branch: py3.6
Changeset: r96098:6890d4e0c302
Date: 2019-02-19 21:21 +0200
http://bitbucket.org/pypy/pypy/changeset/6890d4e0c302/

Log:    use rutf8.OutOfRange error

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -212,6 +212,11 @@
             i = end
     return res.build(), len(s), len(s)
 
+class ErrorHandlerError(Exception):
+    def __init__(self, new, old):
+        self.new = new
+        self.old = old
+
 def utf8_encode_utf_8(s, errors, errorhandler, allow_surrogates=False):
     size = len(s)
     if size == 0:
@@ -255,9 +260,7 @@
                 for ch in res:
                     result.append(ch)
             if newindex <= upos:
-                raise IndexError(
-                   "position %d from error handler invalid, already encoded %d",
-                   newindex, upos)
+                raise ErrorHandlerError(newindex, upos)
             upos = newindex
             pos = rutf8._pos_at_index(s, upos)
     return result.build()
@@ -521,7 +524,7 @@
             try:
                 builder.append_code(chr)
                 pos += digits
-            except ValueError:
+            except rutf8.OutOfRange:
                 message = "illegal Unicode character"
                 r, pos, rettype = errorhandler(
                     errors, encoding, message, s, pos - 2, pos + digits)
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -735,8 +735,11 @@
     try:
         result = unicodehelper.utf8_encode_utf_8(utf8, errors,
                      state.encode_error_handler, allow_surrogates=False)
-    except IndexError as e:
-        raise oefmt(space.w_IndexError, '%s' % e.args[0])
+    except unicodehelper.ErrorHandlerError as e:
+        raise oefmt(space.w_IndexError, 
+                   "position %d from error handler invalid, already encoded %d",
+                    e.new,e.old)
+
     return space.newtuple([space.newbytes(result), space.newint(lgt)])
 
 @unwrap_spec(string='bufferstr', errors='text_or_none',
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -93,8 +93,9 @@
     lgt = get_wsize(py_obj)
     try:
         s_utf8 = rffi.wcharpsize2utf8(get_wbuffer(py_obj), lgt)
-    except ValueError as e:
-        raise oefmt(space.w_ValueError, '%s' % e.args[0])
+    except rutf8.OutOfRange as e:
+        raise oefmt(space.w_ValueError,
+                   'character U+%x is not in range [U+0000; U+10ffff]' % e.code)
     w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type))
     w_obj = space.allocate_instance(unicodeobject.W_UnicodeObject, w_type)
     w_obj.__init__(s_utf8, lgt)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to