[pypy-commit] pypy unicode-utf8-py3: separate runicode and pypy error handlers, simplifies unicode/utf8 return vals

mattip Tue, 07 Aug 2018 13:03:43 -0700

Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94963:06beac29ff13
Date: 2018-08-06 21:57 -0700
http://bitbucket.org/pypy/pypy/changeset/06beac29ff13/


Log:    separate runicode and pypy error handlers, simplifies unicode/utf8
        return vals

diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py
--- a/pypy/interpreter/pyparser/error.py
+++ b/pypy/interpreter/pyparser/error.py
@@ -35,13 +35,17 @@
             # XXX do the right thing about continuation lines, which
             # XXX are their own fun, sometimes giving offset >
             # XXX len(self.text) for example (right now, avoid crashing)
+            def replace_error_handler(errors, encoding, msg, s, startpos, endpos):
+                # must return unicode
+                return u'\ufffd', endpos
             if offset > len(self.text):
                 offset = len(self.text)
-            text, _ = str_decode_utf_8(self.text, offset, 'replace')
+            text, _ = str_decode_utf_8(self.text, offset,
+                             'replace', errorhandler=replace_error_handler)
             offset = len(text)
             if len(self.text) != offset:
                 text, _ = str_decode_utf_8(self.text, len(self.text),
-                                           'replace')
+                             'replace', errorhandler=replace_error_handler)
             w_text = space.newtext(text)
         return space.newtuple([
             space.newtext(self.msg),
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -27,6 +27,8 @@
                                              space.newint(startingpos),
                                              space.newint(endingpos),
                                              space.newtext(msg)]))
+        # make annotator happy
+        return '', 0
     return raise_unicode_exception_decode
 
 def decode_never_raise(errors, encoding, msg, s, startingpos, endingpos):
@@ -86,8 +88,7 @@
         from pypy.module._codecs.locale import (
             str_decode_locale_surrogateescape)
         bytes = space.bytes_w(w_string)
-        uni = str_decode_locale_surrogateescape(
-            bytes, errorhandler=decode_error_handler(space))
+        uni = str_decode_locale_surrogateescape(bytes)
     else:
         from pypy.module.sys.interp_encoding import getfilesystemencoding
         return space.call_method(w_string, 'decode',
@@ -301,11 +302,15 @@
 if sys.platform == 'win32':
     def utf8_encode_mbcs(s, errors, errorhandler):
         s = s.decode('utf-8')
+        if errorhandler is None:
+            errorhandler = encode_error_handler(space)
         res = unicode_encode_mbcs(s, slen, errors, errorhandler)
         return res
         
     def str_decode_mbcs(s, errors, final, errorhandler):
         slen = len(s)
+        if errorhandler is None:
+            errorhandler = decode_error_handler(space) 
         res, size = str_decode_mbcs(s, slen, final=final, errors=errors,
                                            errorhandler=errorhandler)
         return res.encode('utf8'), len(res)
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -3,7 +3,7 @@
 from rpython.rlib.objectmodel import we_are_translated, not_rpython
 from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
 from rpython.rlib import runicode
-from rpython.rlib.runicode import raw_unicode_escape_helper_unicode
+from rpython.rlib.runicode import raw_unicode_escape_helper
 from rpython.rlib import rutf8
 
 from pypy.interpreter.error import OperationError, oefmt
@@ -39,8 +39,8 @@
             the unicode characters, not into the position of utf8 bytes,
             so it needs to be converted by the codec
 
-            Returns (unicode_or_none, str_or_none, newpos) as error
-            handlers may return unicode or on Python 3, bytes.
+            Returns (str_or_none, newpos) as error
+            handlers used outside runicode return utf8
             """
             w_errorhandler = lookup_error(space, errors)
             if decode:
@@ -275,11 +275,11 @@
         start = space.int_w(space.getattr(w_exc, space.newtext('start')))
         w_end = space.getattr(w_exc, space.newtext('end'))
         end = space.int_w(w_end)
-        builder = UnicodeBuilder()
+        builder = StringBuilder()
         pos = start
         while pos < end:
             oc = ord(obj[pos])
-            raw_unicode_escape_helper_unicode(builder, oc)
+            raw_unicode_escape_helper(builder, oc)
             pos += 1
         return space.newtuple([space.newtext(builder.build()), w_end])
     elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
@@ -287,11 +287,11 @@
         start = space.int_w(space.getattr(w_exc, space.newtext('start')))
         w_end = space.getattr(w_exc, space.newtext('end'))
         end = space.int_w(w_end)
-        builder = UnicodeBuilder()
+        builder = StringBuilder()
         pos = start
         while pos < end:
             oc = ord(obj[pos])
-            raw_unicode_escape_helper_unicode(builder, oc)
+            raw_unicode_escape_helper(builder, oc)
             pos += 1
         return space.newtuple([space.newtext(builder.build()), w_end])
     else:
diff --git a/pypy/module/_cppyy/test/test_zjit.py b/pypy/module/_cppyy/test/test_zjit.py
--- a/pypy/module/_cppyy/test/test_zjit.py
+++ b/pypy/module/_cppyy/test/test_zjit.py
@@ -71,9 +71,7 @@
         self.name = name
         self.__name__ = name
     def getname(self, space, name):
-        if sys.hexversion < 0x3000000:
-            return self.name
-        return unicode(self.name)
+        return self.name
 class FakeBuffer(FakeBase):
     typedname = "buffer"
     def __init__(self, val):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8-py3: separate runicode and pypy error handlers, simplifies unicode/utf8 return vals

Reply via email to