Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94961:18d61ca77776
Date: 2018-08-05 23:27 -0700
http://bitbucket.org/pypy/pypy/changeset/18d61ca77776/

Log:    fix unicodehelper errorhandlers

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -32,7 +32,7 @@
 def decode_never_raise(errors, encoding, msg, s, startingpos, endingpos):
     assert startingpos >= 0
     ux = ['\ux' + hex(ord(x))[2:].upper() for x in s[startingpos:endingpos]]
-    return ''.join(ux), endingpos, endingpos
+    return ''.join(ux), endingpos
 
 @specialize.memo()
 def encode_error_handler(space):
@@ -224,6 +224,10 @@
     # cannot be ASCII, cannot have surrogates, I believe
     return res.build(), len(s), len(s)
 
+def utf8_encode_utf_8(s, errors, errorhandler):
+    # needed by tests
+    return s
+
 def utf8_encode_latin_1(s, errors, errorhandler):
     try:
         rutf8.check_ascii(s)
@@ -295,7 +299,7 @@
     return result.build()
 
 if sys.platform == 'win32':
-    def utf8_encode_mbcs(s, slen, errors, errorhandler):
+    def utf8_encode_mbcs(s, errors, errorhandler):
         s = s.decode('utf-8')
         res = unicode_encode_mbcs(s, slen, errors, errorhandler)
         return res
@@ -606,7 +610,7 @@
                                   errorhandler=None):
     size = len(s)
     if size == 0:
-        return '', 0
+        return '', 0, 0
 
     builder = rutf8.Utf8StringBuilder(size)
     pos = 0
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -946,13 +946,12 @@
 
     unicode_name_handler = state.get_unicodedata_handler(space)
 
-    result, lgt = unicodehelper.str_decode_unicode_escape(
+    result, lgt, u_len = unicodehelper.str_decode_unicode_escape(
         string, errors,
         final, state.decode_error_handler,
         unicode_name_handler)
 
-    s_len = len(string)
-    return space.newtuple([space.newutf8(result, lgt), space.newint(s_len)])
+    return space.newtuple([space.newutf8(result, lgt), space.newint(u_len)])
 
 # ____________________________________________________________
 # Raw Unicode escape (accepts bytes or str)
@@ -964,9 +963,8 @@
         errors = 'strict'
     final = space.is_true(w_final)
     state = space.fromcache(CodecState)
-    result, lgt = runicode.str_decode_raw_unicode_escape(
-        string, len(string), errors,
-        final, state.decode_error_handler)
+    result, lgt, u_len = unicodehelper.str_decode_raw_unicode_escape(
+        string, errors, final, state.decode_error_handler)
     return space.newtuple([space.newtext(result), space.newint(lgt)])
 
 # ____________________________________________________________
diff --git a/pypy/module/_codecs/test/test_locale.py b/pypy/module/_codecs/test/test_locale.py
--- a/pypy/module/_codecs/test/test_locale.py
+++ b/pypy/module/_codecs/test/test_locale.py
@@ -41,7 +41,7 @@
         utf8_encoder = self.getencoder('utf-8')
         for val in u'foo', u' 日本', u'\U0001320C':
             assert (locale_encoder(val).encode('utf8') ==
-                    utf8_encoder(val, 'strict', True, None))
+                    utf8_encoder(val, 'strict', None))
 
     def test_encode_locale_errorhandler(self):
         self.setlocale("en_US.UTF-8")
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -30,7 +30,7 @@
 
 class W_UnicodeObject(W_Root):
     import_from_mixin(StringMethods)
-    _immutable_fields_ = ['_utf8']
+    _immutable_fields_ = ['_utf8', '_length']
 
     @enforceargs(utf8str=str)
     def __init__(self, utf8str, length):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to