Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94961:18d61ca77776
Date: 2018-08-05 23:27 -0700
http://bitbucket.org/pypy/pypy/changeset/18d61ca77776/
Log: fix unicodehelper errorhandlers
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -32,7 +32,7 @@
def decode_never_raise(errors, encoding, msg, s, startingpos, endingpos):
assert startingpos >= 0
ux = ['\ux' + hex(ord(x))[2:].upper() for x in s[startingpos:endingpos]]
- return ''.join(ux), endingpos, endingpos
+ return ''.join(ux), endingpos
@specialize.memo()
def encode_error_handler(space):
@@ -224,6 +224,10 @@
# cannot be ASCII, cannot have surrogates, I believe
return res.build(), len(s), len(s)
+def utf8_encode_utf_8(s, errors, errorhandler):
+ # needed by tests
+ return s
+
def utf8_encode_latin_1(s, errors, errorhandler):
try:
rutf8.check_ascii(s)
@@ -295,7 +299,7 @@
return result.build()
if sys.platform == 'win32':
- def utf8_encode_mbcs(s, slen, errors, errorhandler):
+ def utf8_encode_mbcs(s, errors, errorhandler):
s = s.decode('utf-8')
res = unicode_encode_mbcs(s, slen, errors, errorhandler)
return res
@@ -606,7 +610,7 @@
errorhandler=None):
size = len(s)
if size == 0:
- return '', 0
+ return '', 0, 0
builder = rutf8.Utf8StringBuilder(size)
pos = 0
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -946,13 +946,12 @@
unicode_name_handler = state.get_unicodedata_handler(space)
- result, lgt = unicodehelper.str_decode_unicode_escape(
+ result, lgt, u_len = unicodehelper.str_decode_unicode_escape(
string, errors,
final, state.decode_error_handler,
unicode_name_handler)
- s_len = len(string)
- return space.newtuple([space.newutf8(result, lgt), space.newint(s_len)])
+ return space.newtuple([space.newutf8(result, lgt), space.newint(u_len)])
# ____________________________________________________________
# Raw Unicode escape (accepts bytes or str)
@@ -964,9 +963,8 @@
errors = 'strict'
final = space.is_true(w_final)
state = space.fromcache(CodecState)
- result, lgt = runicode.str_decode_raw_unicode_escape(
- string, len(string), errors,
- final, state.decode_error_handler)
+ result, lgt, u_len = unicodehelper.str_decode_raw_unicode_escape(
+ string, errors, final, state.decode_error_handler)
return space.newtuple([space.newtext(result), space.newint(lgt)])
# ____________________________________________________________
diff --git a/pypy/module/_codecs/test/test_locale.py b/pypy/module/_codecs/test/test_locale.py
--- a/pypy/module/_codecs/test/test_locale.py
+++ b/pypy/module/_codecs/test/test_locale.py
@@ -41,7 +41,7 @@
utf8_encoder = self.getencoder('utf-8')
for val in u'foo', u' 日本', u'\U0001320C':
assert (locale_encoder(val).encode('utf8') ==
- utf8_encoder(val, 'strict', True, None))
+ utf8_encoder(val, 'strict', None))
def test_encode_locale_errorhandler(self):
self.setlocale("en_US.UTF-8")
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -30,7 +30,7 @@
class W_UnicodeObject(W_Root):
import_from_mixin(StringMethods)
- _immutable_fields_ = ['_utf8']
+ _immutable_fields_ = ['_utf8', '_length']
@enforceargs(utf8str=str)
def __init__(self, utf8str, length):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit