Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94963:06beac29ff13
Date: 2018-08-06 21:57 -0700
http://bitbucket.org/pypy/pypy/changeset/06beac29ff13/
Log: separate runicode and pypy error handlers, simplifies unicode/utf8
return vals
diff --git a/pypy/interpreter/pyparser/error.py
b/pypy/interpreter/pyparser/error.py
--- a/pypy/interpreter/pyparser/error.py
+++ b/pypy/interpreter/pyparser/error.py
@@ -35,13 +35,17 @@
# XXX do the right thing about continuation lines, which
# XXX are their own fun, sometimes giving offset >
# XXX len(self.text) for example (right now, avoid crashing)
+ def replace_error_handler(errors, encoding, msg, s, startpos,
endpos):
+ # must return unicode
+ return u'\ufffd', endpos
if offset > len(self.text):
offset = len(self.text)
- text, _ = str_decode_utf_8(self.text, offset, 'replace')
+ text, _ = str_decode_utf_8(self.text, offset,
+ 'replace', errorhandler=replace_error_handler)
offset = len(text)
if len(self.text) != offset:
text, _ = str_decode_utf_8(self.text, len(self.text),
- 'replace')
+ 'replace', errorhandler=replace_error_handler)
w_text = space.newtext(text)
return space.newtuple([
space.newtext(self.msg),
diff --git a/pypy/interpreter/unicodehelper.py
b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -27,6 +27,8 @@
space.newint(startingpos),
space.newint(endingpos),
space.newtext(msg)]))
+ # make annotator happy
+ return '', 0
return raise_unicode_exception_decode
def decode_never_raise(errors, encoding, msg, s, startingpos, endingpos):
@@ -86,8 +88,7 @@
from pypy.module._codecs.locale import (
str_decode_locale_surrogateescape)
bytes = space.bytes_w(w_string)
- uni = str_decode_locale_surrogateescape(
- bytes, errorhandler=decode_error_handler(space))
+ uni = str_decode_locale_surrogateescape(bytes)
else:
from pypy.module.sys.interp_encoding import getfilesystemencoding
return space.call_method(w_string, 'decode',
@@ -301,11 +302,15 @@
if sys.platform == 'win32':
def utf8_encode_mbcs(s, errors, errorhandler):
s = s.decode('utf-8')
+ if errorhandler is None:
+ errorhandler = encode_error_handler(space)
res = unicode_encode_mbcs(s, slen, errors, errorhandler)
return res
def str_decode_mbcs(s, errors, final, errorhandler):
slen = len(s)
+ if errorhandler is None:
+ errorhandler = decode_error_handler(space)
res, size = str_decode_mbcs(s, slen, final=final, errors=errors,
errorhandler=errorhandler)
return res.encode('utf8'), len(res)
diff --git a/pypy/module/_codecs/interp_codecs.py
b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -3,7 +3,7 @@
from rpython.rlib.objectmodel import we_are_translated, not_rpython
from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
from rpython.rlib import runicode
-from rpython.rlib.runicode import raw_unicode_escape_helper_unicode
+from rpython.rlib.runicode import raw_unicode_escape_helper
from rpython.rlib import rutf8
from pypy.interpreter.error import OperationError, oefmt
@@ -39,8 +39,8 @@
the unicode characters, not into the position of utf8 bytes,
so it needs to be converted by the codec
- Returns (unicode_or_none, str_or_none, newpos) as error
- handlers may return unicode or on Python 3, bytes.
+ Returns (str_or_none, newpos) as error
+ handlers used outside runicode return utf8
"""
w_errorhandler = lookup_error(space, errors)
if decode:
@@ -275,11 +275,11 @@
start = space.int_w(space.getattr(w_exc, space.newtext('start')))
w_end = space.getattr(w_exc, space.newtext('end'))
end = space.int_w(w_end)
- builder = UnicodeBuilder()
+ builder = StringBuilder()
pos = start
while pos < end:
oc = ord(obj[pos])
- raw_unicode_escape_helper_unicode(builder, oc)
+ raw_unicode_escape_helper(builder, oc)
pos += 1
return space.newtuple([space.newtext(builder.build()), w_end])
elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
@@ -287,11 +287,11 @@
start = space.int_w(space.getattr(w_exc, space.newtext('start')))
w_end = space.getattr(w_exc, space.newtext('end'))
end = space.int_w(w_end)
- builder = UnicodeBuilder()
+ builder = StringBuilder()
pos = start
while pos < end:
oc = ord(obj[pos])
- raw_unicode_escape_helper_unicode(builder, oc)
+ raw_unicode_escape_helper(builder, oc)
pos += 1
return space.newtuple([space.newtext(builder.build()), w_end])
else:
diff --git a/pypy/module/_cppyy/test/test_zjit.py
b/pypy/module/_cppyy/test/test_zjit.py
--- a/pypy/module/_cppyy/test/test_zjit.py
+++ b/pypy/module/_cppyy/test/test_zjit.py
@@ -71,9 +71,7 @@
self.name = name
self.__name__ = name
def getname(self, space, name):
- if sys.hexversion < 0x3000000:
- return self.name
- return unicode(self.name)
+ return self.name
class FakeBuffer(FakeBase):
typedname = "buffer"
def __init__(self, val):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit