Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3.5
Changeset: r88134:647ced05d718
Date: 2016-11-03 22:31 +0100
http://bitbucket.org/pypy/pypy/changeset/647ced05d718/
Log: Implement bytes.decode(errors='backslashreplace')
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1,8 +1,9 @@
from rpython.rlib import jit
from rpython.rlib.objectmodel import we_are_translated
-from rpython.rlib.rstring import StringBuilder
+from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
from rpython.rlib.runicode import (
- code_to_unichr, MAXUNICODE, raw_unicode_escape_helper)
+ code_to_unichr, MAXUNICODE,
+ raw_unicode_escape_helper, raw_unicode_escape_helper)
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
@@ -260,14 +261,15 @@
builder.append(str(code))
builder.append(";")
pos += 1
- return space.newtuple([space.wrap(builder.build()), w_end])
+ return space.newtuple([space.newbytes(builder.build()), w_end])
else:
raise oefmt(space.w_TypeError,
"don't know how to handle %T in error callback", w_exc)
def backslashreplace_errors(space, w_exc):
check_exception(space, w_exc)
- if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
+ if (space.isinstance_w(w_exc, space.w_UnicodeEncodeError) or
+ space.isinstance_w(w_exc, space.w_UnicodeTranslateError)):
obj = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
start = space.int_w(space.getattr(w_exc, space.wrap('start')))
w_end = space.getattr(w_exc, space.wrap('end'))
@@ -278,6 +280,18 @@
oc = ord(obj[pos])
raw_unicode_escape_helper(builder, oc)
pos += 1
+ return space.newtuple([space.newbytes(builder.build()), w_end])
+ elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
+ obj = space.bytes_w(space.getattr(w_exc, space.wrap('object')))
+ start = space.int_w(space.getattr(w_exc, space.wrap('start')))
+ w_end = space.getattr(w_exc, space.wrap('end'))
+ end = space.int_w(w_end)
+ builder = UnicodeBuilder()
+ pos = start
+ while pos < end:
+ oc = ord(obj[pos])
+ runicode.raw_unicode_escape_helper_unicode(builder, oc)
+ pos += 1
return space.newtuple([space.wrap(builder.build()), w_end])
else:
raise oefmt(space.w_TypeError,
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -562,7 +562,12 @@
assert b'\x00'.decode('unicode-internal', 'ignore') == ''
def test_backslashreplace(self):
+ import codecs
assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == b'a\\xac\u1234\u20ac\u8000'
+ assert b'\x00\x60\x80'.decode(
+ 'ascii', 'backslashreplace') == u'\x00\x60\\x80'
+ assert codecs.charmap_decode(
+ b"\x00\x01\x02", "backslashreplace", "ab") == ("ab\\x02", 3)
def test_namereplace(self):
assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'namereplace') == (
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1432,6 +1432,8 @@
# This function is also used by _codecs/interp_codecs.py
(unicode_encode_unicode_escape, raw_unicode_escape_helper
) = make_unicode_escape_function()
+(_, raw_unicode_escape_helper_unicode
+) = make_unicode_escape_function(unicode_output=True)
# ____________________________________________________________
# Raw unicode escape
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit