Author: Brian Kearns <[email protected]>
Branch: stdlib-2.7.6
Changeset: r69604:8767512bf42b
Date: 2014-03-02 03:25 -0500
http://bitbucket.org/pypy/pypy/changeset/8767512bf42b/
Log: fix escape_decode to support replace/ignore modes
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -1,7 +1,8 @@
-from pypy.interpreter.error import OperationError
+from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter import unicodehelper
from rpython.rlib.rstring import StringBuilder
+
def parsestr(space, encoding, s, unicode_literal=False):
"""Parses a string or unicode literal, and return a wrapped value.
@@ -79,7 +80,7 @@
enc = None
if need_encoding:
enc = encoding
- v = PyString_DecodeEscape(space, substr, enc)
+ v = PyString_DecodeEscape(space, substr, 'strict', enc)
return space.wrap(v)
def hexbyte(val):
@@ -121,7 +122,7 @@
ps += 1
return ''.join(lis)
-def PyString_DecodeEscape(space, s, recode_encoding):
+def PyString_DecodeEscape(space, s, errors, recode_encoding):
"""
Unescape a backslash-escaped string. If recode_encoding is non-zero,
the string is UTF-8 encoded and should be re-encoded in the
@@ -190,9 +191,17 @@
builder.append(chr(num))
ps += 2
else:
- raise_app_valueerror(space, 'invalid \\x escape')
- # ignored replace and ignore for now
-
+ if errors == 'strict':
+ raise_app_valueerror(space, 'invalid \\x escape')
+ elif errors == 'replace':
+ builder.append('?')
+ elif errors == 'ignore':
+ pass
+ else:
+ raise oefmt(space.w_ValueError, "decoding error; "
+ "unknown error handling code: %s", errors)
+ if ps+1 <= end and isxdigit(s[ps]):
+ ps += 1
else:
# this was not an escape, so the backslash
# has to be added, and we start over in
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -695,5 +695,5 @@
@unwrap_spec(data=str, errors='str_or_None')
def escape_decode(space, data, errors='strict'):
from pypy.interpreter.pyparser.parsestring import PyString_DecodeEscape
- result = PyString_DecodeEscape(space, data, None)
+ result = PyString_DecodeEscape(space, data, errors, None)
return space.newtuple([space.wrap(result), space.wrap(len(data))])
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -118,6 +118,16 @@
{0: u'\U0010FFFF', 1: u'b', 2: u'c'}) ==
u"\U0010FFFFbc", 3)
+ def test_escape_decode_errors(self):
+ from _codecs import escape_decode as decode
+ raises(ValueError, decode, br"\x")
+ raises(ValueError, decode, br"[\x]")
+ assert decode(br"[\x]\x", "ignore") == (b"[]", 6)
+ assert decode(br"[\x]\x", "replace") == (b"[?]?", 6)
+ raises(ValueError, decode, br"\x0")
+ raises(ValueError, decode, br"[\x0]")
+ assert decode(br"[\x0]\x0", "ignore") == (b"[]", 8)
+ assert decode(br"[\x0]\x0", "replace") == (b"[?]?", 8)
def test_unicode_escape(self):
from _codecs import unicode_escape_encode, unicode_escape_decode
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit