Author: Brian Kearns <[email protected]>
Branch: stdlib-2.7.6
Changeset: r69604:8767512bf42b
Date: 2014-03-02 03:25 -0500
http://bitbucket.org/pypy/pypy/changeset/8767512bf42b/

Log:    fix escape_decode to support replace/ignore modes

diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -1,7 +1,8 @@
-from pypy.interpreter.error import OperationError
+from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter import unicodehelper
 from rpython.rlib.rstring import StringBuilder
 
+
 def parsestr(space, encoding, s, unicode_literal=False):
     """Parses a string or unicode literal, and return a wrapped value.
 
@@ -79,7 +80,7 @@
     enc = None
     if need_encoding:
         enc = encoding
-    v = PyString_DecodeEscape(space, substr, enc)
+    v = PyString_DecodeEscape(space, substr, 'strict', enc)
     return space.wrap(v)
 
 def hexbyte(val):
@@ -121,7 +122,7 @@
             ps += 1
     return ''.join(lis)
 
-def PyString_DecodeEscape(space, s, recode_encoding):
+def PyString_DecodeEscape(space, s, errors, recode_encoding):
     """
     Unescape a backslash-escaped string. If recode_encoding is non-zero,
     the string is UTF-8 encoded and should be re-encoded in the
@@ -190,9 +191,17 @@
                 builder.append(chr(num))
                 ps += 2
             else:
-                raise_app_valueerror(space, 'invalid \\x escape')
-            # ignored replace and ignore for now
-
+                if errors == 'strict':
+                    raise_app_valueerror(space, 'invalid \\x escape')
+                elif errors == 'replace':
+                    builder.append('?')
+                elif errors == 'ignore':
+                    pass
+                else:
+                    raise oefmt(space.w_ValueError, "decoding error; "
+                        "unknown error handling code: %s", errors)
+                if ps+1 <= end and isxdigit(s[ps]):
+                    ps += 1
         else:
             # this was not an escape, so the backslash
             # has to be added, and we start over in
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -695,5 +695,5 @@
 @unwrap_spec(data=str, errors='str_or_None')
 def escape_decode(space, data, errors='strict'):
     from pypy.interpreter.pyparser.parsestring import PyString_DecodeEscape
-    result = PyString_DecodeEscape(space, data, None)
+    result = PyString_DecodeEscape(space, data, errors, None)
     return space.newtuple([space.wrap(result), space.wrap(len(data))])
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -118,6 +118,16 @@
                                {0: u'\U0010FFFF', 1: u'b', 2: u'c'}) ==
                 u"\U0010FFFFbc", 3)
 
+    def test_escape_decode_errors(self):
+        from _codecs import escape_decode as decode
+        raises(ValueError, decode, br"\x")
+        raises(ValueError, decode, br"[\x]")
+        assert decode(br"[\x]\x", "ignore") == (b"[]", 6)
+        assert decode(br"[\x]\x", "replace") == (b"[?]?", 6)
+        raises(ValueError, decode, br"\x0")
+        raises(ValueError, decode, br"[\x0]")
+        assert decode(br"[\x0]\x0", "ignore") == (b"[]", 8)
+        assert decode(br"[\x0]\x0", "replace") == (b"[?]?", 8)
 
     def test_unicode_escape(self):
         from _codecs import unicode_escape_encode, unicode_escape_decode
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to