Author: Philip Jenvey <[email protected]>
Branch: py3k
Changeset: r58596:77bd102bba03
Date: 2012-10-29 12:27 -0700
http://bitbucket.org/pypy/pypy/changeset/77bd102bba03/

Log:    Add a bounds check for bad data (thanks amaury); port the input
        checking fix from http://bugs.python.org/issue16336

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -275,9 +275,10 @@
         # let the codec call us again
         ch0 = ord(obj[start + 0])
         ch1 = ord(obj[start + 1])
-        ch2 = ord(obj[start + 2])
-        if (ch0 & 0xf0 == 0xe0 or
-            ch1 & 0xc0 == 0x80 or
+        ch2 = ord(obj[start + 2]) if len(obj) > start + 2 else -1
+        if (ch2 != -1 and
+            ch0 & 0xf0 == 0xe0 and
+            ch1 & 0xc0 == 0x80 and
             ch2 & 0xc0 == 0x80):
             # it's a three-byte code
             ch = ((ch0 & 0x0f) << 12) + ((ch1 & 0x3f) << 6) + (ch2 & 0x3f)
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -499,6 +499,10 @@
                 b"abc\xed\xa0\x80def")
         assert (b"abc\xed\xa0\x80def".decode("utf-8", "surrogatepass") ==
                 "abc\ud800def")
+        raises(UnicodeDecodeError, b"abc\xed\xa0".decode, "utf-8",
+               "surrogatepass")
+        raises(UnicodeDecodeError, b"abc\xed\xa0z".decode, "utf-8",
+               "surrogatepass")
 
     def test_badhandler(self):
         import codecs
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to