Author: Philip Jenvey <[email protected]>
Branch: py3k
Changeset: r58596:77bd102bba03
Date: 2012-10-29 12:27 -0700
http://bitbucket.org/pypy/pypy/changeset/77bd102bba03/
Log: bounds check for bad data (thanks amaury); port the input checking fix
from http://bugs.python.org/issue16336
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -275,9 +275,10 @@
# let the codec call us again
ch0 = ord(obj[start + 0])
ch1 = ord(obj[start + 1])
- ch2 = ord(obj[start + 2])
- if (ch0 & 0xf0 == 0xe0 or
- ch1 & 0xc0 == 0x80 or
+ ch2 = ord(obj[start + 2]) if len(obj) > start + 2 else -1
+ if (ch2 != -1 and
+ ch0 & 0xf0 == 0xe0 and
+ ch1 & 0xc0 == 0x80 and
ch2 & 0xc0 == 0x80):
# it's a three-byte code
ch = ((ch0 & 0x0f) << 12) + ((ch1 & 0x3f) << 6) + (ch2 & 0x3f)
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -499,6 +499,10 @@
b"abc\xed\xa0\x80def")
assert (b"abc\xed\xa0\x80def".decode("utf-8", "surrogatepass") ==
"abc\ud800def")
+ raises(UnicodeDecodeError, b"abc\xed\xa0".decode, "utf-8",
+ "surrogatepass")
+ raises(UnicodeDecodeError, b"abc\xed\xa0z".decode, "utf-8",
+ "surrogatepass")
def test_badhandler(self):
import codecs
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit