Author: Brian Kearns <[email protected]>
Branch: stdlib-2.7.6
Changeset: r69599:62fa89efe2e7
Date: 2014-03-02 02:02 -0500
http://bitbucket.org/pypy/pypy/changeset/62fa89efe2e7/
Log: Fix the incremental UTF-16 decoder so that an incomplete surrogate pair is not consumed when more input may follow (CPython issue #11461)
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -134,11 +134,15 @@
u"\x00\xff\u07ff\u0800",
u"\x00\xff\u07ff\u0800",
u"\x00\xff\u07ff\u0800\uffff",
+ u"\x00\xff\u07ff\u0800\uffff",
+ u"\x00\xff\u07ff\u0800\uffff",
+ u"\x00\xff\u07ff\u0800\uffff",
+ u"\x00\xff\u07ff\u0800\uffff\U00010000",
]
buffer = ''
result = u""
- for (c, partialresult) in zip(u"\x00\xff\u07ff\u0800\uffff".encode(encoding), check_partial):
+ for (c, partialresult) in zip(u"\x00\xff\u07ff\u0800\uffff\U00010000".encode(encoding), check_partial):
buffer += c
res = _codecs.utf_8_decode(buffer,'strict',False)
if res[1] >0 :
@@ -160,10 +164,14 @@
u"\x00\xff\u0100",
u"\x00\xff\u0100",
u"\x00\xff\u0100\uffff",
+ u"\x00\xff\u0100\uffff",
+ u"\x00\xff\u0100\uffff",
+ u"\x00\xff\u0100\uffff",
+ u"\x00\xff\u0100\uffff\U00010000",
]
buffer = ''
result = u""
- for (c, partialresult) in zip(u"\x00\xff\u0100\uffff".encode(encoding), check_partial):
+ for (c, partialresult) in zip(u"\x00\xff\u0100\uffff\U00010000".encode(encoding), check_partial):
buffer += c
res = _codecs.utf_16_decode(buffer,'strict',False)
if res[1] >0 :
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1,8 +1,8 @@
import py
import sys
+
class TestUnicodeObject:
-
def test_comparison_warning(self):
warnings = []
def my_warn(msg, warningscls):
@@ -32,6 +32,7 @@
space.w_unicode, "__new__", space.w_unicode, w_uni)
assert w_new is w_uni
+
class AppTestUnicodeStringStdOnly:
def test_compares(self):
assert u'a' == 'a'
@@ -314,7 +315,6 @@
assert u'xyzzyhelloxyzzy'.lstrip('xyz') == u'helloxyzzy'
assert u'xyzzyhelloxyzzy'.rstrip(u'xyz') == u'xyzzyhello'
-
def test_long_from_unicode(self):
assert long(u'12345678901234567890') == 12345678901234567890
assert int(u'12345678901234567890') == 12345678901234567890
@@ -336,7 +336,7 @@
u'a', u'"', u'\'', u'\"', u'\t', u'\\', u"'''\"",
unichr(19), unichr(2), u'\u1234', u'\U00101234']:
assert eval(repr(ustr)) == ustr
-
+
def test_getnewargs(self):
class X(unicode):
pass
@@ -400,7 +400,7 @@
assert not 'hello'.endswith((u'he\u1111', u'he'))
assert 'hello'.endswith((u'\u1111lo', u'llo'))
assert 'hello'.endswith((u'\u1111hellox', u'hello'))
-
+
def test_endswith(self):
assert u'ab'.endswith(u'ab') is True
assert u'ab'.endswith(u'b') is True
@@ -441,13 +441,13 @@
s = u'xy\t'
assert s.expandtabs() =='xy '
-
+
s = u'\txy\t'
assert s.expandtabs() ==' xy '
assert s.expandtabs(1) ==' xy '
assert s.expandtabs(2) ==' xy '
assert s.expandtabs(3) ==' xy '
-
+
assert u'xy'.expandtabs() =='xy'
assert u''.expandtabs() ==''
@@ -456,7 +456,7 @@
if sys.maxint > (1 << 32):
skip("Wrong platform")
raises((OverflowError, MemoryError), u't\tt\t'.expandtabs, sys.maxint)
-
+
def test_translate(self):
assert u'bbbc' == u'abababc'.translate({ord('a'):None})
assert u'iiic' == u'abababc'.translate({ord('a'):None,
ord('b'):ord('i')})
@@ -473,7 +473,7 @@
def test_unicode_form_encoded_object(self):
assert unicode('x', 'utf-8') == u'x'
assert unicode('x', 'utf-8', 'strict') == u'x'
-
+
def test_unicode_startswith_tuple(self):
assert u'xxx'.startswith(('x', 'y', 'z'), 0)
assert u'xxx'.endswith(('x', 'y', 'z'), 0)
@@ -572,7 +572,6 @@
def test_partition(self):
-
assert (u'this is the par', u'ti', u'tion method') == \
u'this is the partition method'.partition(u'ti')
@@ -587,7 +586,6 @@
raises(TypeError, S.partition, None)
def test_rpartition(self):
-
assert (u'this is the rparti', u'ti', u'on method') == \
u'this is the rpartition method'.rpartition(u'ti')
@@ -601,7 +599,6 @@
raises(ValueError, S.rpartition, u'')
raises(TypeError, S.rpartition, None)
-
def test_mul(self):
zero = 0
assert type(u'' * zero) == type(zero * u'') == unicode
@@ -730,7 +727,7 @@
return X("stuff")
assert unicode(Y()).__class__ is X
-
+
def test_getslice(self):
assert u'123456'.__getslice__(1, 5) == u'2345'
s = u"abc"
@@ -827,7 +824,7 @@
def __unicode__(self):
return u'bar'
-
+
a = A()
b = B()
s = '%s %s' % (a, b)
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -453,10 +453,11 @@
continue
# UTF-16 code pair:
if len(s) - pos < 2:
+ pos -= 2
if not final:
break
errmsg = "unexpected end of data"
- r, pos = errorhandler(errors, 'utf16', errmsg, s, pos - 2, len(s))
+ r, pos = errorhandler(errors, 'utf16', errmsg, s, pos, len(s))
result.append(r)
if len(s) - pos < 2:
break
diff --git a/rpython/rlib/test/test_runicode.py b/rpython/rlib/test/test_runicode.py
--- a/rpython/rlib/test/test_runicode.py
+++ b/rpython/rlib/test/test_runicode.py
@@ -4,6 +4,7 @@
import sys, random
from rpython.rlib import runicode
+
def test_unichr():
assert runicode.UNICHR(0xffff) == u'\uffff'
if runicode.MAXUNICODE > 0xffff:
@@ -15,6 +16,7 @@
py.test.raises(ValueError, runicode.UNICHR, 0x10000)
py.test.raises(TypeError, runicode.UNICHR, 'abc')
+
def test_ord():
assert runicode.ORD('a') == 97
assert runicode.ORD(u'a') == 97
@@ -118,7 +120,6 @@
class TestDecoding(UnicodeTests):
-
# XXX test bom recognition in utf-16
# XXX test proper error handling
@@ -552,7 +553,6 @@
self.checkdecodeerror(s, "utf-8", 0, 3, addstuff=True,
msg='invalid continuation byte')
-
def test_issue8271(self):
# From CPython
# Issue #8271: during the decoding of an invalid UTF-8 byte sequence,
@@ -648,6 +648,7 @@
assert decoder(seq, len(seq), 'ignore', final=True
) == (res, len(seq))
+
class TestEncoding(UnicodeTests):
def test_all_ascii(self):
for i in range(128):
@@ -759,6 +760,7 @@
py.test.raises(UnicodeEncodeError, encoder, u' 12, \u1234 ', 7, None)
assert encoder(u'u\u1234', 2, 'replace') == 'u?'
+
class TestTranslation(object):
def setup_class(cls):
if runicode.MAXUNICODE != sys.maxunicode:
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit