Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95362:a9ecac2f678b
Date: 2018-11-23 08:59 -0600
http://bitbucket.org/pypy/pypy/changeset/a9ecac2f678b/
Log: add more tests
diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py b/pypy/module/_multibytecodec/test/test_app_incremental.py
--- a/pypy/module/_multibytecodec/test/test_app_incremental.py
+++ b/pypy/module/_multibytecodec/test/test_app_incremental.py
@@ -1,5 +1,6 @@
+import os
class AppTestClasses:
- spaceconfig = dict(usemodules=['_multibytecodec'])
+ spaceconfig = dict(usemodules=['_multibytecodec', '_codecs', '_io'])
def setup_class(cls):
cls.w_IncrementalHzDecoder = cls.space.appexec([], """():
@@ -29,6 +30,7 @@
return IncrementalBig5hkscsEncoder
""")
+ cls.w_myfile = cls.space.wrap(os.path.dirname(__file__))
def test_decode_hz(self):
d = self.IncrementalHzDecoder()
@@ -170,3 +172,27 @@
assert r == b'\x88f'
r = e.encode('\u0304')
assert r == b'\x88b'
+
+ def test_incremental_big5hkscs(self):
+ import _codecs, _io
+ with open(self.myfile + '/big5hkscs.txt', 'rb') as fid:
+ uni_str = fid.read()
+ with open(self.myfile + '/big5hkscs-utf8.txt', 'rb') as fid:
+ utf8str = fid.read()
+ UTF8Reader = _codecs.lookup('utf-8').streamreader
+ for sizehint in [None] + list(range(1, 33)) + \
+ [64, 128, 256, 512, 1024]:
+ istream = UTF8Reader(_io.BytesIO(utf8str))
+ ostream = _io.BytesIO()
+ encoder = self.IncrementalBig5hkscsEncoder()
+ while 1:
+ if sizehint is not None:
+ data = istream.read(sizehint)
+ else:
+ data = istream.read()
+
+ if not data:
+ break
+ e = encoder.encode(data)
+ ostream.write(e)
+ assert ostream.getvalue() == uni_str
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -21,10 +21,10 @@
def test_decode_hz():
# stateful
c = getcodec("hz")
- u = decode(c, "~{abc}")
- assert u == u'\u5f95\u6cef'.encode('utf8')
+ utf8 = decode(c, "~{abc}")
+ assert utf8.decode('utf8') == u'\u5f95\u6cef'
u = decode(c, "~{")
- assert u == ''
+ assert u == u''
def test_decodeex_hz():
c = getcodec("hz")
@@ -85,13 +85,13 @@
def test_decode_hz_ignore():
c = getcodec("hz")
- u = decode(c, 'def~{}abc', 'ignore')
- assert u == u'def\u5fcf'.encode('utf8')
+ utf8 = decode(c, 'def~{}abc', 'ignore')
+ assert utf8.decode('utf8') == u'def\u5f95'
def test_decode_hz_replace():
c = getcodec("hz")
- u = decode(c, 'def~{}abc', 'replace')
- assert u == u'def\ufffd\u5fcf'.encode('utf8')
+ utf8 = decode(c, 'def~{}abc', 'replace')
+ assert utf8.decode('utf8') == u'def\ufffd\u5f95\ufffd'
def test_encode_hz():
c = getcodec("hz")
@@ -130,3 +130,4 @@
return u'\xc3'.encode('utf8'), endingpos
s = encode(c, u'abc\u1234def'.encode('utf8'), 7, 'foo', errorhandler)
assert '\xc3' in s
+
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit