Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95362:a9ecac2f678b
Date: 2018-11-23 08:59 -0600
http://bitbucket.org/pypy/pypy/changeset/a9ecac2f678b/

Log:    add more tests

diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py b/pypy/module/_multibytecodec/test/test_app_incremental.py
--- a/pypy/module/_multibytecodec/test/test_app_incremental.py
+++ b/pypy/module/_multibytecodec/test/test_app_incremental.py
@@ -1,5 +1,6 @@
+import os
 class AppTestClasses:
-    spaceconfig = dict(usemodules=['_multibytecodec'])
+    spaceconfig = dict(usemodules=['_multibytecodec', '_codecs', '_io'])
 
     def setup_class(cls):
         cls.w_IncrementalHzDecoder = cls.space.appexec([], """():
@@ -29,6 +30,7 @@
 
             return IncrementalBig5hkscsEncoder
         """)
+        cls.w_myfile = cls.space.wrap(os.path.dirname(__file__))
 
     def test_decode_hz(self):
         d = self.IncrementalHzDecoder()
@@ -170,3 +172,27 @@
         assert r == b'\x88f'
         r = e.encode('\u0304')
         assert r == b'\x88b'
+
+    def test_incremental_big5hkscs(self):
+        import _codecs, _io
+        with open(self.myfile + '/big5hkscs.txt', 'rb') as fid:
+            uni_str =  fid.read()
+        with open(self.myfile + '/big5hkscs-utf8.txt', 'rb') as fid:
+            utf8str =  fid.read()
+        UTF8Reader = _codecs.lookup('utf-8').streamreader
+        for sizehint in [None] + list(range(1, 33)) + \
+                        [64, 128, 256, 512, 1024]:
+            istream = UTF8Reader(_io.BytesIO(utf8str))
+            ostream = _io.BytesIO()
+            encoder = self.IncrementalBig5hkscsEncoder()
+            while 1:
+                if sizehint is not None:
+                    data = istream.read(sizehint)
+                else:
+                    data = istream.read()
+
+                if not data:
+                    break
+                e = encoder.encode(data)
+                ostream.write(e)
+            assert ostream.getvalue() == uni_str
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -21,10 +21,10 @@
 def test_decode_hz():
     # stateful
     c = getcodec("hz")
-    u = decode(c, "~{abc}")
-    assert u == u'\u5f95\u6cef'.encode('utf8')
+    utf8 = decode(c, "~{abc}")
+    assert utf8.decode('utf8') == u'\u5f95\u6cef'
     u = decode(c, "~{")
-    assert u == ''
+    assert u == u''
 
 def test_decodeex_hz():
     c = getcodec("hz")
@@ -85,13 +85,13 @@
 
 def test_decode_hz_ignore():
     c = getcodec("hz")
-    u = decode(c, 'def~{}abc', 'ignore')
-    assert u == u'def\u5fcf'.encode('utf8')
+    utf8 = decode(c, 'def~{}abc', 'ignore')
+    assert utf8.decode('utf8') == u'def\u5f95'
 
 def test_decode_hz_replace():
     c = getcodec("hz")
-    u = decode(c, 'def~{}abc', 'replace')
-    assert u == u'def\ufffd\u5fcf'.encode('utf8')
+    utf8 = decode(c, 'def~{}abc', 'replace')
+    assert utf8.decode('utf8') == u'def\ufffd\u5f95\ufffd'
 
 def test_encode_hz():
     c = getcodec("hz")
@@ -130,3 +130,4 @@
         return u'\xc3'.encode('utf8'), endingpos
     s = encode(c, u'abc\u1234def'.encode('utf8'), 7, 'foo', errorhandler)
     assert '\xc3' in s
+
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to