[pypy-commit] pypy unicode-utf8-py3: fix test
Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95361:fad3f16aa899 Date: 2018-11-20 22:36 -0800 http://bitbucket.org/pypy/pypy/changeset/fad3f16aa899/ Log:fix test diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -689,7 +689,7 @@ with raises_w(space, TypeError): PyUnicode_FromEncodedObject( space, space.wrap(u_text), null_charp, None) -assert space.unicode_w(PyUnicode_FromEncodedObject( +assert space.text_w(PyUnicode_FromEncodedObject( space, space.newbytes(s_text), null_charp, None)) == u_text rffi.free_charp(b_text) ___ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit
[pypy-commit] pypy unicode-utf8-py3: add more tests
Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95362:a9ecac2f678b Date: 2018-11-23 08:59 -0600 http://bitbucket.org/pypy/pypy/changeset/a9ecac2f678b/ Log:add more tests diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py b/pypy/module/_multibytecodec/test/test_app_incremental.py --- a/pypy/module/_multibytecodec/test/test_app_incremental.py +++ b/pypy/module/_multibytecodec/test/test_app_incremental.py @@ -1,5 +1,6 @@ +import os class AppTestClasses: -spaceconfig = dict(usemodules=['_multibytecodec']) +spaceconfig = dict(usemodules=['_multibytecodec', '_codecs', '_io']) def setup_class(cls): cls.w_IncrementalHzDecoder = cls.space.appexec([], """(): @@ -29,6 +30,7 @@ return IncrementalBig5hkscsEncoder """) +cls.w_myfile = cls.space.wrap(os.path.dirname(__file__)) def test_decode_hz(self): d = self.IncrementalHzDecoder() @@ -170,3 +172,27 @@ assert r == b'\x88f' r = e.encode('\u0304') assert r == b'\x88b' + +def test_incremental_big5hkscs(self): +import _codecs, _io +with open(self.myfile + '/big5hkscs.txt', 'rb') as fid: +uni_str = fid.read() +with open(self.myfile + '/big5hkscs-utf8.txt', 'rb') as fid: +utf8str = fid.read() +UTF8Reader = _codecs.lookup('utf-8').streamreader +for sizehint in [None] + list(range(1, 33)) + \ +[64, 128, 256, 512, 1024]: +istream = UTF8Reader(_io.BytesIO(utf8str)) +ostream = _io.BytesIO() +encoder = self.IncrementalBig5hkscsEncoder() +while 1: +if sizehint is not None: +data = istream.read(sizehint) +else: +data = istream.read() + +if not data: +break +e = encoder.encode(data) +ostream.write(e) +assert ostream.getvalue() == uni_str diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py --- a/pypy/module/_multibytecodec/test/test_c_codecs.py +++ b/pypy/module/_multibytecodec/test/test_c_codecs.py @@ -21,10 +21,10 @@ def test_decode_hz(): # stateful c = getcodec("hz") -u = decode(c, "~{abc}") -assert u == u'\u5f95\u6cef'.encode('utf8') +utf8 = decode(c, "~{abc}") +assert utf8.decode('utf8') == u'\u5f95\u6cef' u = decode(c, "~{") -assert u == '' +assert u == u'' def test_decodeex_hz(): c = getcodec("hz") @@ -85,13 +85,13 @@ def test_decode_hz_ignore(): c = getcodec("hz") -u = decode(c, 'def~{}abc', 'ignore') -assert u == u'def\u5fcf'.encode('utf8') +utf8 = decode(c, 'def~{}abc', 'ignore') +assert utf8.decode('utf8') == u'def\u5f95' def test_decode_hz_replace(): c = getcodec("hz") -u = decode(c, 'def~{}abc', 'replace') -assert u == u'def\ufffd\u5fcf'.encode('utf8') +utf8 = decode(c, 'def~{}abc', 'replace') +assert utf8.decode('utf8') == u'def\ufffd\u5f95\ufffd' def test_encode_hz(): c = getcodec("hz") @@ -130,3 +130,4 @@ return u'\xc3'.encode('utf8'), endingpos s = encode(c, u'abc\u1234def'.encode('utf8'), 7, 'foo', errorhandler) assert '\xc3' in s + ___ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit
[pypy-commit] pypy unicode-utf8-py3: return consumed, not unicode lgt
Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95365:5ff7a09cd178 Date: 2018-11-24 00:02 -0600 http://bitbucket.org/pypy/pypy/changeset/5ff7a09cd178/ Log:return consumed, not unicode lgt diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -754,7 +754,7 @@ res, lgt, pos = unicodehelper.str_decode_utf8(string, errors, final, state.decode_error_handler) return space.newtuple([space.newutf8(res, lgt), - space.newint(lgt)]) + space.newint(pos)]) else: return space.newtuple([space.newutf8(string, lgt), space.newint(len(string))]) ___ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit
[pypy-commit] pypy unicode-utf8-py3: fix wrong unicode length
Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95363:a325699736bc Date: 2018-11-23 08:59 -0600 http://bitbucket.org/pypy/pypy/changeset/a325699736bc/ Log:fix wrong unicode length diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ b/pypy/module/_multibytecodec/c_codecs.py @@ -269,7 +269,8 @@ unicodedata, start, end) if rettype == 'u': codec = pypy_cjk_enc_getcodec(encodebuf) -replace = encode(codec, replace, end - start) +lgt = rutf8.check_utf8(replace, False) +replace = encode(codec, replace, lgt) lgt = len(replace) with rffi.scoped_nonmovingbuffer(replace) as inbuf: r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, lgt, end) ___ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit
[pypy-commit] pypy unicode-utf8-py3: improve and add a test
Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95366:2d33f43487c5 Date: 2018-11-25 12:30 -0600 http://bitbucket.org/pypy/pypy/changeset/2d33f43487c5/ Log:improve and add a test diff --git a/pypy/module/_locale/test/test_locale.py b/pypy/module/_locale/test/test_locale.py --- a/pypy/module/_locale/test/test_locale.py +++ b/pypy/module/_locale/test/test_locale.py @@ -20,6 +20,7 @@ # check whether used locales are installed, otherwise the tests will # fail current = _locale.setlocale(_locale.LC_ALL) +cls.oldlocale = current try: try: # some systems are only UTF-8 oriented @@ -45,6 +46,12 @@ finally: _locale.setlocale(_locale.LC_ALL, current) +def teardown_class(cls): +import _locale +_locale.setlocale(_locale.LC_ALL, cls.oldlocale) + + + def test_import(self): import _locale assert _locale @@ -299,3 +306,36 @@ assert lang is None or isinstance(lang, str) assert encoding.startswith('cp') +def test_lc_numeric_basic(self): +from _locale import (setlocale, nl_langinfo, Error, LC_NUMERIC, + LC_CTYPE, RADIXCHAR, THOUSEP, localeconv) +# Test nl_langinfo against localeconv +candidate_locales = ['es_UY', 'fr_FR', 'fi_FI', 'es_CO', 'pt_PT', 'it_IT', +'et_EE', 'es_PY', 'no_NO', 'nl_NL', 'lv_LV', 'el_GR', 'be_BY', 'fr_BE', +'ro_RO', 'ru_UA', 'ru_RU', 'es_VE', 'ca_ES', 'se_NO', 'es_EC', 'id_ID', +'ka_GE', 'es_CL', 'wa_BE', 'hu_HU', 'lt_LT', 'sl_SI', 'hr_HR', 'es_AR', +'es_ES', 'oc_FR', 'gl_ES', 'bg_BG', 'is_IS', 'mk_MK', 'de_AT', 'pt_BR', +'da_DK', 'nn_NO', 'cs_CZ', 'de_LU', 'es_BO', 'sq_AL', 'sk_SK', 'fr_CH', +'de_DE', 'sr_YU', 'br_FR', 'nl_BE', 'sv_FI', 'pl_PL', 'fr_CA', 'fo_FO', +'bs_BA', 'fr_LU', 'kl_GL', 'fa_IR', 'de_BE', 'sv_SE', 'it_CH', 'uk_UA', +'eu_ES', 'vi_VN', 'af_ZA', 'nb_NO', 'en_DK', 'tg_TJ', 'ps_AF', 'en_US', +'fr_FR.ISO8859-1', 'fr_FR.UTF-8', 'fr_FR.ISO8859-15@euro', +'ru_RU.KOI8-R', 'ko_KR.eucKR'] + +tested = False +for loc in candidate_locales: +try: +setlocale(LC_NUMERIC, loc) +setlocale(LC_CTYPE, loc) +except Error: +continue +for li, lc in ((RADIXCHAR, "decimal_point"), +(THOUSEP, "thousands_sep")): +nl_radixchar = nl_langinfo(li) +li_radixchar = localeconv()[lc] +try: +set_locale = setlocale(LC_NUMERIC) +except Error: +set_locale = "" +assert nl_radixchar == li_radixchar, ("nl_langinfo != localeconv " +"(set to %s, using %s)" % ( loc, set_locale)) ___ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit
[pypy-commit] pypy unicode-utf8-py3: add missing test files
Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95364:2a5ae5b4e029 Date: 2018-11-23 09:26 -0600 http://bitbucket.org/pypy/pypy/changeset/2a5ae5b4e029/ Log:add missing test files diff --git a/pypy/module/_multibytecodec/test/big5hkscs-utf8.txt b/pypy/module/_multibytecodec/test/big5hkscs-utf8.txt new file mode 100644 --- /dev/null +++ b/pypy/module/_multibytecodec/test/big5hkscs-utf8.txt @@ -0,0 +1,2 @@ +𠄌Ě鵮罓洆 +ÊÊ̄ê êê̄ diff --git a/pypy/module/_multibytecodec/test/big5hkscs.txt b/pypy/module/_multibytecodec/test/big5hkscs.txt new file mode 100644 --- /dev/null +++ b/pypy/module/_multibytecodec/test/big5hkscs.txt @@ -0,0 +1,2 @@ +�E�\�s�ڍ� +�f�b�� diff --git a/pypy/module/_multibytecodec/test/test_multibtye_codecs.py b/pypy/module/_multibytecodec/test/test_multibtye_codecs.py new file mode 100644 --- /dev/null +++ b/pypy/module/_multibytecodec/test/test_multibtye_codecs.py @@ -0,0 +1,64 @@ +import os + +class AppTestPartialEvaluation: +spaceconfig = dict(usemodules=['_multibytecodec', '_codecs']) + +def setup_class(cls): +cls.w_myfile = cls.space.wrap(os.path.dirname(__file__)) + +def test_callback_None_index(self): +import _multibytecodec, _codecs +codec = _multibytecodec.__getcodec('cp932') +def myreplace(exc): +return ('x', None) +_codecs.register_error("test.cjktest", myreplace) +raises(TypeError, codec.encode, '\udeee', 'test.cjktest') + +def test_callback_backward_index(self): +import _multibytecodec, _codecs +codec = _multibytecodec.__getcodec('cp932') +def myreplace(exc): +if myreplace.limit > 0: +myreplace.limit -= 1 +return ('REPLACED', 0) +else: +return ('TERMINAL', exc.end) +myreplace.limit = 3 +_codecs.register_error("test.cjktest", myreplace) +assert (codec.encode('abcd' + '\udeee' + 'efgh', 'test.cjktest') == +(b'abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9)) + +def test_callback_forward_index(self): +import _multibytecodec, _codecs +codec = _multibytecodec.__getcodec('cp932') +def myreplace(exc): +return ('REPLACED', exc.end + 2) +_codecs.register_error("test.cjktest", myreplace) +assert (codec.encode('abcd' + '\udeee' + 'efgh', 'test.cjktest') == + (b'abcdREPLACEDgh', 9)) + +def _test_incrementalencoder(self): +import _multibytecodec, _codecs, _io +with open(self.myfile + '/shift_jis.txt', 'rb') as fid: +uni_str = fid.read() +with open(self.myfile + '/shift_jis-utf8.txt', 'rb') as fid: +utf8str = fid.read() +UTF8Reader = _codecs.lookup('utf-8').streamreader +for sizehint in [None] + list(range(1, 33)) + \ +[64, 128, 256, 512, 1024]: +istream = UTF8Reader(_io.BytesIO(utf8str)) +ostream = _io.BytesIO() +codec = _multibytecodec.__getcodec('cp932') +print(dir(codec)) +encoder = codec.incrementalencoder() +while 1: +if sizehint is not None: +data = istream.read(sizehint) +else: +data = istream.read() + +if not data: +break +e = encoder.encode(data) +ostream.write(e) +assert ostream.getvalue() == uni_str ___ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit
[pypy-commit] pypy unicode-utf8-py3: UnicodeListStrategy can hold utf8, not just ascii
Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95367:a841b6df8847 Date: 2018-11-25 12:31 -0600 http://bitbucket.org/pypy/pypy/changeset/a841b6df8847/ Log:UnicodeListStrategy can hold utf8, not just ascii diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -98,7 +98,6 @@ return self._utf8 def listview_utf8(self): -assert self.is_ascii() return _create_list_from_unicode(self._utf8) def ord(self, space): ___ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit