Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de>
Branch: py3.7
Changeset: r98474:338458b7bc73
Date: 2020-01-06 23:09 +0100
http://bitbucket.org/pypy/pypy/changeset/338458b7bc73/
Log: merge heads diff --git a/rpython/rlib/unicodedata/generate_unicodedb.py b/rpython/rlib/unicodedata/generate_unicodedb.py --- a/rpython/rlib/unicodedata/generate_unicodedb.py +++ b/rpython/rlib/unicodedata/generate_unicodedb.py @@ -549,38 +549,48 @@ print >> outfile, 'version = %r' % version print >> outfile - if version < "4.1": + version_tuple = tuple(int(x) for x in version.split(".")) + if version_tuple < (4, 1, 0): cjk_interval = ("(0x3400 <= code <= 0x4DB5 or" " 0x4E00 <= code <= 0x9FA5 or" " 0x20000 <= code <= 0x2A6D6)") - elif version < "5": # don't know the exact limit + elif version_tuple < (5, 0, 0): # don't know the exact limit cjk_interval = ("(0x3400 <= code <= 0x4DB5 or" " 0x4E00 <= code <= 0x9FBB or" " 0x20000 <= code <= 0x2A6D6)") - elif version < "6": + elif version_tuple < (6, 0, 0): cjk_interval = ("(0x3400 <= code <= 0x4DB5 or" " 0x4E00 <= code <= 0x9FCB or" " 0x20000 <= code <= 0x2A6D6 or" " 0x2A700 <= code <= 0x2B734)") - elif version < "6.1": + elif version_tuple < (6, 1, 0): cjk_interval = ("(0x3400 <= code <= 0x4DB5 or" " 0x4E00 <= code <= 0x9FCB or" " 0x20000 <= code <= 0x2A6D6 or" " 0x2A700 <= code <= 0x2B734 or" " 0x2B740 <= code <= 0x2B81D)") - elif version < "8": + elif version_tuple < (8, 0, 0): cjk_interval = ("(0x3400 <= code <= 0x4DB5 or" " 0x4E00 <= code <= 0x9FCC or" " 0x20000 <= code <= 0x2A6D6 or" " 0x2A700 <= code <= 0x2B734 or" " 0x2B740 <= code <= 0x2B81D)") - else: + elif version_tuple == (9, 0, 0): cjk_interval = ("(0x3400 <= code <= 0x4DB5 or" " 0x4E00 <= code <= 0x9FD5 or" " 0x20000 <= code <= 0x2A6D6 or" " 0x2A700 <= code <= 0x2B734 or" " 0x2B740 <= code <= 0x2B81D or" " 0x2B820 <= code <= 0x2CEA1)") + elif version_tuple == (11, 0, 0): + cjk_interval = ("(0x3400 <= code <= 0x4DB5 or" + " 0x4E00 <= code <= 0x9FEF or" + " 0x20000 <= code <= 0x2A6D6 or" + " 0x2A700 <= code <= 0x2B734 or" + " 0x2B740 <= code <= 0x2B81D or" + " 0x2B820 <= code <= 0x2CEA1)") + else: + raise ValueError("please look up CJK ranges and fix 
the script, e.g. here: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)") write_character_names(outfile, table, base_mod) diff --git a/rpython/rlib/unicodedata/test/test_ucd.py b/rpython/rlib/unicodedata/test/test_ucd.py --- a/rpython/rlib/unicodedata/test/test_ucd.py +++ b/rpython/rlib/unicodedata/test/test_ucd.py @@ -1,5 +1,6 @@ +import pytest from rpython.rlib.runicode import code_to_unichr, MAXUNICODE -from rpython.rlib.unicodedata import unicodedb_5_2_0 +from rpython.rlib.unicodedata import unicodedb_5_2_0, unicodedb_11_0_0 from rpython.rtyper.test.tool import BaseRtypingTest from rpython.translator.c.test.test_genc import compile @@ -26,3 +27,26 @@ assert got == 0xd808 # first char of a pair else: assert got == 0x12346 + +def test_cjk(): + cases = [ + ('3400', '4DB5'), + ('4E00', '9FEF'), + ('20000', '2A6D6'), + ('2A700', '2B734'), + ('2B740', '2B81D'), + ('2B820', '2CEA1'), + ] + for first, last in cases: + first = int(first, 16) + last = int(last, 16) + # Test at and inside the boundary + for i in (first, first + 1, last - 1, last): + charname = 'CJK UNIFIED IDEOGRAPH-%X'%i + assert unicodedb_11_0_0.lookup(charname) == i + # Test outside the boundary + for i in first - 1, last + 1: + charname = 'CJK UNIFIED IDEOGRAPH-%X'%i + with pytest.raises(KeyError): + unicodedb_11_0_0.lookup(charname) + diff --git a/rpython/rlib/unicodedata/unicodedb_11_0_0.py b/rpython/rlib/unicodedata/unicodedb_11_0_0.py --- a/rpython/rlib/unicodedata/unicodedb_11_0_0.py +++ b/rpython/rlib/unicodedata/unicodedb_11_0_0.py @@ -61172,7 +61172,7 @@ if not ('0' <= c <= '9' or 'A' <= c <= 'F'): raise KeyError code = int(cjk_code, 16) - if (0x3400 <= code <= 0x4DB5 or 0x4E00 <= code <= 0x9FA5 or 0x20000 <= code <= 0x2A6D6): + if (0x3400 <= code <= 0x4DB5 or 0x4E00 <= code <= 0x9FEF or 0x20000 <= code <= 0x2A6D6 or 0x2A700 <= code <= 0x2B734 or 0x2B740 <= code <= 0x2B81D or 0x2B820 <= code <= 0x2CEA1): return code raise KeyError @@ -61197,7 +61197,7 @@ return code def 
name(code): - if (0x3400 <= code <= 0x4DB5 or 0x4E00 <= code <= 0x9FA5 or 0x20000 <= code <= 0x2A6D6): + if (0x3400 <= code <= 0x4DB5 or 0x4E00 <= code <= 0x9FEF or 0x20000 <= code <= 0x2A6D6 or 0x2A700 <= code <= 0x2B734 or 0x2B740 <= code <= 0x2B81D or 0x2B820 <= code <= 0x2CEA1): return "CJK UNIFIED IDEOGRAPH-" + hex(code)[2:].upper() if 0xAC00 <= code <= 0xD7A3: # vl_code, t_code = divmod(code - 0xAC00, len(_hangul_T)) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit