Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: py3.7
Changeset: r98474:338458b7bc73
Date: 2020-01-06 23:09 +0100
http://bitbucket.org/pypy/pypy/changeset/338458b7bc73/
Log: merge heads
diff --git a/rpython/rlib/unicodedata/generate_unicodedb.py b/rpython/rlib/unicodedata/generate_unicodedb.py
--- a/rpython/rlib/unicodedata/generate_unicodedb.py
+++ b/rpython/rlib/unicodedata/generate_unicodedb.py
@@ -549,38 +549,48 @@
print >> outfile, 'version = %r' % version
print >> outfile
- if version < "4.1":
+ version_tuple = tuple(int(x) for x in version.split("."))
+ if version_tuple < (4, 1, 0):
cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
" 0x4E00 <= code <= 0x9FA5 or"
" 0x20000 <= code <= 0x2A6D6)")
- elif version < "5": # don't know the exact limit
+ elif version_tuple < (5, 0, 0): # don't know the exact limit
cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
" 0x4E00 <= code <= 0x9FBB or"
" 0x20000 <= code <= 0x2A6D6)")
- elif version < "6":
+ elif version_tuple < (6, 0, 0):
cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
" 0x4E00 <= code <= 0x9FCB or"
" 0x20000 <= code <= 0x2A6D6 or"
" 0x2A700 <= code <= 0x2B734)")
- elif version < "6.1":
+ elif version_tuple < (6, 1, 0):
cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
" 0x4E00 <= code <= 0x9FCB or"
" 0x20000 <= code <= 0x2A6D6 or"
" 0x2A700 <= code <= 0x2B734 or"
" 0x2B740 <= code <= 0x2B81D)")
- elif version < "8":
+ elif version_tuple < (8, 0, 0):
cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
" 0x4E00 <= code <= 0x9FCC or"
" 0x20000 <= code <= 0x2A6D6 or"
" 0x2A700 <= code <= 0x2B734 or"
" 0x2B740 <= code <= 0x2B81D)")
- else:
+ elif version_tuple == (9, 0, 0):
cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
" 0x4E00 <= code <= 0x9FD5 or"
" 0x20000 <= code <= 0x2A6D6 or"
" 0x2A700 <= code <= 0x2B734 or"
" 0x2B740 <= code <= 0x2B81D or"
" 0x2B820 <= code <= 0x2CEA1)")
+ elif version_tuple == (11, 0, 0):
+ cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
+ " 0x4E00 <= code <= 0x9FEF or"
+ " 0x20000 <= code <= 0x2A6D6 or"
+ " 0x2A700 <= code <= 0x2B734 or"
+ " 0x2B740 <= code <= 0x2B81D or"
+ " 0x2B820 <= code <= 0x2CEA1)")
+ else:
+ raise ValueError("please look up CJK ranges and fix the script, e.g. here: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)")
write_character_names(outfile, table, base_mod)
diff --git a/rpython/rlib/unicodedata/test/test_ucd.py b/rpython/rlib/unicodedata/test/test_ucd.py
--- a/rpython/rlib/unicodedata/test/test_ucd.py
+++ b/rpython/rlib/unicodedata/test/test_ucd.py
@@ -1,5 +1,6 @@
+import pytest
from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
-from rpython.rlib.unicodedata import unicodedb_5_2_0
+from rpython.rlib.unicodedata import unicodedb_5_2_0, unicodedb_11_0_0
from rpython.rtyper.test.tool import BaseRtypingTest
from rpython.translator.c.test.test_genc import compile
@@ -26,3 +27,26 @@
assert got == 0xd808 # first char of a pair
else:
assert got == 0x12346
+
+def test_cjk():
+ cases = [
+ ('3400', '4DB5'),
+ ('4E00', '9FEF'),
+ ('20000', '2A6D6'),
+ ('2A700', '2B734'),
+ ('2B740', '2B81D'),
+ ('2B820', '2CEA1'),
+ ]
+ for first, last in cases:
+ first = int(first, 16)
+ last = int(last, 16)
+ # Test at and inside the boundary
+ for i in (first, first + 1, last - 1, last):
+ charname = 'CJK UNIFIED IDEOGRAPH-%X'%i
+ assert unicodedb_11_0_0.lookup(charname) == i
+ # Test outside the boundary
+ for i in first - 1, last + 1:
+ charname = 'CJK UNIFIED IDEOGRAPH-%X'%i
+ with pytest.raises(KeyError):
+ unicodedb_11_0_0.lookup(charname)
+
diff --git a/rpython/rlib/unicodedata/unicodedb_11_0_0.py b/rpython/rlib/unicodedata/unicodedb_11_0_0.py
--- a/rpython/rlib/unicodedata/unicodedb_11_0_0.py
+++ b/rpython/rlib/unicodedata/unicodedb_11_0_0.py
@@ -61172,7 +61172,7 @@
if not ('0' <= c <= '9' or 'A' <= c <= 'F'):
raise KeyError
code = int(cjk_code, 16)
- if (0x3400 <= code <= 0x4DB5 or 0x4E00 <= code <= 0x9FA5 or 0x20000 <= code <= 0x2A6D6):
+ if (0x3400 <= code <= 0x4DB5 or 0x4E00 <= code <= 0x9FEF or 0x20000 <= code <= 0x2A6D6 or 0x2A700 <= code <= 0x2B734 or 0x2B740 <= code <= 0x2B81D or 0x2B820 <= code <= 0x2CEA1):
return code
raise KeyError
@@ -61197,7 +61197,7 @@
return code
def name(code):
- if (0x3400 <= code <= 0x4DB5 or 0x4E00 <= code <= 0x9FA5 or 0x20000 <= code <= 0x2A6D6):
+ if (0x3400 <= code <= 0x4DB5 or 0x4E00 <= code <= 0x9FEF or 0x20000 <= code <= 0x2A6D6 or 0x2A700 <= code <= 0x2B734 or 0x2B740 <= code <= 0x2B81D or 0x2B820 <= code <= 0x2CEA1):
return "CJK UNIFIED IDEOGRAPH-" + hex(code)[2:].upper()
if 0xAC00 <= code <= 0xD7A3:
# vl_code, t_code = divmod(code - 0xAC00, len(_hangul_T))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit