Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de>
Branch: py3.7
Changeset: r98474:338458b7bc73
Date: 2020-01-06 23:09 +0100
http://bitbucket.org/pypy/pypy/changeset/338458b7bc73/

Log:    merge heads

diff --git a/rpython/rlib/unicodedata/generate_unicodedb.py b/rpython/rlib/unicodedata/generate_unicodedb.py
--- a/rpython/rlib/unicodedata/generate_unicodedb.py
+++ b/rpython/rlib/unicodedata/generate_unicodedb.py
@@ -549,38 +549,48 @@
     print >> outfile, 'version = %r' % version
     print >> outfile
 
-    if version < "4.1":
+    version_tuple = tuple(int(x) for x in version.split("."))
+    if version_tuple < (4, 1, 0):
         cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
                         " 0x4E00 <= code <= 0x9FA5 or"
                         " 0x20000 <= code <= 0x2A6D6)")
-    elif version < "5":    # don't know the exact limit
+    elif version_tuple < (5, 0, 0):    # don't know the exact limit
         cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
                         " 0x4E00 <= code <= 0x9FBB or"
                         " 0x20000 <= code <= 0x2A6D6)")
-    elif version < "6":
+    elif version_tuple < (6, 0, 0):
         cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
                         " 0x4E00 <= code <= 0x9FCB or"
                         " 0x20000 <= code <= 0x2A6D6 or"
                         " 0x2A700 <= code <= 0x2B734)")
-    elif version < "6.1":
+    elif version_tuple < (6, 1, 0):
         cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
                         " 0x4E00 <= code <= 0x9FCB or"
                         " 0x20000 <= code <= 0x2A6D6 or"
                         " 0x2A700 <= code <= 0x2B734 or"
                         " 0x2B740 <= code <= 0x2B81D)")
-    elif version < "8":
+    elif version_tuple < (8, 0, 0):
         cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
                         " 0x4E00 <= code <= 0x9FCC or"
                         " 0x20000 <= code <= 0x2A6D6 or"
                         " 0x2A700 <= code <= 0x2B734 or"
                         " 0x2B740 <= code <= 0x2B81D)")
-    else:
+    elif version_tuple == (9, 0, 0):
         cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
                         " 0x4E00 <= code <= 0x9FD5 or"
                         " 0x20000 <= code <= 0x2A6D6 or"
                         " 0x2A700 <= code <= 0x2B734 or"
                         " 0x2B740 <= code <= 0x2B81D or"
                         " 0x2B820 <= code <= 0x2CEA1)")
+    elif version_tuple == (11, 0, 0):
+        cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
+                        " 0x4E00 <= code <= 0x9FEF or"
+                        " 0x20000 <= code <= 0x2A6D6 or"
+                        " 0x2A700 <= code <= 0x2B734 or"
+                        " 0x2B740 <= code <= 0x2B81D or"
+                        " 0x2B820 <= code <= 0x2CEA1)")
+    else:
+        raise ValueError("please look up CJK ranges and fix the script, e.g. here: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)")
 
     write_character_names(outfile, table, base_mod)
 
diff --git a/rpython/rlib/unicodedata/test/test_ucd.py b/rpython/rlib/unicodedata/test/test_ucd.py
--- a/rpython/rlib/unicodedata/test/test_ucd.py
+++ b/rpython/rlib/unicodedata/test/test_ucd.py
@@ -1,5 +1,6 @@
+import pytest
 from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
-from rpython.rlib.unicodedata import unicodedb_5_2_0
+from rpython.rlib.unicodedata import unicodedb_5_2_0, unicodedb_11_0_0
 from rpython.rtyper.test.tool import BaseRtypingTest
 from rpython.translator.c.test.test_genc import compile
 
@@ -26,3 +27,26 @@
         assert got == 0xd808    # first char of a pair
     else:
         assert got == 0x12346
+
+def test_cjk():
+    cases = [
+        ('3400', '4DB5'),
+        ('4E00', '9FEF'),
+        ('20000', '2A6D6'),
+        ('2A700', '2B734'),
+        ('2B740', '2B81D'),
+        ('2B820', '2CEA1'),
+    ]
+    for first, last in cases:
+        first = int(first, 16)
+        last = int(last, 16)
+        # Test at and inside the boundary
+        for i in (first, first + 1, last - 1, last):
+            charname = 'CJK UNIFIED IDEOGRAPH-%X'%i
+            assert unicodedb_11_0_0.lookup(charname) == i
+        # Test outside the boundary
+        for i in first - 1, last + 1:
+            charname = 'CJK UNIFIED IDEOGRAPH-%X'%i
+            with pytest.raises(KeyError):
+                unicodedb_11_0_0.lookup(charname)
+
diff --git a/rpython/rlib/unicodedata/unicodedb_11_0_0.py b/rpython/rlib/unicodedata/unicodedb_11_0_0.py
--- a/rpython/rlib/unicodedata/unicodedb_11_0_0.py
+++ b/rpython/rlib/unicodedata/unicodedb_11_0_0.py
@@ -61172,7 +61172,7 @@
         if not ('0' <= c <= '9' or 'A' <= c <= 'F'):
             raise KeyError
     code = int(cjk_code, 16)
-    if (0x3400 <= code <= 0x4DB5 or 0x4E00 <= code <= 0x9FA5 or 0x20000 <= code <= 0x2A6D6):
+    if (0x3400 <= code <= 0x4DB5 or 0x4E00 <= code <= 0x9FEF or 0x20000 <= code <= 0x2A6D6 or 0x2A700 <= code <= 0x2B734 or 0x2B740 <= code <= 0x2B81D or 0x2B820 <= code <= 0x2CEA1):
         return code
     raise KeyError
 
@@ -61197,7 +61197,7 @@
     return code
 
 def name(code):
-    if (0x3400 <= code <= 0x4DB5 or 0x4E00 <= code <= 0x9FA5 or 0x20000 <= code <= 0x2A6D6):
+    if (0x3400 <= code <= 0x4DB5 or 0x4E00 <= code <= 0x9FEF or 0x20000 <= code <= 0x2A6D6 or 0x2A700 <= code <= 0x2B734 or 0x2B740 <= code <= 0x2B81D or 0x2B820 <= code <= 0x2CEA1):
         return "CJK UNIFIED IDEOGRAPH-" + hex(code)[2:].upper()
     if 0xAC00 <= code <= 0xD7A3:
         # vl_code, t_code = divmod(code - 0xAC00, len(_hangul_T))
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to