Author: Amaury Forgeot d'Arc <amaur...@gmail.com> Branch: py3.3 Changeset: r76391:06f9a5ad6287 Date: 2015-03-16 01:15 +0100 http://bitbucket.org/pypy/pypy/changeset/06f9a5ad6287/
Log: Unicodedb: Add support for Aliases. diff too long, truncating to 2000 out of 172399 lines diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -75,7 +75,7 @@ class UCD(W_Root): def __init__(self, unicodedb): - self._lookup = unicodedb.lookup + self._lookup = unicodedb.lookup_with_alias self._lookup_named_sequence = unicodedb.lookup_named_sequence self._name = unicodedb.name self._decimal = unicodedb.decimal diff --git a/pypy/module/unicodedata/test/test_unicodedata.py b/pypy/module/unicodedata/test/test_unicodedata.py --- a/pypy/module/unicodedata/test/test_unicodedata.py +++ b/pypy/module/unicodedata/test/test_unicodedata.py @@ -107,6 +107,27 @@ import unicodedata raises(TypeError, unicodedata.bidirectional, 'xx') + def test_aliases(self): + import unicodedata + aliases = [ + ('LATIN CAPITAL LETTER GHA', 0x01A2), + ('LATIN SMALL LETTER GHA', 0x01A3), + ('KANNADA LETTER LLLA', 0x0CDE), + ('LAO LETTER FO FON', 0x0E9D), + ('LAO LETTER FO FAY', 0x0E9F), + ('LAO LETTER RO', 0x0EA3), + ('LAO LETTER LO', 0x0EA5), + ('TIBETAN MARK BKA- SHOG GI MGO RGYAN', 0x0FD0), + ('YI SYLLABLE ITERATION MARK', 0xA015), + ('PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET', 0xFE18), + ('BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS', 0x1D0C5) + ] + for alias, codepoint in aliases: + name = unicodedata.name(chr(codepoint)) + assert name != alias + assert unicodedata.lookup(alias) == unicodedata.lookup(name) + raises(KeyError, unicodedata.ucd_3_2_0.lookup, alias) + def test_named_sequences(self): import unicodedata sequences = [ diff --git a/rpython/rlib/unicodedata/generate_unicodedb.py b/rpython/rlib/unicodedata/generate_unicodedb.py --- a/rpython/rlib/unicodedata/generate_unicodedb.py +++ b/rpython/rlib/unicodedata/generate_unicodedb.py @@ -88,10 +88,11 @@ class UnicodeData(object): # we use this range of PUA_15 to store name aliases and named sequences NAME_ALIASES_START = 0xF0000 - NAMED_SEQUENCES_START = 0xF0100 + NAMED_SEQUENCES_START = 0xF0200 def __init__(self): self.table = [None] * (MAXUNICODE + 1) + self.aliases = [] self.named_sequences = [] def add_char(self, code, char): @@ -149,6 +150,12 @@ self.table[code].canonical_decomp = result return self.table[code].canonical_decomp + def add_alias(self, name, char): + pua_index = self.NAME_ALIASES_START + len(self.aliases) + self.aliases.append((name, char)) + # also store the name in the PUA 1 + self.table[pua_index].name = name + def add_named_sequence(self, name, chars): pua_index = self.NAMED_SEQUENCES_START + len(self.named_sequences) self.named_sequences.append((name, chars)) @@ -262,6 +269,16 @@ table.get_canonical_decomposition(code) table.get_compat_decomposition(code) + # Name aliases + for line in files['name_aliases']: + line = line.strip() + if not line or line.startswith('#'): + continue + items = line.split(';') + char = int(items[0], 16) + name = items[1] + table.add_alias(name, char) + # Named sequences for line in files['named_sequences']: line = line.strip() @@ -786,7 +803,21 @@ return None ''' % dict(start=table.NAMED_SEQUENCES_START) - + # aliases + print >> outfile, '_name_aliases = [' + for name, char in table.aliases: + print >> outfile, "%s," % (char,) + print >> outfile, ']' + print >> outfile, ''' + +def lookup_with_alias(name): + code = lookup(name) + if 0 <= code - %(start)s < len(_name_aliases): + return _name_aliases[code - %(start)s] + else: + return code +''' % dict(start=table.NAME_ALIASES_START) + def main(): import sys diff --git a/rpython/rlib/unicodedata/unicodedb_3_2_0.py b/rpython/rlib/unicodedata/unicodedb_3_2_0.py --- a/rpython/rlib/unicodedata/unicodedb_3_2_0.py +++ b/rpython/rlib/unicodedata/unicodedb_3_2_0.py @@ -493,6 +493,7 @@ 6679: None, 6682: None, 6680: None, +983050: None, 6322: None, 6321: None, 6387: None, @@ -3668,7 +3669,7 @@ 9965: None, 4346: None, 4345: None, -983646: None, +983902: None, 11520: None, 11521: None, 11546: None, @@ -4439,13 +4440,14 @@ 69815: None, 69811: None, 69812: None, +983042: None, 3261: None, 3313: None, 3260: None, 3314: None, 3298: None, 3299: None, -983688: None, +983944: None, 43272: None, 43269: None, 43268: None, @@ -4559,44 +4561,44 @@ 68102: None, 68098: None, 68099: None, -983667: None, -983652: None, -983653: None, -983655: None, -983654: None, -983657: None, -983659: None, -983679: None, -983647: None, -983648: None, -983650: None, -983649: None, -983680: None, -983674: None, -983671: None, -983661: None, -983651: None, -983666: None, -983656: None, -983668: None, -983670: None, -983669: None, -983673: None, -983678: None, -983676: None, -983677: None, -983662: None, -983663: None, -983665: None, -983664: None, -983658: None, -983660: None, -983675: None, -983672: None, -983685: None, -983682: None, -983683: None, -983684: None, +983923: None, +983908: None, +983909: None, +983911: None, +983910: None, +983913: None, +983915: None, +983935: None, +983903: None, +983904: None, +983906: None, +983905: None, +983936: None, +983930: None, +983927: None, +983917: None, +983907: None, +983922: None, +983912: None, +983924: None, +983926: None, +983925: None, +983929: None, +983934: None, +983932: None, +983933: None, +983918: None, +983919: None, +983921: None, +983920: None, +983914: None, +983916: None, +983931: None, +983928: None, +983941: None, +983938: None, +983939: None, +983940: None, 6109: None, 6627: None, 6643: None, @@ -4640,16 +4642,20 @@ 6631: None, 6647: None, 6640: None, -983687: None, -983681: None, -983686: None, +983943: None, +983937: None, +983942: None, +983044: None, +983043: None, +983046: None, +983045: None, 68413: None, 68415: None, 68412: None, 68414: None, -983296: None, -983322: None, -983324: None, +983552: None, +983578: None, +983580: None, 570: None, 42802: None, 11373: None, @@ -4664,26 +4670,27 @@ 42862: None, 42796: None, 42798: None, -983306: None, -983304: None, -983330: None, -983332: None, -983326: None, -983328: None, +983562: None, +983560: None, +983586: None, +983588: None, +983582: None, +983584: None, 582: None, -983298: None, -983302: None, -983300: None, +983554: None, +983558: None, +983556: None, 42788: None, 42786: None, 42858: None, +983040: None, 577: None, 11367: None, 11381: None, 42790: None, -983308: None, -983336: None, -983338: None, +983564: None, +983592: None, +983594: None, 42873: None, 42875: None, 42877: None, @@ -4692,7 +4699,7 @@ 42886: None, 42860: None, 584: None, -983340: None, +983596: None, 11369: None, 42818: None, 42816: None, @@ -4701,16 +4708,16 @@ 11360: None, 42824: None, 11362: None, -983342: None, +983598: None, 11374: None, -983344: None, +983600: None, 7930: None, 7932: None, 42826: None, 42828: None, -983312: None, -983316: None, -983314: None, +983568: None, +983572: None, +983570: None, 42830: None, 42834: None, 42836: None, @@ -4721,11 +4728,11 @@ 42842: None, 588: None, 11364: None, -983346: None, +983602: None, 42814: None, 42844: None, 11390: None, -983318: None, +983574: None, 42891: None, 7838: None, 586: None, @@ -4740,11 +4747,11 @@ 581: None, 42792: None, 580: None, -983352: None, -983320: None, -983354: None, -983348: None, -983350: None, +983608: None, +983576: None, +983610: None, +983604: None, +983606: None, 42846: None, 42856: None, 42850: None, @@ -4791,9 +4798,9 @@ 7460: None, 7547: None, 7550: None, -983297: None, -983323: None, -983325: None, +983553: None, +983579: None, +983581: None, 7567: None, 11365: None, 42803: None, @@ -4819,18 +4826,18 @@ 7839: None, 567: None, 42865: None, -983307: None, -983305: None, -983331: None, -983333: None, +983563: None, +983561: None, +983587: None, +983589: None, 11384: None, -983327: None, -983329: None, +983583: None, +983585: None, 7570: None, 583: None, -983299: None, -983303: None, -983301: None, +983555: None, +983559: None, +983557: None, 42789: None, 42787: None, 7563: None, @@ -4840,16 +4847,17 @@ 7534: None, 7554: None, 7555: None, +983041: None, 578: None, 11368: None, 11382: None, 42791: None, -983310: None, -983334: None, -983335: None, -983309: None, -983337: None, -983339: None, +983566: None, +983590: None, +983591: None, +983565: None, +983593: None, +983595: None, 7574: None, 42874: None, 42876: None, @@ -4859,7 +4867,7 @@ 42887: None, 7548: None, 42861: None, -983341: None, +983597: None, 585: None, 11370: None, 42819: None, @@ -4870,27 +4878,27 @@ 11361: None, 42825: None, 7557: None, -983343: None, +983599: None, 7836: None, 7837: None, 42866: None, 7535: None, 7558: None, -983345: None, +983601: None, 7931: None, 7933: None, 42867: None, 565: None, 7536: None, 7559: None, -983311: None, +983567: None, 42868: None, 42827: None, 42829: None, 11386: None, -983313: None, -983317: None, -983315: None, +983569: None, +983573: None, +983571: None, 42831: None, 7571: None, 7575: None, @@ -4909,7 +4917,7 @@ 7538: None, 7561: None, 589: None, -983347: None, +983603: None, 8580: None, 42815: None, 7572: None, @@ -4918,7 +4926,7 @@ 7540: None, 7562: None, 575: None, -983319: None, +983575: None, 42892: None, 7573: None, 7454: None, @@ -4948,11 +4956,11 @@ 7432: None, 11385: None, 42793: None, -983353: None, -983321: None, -983355: None, -983349: None, -983351: None, +983609: None, +983577: None, +983611: None, +983605: None, +983607: None, 7577: None, 7531: None, 42872: None, @@ -5661,7 +5669,7 @@ 764: None, 42765: None, 42760: None, -983689: None, +983945: None, 42770: None, 42769: None, 42764: None, @@ -6483,6 +6491,7 @@ 65047: None, 65046: None, 65096: None, +983049: None, 65048: None, 65044: None, 9915: None, @@ -7220,30 +7229,30 @@ 43699: None, 43705: None, 3064: None, -983358: None, -983378: None, -983374: None, -983356: None, -983379: None, -983368: None, -983371: None, -983370: None, -983365: None, -983363: None, -983357: None, -983361: None, -983373: None, -983359: None, -983364: None, -983367: None, -983372: None, -983377: None, -983375: None, -983376: None, -983362: None, -983360: None, -983369: None, -983366: None, +983614: None, +983634: None, +983630: None, +983612: None, +983635: None, +983624: None, +983627: None, +983626: None, +983621: None, +983619: None, +983613: None, +983617: None, +983629: None, +983615: None, +983620: None, +983623: None, +983628: None, +983633: None, +983631: None, +983632: None, +983618: None, +983616: None, +983625: None, +983622: None, 3063: None, 3059: None, 3062: None, @@ -7253,272 +7262,272 @@ 3066: None, 3024: None, 3065: None, -983402: None, -983409: None, -983412: None, -983407: None, -983408: None, -983403: None, -983404: None, -983410: None, -983411: None, -983405: None, -983406: None, -983622: None, -983629: None, -983632: None, -983627: None, -983628: None, -983623: None, -983624: None, -983630: None, -983631: None, -983625: None, -983626: None, -983578: None, -983585: None, -983588: None, -983583: None, -983584: None, -983579: None, -983580: None, -983586: None, -983587: None, -983581: None, -983582: None, -983380: None, -983387: None, -983390: None, -983385: None, -983386: None, -983381: None, -983382: None, -983388: None, -983389: None, -983633: None, -983634: None, +983658: None, +983665: None, +983668: None, +983663: None, +983664: None, +983659: None, +983660: None, +983666: None, +983667: None, +983661: None, +983662: None, +983878: None, +983885: None, +983888: None, +983883: None, +983884: None, +983879: None, +983880: None, +983886: None, +983887: None, +983881: None, +983882: None, +983834: None, +983841: None, +983844: None, +983839: None, +983840: None, +983835: None, +983836: None, +983842: None, +983843: None, +983837: None, +983838: None, +983636: None, +983643: None, +983646: None, 983641: None, +983642: None, +983637: None, +983638: None, 983644: None, +983645: None, +983889: None, +983890: None, +983897: None, +983900: None, +983895: None, +983896: None, +983891: None, +983892: None, +983898: None, +983899: None, +983893: None, +983894: None, 983639: None, 983640: None, -983635: None, -983636: None, -983642: None, -983643: None, -983637: None, -983638: None, -983383: None, -983384: None, -983512: None, -983519: None, -983522: None, -983517: None, -983518: None, -983513: None, -983514: None, -983545: None, -983552: None, -983555: None, -983550: None, -983551: None, -983546: None, -983547: None, -983534: None, -983541: None, -983544: None, -983539: None, -983540: None, -983535: None, -983536: None, -983542: None, -983543: None, -983537: None, -983538: None, -983553: None, -983554: None, -983548: None, -983549: None, -983520: None, -983521: None, -983515: None, -983516: None, -983479: None, -983486: None, -983489: None, -983484: None, -983485: None, -983480: None, -983481: None, -983487: None, -983488: None, -983482: None, -983483: None, -983457: None, -983464: None, -983467: None, -983462: None, -983463: None, -983391: None, -983398: None, -983401: None, -983396: None, -983397: None, -983392: None, -983393: None, -983399: None, -983400: None, -983394: None, -983395: None, -983458: None, -983459: None, -983435: None, -983442: None, -983445: None, -983440: None, -983441: None, -983436: None, -983437: None, -983567: None, -983574: None, -983577: None, -983572: None, -983573: None, -983568: None, -983569: None, -983575: None, -983576: None, -983570: None, -983571: None, -983443: None, -983444: None, -983438: None, -983439: None, -983465: None, -983466: None, -983460: None, -983461: None, -983413: None, -983420: None, -983423: None, -983418: None, -983419: None, -983414: None, -983415: None, -983421: None, -983422: None, -983416: None, -983417: None, -983468: None, -983475: None, -983478: None, -983473: None, -983474: None, -983469: None, -983470: None, -983476: None, -983477: None, -983471: None, -983472: None, -983501: None, -983508: None, -983511: None, -983506: None, -983507: None, -983502: None, -983503: None, -983509: None, -983510: None, -983556: None, -983563: None, -983566: None, -983561: None, -983562: None, -983557: None, -983558: None, -983564: None, -983565: None, -983559: None, -983560: None, -983504: None, -983505: None, -983611: None, -983618: None, -983621: None, -983616: None, -983617: None, -983589: None, -983596: None, -983599: None, -983594: None, -983595: None, -983590: None, -983591: None, -983597: None, -983598: None, -983645: None, -983592: None, -983593: None, -983612: None, -983613: None, -983619: None, -983620: None, -983600: None, -983607: None, -983610: None, -983605: None, -983606: None, -983601: None, -983602: None, -983608: None, -983609: None, -983603: None, -983604: None, -983614: None, -983615: None, -983446: None, -983453: None, -983456: None, -983451: None, -983452: None, -983447: None, -983448: None, -983454: None, -983455: None, -983424: None, -983431: None, -983434: None, -983429: None, -983430: None, -983425: None, -983426: None, -983432: None, -983433: None, -983427: None, -983428: None, -983449: None, -983450: None, -983523: None, -983530: None, -983533: None, -983528: None, -983529: None, -983524: None, -983525: None, -983531: None, -983532: None, -983526: None, -983527: None, -983490: None, -983497: None, -983500: None, -983495: None, -983496: None, -983491: None, -983492: None, -983498: None, -983499: None, -983493: None, -983494: None, +983768: None, +983775: None, +983778: None, +983773: None, +983774: None, +983769: None, +983770: None, +983801: None, +983808: None, +983811: None, +983806: None, +983807: None, +983802: None, +983803: None, +983790: None, +983797: None, +983800: None, +983795: None, +983796: None, +983791: None, +983792: None, +983798: None, +983799: None, +983793: None, +983794: None, +983809: None, +983810: None, +983804: None, +983805: None, +983776: None, +983777: None, +983771: None, +983772: None, +983735: None, +983742: None, +983745: None, +983740: None, +983741: None, +983736: None, +983737: None, +983743: None, +983744: None, +983738: None, +983739: None, +983713: None, +983720: None, +983723: None, +983718: None, +983719: None, +983647: None, +983654: None, +983657: None, +983652: None, +983653: None, +983648: None, +983649: None, +983655: None, +983656: None, +983650: None, +983651: None, +983714: None, +983715: None, +983691: None, +983698: None, +983701: None, +983696: None, +983697: None, +983692: None, +983693: None, +983823: None, +983830: None, +983833: None, +983828: None, +983829: None, +983824: None, +983825: None, +983831: None, +983832: None, +983826: None, +983827: None, +983699: None, +983700: None, +983694: None, +983695: None, +983721: None, +983722: None, +983716: None, +983717: None, +983669: None, +983676: None, +983679: None, +983674: None, +983675: None, +983670: None, +983671: None, +983677: None, +983678: None, +983672: None, +983673: None, +983724: None, +983731: None, +983734: None, +983729: None, +983730: None, +983725: None, +983726: None, +983732: None, +983733: None, +983727: None, +983728: None, +983757: None, +983764: None, +983767: None, +983762: None, +983763: None, +983758: None, +983759: None, +983765: None, +983766: None, +983812: None, +983819: None, +983822: None, +983817: None, +983818: None, +983813: None, +983814: None, +983820: None, +983821: None, +983815: None, +983816: None, +983760: None, +983761: None, +983867: None, +983874: None, +983877: None, +983872: None, +983873: None, +983845: None, +983852: None, +983855: None, +983850: None, +983851: None, +983846: None, +983847: None, +983853: None, +983854: None, +983901: None, +983848: None, +983849: None, +983868: None, +983869: None, +983875: None, +983876: None, +983856: None, +983863: None, +983866: None, +983861: None, +983862: None, +983857: None, +983858: None, +983864: None, +983865: None, +983859: None, +983860: None, +983870: None, +983871: None, +983702: None, +983709: None, +983712: None, +983707: None, +983708: None, +983703: None, +983704: None, +983710: None, +983711: None, +983680: None, +983687: None, +983690: None, +983685: None, +983686: None, +983681: None, +983682: None, +983688: None, +983689: None, +983683: None, +983684: None, +983705: None, +983706: None, +983779: None, +983786: None, +983789: None, +983784: None, +983785: None, +983780: None, +983781: None, +983787: None, +983788: None, +983782: None, +983783: None, +983746: None, +983753: None, +983756: None, +983751: None, +983752: None, +983747: None, +983748: None, +983754: None, +983755: None, +983749: None, +983750: None, 3061: None, 3196: None, 3193: None, @@ -7624,6 +7633,7 @@ 9928: None, 3947: None, 3948: None, +983047: None, 4048: None, 4052: None, 4051: None, @@ -8358,6 +8368,7 @@ 11055: None, 11038: None, 11825: None, +983048: None, } _code_by_name = { } @@ -8844,6 +8855,7 @@ 'BUGINESE VOWEL SIGN I': None, 'BUGINESE VOWEL SIGN O': None, 'BUGINESE VOWEL SIGN U': None, +'BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS': None, 'CANADIAN SYLLABICS AAY': None, 'CANADIAN SYLLABICS AY': None, 'CANADIAN SYLLABICS BEAVER DENE L': None, @@ -12790,6 +12802,7 @@ 'KAITHI VOWEL SIGN O': None, 'KAITHI VOWEL SIGN U': None, 'KAITHI VOWEL SIGN UU': None, +'KANNADA LETTER LLLA': None, 'KANNADA SIGN AVAGRAHA': None, 'KANNADA SIGN JIHVAMULIYA': None, 'KANNADA SIGN NUKTA': None, @@ -12994,6 +13007,10 @@ 'KHMER VOWEL SIGN AAM': None, 'KHMER VOWEL SIGN COENG QA': None, 'KHMER VOWEL SIGN OM': None, +'LAO LETTER FO FAY': None, +'LAO LETTER FO FON': None, +'LAO LETTER LO': None, +'LAO LETTER RO': None, 'LARGE ONE DOT OVER TWO DOTS PUNCTUATION': None, 'LARGE ONE RING OVER TWO RINGS PUNCTUATION': None, 'LARGE TWO DOTS OVER ONE DOT PUNCTUATION': None, @@ -13028,6 +13045,7 @@ 'LATIN CAPITAL LETTER EGYPTOLOGICAL AIN': None, 'LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF': None, 'LATIN CAPITAL LETTER ET': None, +'LATIN CAPITAL LETTER GHA': None, 'LATIN CAPITAL LETTER GLOTTAL STOP': None, 'LATIN CAPITAL LETTER H WITH DESCENDER': None, 'LATIN CAPITAL LETTER HALF H': None, @@ -13191,6 +13209,7 @@ 'LATIN SMALL LETTER F WITH MIDDLE TILDE': None, 'LATIN SMALL LETTER F WITH PALATAL HOOK': None, 'LATIN SMALL LETTER G WITH PALATAL HOOK': None, +'LATIN SMALL LETTER GHA': None, 'LATIN SMALL LETTER GLOTTAL STOP': None, 'LATIN SMALL LETTER H WITH DESCENDER': None, 'LATIN SMALL LETTER HALF H': None, @@ -14834,6 +14853,7 @@ 'PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET': None, 'PRESENTATION FORM FOR VERTICAL QUESTION MARK': None, 'PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET': None, +'PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET': None, 'PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET': None, 'PRESENTATION FORM FOR VERTICAL SEMICOLON': None, 'QUINCUNX': None, @@ -15975,6 +15995,7 @@ 'THUNDER CLOUD AND RAIN': None, 'TIBETAN LETTER KKA': None, 'TIBETAN LETTER RRA': None, +'TIBETAN MARK BKA- SHOG GI MGO RGYAN': None, 'TIBETAN MARK BSKA- SHOG GI MGO RGYAN': None, 'TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA': None, 'TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA': None, @@ -16709,6 +16730,7 @@ 'WHITE VERTICAL ELLIPSE': None, 'WHITE VERY SMALL SQUARE': None, 'WORD SEPARATOR MIDDLE DOT': None, +'YI SYLLABLE ITERATION MARK': None, } _cjk_prefix = "CJK UNIFIED IDEOGRAPH-" @@ -21257,8 +21279,19 @@ def lookup_named_sequence(code): - if 0 <= code - 983296 < len(_named_sequences): - return _named_sequences[code - 983296] + if 0 <= code - 983552 < len(_named_sequences): + return _named_sequences[code - 983552] else: return None +_name_aliases = [ +] + + +def lookup_with_alias(name): + code = lookup(name) + if 0 <= code - 983040 < len(_name_aliases): + return _name_aliases[code - 983040] + else: + return code + diff --git a/rpython/rlib/unicodedata/unicodedb_5_2_0.py b/rpython/rlib/unicodedata/unicodedb_5_2_0.py --- a/rpython/rlib/unicodedata/unicodedb_5_2_0.py +++ b/rpython/rlib/unicodedata/unicodedb_5_2_0.py @@ -190,6 +190,7 @@ '\x07 KEFULA' '\x08 KEMBANG' '\x0e KISIM5 TIMES ' +'\x02 L' '\x05 LACA' '\x11 LAGAB TIMES ASH2' '\x11 LAGAR OVER LAGAR' @@ -1349,7 +1350,7 @@ '\x04CHOR' '\tCHOSEONG ' '\x06CHRIVI' -'\rCHROMA SYNAFI' +'\x07CHROMA ' '\rCHRYSANTHEMUM' '\x07CHU CAN' '\x05CHULA' @@ -1377,6 +1378,7 @@ '\x02CK' '\x07CK MARK' ')CK-TILTED SHADOWED WHITE RIGHTWARDS ARROW' +'\x04CKET' '\x05CKING' '\x06CKNESS' '\x07CKWISE ' @@ -1932,6 +1934,7 @@ '\x07ER THAN' '\x08ER TRUTH' '\x08ERAL URN' +'\x0cERATION MARK' '\x0bERCENT SIGN' '\x07ERCIAL ' '\tERCIAL AT' @@ -2251,6 +2254,7 @@ '\x04GHWA' '\x02GI' '\x07GI GUNU' +'\x0cGI MGO RGYAN' '\x04GIBA' '\x06GICAL ' '\x04GIDA' @@ -2972,7 +2976,7 @@ '\nK2 PLUS BU' '\x02K4' '\x02KA' -'\x10KA- SHOG YIG MGO' +'\tKA- SHOG ' '\x04KAAF' '\x03KAB' '\tKABA TENU' @@ -3003,6 +3007,7 @@ '\x06KAYAH ' '\x07KAYANNA' '\x12KBAR ISOLATED FORM' +'\x04KCET' '\x02KE' '\x06KE PHO' '\x16KEEPING STILL MOUNTAIN' @@ -3187,8 +3192,8 @@ '\nLENDED YUS' '\x0bLENGTH MARK' '\x05LENIS' +'\x0eLENTICULAR BRA' '\x12LENTICULAR BRACKET' -'\x12LENTICULAR BRAKCET' '\x04LEPH' '\x0cLER CONSTANT' '\x08LESS SHA' @@ -3276,7 +3281,6 @@ '\x0eLMOST EQUAL TO' '\x0fLMOST EQUAL TO ' '\x02LO' -'\x04LO L' '\rLOCATION SIGN' '\x08LOCATIVE' '\tLOCKWISE ' @@ -5041,6 +5045,7 @@ '\x0bSYMBOL FOR ' '\x12SYMMETRIC SWAPPING' '\x16SYMPTOTICALLY EQUAL TO' +'\x06SYNAFI' '\x07SYNAGMA' '\rSYNDESMOS NEO' '\tSYNTHETON' @@ -5598,6 +5603,7 @@ '\x13VARIANT WITH SQUARE' '\x13VARIATION INDICATOR' '\x0bVARYS ICHOS' +'\x05VASIS' '\x13VASTNESS OR WASTING' '\x03VAV' '\x07VAV YOD' @@ -5907,6 +5913,7 @@ '\x06YGISMA' '\x02YI' '\x08YIDDISH ' +'\x07YIG MGO' '\x08YIG MGO ' '\x0fYIG MGO MDUN MA' '\x14YIG MGO PHUR SHAD MA' @@ -5998,6313 +6005,6313 @@ '\x05ZYGOS' ) _charnodes =[70758, - -54016, + -54013, -1, 132371, - 28772, + 28800, -1, 197694, - 78442, + 78444, -1, 262727, - 136032, + 136035, -1, 327957, - 202072, + 202075, -1, 393238, - 282255, + 282270, -1, -65529, - 347791, + 347806, 195071, -65528, - 409765, + 409767, 195070, -65527, - 472618, + 472620, 195069, -65526, - 535808, + 535811, 195068, -65525, - 599704, + 599707, 195067, -65524, - 660875, + 660878, 195066, -65523, - 726360, + 726363, 195065, -65522, - 791844, + 791847, 195064, -65521, - 857309, + 857312, 195063, -65520, - 922776, + 922779, 195062, -65519, - 988192, + 988195, 195061, -65518, - 1053651, + 1053654, 195060, -65517, - 1119075, + 1119078, 195059, -65516, - 1184446, + 1184449, 195058, -65515, - 1249830, + 1249833, 195057, -1, - 1315238, + 1315241, 195056, 1507367, - 344229, + 344231, -1, -65512, - 1461903, + 1461918, 195055, -65511, - 1523877, + 1523879, 195054, -65510, - 1586730, + 1586732, 195053, -65509, - 1649920, + 1649923, 195052, -65508, - 1713816, + 1713819, 195051, -65507, - 1774987, + 1774990, 195050, -65506, - 1840472, + 1840475, 195049, -65505, - 1905956, + 1905959, 195048, -65504, - 1971421, + 1971424, 195047, -65503, - 2036888, + 2036891, 195046, -65502, - 2102304, + 2102307, 195045, -65501, - 2167763, + 2167766, 195044, -65500, - 2233187, + 2233190, 195043, -65499, - 2298558, + 2298561, 195042, -65498, - 2363942, + 2363945, 195041, -1, - 2429350, + 2429353, 195040, 2621496, - 1455658, + 1455660, -1, -65495, - 2576015, + 2576030, 195039, -65494, - 2637989, + 2637991, 195038, -65493, - 2700842, + 2700844, 195037, -65492, - 2764032, + 2764035, 195036, -65491, - 2827928, + 2827931, 195035, -65490, - 2889099, + 2889102, 195034, -65489, - 2954584, + 2954587, 195033, -65488, - 3020068, + 3020071, 195032, -65487, - 3085533, + 3085536, 195031, -65486, - 3151000, + 3151003, 195030, -65485, - 3216416, + 3216419, 195029, -65484, - 3281875, + 3281878, 195028, -65483, - 3347299, + 3347302, 195027, -65482, - 3412670, + 3412673, 195026, -65481, - 3478054, + 3478057, 195025, -1, - 3543462, + 3543465, 195024, 3735625, - 2567424, + 2567427, -1, -65478, - 3690127, + 3690142, 195023, -65477, - 3752101, + 3752103, 195022, -65476, - 3814954, + 3814956, 195021, -65475, - 3878144, + 3878147, 195020, -65474, - 3942040, + 3942043, 195019, -65473, - 4003211, + 4003214, 195018, -65472, - 4068696, + 4068699, 195017, -65471, - 4134180, + 4134183, 195016, -65470, - 4199645, + 4199648, 195015, -65469, - 4265112, + 4265115, 195014, -65468, - 4330528, + 4330531, 195013, -65467, - 4395987, + 4395990, 195012, -65466, - 4461411, + 4461414, 195011, -65465, - 4526782, + 4526785, 195010, -65464, - 4592166, + 4592169, 195009, -1, - 4657574, + 4657577, 195008, 4849754, - 3679896, + 3679899, -1, -65461, - 4804239, + 4804254, 195007, -65460, - 4866213, + 4866215, 195006, -65459, - 4929066, + 4929068, 195005, -65458, - 4992256, + 4992259, 195004, -65457, - 5056152, + 5056155, 195003, -65456, - 5117323, + 5117326, 195002, -65455, - 5182808, + 5182811, 195001, -65454, - 5248292, + 5248295, 195000, -65453, - 5313757, + 5313760, 194999, -65452, - 5379224, + 5379227, 194998, -65451, - 5444640, + 5444643, 194997, -65450, - 5510099, + 5510102, 194996, -65449, - 5575523, + 5575526, 194995, -65448, - 5640894, + 5640897, 194994, -65447, - 5706278, + 5706281, 194993, -1, - 5771686, + 5771689, 194992, 5963883, - 4789643, + 4789646, -1, -65444, - 5918351, + 5918366, 194991, -65443, - 5980325, + 5980327, 194990, -65442, - 6043178, + 6043180, 194989, -65441, - 6106368, + 6106371, 194988, -65440, - 6170264, + 6170267, 194987, -65439, - 6231435, + 6231438, 194986, -65438, - 6296920, + 6296923, 194985, -65437, - 6362404, + 6362407, 194984, -65436, - 6427869, + 6427872, 194983, -65435, - 6493336, + 6493339, 194982, -65434, - 6558752, + 6558755, 194981, -65433, - 6624211, + 6624214, 194980, -65432, - 6689635, + 6689638, 194979, -65431, - 6755006, + 6755009, 194978, -65430, - 6820390, + 6820393, 194977, -1, - 6885798, + 6885801, 194976, 7078012, - 5903704, + 5903707, -1, -65427, - 7032463, + 7032478, 194975, -65426, - 7094437, + 7094439, 194974, -65425, - 7157290, + 7157292, 194973, -65424, - 7220480, + 7220483, 194972, -65423, - 7284376, + 7284379, 194971, -65422, - 7345547, + 7345550, 194970, -65421, - 7411032, + 7411035, 194969, -65420, - 7476516, + 7476519, 194968, -65419, - 7541981, + 7541984, 194967, -65418, - 7607448, + 7607451, 194966, -65417, - 7672864, + 7672867, 194965, -65416, - 7738323, + 7738326, 194964, -65415, - 7803747, + 7803750, 194963, -65414, - 7869118, + 7869121, 194962, -65413, - 7934502, + 7934505, 194961, -1, - 7999910, + 7999913, 194960, 8192141, - 7017764, + 7017767, -1, -65410, - 8146575, + 8146590, 194959, -65409, - 8208549, + 8208551, 194958, -65408, - 8271402, + 8271404, 194957, -65407, - 8334592, + 8334595, 194956, -65406, - 8398488, + 8398491, 194955, -65405, - 8459659, + 8459662, 194954, -65404, - 8525144, + 8525147, 194953, -65403, - 8590628, + 8590631, 194952, -65402, - 8656093, + 8656096, 194951, -65401, - 8721560, + 8721563, 194950, -65400, - 8786976, + 8786979, 194949, -65399, - 8852435, + 8852438, 194948, -65398, - 8917859, + 8917862, 194947, -65397, - 8983230, + 8983233, 194946, -65396, - 9048614, + 9048617, 194945, -1, - 9114022, + 9114025, 194944, 9306270, - 8131805, + 8131808, -1, -65393, - 9260687, + 9260702, 194943, -65392, - 9322661, + 9322663, 194942, -65391, - 9385514, + 9385516, 194941, -65390, - 9448704, + 9448707, 194940, -65389, - 9512600, + 9512603, 194939, -65388, - 9573771, + 9573774, 194938, -65387, - 9639256, + 9639259, 194937, -65386, - 9704740, + 9704743, 194936, -65385, - 9770205, + 9770208, 194935, -65384, - 9835672, + 9835675, 194934, -65383, - 9901088, + 9901091, 194933, -65382, - 9966547, + 9966550, 194932, -65381, - 10031971, + 10031974, 194931, -65380, - 10097342, + 10097345, 194930, -65379, - 10162726, + 10162729, 194929, -1, - 10228134, + 10228137, 194928, 10420399, - 9245848, + 9245851, -1, -65376, - 10374799, + 10374814, 194927, -65375, - 10436773, + 10436775, 194926, -65374, - 10499626, + 10499628, 194925, -65373, - 10562816, + 10562819, 194924, -65372, - 10626712, + 10626715, 194923, -65371, - 10687883, + 10687886, 194922, -65370, - 10753368, + 10753371, 194921, -65369, - 10818852, + 10818855, 194920, -65368, - 10884317, + 10884320, 194919, -65367, - 10949784, + 10949787, 194918, -65366, - 11015200, + 11015203, _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit