Author: Amaury Forgeot d'Arc <amaur...@gmail.com> Branch: py3.3 Changeset: r76390:81b82168c663 Date: 2015-03-13 18:56 +0100 http://bitbucket.org/pypy/pypy/changeset/81b82168c663/
Log: Add support for "named sequences" in unicode database diff too long, truncating to 2000 out of 176971 lines diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -76,6 +76,7 @@ class UCD(W_Root): def __init__(self, unicodedb): self._lookup = unicodedb.lookup + self._lookup_named_sequence = unicodedb.lookup_named_sequence self._name = unicodedb.name self._decimal = unicodedb.decimal self._digit = unicodedb.digit @@ -108,6 +109,13 @@ except KeyError: msg = space.mod(space.wrap("undefined character name '%s'"), space.wrap(name)) raise OperationError(space.w_KeyError, msg) + + # The code may be a named sequence + sequence = self._lookup_named_sequence(code) + if sequence is not None: + # named sequences only contain UCS2 codes, no surrogates &co. + return space.wrap(sequence) + return space.wrap(code_to_unichr(code)) def name(self, space, w_unichr, w_default=None): diff --git a/pypy/module/unicodedata/test/test_unicodedata.py b/pypy/module/unicodedata/test/test_unicodedata.py --- a/pypy/module/unicodedata/test/test_unicodedata.py +++ b/pypy/module/unicodedata/test/test_unicodedata.py @@ -106,3 +106,16 @@ def test_bidirectional(self): import unicodedata raises(TypeError, unicodedata.bidirectional, 'xx') + + def test_named_sequences(self): + import unicodedata + sequences = [ + ('LATIN SMALL LETTER R WITH TILDE', '\u0072\u0303'), + ('TAMIL SYLLABLE SAI', '\u0BB8\u0BC8'), + ('TAMIL SYLLABLE MOO', '\u0BAE\u0BCB'), + ('TAMIL SYLLABLE NNOO', '\u0BA3\u0BCB'), + ('TAMIL CONSONANT KSS', '\u0B95\u0BCD\u0BB7\u0BCD'), + ] + for seqname, codepoints in sequences: + assert unicodedata.lookup(seqname) == codepoints + diff --git a/rpython/rlib/unicodedata/NameAliases-3.2.0.txt b/rpython/rlib/unicodedata/NameAliases-3.2.0.txt new file mode 100644 --- /dev/null +++ b/rpython/rlib/unicodedata/NameAliases-3.2.0.txt @@ -0,0 +1,1 @@ +# NameAliases-3.2.0.txt does not exist. diff --git a/rpython/rlib/unicodedata/NameAliases-5.2.0.txt b/rpython/rlib/unicodedata/NameAliases-5.2.0.txt new file mode 100644 --- /dev/null +++ b/rpython/rlib/unicodedata/NameAliases-5.2.0.txt @@ -0,0 +1,40 @@ +# NameAliases-5.2.0.txt +# Date: 2009-05-22, 13:05:00 PDT [KW] +# +# This file is a normative contributory data file in the +# Unicode Character Database. +# +# Copyright (c) 2005-2009 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# This file defines the formal name aliases for Unicode characters. +# +# For informative aliases see NamesList.txt +# +# For documentation, see NamesList.html and http://www.unicode.org/reports/tr44/ +# +# FORMAT +# +# Each line has two fields +# First field: Code point +# Second field: Alias +# +# In case multiple aliases are assigned, additional aliases +# would be provided on separate lines +# +#----------------------------------------------------------------- +01A2;LATIN CAPITAL LETTER GHA +01A3;LATIN SMALL LETTER GHA +0CDE;KANNADA LETTER LLLA +0E9D;LAO LETTER FO FON +0E9F;LAO LETTER FO FAY +0EA3;LAO LETTER RO +0EA5;LAO LETTER LO +0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN +A015;YI SYLLABLE ITERATION MARK +FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET +1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS + +# Total code points: 11 + +# EOF diff --git a/rpython/rlib/unicodedata/NameAliases-6.0.0.txt b/rpython/rlib/unicodedata/NameAliases-6.0.0.txt new file mode 100644 --- /dev/null +++ b/rpython/rlib/unicodedata/NameAliases-6.0.0.txt @@ -0,0 +1,40 @@ +# NameAliases-6.0.0.txt +# Date: 2010-05-10, 11:58:00 PDT [KW] +# +# This file is a normative contributory data file in the +# Unicode Character Database. +# +# Copyright (c) 2005-2010 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# This file defines the formal name aliases for Unicode characters. +# +# For informative aliases see NamesList.txt +# +# For documentation, see NamesList.html and http://www.unicode.org/reports/tr44/ +# +# FORMAT +# +# Each line has two fields +# First field: Code point +# Second field: Alias +# +# In case multiple aliases are assigned, additional aliases +# would be provided on separate lines +# +#----------------------------------------------------------------- +01A2;LATIN CAPITAL LETTER GHA +01A3;LATIN SMALL LETTER GHA +0CDE;KANNADA LETTER LLLA +0E9D;LAO LETTER FO FON +0E9F;LAO LETTER FO FAY +0EA3;LAO LETTER RO +0EA5;LAO LETTER LO +0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN +A015;YI SYLLABLE ITERATION MARK +FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET +1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS + +# Total code points: 11 + +# EOF diff --git a/rpython/rlib/unicodedata/NameAliases-6.2.0.txt b/rpython/rlib/unicodedata/NameAliases-6.2.0.txt new file mode 100644 --- /dev/null +++ b/rpython/rlib/unicodedata/NameAliases-6.2.0.txt @@ -0,0 +1,509 @@ +# NameAliases-6.2.0.txt +# Date: 2012-05-15, 18:44:00 GMT [KW] +# +# This file is a normative contributory data file in the +# Unicode Character Database. +# +# Copyright (c) 2005-2012 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# This file defines the formal name aliases for Unicode characters. +# +# For informative aliases see NamesList.txt +# +# The formal name aliases are divided into five types. +# +# 1. Corrections for serious problems in the character names +# 2. ISO 6429 names for C0 and C1 control functions, and other +# commonly occurring names for control codes +# 3. A few widely used alternate names for format characters +# 4. Several documented labels for C1 control code points which +# were never actually approved in any standard +# 5. Commonly occurring abbreviations (or acronyms) for control codes, +# format characters, spaces, and variation selectors +# +# The formal name aliases are part of the Unicode character namespace, which +# includes the character names and the names of named character sequences. +# The inclusion of ISO 6429 names and other commonly occurring names and +# abbreviations for control codes and format characters as formal name aliases +# is to help avoid name collisions between Unicode character names and the +# labels which commonly appear in text and/or in implementations such as regex, for +# control codes (which have no Unicode character name) or for format characters. +# +# For documentation, see NamesList.html and http://www.unicode.org/reports/tr44/ +# +# FORMAT +# +# Each line has three fields, as described here: +# +# First field: Code point +# Second field: Alias +# Third field: Type +# +# The Type labels used are: correction, control, alternate, figment, abbreviation +# +# Those Type labels can be mapped to other strings for display, if desired. +# +# In case multiple aliases are assigned, additional aliases +# are provided on separate lines. Parsers of this data file should +# take note that the same code point can (and does) occur more than once. +# +#----------------------------------------------------------------- + +0000;NULL;control +0000;NUL;abbreviation +0001;START OF HEADING;control +0001;SOH;abbreviation +0002;START OF TEXT;control +0002;STX;abbreviation +0003;END OF TEXT;control +0003;ETX;abbreviation +0004;END OF TRANSMISSION;control +0004;EOT;abbreviation +0005;ENQUIRY;control +0005;ENQ;abbreviation +0006;ACKNOWLEDGE;control +0006;ACK;abbreviation + +# Note that no formal name alias for the ISO 6429 "BELL" is +# provided for U+0007, because of the existing name collision +# with U+1F514 BELL. + +0007;ALERT;control +0007;BEL;abbreviation +0008;BACKSPACE;control +0008;BS;abbreviation +0009;CHARACTER TABULATION;control +0009;HORIZONTAL TABULATION;control +0009;HT;abbreviation +0009;TAB;abbreviation +000A;LINE FEED;control +000A;NEW LINE;control +000A;END OF LINE;control +000A;LF;abbreviation +000A;NL;abbreviation +000A;EOL;abbreviation +000B;LINE TABULATION;control +000B;VERTICAL TABULATION;control +000B;VT;abbreviation +000C;FORM FEED;control +000C;FF;abbreviation +000D;CARRIAGE RETURN;control +000D;CR;abbreviation +000E;SHIFT OUT;control +000E;LOCKING-SHIFT ONE;control +000E;SO;abbreviation +000F;SHIFT IN;control +000F;LOCKING-SHIFT ZERO;control +000F;SI;abbreviation +0010;DATA LINK ESCAPE;control +0010;DLE;abbreviation +0011;DEVICE CONTROL ONE;control +0011;DC1;abbreviation +0012;DEVICE CONTROL TWO;control +0012;DC2;abbreviation +0013;DEVICE CONTROL THREE;control +0013;DC3;abbreviation +0014;DEVICE CONTROL FOUR;control +0014;DC4;abbreviation +0015;NEGATIVE ACKNOWLEDGE;control +0015;NAK;abbreviation +0016;SYNCHRONOUS IDLE;control +0016;SYN;abbreviation +0017;END OF TRANSMISSION BLOCK;control +0017;ETB;abbreviation +0018;CANCEL;control +0018;CAN;abbreviation +0019;END OF MEDIUM;control +0019;EOM;abbreviation +001A;SUBSTITUTE;control +001A;SUB;abbreviation +001B;ESCAPE;control +001B;ESC;abbreviation +001C;INFORMATION SEPARATOR FOUR;control +001C;FILE SEPARATOR;control +001C;FS;abbreviation +001D;INFORMATION SEPARATOR THREE;control +001D;GROUP SEPARATOR;control +001D;GS;abbreviation +001E;INFORMATION SEPARATOR TWO;control +001E;RECORD SEPARATOR;control +001E;RS;abbreviation +001F;INFORMATION SEPARATOR ONE;control +001F;UNIT SEPARATOR;control +001F;US;abbreviation +0020;SP;abbreviation +007F;DELETE;control +007F;DEL;abbreviation +0080;PADDING CHARACTER;figment +0080;PAD;abbreviation +0081;HIGH OCTET PRESET;figment +0081;HOP;abbreviation +0082;BREAK PERMITTED HERE;control +0082;BPH;abbreviation +0083;NO BREAK HERE;control +0083;NBH;abbreviation +0084;INDEX;control +0084;IND;abbreviation +0085;NEXT LINE;control +0085;NEL;abbreviation +0086;START OF SELECTED AREA;control +0086;SSA;abbreviation +0087;END OF SELECTED AREA;control +0087;ESA;abbreviation +0088;CHARACTER TABULATION SET;control +0088;HORIZONTAL TABULATION SET;control +0088;HTS;abbreviation +0089;CHARACTER TABULATION WITH JUSTIFICATION;control +0089;HORIZONTAL TABULATION WITH JUSTIFICATION;control +0089;HTJ;abbreviation +008A;LINE TABULATION SET;control +008A;VERTICAL TABULATION SET;control +008A;VTS;abbreviation +008B;PARTIAL LINE FORWARD;control +008B;PARTIAL LINE DOWN;control +008B;PLD;abbreviation +008C;PARTIAL LINE BACKWARD;control +008C;PARTIAL LINE UP;control +008C;PLU;abbreviation +008D;REVERSE LINE FEED;control +008D;REVERSE INDEX;control +008D;RI;abbreviation +008E;SINGLE SHIFT TWO;control +008E;SINGLE-SHIFT-2;control +008E;SS2;abbreviation +008F;SINGLE SHIFT THREE;control +008F;SINGLE-SHIFT-3;control +008F;SS3;abbreviation +0090;DEVICE CONTROL STRING;control +0090;DCS;abbreviation +0091;PRIVATE USE ONE;control +0091;PRIVATE USE-1;control +0091;PU1;abbreviation +0092;PRIVATE USE TWO;control +0092;PRIVATE USE-2;control +0092;PU2;abbreviation +0093;SET TRANSMIT STATE;control +0093;STS;abbreviation +0094;CANCEL CHARACTER;control +0094;CCH;abbreviation +0095;MESSAGE WAITING;control +0095;MW;abbreviation +0096;START OF GUARDED AREA;control +0096;START OF PROTECTED AREA;control +0096;SPA;abbreviation +0097;END OF GUARDED AREA;control +0097;END OF PROTECTED AREA;control +0097;EPA;abbreviation +0098;START OF STRING;control +0098;SOS;abbreviation +0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment +0099;SGC;abbreviation +009A;SINGLE CHARACTER INTRODUCER;control +009A;SCI;abbreviation +009B;CONTROL SEQUENCE INTRODUCER;control +009B;CSI;abbreviation +009C;STRING TERMINATOR;control +009C;ST;abbreviation +009D;OPERATING SYSTEM COMMAND;control +009D;OSC;abbreviation +009E;PRIVACY MESSAGE;control +009E;PM;abbreviation +009F;APPLICATION PROGRAM COMMAND;control +009F;APC;abbreviation +00A0;NBSP;abbreviation +00AD;SHY;abbreviation +01A2;LATIN CAPITAL LETTER GHA;correction +01A3;LATIN SMALL LETTER GHA;correction +034F;CGJ;abbreviation +0709;SYRIAC SUBLINEAR COLON SKEWED LEFT;correction +0CDE;KANNADA LETTER LLLA;correction +0E9D;LAO LETTER FO FON;correction +0E9F;LAO LETTER FO FAY;correction +0EA3;LAO LETTER RO;correction +0EA5;LAO LETTER LO;correction +0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN;correction +180B;FVS1;abbreviation +180C;FVS2;abbreviation +180D;FVS3;abbreviation +180E;MVS;abbreviation +200B;ZWSP;abbreviation +200C;ZWNJ;abbreviation +200D;ZWJ;abbreviation +200E;LRM;abbreviation +200F;RLM;abbreviation +202A;LRE;abbreviation +202B;RLE;abbreviation +202C;PDF;abbreviation +202D;LRO;abbreviation +202E;RLO;abbreviation +202F;NNBSP;abbreviation +205F;MMSP;abbreviation +2060;WJ;abbreviation +2118;WEIERSTRASS ELLIPTIC FUNCTION;correction +2448;MICR ON US SYMBOL;correction +2449;MICR DASH SYMBOL;correction +A015;YI SYLLABLE ITERATION MARK;correction +FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET;correction +FE00;VS1;abbreviation +FE01;VS2;abbreviation +FE02;VS3;abbreviation +FE03;VS4;abbreviation +FE04;VS5;abbreviation +FE05;VS6;abbreviation +FE06;VS7;abbreviation +FE07;VS8;abbreviation +FE08;VS9;abbreviation +FE09;VS10;abbreviation +FE0A;VS11;abbreviation +FE0B;VS12;abbreviation +FE0C;VS13;abbreviation +FE0D;VS14;abbreviation +FE0E;VS15;abbreviation +FE0F;VS16;abbreviation +FEFF;BYTE ORDER MARK;alternate +FEFF;BOM;abbreviation +FEFF;ZWNBSP;abbreviation +1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS;correction +E0100;VS17;abbreviation +E0101;VS18;abbreviation +E0102;VS19;abbreviation +E0103;VS20;abbreviation +E0104;VS21;abbreviation +E0105;VS22;abbreviation +E0106;VS23;abbreviation +E0107;VS24;abbreviation +E0108;VS25;abbreviation +E0109;VS26;abbreviation +E010A;VS27;abbreviation +E010B;VS28;abbreviation +E010C;VS29;abbreviation +E010D;VS30;abbreviation +E010E;VS31;abbreviation +E010F;VS32;abbreviation +E0110;VS33;abbreviation +E0111;VS34;abbreviation +E0112;VS35;abbreviation +E0113;VS36;abbreviation +E0114;VS37;abbreviation +E0115;VS38;abbreviation +E0116;VS39;abbreviation +E0117;VS40;abbreviation +E0118;VS41;abbreviation +E0119;VS42;abbreviation +E011A;VS43;abbreviation +E011B;VS44;abbreviation +E011C;VS45;abbreviation +E011D;VS46;abbreviation +E011E;VS47;abbreviation +E011F;VS48;abbreviation +E0120;VS49;abbreviation +E0121;VS50;abbreviation +E0122;VS51;abbreviation +E0123;VS52;abbreviation +E0124;VS53;abbreviation +E0125;VS54;abbreviation +E0126;VS55;abbreviation +E0127;VS56;abbreviation +E0128;VS57;abbreviation +E0129;VS58;abbreviation +E012A;VS59;abbreviation +E012B;VS60;abbreviation +E012C;VS61;abbreviation +E012D;VS62;abbreviation +E012E;VS63;abbreviation +E012F;VS64;abbreviation +E0130;VS65;abbreviation +E0131;VS66;abbreviation +E0132;VS67;abbreviation +E0133;VS68;abbreviation +E0134;VS69;abbreviation +E0135;VS70;abbreviation +E0136;VS71;abbreviation +E0137;VS72;abbreviation +E0138;VS73;abbreviation +E0139;VS74;abbreviation +E013A;VS75;abbreviation +E013B;VS76;abbreviation +E013C;VS77;abbreviation +E013D;VS78;abbreviation +E013E;VS79;abbreviation +E013F;VS80;abbreviation +E0140;VS81;abbreviation +E0141;VS82;abbreviation +E0142;VS83;abbreviation +E0143;VS84;abbreviation +E0144;VS85;abbreviation +E0145;VS86;abbreviation +E0146;VS87;abbreviation +E0147;VS88;abbreviation +E0148;VS89;abbreviation +E0149;VS90;abbreviation +E014A;VS91;abbreviation +E014B;VS92;abbreviation +E014C;VS93;abbreviation +E014D;VS94;abbreviation +E014E;VS95;abbreviation +E014F;VS96;abbreviation +E0150;VS97;abbreviation +E0151;VS98;abbreviation +E0152;VS99;abbreviation +E0153;VS100;abbreviation +E0154;VS101;abbreviation +E0155;VS102;abbreviation +E0156;VS103;abbreviation +E0157;VS104;abbreviation +E0158;VS105;abbreviation +E0159;VS106;abbreviation +E015A;VS107;abbreviation +E015B;VS108;abbreviation +E015C;VS109;abbreviation +E015D;VS110;abbreviation +E015E;VS111;abbreviation +E015F;VS112;abbreviation +E0160;VS113;abbreviation +E0161;VS114;abbreviation +E0162;VS115;abbreviation +E0163;VS116;abbreviation +E0164;VS117;abbreviation +E0165;VS118;abbreviation +E0166;VS119;abbreviation +E0167;VS120;abbreviation +E0168;VS121;abbreviation +E0169;VS122;abbreviation +E016A;VS123;abbreviation +E016B;VS124;abbreviation +E016C;VS125;abbreviation +E016D;VS126;abbreviation +E016E;VS127;abbreviation +E016F;VS128;abbreviation +E0170;VS129;abbreviation +E0171;VS130;abbreviation +E0172;VS131;abbreviation +E0173;VS132;abbreviation +E0174;VS133;abbreviation +E0175;VS134;abbreviation +E0176;VS135;abbreviation +E0177;VS136;abbreviation +E0178;VS137;abbreviation +E0179;VS138;abbreviation +E017A;VS139;abbreviation +E017B;VS140;abbreviation +E017C;VS141;abbreviation +E017D;VS142;abbreviation +E017E;VS143;abbreviation +E017F;VS144;abbreviation +E0180;VS145;abbreviation +E0181;VS146;abbreviation +E0182;VS147;abbreviation +E0183;VS148;abbreviation +E0184;VS149;abbreviation +E0185;VS150;abbreviation +E0186;VS151;abbreviation +E0187;VS152;abbreviation +E0188;VS153;abbreviation +E0189;VS154;abbreviation +E018A;VS155;abbreviation +E018B;VS156;abbreviation +E018C;VS157;abbreviation +E018D;VS158;abbreviation +E018E;VS159;abbreviation +E018F;VS160;abbreviation +E0190;VS161;abbreviation +E0191;VS162;abbreviation +E0192;VS163;abbreviation +E0193;VS164;abbreviation +E0194;VS165;abbreviation +E0195;VS166;abbreviation +E0196;VS167;abbreviation +E0197;VS168;abbreviation +E0198;VS169;abbreviation +E0199;VS170;abbreviation +E019A;VS171;abbreviation +E019B;VS172;abbreviation +E019C;VS173;abbreviation +E019D;VS174;abbreviation +E019E;VS175;abbreviation +E019F;VS176;abbreviation +E01A0;VS177;abbreviation +E01A1;VS178;abbreviation +E01A2;VS179;abbreviation +E01A3;VS180;abbreviation +E01A4;VS181;abbreviation +E01A5;VS182;abbreviation +E01A6;VS183;abbreviation +E01A7;VS184;abbreviation +E01A8;VS185;abbreviation +E01A9;VS186;abbreviation +E01AA;VS187;abbreviation +E01AB;VS188;abbreviation +E01AC;VS189;abbreviation +E01AD;VS190;abbreviation +E01AE;VS191;abbreviation +E01AF;VS192;abbreviation +E01B0;VS193;abbreviation +E01B1;VS194;abbreviation +E01B2;VS195;abbreviation +E01B3;VS196;abbreviation +E01B4;VS197;abbreviation +E01B5;VS198;abbreviation +E01B6;VS199;abbreviation +E01B7;VS200;abbreviation +E01B8;VS201;abbreviation +E01B9;VS202;abbreviation +E01BA;VS203;abbreviation +E01BB;VS204;abbreviation +E01BC;VS205;abbreviation +E01BD;VS206;abbreviation +E01BE;VS207;abbreviation +E01BF;VS208;abbreviation +E01C0;VS209;abbreviation +E01C1;VS210;abbreviation +E01C2;VS211;abbreviation +E01C3;VS212;abbreviation +E01C4;VS213;abbreviation +E01C5;VS214;abbreviation +E01C6;VS215;abbreviation +E01C7;VS216;abbreviation +E01C8;VS217;abbreviation +E01C9;VS218;abbreviation +E01CA;VS219;abbreviation +E01CB;VS220;abbreviation +E01CC;VS221;abbreviation +E01CD;VS222;abbreviation +E01CE;VS223;abbreviation +E01CF;VS224;abbreviation +E01D0;VS225;abbreviation +E01D1;VS226;abbreviation +E01D2;VS227;abbreviation +E01D3;VS228;abbreviation +E01D4;VS229;abbreviation +E01D5;VS230;abbreviation +E01D6;VS231;abbreviation +E01D7;VS232;abbreviation +E01D8;VS233;abbreviation +E01D9;VS234;abbreviation +E01DA;VS235;abbreviation +E01DB;VS236;abbreviation +E01DC;VS237;abbreviation +E01DD;VS238;abbreviation +E01DE;VS239;abbreviation +E01DF;VS240;abbreviation +E01E0;VS241;abbreviation +E01E1;VS242;abbreviation +E01E2;VS243;abbreviation +E01E3;VS244;abbreviation +E01E4;VS245;abbreviation +E01E5;VS246;abbreviation +E01E6;VS247;abbreviation +E01E7;VS248;abbreviation +E01E8;VS249;abbreviation +E01E9;VS250;abbreviation +E01EA;VS251;abbreviation +E01EB;VS252;abbreviation +E01EC;VS253;abbreviation +E01ED;VS254;abbreviation +E01EE;VS255;abbreviation +E01EF;VS256;abbreviation + +# EOF diff --git a/rpython/rlib/unicodedata/NamedSequences-3.2.0.txt b/rpython/rlib/unicodedata/NamedSequences-3.2.0.txt new file mode 100644 --- /dev/null +++ b/rpython/rlib/unicodedata/NamedSequences-3.2.0.txt @@ -0,0 +1,1 @@ +# NamedSequences-3.2.0.txt does not exist. diff --git a/rpython/rlib/unicodedata/NamedSequences-5.2.0.txt b/rpython/rlib/unicodedata/NamedSequences-5.2.0.txt new file mode 100644 --- /dev/null +++ b/rpython/rlib/unicodedata/NamedSequences-5.2.0.txt @@ -0,0 +1,448 @@ +# NamedSequences-5.2.0.txt +# Date: 2009-09-14, 12:44:00 PDT [KW] +# +# Unicode Character Database +# Copyright (c) 1991-2009 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# Format: +# Name of Sequence; Code Point Sequence for USI +# +# Note: The order of entries in this file is not significant. +# However, entries are generally in script order corresponding +# to block order in the Unicode Standard, to make it easier +# to find entries in the list. + +# ================================================ + +LATIN CAPITAL LETTER A WITH MACRON AND GRAVE;0100 0300 +LATIN SMALL LETTER A WITH MACRON AND GRAVE;0101 0300 +LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW;0045 0329 +LATIN SMALL LETTER E WITH VERTICAL LINE BELOW;0065 0329 +LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND GRAVE;00C8 0329 +LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND GRAVE;00E8 0329 +LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND ACUTE;00C9 0329 +LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND ACUTE;00E9 0329 +LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON;00CA 0304 +LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON;00EA 0304 +LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON;00CA 030C +LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON;00EA 030C +LATIN CAPITAL LETTER I WITH MACRON AND GRAVE;012A 0300 +LATIN SMALL LETTER I WITH MACRON AND GRAVE;012B 0300 +LATIN SMALL LETTER I WITH DOT ABOVE AND ACUTE;0069 0307 0301 +LATIN SMALL LETTER NG WITH TILDE ABOVE;006E 0360 0067 +LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW;004F 0329 +LATIN SMALL LETTER O WITH VERTICAL LINE BELOW;006F 0329 +LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND GRAVE;00D2 0329 +LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND GRAVE;00F2 0329 +LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND ACUTE;00D3 0329 +LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND ACUTE;00F3 0329 +LATIN CAPITAL LETTER S WITH VERTICAL LINE BELOW;0053 0329 +LATIN SMALL LETTER S WITH VERTICAL LINE BELOW;0073 0329 +LATIN CAPITAL LETTER U WITH MACRON AND GRAVE;016A 0300 +LATIN SMALL LETTER U WITH MACRON AND GRAVE;016B 0300 + +# Additions for Lithuanian. Provisional 2006-05-18, Approved 2007-10-19 + +LATIN CAPITAL LETTER A WITH OGONEK AND ACUTE;0104 0301 +LATIN SMALL LETTER A WITH OGONEK AND ACUTE;0105 0301 +LATIN CAPITAL LETTER A WITH OGONEK AND TILDE;0104 0303 +LATIN SMALL LETTER A WITH OGONEK AND TILDE;0105 0303 +LATIN CAPITAL LETTER E WITH OGONEK AND ACUTE;0118 0301 +LATIN SMALL LETTER E WITH OGONEK AND ACUTE;0119 0301 +LATIN CAPITAL LETTER E WITH OGONEK AND TILDE;0118 0303 +LATIN SMALL LETTER E WITH OGONEK AND TILDE;0119 0303 +LATIN CAPITAL LETTER E WITH DOT ABOVE AND ACUTE;0116 0301 +LATIN SMALL LETTER E WITH DOT ABOVE AND ACUTE;0117 0301 +LATIN CAPITAL LETTER E WITH DOT ABOVE AND TILDE;0116 0303 +LATIN SMALL LETTER E WITH DOT ABOVE AND TILDE;0117 0303 +LATIN SMALL LETTER I WITH DOT ABOVE AND GRAVE;0069 0307 0300 +LATIN SMALL LETTER I WITH DOT ABOVE AND TILDE;0069 0307 0303 +LATIN CAPITAL LETTER I WITH OGONEK AND ACUTE;012E 0301 +LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND ACUTE;012F 0307 0301 +LATIN CAPITAL LETTER I WITH OGONEK AND TILDE;012E 0303 +LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND TILDE;012F 0307 0303 +LATIN CAPITAL LETTER J WITH TILDE;004A 0303 +LATIN SMALL LETTER J WITH DOT ABOVE AND TILDE;006A 0307 0303 +LATIN CAPITAL LETTER L WITH TILDE;004C 0303 +LATIN SMALL LETTER L WITH TILDE;006C 0303 +LATIN CAPITAL LETTER M WITH TILDE;004D 0303 +LATIN SMALL LETTER M WITH TILDE;006D 0303 +LATIN CAPITAL LETTER R WITH TILDE;0052 0303 +LATIN SMALL LETTER R WITH TILDE;0072 0303 +LATIN CAPITAL LETTER U WITH OGONEK AND ACUTE;0172 0301 +LATIN SMALL LETTER U WITH OGONEK AND ACUTE;0173 0301 +LATIN CAPITAL LETTER U WITH OGONEK AND TILDE;0172 0303 +LATIN SMALL LETTER U WITH OGONEK AND TILDE;0173 0303 +LATIN CAPITAL LETTER U WITH MACRON AND ACUTE;016A 0301 +LATIN SMALL LETTER U WITH MACRON AND ACUTE;016B 0301 +LATIN CAPITAL LETTER U WITH MACRON AND TILDE;016A 0303 +LATIN SMALL LETTER U WITH MACRON AND TILDE;016B 0303 + +# Additions for Tamil. Provisional 2008-02-08, Approved 2009-08-14 +# +# A visual display of the Tamil named sequences is available +# in the documentation for Unicode 5.2. See: +# http://www.unicode.org/versions/Unicode5.2.0/ + +TAMIL CONSONANT K; 0B95 0BCD +TAMIL CONSONANT NG; 0B99 0BCD +TAMIL CONSONANT C; 0B9A 0BCD +TAMIL CONSONANT NY; 0B9E 0BCD +TAMIL CONSONANT TT; 0B9F 0BCD +TAMIL CONSONANT NN; 0BA3 0BCD +TAMIL CONSONANT T; 0BA4 0BCD +TAMIL CONSONANT N; 0BA8 0BCD +TAMIL CONSONANT P; 0BAA 0BCD +TAMIL CONSONANT M; 0BAE 0BCD +TAMIL CONSONANT Y; 0BAF 0BCD +TAMIL CONSONANT R; 0BB0 0BCD +TAMIL CONSONANT L; 0BB2 0BCD +TAMIL CONSONANT V; 0BB5 0BCD +TAMIL CONSONANT LLL;0BB4 0BCD +TAMIL CONSONANT LL; 0BB3 0BCD +TAMIL CONSONANT RR; 0BB1 0BCD +TAMIL CONSONANT NNN;0BA9 0BCD +TAMIL CONSONANT J; 0B9C 0BCD +TAMIL CONSONANT SH; 0BB6 0BCD +TAMIL CONSONANT SS; 0BB7 0BCD +TAMIL CONSONANT S; 0BB8 0BCD +TAMIL CONSONANT H; 0BB9 0BCD +TAMIL CONSONANT KSS;0B95 0BCD 0BB7 0BCD + +TAMIL SYLLABLE KAA; 0B95 0BBE +TAMIL SYLLABLE KI; 0B95 0BBF +TAMIL SYLLABLE KII; 0B95 0BC0 +TAMIL SYLLABLE KU; 0B95 0BC1 +TAMIL SYLLABLE KUU; 0B95 0BC2 +TAMIL SYLLABLE KE; 0B95 0BC6 +TAMIL SYLLABLE KEE; 0B95 0BC7 +TAMIL SYLLABLE KAI; 0B95 0BC8 +TAMIL SYLLABLE KO; 0B95 0BCA +TAMIL SYLLABLE KOO; 0B95 0BCB +TAMIL SYLLABLE KAU; 0B95 0BCC + +TAMIL SYLLABLE NGAA; 0B99 0BBE +TAMIL SYLLABLE NGI; 0B99 0BBF +TAMIL SYLLABLE NGII; 0B99 0BC0 +TAMIL SYLLABLE NGU; 0B99 0BC1 +TAMIL SYLLABLE NGUU; 0B99 0BC2 +TAMIL SYLLABLE NGE; 0B99 0BC6 +TAMIL SYLLABLE NGEE; 0B99 0BC7 +TAMIL SYLLABLE NGAI; 0B99 0BC8 +TAMIL SYLLABLE NGO; 0B99 0BCA +TAMIL SYLLABLE NGOO; 0B99 0BCB +TAMIL SYLLABLE NGAU; 0B99 0BCC + +TAMIL SYLLABLE CAA; 0B9A 0BBE +TAMIL SYLLABLE CI; 0B9A 0BBF +TAMIL SYLLABLE CII; 0B9A 0BC0 +TAMIL SYLLABLE CU; 0B9A 0BC1 +TAMIL SYLLABLE CUU; 0B9A 0BC2 +TAMIL SYLLABLE CE; 0B9A 0BC6 +TAMIL SYLLABLE CEE; 0B9A 0BC7 +TAMIL SYLLABLE CAI; 0B9A 0BC8 +TAMIL SYLLABLE CO; 0B9A 0BCA +TAMIL SYLLABLE COO; 0B9A 0BCB +TAMIL SYLLABLE CAU; 0B9A 0BCC + +TAMIL SYLLABLE NYAA; 0B9E 0BBE +TAMIL SYLLABLE NYI; 0B9E 0BBF +TAMIL SYLLABLE NYII; 0B9E 0BC0 +TAMIL SYLLABLE NYU; 0B9E 0BC1 +TAMIL SYLLABLE NYUU; 0B9E 0BC2 +TAMIL SYLLABLE NYE; 0B9E 0BC6 +TAMIL SYLLABLE NYEE; 0B9E 0BC7 +TAMIL SYLLABLE NYAI; 0B9E 0BC8 +TAMIL SYLLABLE NYO; 0B9E 0BCA +TAMIL SYLLABLE NYOO; 0B9E 0BCB +TAMIL SYLLABLE NYAU; 0B9E 0BCC + +TAMIL SYLLABLE TTAA; 0B9F 0BBE +TAMIL SYLLABLE TTI; 0B9F 0BBF +TAMIL SYLLABLE TTII; 0B9F 0BC0 +TAMIL SYLLABLE TTU; 0B9F 0BC1 +TAMIL SYLLABLE TTUU; 0B9F 0BC2 +TAMIL SYLLABLE TTE; 0B9F 0BC6 +TAMIL SYLLABLE TTEE; 0B9F 0BC7 +TAMIL SYLLABLE TTAI; 0B9F 0BC8 +TAMIL SYLLABLE TTO; 0B9F 0BCA +TAMIL SYLLABLE TTOO; 0B9F 0BCB +TAMIL SYLLABLE TTAU; 0B9F 0BCC + +TAMIL SYLLABLE NNAA; 0BA3 0BBE +TAMIL SYLLABLE NNI; 0BA3 0BBF +TAMIL SYLLABLE NNII; 0BA3 0BC0 +TAMIL SYLLABLE NNU; 0BA3 0BC1 +TAMIL SYLLABLE NNUU; 0BA3 0BC2 +TAMIL SYLLABLE NNE; 0BA3 0BC6 +TAMIL SYLLABLE NNEE; 0BA3 0BC7 +TAMIL SYLLABLE NNAI; 0BA3 0BC8 +TAMIL SYLLABLE NNO; 0BA3 0BCA +TAMIL SYLLABLE NNOO; 0BA3 0BCB +TAMIL SYLLABLE NNAU; 0BA3 0BCC + +TAMIL SYLLABLE TAA; 0BA4 0BBE +TAMIL SYLLABLE TI; 0BA4 0BBF +TAMIL SYLLABLE TII; 0BA4 0BC0 +TAMIL SYLLABLE TU; 0BA4 0BC1 +TAMIL SYLLABLE TUU; 0BA4 0BC2 +TAMIL SYLLABLE TE; 0BA4 0BC6 +TAMIL SYLLABLE TEE; 0BA4 0BC7 +TAMIL SYLLABLE TAI; 0BA4 0BC8 +TAMIL SYLLABLE TO; 0BA4 0BCA +TAMIL SYLLABLE TOO; 0BA4 0BCB +TAMIL SYLLABLE TAU; 0BA4 0BCC + +TAMIL SYLLABLE NAA; 0BA8 0BBE +TAMIL SYLLABLE NI; 0BA8 0BBF +TAMIL SYLLABLE NII; 0BA8 0BC0 +TAMIL SYLLABLE NU; 0BA8 0BC1 +TAMIL SYLLABLE NUU; 0BA8 0BC2 +TAMIL SYLLABLE NE; 0BA8 0BC6 +TAMIL SYLLABLE NEE; 0BA8 0BC7 +TAMIL SYLLABLE NAI; 0BA8 0BC8 +TAMIL SYLLABLE NO; 0BA8 0BCA +TAMIL SYLLABLE NOO; 0BA8 0BCB +TAMIL SYLLABLE NAU; 0BA8 0BCC + +TAMIL SYLLABLE PAA; 0BAA 0BBE +TAMIL SYLLABLE PI; 0BAA 0BBF +TAMIL SYLLABLE PII; 0BAA 0BC0 +TAMIL SYLLABLE PU; 0BAA 0BC1 +TAMIL SYLLABLE PUU; 0BAA 0BC2 +TAMIL SYLLABLE PE; 0BAA 0BC6 +TAMIL SYLLABLE PEE; 0BAA 0BC7 +TAMIL SYLLABLE PAI; 0BAA 0BC8 +TAMIL SYLLABLE PO; 0BAA 0BCA +TAMIL SYLLABLE POO; 0BAA 0BCB +TAMIL SYLLABLE PAU; 0BAA 0BCC + +TAMIL SYLLABLE MAA; 0BAE 0BBE +TAMIL SYLLABLE MI; 0BAE 0BBF +TAMIL SYLLABLE MII; 0BAE 0BC0 +TAMIL SYLLABLE MU; 0BAE 0BC1 +TAMIL SYLLABLE MUU; 0BAE 0BC2 +TAMIL SYLLABLE ME; 0BAE 0BC6 +TAMIL SYLLABLE MEE; 0BAE 0BC7 +TAMIL SYLLABLE MAI; 0BAE 0BC8 +TAMIL SYLLABLE MO; 0BAE 0BCA +TAMIL SYLLABLE MOO; 0BAE 0BCB +TAMIL SYLLABLE MAU; 0BAE 0BCC + +TAMIL SYLLABLE YAA; 0BAF 0BBE +TAMIL SYLLABLE YI; 0BAF 0BBF +TAMIL SYLLABLE YII; 0BAF 0BC0 +TAMIL SYLLABLE YU; 0BAF 0BC1 +TAMIL SYLLABLE YUU; 0BAF 0BC2 +TAMIL SYLLABLE YE; 0BAF 0BC6 +TAMIL SYLLABLE YEE; 0BAF 0BC7 +TAMIL SYLLABLE YAI; 0BAF 0BC8 +TAMIL SYLLABLE YO; 0BAF 0BCA +TAMIL SYLLABLE YOO; 0BAF 0BCB +TAMIL SYLLABLE YAU; 0BAF 0BCC + +TAMIL SYLLABLE RAA; 0BB0 0BBE +TAMIL SYLLABLE RI; 0BB0 0BBF +TAMIL SYLLABLE RII; 0BB0 0BC0 +TAMIL SYLLABLE RU; 0BB0 0BC1 +TAMIL SYLLABLE RUU; 0BB0 0BC2 +TAMIL SYLLABLE RE; 0BB0 0BC6 +TAMIL SYLLABLE REE; 0BB0 0BC7 +TAMIL SYLLABLE RAI; 0BB0 0BC8 +TAMIL SYLLABLE RO; 0BB0 0BCA +TAMIL SYLLABLE ROO; 0BB0 0BCB +TAMIL SYLLABLE RAU; 0BB0 0BCC + +TAMIL SYLLABLE LAA; 0BB2 0BBE +TAMIL SYLLABLE LI; 0BB2 0BBF +TAMIL SYLLABLE LII; 0BB2 0BC0 +TAMIL SYLLABLE LU; 0BB2 0BC1 +TAMIL SYLLABLE LUU; 0BB2 0BC2 +TAMIL SYLLABLE LE; 0BB2 0BC6 +TAMIL SYLLABLE LEE; 0BB2 0BC7 +TAMIL SYLLABLE LAI; 0BB2 0BC8 +TAMIL SYLLABLE LO; 0BB2 0BCA +TAMIL SYLLABLE LOO; 0BB2 0BCB +TAMIL SYLLABLE LAU; 0BB2 0BCC + +TAMIL SYLLABLE VAA; 0BB5 0BBE +TAMIL SYLLABLE VI; 0BB5 0BBF +TAMIL SYLLABLE VII; 0BB5 0BC0 +TAMIL SYLLABLE VU; 0BB5 0BC1 +TAMIL SYLLABLE VUU; 0BB5 0BC2 +TAMIL SYLLABLE VE; 0BB5 0BC6 +TAMIL SYLLABLE VEE; 0BB5 0BC7 +TAMIL SYLLABLE VAI; 0BB5 0BC8 +TAMIL SYLLABLE VO; 0BB5 0BCA +TAMIL SYLLABLE VOO; 0BB5 0BCB +TAMIL SYLLABLE VAU; 0BB5 0BCC + +TAMIL SYLLABLE LLLAA; 0BB4 0BBE +TAMIL SYLLABLE LLLI; 0BB4 0BBF +TAMIL SYLLABLE LLLII; 0BB4 0BC0 +TAMIL SYLLABLE LLLU; 0BB4 0BC1 +TAMIL SYLLABLE LLLUU; 0BB4 0BC2 +TAMIL SYLLABLE LLLE; 0BB4 0BC6 +TAMIL SYLLABLE LLLEE; 0BB4 0BC7 +TAMIL SYLLABLE LLLAI; 0BB4 0BC8 +TAMIL SYLLABLE LLLO; 0BB4 0BCA +TAMIL SYLLABLE LLLOO; 0BB4 0BCB +TAMIL SYLLABLE LLLAU; 0BB4 0BCC + +TAMIL SYLLABLE LLAA; 0BB3 0BBE +TAMIL SYLLABLE LLI; 0BB3 0BBF +TAMIL SYLLABLE LLII; 0BB3 0BC0 +TAMIL SYLLABLE LLU; 0BB3 0BC1 +TAMIL SYLLABLE LLUU; 0BB3 0BC2 +TAMIL SYLLABLE LLE; 0BB3 0BC6 +TAMIL SYLLABLE LLEE; 0BB3 0BC7 +TAMIL SYLLABLE LLAI; 0BB3 0BC8 +TAMIL SYLLABLE LLO; 0BB3 0BCA +TAMIL SYLLABLE LLOO; 0BB3 0BCB +TAMIL SYLLABLE LLAU; 0BB3 0BCC + +TAMIL SYLLABLE RRAA; 0BB1 0BBE +TAMIL SYLLABLE RRI; 0BB1 0BBF +TAMIL SYLLABLE RRII; 0BB1 0BC0 +TAMIL SYLLABLE RRU; 0BB1 0BC1 +TAMIL SYLLABLE RRUU; 0BB1 0BC2 +TAMIL SYLLABLE RRE; 0BB1 0BC6 +TAMIL SYLLABLE RREE; 0BB1 0BC7 +TAMIL SYLLABLE RRAI; 0BB1 0BC8 +TAMIL SYLLABLE RRO; 0BB1 0BCA +TAMIL SYLLABLE RROO; 0BB1 0BCB +TAMIL SYLLABLE RRAU; 0BB1 0BCC + +TAMIL SYLLABLE NNNAA; 0BA9 0BBE +TAMIL SYLLABLE NNNI; 0BA9 0BBF +TAMIL SYLLABLE NNNII; 0BA9 0BC0 +TAMIL SYLLABLE NNNU; 0BA9 0BC1 +TAMIL SYLLABLE NNNUU; 0BA9 0BC2 +TAMIL SYLLABLE NNNE; 0BA9 0BC6 +TAMIL SYLLABLE NNNEE; 0BA9 0BC7 +TAMIL SYLLABLE NNNAI; 0BA9 0BC8 +TAMIL SYLLABLE NNNO; 0BA9 0BCA +TAMIL SYLLABLE NNNOO; 0BA9 0BCB +TAMIL SYLLABLE NNNAU; 0BA9 0BCC + +TAMIL SYLLABLE JAA; 0B9C 0BBE +TAMIL SYLLABLE JI; 0B9C 0BBF +TAMIL SYLLABLE JII; 0B9C 0BC0 +TAMIL SYLLABLE JU; 0B9C 0BC1 +TAMIL SYLLABLE JUU; 0B9C 0BC2 +TAMIL SYLLABLE JE; 0B9C 0BC6 +TAMIL SYLLABLE JEE; 0B9C 0BC7 +TAMIL SYLLABLE JAI; 0B9C 0BC8 +TAMIL SYLLABLE JO; 0B9C 0BCA +TAMIL SYLLABLE JOO; 0B9C 0BCB +TAMIL SYLLABLE JAU; 0B9C 0BCC + +TAMIL SYLLABLE SHAA; 0BB6 0BBE +TAMIL SYLLABLE SHI; 0BB6 0BBF +TAMIL SYLLABLE SHII; 0BB6 0BC0 +TAMIL SYLLABLE SHU; 0BB6 0BC1 +TAMIL SYLLABLE SHUU; 0BB6 0BC2 +TAMIL SYLLABLE SHE; 0BB6 0BC6 +TAMIL SYLLABLE SHEE; 0BB6 0BC7 +TAMIL SYLLABLE SHAI; 0BB6 0BC8 +TAMIL SYLLABLE SHO; 0BB6 0BCA +TAMIL SYLLABLE SHOO; 0BB6 0BCB +TAMIL SYLLABLE SHAU; 0BB6 0BCC + +TAMIL SYLLABLE SSAA; 0BB7 0BBE +TAMIL SYLLABLE SSI; 0BB7 0BBF +TAMIL SYLLABLE SSII; 0BB7 0BC0 +TAMIL SYLLABLE SSU; 0BB7 0BC1 +TAMIL SYLLABLE SSUU; 0BB7 0BC2 +TAMIL SYLLABLE SSE; 0BB7 0BC6 +TAMIL SYLLABLE SSEE; 0BB7 0BC7 +TAMIL SYLLABLE SSAI; 0BB7 0BC8 +TAMIL SYLLABLE SSO; 0BB7 0BCA +TAMIL SYLLABLE SSOO; 0BB7 0BCB +TAMIL SYLLABLE SSAU; 0BB7 0BCC + +TAMIL SYLLABLE SAA; 0BB8 0BBE +TAMIL SYLLABLE SI; 0BB8 0BBF +TAMIL SYLLABLE SII; 0BB8 0BC0 +TAMIL SYLLABLE SU; 0BB8 0BC1 +TAMIL SYLLABLE SUU; 0BB8 0BC2 +TAMIL SYLLABLE SE; 0BB8 0BC6 +TAMIL SYLLABLE SEE; 0BB8 0BC7 +TAMIL SYLLABLE SAI; 0BB8 0BC8 +TAMIL SYLLABLE SO; 0BB8 0BCA +TAMIL SYLLABLE SOO; 0BB8 0BCB +TAMIL SYLLABLE SAU; 0BB8 0BCC + +TAMIL SYLLABLE HAA; 0BB9 0BBE +TAMIL SYLLABLE HI; 0BB9 0BBF +TAMIL SYLLABLE HII; 0BB9 0BC0 +TAMIL SYLLABLE HU; 0BB9 0BC1 +TAMIL SYLLABLE HUU; 0BB9 0BC2 +TAMIL SYLLABLE HE; 0BB9 0BC6 +TAMIL SYLLABLE HEE; 0BB9 0BC7 +TAMIL SYLLABLE HAI; 0BB9 0BC8 +TAMIL SYLLABLE HO; 0BB9 0BCA +TAMIL SYLLABLE HOO; 0BB9 0BCB +TAMIL SYLLABLE HAU; 0BB9 0BCC + +TAMIL SYLLABLE KSSA; 0B95 0BCD 0BB7 +TAMIL SYLLABLE KSSAA; 0B95 0BCD 0BB7 0BBE +TAMIL SYLLABLE KSSI; 0B95 0BCD 0BB7 0BBF +TAMIL SYLLABLE KSSII; 0B95 0BCD 0BB7 0BC0 +TAMIL SYLLABLE KSSU; 0B95 0BCD 0BB7 0BC1 +TAMIL SYLLABLE KSSUU; 0B95 0BCD 0BB7 0BC2 +TAMIL SYLLABLE KSSE; 0B95 0BCD 0BB7 0BC6 +TAMIL SYLLABLE KSSEE; 0B95 0BCD 0BB7 0BC7 +TAMIL SYLLABLE KSSAI; 0B95 0BCD 0BB7 0BC8 +TAMIL SYLLABLE KSSO; 0B95 0BCD 0BB7 0BCA +TAMIL SYLLABLE KSSOO; 0B95 0BCD 0BB7 0BCB +TAMIL SYLLABLE KSSAU; 0B95 0BCD 0BB7 0BCC + +TAMIL SYLLABLE SHRII; 0BB6 0BCD 0BB0 0BC0 + +GEORGIAN LETTER U-BRJGU;10E3 0302 +KHMER CONSONANT SIGN COENG KA;17D2 1780 +KHMER CONSONANT SIGN COENG KHA;17D2 1781 +KHMER CONSONANT SIGN COENG KO;17D2 1782 +KHMER CONSONANT SIGN COENG KHO;17D2 1783 +KHMER CONSONANT SIGN COENG NGO;17D2 1784 +KHMER CONSONANT SIGN COENG CA;17D2 1785 +KHMER CONSONANT SIGN COENG CHA;17D2 1786 +KHMER CONSONANT SIGN COENG CO;17D2 1787 +KHMER CONSONANT SIGN COENG CHO;17D2 1788 +KHMER CONSONANT SIGN COENG NYO;17D2 1789 +KHMER CONSONANT SIGN COENG DA;17D2 178A +KHMER CONSONANT SIGN COENG TTHA;17D2 178B +KHMER CONSONANT SIGN COENG DO;17D2 178C +KHMER CONSONANT SIGN COENG TTHO;17D2 178D +KHMER CONSONANT SIGN COENG NA;17D2 178E +KHMER CONSONANT SIGN COENG TA;17D2 178F +KHMER CONSONANT SIGN COENG THA;17D2 1790 +KHMER CONSONANT SIGN COENG TO;17D2 1791 +KHMER CONSONANT SIGN COENG THO;17D2 1792 +KHMER CONSONANT SIGN COENG NO;17D2 1793 +KHMER CONSONANT SIGN COENG BA;17D2 1794 +KHMER CONSONANT SIGN COENG PHA;17D2 1795 +KHMER CONSONANT SIGN COENG PO;17D2 1796 +KHMER CONSONANT SIGN COENG PHO;17D2 1797 +KHMER CONSONANT SIGN COENG MO;17D2 1798 +KHMER CONSONANT SIGN COENG YO;17D2 1799 +KHMER CONSONANT SIGN COENG RO;17D2 179A +KHMER CONSONANT SIGN COENG LO;17D2 179B +KHMER CONSONANT SIGN COENG VO;17D2 179C +KHMER CONSONANT SIGN COENG SHA;17D2 179D +KHMER CONSONANT SIGN COENG SSA;17D2 179E +KHMER CONSONANT SIGN COENG SA;17D2 179F +KHMER CONSONANT SIGN COENG HA;17D2 17A0 +KHMER CONSONANT SIGN COENG LA;17D2 17A1 +KHMER VOWEL SIGN COENG QA;17D2 17A2 +KHMER INDEPENDENT VOWEL SIGN COENG QU;17D2 17A7 +KHMER INDEPENDENT VOWEL SIGN COENG RY;17D2 17AB +KHMER INDEPENDENT VOWEL SIGN COENG RYY;17D2 17AC +KHMER INDEPENDENT VOWEL SIGN COENG QE;17D2 17AF +KHMER VOWEL SIGN OM;17BB 17C6 +KHMER VOWEL SIGN AAM;17B6 17C6 +KATAKANA LETTER AINU P;31F7 309A +MODIFIER LETTER EXTRA-HIGH EXTRA-LOW CONTOUR TONE BAR;02E5 02E9 diff --git a/rpython/rlib/unicodedata/NamedSequences-6.0.0.txt b/rpython/rlib/unicodedata/NamedSequences-6.0.0.txt new file mode 100644 --- /dev/null +++ b/rpython/rlib/unicodedata/NamedSequences-6.0.0.txt @@ -0,0 +1,495 @@ +# NamedSequences-6.0.0.txt +# Date: 2010-05-18, 10:48:00 PDT [KW] +# +# Unicode Character Database +# Copyright (c) 1991-2010 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# Format: +# Name of Sequence; Code Point Sequence for USI +# +# Code point sequences in the UCD use spaces as delimiters. +# The corresponding format for a USI in ISO/IEC 10646 uses +# comma delimitation and angle brackets. Thus, a named sequence +# of the form: +# +# EXAMPLE NAME;1000 1001 1002 +# +# in this data file, would correspond to a 10646 USI as follows: +# +# <1000, 1001, 1002> +# +# Note: The order of entries in this file is not significant. +# However, entries are generally in script order corresponding +# to block order in the Unicode Standard, to make it easier +# to find entries in the list. + +# ================================================ + +LATIN CAPITAL LETTER A WITH MACRON AND GRAVE;0100 0300 +LATIN SMALL LETTER A WITH MACRON AND GRAVE;0101 0300 +LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW;0045 0329 +LATIN SMALL LETTER E WITH VERTICAL LINE BELOW;0065 0329 +LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND GRAVE;00C8 0329 +LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND GRAVE;00E8 0329 +LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND ACUTE;00C9 0329 +LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND ACUTE;00E9 0329 +LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON;00CA 0304 +LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON;00EA 0304 +LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON;00CA 030C +LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON;00EA 030C +LATIN CAPITAL LETTER I WITH MACRON AND GRAVE;012A 0300 +LATIN SMALL LETTER I WITH MACRON AND GRAVE;012B 0300 +LATIN SMALL LETTER I WITH DOT ABOVE AND ACUTE;0069 0307 0301 +LATIN SMALL LETTER NG WITH TILDE ABOVE;006E 0360 0067 +LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW;004F 0329 +LATIN SMALL LETTER O WITH VERTICAL LINE BELOW;006F 0329 +LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND GRAVE;00D2 0329 +LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND GRAVE;00F2 0329 +LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND ACUTE;00D3 0329 +LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND ACUTE;00F3 0329 +LATIN CAPITAL LETTER S WITH VERTICAL LINE BELOW;0053 0329 +LATIN SMALL LETTER S WITH VERTICAL LINE BELOW;0073 0329 +LATIN CAPITAL LETTER U WITH MACRON AND GRAVE;016A 0300 +LATIN SMALL LETTER U WITH MACRON AND GRAVE;016B 0300 + +# Additions for Lithuanian. Provisional 2006-05-18, Approved 2007-10-19 + +LATIN CAPITAL LETTER A WITH OGONEK AND ACUTE;0104 0301 +LATIN SMALL LETTER A WITH OGONEK AND ACUTE;0105 0301 +LATIN CAPITAL LETTER A WITH OGONEK AND TILDE;0104 0303 +LATIN SMALL LETTER A WITH OGONEK AND TILDE;0105 0303 +LATIN CAPITAL LETTER E WITH OGONEK AND ACUTE;0118 0301 +LATIN SMALL LETTER E WITH OGONEK AND ACUTE;0119 0301 +LATIN CAPITAL LETTER E WITH OGONEK AND TILDE;0118 0303 +LATIN SMALL LETTER E WITH OGONEK AND TILDE;0119 0303 +LATIN CAPITAL LETTER E WITH DOT ABOVE AND ACUTE;0116 0301 +LATIN SMALL LETTER E WITH DOT ABOVE AND ACUTE;0117 0301 +LATIN CAPITAL LETTER E WITH DOT ABOVE AND TILDE;0116 0303 +LATIN SMALL LETTER E WITH DOT ABOVE AND TILDE;0117 0303 +LATIN SMALL LETTER I WITH DOT ABOVE AND GRAVE;0069 0307 0300 +LATIN SMALL LETTER I WITH DOT ABOVE AND TILDE;0069 0307 0303 +LATIN CAPITAL LETTER I WITH OGONEK AND ACUTE;012E 0301 +LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND ACUTE;012F 0307 0301 +LATIN CAPITAL LETTER I WITH OGONEK AND TILDE;012E 0303 +LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND TILDE;012F 0307 0303 +LATIN CAPITAL LETTER J WITH TILDE;004A 0303 +LATIN SMALL LETTER J WITH DOT ABOVE AND TILDE;006A 0307 0303 +LATIN CAPITAL LETTER L WITH TILDE;004C 0303 +LATIN SMALL LETTER L WITH TILDE;006C 0303 +LATIN CAPITAL LETTER M WITH TILDE;004D 0303 +LATIN SMALL LETTER M WITH TILDE;006D 0303 +LATIN CAPITAL LETTER R WITH TILDE;0052 0303 +LATIN SMALL LETTER R WITH TILDE;0072 0303 +LATIN CAPITAL LETTER U WITH OGONEK AND ACUTE;0172 0301 +LATIN SMALL LETTER U WITH OGONEK AND ACUTE;0173 0301 +LATIN CAPITAL LETTER U WITH OGONEK AND TILDE;0172 0303 +LATIN SMALL LETTER U WITH OGONEK AND TILDE;0173 0303 +LATIN CAPITAL LETTER U WITH MACRON AND ACUTE;016A 0301 +LATIN SMALL LETTER U WITH MACRON AND ACUTE;016B 0301 +LATIN CAPITAL LETTER U WITH MACRON AND TILDE;016A 0303 +LATIN SMALL LETTER U WITH MACRON AND TILDE;016B 0303 + +# Entries for JIS X 0213 compatibility mapping. +# Provisional: 2008-11-07, Approved 2010-05-14 + +LATIN SMALL LETTER AE WITH GRAVE;00E6 0300 +LATIN SMALL LETTER OPEN O WITH GRAVE;0254 0300 +LATIN SMALL LETTER OPEN O WITH ACUTE;0254 0301 +LATIN SMALL LETTER TURNED V WITH GRAVE;028C 0300 +LATIN SMALL LETTER TURNED V WITH ACUTE;028C 0301 +LATIN SMALL LETTER SCHWA WITH GRAVE;0259 0300 +LATIN SMALL LETTER SCHWA WITH ACUTE;0259 0301 +LATIN SMALL LETTER HOOKED SCHWA WITH GRAVE;025A 0300 +LATIN SMALL LETTER HOOKED SCHWA WITH ACUTE;025A 0301 + +# Entry for a Bangla entity. +# Provisional: 2009-08-10, Approved 2010-05-14 + +BENGALI LETTER KHINYA;0995 09CD 09B7 + +# Additions for Tamil. Provisional 2008-02-08, Approved 2009-08-14 +# +# A visual display of the Tamil named sequences is available +# in the documentation for Unicode 5.2. See: +# http://www.unicode.org/versions/Unicode5.2.0/ + +TAMIL CONSONANT K; 0B95 0BCD +TAMIL CONSONANT NG; 0B99 0BCD +TAMIL CONSONANT C; 0B9A 0BCD +TAMIL CONSONANT NY; 0B9E 0BCD +TAMIL CONSONANT TT; 0B9F 0BCD +TAMIL CONSONANT NN; 0BA3 0BCD +TAMIL CONSONANT T; 0BA4 0BCD +TAMIL CONSONANT N; 0BA8 0BCD +TAMIL CONSONANT P; 0BAA 0BCD +TAMIL CONSONANT M; 0BAE 0BCD +TAMIL CONSONANT Y; 0BAF 0BCD +TAMIL CONSONANT R; 0BB0 0BCD +TAMIL CONSONANT L; 0BB2 0BCD +TAMIL CONSONANT V; 0BB5 0BCD +TAMIL CONSONANT LLL;0BB4 0BCD +TAMIL CONSONANT LL; 0BB3 0BCD +TAMIL CONSONANT RR; 0BB1 0BCD +TAMIL CONSONANT NNN;0BA9 0BCD +TAMIL CONSONANT J; 0B9C 0BCD +TAMIL CONSONANT SH; 0BB6 0BCD +TAMIL CONSONANT SS; 0BB7 0BCD +TAMIL CONSONANT S; 0BB8 0BCD +TAMIL CONSONANT H; 0BB9 0BCD +TAMIL CONSONANT KSS;0B95 0BCD 0BB7 0BCD + +TAMIL SYLLABLE KAA; 0B95 0BBE +TAMIL SYLLABLE KI; 0B95 0BBF +TAMIL SYLLABLE KII; 0B95 0BC0 +TAMIL SYLLABLE KU; 0B95 0BC1 +TAMIL SYLLABLE KUU; 0B95 0BC2 +TAMIL SYLLABLE KE; 0B95 0BC6 +TAMIL SYLLABLE KEE; 0B95 0BC7 +TAMIL SYLLABLE KAI; 0B95 0BC8 +TAMIL SYLLABLE KO; 0B95 0BCA +TAMIL SYLLABLE KOO; 0B95 0BCB +TAMIL SYLLABLE KAU; 0B95 0BCC + +TAMIL SYLLABLE NGAA; 0B99 0BBE +TAMIL SYLLABLE NGI; 0B99 0BBF +TAMIL SYLLABLE NGII; 0B99 0BC0 +TAMIL SYLLABLE NGU; 0B99 0BC1 +TAMIL SYLLABLE NGUU; 0B99 0BC2 +TAMIL SYLLABLE NGE; 0B99 0BC6 +TAMIL SYLLABLE NGEE; 0B99 0BC7 +TAMIL SYLLABLE NGAI; 0B99 0BC8 +TAMIL SYLLABLE NGO; 0B99 0BCA +TAMIL SYLLABLE NGOO; 0B99 0BCB +TAMIL SYLLABLE NGAU; 0B99 0BCC + +TAMIL SYLLABLE CAA; 0B9A 0BBE +TAMIL SYLLABLE CI; 0B9A 0BBF +TAMIL SYLLABLE CII; 0B9A 0BC0 +TAMIL SYLLABLE CU; 0B9A 0BC1 +TAMIL SYLLABLE CUU; 0B9A 0BC2 +TAMIL SYLLABLE CE; 0B9A 0BC6 +TAMIL SYLLABLE CEE; 0B9A 0BC7 +TAMIL SYLLABLE CAI; 0B9A 0BC8 +TAMIL SYLLABLE CO; 0B9A 0BCA +TAMIL SYLLABLE COO; 0B9A 0BCB +TAMIL SYLLABLE CAU; 0B9A 0BCC + +TAMIL SYLLABLE NYAA; 0B9E 0BBE +TAMIL SYLLABLE NYI; 0B9E 0BBF +TAMIL SYLLABLE NYII; 0B9E 0BC0 +TAMIL SYLLABLE NYU; 0B9E 0BC1 +TAMIL SYLLABLE NYUU; 0B9E 0BC2 +TAMIL SYLLABLE NYE; 0B9E 0BC6 +TAMIL SYLLABLE NYEE; 0B9E 0BC7 +TAMIL SYLLABLE NYAI; 0B9E 0BC8 +TAMIL SYLLABLE NYO; 0B9E 0BCA +TAMIL SYLLABLE NYOO; 0B9E 0BCB +TAMIL SYLLABLE NYAU; 0B9E 0BCC + +TAMIL SYLLABLE TTAA; 0B9F 0BBE +TAMIL SYLLABLE TTI; 0B9F 0BBF +TAMIL SYLLABLE TTII; 0B9F 0BC0 +TAMIL SYLLABLE TTU; 0B9F 0BC1 +TAMIL SYLLABLE TTUU; 0B9F 0BC2 +TAMIL SYLLABLE TTE; 0B9F 0BC6 +TAMIL SYLLABLE TTEE; 0B9F 0BC7 +TAMIL SYLLABLE TTAI; 0B9F 0BC8 +TAMIL SYLLABLE TTO; 0B9F 0BCA +TAMIL SYLLABLE TTOO; 0B9F 0BCB +TAMIL SYLLABLE TTAU; 0B9F 0BCC + +TAMIL SYLLABLE NNAA; 0BA3 0BBE +TAMIL SYLLABLE NNI; 0BA3 0BBF +TAMIL SYLLABLE NNII; 0BA3 0BC0 +TAMIL SYLLABLE NNU; 0BA3 0BC1 +TAMIL SYLLABLE NNUU; 0BA3 0BC2 +TAMIL SYLLABLE NNE; 0BA3 0BC6 +TAMIL SYLLABLE NNEE; 0BA3 0BC7 +TAMIL SYLLABLE NNAI; 0BA3 0BC8 +TAMIL SYLLABLE NNO; 0BA3 0BCA +TAMIL SYLLABLE NNOO; 0BA3 0BCB +TAMIL SYLLABLE NNAU; 0BA3 0BCC + +TAMIL SYLLABLE TAA; 0BA4 0BBE +TAMIL SYLLABLE TI; 0BA4 0BBF +TAMIL SYLLABLE TII; 0BA4 0BC0 +TAMIL SYLLABLE TU; 0BA4 0BC1 +TAMIL SYLLABLE TUU; 0BA4 0BC2 +TAMIL SYLLABLE TE; 0BA4 0BC6 +TAMIL SYLLABLE TEE; 0BA4 0BC7 +TAMIL SYLLABLE TAI; 0BA4 0BC8 +TAMIL SYLLABLE TO; 0BA4 0BCA +TAMIL SYLLABLE TOO; 0BA4 0BCB +TAMIL SYLLABLE TAU; 0BA4 0BCC + +TAMIL SYLLABLE NAA; 0BA8 0BBE +TAMIL SYLLABLE NI; 0BA8 0BBF +TAMIL SYLLABLE NII; 0BA8 0BC0 +TAMIL SYLLABLE NU; 0BA8 0BC1 +TAMIL SYLLABLE NUU; 0BA8 0BC2 +TAMIL SYLLABLE NE; 0BA8 0BC6 +TAMIL SYLLABLE NEE; 0BA8 0BC7 +TAMIL SYLLABLE NAI; 0BA8 0BC8 +TAMIL SYLLABLE NO; 0BA8 0BCA +TAMIL SYLLABLE NOO; 0BA8 0BCB +TAMIL SYLLABLE NAU; 0BA8 0BCC + +TAMIL SYLLABLE PAA; 0BAA 0BBE +TAMIL SYLLABLE PI; 0BAA 0BBF +TAMIL SYLLABLE PII; 0BAA 0BC0 +TAMIL SYLLABLE PU; 0BAA 0BC1 +TAMIL SYLLABLE PUU; 0BAA 0BC2 +TAMIL SYLLABLE PE; 0BAA 0BC6 +TAMIL SYLLABLE PEE; 0BAA 0BC7 +TAMIL SYLLABLE PAI; 0BAA 0BC8 +TAMIL SYLLABLE PO; 0BAA 0BCA +TAMIL SYLLABLE POO; 0BAA 0BCB +TAMIL SYLLABLE PAU; 0BAA 0BCC + +TAMIL SYLLABLE MAA; 0BAE 0BBE +TAMIL SYLLABLE MI; 0BAE 0BBF +TAMIL SYLLABLE MII; 0BAE 0BC0 +TAMIL SYLLABLE MU; 0BAE 0BC1 +TAMIL SYLLABLE MUU; 0BAE 0BC2 +TAMIL SYLLABLE ME; 0BAE 0BC6 +TAMIL SYLLABLE MEE; 0BAE 0BC7 +TAMIL SYLLABLE MAI; 0BAE 0BC8 +TAMIL SYLLABLE MO; 0BAE 0BCA +TAMIL SYLLABLE MOO; 0BAE 0BCB +TAMIL SYLLABLE MAU; 0BAE 0BCC + +TAMIL SYLLABLE YAA; 0BAF 0BBE +TAMIL SYLLABLE YI; 0BAF 0BBF +TAMIL SYLLABLE YII; 0BAF 0BC0 +TAMIL SYLLABLE YU; 0BAF 0BC1 +TAMIL SYLLABLE YUU; 0BAF 0BC2 +TAMIL SYLLABLE YE; 0BAF 0BC6 +TAMIL SYLLABLE YEE; 0BAF 0BC7 +TAMIL SYLLABLE YAI; 0BAF 0BC8 +TAMIL SYLLABLE YO; 0BAF 0BCA +TAMIL SYLLABLE YOO; 0BAF 0BCB +TAMIL SYLLABLE YAU; 0BAF 0BCC + +TAMIL SYLLABLE RAA; 0BB0 0BBE +TAMIL SYLLABLE RI; 0BB0 0BBF +TAMIL SYLLABLE RII; 0BB0 0BC0 +TAMIL SYLLABLE RU; 0BB0 0BC1 +TAMIL SYLLABLE RUU; 0BB0 0BC2 +TAMIL SYLLABLE RE; 0BB0 0BC6 +TAMIL SYLLABLE REE; 0BB0 0BC7 +TAMIL SYLLABLE RAI; 0BB0 0BC8 +TAMIL SYLLABLE RO; 0BB0 0BCA +TAMIL SYLLABLE ROO; 0BB0 0BCB +TAMIL SYLLABLE RAU; 0BB0 0BCC + +TAMIL SYLLABLE LAA; 0BB2 0BBE +TAMIL SYLLABLE LI; 0BB2 0BBF +TAMIL SYLLABLE LII; 0BB2 0BC0 +TAMIL SYLLABLE LU; 0BB2 0BC1 +TAMIL SYLLABLE LUU; 0BB2 0BC2 +TAMIL SYLLABLE LE; 0BB2 0BC6 +TAMIL SYLLABLE LEE; 0BB2 0BC7 +TAMIL SYLLABLE LAI; 0BB2 0BC8 +TAMIL SYLLABLE LO; 0BB2 0BCA +TAMIL SYLLABLE LOO; 0BB2 0BCB +TAMIL SYLLABLE LAU; 0BB2 0BCC + +TAMIL SYLLABLE VAA; 0BB5 0BBE +TAMIL SYLLABLE VI; 0BB5 0BBF +TAMIL SYLLABLE VII; 0BB5 0BC0 +TAMIL SYLLABLE VU; 0BB5 0BC1 +TAMIL SYLLABLE VUU; 0BB5 0BC2 +TAMIL SYLLABLE VE; 0BB5 0BC6 +TAMIL SYLLABLE VEE; 0BB5 0BC7 +TAMIL SYLLABLE VAI; 0BB5 0BC8 +TAMIL SYLLABLE VO; 0BB5 0BCA +TAMIL SYLLABLE VOO; 0BB5 0BCB +TAMIL SYLLABLE VAU; 0BB5 0BCC + +TAMIL SYLLABLE LLLAA; 0BB4 0BBE +TAMIL SYLLABLE LLLI; 0BB4 0BBF +TAMIL SYLLABLE LLLII; 0BB4 0BC0 +TAMIL SYLLABLE LLLU; 0BB4 0BC1 +TAMIL SYLLABLE LLLUU; 0BB4 0BC2 +TAMIL SYLLABLE LLLE; 0BB4 0BC6 +TAMIL SYLLABLE LLLEE; 0BB4 0BC7 +TAMIL SYLLABLE LLLAI; 0BB4 0BC8 +TAMIL SYLLABLE LLLO; 0BB4 0BCA +TAMIL SYLLABLE LLLOO; 0BB4 0BCB +TAMIL SYLLABLE LLLAU; 0BB4 0BCC + +TAMIL SYLLABLE LLAA; 0BB3 0BBE +TAMIL SYLLABLE LLI; 0BB3 0BBF +TAMIL SYLLABLE LLII; 0BB3 0BC0 +TAMIL SYLLABLE LLU; 0BB3 0BC1 +TAMIL SYLLABLE LLUU; 0BB3 0BC2 +TAMIL SYLLABLE LLE; 0BB3 0BC6 +TAMIL SYLLABLE LLEE; 0BB3 0BC7 +TAMIL SYLLABLE LLAI; 0BB3 0BC8 +TAMIL SYLLABLE LLO; 0BB3 0BCA +TAMIL SYLLABLE LLOO; 0BB3 0BCB +TAMIL SYLLABLE LLAU; 0BB3 0BCC + +TAMIL SYLLABLE RRAA; 0BB1 0BBE +TAMIL SYLLABLE RRI; 0BB1 0BBF +TAMIL SYLLABLE RRII; 0BB1 0BC0 +TAMIL SYLLABLE RRU; 0BB1 0BC1 +TAMIL SYLLABLE RRUU; 0BB1 0BC2 +TAMIL SYLLABLE RRE; 0BB1 0BC6 +TAMIL SYLLABLE RREE; 0BB1 0BC7 +TAMIL SYLLABLE RRAI; 0BB1 0BC8 +TAMIL SYLLABLE RRO; 0BB1 0BCA +TAMIL SYLLABLE RROO; 0BB1 0BCB +TAMIL SYLLABLE RRAU; 0BB1 0BCC + +TAMIL SYLLABLE NNNAA; 0BA9 0BBE +TAMIL SYLLABLE NNNI; 0BA9 0BBF +TAMIL SYLLABLE NNNII; 0BA9 0BC0 +TAMIL SYLLABLE NNNU; 0BA9 0BC1 +TAMIL SYLLABLE NNNUU; 0BA9 0BC2 +TAMIL SYLLABLE NNNE; 0BA9 0BC6 +TAMIL SYLLABLE NNNEE; 0BA9 0BC7 +TAMIL SYLLABLE NNNAI; 0BA9 0BC8 +TAMIL SYLLABLE NNNO; 0BA9 0BCA +TAMIL SYLLABLE NNNOO; 0BA9 0BCB +TAMIL SYLLABLE NNNAU; 0BA9 0BCC + +TAMIL SYLLABLE JAA; 0B9C 0BBE +TAMIL SYLLABLE JI; 0B9C 0BBF +TAMIL SYLLABLE JII; 0B9C 0BC0 +TAMIL SYLLABLE JU; 0B9C 0BC1 +TAMIL SYLLABLE JUU; 0B9C 0BC2 +TAMIL SYLLABLE JE; 0B9C 0BC6 +TAMIL SYLLABLE JEE; 0B9C 0BC7 +TAMIL SYLLABLE JAI; 0B9C 0BC8 +TAMIL SYLLABLE JO; 0B9C 0BCA +TAMIL SYLLABLE JOO; 0B9C 0BCB +TAMIL SYLLABLE JAU; 0B9C 0BCC + +TAMIL SYLLABLE SHAA; 0BB6 0BBE +TAMIL SYLLABLE SHI; 0BB6 0BBF +TAMIL SYLLABLE SHII; 0BB6 0BC0 +TAMIL SYLLABLE SHU; 0BB6 0BC1 +TAMIL SYLLABLE SHUU; 0BB6 0BC2 +TAMIL SYLLABLE SHE; 0BB6 0BC6 +TAMIL SYLLABLE SHEE; 0BB6 0BC7 +TAMIL SYLLABLE SHAI; 0BB6 0BC8 +TAMIL SYLLABLE SHO; 0BB6 0BCA +TAMIL SYLLABLE SHOO; 0BB6 0BCB +TAMIL SYLLABLE SHAU; 0BB6 0BCC + +TAMIL SYLLABLE SSAA; 0BB7 0BBE +TAMIL SYLLABLE SSI; 0BB7 0BBF +TAMIL SYLLABLE SSII; 0BB7 0BC0 +TAMIL SYLLABLE SSU; 0BB7 0BC1 +TAMIL SYLLABLE SSUU; 0BB7 0BC2 +TAMIL SYLLABLE SSE; 0BB7 0BC6 +TAMIL SYLLABLE SSEE; 0BB7 0BC7 +TAMIL SYLLABLE SSAI; 0BB7 0BC8 +TAMIL SYLLABLE SSO; 0BB7 0BCA +TAMIL SYLLABLE SSOO; 0BB7 0BCB +TAMIL SYLLABLE SSAU; 0BB7 0BCC + +TAMIL SYLLABLE SAA; 0BB8 0BBE +TAMIL SYLLABLE SI; 0BB8 0BBF +TAMIL SYLLABLE SII; 0BB8 0BC0 +TAMIL SYLLABLE SU; 0BB8 0BC1 +TAMIL SYLLABLE SUU; 0BB8 0BC2 +TAMIL SYLLABLE SE; 0BB8 0BC6 +TAMIL SYLLABLE SEE; 0BB8 0BC7 +TAMIL SYLLABLE SAI; 0BB8 0BC8 +TAMIL SYLLABLE SO; 0BB8 0BCA +TAMIL SYLLABLE SOO; 0BB8 0BCB +TAMIL SYLLABLE SAU; 0BB8 0BCC + +TAMIL SYLLABLE HAA; 0BB9 0BBE +TAMIL SYLLABLE HI; 0BB9 0BBF +TAMIL SYLLABLE HII; 0BB9 0BC0 +TAMIL SYLLABLE HU; 0BB9 0BC1 +TAMIL SYLLABLE HUU; 0BB9 0BC2 +TAMIL SYLLABLE HE; 0BB9 0BC6 +TAMIL SYLLABLE HEE; 0BB9 0BC7 +TAMIL SYLLABLE HAI; 0BB9 0BC8 +TAMIL SYLLABLE HO; 0BB9 0BCA +TAMIL SYLLABLE HOO; 0BB9 0BCB +TAMIL SYLLABLE HAU; 0BB9 0BCC + +TAMIL SYLLABLE KSSA; 0B95 0BCD 0BB7 +TAMIL SYLLABLE KSSAA; 0B95 0BCD 0BB7 0BBE +TAMIL SYLLABLE KSSI; 0B95 0BCD 0BB7 0BBF +TAMIL SYLLABLE KSSII; 0B95 0BCD 0BB7 0BC0 +TAMIL SYLLABLE KSSU; 0B95 0BCD 0BB7 0BC1 +TAMIL SYLLABLE KSSUU; 0B95 0BCD 0BB7 0BC2 +TAMIL SYLLABLE KSSE; 0B95 0BCD 0BB7 0BC6 +TAMIL SYLLABLE KSSEE; 0B95 0BCD 0BB7 0BC7 +TAMIL SYLLABLE KSSAI; 0B95 0BCD 0BB7 0BC8 +TAMIL SYLLABLE KSSO; 0B95 0BCD 0BB7 0BCA +TAMIL SYLLABLE KSSOO; 0B95 0BCD 0BB7 0BCB +TAMIL SYLLABLE KSSAU; 0B95 0BCD 0BB7 0BCC + +TAMIL SYLLABLE SHRII; 0BB6 0BCD 0BB0 0BC0 + +GEORGIAN LETTER U-BRJGU;10E3 0302 +KHMER CONSONANT SIGN COENG KA;17D2 1780 +KHMER CONSONANT SIGN COENG KHA;17D2 1781 +KHMER CONSONANT SIGN COENG KO;17D2 1782 +KHMER CONSONANT SIGN COENG KHO;17D2 1783 +KHMER CONSONANT SIGN COENG NGO;17D2 1784 +KHMER CONSONANT SIGN COENG CA;17D2 1785 +KHMER CONSONANT SIGN COENG CHA;17D2 1786 +KHMER CONSONANT SIGN COENG CO;17D2 1787 +KHMER CONSONANT SIGN COENG CHO;17D2 1788 +KHMER CONSONANT SIGN COENG NYO;17D2 1789 +KHMER CONSONANT SIGN COENG DA;17D2 178A +KHMER CONSONANT SIGN COENG TTHA;17D2 178B +KHMER CONSONANT SIGN COENG DO;17D2 178C +KHMER CONSONANT SIGN COENG TTHO;17D2 178D +KHMER CONSONANT SIGN COENG NA;17D2 178E +KHMER CONSONANT SIGN COENG TA;17D2 178F +KHMER CONSONANT SIGN COENG THA;17D2 1790 +KHMER CONSONANT SIGN COENG TO;17D2 1791 +KHMER CONSONANT SIGN COENG THO;17D2 1792 +KHMER CONSONANT SIGN COENG NO;17D2 1793 +KHMER CONSONANT SIGN COENG BA;17D2 1794 +KHMER CONSONANT SIGN COENG PHA;17D2 1795 +KHMER CONSONANT SIGN COENG PO;17D2 1796 +KHMER CONSONANT SIGN COENG PHO;17D2 1797 +KHMER CONSONANT SIGN COENG MO;17D2 1798 +KHMER CONSONANT SIGN COENG YO;17D2 1799 +KHMER CONSONANT SIGN COENG RO;17D2 179A +KHMER CONSONANT SIGN COENG LO;17D2 179B +KHMER CONSONANT SIGN COENG VO;17D2 179C +KHMER CONSONANT SIGN COENG SHA;17D2 179D +KHMER CONSONANT SIGN COENG SSA;17D2 179E +KHMER CONSONANT SIGN COENG SA;17D2 179F +KHMER CONSONANT SIGN COENG HA;17D2 17A0 +KHMER CONSONANT SIGN COENG LA;17D2 17A1 +KHMER VOWEL SIGN COENG QA;17D2 17A2 +KHMER INDEPENDENT VOWEL SIGN COENG QU;17D2 17A7 +KHMER INDEPENDENT VOWEL SIGN COENG RY;17D2 17AB +KHMER INDEPENDENT VOWEL SIGN COENG RYY;17D2 17AC +KHMER INDEPENDENT VOWEL SIGN COENG QE;17D2 17AF +KHMER VOWEL SIGN OM;17BB 17C6 +KHMER VOWEL SIGN AAM;17B6 17C6 + +# Entries for JIS X 0213 compatibility mapping. +# Provisional: 2008-11-07, Approved 2010-05-14 + +HIRAGANA LETTER BIDAKUON NGA;304B 309A +HIRAGANA LETTER BIDAKUON NGI;304D 309A +HIRAGANA LETTER BIDAKUON NGU;304F 309A +HIRAGANA LETTER BIDAKUON NGE;3051 309A +HIRAGANA LETTER BIDAKUON NGO;3053 309A +KATAKANA LETTER BIDAKUON NGA;30AB 309A +KATAKANA LETTER BIDAKUON NGI;30AD 309A +KATAKANA LETTER BIDAKUON NGU;30AF 309A +KATAKANA LETTER BIDAKUON NGE;30B1 309A +KATAKANA LETTER BIDAKUON NGO;30B3 309A +KATAKANA LETTER AINU CE;30BB 309A +KATAKANA LETTER AINU TU;30C4 309A +KATAKANA LETTER AINU TO;30C8 309A +KATAKANA LETTER AINU P;31F7 309A +MODIFIER LETTER EXTRA-HIGH EXTRA-LOW CONTOUR TONE BAR;02E5 02E9 +MODIFIER LETTER EXTRA-LOW EXTRA-HIGH CONTOUR TONE BAR;02E9 02E5 diff --git a/rpython/rlib/unicodedata/NamedSequences-6.2.0.txt b/rpython/rlib/unicodedata/NamedSequences-6.2.0.txt new file mode 100644 --- /dev/null +++ b/rpython/rlib/unicodedata/NamedSequences-6.2.0.txt @@ -0,0 +1,504 @@ +# NamedSequences-6.2.0.txt +# Date: 2012-05-15, 21:23:00 GMT [KW] +# +# Unicode Character Database +# Copyright (c) 1991-2012 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# Format: +# Name of Sequence; Code Point Sequence for USI +# +# Code point sequences in the UCD use spaces as delimiters. +# The corresponding format for a USI in ISO/IEC 10646 uses +# comma delimitation and angle brackets. Thus, a named sequence +# of the form: +# +# EXAMPLE NAME;1000 1001 1002 +# +# in this data file, would correspond to a 10646 USI as follows: +# +# <1000, 1001, 1002> +# +# Note: The order of entries in this file is not significant. +# However, entries are generally in script order corresponding +# to block order in the Unicode Standard, to make it easier +# to find entries in the list. + +# ================================================ + +LATIN CAPITAL LETTER A WITH MACRON AND GRAVE;0100 0300 +LATIN SMALL LETTER A WITH MACRON AND GRAVE;0101 0300 +LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW;0045 0329 +LATIN SMALL LETTER E WITH VERTICAL LINE BELOW;0065 0329 +LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND GRAVE;00C8 0329 +LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND GRAVE;00E8 0329 +LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND ACUTE;00C9 0329 +LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND ACUTE;00E9 0329 +LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON;00CA 0304 +LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON;00EA 0304 +LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON;00CA 030C +LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON;00EA 030C +LATIN CAPITAL LETTER I WITH MACRON AND GRAVE;012A 0300 +LATIN SMALL LETTER I WITH MACRON AND GRAVE;012B 0300 +LATIN SMALL LETTER I WITH DOT ABOVE AND ACUTE;0069 0307 0301 +LATIN SMALL LETTER NG WITH TILDE ABOVE;006E 0360 0067 +LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW;004F 0329 +LATIN SMALL LETTER O WITH VERTICAL LINE BELOW;006F 0329 +LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND GRAVE;00D2 0329 +LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND GRAVE;00F2 0329 +LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND ACUTE;00D3 0329 +LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND ACUTE;00F3 0329 +LATIN CAPITAL LETTER S WITH VERTICAL LINE BELOW;0053 0329 +LATIN SMALL LETTER S WITH VERTICAL LINE BELOW;0073 0329 +LATIN CAPITAL LETTER U WITH MACRON AND GRAVE;016A 0300 +LATIN SMALL LETTER U WITH MACRON AND GRAVE;016B 0300 + +# Additions for Lithuanian. Provisional 2006-05-18, Approved 2007-10-19 + +LATIN CAPITAL LETTER A WITH OGONEK AND ACUTE;0104 0301 +LATIN SMALL LETTER A WITH OGONEK AND ACUTE;0105 0301 +LATIN CAPITAL LETTER A WITH OGONEK AND TILDE;0104 0303 +LATIN SMALL LETTER A WITH OGONEK AND TILDE;0105 0303 +LATIN CAPITAL LETTER E WITH OGONEK AND ACUTE;0118 0301 +LATIN SMALL LETTER E WITH OGONEK AND ACUTE;0119 0301 +LATIN CAPITAL LETTER E WITH OGONEK AND TILDE;0118 0303 +LATIN SMALL LETTER E WITH OGONEK AND TILDE;0119 0303 +LATIN CAPITAL LETTER E WITH DOT ABOVE AND ACUTE;0116 0301 +LATIN SMALL LETTER E WITH DOT ABOVE AND ACUTE;0117 0301 +LATIN CAPITAL LETTER E WITH DOT ABOVE AND TILDE;0116 0303 +LATIN SMALL LETTER E WITH DOT ABOVE AND TILDE;0117 0303 +LATIN SMALL LETTER I WITH DOT ABOVE AND GRAVE;0069 0307 0300 +LATIN SMALL LETTER I WITH DOT ABOVE AND TILDE;0069 0307 0303 +LATIN CAPITAL LETTER I WITH OGONEK AND ACUTE;012E 0301 +LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND ACUTE;012F 0307 0301 +LATIN CAPITAL LETTER I WITH OGONEK AND TILDE;012E 0303 +LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND TILDE;012F 0307 0303 +LATIN CAPITAL LETTER J WITH TILDE;004A 0303 +LATIN SMALL LETTER J WITH DOT ABOVE AND TILDE;006A 0307 0303 +LATIN CAPITAL LETTER L WITH TILDE;004C 0303 +LATIN SMALL LETTER L WITH TILDE;006C 0303 +LATIN CAPITAL LETTER M WITH TILDE;004D 0303 +LATIN SMALL LETTER M WITH TILDE;006D 0303 +LATIN CAPITAL LETTER R WITH TILDE;0052 0303 +LATIN SMALL LETTER R WITH TILDE;0072 0303 +LATIN CAPITAL LETTER U WITH OGONEK AND ACUTE;0172 0301 +LATIN SMALL LETTER U WITH OGONEK AND ACUTE;0173 0301 +LATIN CAPITAL LETTER U WITH OGONEK AND TILDE;0172 0303 +LATIN SMALL LETTER U WITH OGONEK AND TILDE;0173 0303 +LATIN CAPITAL LETTER U WITH MACRON AND ACUTE;016A 0301 +LATIN SMALL LETTER U WITH MACRON AND ACUTE;016B 0301 +LATIN CAPITAL LETTER U WITH MACRON AND TILDE;016A 0303 +LATIN SMALL LETTER U WITH MACRON AND TILDE;016B 0303 + +# Entries for JIS X 0213 compatibility mapping. +# Provisional: 2008-11-07, Approved 2010-05-14 + +LATIN SMALL LETTER AE WITH GRAVE;00E6 0300 +LATIN SMALL LETTER OPEN O WITH GRAVE;0254 0300 +LATIN SMALL LETTER OPEN O WITH ACUTE;0254 0301 +LATIN SMALL LETTER TURNED V WITH GRAVE;028C 0300 +LATIN SMALL LETTER TURNED V WITH ACUTE;028C 0301 +LATIN SMALL LETTER SCHWA WITH GRAVE;0259 0300 +LATIN SMALL LETTER SCHWA WITH ACUTE;0259 0301 +LATIN SMALL LETTER HOOKED SCHWA WITH GRAVE;025A 0300 +LATIN SMALL LETTER HOOKED SCHWA WITH ACUTE;025A 0301 + +# Entry for a Bangla entity. +# Provisional: 2009-08-10, Approved 2010-05-14 + +BENGALI LETTER KHINYA;0995 09CD 09B7 + +# Additions for Tamil. Provisional 2008-02-08, Approved 2009-08-14 +# +# A visual display of the Tamil named sequences is available +# in the documentation for the Unicode Standard. See Section 9.6, Tamil in +# http://www.unicode.org/versions/latest/ + +TAMIL CONSONANT K; 0B95 0BCD +TAMIL CONSONANT NG; 0B99 0BCD +TAMIL CONSONANT C; 0B9A 0BCD +TAMIL CONSONANT NY; 0B9E 0BCD +TAMIL CONSONANT TT; 0B9F 0BCD +TAMIL CONSONANT NN; 0BA3 0BCD +TAMIL CONSONANT T; 0BA4 0BCD +TAMIL CONSONANT N; 0BA8 0BCD +TAMIL CONSONANT P; 0BAA 0BCD +TAMIL CONSONANT M; 0BAE 0BCD +TAMIL CONSONANT Y; 0BAF 0BCD +TAMIL CONSONANT R; 0BB0 0BCD +TAMIL CONSONANT L; 0BB2 0BCD +TAMIL CONSONANT V; 0BB5 0BCD +TAMIL CONSONANT LLL;0BB4 0BCD +TAMIL CONSONANT LL; 0BB3 0BCD +TAMIL CONSONANT RR; 0BB1 0BCD +TAMIL CONSONANT NNN;0BA9 0BCD +TAMIL CONSONANT J; 0B9C 0BCD +TAMIL CONSONANT SH; 0BB6 0BCD +TAMIL CONSONANT SS; 0BB7 0BCD +TAMIL CONSONANT S; 0BB8 0BCD +TAMIL CONSONANT H; 0BB9 0BCD +TAMIL CONSONANT KSS;0B95 0BCD 0BB7 0BCD + +TAMIL SYLLABLE KAA; 0B95 0BBE +TAMIL SYLLABLE KI; 0B95 0BBF +TAMIL SYLLABLE KII; 0B95 0BC0 +TAMIL SYLLABLE KU; 0B95 0BC1 +TAMIL SYLLABLE KUU; 0B95 0BC2 +TAMIL SYLLABLE KE; 0B95 0BC6 +TAMIL SYLLABLE KEE; 0B95 0BC7 +TAMIL SYLLABLE KAI; 0B95 0BC8 +TAMIL SYLLABLE KO; 0B95 0BCA +TAMIL SYLLABLE KOO; 0B95 0BCB +TAMIL SYLLABLE KAU; 0B95 0BCC + +TAMIL SYLLABLE NGAA; 0B99 0BBE +TAMIL SYLLABLE NGI; 0B99 0BBF +TAMIL SYLLABLE NGII; 0B99 0BC0 +TAMIL SYLLABLE NGU; 0B99 0BC1 +TAMIL SYLLABLE NGUU; 0B99 0BC2 +TAMIL SYLLABLE NGE; 0B99 0BC6 +TAMIL SYLLABLE NGEE; 0B99 0BC7 +TAMIL SYLLABLE NGAI; 0B99 0BC8 +TAMIL SYLLABLE NGO; 0B99 0BCA +TAMIL SYLLABLE NGOO; 0B99 0BCB +TAMIL SYLLABLE NGAU; 0B99 0BCC + +TAMIL SYLLABLE CAA; 0B9A 0BBE +TAMIL SYLLABLE CI; 0B9A 0BBF +TAMIL SYLLABLE CII; 0B9A 0BC0 +TAMIL SYLLABLE CU; 0B9A 0BC1 +TAMIL SYLLABLE CUU; 0B9A 0BC2 +TAMIL SYLLABLE CE; 0B9A 0BC6 +TAMIL SYLLABLE CEE; 0B9A 0BC7 +TAMIL SYLLABLE CAI; 0B9A 0BC8 +TAMIL SYLLABLE CO; 0B9A 0BCA +TAMIL SYLLABLE COO; 0B9A 0BCB +TAMIL SYLLABLE CAU; 0B9A 0BCC + +TAMIL SYLLABLE NYAA; 0B9E 0BBE +TAMIL SYLLABLE NYI; 0B9E 0BBF +TAMIL SYLLABLE NYII; 0B9E 0BC0 +TAMIL SYLLABLE NYU; 0B9E 0BC1 +TAMIL SYLLABLE NYUU; 0B9E 0BC2 +TAMIL SYLLABLE NYE; 0B9E 0BC6 +TAMIL SYLLABLE NYEE; 0B9E 0BC7 +TAMIL SYLLABLE NYAI; 0B9E 0BC8 +TAMIL SYLLABLE NYO; 0B9E 0BCA +TAMIL SYLLABLE NYOO; 0B9E 0BCB +TAMIL SYLLABLE NYAU; 0B9E 0BCC + +TAMIL SYLLABLE TTAA; 0B9F 0BBE +TAMIL SYLLABLE TTI; 0B9F 0BBF +TAMIL SYLLABLE TTII; 0B9F 0BC0 +TAMIL SYLLABLE TTU; 0B9F 0BC1 +TAMIL SYLLABLE TTUU; 0B9F 0BC2 +TAMIL SYLLABLE TTE; 0B9F 0BC6 +TAMIL SYLLABLE TTEE; 0B9F 0BC7 +TAMIL SYLLABLE TTAI; 0B9F 0BC8 +TAMIL SYLLABLE TTO; 0B9F 0BCA +TAMIL SYLLABLE TTOO; 0B9F 0BCB +TAMIL SYLLABLE TTAU; 0B9F 0BCC + +TAMIL SYLLABLE NNAA; 0BA3 0BBE +TAMIL SYLLABLE NNI; 0BA3 0BBF +TAMIL SYLLABLE NNII; 0BA3 0BC0 +TAMIL SYLLABLE NNU; 0BA3 0BC1 +TAMIL SYLLABLE NNUU; 0BA3 0BC2 +TAMIL SYLLABLE NNE; 0BA3 0BC6 +TAMIL SYLLABLE NNEE; 0BA3 0BC7 +TAMIL SYLLABLE NNAI; 0BA3 0BC8 +TAMIL SYLLABLE NNO; 0BA3 0BCA +TAMIL SYLLABLE NNOO; 0BA3 0BCB +TAMIL SYLLABLE NNAU; 0BA3 0BCC + +TAMIL SYLLABLE TAA; 0BA4 0BBE +TAMIL SYLLABLE TI; 0BA4 0BBF +TAMIL SYLLABLE TII; 0BA4 0BC0 +TAMIL SYLLABLE TU; 0BA4 0BC1 +TAMIL SYLLABLE TUU; 0BA4 0BC2 +TAMIL SYLLABLE TE; 0BA4 0BC6 +TAMIL SYLLABLE TEE; 0BA4 0BC7 +TAMIL SYLLABLE TAI; 0BA4 0BC8 +TAMIL SYLLABLE TO; 0BA4 0BCA +TAMIL SYLLABLE TOO; 0BA4 0BCB +TAMIL SYLLABLE TAU; 0BA4 0BCC + +TAMIL SYLLABLE NAA; 0BA8 0BBE +TAMIL SYLLABLE NI; 0BA8 0BBF +TAMIL SYLLABLE NII; 0BA8 0BC0 +TAMIL SYLLABLE NU; 0BA8 0BC1 +TAMIL SYLLABLE NUU; 0BA8 0BC2 +TAMIL SYLLABLE NE; 0BA8 0BC6 +TAMIL SYLLABLE NEE; 0BA8 0BC7 +TAMIL SYLLABLE NAI; 0BA8 0BC8 +TAMIL SYLLABLE NO; 0BA8 0BCA +TAMIL SYLLABLE NOO; 0BA8 0BCB +TAMIL SYLLABLE NAU; 0BA8 0BCC + +TAMIL SYLLABLE PAA; 0BAA 0BBE +TAMIL SYLLABLE PI; 0BAA 0BBF +TAMIL SYLLABLE PII; 0BAA 0BC0 +TAMIL SYLLABLE PU; 0BAA 0BC1 +TAMIL SYLLABLE PUU; 0BAA 0BC2 +TAMIL SYLLABLE PE; 0BAA 0BC6 +TAMIL SYLLABLE PEE; 0BAA 0BC7 +TAMIL SYLLABLE PAI; 0BAA 0BC8 +TAMIL SYLLABLE PO; 0BAA 0BCA +TAMIL SYLLABLE POO; 0BAA 0BCB +TAMIL SYLLABLE PAU; 0BAA 0BCC + +TAMIL SYLLABLE MAA; 0BAE 0BBE +TAMIL SYLLABLE MI; 0BAE 0BBF +TAMIL SYLLABLE MII; 0BAE 0BC0 +TAMIL SYLLABLE MU; 0BAE 0BC1 +TAMIL SYLLABLE MUU; 0BAE 0BC2 +TAMIL SYLLABLE ME; 0BAE 0BC6 +TAMIL SYLLABLE MEE; 0BAE 0BC7 +TAMIL SYLLABLE MAI; 0BAE 0BC8 +TAMIL SYLLABLE MO; 0BAE 0BCA +TAMIL SYLLABLE MOO; 0BAE 0BCB +TAMIL SYLLABLE MAU; 0BAE 0BCC + +TAMIL SYLLABLE YAA; 0BAF 0BBE +TAMIL SYLLABLE YI; 0BAF 0BBF +TAMIL SYLLABLE YII; 0BAF 0BC0 +TAMIL SYLLABLE YU; 0BAF 0BC1 +TAMIL SYLLABLE YUU; 0BAF 0BC2 +TAMIL SYLLABLE YE; 0BAF 0BC6 +TAMIL SYLLABLE YEE; 0BAF 0BC7 +TAMIL SYLLABLE YAI; 0BAF 0BC8 +TAMIL SYLLABLE YO; 0BAF 0BCA +TAMIL SYLLABLE YOO; 0BAF 0BCB +TAMIL SYLLABLE YAU; 0BAF 0BCC + +TAMIL SYLLABLE RAA; 0BB0 0BBE +TAMIL SYLLABLE RI; 0BB0 0BBF +TAMIL SYLLABLE RII; 0BB0 0BC0 +TAMIL SYLLABLE RU; 0BB0 0BC1 +TAMIL SYLLABLE RUU; 0BB0 0BC2 +TAMIL SYLLABLE RE; 0BB0 0BC6 +TAMIL SYLLABLE REE; 0BB0 0BC7 +TAMIL SYLLABLE RAI; 0BB0 0BC8 +TAMIL SYLLABLE RO; 0BB0 0BCA +TAMIL SYLLABLE ROO; 0BB0 0BCB +TAMIL SYLLABLE RAU; 0BB0 0BCC + +TAMIL SYLLABLE LAA; 0BB2 0BBE +TAMIL SYLLABLE LI; 0BB2 0BBF +TAMIL SYLLABLE LII; 0BB2 0BC0 +TAMIL SYLLABLE LU; 0BB2 0BC1 +TAMIL SYLLABLE LUU; 0BB2 0BC2 +TAMIL SYLLABLE LE; 0BB2 0BC6 +TAMIL SYLLABLE LEE; 0BB2 0BC7 +TAMIL SYLLABLE LAI; 0BB2 0BC8 +TAMIL SYLLABLE LO; 0BB2 0BCA +TAMIL SYLLABLE LOO; 0BB2 0BCB +TAMIL SYLLABLE LAU; 0BB2 0BCC + +TAMIL SYLLABLE VAA; 0BB5 0BBE +TAMIL SYLLABLE VI; 0BB5 0BBF +TAMIL SYLLABLE VII; 0BB5 0BC0 +TAMIL SYLLABLE VU; 0BB5 0BC1 +TAMIL SYLLABLE VUU; 0BB5 0BC2 +TAMIL SYLLABLE VE; 0BB5 0BC6 +TAMIL SYLLABLE VEE; 0BB5 0BC7 +TAMIL SYLLABLE VAI; 0BB5 0BC8 +TAMIL SYLLABLE VO; 0BB5 0BCA +TAMIL SYLLABLE VOO; 0BB5 0BCB +TAMIL SYLLABLE VAU; 0BB5 0BCC + +TAMIL SYLLABLE LLLAA; 0BB4 0BBE +TAMIL SYLLABLE LLLI; 0BB4 0BBF +TAMIL SYLLABLE LLLII; 0BB4 0BC0 +TAMIL SYLLABLE LLLU; 0BB4 0BC1 +TAMIL SYLLABLE LLLUU; 0BB4 0BC2 +TAMIL SYLLABLE LLLE; 0BB4 0BC6 +TAMIL SYLLABLE LLLEE; 0BB4 0BC7 +TAMIL SYLLABLE LLLAI; 0BB4 0BC8 +TAMIL SYLLABLE LLLO; 0BB4 0BCA +TAMIL SYLLABLE LLLOO; 0BB4 0BCB +TAMIL SYLLABLE LLLAU; 0BB4 0BCC + +TAMIL SYLLABLE LLAA; 0BB3 0BBE +TAMIL SYLLABLE LLI; 0BB3 0BBF +TAMIL SYLLABLE LLII; 0BB3 0BC0 +TAMIL SYLLABLE LLU; 0BB3 0BC1 +TAMIL SYLLABLE LLUU; 0BB3 0BC2 +TAMIL SYLLABLE LLE; 0BB3 0BC6 +TAMIL SYLLABLE LLEE; 0BB3 0BC7 +TAMIL SYLLABLE LLAI; 0BB3 0BC8 +TAMIL SYLLABLE LLO; 0BB3 0BCA +TAMIL SYLLABLE LLOO; 0BB3 0BCB +TAMIL SYLLABLE LLAU; 0BB3 0BCC + +TAMIL SYLLABLE RRAA; 0BB1 0BBE +TAMIL SYLLABLE RRI; 0BB1 0BBF +TAMIL SYLLABLE RRII; 0BB1 0BC0 +TAMIL SYLLABLE RRU; 0BB1 0BC1 +TAMIL SYLLABLE RRUU; 0BB1 0BC2 +TAMIL SYLLABLE RRE; 0BB1 0BC6 +TAMIL SYLLABLE RREE; 0BB1 0BC7 +TAMIL SYLLABLE RRAI; 0BB1 0BC8 +TAMIL SYLLABLE RRO; 0BB1 0BCA +TAMIL SYLLABLE RROO; 0BB1 0BCB +TAMIL SYLLABLE RRAU; 0BB1 0BCC + +TAMIL SYLLABLE NNNAA; 0BA9 0BBE +TAMIL SYLLABLE NNNI; 0BA9 0BBF +TAMIL SYLLABLE NNNII; 0BA9 0BC0 +TAMIL SYLLABLE NNNU; 0BA9 0BC1 +TAMIL SYLLABLE NNNUU; 0BA9 0BC2 +TAMIL SYLLABLE NNNE; 0BA9 0BC6 +TAMIL SYLLABLE NNNEE; 0BA9 0BC7 +TAMIL SYLLABLE NNNAI; 0BA9 0BC8 +TAMIL SYLLABLE NNNO; 0BA9 0BCA +TAMIL SYLLABLE NNNOO; 0BA9 0BCB +TAMIL SYLLABLE NNNAU; 0BA9 0BCC + +TAMIL SYLLABLE JAA; 0B9C 0BBE +TAMIL SYLLABLE JI; 0B9C 0BBF +TAMIL SYLLABLE JII; 0B9C 0BC0 +TAMIL SYLLABLE JU; 0B9C 0BC1 +TAMIL SYLLABLE JUU; 0B9C 0BC2 +TAMIL SYLLABLE JE; 0B9C 0BC6 +TAMIL SYLLABLE JEE; 0B9C 0BC7 +TAMIL SYLLABLE JAI; 0B9C 0BC8 +TAMIL SYLLABLE JO; 0B9C 0BCA +TAMIL SYLLABLE JOO; 0B9C 0BCB +TAMIL SYLLABLE JAU; 0B9C 0BCC + +TAMIL SYLLABLE SHAA; 0BB6 0BBE +TAMIL SYLLABLE SHI; 0BB6 0BBF +TAMIL SYLLABLE SHII; 0BB6 0BC0 +TAMIL SYLLABLE SHU; 0BB6 0BC1 +TAMIL SYLLABLE SHUU; 0BB6 0BC2 +TAMIL SYLLABLE SHE; 0BB6 0BC6 +TAMIL SYLLABLE SHEE; 0BB6 0BC7 +TAMIL SYLLABLE SHAI; 0BB6 0BC8 +TAMIL SYLLABLE SHO; 0BB6 0BCA +TAMIL SYLLABLE SHOO; 0BB6 0BCB +TAMIL SYLLABLE SHAU; 0BB6 0BCC _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit