In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/ac71d2a0d8089c52dd1f5d16f2eeca6df2368e3a?hp=83eda48afbabd87d5e280f69d5fb47b67feffe23>
- Log ----------------------------------------------------------------- commit ac71d2a0d8089c52dd1f5d16f2eeca6df2368e3a Author: The Unicode Consortium <unicode.org> Date: Wed Apr 1 10:35:39 2015 -0600 Switch to Unicode Version 8 M MANIFEST M charclass_invlists.h M lib/Unicode/UCD.t M lib/unicore/ArabicShaping.txt M lib/unicore/BidiBrackets.txt M lib/unicore/BidiMirroring.txt M lib/unicore/Blocks.txt M lib/unicore/CJKRadicals.txt M lib/unicore/CaseFolding.txt M lib/unicore/CompositionExclusions.txt M lib/unicore/DAge.txt M lib/unicore/DCoreProperties.txt M lib/unicore/DNormalizationProps.txt M lib/unicore/EastAsianWidth.txt M lib/unicore/EmojiSources.txt M lib/unicore/HangulSyllableType.txt M lib/unicore/Index.txt D lib/unicore/IndicMatraCategory.txt A lib/unicore/IndicPositionalCategory.txt M lib/unicore/IndicSyllabicCategory.txt M lib/unicore/Jamo.txt M lib/unicore/LineBreak.txt M lib/unicore/NameAliases.txt M lib/unicore/NamedSequences.txt M lib/unicore/NamedSqProv.txt M lib/unicore/NamesList.txt M lib/unicore/NormalizationCorrections.txt M lib/unicore/PropList.txt M lib/unicore/PropValueAliases.txt M lib/unicore/PropertyAliases.txt M lib/unicore/ReadMe.txt M lib/unicore/ScriptExtensions.txt M lib/unicore/Scripts.txt M lib/unicore/SpecialCasing.txt M lib/unicore/StandardizedVariants.txt M lib/unicore/UnicodeData.txt M lib/unicore/auxiliary/GCBTest.txt M lib/unicore/auxiliary/GraphemeBreakProperty.txt M lib/unicore/auxiliary/SBTest.txt M lib/unicore/auxiliary/SentenceBreakProperty.txt M lib/unicore/auxiliary/WBTest.txt M lib/unicore/auxiliary/WordBreakProperty.txt M lib/unicore/extracted/DBidiClass.txt M lib/unicore/extracted/DBinaryProperties.txt M lib/unicore/extracted/DCombiningClass.txt M lib/unicore/extracted/DDecompositionType.txt M lib/unicore/extracted/DEastAsianWidth.txt M lib/unicore/extracted/DGeneralCategory.txt M lib/unicore/extracted/DJoinGroup.txt M lib/unicore/extracted/DJoinType.txt M lib/unicore/extracted/DLineBreak.txt M lib/unicore/extracted/DNumType.txt M lib/unicore/extracted/DNumValues.txt M lib/unicore/mktables M lib/unicore/version M pod/perldelta.pod M regcharclass.h M unicode_constants.h commit 2e2fa59ae08347201472d6e80ba75b2c2e467c8f Author: Karl Williamson <k...@cpan.org> Date: Wed Jun 17 16:27:08 2015 -0600 Add unicode.org to the AUTHOR file M AUTHORS commit d4005659915af881e74662a0790898e5f850358e Author: Karl Williamson <k...@cpan.org> Date: Wed Jun 17 16:25:48 2015 -0600 regexec.c: Change \b{sb} rule in prep for Unicode 8.0 Unicode 8 version modifies this rule. M regexec.c ----------------------------------------------------------------------- Summary of changes: AUTHORS | 1 + MANIFEST | 2 +- charclass_invlists.h | 4374 +++++++++++++++----- lib/Unicode/UCD.t | 6 +- lib/unicore/ArabicShaping.txt | 34 +- lib/unicore/BidiBrackets.txt | 6 +- lib/unicore/BidiMirroring.txt | 10 +- lib/unicore/Blocks.txt | 25 +- lib/unicore/CJKRadicals.txt | 22 +- lib/unicore/CaseFolding.txt | 147 +- lib/unicore/CompositionExclusions.txt | 6 +- lib/unicore/DAge.txt | 74 +- lib/unicore/DCoreProperties.txt | 483 ++- lib/unicore/DNormalizationProps.txt | 157 +- lib/unicore/EastAsianWidth.txt | 121 +- lib/unicore/EmojiSources.txt | 8 +- lib/unicore/HangulSyllableType.txt | 6 +- lib/unicore/Index.txt | 33 + ...traCategory.txt => IndicPositionalCategory.txt} | 313 +- lib/unicore/IndicSyllabicCategory.txt | 159 +- lib/unicore/Jamo.txt | 8 +- lib/unicore/LineBreak.txt | 132 +- lib/unicore/NameAliases.txt | 15 +- lib/unicore/NamedSequences.txt | 6 +- lib/unicore/NamedSqProv.txt | 22 +- lib/unicore/NamesList.txt | 2967 ++++++++++++- lib/unicore/NormalizationCorrections.txt | 6 +- lib/unicore/PropList.txt | 66 +- lib/unicore/PropValueAliases.txt | 59 +- lib/unicore/PropertyAliases.txt | 8 +- lib/unicore/ReadMe.txt | 8 +- lib/unicore/ScriptExtensions.txt | 143 +- lib/unicore/Scripts.txt | 180 +- lib/unicore/SpecialCasing.txt | 4 +- lib/unicore/StandardizedVariants.txt | 34 +- lib/unicore/UnicodeData.txt | 2161 +++++++++- lib/unicore/auxiliary/GCBTest.txt | 6 +- lib/unicore/auxiliary/GraphemeBreakProperty.txt | 33 +- lib/unicore/auxiliary/SBTest.txt | 16 +- lib/unicore/auxiliary/SentenceBreakProperty.txt | 90 +- lib/unicore/auxiliary/WBTest.txt | 125 +- lib/unicore/auxiliary/WordBreakProperty.txt | 69 +- lib/unicore/extracted/DBidiClass.txt | 140 +- lib/unicore/extracted/DBinaryProperties.txt | 6 +- lib/unicore/extracted/DCombiningClass.txt | 125 +- lib/unicore/extracted/DDecompositionType.txt | 6 +- lib/unicore/extracted/DEastAsianWidth.txt | 120 +- lib/unicore/extracted/DGeneralCategory.txt | 237 +- lib/unicore/extracted/DJoinGroup.txt | 12 +- lib/unicore/extracted/DJoinType.txt | 38 +- lib/unicore/extracted/DLineBreak.txt | 161 +- lib/unicore/extracted/DNumType.txt | 17 +- lib/unicore/extracted/DNumValues.txt | 255 +- lib/unicore/mktables | 13 +- lib/unicore/version | 2 +- pod/perldelta.pod | 5 + regcharclass.h | 86 +- regexec.c | 5 +- unicode_constants.h | 2 +- 59 files changed, 11108 insertions(+), 2267 deletions(-) rename lib/unicore/{IndicMatraCategory.txt => IndicPositionalCategory.txt} (62%) diff --git a/AUTHORS b/AUTHORS index 779cd68..c191a7d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1209,6 +1209,7 @@ Tye McQueen <t...@metronet.com> Ulrich Habel <rh...@netbsd.org> Ulrich Kunitz <kun...@mai-koeln.com> Ulrich Pfeifer <pfei...@wait.de> +Unicode Consortium <unicode.org> Vadim Konovalov <vkonova...@lucent.com> Valeriy E. Ushakov <u...@ptc.spbu.ru> Vernon Lyon <vl...@cpan.org> diff --git a/MANIFEST b/MANIFEST index 679d2d6..d3f87d3 100644 --- a/MANIFEST +++ b/MANIFEST @@ -4329,7 +4329,7 @@ lib/unicore/extracted/DNumType.txt Unicode character database lib/unicore/extracted/DNumValues.txt Unicode character database lib/unicore/HangulSyllableType.txt Unicode character database lib/unicore/Index.txt Unicode character database -lib/unicore/IndicMatraCategory.txt Unicode character database +lib/unicore/IndicPositionalCategory.txt Unicode character database lib/unicore/IndicSyllabicCategory.txt Unicode character database lib/unicore/Jamo.txt Unicode character database lib/unicore/LineBreak.txt Unicode character database diff --git a/charclass_invlists.h b/charclass_invlists.h index 620ab63..6214690 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -43,7 +43,7 @@ static const UV ASCII_invlist[] = { /* for ASCII/Latin1 */ }; static const UV Cased_invlist[] = { /* for ASCII/Latin1 */ - 255, /* Number of elements */ + 263, /* Number of elements */ 148565664, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ @@ -108,6 +108,10 @@ static const UV Cased_invlist[] = { /* for ASCII/Latin1 */ 0x10C8, 0x10CD, 0x10CE, + 0x13A0, + 0x13F6, + 0x13F8, + 0x13FE, 0x1D00, 0x1DC0, 0x1E00, @@ -215,15 +219,15 @@ static const UV Cased_invlist[] = { /* for ASCII/Latin1 */ 0xA790, 0xA7AE, 0xA7B0, - 0xA7B2, + 0xA7B8, 0xA7F8, 0xA7FB, 0xAB30, 0xAB5B, 0xAB5C, - 0xAB60, - 0xAB64, 0xAB66, + 0xAB70, + 0xABC0, 0xFB00, 0xFB07, 0xFB13, @@ -234,6 +238,10 @@ static const UV Cased_invlist[] = { /* for ASCII/Latin1 */ 0xFF5B, 0x10400, 0x10450, + 0x10C80, + 0x10CB3, + 0x10CC0, + 0x10CF3, 0x118A0, 0x118E0, 0x1D400, @@ -305,7 +313,7 @@ static const UV Cased_invlist[] = { /* for ASCII/Latin1 */ }; static const UV Grapheme_Cluster_Break_invlist[] = { /* for ASCII/Latin1 */ - 1484, /* Number of elements */ + 1502, /* Number of elements */ 148565664, /* Version and data structure type */ 0, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ @@ -372,7 +380,7 @@ static const UV Grapheme_Cluster_Break_invlist[] = { /* for ASCII/Latin1 */ 0x82E, 0x859, 0x85C, - 0x8E4, + 0x8E3, 0x903, 0x904, 0x93A, @@ -640,10 +648,6 @@ static const UV Grapheme_Cluster_Break_invlist[] = { /* for ASCII/Latin1 */ 0x1933, 0x1939, 0x193C, - 0x19B5, - 0x19B8, - 0x19BA, - 0x19BB, 0x1A17, 0x1A19, 0x1A1B, @@ -744,7 +748,7 @@ static const UV Grapheme_Cluster_Break_invlist[] = { /* for ASCII/Latin1 */ 0xA673, 0xA674, 0xA67E, - 0xA69F, + 0xA69E, 0xA6A0, 0xA6F0, 0xA6F2, @@ -1633,7 +1637,7 @@ static const UV Grapheme_Cluster_Break_invlist[] = { /* for ASCII/Latin1 */ 0xFE00, 0xFE10, 0xFE20, - 0xFE2E, + 0xFE30, 0xFEFF, 0xFF00, 0xFF9E, @@ -1689,6 +1693,8 @@ static const UV Grapheme_Cluster_Break_invlist[] = { /* for ASCII/Latin1 */ 0x111B6, 0x111BF, 0x111C1, + 0x111CA, + 0x111CD, 0x1122C, 0x1122F, 0x11232, @@ -1700,7 +1706,7 @@ static const UV Grapheme_Cluster_Break_invlist[] = { /* for ASCII/Latin1 */ 0x112E0, 0x112E3, 0x112EB, - 0x11301, + 0x11300, 0x11302, 0x11304, 0x1133C, @@ -1743,6 +1749,8 @@ static const UV Grapheme_Cluster_Break_invlist[] = { /* for ASCII/Latin1 */ 0x115BE, 0x115BF, 0x115C1, + 0x115DC, + 0x115DE, 0x11630, 0x11633, 0x1163B, @@ -1758,6 +1766,12 @@ static const UV Grapheme_Cluster_Break_invlist[] = { /* for ASCII/Latin1 */ 0x116B6, 0x116B7, 0x116B8, + 0x1171D, + 0x11720, + 0x11722, + 0x11726, + 0x11727, + 0x1172C, 0x16AF0, 0x16AF5, 0x16B30, @@ -1785,6 +1799,18 @@ static const UV Grapheme_Cluster_Break_invlist[] = { /* for ASCII/Latin1 */ 0x1D1AE, 0x1D242, 0x1D245, + 0x1DA00, + 0x1DA37, + 0x1DA3B, + 0x1DA6D, + 0x1DA75, + 0x1DA76, + 0x1DA84, + 0x1DA85, + 0x1DA9B, + 0x1DAA0, + 0x1DAA1, + 0x1DAB0, 0x1E8D0, 0x1E8D7, 0x1F1E6, @@ -2150,10 +2176,6 @@ static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for ASCII/Latin1 */ GCB_SpacingMark, GCB_Extend, GCB_Other, - GCB_SpacingMark, - GCB_Other, - GCB_SpacingMark, - GCB_Other, GCB_Extend, GCB_SpacingMark, GCB_Extend, @@ -3199,6 +3221,8 @@ static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for ASCII/Latin1 */ GCB_Extend, GCB_SpacingMark, GCB_Other, + GCB_Extend, + GCB_Other, GCB_SpacingMark, GCB_Extend, GCB_SpacingMark, @@ -3253,6 +3277,8 @@ static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for ASCII/Latin1 */ GCB_SpacingMark, GCB_Extend, GCB_Other, + GCB_Extend, + GCB_Other, GCB_SpacingMark, GCB_Extend, GCB_SpacingMark, @@ -3269,6 +3295,12 @@ static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for ASCII/Latin1 */ GCB_Extend, GCB_Other, GCB_Extend, + GCB_SpacingMark, + GCB_Extend, + GCB_SpacingMark, + GCB_Extend, + GCB_Other, + GCB_Extend, GCB_Other, GCB_Extend, GCB_Other, @@ -3297,6 +3329,18 @@ static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for ASCII/Latin1 */ GCB_Other, GCB_Extend, GCB_Other, + GCB_Extend, + GCB_Other, + GCB_Extend, + GCB_Other, + GCB_Extend, + GCB_Other, + GCB_Extend, + GCB_Other, + GCB_Extend, + GCB_Other, + GCB_Extend, + GCB_Other, GCB_Regional_Indicator, GCB_Other, GCB_Control, @@ -3362,7 +3406,7 @@ static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for ASCII/Latin1 */ }; static const UV _Perl_Any_Folds_invlist[] = { /* for ASCII/Latin1 */ - 235, /* Number of elements */ + 247, /* Number of elements */ 148565664, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ @@ -3431,7 +3475,7 @@ static const UV _Perl_Any_Folds_invlist[] = { /* for ASCII/Latin1 */ 0x28D, 0x292, 0x293, - 0x29E, + 0x29D, 0x29F, 0x2BC, 0x2BD, @@ -3489,6 +3533,10 @@ static const UV _Perl_Any_Folds_invlist[] = { /* for ASCII/Latin1 */ 0x10C8, 0x10CD, 0x10CE, + 0x13A0, + 0x13F6, + 0x13F8, + 0x13FE, 0x1D79, 0x1D7A, 0x1D7D, @@ -3588,7 +3636,11 @@ static const UV _Perl_Any_Folds_invlist[] = { /* for ASCII/Latin1 */ 0xA796, 0xA7AE, 0xA7B0, - 0xA7B2, + 0xA7B8, + 0xAB53, + 0xAB54, + 0xAB70, + 0xABC0, 0xFB00, 0xFB07, 0xFB13, @@ -3599,6 +3651,10 @@ static const UV _Perl_Any_Folds_invlist[] = { /* for ASCII/Latin1 */ 0xFF5B, 0x10400, 0x10450, + 0x10C80, + 0x10CB3, + 0x10CC0, + 0x10CF3, 0x118A0, 0x118E0 }; @@ -3674,7 +3730,7 @@ static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for ASCII/Latin1 */ #if defined(PERL_IN_UTF8_C) static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ - 1267, /* Number of elements */ + 1317, /* Number of elements */ 148565664, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ @@ -3778,8 +3834,8 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0x840, 0x85C, 0x8A0, - 0x8B3, - 0x8E4, + 0x8B5, + 0x8E3, 0x964, 0x966, 0x970, @@ -3869,6 +3925,8 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0xAE4, 0xAE6, 0xAF0, + 0xAF9, + 0xAFA, 0xB01, 0xB04, 0xB05, @@ -3950,7 +4008,7 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0xC55, 0xC57, 0xC58, - 0xC5A, + 0xC5B, 0xC60, 0xC64, 0xC66, @@ -3999,7 +4057,7 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0xD4F, 0xD57, 0xD58, - 0xD60, + 0xD5F, 0xD64, 0xD66, 0xD70, @@ -4144,7 +4202,9 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0x1380, 0x1390, 0x13A0, - 0x13F5, + 0x13F6, + 0x13F8, + 0x13FE, 0x1401, 0x166D, 0x166F, @@ -4388,7 +4448,7 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0x3400, 0x4DB6, 0x4E00, - 0x9FCD, + 0x9FD6, 0xA000, 0xA48D, 0xA4D0, @@ -4402,19 +4462,15 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0xA674, 0xA67E, 0xA67F, - 0xA69E, - 0xA69F, 0xA6F2, 0xA717, 0xA720, 0xA722, 0xA789, 0xA78B, - 0xA78F, - 0xA790, 0xA7AE, 0xA7B0, - 0xA7B2, + 0xA7B8, 0xA7F7, 0xA828, 0xA840, @@ -4427,6 +4483,8 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0xA8F8, 0xA8FB, 0xA8FC, + 0xA8FD, + 0xA8FE, 0xA900, 0xA92E, 0xA930, @@ -4468,10 +4526,8 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0xAB30, 0xAB5B, 0xAB5C, - 0xAB60, - 0xAB64, 0xAB66, - 0xABC0, + 0xAB70, 0xABEB, 0xABEC, 0xABEE, @@ -4518,7 +4574,7 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0xFE00, 0xFE10, 0xFE20, - 0xFE2E, + 0xFE30, 0xFE33, 0xFE35, 0xFE4D, @@ -4623,6 +4679,10 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0x10877, 0x10880, 0x1089F, + 0x108E0, + 0x108F3, + 0x108F4, + 0x108F6, 0x10900, 0x10916, 0x10920, @@ -4663,6 +4723,10 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0x10B92, 0x10C00, 0x10C49, + 0x10C80, + 0x10CB3, + 0x10CC0, + 0x10CF3, 0x11000, 0x11047, 0x11066, @@ -4683,17 +4747,31 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0x11177, 0x11180, 0x111C5, + 0x111CA, + 0x111CD, 0x111D0, 0x111DB, + 0x111DC, + 0x111DD, 0x11200, 0x11212, 0x11213, 0x11238, + 0x11280, + 0x11287, + 0x11288, + 0x11289, + 0x1128A, + 0x1128E, + 0x1128F, + 0x1129E, + 0x1129F, + 0x112A9, 0x112B0, 0x112EB, 0x112F0, 0x112FA, - 0x11301, + 0x11300, 0x11304, 0x11305, 0x1130D, @@ -4713,6 +4791,8 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0x11349, 0x1134B, 0x1134E, + 0x11350, + 0x11351, 0x11357, 0x11358, 0x1135D, @@ -4731,6 +4811,8 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0x115B6, 0x115B8, 0x115C1, + 0x115D8, + 0x115DE, 0x11600, 0x11641, 0x11644, @@ -4741,6 +4823,12 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0x116B8, 0x116C0, 0x116CA, + 0x11700, + 0x1171A, + 0x1171D, + 0x1172C, + 0x11730, + 0x1173A, 0x118A0, 0x118EA, 0x118FF, @@ -4748,11 +4836,15 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0x11AC0, 0x11AF9, 0x12000, - 0x12399, + 0x1239A, 0x12400, 0x1246F, + 0x12480, + 0x12544, 0x13000, 0x1342F, + 0x14400, + 0x14647, 0x16800, 0x16A39, 0x16A40, @@ -4865,6 +4957,18 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0x1D7CC, 0x1D7CE, 0x1D800, + 0x1DA00, + 0x1DA37, + 0x1DA3B, + 0x1DA6D, + 0x1DA75, + 0x1DA76, + 0x1DA84, + 0x1DA85, + 0x1DA9B, + 0x1DAA0, + 0x1DAA1, + 0x1DAB0, 0x1E800, 0x1E8C5, 0x1E8D0, @@ -4941,6 +5045,8 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ 0x2B735, 0x2B740, 0x2B81E, + 0x2B820, + 0x2CEA2, 0x2F800, 0x2FA1E, 0xE0100, @@ -4948,7 +5054,7 @@ static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */ }; static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ - 1095, /* Number of elements */ + 1129, /* Number of elements */ 148565664, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ @@ -5052,7 +5158,7 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0x840, 0x859, 0x8A0, - 0x8B3, + 0x8B5, 0x904, 0x93A, 0x93D, @@ -5123,6 +5229,8 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0xAD1, 0xAE0, 0xAE2, + 0xAF9, + 0xAFA, 0xB05, 0xB0D, 0xB0F, @@ -5176,7 +5284,7 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0xC3D, 0xC3E, 0xC58, - 0xC5A, + 0xC5B, 0xC60, 0xC62, 0xC85, @@ -5207,7 +5315,7 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0xD3E, 0xD4E, 0xD4F, - 0xD60, + 0xD5F, 0xD62, 0xD7A, 0xD80, @@ -5330,7 +5438,9 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0x1380, 0x1390, 0x13A0, - 0x13F5, + 0x13F6, + 0x13F8, + 0x13FE, 0x1401, 0x166D, 0x166F, @@ -5375,8 +5485,8 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0x1975, 0x1980, 0x19AC, - 0x19C1, - 0x19C8, + 0x19B0, + 0x19CA, 0x1A00, 0x1A17, 0x1A20, @@ -5544,7 +5654,7 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0x3400, 0x4DB6, 0x4E00, - 0x9FCD, + 0x9FD6, 0xA000, 0xA48D, 0xA4D0, @@ -5566,11 +5676,9 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0xA722, 0xA789, 0xA78B, - 0xA78F, - 0xA790, 0xA7AE, 0xA7B0, - 0xA7B2, + 0xA7B8, 0xA7F7, 0xA802, 0xA803, @@ -5587,6 +5695,8 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0xA8F8, 0xA8FB, 0xA8FC, + 0xA8FD, + 0xA8FE, 0xA90A, 0xA926, 0xA930, @@ -5644,10 +5754,8 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0xAB30, 0xAB5B, 0xAB5C, - 0xAB60, - 0xAB64, 0xAB66, - 0xABC0, + 0xAB70, 0xABE3, 0xAC00, 0xD7A4, @@ -5781,6 +5889,10 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0x10877, 0x10880, 0x1089F, + 0x108E0, + 0x108F3, + 0x108F4, + 0x108F6, 0x10900, 0x10916, 0x10920, @@ -5815,6 +5927,10 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0x10B92, 0x10C00, 0x10C49, + 0x10C80, + 0x10CB3, + 0x10CC0, + 0x10CF3, 0x11003, 0x11038, 0x11083, @@ -5833,10 +5949,22 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0x111C5, 0x111DA, 0x111DB, + 0x111DC, + 0x111DD, 0x11200, 0x11212, 0x11213, 0x1122C, + 0x11280, + 0x11287, + 0x11288, + 0x11289, + 0x1128A, + 0x1128E, + 0x1128F, + 0x1129E, + 0x1129F, + 0x112A9, 0x112B0, 0x112DF, 0x11305, @@ -5853,6 +5981,8 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0x1133A, 0x1133D, 0x1133E, + 0x11350, + 0x11351, 0x1135D, 0x11362, 0x11480, @@ -5863,12 +5993,16 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0x114C8, 0x11580, 0x115AF, + 0x115D8, + 0x115DC, 0x11600, 0x11630, 0x11644, 0x11645, 0x11680, 0x116AB, + 0x11700, + 0x1171A, 0x118A0, 0x118E0, 0x118FF, @@ -5876,11 +6010,15 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0x11AC0, 0x11AF9, 0x12000, - 0x12399, + 0x1239A, 0x12400, 0x1246F, + 0x12480, + 0x12544, 0x13000, 0x1342F, + 0x14400, + 0x14647, 0x16800, 0x16A39, 0x16A40, @@ -6045,6 +6183,8 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ 0x2B735, 0x2B740, 0x2B81E, + 0x2B820, + 0x2CEA2, 0x2F800, 0x2FA1E }; @@ -6054,7 +6194,7 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */ #if defined(PERL_IN_PERL_C) static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ - 2835, /* Number of elements */ + 2896, /* Number of elements */ 148565664, /* Version and data structure type */ 0, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ @@ -6760,8 +6900,8 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0x859, 0x85C, 0x8A0, - 0x8B3, - 0x8E4, + 0x8B5, + 0x8E3, 0x904, 0x93A, 0x93D, @@ -6871,6 +7011,8 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0xAE4, 0xAE6, 0xAF0, + 0xAF9, + 0xAFA, 0xB01, 0xB04, 0xB05, @@ -6957,7 +7099,7 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0xC55, 0xC57, 0xC58, - 0xC5A, + 0xC5B, 0xC60, 0xC62, 0xC64, @@ -7012,7 +7154,7 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0xD4F, 0xD57, 0xD58, - 0xD60, + 0xD5F, 0xD62, 0xD64, 0xD66, @@ -7192,7 +7334,9 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0x1380, 0x1390, 0x13A0, - 0x13F5, + 0x13F6, + 0x13F8, + 0x13FE, 0x1401, 0x166D, 0x166E, @@ -7267,8 +7411,6 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0x1980, 0x19AC, 0x19B0, - 0x19C1, - 0x19C8, 0x19CA, 0x19D0, 0x19DA, @@ -7955,7 +8097,7 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0x3400, 0x4DB6, 0x4E00, - 0x9FCD, + 0x9FD6, 0xA000, 0xA48D, 0xA4D0, @@ -8049,7 +8191,6 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0xA69A, 0xA69B, 0xA69E, - 0xA69F, 0xA6A0, 0xA6F0, 0xA6F2, @@ -8183,7 +8324,10 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0xA7AA, 0xA7AE, 0xA7B0, - 0xA7B2, + 0xA7B5, + 0xA7B6, + 0xA7B7, + 0xA7B8, 0xA7F7, 0xA7F8, 0xA7FB, @@ -8211,6 +8355,8 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0xA8F8, 0xA8FB, 0xA8FC, + 0xA8FD, + 0xA8FE, 0xA900, 0xA90A, 0xA926, @@ -8284,9 +8430,8 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0xAB30, 0xAB5B, 0xAB5C, - 0xAB60, - 0xAB64, 0xAB66, + 0xAB70, 0xABC0, 0xABE3, 0xABEB, @@ -8341,7 +8486,7 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0xFE17, 0xFE19, 0xFE20, - 0xFE2E, + 0xFE30, 0xFE31, 0xFE33, 0xFE35, @@ -8475,6 +8620,10 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0x10877, 0x10880, 0x1089F, + 0x108E0, + 0x108F3, + 0x108F4, + 0x108F6, 0x10900, 0x10916, 0x10920, @@ -8520,6 +8669,10 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0x10B92, 0x10C00, 0x10C49, + 0x10C80, + 0x10CB3, + 0x10CC0, + 0x10CF3, 0x11000, 0x11003, 0x11038, @@ -8557,11 +8710,16 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0x111C1, 0x111C5, 0x111C7, + 0x111CA, 0x111CD, 0x111CE, 0x111D0, 0x111DA, 0x111DB, + 0x111DC, + 0x111DD, + 0x111DE, + 0x111E0, 0x11200, 0x11212, 0x11213, @@ -8570,12 +8728,23 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0x1123A, 0x1123B, 0x1123D, + 0x11280, + 0x11287, + 0x11288, + 0x11289, + 0x1128A, + 0x1128E, + 0x1128F, + 0x1129E, + 0x1129F, + 0x112A9, + 0x112AA, 0x112B0, 0x112DF, 0x112EB, 0x112F0, 0x112FA, - 0x11301, + 0x11300, 0x11304, 0x11305, 0x1130D, @@ -8597,6 +8766,8 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0x11349, 0x1134B, 0x1134E, + 0x11350, + 0x11351, 0x11357, 0x11358, 0x1135D, @@ -8622,7 +8793,9 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0x115C2, 0x115C4, 0x115C9, - 0x115CA, + 0x115D8, + 0x115DC, + 0x115DE, 0x11600, 0x11630, 0x11641, @@ -8636,6 +8809,14 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0x116B8, 0x116C0, 0x116CA, + 0x11700, + 0x1171A, + 0x1171D, + 0x1172C, + 0x11730, + 0x1173A, + 0x1173C, + 0x1173F, 0x118A0, 0x118C0, 0x118E0, @@ -8645,11 +8826,15 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0x11AC0, 0x11AF9, 0x12000, - 0x12399, + 0x1239A, 0x12400, 0x1246F, + 0x12480, + 0x12544, 0x13000, 0x1342F, + 0x14400, + 0x14647, 0x16800, 0x16A39, 0x16A40, @@ -8801,6 +8986,20 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0x1D7CC, 0x1D7CE, 0x1D800, + 0x1DA00, + 0x1DA37, + 0x1DA3B, + 0x1DA6D, + 0x1DA75, + 0x1DA76, + 0x1DA84, + 0x1DA85, + 0x1DA88, + 0x1DA89, + 0x1DA9B, + 0x1DAA0, + 0x1DAA1, + 0x1DAB0, 0x1E800, 0x1E8C5, 0x1E8D0, @@ -8885,6 +9084,8 @@ static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */ 0x2B735, 0x2B740, 0x2B81E, + 0x2B820, + 0x2CEA2, 0x2F800, 0x2FA1E, 0xE0001, @@ -9734,6 +9935,8 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Other, SB_Numeric, SB_Other, + SB_OLetter, + SB_Other, SB_Extend, SB_Other, SB_OLetter, @@ -10054,7 +10257,9 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Other, SB_OLetter, SB_Other, - SB_OLetter, + SB_Upper, + SB_Other, + SB_Lower, SB_Other, SB_OLetter, SB_Other, @@ -10129,9 +10334,7 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Other, SB_OLetter, SB_Other, - SB_Extend, SB_OLetter, - SB_Extend, SB_Other, SB_Numeric, SB_Other, @@ -10911,7 +11114,6 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Lower, SB_Upper, SB_Lower, - SB_Other, SB_Extend, SB_OLetter, SB_Extend, @@ -11018,7 +11220,7 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Lower, SB_Upper, SB_Lower, - SB_Other, + SB_OLetter, SB_Upper, SB_Lower, SB_Upper, @@ -11046,6 +11248,9 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Upper, SB_Other, SB_Upper, + SB_Lower, + SB_Upper, + SB_Lower, SB_Other, SB_OLetter, SB_Lower, @@ -11074,6 +11279,8 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Other, SB_OLetter, SB_Other, + SB_OLetter, + SB_Other, SB_Numeric, SB_OLetter, SB_Extend, @@ -11149,7 +11356,6 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Lower, SB_Other, SB_Lower, - SB_Other, SB_OLetter, SB_Extend, SB_STerm, @@ -11347,6 +11553,10 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_OLetter, SB_Other, SB_OLetter, + SB_Other, + SB_OLetter, + SB_Other, + SB_OLetter, SB_Extend, SB_Other, SB_Extend, @@ -11383,6 +11593,10 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Other, SB_OLetter, SB_Other, + SB_Upper, + SB_Other, + SB_Lower, + SB_Other, SB_Extend, SB_OLetter, SB_Extend, @@ -11420,6 +11634,7 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_OLetter, SB_STerm, SB_Other, + SB_Extend, SB_STerm, SB_Other, SB_Numeric, @@ -11427,6 +11642,10 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Other, SB_OLetter, SB_Other, + SB_STerm, + SB_Other, + SB_OLetter, + SB_Other, SB_OLetter, SB_Extend, SB_STerm, @@ -11434,6 +11653,17 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_STerm, SB_Other, SB_OLetter, + SB_Other, + SB_OLetter, + SB_Other, + SB_OLetter, + SB_Other, + SB_OLetter, + SB_Other, + SB_OLetter, + SB_STerm, + SB_Other, + SB_OLetter, SB_Extend, SB_Other, SB_Numeric, @@ -11460,6 +11690,8 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Other, SB_Extend, SB_Other, + SB_OLetter, + SB_Other, SB_Extend, SB_Other, SB_OLetter, @@ -11485,6 +11717,8 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_STerm, SB_Other, SB_STerm, + SB_OLetter, + SB_Extend, SB_Other, SB_OLetter, SB_Extend, @@ -11499,6 +11733,14 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Other, SB_Numeric, SB_Other, + SB_OLetter, + SB_Other, + SB_Extend, + SB_Other, + SB_Numeric, + SB_Other, + SB_STerm, + SB_Other, SB_Upper, SB_Lower, SB_Numeric, @@ -11517,6 +11759,10 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Other, SB_OLetter, SB_Other, + SB_OLetter, + SB_Other, + SB_OLetter, + SB_Other, SB_Numeric, SB_Other, SB_STerm, @@ -11664,6 +11910,20 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Other, SB_Numeric, SB_Other, + SB_Extend, + SB_Other, + SB_Extend, + SB_Other, + SB_Extend, + SB_Other, + SB_Extend, + SB_Other, + SB_STerm, + SB_Other, + SB_Extend, + SB_Other, + SB_Extend, + SB_Other, SB_OLetter, SB_Other, SB_Extend, @@ -11750,6 +12010,8 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */ SB_Other, SB_OLetter, SB_Other, + SB_OLetter, + SB_Other, SB_Format, SB_Other, SB_Format, @@ -11791,7 +12053,7 @@ static const UV VertSpace_invlist[] = { /* for ASCII/Latin1 */ }; static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ - 1480, /* Number of elements */ + 1524, /* Number of elements */ 148565664, /* Version and data structure type */ 0, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ @@ -11960,8 +12222,8 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0x859, 0x85C, 0x8A0, - 0x8B3, - 0x8E4, + 0x8B5, + 0x8E3, 0x904, 0x93A, 0x93D, @@ -12071,6 +12333,8 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0xAE4, 0xAE6, 0xAF0, + 0xAF9, + 0xAFA, 0xB01, 0xB04, 0xB05, @@ -12157,7 +12421,7 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0xC55, 0xC57, 0xC58, - 0xC5A, + 0xC5B, 0xC60, 0xC62, 0xC64, @@ -12212,7 +12476,7 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0xD4F, 0xD57, 0xD58, - 0xD60, + 0xD5F, 0xD62, 0xD64, 0xD66, @@ -12353,7 +12617,9 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0x1380, 0x1390, 0x13A0, - 0x13F5, + 0x13F6, + 0x13F8, + 0x13FE, 0x1401, 0x166D, 0x166F, @@ -12408,10 +12674,6 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0x193C, 0x1946, 0x1950, - 0x19B0, - 0x19C1, - 0x19C8, - 0x19CA, 0x19D0, 0x19DA, 0x1A00, @@ -12653,7 +12915,6 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0xA67E, 0xA67F, 0xA69E, - 0xA69F, 0xA6A0, 0xA6F0, 0xA6F2, @@ -12662,11 +12923,9 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0xA722, 0xA789, 0xA78B, - 0xA78F, - 0xA790, 0xA7AE, 0xA7B0, - 0xA7B2, + 0xA7B8, 0xA7F7, 0xA802, 0xA803, @@ -12689,6 +12948,8 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0xA8F8, 0xA8FB, 0xA8FC, + 0xA8FD, + 0xA8FE, 0xA900, 0xA90A, 0xA926, @@ -12750,10 +13011,8 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0xAB30, 0xAB5B, 0xAB5C, - 0xAB60, - 0xAB64, 0xAB66, - 0xABC0, + 0xAB70, 0xABE3, 0xABEB, 0xABEC, @@ -12802,7 +13061,7 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0xFE14, 0xFE15, 0xFE20, - 0xFE2E, + 0xFE30, 0xFE33, 0xFE35, 0xFE4D, @@ -12917,6 +13176,10 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0x10877, 0x10880, 0x1089F, + 0x108E0, + 0x108F3, + 0x108F4, + 0x108F6, 0x10900, 0x10916, 0x10920, @@ -12960,6 +13223,10 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0x10B92, 0x10C00, 0x10C49, + 0x10C80, + 0x10CB3, + 0x10CC0, + 0x10CF3, 0x11000, 0x11003, 0x11038, @@ -12992,20 +13259,34 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0x111B3, 0x111C1, 0x111C5, + 0x111CA, + 0x111CD, 0x111D0, 0x111DA, 0x111DB, + 0x111DC, + 0x111DD, 0x11200, 0x11212, 0x11213, 0x1122C, 0x11238, + 0x11280, + 0x11287, + 0x11288, + 0x11289, + 0x1128A, + 0x1128E, + 0x1128F, + 0x1129E, + 0x1129F, + 0x112A9, 0x112B0, 0x112DF, 0x112EB, 0x112F0, 0x112FA, - 0x11301, + 0x11300, 0x11304, 0x11305, 0x1130D, @@ -13027,6 +13308,8 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0x11349, 0x1134B, 0x1134E, + 0x11350, + 0x11351, 0x11357, 0x11358, 0x1135D, @@ -13049,6 +13332,9 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0x115B6, 0x115B8, 0x115C1, + 0x115D8, + 0x115DC, + 0x115DE, 0x11600, 0x11630, 0x11641, @@ -13061,6 +13347,10 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0x116B8, 0x116C0, 0x116CA, + 0x1171D, + 0x1172C, + 0x11730, + 0x1173A, 0x118A0, 0x118E0, 0x118EA, @@ -13069,11 +13359,15 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0x11AC0, 0x11AF9, 0x12000, - 0x12399, + 0x1239A, 0x12400, 0x1246F, + 0x12480, + 0x12544, 0x13000, 0x1342F, + 0x14400, + 0x14647, 0x16800, 0x16A39, 0x16A40, @@ -13191,6 +13485,18 @@ static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */ 0x1D7CC, 0x1D7CE, 0x1D800, + 0x1DA00, + 0x1DA37, + 0x1DA3B, + 0x1DA6D, + 0x1DA75, + 0x1DA76, + 0x1DA84, + 0x1DA85, + 0x1DA9B, + 0x1DAA0, + 0x1DAA1, + 0x1DAB0, 0x1E800, 0x1E8C5, 0x1E8D0, @@ -13582,6 +13888,8 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_Other, WB_Numeric, WB_Other, + WB_ALetter, + WB_Other, WB_Extend, WB_Other, WB_ALetter, @@ -13878,6 +14186,8 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_ALetter, WB_Other, WB_ALetter, + WB_Other, + WB_ALetter, WB_Extend, WB_Other, WB_ALetter, @@ -13919,10 +14229,6 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_Other, WB_Numeric, WB_Other, - WB_Extend, - WB_Other, - WB_Extend, - WB_Other, WB_Numeric, WB_Other, WB_ALetter, @@ -14163,7 +14469,6 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_Extend, WB_Other, WB_ALetter, - WB_Other, WB_Extend, WB_ALetter, WB_Extend, @@ -14177,8 +14482,6 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_ALetter, WB_Other, WB_ALetter, - WB_Other, - WB_ALetter, WB_Extend, WB_ALetter, WB_Extend, @@ -14200,6 +14503,8 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_Other, WB_ALetter, WB_Other, + WB_ALetter, + WB_Other, WB_Numeric, WB_ALetter, WB_Extend, @@ -14263,8 +14568,6 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_ALetter, WB_Other, WB_ALetter, - WB_Other, - WB_ALetter, WB_Extend, WB_Other, WB_Extend, @@ -14437,6 +14740,10 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_ALetter, WB_Other, WB_ALetter, + WB_Other, + WB_ALetter, + WB_Other, + WB_ALetter, WB_Extend, WB_Other, WB_Extend, @@ -14471,6 +14778,10 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_Other, WB_ALetter, WB_Other, + WB_ALetter, + WB_Other, + WB_ALetter, + WB_Other, WB_Extend, WB_ALetter, WB_Extend, @@ -14503,15 +14814,29 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_Extend, WB_ALetter, WB_Other, + WB_Extend, + WB_Other, WB_Numeric, WB_ALetter, WB_Other, WB_ALetter, WB_Other, WB_ALetter, + WB_Other, + WB_ALetter, WB_Extend, WB_Other, WB_ALetter, + WB_Other, + WB_ALetter, + WB_Other, + WB_ALetter, + WB_Other, + WB_ALetter, + WB_Other, + WB_ALetter, + WB_Other, + WB_ALetter, WB_Extend, WB_Other, WB_Numeric, @@ -14538,6 +14863,8 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_Other, WB_Extend, WB_Other, + WB_ALetter, + WB_Other, WB_Extend, WB_Other, WB_ALetter, @@ -14564,6 +14891,9 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_Extend, WB_Other, WB_ALetter, + WB_Extend, + WB_Other, + WB_ALetter, WB_Other, WB_Numeric, WB_Other, @@ -14572,6 +14902,10 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_Other, WB_Numeric, WB_Other, + WB_Extend, + WB_Other, + WB_Numeric, + WB_Other, WB_ALetter, WB_Numeric, WB_Other, @@ -14589,6 +14923,10 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_Other, WB_ALetter, WB_Other, + WB_ALetter, + WB_Other, + WB_ALetter, + WB_Other, WB_Numeric, WB_Other, WB_ALetter, @@ -14702,6 +15040,18 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */ WB_Other, WB_Numeric, WB_Other, + WB_Extend, + WB_Other, + WB_Extend, + WB_Other, + WB_Extend, + WB_Other, + WB_Extend, + WB_Other, + WB_Extend, + WB_Other, + WB_Extend, + WB_Other, WB_ALetter, WB_Other, WB_Extend, @@ -14821,7 +15171,7 @@ static const UV XPerlSpace_invlist[] = { /* for ASCII/Latin1 */ }; static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ - 1261, /* Number of elements */ + 1297, /* Number of elements */ 148565664, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ @@ -14929,8 +15279,8 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0x840, 0x859, 0x8A0, - 0x8B3, - 0x8E4, + 0x8B5, + 0x8E3, 0x8EA, 0x8F0, 0x93C, @@ -15028,6 +15378,8 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0xAE4, 0xAE6, 0xAF0, + 0xAF9, + 0xAFA, 0xB01, 0xB04, 0xB05, @@ -15109,7 +15461,7 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0xC55, 0xC57, 0xC58, - 0xC5A, + 0xC5B, 0xC60, 0xC64, 0xC66, @@ -15160,7 +15512,7 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0xD4F, 0xD57, 0xD58, - 0xD60, + 0xD5F, 0xD64, 0xD66, 0xD70, @@ -15309,7 +15661,9 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0x1380, 0x1390, 0x13A0, - 0x13F5, + 0x13F6, + 0x13F8, + 0x13FE, 0x1401, 0x166D, 0x166F, @@ -15553,7 +15907,7 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0x3400, 0x4DB6, 0x4E00, - 0x9FCD, + 0x9FD6, 0xA000, 0xA48D, 0xA4D0, @@ -15567,19 +15921,15 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0xA674, 0xA67C, 0xA67F, - 0xA69E, - 0xA69F, 0xA6F0, 0xA717, 0xA720, 0xA722, 0xA789, 0xA78B, - 0xA78F, - 0xA790, 0xA7AE, 0xA7B0, - 0xA7B2, + 0xA7B8, 0xA7F7, 0xA802, 0xA803, @@ -15598,6 +15948,8 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0xA8F8, 0xA8FB, 0xA8FC, + 0xA8FD, + 0xA8FE, 0xA900, 0xA92B, 0xA930, @@ -15649,10 +16001,8 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0xAB30, 0xAB5B, 0xAB5C, - 0xAB60, - 0xAB64, 0xAB66, - 0xABC0, + 0xAB70, 0xABEB, 0xABF0, 0xABFA, @@ -15776,6 +16126,10 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0x10877, 0x10880, 0x1089F, + 0x108E0, + 0x108F3, + 0x108F4, + 0x108F6, 0x10900, 0x10916, 0x10920, @@ -15812,6 +16166,10 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0x10B92, 0x10C00, 0x10C49, + 0x10C80, + 0x10CB3, + 0x10CC0, + 0x10CF3, 0x11000, 0x11046, 0x11066, @@ -15836,17 +16194,29 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0x111C5, 0x111D0, 0x111DB, + 0x111DC, + 0x111DD, 0x11200, 0x11212, 0x11213, 0x11235, 0x11237, 0x11238, + 0x11280, + 0x11287, + 0x11288, + 0x11289, + 0x1128A, + 0x1128E, + 0x1128F, + 0x1129E, + 0x1129F, + 0x112A9, 0x112B0, 0x112E9, 0x112F0, 0x112FA, - 0x11301, + 0x11300, 0x11304, 0x11305, 0x1130D, @@ -15866,6 +16236,8 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0x11349, 0x1134B, 0x1134D, + 0x11350, + 0x11351, 0x11357, 0x11358, 0x1135D, @@ -15882,6 +16254,8 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0x115B6, 0x115B8, 0x115BF, + 0x115D8, + 0x115DE, 0x11600, 0x1163F, 0x11640, @@ -15894,6 +16268,12 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0x116B6, 0x116C0, 0x116CA, + 0x11700, + 0x1171A, + 0x1171D, + 0x1172B, + 0x11730, + 0x1173A, 0x118A0, 0x118EA, 0x118FF, @@ -15901,11 +16281,15 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0x11AC0, 0x11AF9, 0x12000, - 0x12399, + 0x1239A, 0x12400, 0x1246F, + 0x12480, + 0x12544, 0x13000, 0x1342F, + 0x14400, + 0x14647, 0x16800, 0x16A39, 0x16A40, @@ -16084,12 +16468,14 @@ static const UV XPosixAlnum_invlist[] = { /* for ASCII/Latin1 */ 0x2B735, 0x2B740, 0x2B81E, + 0x2B820, + 0x2CEA2, 0x2F800, 0x2FA1E }; static const UV XPosixAlpha_invlist[] = { /* for ASCII/Latin1 */ - 1201, /* Number of elements */ + 1235, /* Number of elements */ 148565664, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ @@ -16197,8 +16583,8 @@ static const UV XPosixAlpha_invlist[] = { /* for ASCII/Latin1 */ 0x840, 0x859, 0x8A0, - 0x8B3, - 0x8E4, + 0x8B5, + 0x8E3, 0x8EA, 0x8F0, 0x93C, @@ -16292,6 +16678,8 @@ static const UV XPosixAlpha_invlist[] = { /* for ASCII/Latin1 */ 0xAD1, 0xAE0, 0xAE4, + 0xAF9, + 0xAFA, 0xB01, 0xB04, 0xB05, @@ -16369,7 +16757,7 @@ static const UV XPosixAlpha_invlist[] = { /* for ASCII/Latin1 */ 0xC55, 0xC57, 0xC58, - 0xC5A, + 0xC5B, 0xC60, 0xC64, 0xC81, @@ -16416,7 +16804,7 @@ static const UV XPosixAlpha_invlist[] = { /* for ASCII/Latin1 */ 0xD4F, 0xD57, 0xD58, - 0xD60, + 0xD5F, 0xD64, 0xD7A, 0xD80, @@ -16553,7 +16941,9 @@ static const UV XPosixAlpha_invlist[] = { /* for ASCII/Latin1 */ 0x1380, 0x1390, 0x13A0, - 0x13F5, + 0x13F6, + 0x13F8, + 0x13FE, 0x1401, 0x166D, 0x166F, @@ -16787,7 +17177,7 @@ static const UV XPosixAlpha_invlist[] = { /* for ASCII/Latin1 */ 0x3400, 0x4DB6, 0x4E00, - 0x9FCD, + 0x9FD6, 0xA000, 0xA48D, 0xA4D0, @@ -16803,19 +17193,15 @@ static const UV XPosixAlpha_invlist[] = { /* for ASCII/Latin1 */ 0xA674, 0xA67C, 0xA67F, - 0xA69E, - 0xA69F, 0xA6F0, 0xA717, 0xA720, 0xA722, 0xA789, 0xA78B, - 0xA78F, - 0xA790, 0xA7AE, 0xA7B0, - 0xA7B2, + 0xA7B8, 0xA7F7, 0xA802, 0xA803, @@ -16832,6 +17218,8 @@ static const UV XPosixAlpha_invlist[] = { /* for ASCII/Latin1 */ 0xA8F8, 0xA8FB, 0xA8FC, + 0xA8FD, + 0xA8FE, 0xA90A, 0xA92B, 0xA930, @@ -16883,10 +17271,8 @@ static const UV XPosixAlpha_invlist[] = { /* for ASCII/Latin1 */ 0xAB30, 0xAB5B, 0xAB5C, - 0xAB60, - 0xAB64, 0xAB66, - 0xABC0, + 0xAB70, 0xABEB, 0xAC00, 0xD7A4, @@ -17004,6 +17390,10 @@ static const UV XPosixAlpha_invlist[] = { /* for ASCII/Latin1 */ 0x10877, 0x10880, 0x1089F, + 0x108E0, + 0x108F3, + 0x108F4, + 0x108F6, 0x10900, 0x10916, 0x10920, @@ -17040,6 +17430,10 @@ static const UV XPosixAlpha_invlist[] = { /* for ASCII/Latin1 */ 0x10B92, 0x10C00, 0x10C49, + 0x10C80, + 0x10CB3, + 0x10CC0, + 0x10CF3, 0x11000, 0x11046, **** PATCH TRUNCATED AT 2000 LINES -- 28308 NOT SHOWN **** -- Perl5 Master Repository