In perl.git, the branch blead has been updated <https://perl5.git.perl.org/perl.git/commitdiff/74333e98d322c14118a2c17040aba2c17c0b2152?hp=3000ebb8835feceffa00b8081072ed74a39db7e3>
- Log ----------------------------------------------------------------- commit 74333e98d322c14118a2c17040aba2c17c0b2152 Author: Karl Williamson <[email protected]> Date: Sun Jun 2 18:51:21 2019 -0600 /\p{InFoo} should only match blocks, or be user-defined For a property \p{Block=Foo}, we allow the synonym \p{InFoo} as documented variously, including perluniprops, even though this usage is discouraged, as a new Unicode release used in a new version of Perl could cause the synonym to no longer work. Prior to this commit, we erroneously allowed the synonym for other properties, such as \p{InKana} or \p{InS}. ----------------------------------------------------------------------- Summary of changes: charclass_invlists.h | 2 +- lib/unicore/mktables | 1 + lib/unicore/uni_keywords.pl | 2 +- regcharclass.h | 2 +- regcomp.c | 16 +++++++++++----- uni_keywords.h | 2 +- 6 files changed, 16 insertions(+), 9 deletions(-) diff --git a/charclass_invlists.h b/charclass_invlists.h index 566d22f4d4..20284301e7 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -395301,7 +395301,7 @@ static const U8 WB_table[23][23] = { * 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt * 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt * 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt - * b3d90fc23817ea4e33e9a90107c0a6c7b23314efd5712905ed172624d5524693 lib/unicore/mktables + * a53648677d262457dda0b22efba8820d2a45ca6ebae01f8c73d30db380eb83b5 lib/unicore/mktables * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 9860bf7832..368794dcc7 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -19133,6 +19133,7 @@ Test_GCB("1100 $nobreak 1161"); # Bug #70940 Expect(0, 0x2028, '\p{Print}', ""); # Bug # 71722 Expect(0, 0x2029, '\p{Print}', ""); # Bug # 71722 Expect(1, 0xFF10, '\p{XDigit}', ""); # Bug # 71726 +Error('\p{InKana}'); # 'Kana' is not a block so InKana shouldn't compile # Make sure this gets tested; it was not part of the official test suite at # the time this was added. Note that this is as it would appear in the diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl index 22fd8c60cb..5c2f2a9859 100644 --- a/lib/unicore/uni_keywords.pl +++ b/lib/unicore/uni_keywords.pl @@ -1261,7 +1261,7 @@ # 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt # 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt # 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt -# b3d90fc23817ea4e33e9a90107c0a6c7b23314efd5712905ed172624d5524693 lib/unicore/mktables +# a53648677d262457dda0b22efba8820d2a45ca6ebae01f8c73d30db380eb83b5 lib/unicore/mktables # a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version # 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl # 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl diff --git a/regcharclass.h b/regcharclass.h index 39d3a4b474..cf25342ee4 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -1901,7 +1901,7 @@ * 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt * 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt * 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt - * b3d90fc23817ea4e33e9a90107c0a6c7b23314efd5712905ed172624d5524693 lib/unicore/mktables + * a53648677d262457dda0b22efba8820d2a45ca6ebae01f8c73d30db380eb83b5 lib/unicore/mktables * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 830144f6afdd047b009754ffa06134397268f6638837fe85283483eb0cfdd558 regen/regcharclass.pl diff --git a/regcomp.c b/regcomp.c index 8125ce03ed..5068a0a1a2 100644 --- a/regcomp.c +++ b/regcomp.c @@ -22574,8 +22574,7 @@ Perl_parse_uniprop_string(pTHX_ int slash_pos = -1; /* Where the '/' is found, or negative if none */ int table_index = 0; /* The entry number for this property in the table of all Unicode property names */ - bool starts_with_In_or_Is = FALSE; /* ? Does the name start with 'In' or - 'Is' */ + bool starts_with_Is = FALSE; /* ? Does the name start with 'Is' */ Size_t lookup_offset = 0; /* Used to ignore the first few characters of the normalized name in certain situations */ Size_t non_pkg_begin = 0; /* Offset of first byte in 'name' that isn't @@ -23053,7 +23052,11 @@ Perl_parse_uniprop_string(pTHX_ && name[non_pkg_begin+0] == 'I' && (name[non_pkg_begin+1] == 'n' || name[non_pkg_begin+1] == 's')) { - starts_with_In_or_Is = TRUE; + /* Names that start with In have different characterstics than those + * that start with Is */ + if (name[non_pkg_begin+1] == 's') { + starts_with_Is = TRUE; + } } else { could_be_user_defined = FALSE; @@ -23392,8 +23395,11 @@ Perl_parse_uniprop_string(pTHX_ /* If it didn't find the property ... */ if (table_index == 0) { - /* Try again stripping off any initial 'In' or 'Is' */ - if (starts_with_In_or_Is) { + /* Try again stripping off any initial 'Is'. This is because we + * promise that an initial Is is optional. The same isn't true of + * names that start with 'In'. Those can match only blocks, and the + * lookup table already has those accounted for. */ + if (starts_with_Is) { lookup_name += 2; lookup_len -= 2; equals_pos -= 2; diff --git a/uni_keywords.h b/uni_keywords.h index 6eebad3e26..d0d20e063e 100644 --- a/uni_keywords.h +++ b/uni_keywords.h @@ -7284,7 +7284,7 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) { * 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt * 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt * 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt - * b3d90fc23817ea4e33e9a90107c0a6c7b23314efd5712905ed172624d5524693 lib/unicore/mktables + * a53648677d262457dda0b22efba8820d2a45ca6ebae01f8c73d30db380eb83b5 lib/unicore/mktables * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl -- Perl5 Master Repository
