In perl.git, the branch blead has been updated <https://perl5.git.perl.org/perl.git/commitdiff/0e9b0dc26af667dfb49d2d2a9537ebda6f50c6db?hp=1415b48ffc957366424380f485fb6a8f97638a26>
- Log ----------------------------------------------------------------- commit 0e9b0dc26af667dfb49d2d2a9537ebda6f50c6db Author: Karl Williamson <[email protected]> Date: Mon Aug 26 09:21:11 2019 -0600 regcomp.c: Emit more info if we have it in a panic msg commit 2f15a8fa895419258c884bc74aa8e79b7089bcce Author: Karl Williamson <[email protected]> Date: Mon Aug 26 09:15:09 2019 -0600 regcomp.c: Change message into a panic Because that's what is happening, and the next commit would otherwise say we need to document this diagnostic, but it's one that the observer can do nothing about except file a bug report commit 4e96facf32fc51bf161361df8303b29ae85a035c Author: Karl Williamson <[email protected]> Date: Mon Aug 26 08:35:21 2019 -0600 regcomp.c: Move code within #ifdef This code does nothing except an 'if' without acting on it, as a result of an #ifdef within it, so might as well move the whole thing inside the #ifdef commit ba2a280355a1e23b0bc7e7b92d4483daba7f86f8 Author: Karl Williamson <[email protected]> Date: Sun Aug 25 22:14:52 2019 -0600 util.c: Correct spelling in rarely compiled code On Z/OS, this does get compiled, and fails due to the misspellings. commit 5c0563e751095d7a7aad52c9fc3e203cbeeb4871 Author: Karl Williamson <[email protected]> Date: Sun Aug 25 18:49:02 2019 -0600 regen/mk_invlists.pl: Fix /i rules for non-ASCII machines Two variables weren't getting initialized properly in one code path, with the result that the case folding tables were pretty much garbage, but not on ASCII platforms. commit f4d6df297dfa1ce0ef62433181961bd7e80b2497 Author: Karl Williamson <[email protected]> Date: Sun Aug 25 18:41:44 2019 -0600 regen/mk_invlists.pl: Never remap 0 0 is a special marker, and shouldn't be remapped. It would be unlikely to be so, but this makes sure. 
commit 9a9a3246d59159e9717e27dee127f17f7158ac35 Author: Karl Williamson <[email protected]> Date: Sun Aug 25 18:21:54 2019 -0600 regen/mk_invlists.pl: inversion map requires a final entry Inversion maps are supposed to have an entry for what to do above the Unicode range. This subroutine crafts a custom map that was missing that. commit 4caef9c39de07c61112dd9aaf70c2464ee774420 Author: Karl Williamson <[email protected]> Date: Sun Aug 25 18:19:02 2019 -0600 regcomp.c: Use macro to remove some #ifdef EBCDIC lines commit 869e073ac72c68a1499d9a875c8344aa6b2d66cd Author: Karl Williamson <[email protected]> Date: Sun Aug 25 18:13:07 2019 -0600 regcomp.c: Some code paths didn't terminate an inversion list iteration There were a couple paths through the code that failed to call invlist_iterfinish(). This was a bug everywhere, but prevented z/OS from completely compiling. ----------------------------------------------------------------------- Summary of changes: charclass_invlists.h | 86 ++++++++++++++++++++++++--------------------- lib/unicore/uni_keywords.pl | 2 +- regcomp.c | 50 +++++++++++++------------- regen/mk_invlists.pl | 20 +++++++---- uni_keywords.h | 2 +- util.c | 4 +-- 6 files changed, 88 insertions(+), 76 deletions(-) diff --git a/charclass_invlists.h b/charclass_invlists.h index 2f5feb5f80..70ea70894d 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -29371,7 +29371,7 @@ static const GCB_enum _Perl_GCB_invmap[] = { /* for EBCDIC 037 */ # if 'A' == 65 /* ASCII/Latin1 */ static const UV _Perl_IVCF_invlist[] = { /* for ASCII/Latin1 */ - 1309, /* Number of elements */ + 1310, /* Number of elements */ 148565664, /* Version and data structure type */ 0, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ @@ -30683,7 +30683,8 @@ static const UV _Perl_IVCF_invlist[] = { /* for ASCII/Latin1 */ 0x118E0, 0x16E60, 0x16E80, - 0x1E922 + 0x1E922, + 0x1E944 }; # endif /* ASCII/Latin1 */ @@ -32243,7 +32244,8 @@ static const int 
_Perl_IVCF_invmap[] = { /* for ASCII/Latin1 */ 0, 0x16E40, 0, - 0x1E900 + 0x1E900, + 0 }; # endif /* ASCII/Latin1 */ @@ -32258,7 +32260,7 @@ static const int _Perl_IVCF_invmap[] = { /* for ASCII/Latin1 */ && '$' == 91 && '@' == 124 && '`' == 121 && '\n' == 21 static const UV _Perl_IVCF_invlist[] = { /* for EBCDIC 1047 */ - 1323, /* Number of elements */ + 1325, /* Number of elements */ 148565664, /* Version and data structure type */ 0, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ @@ -32276,6 +32278,7 @@ static const UV _Perl_IVCF_invlist[] = { /* for EBCDIC 1047 */ 0x8A, 0x8C, 0x8D, + 0x8E, 0x8F, 0x91, 0x92, @@ -33584,7 +33587,8 @@ static const UV _Perl_IVCF_invlist[] = { /* for EBCDIC 1047 */ 0x118E0, 0x16E60, 0x16E80, - 0x1E922 + 0x1E922, + 0x1E944 }; # endif /* EBCDIC 1047 */ @@ -33698,7 +33702,7 @@ static const unsigned int IVCF_AUX_TABLE_12[] = { }; static const unsigned int IVCF_AUX_TABLE_13[] = { - 0xB5, + 0xA0, 0x39C }; @@ -33840,32 +33844,33 @@ static const U8 IVCF_AUX_TABLE_lengths[] = { static const int _Perl_IVCF_invmap[] = { /* for EBCDIC 1047 */ 0, - 0xC0, + 0x62, IVCF_use_AUX_TABLE_1, - 0xC6, + 0x68, 0, - 0xC6, + 0x71, 0x1E9E, 0, - 0xD8, + 0x80, 0, - 0x41, + 0xC1, 0, - 0xC6, - 0xD8, + 0xAC, + 0xBA, + 0xAE, 0, - 0x41, + 0xD1, IVCF_use_AUX_TABLE_2, - 0x4C, + 0xD3, 0, - 0xC6, + 0x9E, 0, IVCF_use_AUX_TABLE_3, - 0x54, + 0xE3, 0, - 0xC6, + 0xEB, 0, - 0xD8, + 0xFB, 0x178, 0, 0x100, @@ -34648,7 +34653,7 @@ static const int _Perl_IVCF_invmap[] = { /* for EBCDIC 1047 */ 0, 0x1E94, 0, - 0xDF, + 0x59, 0, 0x1EA0, 0, @@ -35161,7 +35166,8 @@ static const int _Perl_IVCF_invmap[] = { /* for EBCDIC 1047 */ 0, 0x16E40, 0, - 0x1E900 + 0x1E900, + 0 }; # endif /* EBCDIC 1047 */ @@ -35193,7 +35199,6 @@ static const UV _Perl_IVCF_invlist[] = { /* for EBCDIC 037 */ 0x81, 0x8A, 0x8C, - 0x8D, 0x8F, 0x91, 0x92, @@ -36502,7 +36507,8 @@ static const UV _Perl_IVCF_invlist[] = { /* for EBCDIC 037 */ 0x118E0, 0x16E60, 0x16E80, - 0x1E922 + 0x1E922, + 
0x1E944 }; # endif /* EBCDIC 037 */ @@ -36616,7 +36622,7 @@ static const unsigned int IVCF_AUX_TABLE_12[] = { }; static const unsigned int IVCF_AUX_TABLE_13[] = { - 0xB5, + 0xA0, 0x39C }; @@ -36758,32 +36764,31 @@ static const U8 IVCF_AUX_TABLE_lengths[] = { static const int _Perl_IVCF_invmap[] = { /* for EBCDIC 037 */ 0, - 0xC0, + 0x62, IVCF_use_AUX_TABLE_1, - 0xC6, + 0x68, 0, - 0xC6, + 0x71, 0x1E9E, 0, - 0xD8, + 0x80, 0, - 0x41, + 0xC1, 0, - 0xC6, - 0xD8, + 0xAC, 0, - 0x41, + 0xD1, IVCF_use_AUX_TABLE_2, - 0x4C, + 0xD3, 0, - 0xC6, + 0x9E, 0, IVCF_use_AUX_TABLE_3, - 0x54, + 0xE3, 0, - 0xC6, + 0xEB, 0, - 0xD8, + 0xFB, 0x178, 0, 0x100, @@ -37566,7 +37571,7 @@ static const int _Perl_IVCF_invmap[] = { /* for EBCDIC 037 */ 0, 0x1E94, 0, - 0xDF, + 0x59, 0, 0x1EA0, 0, @@ -38079,7 +38084,8 @@ static const int _Perl_IVCF_invmap[] = { /* for EBCDIC 037 */ 0, 0x16E40, 0, - 0x1E900 + 0x1E900, + 0 }; # endif /* EBCDIC 037 */ @@ -395305,5 +395311,5 @@ static const U8 WB_table[23][23] = { * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl - * 61ea8132bb9ea5c637609e2d026b0b85ce17d6bec544c2f08ce411e6f65e8386 regen/mk_invlists.pl + * 44a3e3e2047a58e56ed8e3338ad85bedabae470dd119bf0862ca8129545ebf8a regen/mk_invlists.pl * ex: set ro: */ diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl index a4183fc324..b1640d7583 100644 --- a/lib/unicore/uni_keywords.pl +++ b/lib/unicore/uni_keywords.pl @@ -1265,5 +1265,5 @@ # a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version # 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl # 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl -# 
61ea8132bb9ea5c637609e2d026b0b85ce17d6bec544c2f08ce411e6f65e8386 regen/mk_invlists.pl +# 44a3e3e2047a58e56ed8e3338ad85bedabae470dd119bf0862ca8129545ebf8a regen/mk_invlists.pl # ex: set ro: diff --git a/regcomp.c b/regcomp.c index b2cc6672cb..f7164ed0a8 100644 --- a/regcomp.c +++ b/regcomp.c @@ -185,9 +185,7 @@ struct RExC_state_t { I32 in_lookahead; I32 contains_locale; I32 override_recoding; -#ifdef EBCDIC - I32 recode_x_to_native; -#endif + I32 recode_x_to_native; I32 in_multi_char_class; struct reg_code_blocks *code_blocks;/* positions of literal (?{}) within pattern */ @@ -245,7 +243,6 @@ struct RExC_state_t { #define RExC_seen_d_op (pRExC_state->seen_d_op) /* Seen something that differs under /d from /u ? */ - #ifdef RE_TRACK_PATTERN_OFFSETS # define RExC_offsets (RExC_rxi->u.offsets) /* I am not like the others */ @@ -276,9 +273,15 @@ struct RExC_state_t { #define RExC_in_lookbehind (pRExC_state->in_lookbehind) #define RExC_in_lookahead (pRExC_state->in_lookahead) #define RExC_contains_locale (pRExC_state->contains_locale) +#define RExC_recode_x_to_native (pRExC_state->recode_x_to_native) + #ifdef EBCDIC -# define RExC_recode_x_to_native (pRExC_state->recode_x_to_native) +# define SET_recode_x_to_native(x) \ + STMT_START { RExC_recode_x_to_native = (x); } STMT_END +#else +# define SET_recode_x_to_native(x) NOOP #endif + #define RExC_in_multi_char_class (pRExC_state->in_multi_char_class) #define RExC_frame_head (pRExC_state->frame_head) #define RExC_frame_last (pRExC_state->frame_last) @@ -7626,9 +7629,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_in_lookbehind = 0; RExC_in_lookahead = 0; RExC_seen_zerolen = *exp == '^' ? 
-1 : 0; -#ifdef EBCDIC RExC_recode_x_to_native = 0; -#endif RExC_in_multi_char_class = 0; RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = RExC_precomp = exp; @@ -12967,11 +12968,9 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, sv_catsv(substitute_parse, value_sv); sv_catpv(substitute_parse, ")"); -#ifdef EBCDIC /* The value should already be native, so no need to convert on EBCDIC * platforms.*/ assert(! RExC_recode_x_to_native); -#endif } else { /* \N{U+...} */ @@ -13104,12 +13103,9 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, sv_catpvs(substitute_parse, ")"); -#ifdef EBCDIC /* The values are Unicode, and therefore have to be converted to native * on a non-Unicode (meaning non-ASCII) platform. */ - RExC_recode_x_to_native = 1; -#endif - + SET_recode_x_to_native(1); } /* Here, we have the string the name evaluates to, ready to be parsed, @@ -13134,9 +13130,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, RExC_start = save_start; RExC_parse = endbrace; RExC_end = orig_end; -#ifdef EBCDIC - RExC_recode_x_to_native = 0; -#endif + SET_recode_x_to_native(0); SvREFCNT_dec_NN(substitute_parse); @@ -14182,13 +14176,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) UPDATE_WARNINGS_LOC(p - 1); ender = result; - if (ender < 0x100) { #ifdef EBCDIC + if (ender < 0x100) { if (RExC_recode_x_to_native) { ender = LATIN1_TO_NATIVE(ender); } -#endif } +#endif break; } case 'c': @@ -18890,7 +18884,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, full_cp_count += this_end - this_start + 1; } - invlist_iterfinish(cp_list); /* At the end of the loop, we count how many bits differ from * the bits in lowest code point, call the count 'd'. 
If the @@ -18919,8 +18912,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, ret = reganode(pRExC_state, op, lowest_cp); FLAGS(REGNODE_p(ret)) = ANYOFM_mask; } + + done_anyofm: + invlist_iterfinish(cp_list); } - done_anyofm: if (inverted) { _invlist_invert(cp_list); @@ -20276,11 +20271,16 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ SvPVCLEAR(sv); - if (OP(o) > REGNODE_MAX) /* regnode.type is unsigned */ - /* It would be nice to FAIL() here, but this may be called from - regexec.c, and it would be hard to supply pRExC_state. */ - Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d", - (int)OP(o), (int)REGNODE_MAX); + if (OP(o) > REGNODE_MAX) { /* regnode.type is unsigned */ + if (pRExC_state) { /* This gives more info, if we have it */ + FAIL3("panic: corrupted regexp opcode %d > %d", + (int)OP(o), (int)REGNODE_MAX); + } + else { + Perl_croak(aTHX_ "panic: corrupted regexp opcode %d > %d", + (int)OP(o), (int)REGNODE_MAX); + } + } sv_catpv(sv, PL_reg_name[OP(o)]); /* Take off const! 
*/ k = PL_regkind[OP(o)]; diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl index 8a0c1f071f..6853a64272 100644 --- a/regen/mk_invlists.pl +++ b/regen/mk_invlists.pl @@ -1075,6 +1075,9 @@ sub _Perl_IVCF { push @invlist, $sorted_folds[-1] + 1; push @invmap, 0; + push @invlist, 0x110000; + push @invmap, 0; + # All Unicode versions have some places where multiple code points map to # the same one, so the format always has an 'l' return \@invlist, \@invmap, 'al', $default; @@ -2554,10 +2557,10 @@ foreach my $prop (@props) { my @invlist; my @invmap; - my $map_format; + my $map_format = 0;; my $map_default; - my $maps_to_code_point; - my $to_adjust; + my $maps_to_code_point = 0; + my $to_adjust = 0; my $same_in_all_code_pages; if ($is_local_sub) { my @return = eval $lookup_prop; @@ -2598,12 +2601,15 @@ foreach my $prop (@props) { @invmap = @$map_ref; $map_format = $format; $map_default = $default; - $maps_to_code_point = $map_format =~ / a ($ | [^r] ) /x; - $to_adjust = $map_format =~ /a/; } } } + if ($map_format) { + $maps_to_code_point = $map_format =~ / a ($ | [^r] ) /x; + $to_adjust = $map_format =~ /a/; + } + # Re-order the Unicode code points to native ones for this platform. # This is only needed for code points below 256, because native code # points are only in that range. For inversion maps of properties @@ -2712,8 +2718,8 @@ foreach my $prop (@props) { # Do convert to native for maps to single code points. # There are some properties that have a few outlier # maps that aren't code points, so the above test - # skips those. - $bucket = a2n($invmap[0]); + # skips those. 0 is never remapped. + $bucket = $invmap[0] == 0 ? 
0 : a2n($invmap[0]); } else { $bucket = $invmap[0]; } diff --git a/uni_keywords.h b/uni_keywords.h index 6ee2494f0d..c160321c0c 100644 --- a/uni_keywords.h +++ b/uni_keywords.h @@ -7288,6 +7288,6 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) { * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl - * 61ea8132bb9ea5c637609e2d026b0b85ce17d6bec544c2f08ce411e6f65e8386 regen/mk_invlists.pl + * 44a3e3e2047a58e56ed8e3338ad85bedabae470dd119bf0862ca8129545ebf8a regen/mk_invlists.pl * e80fb4dd6c15dc1b543793552ab5c7255a0f7b50d6ca9cce3a30a4dadf187b53 regen/mph.pl * ex: set ro: */ diff --git a/util.c b/util.c index 359f3b6d85..dc2e1bd489 100644 --- a/util.c +++ b/util.c @@ -6425,8 +6425,8 @@ Perl_get_c_backtrace(pTHX_ int depth, int skip) Safefree(raw_frames); return bt; #else - PERL_UNUSED_ARGV(depth); - PERL_UNUSED_ARGV(skip); + PERL_UNUSED_ARG(depth); + PERL_UNUSED_ARG(skip); return NULL; #endif } -- Perl5 Master Repository
