In perl.git, the branch smoke-me/khw-134329 has been updated <https://perl5.git.perl.org/perl.git/commitdiff/7d164336f2eab2eb2df2b9ff97575fe1ab0ad1fc?hp=b7365028d37ece23d3543a972317950f8322e7b8>
- Log ----------------------------------------------------------------- commit 7d164336f2eab2eb2df2b9ff97575fe1ab0ad1fc Author: Karl Williamson <[email protected]> Date: Mon Aug 26 09:21:11 2019 -0600 regcomp.c: Emit more info if we have it in a panic msg commit 78baeba51c178147c7343f10d81382b8ce7964b6 Author: Karl Williamson <[email protected]> Date: Mon Aug 26 09:15:09 2019 -0600 regcomp.c: Change message into a panic Because that's what is happening, and the next commit would otherwise say we need to document this diagnostic, but it's one that the observer can do nothing about except file a bug report commit 3944ef35dff789a3bce0de974108bc35095865e9 Author: Karl Williamson <[email protected]> Date: Mon Aug 26 08:35:21 2019 -0600 regcomp.c: Move code within #ifdef This code does nothing except an 'if' without acting on it, as a result of an #ifdef within it, so might as well move the whole thing inside the #ifdef commit d0d560aa1850d3c79aa7669f6aad2542d51a4f59 Author: Karl Williamson <[email protected]> Date: Sun Aug 25 22:14:52 2019 -0600 util.c: Correct spelling in rarely compiled code On Z/OS, this does get compiled, and fails due to the missepllings. commit 9c156728992e2ae6c8cece193dc40cb9f72d8883 Author: Karl Williamson <[email protected]> Date: Sun Aug 25 18:49:02 2019 -0600 regen/mk_invlists.pl: Fix /i rules for non-ASCII machines Two variable weren't getting initialized properly in one code path, with the result that the case folding tables were pretty much garbage, but not on ASCII platforms. commit ab77419355fe80e42a9af0497163f680d67080f0 Author: Karl Williamson <[email protected]> Date: Sun Aug 25 18:41:44 2019 -0600 regen/mk_invlists.pl: Never remap 0 0 is a special marker, and shouldn't be remapped. It would be unlikely to be so, but this makes sure. commit b0b45fe7558a07eb1b54fe745ec9f399f18ad734 Author: Karl Williamson <[email protected]> Date: Sun Aug 25 18:21:54 2019 -0600 regen/mk_invlists.pl: inversion map requires a final entry Inversion maps are supposed to have an entry for what to do above the Unicode range. This subroutine crafts a custom map that was missing that. commit 7f0c094607d1d422be6b1d59d8d4d61e2c69f856 Author: Karl Williamson <[email protected]> Date: Sun Aug 25 18:19:02 2019 -0600 regcomp.c: Use macro to remove some #ifdef EBCDIC lines commit 2ea8d0b3a13eaa7e187cff05c80eb84dfdcb75d2 Author: Karl Williamson <[email protected]> Date: Sun Aug 25 18:13:07 2019 -0600 regcomp.c: Some code paths didn't terminate an inversion list iteration There were a couple paths through the code that failed to call invlist_iterfinish(). This was a bug everywhere, but prevented z/OS from completely compiling. commit 62ae67bb0932b1a2749e27b6d2fdc14c08f49917 Author: Karl Williamson <[email protected]> Date: Mon Nov 26 20:16:09 2018 -0700 XXX need to do process; figure name Configure Fix alignment needed probe ----------------------------------------------------------------------- Summary of changes: Configure | 52 +++++++++++++-------------- charclass_invlists.h | 86 ++++++++++++++++++++++++--------------------- lib/unicore/uni_keywords.pl | 2 +- regcomp.c | 51 +++++++++++++-------------- regen/mk_invlists.pl | 20 +++++++---- uni_keywords.h | 2 +- util.c | 4 +-- 7 files changed, 112 insertions(+), 105 deletions(-) diff --git a/Configure b/Configure index b89d34dbb2..ee0e487ba7 100755 --- a/Configure +++ b/Configure @@ -19711,7 +19711,8 @@ case "$d_u32align" in #ifdef I_STDLIB #include <stdlib.h> #endif -#define U32 $u32type +#define UV $uvtype +#define UVSIZE sizeof(UV) #define BYTEORDER 0x$byteorder #define U8 $u8type #include <signal.h> @@ -19719,49 +19720,44 @@ case "$d_u32align" in $signal_t bletch(int s) { exit(4); } #endif int main() { -#if BYTEORDER == 0x1234 || BYTEORDER == 0x4321 - volatile U8 buf[8]; - volatile U32 *up; +#if BYTEORDER == 0x1234 || BYTEORDER == 0x4321 || BYTEORDER == 0x12345678 || BYTEORDER == 0x87654321 + volatile U8 buf[2*UVSIZE]; + volatile UV *up; int i; - if (sizeof(U32) != 4) { - printf("sizeof(U32) is not 4, but %d\n", sizeof(U32)); - exit(1); - } - fflush(stdout); #ifdef SIGBUS signal(SIGBUS, bletch); #endif - buf[0] = 0; - buf[1] = 0; - buf[2] = 0; - buf[3] = 1; - buf[4] = 0; - buf[5] = 0; - buf[6] = 0; - buf[7] = 1; - - for (i = 0; i < 4; i++) { - up = (U32*)(buf + i); - if (! ((*up == 1 << (8*i)) || /* big-endian */ - (*up == 1 << (8*(3-i))) /* little-endian */ + for (i = 0; i < sizeof(buf); i++) { + if (i % UVSIZE == UVSIZE - 1) { + buf[i] = 1; + } + else { + buf[i] = 0; + } + } + + for (i = 0; i < UVSIZE; i++) { + up = (UV*)(buf + i); + if (! ((*up == 1L << (8*i)) || /* big-endian */ + (*up == 1L << (8*(UVSIZE-1-i))) /* little-endian */ ) ) { - printf("read failed (%x)\n", *up); + printf("read failed (%x)\n", (int) *up); exit(2); } } /* write test */ - for (i = 0; i < 4; i++) { - up = (U32*)(buf + i); - *up = 0xBeef; - if (*up != 0xBeef) { - printf("write failed (%x)\n", *up); + for (i = 0; i < UVSIZE; i++) { + up = (UV*)(buf + i); + *up = 0xDeadBeef; + if (*up != 0xDeadBeef) { + printf("write failed (%x)\n", (int) *up); exit(3); } } diff --git a/charclass_invlists.h b/charclass_invlists.h index 2f5feb5f80..70ea70894d 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -29371,7 +29371,7 @@ static const GCB_enum _Perl_GCB_invmap[] = { /* for EBCDIC 037 */ # if 'A' == 65 /* ASCII/Latin1 */ static const UV _Perl_IVCF_invlist[] = { /* for ASCII/Latin1 */ - 1309, /* Number of elements */ + 1310, /* Number of elements */ 148565664, /* Version and data structure type */ 0, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ @@ -30683,7 +30683,8 @@ static const UV _Perl_IVCF_invlist[] = { /* for ASCII/Latin1 */ 0x118E0, 0x16E60, 0x16E80, - 0x1E922 + 0x1E922, + 0x1E944 }; # endif /* ASCII/Latin1 */ @@ -32243,7 +32244,8 @@ static const int _Perl_IVCF_invmap[] = { /* for ASCII/Latin1 */ 0, 0x16E40, 0, - 0x1E900 + 0x1E900, + 0 }; # endif /* ASCII/Latin1 */ @@ -32258,7 +32260,7 @@ static const int _Perl_IVCF_invmap[] = { /* for ASCII/Latin1 */ && '$' == 91 && '@' == 124 && '`' == 121 && '\n' == 21 static const UV _Perl_IVCF_invlist[] = { /* for EBCDIC 1047 */ - 1323, /* Number of elements */ + 1325, /* Number of elements */ 148565664, /* Version and data structure type */ 0, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ @@ -32276,6 +32278,7 @@ static const UV _Perl_IVCF_invlist[] = { /* for EBCDIC 1047 */ 0x8A, 0x8C, 0x8D, + 0x8E, 0x8F, 0x91, 0x92, @@ -33584,7 +33587,8 @@ static const UV _Perl_IVCF_invlist[] = { /* for EBCDIC 1047 */ 0x118E0, 0x16E60, 0x16E80, - 0x1E922 + 0x1E922, + 0x1E944 }; # endif /* EBCDIC 1047 */ @@ -33698,7 +33702,7 @@ static const unsigned int IVCF_AUX_TABLE_12[] = { }; static const unsigned int IVCF_AUX_TABLE_13[] = { - 0xB5, + 0xA0, 0x39C }; @@ -33840,32 +33844,33 @@ static const U8 IVCF_AUX_TABLE_lengths[] = { static const int _Perl_IVCF_invmap[] = { /* for EBCDIC 1047 */ 0, - 0xC0, + 0x62, IVCF_use_AUX_TABLE_1, - 0xC6, + 0x68, 0, - 0xC6, + 0x71, 0x1E9E, 0, - 0xD8, + 0x80, 0, - 0x41, + 0xC1, 0, - 0xC6, - 0xD8, + 0xAC, + 0xBA, + 0xAE, 0, - 0x41, + 0xD1, IVCF_use_AUX_TABLE_2, - 0x4C, + 0xD3, 0, - 0xC6, + 0x9E, 0, IVCF_use_AUX_TABLE_3, - 0x54, + 0xE3, 0, - 0xC6, + 0xEB, 0, - 0xD8, + 0xFB, 0x178, 0, 0x100, @@ -34648,7 +34653,7 @@ static const int _Perl_IVCF_invmap[] = { /* for EBCDIC 1047 */ 0, 0x1E94, 0, - 0xDF, + 0x59, 0, 0x1EA0, 0, @@ -35161,7 +35166,8 @@ static const int _Perl_IVCF_invmap[] = { /* for EBCDIC 1047 */ 0, 0x16E40, 0, - 0x1E900 + 0x1E900, + 0 }; # endif /* EBCDIC 1047 */ @@ -35193,7 +35199,6 @@ static const UV _Perl_IVCF_invlist[] = { /* for EBCDIC 037 */ 0x81, 0x8A, 0x8C, - 0x8D, 0x8F, 0x91, 0x92, @@ -36502,7 +36507,8 @@ static const UV _Perl_IVCF_invlist[] = { /* for EBCDIC 037 */ 0x118E0, 0x16E60, 0x16E80, - 0x1E922 + 0x1E922, + 0x1E944 }; # endif /* EBCDIC 037 */ @@ -36616,7 +36622,7 @@ static const unsigned int IVCF_AUX_TABLE_12[] = { }; static const unsigned int IVCF_AUX_TABLE_13[] = { - 0xB5, + 0xA0, 0x39C }; @@ -36758,32 +36764,31 @@ static const U8 IVCF_AUX_TABLE_lengths[] = { static const int _Perl_IVCF_invmap[] = { /* for EBCDIC 037 */ 0, - 0xC0, + 0x62, IVCF_use_AUX_TABLE_1, - 0xC6, + 0x68, 0, - 0xC6, + 0x71, 0x1E9E, 0, - 0xD8, + 0x80, 0, - 0x41, + 0xC1, 0, - 0xC6, - 0xD8, + 0xAC, 0, - 0x41, + 0xD1, IVCF_use_AUX_TABLE_2, - 0x4C, + 0xD3, 0, - 0xC6, + 0x9E, 0, IVCF_use_AUX_TABLE_3, - 0x54, + 0xE3, 0, - 0xC6, + 0xEB, 0, - 0xD8, + 0xFB, 0x178, 0, 0x100, @@ -37566,7 +37571,7 @@ static const int _Perl_IVCF_invmap[] = { /* for EBCDIC 037 */ 0, 0x1E94, 0, - 0xDF, + 0x59, 0, 0x1EA0, 0, @@ -38079,7 +38084,8 @@ static const int _Perl_IVCF_invmap[] = { /* for EBCDIC 037 */ 0, 0x16E40, 0, - 0x1E900 + 0x1E900, + 0 }; # endif /* EBCDIC 037 */ @@ -395305,5 +395311,5 @@ static const U8 WB_table[23][23] = { * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl - * 61ea8132bb9ea5c637609e2d026b0b85ce17d6bec544c2f08ce411e6f65e8386 regen/mk_invlists.pl + * 44a3e3e2047a58e56ed8e3338ad85bedabae470dd119bf0862ca8129545ebf8a regen/mk_invlists.pl * ex: set ro: */ diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl index a4183fc324..b1640d7583 100644 --- a/lib/unicore/uni_keywords.pl +++ b/lib/unicore/uni_keywords.pl @@ -1265,5 +1265,5 @@ # a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version # 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl # 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl -# 61ea8132bb9ea5c637609e2d026b0b85ce17d6bec544c2f08ce411e6f65e8386 regen/mk_invlists.pl +# 44a3e3e2047a58e56ed8e3338ad85bedabae470dd119bf0862ca8129545ebf8a regen/mk_invlists.pl # ex: set ro: diff --git a/regcomp.c b/regcomp.c index b6a9dab140..347fc98f2f 100644 --- a/regcomp.c +++ b/regcomp.c @@ -185,9 +185,7 @@ struct RExC_state_t { I32 in_lookahead; I32 contains_locale; I32 override_recoding; -#ifdef EBCDIC - I32 recode_x_to_native; -#endif + I32 recode_x_to_native; I32 in_multi_char_class; struct reg_code_blocks *code_blocks;/* positions of literal (?{}) within pattern */ @@ -211,6 +209,12 @@ struct RExC_state_t { SV *mysv1; SV *mysv2; +#ifdef EBCDIC +# define SET_recode_x_to_native(x) RExC_recode_x_to_native = (x); +#else +# define SET_recode_x_to_native(x) +#endif + #define RExC_lastparse (pRExC_state->lastparse) #define RExC_lastnum (pRExC_state->lastnum) #define RExC_paren_name_list (pRExC_state->paren_name_list) @@ -276,9 +280,7 @@ struct RExC_state_t { #define RExC_in_lookbehind (pRExC_state->in_lookbehind) #define RExC_in_lookahead (pRExC_state->in_lookahead) #define RExC_contains_locale (pRExC_state->contains_locale) -#ifdef EBCDIC -# define RExC_recode_x_to_native (pRExC_state->recode_x_to_native) -#endif +#define RExC_recode_x_to_native (pRExC_state->recode_x_to_native) #define RExC_in_multi_char_class (pRExC_state->in_multi_char_class) #define RExC_frame_head (pRExC_state->frame_head) #define RExC_frame_last (pRExC_state->frame_last) @@ -7622,9 +7624,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_in_lookbehind = 0; RExC_in_lookahead = 0; RExC_seen_zerolen = *exp == '^' ? -1 : 0; -#ifdef EBCDIC RExC_recode_x_to_native = 0; -#endif RExC_in_multi_char_class = 0; RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = RExC_precomp = exp; @@ -13009,11 +13009,9 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, sv_catsv(substitute_parse, value_sv); sv_catpv(substitute_parse, ")"); -#ifdef EBCDIC /* The value should already be native, so no need to convert on EBCDIC * platforms.*/ assert(! RExC_recode_x_to_native); -#endif } else { /* \N{U+...} */ @@ -13146,12 +13144,9 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, sv_catpvs(substitute_parse, ")"); -#ifdef EBCDIC /* The values are Unicode, and therefore have to be converted to native * on a non-Unicode (meaning non-ASCII) platform. */ - RExC_recode_x_to_native = 1; -#endif - + SET_recode_x_to_native(1); } /* Here, we have the string the name evaluates to, ready to be parsed, @@ -13176,9 +13171,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, RExC_start = save_start; RExC_parse = endbrace; RExC_end = orig_end; -#ifdef EBCDIC - RExC_recode_x_to_native = 0; -#endif + SET_recode_x_to_native(0); SvREFCNT_dec_NN(substitute_parse); @@ -14224,13 +14217,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) UPDATE_WARNINGS_LOC(p - 1); ender = result; - if (ender < 0x100) { #ifdef EBCDIC + if (ender < 0x100) { if (RExC_recode_x_to_native) { ender = LATIN1_TO_NATIVE(ender); } -#endif } +#endif break; } case 'c': @@ -18932,7 +18925,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, full_cp_count += this_end - this_start + 1; } - invlist_iterfinish(cp_list); /* At the end of the loop, we count how many bits differ from * the bits in lowest code point, call the count 'd'. If the @@ -18961,8 +18953,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, ret = reganode(pRExC_state, op, lowest_cp); FLAGS(REGNODE_p(ret)) = ANYOFM_mask; } + + done_anyofm: + invlist_iterfinish(cp_list); } - done_anyofm: if (inverted) { _invlist_invert(cp_list); @@ -20321,11 +20315,16 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ SvPVCLEAR(sv); - if (OP(o) > REGNODE_MAX) /* regnode.type is unsigned */ - /* It would be nice to FAIL() here, but this may be called from - regexec.c, and it would be hard to supply pRExC_state. */ - Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d", - (int)OP(o), (int)REGNODE_MAX); + if (OP(o) > REGNODE_MAX) { /* regnode.type is unsigned */ + if (pRExC_state) { /* This gives more info, if we have it */ + FAIL3("panic: corrupted regexp opcode %d > %d", + (int)OP(o), (int)REGNODE_MAX); + } + else { + Perl_croak(aTHX_ "panic: corrupted regexp opcode %d > %d", + (int)OP(o), (int)REGNODE_MAX); + } + } sv_catpv(sv, PL_reg_name[OP(o)]); /* Take off const! */ k = PL_regkind[OP(o)]; diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl index 8a0c1f071f..6853a64272 100644 --- a/regen/mk_invlists.pl +++ b/regen/mk_invlists.pl @@ -1075,6 +1075,9 @@ sub _Perl_IVCF { push @invlist, $sorted_folds[-1] + 1; push @invmap, 0; + push @invlist, 0x110000; + push @invmap, 0; + # All Unicode versions have some places where multiple code points map to # the same one, so the format always has an 'l' return \@invlist, \@invmap, 'al', $default; @@ -2554,10 +2557,10 @@ foreach my $prop (@props) { my @invlist; my @invmap; - my $map_format; + my $map_format = 0;; my $map_default; - my $maps_to_code_point; - my $to_adjust; + my $maps_to_code_point = 0; + my $to_adjust = 0; my $same_in_all_code_pages; if ($is_local_sub) { my @return = eval $lookup_prop; @@ -2598,12 +2601,15 @@ foreach my $prop (@props) { @invmap = @$map_ref; $map_format = $format; $map_default = $default; - $maps_to_code_point = $map_format =~ / a ($ | [^r] ) /x; - $to_adjust = $map_format =~ /a/; } } } + if ($map_format) { + $maps_to_code_point = $map_format =~ / a ($ | [^r] ) /x; + $to_adjust = $map_format =~ /a/; + } + # Re-order the Unicode code points to native ones for this platform. # This is only needed for code points below 256, because native code # points are only in that range. For inversion maps of properties @@ -2712,8 +2718,8 @@ foreach my $prop (@props) { # Do convert to native for maps to single code points. # There are some properties that have a few outlier # maps that aren't code points, so the above test - # skips those. - $bucket = a2n($invmap[0]); + # skips those. 0 is never remapped. + $bucket = $invmap[0] == 0 ? 0 : a2n($invmap[0]); } else { $bucket = $invmap[0]; } diff --git a/uni_keywords.h b/uni_keywords.h index 6ee2494f0d..c160321c0c 100644 --- a/uni_keywords.h +++ b/uni_keywords.h @@ -7288,6 +7288,6 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) { * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl - * 61ea8132bb9ea5c637609e2d026b0b85ce17d6bec544c2f08ce411e6f65e8386 regen/mk_invlists.pl + * 44a3e3e2047a58e56ed8e3338ad85bedabae470dd119bf0862ca8129545ebf8a regen/mk_invlists.pl * e80fb4dd6c15dc1b543793552ab5c7255a0f7b50d6ca9cce3a30a4dadf187b53 regen/mph.pl * ex: set ro: */ diff --git a/util.c b/util.c index 359f3b6d85..dc2e1bd489 100644 --- a/util.c +++ b/util.c @@ -6425,8 +6425,8 @@ Perl_get_c_backtrace(pTHX_ int depth, int skip) Safefree(raw_frames); return bt; #else - PERL_UNUSED_ARGV(depth); - PERL_UNUSED_ARGV(skip); + PERL_UNUSED_ARG(depth); + PERL_UNUSED_ARG(skip); return NULL; #endif } -- Perl5 Master Repository
