In perl.git, the branch blead has been updated

<https://perl5.git.perl.org/perl.git/commitdiff/0e9b0dc26af667dfb49d2d2a9537ebda6f50c6db?hp=1415b48ffc957366424380f485fb6a8f97638a26>

- Log -----------------------------------------------------------------
commit 0e9b0dc26af667dfb49d2d2a9537ebda6f50c6db
Author: Karl Williamson <[email protected]>
Date:   Mon Aug 26 09:21:11 2019 -0600

    regcomp.c: Emit more info if we have it in a panic msg

commit 2f15a8fa895419258c884bc74aa8e79b7089bcce
Author: Karl Williamson <[email protected]>
Date:   Mon Aug 26 09:15:09 2019 -0600

    regcomp.c: Change message into a panic
    
    Because that's what is happening, and the next commit would otherwise
    say we need to document this diagnostic, but it's one that the observer
    can do nothing about except file a bug report.

commit 4e96facf32fc51bf161361df8303b29ae85a035c
Author: Karl Williamson <[email protected]>
Date:   Mon Aug 26 08:35:21 2019 -0600

    regcomp.c: Move code within #ifdef
    
    This code does nothing but evaluate an 'if' without acting on it, because
    the body of the 'if' is inside an #ifdef, so we might as well move the
    whole thing inside the #ifdef.

commit ba2a280355a1e23b0bc7e7b92d4483daba7f86f8
Author: Karl Williamson <[email protected]>
Date:   Sun Aug 25 22:14:52 2019 -0600

    util.c: Correct spelling in rarely compiled code
    
    On z/OS, this does get compiled, and fails due to the misspellings.

commit 5c0563e751095d7a7aad52c9fc3e203cbeeb4871
Author: Karl Williamson <[email protected]>
Date:   Sun Aug 25 18:49:02 2019 -0600

    regen/mk_invlists.pl: Fix /i rules for non-ASCII machines
    
    Two variables weren't getting initialized properly in one code path, with
    the result that the case folding tables were pretty much garbage, but
    not on ASCII platforms.

commit f4d6df297dfa1ce0ef62433181961bd7e80b2497
Author: Karl Williamson <[email protected]>
Date:   Sun Aug 25 18:41:44 2019 -0600

    regen/mk_invlists.pl: Never remap 0
    
    0 is a special marker, and shouldn't be remapped.  It would be
    unlikely to be so, but this makes sure.

commit 9a9a3246d59159e9717e27dee127f17f7158ac35
Author: Karl Williamson <[email protected]>
Date:   Sun Aug 25 18:21:54 2019 -0600

    regen/mk_invlists.pl: inversion map requires a final entry
    
    Inversion maps are supposed to have an entry for what to do above the
    Unicode range.  This subroutine crafts a custom map that was missing
    that.

commit 4caef9c39de07c61112dd9aaf70c2464ee774420
Author: Karl Williamson <[email protected]>
Date:   Sun Aug 25 18:19:02 2019 -0600

    regcomp.c: Use macro to remove some #ifdef EBCDIC lines

commit 869e073ac72c68a1499d9a875c8344aa6b2d66cd
Author: Karl Williamson <[email protected]>
Date:   Sun Aug 25 18:13:07 2019 -0600

    regcomp.c: Some code paths didn't terminate an inversion list iteration
    
    There were a couple paths through the code that failed to call
    invlist_iterfinish().  This was a bug everywhere, but prevented z/OS
    from completely compiling.

-----------------------------------------------------------------------

Summary of changes:
 charclass_invlists.h        | 86 ++++++++++++++++++++++++---------------------
 lib/unicore/uni_keywords.pl |  2 +-
 regcomp.c                   | 50 +++++++++++++-------------
 regen/mk_invlists.pl        | 20 +++++++----
 uni_keywords.h              |  2 +-
 util.c                      |  4 +--
 6 files changed, 88 insertions(+), 76 deletions(-)

diff --git a/charclass_invlists.h b/charclass_invlists.h
index 2f5feb5f80..70ea70894d 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -29371,7 +29371,7 @@ static const GCB_enum _Perl_GCB_invmap[] = {  /* for 
EBCDIC 037 */
 #  if 'A' == 65 /* ASCII/Latin1 */
 
 static const UV _Perl_IVCF_invlist[] = {  /* for ASCII/Latin1 */
-       1309,   /* Number of elements */
+       1310,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
                   1 if it starts at the element beyond 0 */
@@ -30683,7 +30683,8 @@ static const UV _Perl_IVCF_invlist[] = {  /* for 
ASCII/Latin1 */
        0x118E0,
        0x16E60,
        0x16E80,
-       0x1E922
+       0x1E922,
+       0x1E944
 };
 
 #  endif       /* ASCII/Latin1 */
@@ -32243,7 +32244,8 @@ static const int _Perl_IVCF_invmap[] = {  /* for 
ASCII/Latin1 */
        0,
        0x16E40,
        0,
-       0x1E900
+       0x1E900,
+       0
 };
 
 #  endif       /* ASCII/Latin1 */
@@ -32258,7 +32260,7 @@ static const int _Perl_IVCF_invmap[] = {  /* for 
ASCII/Latin1 */
      && '$' == 91 && '@' == 124 && '`' == 121 && '\n' == 21
 
 static const UV _Perl_IVCF_invlist[] = {  /* for EBCDIC 1047 */
-       1323,   /* Number of elements */
+       1325,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
                   1 if it starts at the element beyond 0 */
@@ -32276,6 +32278,7 @@ static const UV _Perl_IVCF_invlist[] = {  /* for EBCDIC 
1047 */
        0x8A,
        0x8C,
        0x8D,
+       0x8E,
        0x8F,
        0x91,
        0x92,
@@ -33584,7 +33587,8 @@ static const UV _Perl_IVCF_invlist[] = {  /* for EBCDIC 
1047 */
        0x118E0,
        0x16E60,
        0x16E80,
-       0x1E922
+       0x1E922,
+       0x1E944
 };
 
 #  endif       /* EBCDIC 1047 */
@@ -33698,7 +33702,7 @@ static const unsigned int IVCF_AUX_TABLE_12[] = {
 };
 
 static const unsigned int IVCF_AUX_TABLE_13[] = {
-       0xB5,
+       0xA0,
        0x39C
 };
 
@@ -33840,32 +33844,33 @@ static const U8 IVCF_AUX_TABLE_lengths[] = {
 
 static const int _Perl_IVCF_invmap[] = {  /* for EBCDIC 1047 */
        0,
-       0xC0,
+       0x62,
        IVCF_use_AUX_TABLE_1,
-       0xC6,
+       0x68,
        0,
-       0xC6,
+       0x71,
        0x1E9E,
        0,
-       0xD8,
+       0x80,
        0,
-       0x41,
+       0xC1,
        0,
-       0xC6,
-       0xD8,
+       0xAC,
+       0xBA,
+       0xAE,
        0,
-       0x41,
+       0xD1,
        IVCF_use_AUX_TABLE_2,
-       0x4C,
+       0xD3,
        0,
-       0xC6,
+       0x9E,
        0,
        IVCF_use_AUX_TABLE_3,
-       0x54,
+       0xE3,
        0,
-       0xC6,
+       0xEB,
        0,
-       0xD8,
+       0xFB,
        0x178,
        0,
        0x100,
@@ -34648,7 +34653,7 @@ static const int _Perl_IVCF_invmap[] = {  /* for EBCDIC 
1047 */
        0,
        0x1E94,
        0,
-       0xDF,
+       0x59,
        0,
        0x1EA0,
        0,
@@ -35161,7 +35166,8 @@ static const int _Perl_IVCF_invmap[] = {  /* for EBCDIC 
1047 */
        0,
        0x16E40,
        0,
-       0x1E900
+       0x1E900,
+       0
 };
 
 #  endif       /* EBCDIC 1047 */
@@ -35193,7 +35199,6 @@ static const UV _Perl_IVCF_invlist[] = {  /* for EBCDIC 
037 */
        0x81,
        0x8A,
        0x8C,
-       0x8D,
        0x8F,
        0x91,
        0x92,
@@ -36502,7 +36507,8 @@ static const UV _Perl_IVCF_invlist[] = {  /* for EBCDIC 
037 */
        0x118E0,
        0x16E60,
        0x16E80,
-       0x1E922
+       0x1E922,
+       0x1E944
 };
 
 #  endif       /* EBCDIC 037 */
@@ -36616,7 +36622,7 @@ static const unsigned int IVCF_AUX_TABLE_12[] = {
 };
 
 static const unsigned int IVCF_AUX_TABLE_13[] = {
-       0xB5,
+       0xA0,
        0x39C
 };
 
@@ -36758,32 +36764,31 @@ static const U8 IVCF_AUX_TABLE_lengths[] = {
 
 static const int _Perl_IVCF_invmap[] = {  /* for EBCDIC 037 */
        0,
-       0xC0,
+       0x62,
        IVCF_use_AUX_TABLE_1,
-       0xC6,
+       0x68,
        0,
-       0xC6,
+       0x71,
        0x1E9E,
        0,
-       0xD8,
+       0x80,
        0,
-       0x41,
+       0xC1,
        0,
-       0xC6,
-       0xD8,
+       0xAC,
        0,
-       0x41,
+       0xD1,
        IVCF_use_AUX_TABLE_2,
-       0x4C,
+       0xD3,
        0,
-       0xC6,
+       0x9E,
        0,
        IVCF_use_AUX_TABLE_3,
-       0x54,
+       0xE3,
        0,
-       0xC6,
+       0xEB,
        0,
-       0xD8,
+       0xFB,
        0x178,
        0,
        0x100,
@@ -37566,7 +37571,7 @@ static const int _Perl_IVCF_invmap[] = {  /* for EBCDIC 
037 */
        0,
        0x1E94,
        0,
-       0xDF,
+       0x59,
        0,
        0x1EA0,
        0,
@@ -38079,7 +38084,8 @@ static const int _Perl_IVCF_invmap[] = {  /* for EBCDIC 
037 */
        0,
        0x16E40,
        0,
-       0x1E900
+       0x1E900,
+       0
 };
 
 #  endif       /* EBCDIC 037 */
@@ -395305,5 +395311,5 @@ static const U8 WB_table[23][23] = {
  * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 
lib/unicore/version
  * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 
regen/charset_translations.pl
  * 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea 
regen/mk_PL_charclass.pl
- * 61ea8132bb9ea5c637609e2d026b0b85ce17d6bec544c2f08ce411e6f65e8386 
regen/mk_invlists.pl
+ * 44a3e3e2047a58e56ed8e3338ad85bedabae470dd119bf0862ca8129545ebf8a 
regen/mk_invlists.pl
  * ex: set ro: */
diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl
index a4183fc324..b1640d7583 100644
--- a/lib/unicore/uni_keywords.pl
+++ b/lib/unicore/uni_keywords.pl
@@ -1265,5 +1265,5 @@
 # a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 
lib/unicore/version
 # 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 
regen/charset_translations.pl
 # 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea 
regen/mk_PL_charclass.pl
-# 61ea8132bb9ea5c637609e2d026b0b85ce17d6bec544c2f08ce411e6f65e8386 
regen/mk_invlists.pl
+# 44a3e3e2047a58e56ed8e3338ad85bedabae470dd119bf0862ca8129545ebf8a 
regen/mk_invlists.pl
 # ex: set ro:
diff --git a/regcomp.c b/regcomp.c
index b2cc6672cb..f7164ed0a8 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -185,9 +185,7 @@ struct RExC_state_t {
     I32                in_lookahead;
     I32                contains_locale;
     I32                override_recoding;
-#ifdef EBCDIC
-    I32                recode_x_to_native;
-#endif
+    I32         recode_x_to_native;
     I32                in_multi_char_class;
     struct reg_code_blocks *code_blocks;/* positions of literal (?{})
                                            within pattern */
@@ -245,7 +243,6 @@ struct RExC_state_t {
 #define RExC_seen_d_op (pRExC_state->seen_d_op) /* Seen something that differs
                                                    under /d from /u ? */
 
-
 #ifdef RE_TRACK_PATTERN_OFFSETS
 #  define RExC_offsets (RExC_rxi->u.offsets) /* I am not like the
                                                          others */
@@ -276,9 +273,15 @@ struct RExC_state_t {
 #define RExC_in_lookbehind     (pRExC_state->in_lookbehind)
 #define RExC_in_lookahead      (pRExC_state->in_lookahead)
 #define RExC_contains_locale   (pRExC_state->contains_locale)
+#define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
+
 #ifdef EBCDIC
-#   define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
+#  define SET_recode_x_to_native(x)                                         \
+                    STMT_START { RExC_recode_x_to_native = (x); } STMT_END
+#else
+#  define SET_recode_x_to_native(x) NOOP
 #endif
+
 #define RExC_in_multi_char_class (pRExC_state->in_multi_char_class)
 #define RExC_frame_head (pRExC_state->frame_head)
 #define RExC_frame_last (pRExC_state->frame_last)
@@ -7626,9 +7629,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int 
pat_count,
     RExC_in_lookbehind = 0;
     RExC_in_lookahead = 0;
     RExC_seen_zerolen = *exp == '^' ? -1 : 0;
-#ifdef EBCDIC
     RExC_recode_x_to_native = 0;
-#endif
     RExC_in_multi_char_class = 0;
 
     RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = 
RExC_precomp = exp;
@@ -12967,11 +12968,9 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
         sv_catsv(substitute_parse, value_sv);
         sv_catpv(substitute_parse, ")");
 
-#ifdef EBCDIC
         /* The value should already be native, so no need to convert on EBCDIC
          * platforms.*/
         assert(! RExC_recode_x_to_native);
-#endif
 
     }
     else {   /* \N{U+...} */
@@ -13104,12 +13103,9 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
 
         sv_catpvs(substitute_parse, ")");
 
-#ifdef EBCDIC
         /* The values are Unicode, and therefore have to be converted to native
          * on a non-Unicode (meaning non-ASCII) platform. */
-        RExC_recode_x_to_native = 1;
-#endif
-
+        SET_recode_x_to_native(1);
     }
 
     /* Here, we have the string the name evaluates to, ready to be parsed,
@@ -13134,9 +13130,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
     RExC_start = save_start;
     RExC_parse = endbrace;
     RExC_end = orig_end;
-#ifdef EBCDIC
-    RExC_recode_x_to_native = 0;
-#endif
+    SET_recode_x_to_native(0);
 
     SvREFCNT_dec_NN(substitute_parse);
 
@@ -14182,13 +14176,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 
*flagp, U32 depth)
                             UPDATE_WARNINGS_LOC(p - 1);
                             ender = result;
 
-                            if (ender < 0x100) {
 #ifdef EBCDIC
+                            if (ender < 0x100) {
                                 if (RExC_recode_x_to_native) {
                                     ender = LATIN1_TO_NATIVE(ender);
                                 }
-#endif
                            }
+#endif
                            break;
                        }
                    case 'c':
@@ -18890,7 +18884,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, 
U32 depth,
 
                     full_cp_count += this_end - this_start + 1;
                 }
-                invlist_iterfinish(cp_list);
 
                 /* At the end of the loop, we count how many bits differ from
                  * the bits in lowest code point, call the count 'd'.  If the
@@ -18919,8 +18912,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 
*flagp, U32 depth,
                     ret = reganode(pRExC_state, op, lowest_cp);
                     FLAGS(REGNODE_p(ret)) = ANYOFM_mask;
                 }
+
+              done_anyofm:
+                invlist_iterfinish(cp_list);
             }
-          done_anyofm:
 
             if (inverted) {
                 _invlist_invert(cp_list);
@@ -20276,11 +20271,16 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const 
regnode *o, const regmatch_
 
     SvPVCLEAR(sv);
 
-    if (OP(o) > REGNODE_MAX)           /* regnode.type is unsigned */
-       /* It would be nice to FAIL() here, but this may be called from
-          regexec.c, and it would be hard to supply pRExC_state. */
-       Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d",
-                                              (int)OP(o), (int)REGNODE_MAX);
+    if (OP(o) > REGNODE_MAX) {          /* regnode.type is unsigned */
+        if (pRExC_state) {  /* This gives more info, if we have it */
+            FAIL3("panic: corrupted regexp opcode %d > %d",
+                  (int)OP(o), (int)REGNODE_MAX);
+        }
+        else {
+            Perl_croak(aTHX_ "panic: corrupted regexp opcode %d > %d",
+                             (int)OP(o), (int)REGNODE_MAX);
+        }
+    }
     sv_catpv(sv, PL_reg_name[OP(o)]); /* Take off const! */
 
     k = PL_regkind[OP(o)];
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index 8a0c1f071f..6853a64272 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -1075,6 +1075,9 @@ sub _Perl_IVCF {
     push @invlist, $sorted_folds[-1] + 1;
     push @invmap, 0;
 
+    push @invlist, 0x110000;
+    push @invmap, 0;
+
     # All Unicode versions have some places where multiple code points map to
     # the same one, so the format always has an 'l'
     return \@invlist, \@invmap, 'al', $default;
@@ -2554,10 +2557,10 @@ foreach my $prop (@props) {
 
         my @invlist;
         my @invmap;
-        my $map_format;
+        my $map_format = 0;;
         my $map_default;
-        my $maps_to_code_point;
-        my $to_adjust;
+        my $maps_to_code_point = 0;
+        my $to_adjust = 0;
         my $same_in_all_code_pages;
         if ($is_local_sub) {
             my @return = eval $lookup_prop;
@@ -2598,12 +2601,15 @@ foreach my $prop (@props) {
                     @invmap = @$map_ref;
                     $map_format = $format;
                     $map_default = $default;
-                    $maps_to_code_point = $map_format =~ / a ($ | [^r] ) /x;
-                    $to_adjust = $map_format =~ /a/;
                 }
             }
         }
 
+        if ($map_format) {
+            $maps_to_code_point = $map_format =~ / a ($ | [^r] ) /x;
+            $to_adjust = $map_format =~ /a/;
+        }
+
         # Re-order the Unicode code points to native ones for this platform.
         # This is only needed for code points below 256, because native code
         # points are only in that range.  For inversion maps of properties
@@ -2712,8 +2718,8 @@ foreach my $prop (@props) {
                         # Do convert to native for maps to single code points.
                         # There are some properties that have a few outlier
                         # maps that aren't code points, so the above test
-                        # skips those.
-                        $bucket = a2n($invmap[0]);
+                        # skips those.  0 is never remapped.
+                        $bucket = $invmap[0] == 0 ? 0 : a2n($invmap[0]);
                     } else {
                         $bucket = $invmap[0];
                     }
diff --git a/uni_keywords.h b/uni_keywords.h
index 6ee2494f0d..c160321c0c 100644
--- a/uni_keywords.h
+++ b/uni_keywords.h
@@ -7288,6 +7288,6 @@ MPH_VALt match_uniprop( const unsigned char * const key, 
const U16 key_len ) {
  * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 
lib/unicore/version
  * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 
regen/charset_translations.pl
  * 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea 
regen/mk_PL_charclass.pl
- * 61ea8132bb9ea5c637609e2d026b0b85ce17d6bec544c2f08ce411e6f65e8386 
regen/mk_invlists.pl
+ * 44a3e3e2047a58e56ed8e3338ad85bedabae470dd119bf0862ca8129545ebf8a 
regen/mk_invlists.pl
  * e80fb4dd6c15dc1b543793552ab5c7255a0f7b50d6ca9cce3a30a4dadf187b53 
regen/mph.pl
  * ex: set ro: */
diff --git a/util.c b/util.c
index 359f3b6d85..dc2e1bd489 100644
--- a/util.c
+++ b/util.c
@@ -6425,8 +6425,8 @@ Perl_get_c_backtrace(pTHX_ int depth, int skip)
     Safefree(raw_frames);
     return bt;
 #else
-    PERL_UNUSED_ARGV(depth);
-    PERL_UNUSED_ARGV(skip);
+    PERL_UNUSED_ARG(depth);
+    PERL_UNUSED_ARG(skip);
     return NULL;
 #endif
 }

-- 
Perl5 Master Repository

Reply via email to