In perl.git, the branch blead has been updated <https://perl5.git.perl.org/perl.git/commitdiff/ca21d46d3db8b4555070f23307b87ad37f93afe7?hp=7592075538038c0fdff9c2e46810a4596afdda41>
- Log ----------------------------------------------------------------- commit ca21d46d3db8b4555070f23307b87ad37f93afe7 Author: Karl Williamson <[email protected]> Date: Wed Mar 27 21:28:32 2019 -0600 perldiag: Rmv obsolete message This restriction no longer exists, and is for an experimental feature commit 766d6d3362791924f037969c5b7eadc346483d52 Author: Karl Williamson <[email protected]> Date: Thu Mar 28 12:26:43 2019 -0600 PATCH: [perl #133942] BBC: BKB/Lingua-JA-Moji This ticket was originally fixed by a PR being issued on the distribution, which was merged and a new version placed on CPAN. But before Slaven had a chance to test it, commit 765e6ecf32a570694dcff91c1c72f98306a9390e came along, and happened to break it again from a totally independent cause. So we have two separate bugs on the same ticket, only one of which was perl's fault, and this commit fixes that one. The problematic commit left one function still looking in the regnode FLAGS field, whereas that field has been repurposed for ANYOFH nodes, so should be ignored for them. ----------------------------------------------------------------------- Summary of changes: pod/perldiag.pod | 7 ------- regcomp.c | 17 +++++++++-------- t/re/re_tests | 1 + 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 7a3faad3ca..823d358159 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -4299,13 +4299,6 @@ supplied. See L<perlform>. of Perl. Check the #! line, or manually feed your script into Perl yourself. -=item (?[...]) not valid in locale in regex; marked by S<<-- HERE> in m/%s/ - -(F) C<(?[...])> cannot be used within the scope of a C<S<use locale>> or with -an C</l> regular expression modifier, as that would require deferring -to run-time the calculation of what it should evaluate to, and it is -regex compile-time only. - =item no UTC offset information; assuming local time is UTC (S) A warning peculiar to VMS. 
Perl was unable to find the local diff --git a/regcomp.c b/regcomp.c index aff83ef41b..d98ad1a838 100644 --- a/regcomp.c +++ b/regcomp.c @@ -1574,6 +1574,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state, unsigned int i; const U32 n = ARG(node); bool new_node_has_latin1 = FALSE; + const U8 flags = OP(node) == ANYOFH ? 0 : ANYOF_FLAGS(node); PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC; @@ -1598,7 +1599,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state, } /* Get the code points valid only under UTF-8 locales */ - if ( (ANYOF_FLAGS(node) & ANYOFL_FOLD) + if ( (flags & ANYOFL_FOLD) && av_tindex_skip_len_mg(av) >= ONLY_LOCALE_MATCHES_INDEX) { only_utf8_locale_invlist = ary[ONLY_LOCALE_MATCHES_INDEX]; @@ -1619,7 +1620,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state, * actually does include them. (Think about "\xe0" =~ /[^\xc0]/di;). We * have to do this here before we add the unconditionally matched code * points */ - if (ANYOF_FLAGS(node) & ANYOF_INVERT) { + if (flags & ANYOF_INVERT) { _invlist_intersection_complement_2nd(invlist, PL_UpperLatin1, &invlist); @@ -1646,21 +1647,21 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state, * as well. But don't add them if inverting, as when that gets done below, * it would exclude all these characters, including the ones it shouldn't * that were added just above */ - if (! (ANYOF_FLAGS(node) & ANYOF_INVERT) && OP(node) == ANYOFD - && (ANYOF_FLAGS(node) & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)) + if (! 
(flags & ANYOF_INVERT) && OP(node) == ANYOFD + && (flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)) { _invlist_union(invlist, PL_UpperLatin1, &invlist); } /* Similarly for these */ - if (ANYOF_FLAGS(node) & ANYOF_MATCHES_ALL_ABOVE_BITMAP) { + if (flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP) { _invlist_union_complement_2nd(invlist, PL_InBitmap, &invlist); } - if (ANYOF_FLAGS(node) & ANYOF_INVERT) { + if (flags & ANYOF_INVERT) { _invlist_invert(invlist); } - else if (ANYOF_FLAGS(node) & ANYOFL_FOLD) { + else if (flags & ANYOFL_FOLD) { if (new_node_has_latin1) { /* Under /li, any 0-255 could fold to any other 0-255, depending on @@ -1688,7 +1689,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state, if (only_utf8_locale_invlist) { _invlist_union_maybe_complement_2nd(invlist, only_utf8_locale_invlist, - ANYOF_FLAGS(node) & ANYOF_INVERT, + flags & ANYOF_INVERT, &invlist); } diff --git a/t/re/re_tests b/t/re/re_tests index bd7fc8f80d..991cde6066 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -2014,6 +2014,7 @@ AB\s+\x{100} AB \x{100}X y - - /[\xdf-/i - c - Invalid [] range # [perl #133620] likely only fails under valgrind /\1a(b)/ bab n - - # This compiles but fails to match as \1 is not set when parsed. /(?iu)(?<=\xdf)hbase/ sshbase y $& hbase +/\x{30c3}?[\x{30a2}\x{30a4}\x{30a6}\x{30a8}\x{30aa}-\x{30e2}\x{30e4}\x{30e6}\x{30e8}-\x{30f4}](?:[\x{30e3}\x{30e5}\x{30e7}\x{30a1}\x{30a3}\x{30a5}\x{30a7}\x{30a9}])?\x{30fc}?\x{30f3}?/ \x{30de}\x{30fc}\x{30af}\x{30b5}\x{30fc}\x{30d3}\x{30b9} y $& \x{30de}\x{30fc} # part of [perl #133942 # Keep these lines at the end of the file # vim: softtabstop=0 noexpandtab -- Perl5 Master Repository
