In perl.git, the branch blead has been updated <https://perl5.git.perl.org/perl.git/commitdiff/39cd6e6a879395d79d698d684b3839f9423d1607?hp=d0d8d0cb339722488ab4472815d936625b06704b>
- Log -----------------------------------------------------------------
commit 39cd6e6a879395d79d698d684b3839f9423d1607
Author: Karl Williamson <k...@cpan.org>
Date: Sun Oct 28 21:24:22 2018 -0600

    regcomp.c: Make sure UTF-8 regex pattern uses /u

    When a pattern is in UTF-8, Unicode rules should be selected. This
    commit makes sure that this happens and that the displayable form of
    the pattern shows /u.

    I don't know of any bugs this fixes.

-----------------------------------------------------------------------

Summary of changes:
 regcomp.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index 0a7940d0ef..3549619aae 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6967,7 +6967,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
      * properly wrapped with the right modifiers */
     bool has_p = ((RExC_rx->extflags & RXf_PMf_KEEPCOPY)
                                                 == RXf_PMf_KEEPCOPY);
-    bool has_charset = (get_regex_charset(RExC_rx->extflags)
+    bool has_charset = RExC_utf8 || (get_regex_charset(RExC_rx->extflags)
                                                 != REGEX_DEPENDS_CHARSET);
 
     /* The caret is output if there are any defaults: if not all the STD
@@ -7011,7 +7011,14 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
     }
     if (has_charset) {
         STRLEN len;
-        const char* const name = get_regex_charset_name(RExC_rx->extflags, &len);
+        const char* name;
+
+        name = get_regex_charset_name(RExC_rx->extflags, &len);
+        if strEQ(name, DEPENDS_PAT_MODS) { /* /d under UTF-8 => /u */
+            assert(RExC_utf8);
+            name = UNICODE_PAT_MODS;
+            len = sizeof(UNICODE_PAT_MODS) - 1;
+        }
         Copy(name, p, len, char);
         p += len;
     }

-- 
Perl5 Master Repository