In perl.git, the branch blead has been updated <https://perl5.git.perl.org/perl.git/commitdiff/52bcf2657b8c6ee52d2eeb841fc2607db893f58f?hp=44b0aff01ba282b14dc62a1137996136282bc17a>
- Log -----------------------------------------------------------------
commit 52bcf2657b8c6ee52d2eeb841fc2607db893f58f
Author: Karl Williamson <[email protected]>
Date:   Mon Mar 18 21:43:34 2019 -0600

    regexec.c: Add comments

commit 9edbb8b2d2f1fc6dd6ab01894735f5fd43a32966
Author: Karl Williamson <[email protected]>
Date:   Mon Mar 18 21:17:03 2019 -0600

    pp.c, pp_sys.c: Use DO_UTF8 instead of its expansion

    We have a macro to hide the details of this; use it

-----------------------------------------------------------------------

Summary of changes:
 pp.c      | 2 +-
 pp_sys.c  | 2 +-
 regexec.c | 7 +++++++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/pp.c b/pp.c
index c7fa23189d..42b111ea32 100644
--- a/pp.c
+++ b/pp.c
@@ -4012,7 +4012,7 @@ PP(pp_ucfirst)
 
     /* In a "use bytes" we don't treat the source as UTF-8, but, still want
      * the destination to retain that flag */
-    if (SvUTF8(source) && ! IN_BYTES)
+    if (DO_UTF8(source))
         SvUTF8_on(dest);
 
     if (!inplace) {     /* Finish the rest of the string, unchanged */
diff --git a/pp_sys.c b/pp_sys.c
index e28e8906f1..304e1b50f8 100644
--- a/pp_sys.c
+++ b/pp_sys.c
@@ -1736,7 +1736,7 @@ PP(pp_sysread)
     }
     else {
         buffer = SvPV_force(bufsv, blen);
-        buffer_utf8 = !IN_BYTES && SvUTF8(bufsv);
+        buffer_utf8 = DO_UTF8(bufsv);
     }
     if (DO_UTF8(bufsv)) {
         blen = sv_len_utf8_nomg(bufsv);
diff --git a/regexec.c b/regexec.c
index deecde7c8e..cd0a94fa5f 100644
--- a/regexec.c
+++ b/regexec.c
@@ -6127,6 +6127,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 
                 while (chars) {
                     if (utf8_target) {
+                        /* XXX This assumes the length is well-formed, as
+                         * does the UTF8SKIP below */
                         uvc = utf8n_to_uvchr((U8*)uc, UTF8_MAXLEN, &len,
                                                     uniflags);
                         uc += len;
@@ -8235,6 +8237,11 @@ NULL
             );
             if (! NEXTCHR_IS_EOS && ST.c1 != CHRTEST_VOID) {
                 if (! UTF8_IS_INVARIANT(nextchr) && utf8_target) {
+
+                    /* (We can use memEQ and memNE in this file without
+                     * having to worry about one being shorter than the
+                     * other, since the first byte of each gives the
+                     * length of the character) */
                     if (memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
                         && memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
                     {

-- 
Perl5 Master Repository
