In perl.git, the branch blead has been updated <https://perl5.git.perl.org/perl.git/commitdiff/12c43b0a5b21dd9e8c391615febef3017ebb2c66?hp=88b5a12c0ee9aace59f68cc1bde92925ae4c9512>
- Log ----------------------------------------------------------------- commit 12c43b0a5b21dd9e8c391615febef3017ebb2c66 Author: Karl Williamson <[email protected]> Date: Thu Mar 14 21:39:56 2019 -0600 utf8_length() Add two UNLIKELY()s The input is likely to be valid. commit c1a88fe2e96cf992359798e292fdc7251e25e987 Author: Karl Williamson <[email protected]> Date: Wed Nov 22 23:10:01 2017 -0700 S_multiconcat() Use faster variant counting commit 01ccd497717f655b29ce73bfc88c2cb22667a0c4 Author: Karl Williamson <[email protected]> Date: Wed Nov 22 23:12:37 2017 -0700 toke.c: lex_stuff_pvn() Use faster UTF-8 variant count What finally got me to push this commit, since speed really doesn't matter during compilation, is that this removes a hand-rolled implementation. ----------------------------------------------------------------------- Summary of changes: op.c | 9 ++------- toke.c | 10 +++------- utf8.c | 4 ++-- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/op.c b/op.c index b4ba9c8f83..1f7ae3e610 100644 --- a/op.c +++ b/op.c @@ -3102,13 +3102,8 @@ S_maybe_multiconcat(pTHX_ OP *o) /* see if any strings would grow if converted to utf8 */ if (!utf8) { - char *p = (char*)argp->p; - STRLEN len = argp->len; - while (len--) { - U8 c = *p++; - if (!UTF8_IS_INVARIANT(c)) - variant++; - } + variant += variant_under_utf8_count((U8 *) argp->p, + (U8 *) argp->p + argp->len); } } diff --git a/toke.c b/toke.c index f17bfe143e..755740d6c4 100644 --- a/toke.c +++ b/toke.c @@ -1022,13 +1022,9 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) if (flags & LEX_STUFF_UTF8) { goto plain_copy; } else { - STRLEN highhalf = 0; /* Count of variants */ - const char *p, *e = pv+len; - for (p = pv; p != e; p++) { - if (! UTF8_IS_INVARIANT(*p)) { - highhalf++; - } - } + STRLEN highhalf = variant_under_utf8_count((U8 *) pv, + (U8 *) pv + len); + const char *p, *e = pv+len;; if (!highhalf) goto plain_copy; lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf); diff --git a/utf8.c b/utf8.c index 9487149f56..b9772b43b8 100644 --- a/utf8.c +++ b/utf8.c @@ -2320,14 +2320,14 @@ Perl_utf8_length(pTHX_ const U8 *s, const U8 *e) * the bitops (especially ~) can create illegal UTF-8. * In other words: in Perl UTF-8 is not just for Unicode. */ - if (e < s) + if (UNLIKELY(e < s)) goto warn_and_return; while (s < e) { s += UTF8SKIP(s); len++; } - if (e != s) { + if (UNLIKELY(e != s)) { len--; warn_and_return: if (PL_op) -- Perl5 Master Repository
