In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/d720149d59afad1fa0ae15d5f092fdc47bd1a4f7?hp=6b776407d46448d59a69054c8cd4cec4d91f50c0>
- Log -----------------------------------------------------------------
commit d720149d59afad1fa0ae15d5f092fdc47bd1a4f7
Author: David Mitchell <[email protected]>
Date:   Mon Dec 26 12:49:24 2016 +0000

    split ' ', $foo: don't check end byte

    The special-cased code to skip spaces at the start of the string
    didn't check that s < strend, so relied on the string being
    \0-terminated to work correctly. The introduction of the
    isSPACE_utf8_safe() macro showed up this dodgy assumption by causing
    assert failures in regen.t under LC_ALL=en_US.UTF-8 PERL_UNICODE="".
-----------------------------------------------------------------------

Summary of changes:
 pp.c         |  6 +++---
 t/op/split.t | 10 +++++++++-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/pp.c b/pp.c
index 300d786421..c015bfe8a6 100644
--- a/pp.c
+++ b/pp.c
@@ -5794,15 +5794,15 @@ PP(pp_split)
     orig = s;
     if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
         if (do_utf8) {
-            while (isSPACE_utf8_safe(s, strend))
+            while (s < strend && isSPACE_utf8_safe(s, strend))
                 s += UTF8SKIP(s);
         }
         else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
-            while (isSPACE_LC(*s))
+            while (s < strend && isSPACE_LC(*s))
                 s++;
         }
         else {
-            while (isSPACE(*s))
+            while (s < strend && isSPACE(*s))
                 s++;
         }
     }
diff --git a/t/op/split.t b/t/op/split.t
index ceaea000b8..81c908e875 100644
--- a/t/op/split.t
+++ b/t/op/split.t
@@ -7,7 +7,7 @@ BEGIN {
     set_up_inc('../lib');
 }
 
-plan tests => 161;
+plan tests => 162;
 
 $FS = ':';
 
@@ -621,3 +621,11 @@ is "@a", '1 2 3', 'assignment to split-to-array (stacked)';
     ok eval { $a[0] = 'a'; 1; }, "array split filling AvARRAY: assign 0";
     is "@a", "a b", "array split filling AvARRAY: result";
 }
+
+# splitting an empty utf8 string gave an assert failure
+{
+    my $s = "\x{100}";
+    chop $s;
+    my @a = split ' ', $s;
+    is (+@a, 0, "empty utf8 string");
+}
--
Perl5 Master Repository
