In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/a3011959d12bb1bc647e969fba8f643088a706b0?hp=39984de3a8e9c16c0fee320a579cb465d0ce7314>
- Log ----------------------------------------------------------------- commit a3011959d12bb1bc647e969fba8f643088a706b0 Author: Father Chrysostomos <[email protected]> Date: Sat Sep 22 18:48:50 2012 -0700 Increase $B::Deparse::VERSION to 1.18 M dist/B-Deparse/Deparse.pm commit 5255171e6cd0accee6f76ea2980e32b3b5b8e171 Author: Father Chrysostomos <[email protected]> Date: Sat Sep 22 17:54:12 2012 -0700 [perl #94490] const fold should not trigger special split " " The easiest way to fix this was to move the special handling out of the regexp engine. Now a flag is set on the split op itself for this case. A real regexp is still created, as that is the most convenient way to propagate locale settings, and it prevents the need to rework pp_split to handle a null regexp. This also means that custom regexp plugins no longer need to handle split specially (which they all do currently). M dist/B-Deparse/Deparse.pm M op.c M op.h M pp.c M regcomp.c M regen/regcomp.pl M regexp.h M t/op/split.t commit d8e299374017857bcc055c31b6d4a808fb862100 Author: Father Chrysostomos <[email protected]> Date: Sat Sep 22 17:26:48 2012 -0700 regexp.h: Correct comment RXf_SKIPWHITE is for split " ", which is special, *not* for split / /. M regexp.h ----------------------------------------------------------------------- Summary of changes: dist/B-Deparse/Deparse.pm | 7 +++++-- op.c | 10 ++++++---- op.h | 2 +- pp.c | 7 ++++--- regcomp.c | 10 ++-------- regen/regcomp.pl | 2 +- regexp.h | 16 +++++++++++----- t/op/split.t | 7 ++++++- 8 files changed, 36 insertions(+), 25 deletions(-) diff --git a/dist/B-Deparse/Deparse.pm b/dist/B-Deparse/Deparse.pm index fd35140..b36a0c0 100644 --- a/dist/B-Deparse/Deparse.pm +++ b/dist/B-Deparse/Deparse.pm @@ -20,7 +20,7 @@ use B qw(class main_root main_start main_cv svref_2object opnumber perlstring CVf_METHOD CVf_LVALUE PMf_KEEP PMf_GLOBAL PMf_CONTINUE PMf_EVAL PMf_ONCE PMf_MULTILINE PMf_SINGLELINE PMf_FOLD PMf_EXTENDED); -$VERSION = '1.17'; +$VERSION = '1.18'; use strict; use vars qw/$AUTOLOAD/; use warnings (); @@ -4657,8 +4657,11 @@ sub pp_split { # handle special case of split(), and split(' ') that compiles to /\s+/ # Under 5.10, the reflags may be undef if the split regexp isn't a constant + # Under 5.17.5+, the special flag is on split itself. $kid = $op->first; - if ( $kid->flags & OPf_SPECIAL + if ( $op->flags & OPf_SPECIAL + or + $kid->flags & OPf_SPECIAL and ( $] < 5.009 ? $kid->pmflags & PMf_SKIPWHITE() : ($kid->reflags || 0) & RXf_SKIPWHITE() ) ) { $exprs[0] = "' '"; diff --git a/op.c b/op.c index 1406ffc..9e4dd30 100644 --- a/op.c +++ b/op.c @@ -4594,9 +4594,6 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor) U32 rx_flags = pm->op_pmflags & RXf_PMf_COMPILETIME; regexp_engine const *eng = current_re_engine(); - if (o->op_flags & OPf_SPECIAL) - rx_flags |= RXf_SPLIT; - if (!has_code || !eng->op_comp) { /* compile-time simple constant pattern */ @@ -9783,10 +9780,15 @@ Perl_ck_split(pTHX_ OP *o) cLISTOPo->op_last = kid; /* There was only one element previously */ } + if (kid->op_type == OP_CONST && !(kid->op_private & OPpCONST_FOLDED)) { + SV * const sv = kSVOP->op_sv; + if (SvPOK(sv) && SvCUR(sv) == 1 && *SvPVX(sv) == ' ') + o->op_flags |= OPf_SPECIAL; + } if (kid->op_type != OP_MATCH || kid->op_flags & OPf_STACKED) { OP * const sibl = kid->op_sibling; kid->op_sibling = 0; - kid = pmruntime( newPMOP(OP_MATCH, OPf_SPECIAL), kid, 0, 0); + kid = pmruntime( newPMOP(OP_MATCH, 0), kid, 0, 0); if (cLISTOPo->op_first == cLISTOPo->op_last) cLISTOPo->op_last = kid; cLISTOPo->op_first = kid; diff --git a/op.h b/op.h index 2bfaa0d..3ddce78 100644 --- a/op.h +++ b/op.h @@ -114,7 +114,7 @@ Deprecated. Use C<GIMME_V> instead. /* On OP_ENTERSUB || OP_NULL, saw a "do". */ /* On OP_EXISTS, treat av as av, not avhv. */ /* On OP_(ENTER|LEAVE)EVAL, don't clear $@ */ - /* On pushre, rx is used as part of split, e.g. split " " */ + /* On OP_SPLIT, special split " " */ /* On regcomp, "use re 'eval'" was in scope */ /* On OP_READLINE, was <$filehandle> */ /* On RV2[ACGHS]V, don't create GV--in diff --git a/pp.c b/pp.c index f99c460..b57ee84 100644 --- a/pp.c +++ b/pp.c @@ -5299,6 +5299,7 @@ PP(pp_split) STRLEN len; const char *s = SvPV_const(sv, len); const bool do_utf8 = DO_UTF8(sv); + const bool skipwhite = PL_op->op_flags & OPf_SPECIAL; const char *strend = s + len; PMOP *pm; REGEXP *rx; @@ -5329,7 +5330,7 @@ PP(pp_split) rx = PM_GETRE(pm); TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET && - (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE))); + (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite)); RX_MATCH_UTF8_set(rx, do_utf8); @@ -5369,7 +5370,7 @@ PP(pp_split) } base = SP - PL_stack_base; orig = s; - if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) { + if (skipwhite) { if (do_utf8) { while (*s == ' ' || is_utf8_space((U8*)s)) s += UTF8SKIP(s); @@ -5391,7 +5392,7 @@ PP(pp_split) if (!limit) limit = maxiters + 2; - if (RX_EXTFLAGS(rx) & RXf_WHITE) { + if (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite) { while (--limit) { m = s; /* this one uses 'm' and is a negative test */ diff --git a/regcomp.c b/regcomp.c index 1236f53..61b52c9 100644 --- a/regcomp.c +++ b/regcomp.c @@ -6401,18 +6401,12 @@ reStudy: #ifdef STUPID_PATTERN_CHECKS if (RX_PRELEN(rx) == 0) r->extflags |= RXf_NULL; - if (r->extflags & RXf_SPLIT && RX_PRELEN(rx) == 1 && RX_PRECOMP(rx)[0] == ' ') - /* XXX: this should happen BEFORE we compile */ - r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); - else if (RX_PRELEN(rx) == 3 && memEQ("\\s+", RX_PRECOMP(rx), 3)) + if (RX_PRELEN(rx) == 3 && memEQ("\\s+", RX_PRECOMP(rx), 3)) r->extflags |= RXf_WHITE; else if (RX_PRELEN(rx) == 1 && RXp_PRECOMP(rx)[0] == '^') r->extflags |= RXf_START_ONLY; #else - if (r->extflags & RXf_SPLIT && RX_PRELEN(rx) == 1 && RX_PRECOMP(rx)[0] == ' ') - /* XXX: this should happen BEFORE we compile */ - r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); - else { + { regnode *first = ri->program + 1; U8 fop = OP(first); diff --git a/regen/regcomp.pl b/regen/regcomp.pl index eef5533..16091ca 100644 --- a/regen/regcomp.pl +++ b/regen/regcomp.pl @@ -253,7 +253,7 @@ foreach my $file ("op_reg_common.h", "regexp.h") { # optional leading '_'. Return symbol in $1, and strip it from # rest of line - if (s/ \#define \s+ ( _? RXf_ \w+ ) \s+ //xi) { + if (s/ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) { chomp; my $define = $1; s: / \s* \* .*? \* \s* / : :x; # Replace comments by a blank diff --git a/regexp.h b/regexp.h index 1f27fd5..c515667 100644 --- a/regexp.h +++ b/regexp.h @@ -395,11 +395,14 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) #define RXf_INTUIT_TAIL (1<<(RXf_BASE_SHIFT+14)) /* - Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will - be used by regex engines to check whether they should set - RXf_SKIPWHITE + This used to be set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. + split. It was used by the regex engine to check whether it should set + RXf_SKIPWHITE. Regexp plugins on CPAN also have done the same thing + historically, so we leave this flag defined, even though it is never set. */ -#define RXf_SPLIT (1<<(RXf_BASE_SHIFT+15)) +#if !defined(PERL_CORE) || defined(PERL_IN_DUMP_C) +# define RXf_SPLIT (1<<(RXf_BASE_SHIFT+15)) +#endif #define RXf_USE_INTUIT (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML) @@ -414,7 +417,10 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) /* Flags indicating special patterns */ #define RXf_START_ONLY (1<<(RXf_BASE_SHIFT+19)) /* Pattern is /^/ */ -#define RXf_SKIPWHITE (1<<(RXf_BASE_SHIFT+20)) /* Pattern is for a split / / */ +/* No longer used, but CPAN modules still mention it. */ +#if !defined(PERL_CORE) || defined(PERL_IN_DUMP_C) +# define RXf_SKIPWHITE (1<<(RXf_BASE_SHIFT+20)) /* Pattern is for a split " " */ +#endif #define RXf_WHITE (1<<(RXf_BASE_SHIFT+21)) /* Pattern is /\s+/ */ #define RXf_NULL (1U<<(RXf_BASE_SHIFT+22)) /* Pattern is // */ #if RXf_BASE_SHIFT+22 > 31 diff --git a/t/op/split.t b/t/op/split.t index 6903503..76836d9 100644 --- a/t/op/split.t +++ b/t/op/split.t @@ -6,7 +6,7 @@ BEGIN { require './test.pl'; } -plan tests => 102; +plan tests => 103; $FS = ':'; @@ -417,3 +417,8 @@ is($cnt, scalar(@ary)); # 'my' doesn't trigger the bug is "@PATH", "Font GlyphNames", "hybrid scalar-and-array context"; } + +# [perl #94490] constant folding should not invoke special split " " +# behaviour. +@_=split(0||" ","foo bar"); +is @_, 3, 'split(0||" ") is not treated like split(" ")'; -- Perl5 Master Repository
