Change 30109 by [EMAIL PROTECTED] on 2007/02/03 18:52:20
Integrate:
[ 29502]
Downgrading a fixed or floating substring of a pattern whilst matching
a studied string seems to get to a "should not happen" [bug #41067].
It seems that Perl_regexec_flags() assumes that if the pre-downgraded
substring is FBM compiled, then the downgraded version will be too.
Hence, changing the downgrade and upgrade routines to FBM compile seems
to be a correct fix.
[ 29503]
Remove code duplication in S_to_utf8_substr() and S_to_byte_substr()
by taking advantage of how anchored_* and float_* are stored in arrays
to use a loop.
[ 30092]
Change 29502 wasn't perfect - you need to remove any extra trailing
"\n" added by fbm_compile(), before recompiling with the same flags.
In turn, to do that, it's best to store the flags even for short
"PVBM"s.
Affected files ...
... //depot/maint-5.8/perl/regcomp.h#9 integrate
... //depot/maint-5.8/perl/regexec.c#86 edit
... //depot/maint-5.8/perl/t/op/re_tests#9 integrate
... //depot/maint-5.8/perl/util.c#143 integrate
Differences ...
==== //depot/maint-5.8/perl/regcomp.h#9 (text) ====
Index: perl/regcomp.h
--- perl/regcomp.h#8~28128~ 2006-05-08 12:22:03.000000000 -0700
+++ perl/regcomp.h 2007-02-03 10:52:20.000000000 -0800
@@ -374,6 +374,9 @@
void* data[1];
};
+/* Code in S_to_utf8_substr() and S_to_byte_substr() in regexec.c accesses
+ anchored* and float* via array indexes 0 and 1. */
+
struct reg_substr_datum {
I32 min_offset;
I32 max_offset;
==== //depot/maint-5.8/perl/regexec.c#86 (text) ====
Index: perl/regexec.c
--- perl/regexec.c#85~30100~ 2007-02-02 13:52:26.000000000 -0800
+++ perl/regexec.c 2007-02-03 10:52:20.000000000 -0800
@@ -4317,55 +4317,60 @@
STATIC void
S_to_utf8_substr(pTHX_ register regexp *prog)
{
- if (prog->float_substr && !prog->float_utf8) {
- SV* const sv = newSVsv(prog->float_substr);
- prog->float_utf8 = sv;
- sv_utf8_upgrade(sv);
- if (SvTAIL(prog->float_substr))
- SvTAIL_on(sv);
- if (prog->float_substr == prog->check_substr)
- prog->check_utf8 = sv;
- }
- if (prog->anchored_substr && !prog->anchored_utf8) {
- SV* const sv = newSVsv(prog->anchored_substr);
- prog->anchored_utf8 = sv;
- sv_utf8_upgrade(sv);
- if (SvTAIL(prog->anchored_substr))
- SvTAIL_on(sv);
- if (prog->anchored_substr == prog->check_substr)
- prog->check_utf8 = sv;
- }
+ int i = 1;
+ do {
+ if (prog->substrs->data[i].substr
+ && !prog->substrs->data[i].utf8_substr) {
+ SV* const sv = newSVsv(prog->substrs->data[i].substr);
+ prog->substrs->data[i].utf8_substr = sv;
+ sv_utf8_upgrade(sv);
+ if (SvVALID(prog->substrs->data[i].substr)) {
+ const U8 flags = SvTAIL(prog->substrs->data[i].substr)
+ ? FBMcf_TAIL : 0;
+ if (flags) {
+ /* Trim the trailing \n that fbm_compile added last
+ time. */
+ SvCUR_set(sv, SvCUR(sv) - 1);
+ /* Whilst this makes the SV technically "invalid" (as its
+ buffer is no longer followed by "\0") when fbm_compile()
+ adds the "\n" back, a "\0" is restored. */
+ }
+ fbm_compile(sv, flags);
+ }
+ if (prog->substrs->data[i].substr == prog->check_substr)
+ prog->check_utf8 = sv;
+ }
+ } while (i--);
}
STATIC void
S_to_byte_substr(pTHX_ register regexp *prog)
{
- if (prog->float_utf8 && !prog->float_substr) {
- SV* sv = newSVsv(prog->float_utf8);
- prog->float_substr = sv;
- if (sv_utf8_downgrade(sv, TRUE)) {
- if (SvTAIL(prog->float_utf8))
- SvTAIL_on(sv);
- } else {
- SvREFCNT_dec(sv);
- prog->float_substr = sv = &PL_sv_undef;
- }
- if (prog->float_utf8 == prog->check_utf8)
- prog->check_substr = sv;
- }
- if (prog->anchored_utf8 && !prog->anchored_substr) {
- SV* sv = newSVsv(prog->anchored_utf8);
- prog->anchored_substr = sv;
- if (sv_utf8_downgrade(sv, TRUE)) {
- if (SvTAIL(prog->anchored_utf8))
- SvTAIL_on(sv);
- } else {
- SvREFCNT_dec(sv);
- prog->anchored_substr = sv = &PL_sv_undef;
+ int i = 1;
+ do {
+ if (prog->substrs->data[i].utf8_substr
+ && !prog->substrs->data[i].substr) {
+ SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
+ if (sv_utf8_downgrade(sv, TRUE)) {
+ if (SvVALID(prog->substrs->data[i].utf8_substr)) {
+ const U8 flags = SvTAIL(prog->substrs->data[i].utf8_substr)
+ ? FBMcf_TAIL : 0;
+ if (flags) {
+ /* Trim the trailing \n that fbm_compile added last
+ time. */
+ SvCUR_set(sv, SvCUR(sv) - 1);
+ }
+ fbm_compile(sv, flags);
+ }
+ } else {
+ SvREFCNT_dec(sv);
+ sv = &PL_sv_undef;
+ }
+ prog->substrs->data[i].substr = sv;
+ if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
+ prog->check_substr = sv;
}
- if (prog->anchored_utf8 == prog->check_utf8)
- prog->check_substr = sv;
- }
+ } while (i--);
}
/*
==== //depot/maint-5.8/perl/t/op/re_tests#9 (text) ====
Index: perl/t/op/re_tests
--- perl/t/op/re_tests#8~26689~ 2006-01-06 15:03:51.000000000 -0800
+++ perl/t/op/re_tests 2007-02-03 10:52:20.000000000 -0800
@@ -959,3 +959,4 @@
^(a*?)(?!(aa|aaaa)*$) aaaaaaaaaaaaaaaaaaaa y $1 a # [perl #34195]
^(a*?)(?!(aa|aaaa)*$)(?=a\z) aaaaaaaa y $1 aaaaaaa
^(.)\s+.$(?(1)) A B y $1 A # [perl #37688]
+\x{100}?(??{""})xxx xxx y $& xxx
End of Patch.