Change 30109 by [EMAIL PROTECTED] on 2007/02/03 18:52:20

        Integrate:
        [ 29502]
        Downgrading a fixed or floating substring of a pattern whilst matching
        a studied string seems to get to a "should not happen" [bug #41067].
        It seems that Perl_regexec_flags() assumes that if the pre-downgraded
        substring is FBM compiled, then the downgraded version will be too,
        hence changing the downgrade and upgrade routines to FBM compile seems
        to be a correct fix.
        
        [ 29503]
        Remove code duplication in S_to_utf8_substr() and S_to_byte_substr()
        by taking advantage of how anchored_* and float_* are stored in arrays
        to use a loop.
        
        [ 30092]
        Change 29502 wasn't perfect - you need to remove any extra trailing
        "\n" added by fbm_compile(), before recompiling with the same flags.
        In turn, to do that, it's best to store the flags even for short
        "PVBM"s.

Affected files ...

... //depot/maint-5.8/perl/regcomp.h#9 integrate
... //depot/maint-5.8/perl/regexec.c#86 edit
... //depot/maint-5.8/perl/t/op/re_tests#9 integrate
... //depot/maint-5.8/perl/util.c#143 integrate

Differences ...

==== //depot/maint-5.8/perl/regcomp.h#9 (text) ====
Index: perl/regcomp.h
--- perl/regcomp.h#8~28128~     2006-05-08 12:22:03.000000000 -0700
+++ perl/regcomp.h      2007-02-03 10:52:20.000000000 -0800
@@ -374,6 +374,9 @@
     void* data[1];
 };
 
+/* Code in S_to_utf8_substr() and S_to_byte_substr() in regexec.c accesses
+   anchored* and float* via array indexes 0 and 1.  */
+
 struct reg_substr_datum {
     I32 min_offset;
     I32 max_offset;

==== //depot/maint-5.8/perl/regexec.c#86 (text) ====
Index: perl/regexec.c
--- perl/regexec.c#85~30100~    2007-02-02 13:52:26.000000000 -0800
+++ perl/regexec.c      2007-02-03 10:52:20.000000000 -0800
@@ -4317,55 +4317,60 @@
 STATIC void
 S_to_utf8_substr(pTHX_ register regexp *prog)
 {
-    if (prog->float_substr && !prog->float_utf8) {
-       SV* const sv = newSVsv(prog->float_substr);
-       prog->float_utf8 = sv;
-       sv_utf8_upgrade(sv);
-       if (SvTAIL(prog->float_substr))
-           SvTAIL_on(sv);
-       if (prog->float_substr == prog->check_substr)
-           prog->check_utf8 = sv;
-    }
-    if (prog->anchored_substr && !prog->anchored_utf8) {
-       SV* const sv = newSVsv(prog->anchored_substr);
-       prog->anchored_utf8 = sv;
-       sv_utf8_upgrade(sv);
-       if (SvTAIL(prog->anchored_substr))
-           SvTAIL_on(sv);
-       if (prog->anchored_substr == prog->check_substr)
-           prog->check_utf8 = sv;
-    }
+    int i = 1;
+    do {
+       if (prog->substrs->data[i].substr
+           && !prog->substrs->data[i].utf8_substr) {
+           SV* const sv = newSVsv(prog->substrs->data[i].substr);
+           prog->substrs->data[i].utf8_substr = sv;
+           sv_utf8_upgrade(sv);
+           if (SvVALID(prog->substrs->data[i].substr)) {
+               const U8 flags = SvTAIL(prog->substrs->data[i].substr)
+                   ? FBMcf_TAIL : 0;
+               if (flags) {
+                   /* Trim the trailing \n that fbm_compile added last
+                      time.  */
+                   SvCUR_set(sv, SvCUR(sv) - 1);
+                   /* Whilst this makes the SV technically "invalid" (as its
+                      buffer is no longer followed by "\0") when fbm_compile()
+                      adds the "\n" back, a "\0" is restored.  */
+               }
+               fbm_compile(sv, flags);
+           }
+           if (prog->substrs->data[i].substr == prog->check_substr)
+               prog->check_utf8 = sv;
+       }
+    } while (i--);
 }
 
 STATIC void
 S_to_byte_substr(pTHX_ register regexp *prog)
 {
-    if (prog->float_utf8 && !prog->float_substr) {
-       SV* sv = newSVsv(prog->float_utf8);
-       prog->float_substr = sv;
-       if (sv_utf8_downgrade(sv, TRUE)) {
-           if (SvTAIL(prog->float_utf8))
-               SvTAIL_on(sv);
-       } else {
-           SvREFCNT_dec(sv);
-           prog->float_substr = sv = &PL_sv_undef;
-       }
-       if (prog->float_utf8 == prog->check_utf8)
-           prog->check_substr = sv;
-    }
-    if (prog->anchored_utf8 && !prog->anchored_substr) {
-       SV* sv = newSVsv(prog->anchored_utf8);
-       prog->anchored_substr = sv;
-       if (sv_utf8_downgrade(sv, TRUE)) {
-           if (SvTAIL(prog->anchored_utf8))
-               SvTAIL_on(sv);
-       } else {
-           SvREFCNT_dec(sv);
-           prog->anchored_substr = sv = &PL_sv_undef;
+    int i = 1;
+    do {
+       if (prog->substrs->data[i].utf8_substr
+           && !prog->substrs->data[i].substr) {
+           SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
+           if (sv_utf8_downgrade(sv, TRUE)) {
+               if (SvVALID(prog->substrs->data[i].utf8_substr)) {
+                   const U8 flags = SvTAIL(prog->substrs->data[i].utf8_substr)
+                       ? FBMcf_TAIL : 0;
+                   if (flags) {
+                       /* Trim the trailing \n that fbm_compile added last
+                          time.  */
+                       SvCUR_set(sv, SvCUR(sv) - 1);
+                   }
+                   fbm_compile(sv, flags);
+               }           
+           } else {
+               SvREFCNT_dec(sv);
+               sv = &PL_sv_undef;
+           }
+           prog->substrs->data[i].substr = sv;
+           if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
+               prog->check_substr = sv;
        }
-       if (prog->anchored_utf8 == prog->check_utf8)
-           prog->check_substr = sv;
-    }
+    } while (i--);
 }
 
 /*

==== //depot/maint-5.8/perl/t/op/re_tests#9 (text) ====
Index: perl/t/op/re_tests
--- perl/t/op/re_tests#8~26689~ 2006-01-06 15:03:51.000000000 -0800
+++ perl/t/op/re_tests  2007-02-03 10:52:20.000000000 -0800
@@ -959,3 +959,4 @@
 ^(a*?)(?!(aa|aaaa)*$)  aaaaaaaaaaaaaaaaaaaa    y       $1      a       # [perl #34195]
 ^(a*?)(?!(aa|aaaa)*$)(?=a\z)   aaaaaaaa        y       $1      aaaaaaa
 ^(.)\s+.$(?(1))        A B     y       $1      A       # [perl #37688]
+\x{100}?(??{""})xxx    xxx     y       $&      xxx
End of Patch.

Reply via email to