In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/bbc87e33f84c794a9dad986cd2ae8d6bf83c494f?hp=162b417c061ec9190135629d421e3685e8d31dc0>

- Log -----------------------------------------------------------------
commit bbc87e33f84c794a9dad986cd2ae8d6bf83c494f
Author: David Mitchell <[email protected]>
Date:   Tue Jan 13 14:58:22 2015 +0000

    update AUTHORS

M       AUTHORS

commit 9ce1a4d5ec32720328a5dce6ee796ce4b79d6faf
Author: Rostislav Skudnov <[email protected]>
Date:   Wed Dec 24 08:12:52 2014 +0200

    make re_intuit_string() return correct string
    
    Fix #123469 - Bug in split function, with utf8 strings
    
    Each regex has two SV pointers, check_substr and check_utf8,
    which hold a constant string (if any) corresponding to the longest
    constant string in the regexp.
    
    When the regex is first compiled, only one pointer is set, depending on
    whether the pattern is utf8 or not; but subsequent usage of the regex can
    instantiate the other pointer too. So which of the two strings
    re_intuit_string() should return should be based on the UTF8ness of the
    pattern, not whether check_substr is set.

M       regcomp.c
M       t/op/split.t
-----------------------------------------------------------------------

Summary of changes:
 AUTHORS      |  1 +
 regcomp.c    |  9 +++++----
 t/op/split.t | 17 ++++++++++++++++-
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index f4d429d..7f2b8f0 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1037,6 +1037,7 @@ Rodolfo Carvalho          <[email protected]>
 Ronald F. Guilmette            <[email protected]>
 Ronald J. Kimball              <[email protected]>
 Ronald Schmidt                 <[email protected]>
+Rostislav Skudnov              <[email protected]>
 Ruben Schattevoy               <[email protected]>
 Rudolph Todd Maceyko           <[email protected]>
 Rujith S. de Silva             <[email protected]>
diff --git a/regcomp.c b/regcomp.c
index 12ba778..cb6322a 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -16606,21 +16606,22 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
 
     DEBUG_COMPILE_r(
        {
-           const char * const s = SvPV_nolen_const(prog->check_substr
-                     ? prog->check_substr : prog->check_utf8);
+           const char * const s = SvPV_nolen_const(RX_UTF8(r)
+                     ? prog->check_utf8 : prog->check_substr);
 
            if (!PL_colorset) reginitcolors();
            PerlIO_printf(Perl_debug_log,
                      "%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n",
                      PL_colors[4],
-                     prog->check_substr ? "" : "utf8 ",
+                     RX_UTF8(r) ? "utf8 " : "",
                      PL_colors[5],PL_colors[0],
                      s,
                      PL_colors[1],
                      (strlen(s) > 60 ? "..." : ""));
        } );
 
-    return prog->check_substr ? prog->check_substr : prog->check_utf8;
+    /* use UTF8 check substring if regexp pattern itself is in UTF8 */
+    return RX_UTF8(r) ? prog->check_utf8 : prog->check_substr;
 }
 
 /*
diff --git a/t/op/split.t b/t/op/split.t
index 9afdd6e..5d5c19d 100644
--- a/t/op/split.t
+++ b/t/op/split.t
@@ -6,7 +6,7 @@ BEGIN {
     set_up_inc('../lib');
 }
 
-plan tests => 125;
+plan tests => 131;
 
 $FS = ':';
 
@@ -374,6 +374,21 @@ is($cnt, scalar(@ary));
 }
 
 {
+    # LATIN SMALL LETTER A WITH DIAERESIS, CYRILLIC SMALL LETTER I
+    for my $pattern ("\x{e4}", "\x{0437}") {
+        utf8::upgrade $pattern;
+        my @res;
+        for my $str ("a${pattern}b", "axb", "a${pattern}b") {
+            @split = split /$pattern/, $str;
+            push @res, scalar(@split);
+        }
+        is($res[0], 2);
+        is($res[1], 1);
+        is($res[2], 2, '#123469 - split with utf8 pattern after handling non-utf8 EXPR');
+    }
+}
+
+{
     is (\@a, \@{"a"}, '@a must be global for following test');
     $p="";
     $n = @a = split /,/,$p;

--
Perl5 Master Repository

Reply via email to