In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/749e076fceedeb708a624933726e7989f2302f6a?hp=5aa91856e21c93226d08f19eb69098f224de5fff>

- Log -----------------------------------------------------------------
commit 749e076fceedeb708a624933726e7989f2302f6a
Author: Karl Williamson <[email protected]>
Date:   Sat Dec 28 21:48:57 2013 -0700

    regcomp.c: Reinstate use of synthetic start class
    
    This effectively reverts commit
    a74bca75951b6a3b0ad03ba07eb31e2ca1227308, although the syntax has
    changed.  That commit inadvertently caused a synthetic start class (SSC)
    to not be generated in many cases where it previously was.  The
    SSC is generated by the regex optimizer to hopefully speed up finding
    where to start matching the target string against the regex pattern.  I
    don't know if this is a valid data point, but in the 3 months that the
    reverted change was in blead, there were no complaints of a slowdown that could be
    attributed to this.
    
    The commit that caused this was made after discussing it with Yves
    Orton.  It just seemed (and still seems) wrong that doing what the code
    indicates is a logical OR should actually restrict the possibilities.
    The change essentially caused the result of OR'ing together the matches
    of two nodes, one of which nominally could match a sub-string of zero
    length, to also match a sub-string of zero length.  Previous to that
    commit, and after this new one, the result of doing the OR excludes a
    zero-length string, as if it were an AND.
    
    The end result of the change was that the SSC could match a zero-length
    string, and thus was discarded as not being useful.  Yves and I knew
    that the change would not cause bugs; just potentially create more false
    positives.  And we were right.
    
    I believe that this is further indication that the optimizer could
    benefit greatly from an overhaul.  It's clear from looking at the code
    and commits that other people have been similarly fooled.
-----------------------------------------------------------------------

Summary of changes:
 regcomp.c  |  7 ++++---
 t/re/pat.t | 20 +++++++++++++++++++-
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index 18130dd..0fa7b83 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4021,6 +4021,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
            else if (flags & SCF_DO_STCLASS_OR) {
                 ssc_add_cp(data->start_class, uc);
                ssc_and(pRExC_state, data->start_class, and_withp);
+                ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
            }
            flags &= ~SCF_DO_STCLASS;
        }
@@ -4140,6 +4141,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
            else if (flags & SCF_DO_STCLASS_OR) {
                 ssc_union(data->start_class, EXACTF_invlist, FALSE);
                ssc_and(pRExC_state, data->start_class, and_withp);
+                ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
            }
            flags &= ~SCF_DO_STCLASS;
             SvREFCNT_dec(EXACTF_invlist);
@@ -4545,6 +4547,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                               PL_XPosix_ptrs[_CC_VERTSPACE],
                               FALSE);
                    ssc_and(pRExC_state, data->start_class, and_withp);
+                    ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
                 }
                flags &= ~SCF_DO_STCLASS;
             }
@@ -4570,9 +4573,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                 U8 classnum;
                 U8 namedclass;
 
-                if (flags & SCF_DO_STCLASS_AND) {
-                    ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
-                }
+                ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
 
                /* Some of the logic below assumes that switching
                   locale on will only add false positives. */
diff --git a/t/re/pat.t b/t/re/pat.t
index 90850b9..905c242 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -20,7 +20,7 @@ BEGIN {
     require './test.pl';
 }
 
-plan tests => 702;  # Update this when adding/deleting tests.
+plan tests => 710;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -1506,6 +1506,24 @@ EOP
         is($i, 0, "RT 120446: mustn't run slowly");
     }
 
+    # These are based on looking at the code in regcomp.c
+    # We don't look for specific code, just the existence of an SSC
+    foreach my $re (qw(     qr/a?c/
+                            qr/a?c/i
+                            qr/[ab]?c/
+                            qr/\R?c/
+                            qr/\d?c/d
+                            qr/\w?c/l
+                            qr/\s?c/a
+                            qr/[[:alpha:]]?c/u
+    )) {
+        my $prog = <<"EOP";
+use re qw(Debug COMPILE);
+$re;
+EOP
+        fresh_perl_like($prog, qr/synthetic stclass/, "stderr", "$re generates a synthetic start class");
+    }
+
 
 } # End of sub run_tests
 

--
Perl5 Master Repository

Reply via email to