Change 31693 by [EMAIL PROTECTED] on 2007/08/09 10:10:37

        Subject: [PATCH] Optimize split //
        From: "=?UTF-8?Q?=C3=86var_Arnfj=C3=B6r=C3=B0_Bjarmason?=" <[EMAIL 
PROTECTED]>
        Date: Thu, 9 Aug 2007 07:49:16 +0000
        Message-ID: <[EMAIL PROTECTED]>

Affected files ...

... //depot/perl/pod/perlreapi.pod#12 edit
... //depot/perl/pp.c#594 edit
... //depot/perl/regcomp.c#604 edit
... //depot/perl/regexp.h#109 edit

Differences ...

==== //depot/perl/pod/perlreapi.pod#12 (text) ====
Index: perl/pod/perlreapi.pod
--- perl/pod/perlreapi.pod#11~31499~    2007-06-28 23:28:07.000000000 -0700
+++ perl/pod/perlreapi.pod      2007-08-09 03:10:37.000000000 -0700
@@ -188,6 +188,16 @@
 
 Perl's engine sets this flag if the pattern is C<\s+>.
 
+=item RXf_NULL
+
+Tells the split operator to split the target string on
+characters. The definition of character varies depending on whether
+the target string is a UTF-8 string.
+
+Perl's engine sets this flag on empty patterns. This optimization
+makes C<split //> much faster than it would otherwise be; it's even
+faster than C<unpack>.
+
 =back
 
 =head2 exec

==== //depot/perl/pp.c#594 (text) ====
Index: perl/pp.c
--- perl/pp.c#593~31377~        2007-06-14 04:06:02.000000000 -0700
+++ perl/pp.c   2007-08-09 03:10:37.000000000 -0700
@@ -4711,6 +4711,43 @@
            s = m;
        }
     }
+    else if (rx->extflags & RXf_NULL && !(s >= strend)) {
+        /*
+          Pre-extend the stack, either the number of bytes or
+          characters in the string or a limited amount, triggered by:
+
+          my ($x, $y) = split //, $str;
+            or
+          split //, $str, $i;
+        */
+        const U32 items = limit - 1; 
+        if (items < slen)
+            EXTEND(SP, items);
+        else
+            EXTEND(SP, slen);
+
+        while (--limit) {
+            m = s;
+            
+            if (do_utf8)
+                s += UTF8SKIP(s);
+            else
+                ++s;
+
+            dstr = newSVpvn(m, s-m);
+
+            if (make_mortal)
+                sv_2mortal(dstr);
+            if (do_utf8)
+                (void)SvUTF8_on(dstr);
+
+            PUSHs(dstr);
+
+            /* are we there yet? */
+            if (s >= strend)
+                break;
+        }
+    }
     else if (do_utf8 == ((rx->extflags & RXf_UTF8) != 0) &&
             (rx->extflags & RXf_USE_INTUIT) && !rx->nparens
             && (rx->extflags & RXf_CHECK_ALL)

==== //depot/perl/regcomp.c#604 (text) ====
Index: perl/regcomp.c
--- perl/regcomp.c#603~31524~   2007-07-03 07:26:13.000000000 -0700
+++ perl/regcomp.c      2007-08-09 03:10:37.000000000 -0700
@@ -4753,6 +4753,8 @@
         r->paren_names = NULL;
 
 #ifdef STUPID_PATTERN_CHECKS            
+    if (r->prelen == 0)
+        r->extflags |= RXf_NULL;
     if (r->extflags & RXf_SPLIT && r->prelen == 1 && r->precomp[0] == ' ')
         /* XXX: this should happen BEFORE we compile */
         r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); 
@@ -4769,7 +4771,9 @@
         U8 fop = OP(first);
         U8 nop = OP(NEXTOPER(first));
         
-         if (PL_regkind[fop] == BOL && nop == END) 
+        if (PL_regkind[fop] == NOTHING && nop == END)
+            r->extflags |= RXf_NULL;
+        else if (PL_regkind[fop] == BOL && nop == END)
             r->extflags |= RXf_START_ONLY;
         else if (fop == PLUS && nop ==SPACE && OP(regnext(first))==END)
             r->extflags |= RXf_WHITE;    

==== //depot/perl/regexp.h#109 (text) ====
Index: perl/regexp.h
--- perl/regexp.h#108~31506~    2007-06-30 08:37:41.000000000 -0700
+++ perl/regexp.h       2007-08-09 03:10:37.000000000 -0700
@@ -240,6 +240,7 @@
 #define RXf_SKIPWHITE          0x00000100 /* Pattern is for a split / / */
 #define RXf_START_ONLY         0x00000200 /* Pattern is /^/ */
 #define RXf_WHITE              0x00000400 /* Pattern is /\s+/ */
+#define RXf_NULL               0x40000000 /* Pattern is // */
 
 /* 0x1F800 of extflags is used by (RXf_)PMf_COMPILETIME */
 #define RXf_PMf_LOCALE         0x00000800 /* use locale */
End of Patch.

Reply via email to