In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/72de20cdcc84ae35e0d8a55c7a92950fece19347?hp=637174112f90e2e782037f7c706f86617e7df263>

- Log -----------------------------------------------------------------
commit 72de20cdcc84ae35e0d8a55c7a92950fece19347
Author: Nicholas Clark <[email protected]>
Date:   Tue Jun 28 15:20:56 2011 +0200

    For shorter strings, store C<study>'s data as U8s or U16s, instead of U32s.
    
    The assumption is that most studied strings are fairly short, hence the pain
    of the extra code is worth it, given the memory savings.
    80 character string, 336 bytes as U8, down from 1344 as U32
    800 character string, 2112 bytes as U16, down from 4224 as U32

M       ext/Devel-Peek/t/Peek.t
M       pod/perldelta.pod
M       pp.c
M       regexec.c
M       util.c

commit b606cf7f37b8b46206c7f521b29167e037397a62
Author: Nicholas Clark <[email protected]>
Date:   Tue Jun 28 12:17:38 2011 +0200

    Store C<study>'s data as U32s, instead of I32s.
    
    The "no more" condition is now represented as ~0, instead of -1.

M       pp.c
M       regexec.c
M       util.c

commit 378b4d0f82057e5af983d31c5b48b7f10f4758b3
Author: Nicholas Clark <[email protected]>
Date:   Mon Jun 27 21:13:39 2011 +0200

    Tidy code in pp_study and Perl_screaminstr()
    
    In pp_study eliminate the variable pos, which duplicates len. ch should be 
U8,
    not I32.
    
    In Perl_screaminstr(), move the declarations of s and x to their point of 
use,
    convert a for loop to a while loop, and avoid incrementing and decrementing 
s.
    found is a boolean.

M       pp.c
M       util.c

commit 4185c9197f4aefd1943fba0b9999fc3200fd902c
Author: Nicholas Clark <[email protected]>
Date:   Mon Jun 27 20:51:04 2011 +0200

    Store C<study>'s data in in mg_ptr instead of interpreter variables.
    
    This allows more than one C<study> to be active at the same time.
    It eliminates PL_screamfirst, PL_lastscream, PL_maxscream.

M       embedvar.h
M       ext/Devel-Peek/t/Peek.t
M       intrpvar.h
M       perl.c
M       pod/perldelta.pod
M       pod/perlfunc.pod
M       pp.c
M       regexec.c
M       sv.c
M       util.c

commit 75fc7bf602cd498829b35780623ebe139c0a0483
Author: Nicholas Clark <[email protected]>
Date:   Mon Jun 27 17:58:10 2011 +0200

    Merge PL_scream{first,next} into one allocated buffer.
    
    Effectively, PL_screamnext is now PL_screamfirst + 256. The actual 
interpreter
    variable PL_screamnext is eliminated.

M       embedvar.h
M       intrpvar.h
M       perl.c
M       pp.c
M       sv.c
M       util.c

commit 56e9eeb1a239fc995bf33475e31f8379bd01cbad
Author: Nicholas Clark <[email protected]>
Date:   Mon Jun 27 17:14:39 2011 +0200

    Change PL_screamnext to store absolute positions.
    
    PL_screamnext gives the position of the next occurrence of the current 
octet.
    Previously it stored this as an offset from the current position, with -pos
    stored for "no more", so that the calculated new offset would be zero,
    allowing a zero/non-zero loop exit test in Perl_screaminstr().
    
    Now it stores absolute position, with -1 for "no more". Also codify -1 as 
the
    "not present" value for PL_screamfirst, instead of any negative value.

M       pp.c
M       regexec.c
M       util.c

commit 0177730e7e0c099d1250571eb39367a76e2d91eb
Author: Nicholas Clark <[email protected]>
Date:   Mon Jun 13 16:24:23 2011 +0200

    Split out study magic from pos magic.
    
    study uses magic to call SvSCREAM_off() if the scalar is modified. Allocate 
it
    its own magic type ('G' for now - pos magic is 'g'). Share the same "set"
    routine and vtable as regexp/bm/fm (setregxp and vtbl_regexp).

M       ext/Devel-Peek/t/Peek.t
M       mg.c
M       mg_names.c
M       mg_raw.h
M       mg_vtable.h
M       pod/perlguts.pod
M       pp.c
M       regen/mg_vtable.pl
M       t/porting/known_pod_issues.dat
-----------------------------------------------------------------------

Summary of changes:
 embedvar.h                     |    8 ---
 ext/Devel-Peek/t/Peek.t        |   91 ++++++++++++++++++++++++++++------
 intrpvar.h                     |    6 +--
 mg.c                           |    6 ++-
 mg_names.c                     |    1 +
 mg_raw.h                       |    4 +-
 mg_vtable.h                    |    3 +-
 perl.c                         |    8 ---
 pod/perldelta.pod              |   12 ++++
 pod/perlfunc.pod               |    5 +-
 pod/perlguts.pod               |    3 +-
 pp.c                           |  100 +++++++++++++++++++++----------------
 regen/mg_vtable.pl             |    5 +-
 regexec.c                      |   17 ++++++-
 sv.c                           |    6 --
 t/porting/known_pod_issues.dat |    2 +-
 util.c                         |  109 ++++++++++++++++++++++++++++++++--------
 17 files changed, 267 insertions(+), 119 deletions(-)

diff --git a/embedvar.h b/embedvar.h
index a540fd6..2405ee5 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -171,7 +171,6 @@
 #define PL_last_swash_tmps     (vTHX->Ilast_swash_tmps)
 #define PL_lastfd              (vTHX->Ilastfd)
 #define PL_lastgotoprobe       (vTHX->Ilastgotoprobe)
-#define PL_lastscream          (vTHX->Ilastscream)
 #define PL_laststatval         (vTHX->Ilaststatval)
 #define PL_laststype           (vTHX->Ilaststype)
 #define PL_localizing          (vTHX->Ilocalizing)
@@ -187,7 +186,6 @@
 #define PL_markstack_ptr       (vTHX->Imarkstack_ptr)
 #define PL_max_intro_pending   (vTHX->Imax_intro_pending)
 #define PL_maxo                        (vTHX->Imaxo)
-#define PL_maxscream           (vTHX->Imaxscream)
 #define PL_maxsysfd            (vTHX->Imaxsysfd)
 #define PL_memory_debug_header (vTHX->Imemory_debug_header)
 #define PL_mess_sv             (vTHX->Imess_sv)
@@ -268,8 +266,6 @@
 #define PL_scopestack_ix       (vTHX->Iscopestack_ix)
 #define PL_scopestack_max      (vTHX->Iscopestack_max)
 #define PL_scopestack_name     (vTHX->Iscopestack_name)
-#define PL_screamfirst         (vTHX->Iscreamfirst)
-#define PL_screamnext          (vTHX->Iscreamnext)
 #define PL_secondgv            (vTHX->Isecondgv)
 #define PL_sharehook           (vTHX->Isharehook)
 #define PL_sig_pending         (vTHX->Isig_pending)
@@ -505,7 +501,6 @@
 #define PL_Ilast_swash_tmps    PL_last_swash_tmps
 #define PL_Ilastfd             PL_lastfd
 #define PL_Ilastgotoprobe      PL_lastgotoprobe
-#define PL_Ilastscream         PL_lastscream
 #define PL_Ilaststatval                PL_laststatval
 #define PL_Ilaststype          PL_laststype
 #define PL_Ilocalizing         PL_localizing
@@ -521,7 +516,6 @@
 #define PL_Imarkstack_ptr      PL_markstack_ptr
 #define PL_Imax_intro_pending  PL_max_intro_pending
 #define PL_Imaxo               PL_maxo
-#define PL_Imaxscream          PL_maxscream
 #define PL_Imaxsysfd           PL_maxsysfd
 #define PL_Imemory_debug_header        PL_memory_debug_header
 #define PL_Imess_sv            PL_mess_sv
@@ -602,8 +596,6 @@
 #define PL_Iscopestack_ix      PL_scopestack_ix
 #define PL_Iscopestack_max     PL_scopestack_max
 #define PL_Iscopestack_name    PL_scopestack_name
-#define PL_Iscreamfirst                PL_screamfirst
-#define PL_Iscreamnext         PL_screamnext
 #define PL_Isecondgv           PL_secondgv
 #define PL_Isharehook          PL_sharehook
 #define PL_Isig_pending                PL_sig_pending
diff --git a/ext/Devel-Peek/t/Peek.t b/ext/Devel-Peek/t/Peek.t
index ab30b2f..d582a8f 100644
--- a/ext/Devel-Peek/t/Peek.t
+++ b/ext/Devel-Peek/t/Peek.t
@@ -857,17 +857,14 @@ unless ($Config{useithreads}) {
 
     do_test('regular string constant', beer,
 'SV = PV\\($ADDR\\) at $ADDR
-  REFCNT = 5
+  REFCNT = 6
   FLAGS = \\(PADMY,POK,READONLY,pPOK\\)
   PV = $ADDR "foamy"\\\0
   CUR = 5
   LEN = \d+
 ');
 
-    is(study beer, 1, "Our studies were successful");
-
-    do_test('string constant now studied', beer,
-'SV = PVMG\\($ADDR\\) at $ADDR
+    my $want = 'SV = PVMG\\($ADDR\\) at $ADDR
   REFCNT = 6
   FLAGS = \\(PADMY,SMG,POK,READONLY,pPOK,SCREAM\\)
   IV = 0
@@ -876,25 +873,89 @@ unless ($Config{useithreads}) {
   CUR = 5
   LEN = \d+
   MAGIC = $ADDR
-    MG_VIRTUAL = &PL_vtbl_mglob
-    MG_TYPE = PERL_MAGIC_regex_global\\(g\\)
-');
+    MG_VIRTUAL = &PL_vtbl_regexp
+    MG_PRIVATE = 1
+    MG_TYPE = PERL_MAGIC_study\\(G\\)
+    MG_LEN = 261
+    MG_PTR = $ADDR "\\\\377.*"
+';
+
+    is(study beer, 1, "Our studies were successful");
+
+    do_test('string constant now studied', beer, $want);
 
     is (eval 'index "not too foamy", beer', 8, 'correct index');
 
-    do_test('string constant still studied', beer,
+    do_test('string constant still studied', beer, $want);
+
+    my $pie = 'good';
+
+    is(study $pie, 1, "Our studies were successful");
+
+    do_test('string constant still studied', beer, $want);
+
+    do_test('second string also studied', $pie, 'SV = PVMG\\($ADDR\\) at $ADDR
+  REFCNT = 1
+  FLAGS = \\(PADMY,SMG,POK,pPOK,SCREAM\\)
+  IV = 0
+  NV = 0
+  PV = $ADDR "good"\\\0
+  CUR = 4
+  LEN = \d+
+  MAGIC = $ADDR
+    MG_VIRTUAL = &PL_vtbl_regexp
+    MG_PRIVATE = 1
+    MG_TYPE = PERL_MAGIC_study\\(G\\)
+    MG_LEN = 260
+    MG_PTR = $ADDR "\\\\377.*"
+');
+}
+
+{
+  my %z;
+  foreach (1, 254, 255, 65534, 65535) {
+    $z{$_} = "\0" x $_;
+    study $z{$_};
+  }
+  do_test('short studied representation', $z{1},
 'SV = PVMG\\($ADDR\\) at $ADDR
-  REFCNT = 6
-  FLAGS = \\(PADMY,SMG,POK,READONLY,pPOK,SCREAM\\)
+  REFCNT = 1
+  FLAGS = \\(SMG,POK,pPOK,SCREAM\\)
   IV = 0
   NV = 0
-  PV = $ADDR "foamy"\\\0
-  CUR = 5
+  PV = $ADDR "\\\\0"\\\0
+  CUR = 1
   LEN = \d+
   MAGIC = $ADDR
-    MG_VIRTUAL = &PL_vtbl_mglob
-    MG_TYPE = PERL_MAGIC_regex_global\\(g\\)
+    MG_VIRTUAL = &PL_vtbl_regexp
+    MG_PRIVATE = 1
+    MG_TYPE = PERL_MAGIC_study\\(G\\)
+    MG_LEN = 257
+    MG_PTR = $ADDR "\\\\0(?:\\\\377){256}"
 ');
+
+  foreach ([254, 1], [255, 2], [65534, 2], [65535, 4]
+         ) {
+    my ($length, $bytes) = @$_;
+    my $quant = $length <= 32766 ? "{$length}" : '*';
+    do_test("studied representation for length $length", $z{$length},
+           sprintf 
+'SV = PVMG\\($ADDR\\) at $ADDR
+  REFCNT = 1
+  FLAGS = \\(SMG,POK,pPOK,SCREAM\\)
+  IV = 0
+  NV = 0
+  PV = $ADDR "(?:\\\\0)%s"\\\0
+  CUR = %d
+  LEN = \d+
+  MAGIC = $ADDR
+    MG_VIRTUAL = &PL_vtbl_regexp
+    MG_PRIVATE = %d
+    MG_TYPE = PERL_MAGIC_study\\(G\\)
+    MG_LEN = %d
+    MG_PTR = $ADDR "\\\\0.*\\\\377"
+', $quant, $length, $bytes, (256 + $length) * $bytes);
+  }
 }
 
 done_testing();
diff --git a/intrpvar.h b/intrpvar.h
index 9dda6a3..cb8a861 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -155,10 +155,6 @@ PERLVAR(Iefloatsize,       STRLEN)
 
 /* regex stuff */
 
-PERLVAR(Iscreamfirst,  I32 *)
-PERLVAR(Iscreamnext,   I32 *)
-PERLVAR(Ilastscream,   SV *)
-
 PERLVAR(Ireg_state,    struct re_save_state)
 
 PERLVAR(Iregdummy,     regnode)        /* from regcomp.c */
@@ -233,7 +229,7 @@ When you replace this variable, it is considered a good 
practice to store the po
 
 PERLVARI(Iopfreehook,  Perl_ophook_t, 0) /* op_free() hook */
 
-PERLVARI(Imaxscream,   I32,    -1)
+/* Space for U32 */
 PERLVARI(Ireginterp_cnt,I32,    0)     /* Whether "Regexp" was interpolated. */
 PERLVARI(Iwatchaddr,   char **, 0)
 PERLVAR(Iwatchok,      char *)
diff --git a/mg.c b/mg.c
index 1bdf5c4..9e18918 100644
--- a/mg.c
+++ b/mg.c
@@ -2358,9 +2358,8 @@ Perl_magic_setmglob(pTHX_ SV *sv, MAGIC *mg)
 {
     PERL_ARGS_ASSERT_MAGIC_SETMGLOB;
     PERL_UNUSED_CONTEXT;
+    PERL_UNUSED_ARG(sv);
     mg->mg_len = -1;
-    if (!isGV_with_GP(sv))
-       SvSCREAM_off(sv);
     return 0;
 }
 
@@ -2387,6 +2386,9 @@ Perl_magic_setregexp(pTHX_ SV *sv, MAGIC *mg)
     } else if (type == PERL_MAGIC_bm) {
        SvTAIL_off(sv);
        SvVALID_off(sv);
+    } else if (type == PERL_MAGIC_study) {
+       if (!isGV_with_GP(sv))
+           SvSCREAM_off(sv);
     } else {
        assert(type == PERL_MAGIC_fm);
     }
diff --git a/mg_names.c b/mg_names.c
index ff73b9e..43b1945 100644
--- a/mg_names.c
+++ b/mg_names.c
@@ -22,6 +22,7 @@
        { PERL_MAGIC_env,            "env(E)" },
        { PERL_MAGIC_envelem,        "envelem(e)" },
        { PERL_MAGIC_fm,             "fm(f)" },
+       { PERL_MAGIC_study,          "study(G)" },
        { PERL_MAGIC_regex_global,   "regex_global(g)" },
        { PERL_MAGIC_hints,          "hints(H)" },
        { PERL_MAGIC_hintselem,      "hintselem(h)" },
diff --git a/mg_raw.h b/mg_raw.h
index e698dcd..7a45e6d 100644
--- a/mg_raw.h
+++ b/mg_raw.h
@@ -38,8 +38,10 @@
       "/* envelem 'e' %ENV hash element */" },
     { 'f', "want_vtbl_regdata | PERL_MAGIC_READONLY_ACCEPTABLE | 
PERL_MAGIC_VALUE_MAGIC",
       "/* fm 'f' Formline ('compiled' format) */" },
+    { 'G', "want_vtbl_regexp | PERL_MAGIC_READONLY_ACCEPTABLE | 
PERL_MAGIC_VALUE_MAGIC",
+      "/* study 'G' study()ed string */" },
     { 'g', "want_vtbl_mglob | PERL_MAGIC_READONLY_ACCEPTABLE | 
PERL_MAGIC_VALUE_MAGIC",
-      "/* regex_global 'g' m//g target / study()ed string */" },
+      "/* regex_global 'g' m//g target */" },
     { 'H', "want_vtbl_hints",
       "/* hints 'H' %^H hash */" },
     { 'h', "want_vtbl_hintselem",
diff --git a/mg_vtable.h b/mg_vtable.h
index 8846262..2e3ca35 100644
--- a/mg_vtable.h
+++ b/mg_vtable.h
@@ -29,7 +29,8 @@
 #define PERL_MAGIC_env            'E' /* %ENV hash */
 #define PERL_MAGIC_envelem        'e' /* %ENV hash element */
 #define PERL_MAGIC_fm             'f' /* Formline ('compiled' format) */
-#define PERL_MAGIC_regex_global   'g' /* m//g target / study()ed string */
+#define PERL_MAGIC_study          'G' /* study()ed string */
+#define PERL_MAGIC_regex_global   'g' /* m//g target */
 #define PERL_MAGIC_hints          'H' /* %^H hash */
 #define PERL_MAGIC_hintselem      'h' /* %^H hash element */
 #define PERL_MAGIC_isa            'I' /* @ISA array */
diff --git a/perl.c b/perl.c
index 417b2fd..e345ae1 100644
--- a/perl.c
+++ b/perl.c
@@ -905,14 +905,6 @@ perl_destruct(pTHXx)
 
     /* defgv, aka *_ should be taken care of elsewhere */
 
-    /* clean up after study() */
-    SvREFCNT_dec(PL_lastscream);
-    PL_lastscream = NULL;
-    Safefree(PL_screamfirst);
-    PL_screamfirst = 0;
-    Safefree(PL_screamnext);
-    PL_screamnext  = 0;
-
     /* float buffer */
     Safefree(PL_efloatbuf);
     PL_efloatbuf = NULL;
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index ebea453..b06fc7a 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -42,6 +42,12 @@ the built-in C<read> and C<recv> functions (among others) 
parse their
 arguments. This means that one can override the built-in functions with
 custom subroutines that parse their arguments the same way.
 
+=head2 You can now C<study> more than one string
+
+The restriction that you can only have one C<study> active at a time has been
+removed. You can now usefully C<study> as many strings as you want (until you
+exhaust memory).
+
 =head1 Security
 
 XXX Any security-related notices go here.  In particular, any security
@@ -85,6 +91,12 @@ The implementation of C<s///r> makes one fewer copy of the 
scalar's value.
 If a studied scalar is C<split> with a regex, the engine will now take
 advantage of the C<study> data.
 
+=item *
+
+C<study> now uses considerably less memory for shorter strings. Strings shorter
+than 65535 characters use roughly half the memory than previously, strings
+shorter than 255 characters use roughly one quarter of the memory.
+
 =back
 
 =head1 Modules and Pragmata
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod
index e1453e9..936d1c0 100644
--- a/pod/perlfunc.pod
+++ b/pod/perlfunc.pod
@@ -6756,9 +6756,8 @@ patterns you are searching and the distribution of 
character
 frequencies in the string to be searched; you probably want to compare
 run times with and without it to see which is faster.  Those loops
 that scan for many short constant strings (including the constant
-parts of more complex patterns) will benefit most.  You may have only
-one C<study> active at a time: if you study a different scalar the first
-is "unstudied".  (The way C<study> works is this: a linked list of every
+parts of more complex patterns) will benefit most.
+(The way C<study> works is this: a linked list of every
 character in the string to be searched is made, so we know, for
 example, where all the C<'k'> characters are.  From each search string,
 the rarest character is selected, based on some static frequency tables
diff --git a/pod/perlguts.pod b/pod/perlguts.pod
index e99c051..d8f0527 100644
--- a/pod/perlguts.pod
+++ b/pod/perlguts.pod
@@ -1055,7 +1055,8 @@ The current kinds of Magic Virtual Tables are:
     E  PERL_MAGIC_env            vtbl_env        %ENV hash
     e  PERL_MAGIC_envelem        vtbl_envelem    %ENV hash element
     f  PERL_MAGIC_fm             vtbl_regdata    Formline ('compiled' format)
-    g  PERL_MAGIC_regex_global   vtbl_mglob      m//g target / study()ed string
+    G  PERL_MAGIC_study          vtbl_regdata    study()ed string
+    g  PERL_MAGIC_regex_global   vtbl_mglob      m//g target
     H  PERL_MAGIC_hints          vtbl_hints      %^H hash
     h  PERL_MAGIC_hintselem      vtbl_hintselem  %^H hash element
     I  PERL_MAGIC_isa            vtbl_isa        @ISA array
diff --git a/pp.c b/pp.c
index 24a34a0..98d6482 100644
--- a/pp.c
+++ b/pp.c
@@ -707,16 +707,15 @@ PP(pp_study)
 {
     dVAR; dSP; dPOPss;
     register unsigned char *s;
-    register I32 pos;
-    register I32 ch;
-    register I32 *sfirst;
-    register I32 *snext;
+    char *sfirst_raw;
     STRLEN len;
+    MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL;
+    U8 quanta;
+    STRLEN size;
+
+    if (mg && SvSCREAM(sv))
+       RETPUSHYES;
 
-    if (sv == PL_lastscream) {
-       if (SvSCREAM(sv))
-           RETPUSHYES;
-    }
     s = (unsigned char*)(SvPV(sv, len));
     if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
        /* No point in studying a zero length string, and not safe to study
@@ -726,51 +725,66 @@ PP(pp_study)
           stringification.  Also refuse to study an FBM scalar, as this gives
           more flexibility in SV flag usage.  No real-world code would ever
           end up studying an FBM scalar, so this isn't a real pessimisation.
+          Endemic use of I32 in Perl_screaminstr makes it hard to safely push
+          the study length limit from I32_MAX to U32_MAX - 1.
        */
        RETPUSHNO;
     }
-    pos = len;
 
-    if (PL_lastscream) {
-       SvSCREAM_off(PL_lastscream);
-       SvREFCNT_dec(PL_lastscream);
-    }
-    PL_lastscream = SvREFCNT_inc_simple(sv);
+    if (len < 0xFF) {
+       quanta = 1;
+    } else if (len < 0xFFFF) {
+       quanta = 2;
+    } else
+       quanta = 4;
 
-    if (pos > PL_maxscream) {
-       if (PL_maxscream < 0) {
-           PL_maxscream = pos + 80;
-           Newx(PL_screamfirst, 256, I32);
-           Newx(PL_screamnext, PL_maxscream, I32);
-       }
-       else {
-           PL_maxscream = pos + pos / 4;
-           Renew(PL_screamnext, PL_maxscream, I32);
-       }
-    }
+    size = (256 + len) * quanta;
+    sfirst_raw = (char *)safemalloc(size);
 
-    sfirst = PL_screamfirst;
-    snext = PL_screamnext;
-
-    if (!sfirst || !snext)
+    if (!sfirst_raw)
        DIE(aTHX_ "do_study: out of memory");
 
-    for (ch = 256; ch; --ch)
-       *sfirst++ = -1;
-    sfirst -= 256;
-
-    while (--pos >= 0) {
-       register const I32 ch = s[pos];
-       if (sfirst[ch] >= 0)
-           snext[pos] = sfirst[ch] - pos;
-       else
-           snext[pos] = -pos;
-       sfirst[ch] = pos;
+    SvSCREAM_on(sv);
+    if (!mg)
+       mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0);
+    mg->mg_ptr = sfirst_raw;
+    mg->mg_len = size;
+    mg->mg_private = quanta;
+
+    memset(sfirst_raw, ~0, 256 * quanta);
+
+    /* The assumption here is that most studied strings are fairly short, hence
+       the pain of the extra code is worth it, given the memory savings.
+       80 character string, 336 bytes as U8, down from 1344 as U32
+       800 character string, 2112 bytes as U16, down from 4224 as U32
+    */
+       
+    if (quanta == 1) {
+       U8 *const sfirst = (U8 *)sfirst_raw;
+       U8 *const snext = sfirst + 256;
+       while (len-- > 0) {
+           const U8 ch = s[len];
+           snext[len] = sfirst[ch];
+           sfirst[ch] = len;
+       }
+    } else if (quanta == 2) {
+       U16 *const sfirst = (U16 *)sfirst_raw;
+       U16 *const snext = sfirst + 256;
+       while (len-- > 0) {
+           const U8 ch = s[len];
+           snext[len] = sfirst[ch];
+           sfirst[ch] = len;
+       }
+    } else  {
+       U32 *const sfirst = (U32 *)sfirst_raw;
+       U32 *const snext = sfirst + 256;
+       while (len-- > 0) {
+           const U8 ch = s[len];
+           snext[len] = sfirst[ch];
+           sfirst[ch] = len;
+       }
     }
 
-    SvSCREAM_on(sv);
-    /* piggyback on m//g magic */
-    sv_magic(sv, NULL, PERL_MAGIC_regex_global, NULL, 0);
     RETPUSHYES;
 }
 
diff --git a/regen/mg_vtable.pl b/regen/mg_vtable.pl
index af0041d..799be6b 100644
--- a/regen/mg_vtable.pl
+++ b/regen/mg_vtable.pl
@@ -42,9 +42,10 @@ my %mg =
                  desc => '%ENV hash element' },
      fm => { char => 'f', vtable => 'regdata', value_magic => 1,
             readonly_acceptable => 1, desc => "Formline ('compiled' format)" },
+     study => { char => 'G', vtable => 'regexp', value_magic => 1,
+               readonly_acceptable => 1, desc => 'study()ed string' },
      regex_global => { char => 'g', vtable => 'mglob', value_magic => 1,
-                      readonly_acceptable => 1,
-                      desc => 'm//g target / study()ed string' },
+                      readonly_acceptable => 1, desc => 'm//g target' },
      hints => { char => 'H', vtable => 'hints', desc => '%^H hash' },
      hintselem => { char => 'h', vtable => 'hintselem',
                    desc => '%^H hash element' },
diff --git a/regexec.c b/regexec.c
index 6ae2770..99ac5b3 100644
--- a/regexec.c
+++ b/regexec.c
@@ -695,8 +695,23 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char 
*strpos,
     if ((flags & REXEC_SCREAM) && SvSCREAM(sv)) {
        I32 p = -1;                     /* Internal iterator of scream. */
        I32 * const pp = data ? data->scream_pos : &p;
+       const MAGIC *mg;
+       bool found = FALSE;
 
-       if (PL_screamfirst[BmRARE(check)] >= 0
+       assert(SvMAGICAL(sv));
+       mg = mg_find(sv, PERL_MAGIC_study);
+       assert(mg);
+
+       if (mg->mg_private == 1) {
+           found = ((U8 *)mg->mg_ptr)[BmRARE(check)] != (U8)~0;
+       } else if (mg->mg_private == 2) {
+           found = ((U16 *)mg->mg_ptr)[BmRARE(check)] != (U16)~0;
+       } else {
+           assert (mg->mg_private == 4);
+           found = ((U32 *)mg->mg_ptr)[BmRARE(check)] != (U32)~0;
+       }
+
+       if (found
            || ( BmRARE(check) == '\n'
                 && (BmPREVIOUS(check) == SvCUR(check) - 1)
                 && SvTAIL(check) ))
diff --git a/sv.c b/sv.c
index 445f9d4..fffa6e9 100644
--- a/sv.c
+++ b/sv.c
@@ -12994,12 +12994,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
 
     /* regex stuff */
 
-    PL_screamfirst     = NULL;
-    PL_screamnext      = NULL;
-    PL_maxscream       = -1;                   /* reinits on demand */
-    PL_lastscream      = NULL;
-
-
     PL_regdummy                = proto_perl->Iregdummy;
     PL_colorset                = 0;            /* reinits PL_colors[] */
     /*PL_colors[6]     = {0,0,0,0,0,0};*/
diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat
index 1a0d0f1..e17a573 100644
--- a/t/porting/known_pod_issues.dat
+++ b/t/porting/known_pod_issues.dat
@@ -233,7 +233,7 @@ pod/perlgit.pod     Verbatim line length including indents 
exceeds 80 by    14
 pod/perlgpl.pod        Verbatim line length including indents exceeds 80 by    
50
 pod/perlguts.pod       ? Should you be using F<...> or maybe L<...> instead of 
2
 pod/perlguts.pod       ? Should you be using L<...> instead of 1
-pod/perlguts.pod       Verbatim line length including indents exceeds 80 by    
26
+pod/perlguts.pod       Verbatim line length including indents exceeds 80 by    
25
 pod/perlhack.pod       ? Should you be using L<...> instead of 1
 pod/perlhack.pod       Verbatim line length including indents exceeds 80 by    
1
 pod/perlhacktips.pod   Verbatim line length including indents exceeds 80 by    
1
diff --git a/util.c b/util.c
index 093b70e..fcfeda9 100644
--- a/util.c
+++ b/util.c
@@ -854,22 +854,56 @@ Perl_screaminstr(pTHX_ SV *bigstr, SV *littlestr, I32 
start_shift, I32 end_shift
 {
     dVAR;
     register const unsigned char *big;
-    register I32 pos;
+    U32 pos = 0; /* hush a gcc warning */
     register I32 previous;
     register I32 first;
     register const unsigned char *little;
     register I32 stop_pos;
     register const unsigned char *littleend;
-    I32 found = 0;
+    bool found = FALSE;
+    const MAGIC * mg;
+    const void *screamnext_raw = NULL; /* hush a gcc warning */
+    bool cant_find = FALSE; /* hush a gcc warning */
 
     PERL_ARGS_ASSERT_SCREAMINSTR;
 
+    assert(SvMAGICAL(bigstr));
+    mg = mg_find(bigstr, PERL_MAGIC_study);
+    assert(mg);
     assert(SvTYPE(littlestr) == SVt_PVMG);
     assert(SvVALID(littlestr));
 
-    if (*old_posp == -1
-       ? (pos = PL_screamfirst[BmRARE(littlestr)]) < 0
-       : (((pos = *old_posp), pos += PL_screamnext[pos]) == 0)) {
+    if (mg->mg_private == 1) {
+       const U8 *const screamfirst = (U8 *)mg->mg_ptr;
+       const U8 *const screamnext = screamfirst + 256;
+
+       screamnext_raw = (const void *)screamnext;
+
+       pos = *old_posp == -1
+           ? screamfirst[BmRARE(littlestr)] : screamnext[*old_posp];
+       cant_find = pos == (U8)~0;
+    } else if (mg->mg_private == 2) {
+       const U16 *const screamfirst = (U16 *)mg->mg_ptr;
+       const U16 *const screamnext = screamfirst + 256;
+
+       screamnext_raw = (const void *)screamnext;
+
+       pos = *old_posp == -1
+           ? screamfirst[BmRARE(littlestr)] : screamnext[*old_posp];
+       cant_find = pos == (U16)~0;
+    } else if (mg->mg_private == 4) {
+       const U32 *const screamfirst = (U32 *)mg->mg_ptr;
+       const U32 *const screamnext = screamfirst + 256;
+
+       screamnext_raw = (const void *)screamnext;
+
+       pos = *old_posp == -1
+           ? screamfirst[BmRARE(littlestr)] : screamnext[*old_posp];
+       cant_find = pos == (U32)~0;
+    } else
+       Perl_croak(aTHX_ "panic: unknown study size %u", mg->mg_private);
+
+    if (cant_find) {
       cant_find:
        if ( BmRARE(littlestr) == '\n'
             && BmPREVIOUS(littlestr) == SvCUR(littlestr) - 1) {
@@ -900,28 +934,59 @@ Perl_screaminstr(pTHX_ SV *bigstr, SV *littlestr, I32 
start_shift, I32 end_shift
 #endif
        return NULL;
     }
-    while (pos < previous + start_shift) {
-       if (!(pos += PL_screamnext[pos]))
-           goto cant_find;
+    if (mg->mg_private == 1) {
+       const U8 *const screamnext = (const U8 *const) screamnext_raw;
+       while ((I32)pos < previous + start_shift) {
+           pos = screamnext[pos];
+           if (pos == (U8)~0)
+               goto cant_find;
+       }
+    } else if (mg->mg_private == 2) {
+       const U16 *const screamnext = (const U16 *const) screamnext_raw;
+       while ((I32)pos < previous + start_shift) {
+           pos = screamnext[pos];
+           if (pos == (U16)~0)
+               goto cant_find;
+       }
+    } else if (mg->mg_private == 4) {
+       const U32 *const screamnext = (const U32 *const) screamnext_raw;
+       while ((I32)pos < previous + start_shift) {
+           pos = screamnext[pos];
+           if (pos == (U32)~0)
+               goto cant_find;
+       }
     }
     big -= previous;
-    do {
-       register const unsigned char *s, *x;
-       if (pos >= stop_pos) break;
-       if (big[pos] != first)
-           continue;
-       for (x=big+pos+1,s=little; s < littleend; /**/ ) {
-           if (*s++ != *x++) {
-               s--;
-               break;
+    while (1) {
+       if ((I32)pos >= stop_pos) break;
+       if (big[pos] == first) {
+           const unsigned char *s = little;
+           const unsigned char *x = big + pos + 1;
+           while (s < littleend) {
+               if (*s != *x++)
+                   break;
+               ++s;
+           }
+           if (s == littleend) {
+               *old_posp = (I32)pos;
+               if (!last) return (char *)(big+pos);
+               found = TRUE;
            }
        }
-       if (s == littleend) {
-           *old_posp = pos;
-           if (!last) return (char *)(big+pos);
-           found = 1;
+       if (mg->mg_private == 1) {
+           pos = ((const U8 *const)screamnext_raw)[pos];
+           if (pos == (U8)~0)
+               break;
+       } else if (mg->mg_private == 2) {
+           pos = ((const U16 *const)screamnext_raw)[pos];
+           if (pos == (U16)~0)
+               break;
+       } else if (mg->mg_private == 4) {
+           pos = ((const U32 *const)screamnext_raw)[pos];
+           if (pos == (U32)~0)
+               break;
        }
-    } while ( pos += PL_screamnext[pos] );
+    };
     if (last && found)
        return (char *)(big+(*old_posp));
   check_tail:

--
Perl5 Master Repository

Reply via email to