In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/72de20cdcc84ae35e0d8a55c7a92950fece19347?hp=637174112f90e2e782037f7c706f86617e7df263>
- Log ----------------------------------------------------------------- commit 72de20cdcc84ae35e0d8a55c7a92950fece19347 Author: Nicholas Clark <[email protected]> Date: Tue Jun 28 15:20:56 2011 +0200 For shorter strings, store C<study>'s data as U8s or U16s, instead of U32s. The assumption is that most studied strings are fairly short, hence the pain of the extra code is worth it, given the memory savings. 80 character string, 336 bytes as U8, down from 1344 as U32 800 character string, 2112 bytes as U16, down from 4224 as U32 M ext/Devel-Peek/t/Peek.t M pod/perldelta.pod M pp.c M regexec.c M util.c commit b606cf7f37b8b46206c7f521b29167e037397a62 Author: Nicholas Clark <[email protected]> Date: Tue Jun 28 12:17:38 2011 +0200 Store C<study>'s data as U32s, instead of I32s. The "no more" condition is now represented as ~0, instead of -1. M pp.c M regexec.c M util.c commit 378b4d0f82057e5af983d31c5b48b7f10f4758b3 Author: Nicholas Clark <[email protected]> Date: Mon Jun 27 21:13:39 2011 +0200 Tidy code in pp_study and Perl_screaminstr() In pp_study eliminate the variable pos, which duplicates len. ch should be U8, not I32. In Perl_screaminstr(), move the declarations of s and x to their point of use, convert a for loop to a while loop, and avoid incrementing and decrementing s. found is a boolean. M pp.c M util.c commit 4185c9197f4aefd1943fba0b9999fc3200fd902c Author: Nicholas Clark <[email protected]> Date: Mon Jun 27 20:51:04 2011 +0200 Store C<study>'s data in in mg_ptr instead of interpreter variables. This allows more than one C<study> to be active at the same time. It eliminates PL_screamfirst, PL_lastscream, PL_maxscream. M embedvar.h M ext/Devel-Peek/t/Peek.t M intrpvar.h M perl.c M pod/perldelta.pod M pod/perlfunc.pod M pp.c M regexec.c M sv.c M util.c commit 75fc7bf602cd498829b35780623ebe139c0a0483 Author: Nicholas Clark <[email protected]> Date: Mon Jun 27 17:58:10 2011 +0200 Merge PL_scream{first,next} into one allocated buffer. Effectively, PL_screamnext is now PL_screamfirst + 256. The actual interpreter variable PL_screamnext is eliminated. M embedvar.h M intrpvar.h M perl.c M pp.c M sv.c M util.c commit 56e9eeb1a239fc995bf33475e31f8379bd01cbad Author: Nicholas Clark <[email protected]> Date: Mon Jun 27 17:14:39 2011 +0200 Change PL_screamnext to store absolute positions. PL_screamnext gives the position of the next occurrence of the current octet. Previously it stored this as an offset from the current position, with -pos stored for "no more", so that the calculated new offset would be zero, allowing a zero/non-zero loop exit test in Perl_screaminstr(). Now it stores absolute position, with -1 for "no more". Also codify -1 as the "not present" value for PL_screamfirst, instead of any negative value. M pp.c M regexec.c M util.c commit 0177730e7e0c099d1250571eb39367a76e2d91eb Author: Nicholas Clark <[email protected]> Date: Mon Jun 13 16:24:23 2011 +0200 Split out study magic from pos magic. study uses magic to call SvSCREAM_off() if the scalar is modified. Allocate it its own magic type ('G' for now - pos magic is 'g'). Share the same "set" routine and vtable as regexp/bm/fm (setregxp and vtbl_regexp). M ext/Devel-Peek/t/Peek.t M mg.c M mg_names.c M mg_raw.h M mg_vtable.h M pod/perlguts.pod M pp.c M regen/mg_vtable.pl M t/porting/known_pod_issues.dat ----------------------------------------------------------------------- Summary of changes: embedvar.h | 8 --- ext/Devel-Peek/t/Peek.t | 91 ++++++++++++++++++++++++++++------ intrpvar.h | 6 +-- mg.c | 6 ++- mg_names.c | 1 + mg_raw.h | 4 +- mg_vtable.h | 3 +- perl.c | 8 --- pod/perldelta.pod | 12 ++++ pod/perlfunc.pod | 5 +- pod/perlguts.pod | 3 +- pp.c | 100 +++++++++++++++++++++---------------- regen/mg_vtable.pl | 5 +- regexec.c | 17 ++++++- sv.c | 6 -- t/porting/known_pod_issues.dat | 2 +- util.c | 109 ++++++++++++++++++++++++++++++++-------- 17 files changed, 267 insertions(+), 119 deletions(-) diff --git a/embedvar.h b/embedvar.h index a540fd6..2405ee5 100644 --- a/embedvar.h +++ b/embedvar.h @@ -171,7 +171,6 @@ #define PL_last_swash_tmps (vTHX->Ilast_swash_tmps) #define PL_lastfd (vTHX->Ilastfd) #define PL_lastgotoprobe (vTHX->Ilastgotoprobe) -#define PL_lastscream (vTHX->Ilastscream) #define PL_laststatval (vTHX->Ilaststatval) #define PL_laststype (vTHX->Ilaststype) #define PL_localizing (vTHX->Ilocalizing) @@ -187,7 +186,6 @@ #define PL_markstack_ptr (vTHX->Imarkstack_ptr) #define PL_max_intro_pending (vTHX->Imax_intro_pending) #define PL_maxo (vTHX->Imaxo) -#define PL_maxscream (vTHX->Imaxscream) #define PL_maxsysfd (vTHX->Imaxsysfd) #define PL_memory_debug_header (vTHX->Imemory_debug_header) #define PL_mess_sv (vTHX->Imess_sv) @@ -268,8 +266,6 @@ #define PL_scopestack_ix (vTHX->Iscopestack_ix) #define PL_scopestack_max (vTHX->Iscopestack_max) #define PL_scopestack_name (vTHX->Iscopestack_name) -#define PL_screamfirst (vTHX->Iscreamfirst) -#define PL_screamnext (vTHX->Iscreamnext) #define PL_secondgv (vTHX->Isecondgv) #define PL_sharehook (vTHX->Isharehook) #define PL_sig_pending (vTHX->Isig_pending) @@ -505,7 +501,6 @@ #define PL_Ilast_swash_tmps PL_last_swash_tmps #define PL_Ilastfd PL_lastfd #define PL_Ilastgotoprobe PL_lastgotoprobe -#define PL_Ilastscream PL_lastscream #define PL_Ilaststatval PL_laststatval #define PL_Ilaststype PL_laststype #define PL_Ilocalizing PL_localizing @@ -521,7 +516,6 @@ #define PL_Imarkstack_ptr PL_markstack_ptr #define PL_Imax_intro_pending PL_max_intro_pending #define PL_Imaxo PL_maxo -#define PL_Imaxscream PL_maxscream #define PL_Imaxsysfd PL_maxsysfd #define PL_Imemory_debug_header PL_memory_debug_header #define PL_Imess_sv PL_mess_sv @@ -602,8 +596,6 @@ #define PL_Iscopestack_ix PL_scopestack_ix #define PL_Iscopestack_max PL_scopestack_max #define PL_Iscopestack_name PL_scopestack_name -#define PL_Iscreamfirst PL_screamfirst -#define PL_Iscreamnext PL_screamnext #define PL_Isecondgv PL_secondgv #define PL_Isharehook PL_sharehook #define PL_Isig_pending PL_sig_pending diff --git a/ext/Devel-Peek/t/Peek.t b/ext/Devel-Peek/t/Peek.t index ab30b2f..d582a8f 100644 --- a/ext/Devel-Peek/t/Peek.t +++ b/ext/Devel-Peek/t/Peek.t @@ -857,17 +857,14 @@ unless ($Config{useithreads}) { do_test('regular string constant', beer, 'SV = PV\\($ADDR\\) at $ADDR - REFCNT = 5 + REFCNT = 6 FLAGS = \\(PADMY,POK,READONLY,pPOK\\) PV = $ADDR "foamy"\\\0 CUR = 5 LEN = \d+ '); - is(study beer, 1, "Our studies were successful"); - - do_test('string constant now studied', beer, -'SV = PVMG\\($ADDR\\) at $ADDR + my $want = 'SV = PVMG\\($ADDR\\) at $ADDR REFCNT = 6 FLAGS = \\(PADMY,SMG,POK,READONLY,pPOK,SCREAM\\) IV = 0 @@ -876,25 +873,89 @@ unless ($Config{useithreads}) { CUR = 5 LEN = \d+ MAGIC = $ADDR - MG_VIRTUAL = &PL_vtbl_mglob - MG_TYPE = PERL_MAGIC_regex_global\\(g\\) -'); + MG_VIRTUAL = &PL_vtbl_regexp + MG_PRIVATE = 1 + MG_TYPE = PERL_MAGIC_study\\(G\\) + MG_LEN = 261 + MG_PTR = $ADDR "\\\\377.*" +'; + + is(study beer, 1, "Our studies were successful"); + + do_test('string constant now studied', beer, $want); is (eval 'index "not too foamy", beer', 8, 'correct index'); - do_test('string constant still studied', beer, + do_test('string constant still studied', beer, $want); + + my $pie = 'good'; + + is(study $pie, 1, "Our studies were successful"); + + do_test('string constant still studied', beer, $want); + + do_test('second string also studied', $pie, 'SV = PVMG\\($ADDR\\) at $ADDR + REFCNT = 1 + FLAGS = \\(PADMY,SMG,POK,pPOK,SCREAM\\) + IV = 0 + NV = 0 + PV = $ADDR "good"\\\0 + CUR = 4 + LEN = \d+ + MAGIC = $ADDR + MG_VIRTUAL = &PL_vtbl_regexp + MG_PRIVATE = 1 + MG_TYPE = PERL_MAGIC_study\\(G\\) + MG_LEN = 260 + MG_PTR = $ADDR "\\\\377.*" +'); +} + +{ + my %z; + foreach (1, 254, 255, 65534, 65535) { + $z{$_} = "\0" x $_; + study $z{$_}; + } + do_test('short studied representation', $z{1}, 'SV = PVMG\\($ADDR\\) at $ADDR - REFCNT = 6 - FLAGS = \\(PADMY,SMG,POK,READONLY,pPOK,SCREAM\\) + REFCNT = 1 + FLAGS = \\(SMG,POK,pPOK,SCREAM\\) IV = 0 NV = 0 - PV = $ADDR "foamy"\\\0 - CUR = 5 + PV = $ADDR "\\\\0"\\\0 + CUR = 1 LEN = \d+ MAGIC = $ADDR - MG_VIRTUAL = &PL_vtbl_mglob - MG_TYPE = PERL_MAGIC_regex_global\\(g\\) + MG_VIRTUAL = &PL_vtbl_regexp + MG_PRIVATE = 1 + MG_TYPE = PERL_MAGIC_study\\(G\\) + MG_LEN = 257 + MG_PTR = $ADDR "\\\\0(?:\\\\377){256}" '); + + foreach ([254, 1], [255, 2], [65534, 2], [65535, 4] + ) { + my ($length, $bytes) = @$_; + my $quant = $length <= 32766 ? "{$length}" : '*'; + do_test("studied representation for length $length", $z{$length}, + sprintf +'SV = PVMG\\($ADDR\\) at $ADDR + REFCNT = 1 + FLAGS = \\(SMG,POK,pPOK,SCREAM\\) + IV = 0 + NV = 0 + PV = $ADDR "(?:\\\\0)%s"\\\0 + CUR = %d + LEN = \d+ + MAGIC = $ADDR + MG_VIRTUAL = &PL_vtbl_regexp + MG_PRIVATE = %d + MG_TYPE = PERL_MAGIC_study\\(G\\) + MG_LEN = %d + MG_PTR = $ADDR "\\\\0.*\\\\377" +', $quant, $length, $bytes, (256 + $length) * $bytes); + } } done_testing(); diff --git a/intrpvar.h b/intrpvar.h index 9dda6a3..cb8a861 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -155,10 +155,6 @@ PERLVAR(Iefloatsize, STRLEN) /* regex stuff */ -PERLVAR(Iscreamfirst, I32 *) -PERLVAR(Iscreamnext, I32 *) -PERLVAR(Ilastscream, SV *) - PERLVAR(Ireg_state, struct re_save_state) PERLVAR(Iregdummy, regnode) /* from regcomp.c */ @@ -233,7 +229,7 @@ When you replace this variable, it is considered a good practice to store the po PERLVARI(Iopfreehook, Perl_ophook_t, 0) /* op_free() hook */ -PERLVARI(Imaxscream, I32, -1) +/* Space for U32 */ PERLVARI(Ireginterp_cnt,I32, 0) /* Whether "Regexp" was interpolated. */ PERLVARI(Iwatchaddr, char **, 0) PERLVAR(Iwatchok, char *) diff --git a/mg.c b/mg.c index 1bdf5c4..9e18918 100644 --- a/mg.c +++ b/mg.c @@ -2358,9 +2358,8 @@ Perl_magic_setmglob(pTHX_ SV *sv, MAGIC *mg) { PERL_ARGS_ASSERT_MAGIC_SETMGLOB; PERL_UNUSED_CONTEXT; + PERL_UNUSED_ARG(sv); mg->mg_len = -1; - if (!isGV_with_GP(sv)) - SvSCREAM_off(sv); return 0; } @@ -2387,6 +2386,9 @@ Perl_magic_setregexp(pTHX_ SV *sv, MAGIC *mg) } else if (type == PERL_MAGIC_bm) { SvTAIL_off(sv); SvVALID_off(sv); + } else if (type == PERL_MAGIC_study) { + if (!isGV_with_GP(sv)) + SvSCREAM_off(sv); } else { assert(type == PERL_MAGIC_fm); } diff --git a/mg_names.c b/mg_names.c index ff73b9e..43b1945 100644 --- a/mg_names.c +++ b/mg_names.c @@ -22,6 +22,7 @@ { PERL_MAGIC_env, "env(E)" }, { PERL_MAGIC_envelem, "envelem(e)" }, { PERL_MAGIC_fm, "fm(f)" }, + { PERL_MAGIC_study, "study(G)" }, { PERL_MAGIC_regex_global, "regex_global(g)" }, { PERL_MAGIC_hints, "hints(H)" }, { PERL_MAGIC_hintselem, "hintselem(h)" }, diff --git a/mg_raw.h b/mg_raw.h index e698dcd..7a45e6d 100644 --- a/mg_raw.h +++ b/mg_raw.h @@ -38,8 +38,10 @@ "/* envelem 'e' %ENV hash element */" }, { 'f', "want_vtbl_regdata | PERL_MAGIC_READONLY_ACCEPTABLE | PERL_MAGIC_VALUE_MAGIC", "/* fm 'f' Formline ('compiled' format) */" }, + { 'G', "want_vtbl_regexp | PERL_MAGIC_READONLY_ACCEPTABLE | PERL_MAGIC_VALUE_MAGIC", + "/* study 'G' study()ed string */" }, { 'g', "want_vtbl_mglob | PERL_MAGIC_READONLY_ACCEPTABLE | PERL_MAGIC_VALUE_MAGIC", - "/* regex_global 'g' m//g target / study()ed string */" }, + "/* regex_global 'g' m//g target */" }, { 'H', "want_vtbl_hints", "/* hints 'H' %^H hash */" }, { 'h', "want_vtbl_hintselem", diff --git a/mg_vtable.h b/mg_vtable.h index 8846262..2e3ca35 100644 --- a/mg_vtable.h +++ b/mg_vtable.h @@ -29,7 +29,8 @@ #define PERL_MAGIC_env 'E' /* %ENV hash */ #define PERL_MAGIC_envelem 'e' /* %ENV hash element */ #define PERL_MAGIC_fm 'f' /* Formline ('compiled' format) */ -#define PERL_MAGIC_regex_global 'g' /* m//g target / study()ed string */ +#define PERL_MAGIC_study 'G' /* study()ed string */ +#define PERL_MAGIC_regex_global 'g' /* m//g target */ #define PERL_MAGIC_hints 'H' /* %^H hash */ #define PERL_MAGIC_hintselem 'h' /* %^H hash element */ #define PERL_MAGIC_isa 'I' /* @ISA array */ diff --git a/perl.c b/perl.c index 417b2fd..e345ae1 100644 --- a/perl.c +++ b/perl.c @@ -905,14 +905,6 @@ perl_destruct(pTHXx) /* defgv, aka *_ should be taken care of elsewhere */ - /* clean up after study() */ - SvREFCNT_dec(PL_lastscream); - PL_lastscream = NULL; - Safefree(PL_screamfirst); - PL_screamfirst = 0; - Safefree(PL_screamnext); - PL_screamnext = 0; - /* float buffer */ Safefree(PL_efloatbuf); PL_efloatbuf = NULL; diff --git a/pod/perldelta.pod b/pod/perldelta.pod index ebea453..b06fc7a 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -42,6 +42,12 @@ the built-in C<read> and C<recv> functions (among others) parse their arguments. This means that one can override the built-in functions with custom subroutines that parse their arguments the same way. +=head2 You can now C<study> more than one string + +The restriction that you can only have one C<study> active at a time has been +removed. You can now usefully C<study> as many strings as you want (until you +exhaust memory). + =head1 Security XXX Any security-related notices go here. In particular, any security @@ -85,6 +91,12 @@ The implementation of C<s///r> makes one fewer copy of the scalar's value. If a studied scalar is C<split> with a regex, the engine will now take advantage of the C<study> data. +=item * + +C<study> now uses considerably less memory for shorter strings. Strings shorter +than 65535 characters use roughly half the memory than previously, strings +shorter than 255 characters use roughly one quarter of the memory. + =back =head1 Modules and Pragmata diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index e1453e9..936d1c0 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -6756,9 +6756,8 @@ patterns you are searching and the distribution of character frequencies in the string to be searched; you probably want to compare run times with and without it to see which is faster. Those loops that scan for many short constant strings (including the constant -parts of more complex patterns) will benefit most. You may have only -one C<study> active at a time: if you study a different scalar the first -is "unstudied". (The way C<study> works is this: a linked list of every +parts of more complex patterns) will benefit most. +(The way C<study> works is this: a linked list of every character in the string to be searched is made, so we know, for example, where all the C<'k'> characters are. From each search string, the rarest character is selected, based on some static frequency tables diff --git a/pod/perlguts.pod b/pod/perlguts.pod index e99c051..d8f0527 100644 --- a/pod/perlguts.pod +++ b/pod/perlguts.pod @@ -1055,7 +1055,8 @@ The current kinds of Magic Virtual Tables are: E PERL_MAGIC_env vtbl_env %ENV hash e PERL_MAGIC_envelem vtbl_envelem %ENV hash element f PERL_MAGIC_fm vtbl_regdata Formline ('compiled' format) - g PERL_MAGIC_regex_global vtbl_mglob m//g target / study()ed string + G PERL_MAGIC_study vtbl_regdata study()ed string + g PERL_MAGIC_regex_global vtbl_mglob m//g target H PERL_MAGIC_hints vtbl_hints %^H hash h PERL_MAGIC_hintselem vtbl_hintselem %^H hash element I PERL_MAGIC_isa vtbl_isa @ISA array diff --git a/pp.c b/pp.c index 24a34a0..98d6482 100644 --- a/pp.c +++ b/pp.c @@ -707,16 +707,15 @@ PP(pp_study) { dVAR; dSP; dPOPss; register unsigned char *s; - register I32 pos; - register I32 ch; - register I32 *sfirst; - register I32 *snext; + char *sfirst_raw; STRLEN len; + MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL; + U8 quanta; + STRLEN size; + + if (mg && SvSCREAM(sv)) + RETPUSHYES; - if (sv == PL_lastscream) { - if (SvSCREAM(sv)) - RETPUSHYES; - } s = (unsigned char*)(SvPV(sv, len)); if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) { /* No point in studying a zero length string, and not safe to study @@ -726,51 +725,66 @@ PP(pp_study) stringification. Also refuse to study an FBM scalar, as this gives more flexibility in SV flag usage. No real-world code would ever end up studying an FBM scalar, so this isn't a real pessimisation. + Endemic use of I32 in Perl_screaminstr makes it hard to safely push + the study length limit from I32_MAX to U32_MAX - 1. */ RETPUSHNO; } - pos = len; - if (PL_lastscream) { - SvSCREAM_off(PL_lastscream); - SvREFCNT_dec(PL_lastscream); - } - PL_lastscream = SvREFCNT_inc_simple(sv); + if (len < 0xFF) { + quanta = 1; + } else if (len < 0xFFFF) { + quanta = 2; + } else + quanta = 4; - if (pos > PL_maxscream) { - if (PL_maxscream < 0) { - PL_maxscream = pos + 80; - Newx(PL_screamfirst, 256, I32); - Newx(PL_screamnext, PL_maxscream, I32); - } - else { - PL_maxscream = pos + pos / 4; - Renew(PL_screamnext, PL_maxscream, I32); - } - } + size = (256 + len) * quanta; + sfirst_raw = (char *)safemalloc(size); - sfirst = PL_screamfirst; - snext = PL_screamnext; - - if (!sfirst || !snext) + if (!sfirst_raw) DIE(aTHX_ "do_study: out of memory"); - for (ch = 256; ch; --ch) - *sfirst++ = -1; - sfirst -= 256; - - while (--pos >= 0) { - register const I32 ch = s[pos]; - if (sfirst[ch] >= 0) - snext[pos] = sfirst[ch] - pos; - else - snext[pos] = -pos; - sfirst[ch] = pos; + SvSCREAM_on(sv); + if (!mg) + mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0); + mg->mg_ptr = sfirst_raw; + mg->mg_len = size; + mg->mg_private = quanta; + + memset(sfirst_raw, ~0, 256 * quanta); + + /* The assumption here is that most studied strings are fairly short, hence + the pain of the extra code is worth it, given the memory savings. + 80 character string, 336 bytes as U8, down from 1344 as U32 + 800 character string, 2112 bytes as U16, down from 4224 as U32 + */ + + if (quanta == 1) { + U8 *const sfirst = (U8 *)sfirst_raw; + U8 *const snext = sfirst + 256; + while (len-- > 0) { + const U8 ch = s[len]; + snext[len] = sfirst[ch]; + sfirst[ch] = len; + } + } else if (quanta == 2) { + U16 *const sfirst = (U16 *)sfirst_raw; + U16 *const snext = sfirst + 256; + while (len-- > 0) { + const U8 ch = s[len]; + snext[len] = sfirst[ch]; + sfirst[ch] = len; + } + } else { + U32 *const sfirst = (U32 *)sfirst_raw; + U32 *const snext = sfirst + 256; + while (len-- > 0) { + const U8 ch = s[len]; + snext[len] = sfirst[ch]; + sfirst[ch] = len; + } } - SvSCREAM_on(sv); - /* piggyback on m//g magic */ - sv_magic(sv, NULL, PERL_MAGIC_regex_global, NULL, 0); RETPUSHYES; } diff --git a/regen/mg_vtable.pl b/regen/mg_vtable.pl index af0041d..799be6b 100644 --- a/regen/mg_vtable.pl +++ b/regen/mg_vtable.pl @@ -42,9 +42,10 @@ my %mg = desc => '%ENV hash element' }, fm => { char => 'f', vtable => 'regdata', value_magic => 1, readonly_acceptable => 1, desc => "Formline ('compiled' format)" }, + study => { char => 'G', vtable => 'regexp', value_magic => 1, + readonly_acceptable => 1, desc => 'study()ed string' }, regex_global => { char => 'g', vtable => 'mglob', value_magic => 1, - readonly_acceptable => 1, - desc => 'm//g target / study()ed string' }, + readonly_acceptable => 1, desc => 'm//g target' }, hints => { char => 'H', vtable => 'hints', desc => '%^H hash' }, hintselem => { char => 'h', vtable => 'hintselem', desc => '%^H hash element' }, diff --git a/regexec.c b/regexec.c index 6ae2770..99ac5b3 100644 --- a/regexec.c +++ b/regexec.c @@ -695,8 +695,23 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos, if ((flags & REXEC_SCREAM) && SvSCREAM(sv)) { I32 p = -1; /* Internal iterator of scream. */ I32 * const pp = data ? data->scream_pos : &p; + const MAGIC *mg; + bool found = FALSE; - if (PL_screamfirst[BmRARE(check)] >= 0 + assert(SvMAGICAL(sv)); + mg = mg_find(sv, PERL_MAGIC_study); + assert(mg); + + if (mg->mg_private == 1) { + found = ((U8 *)mg->mg_ptr)[BmRARE(check)] != (U8)~0; + } else if (mg->mg_private == 2) { + found = ((U16 *)mg->mg_ptr)[BmRARE(check)] != (U16)~0; + } else { + assert (mg->mg_private == 4); + found = ((U32 *)mg->mg_ptr)[BmRARE(check)] != (U32)~0; + } + + if (found || ( BmRARE(check) == '\n' && (BmPREVIOUS(check) == SvCUR(check) - 1) && SvTAIL(check) )) diff --git a/sv.c b/sv.c index 445f9d4..fffa6e9 100644 --- a/sv.c +++ b/sv.c @@ -12994,12 +12994,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* regex stuff */ - PL_screamfirst = NULL; - PL_screamnext = NULL; - PL_maxscream = -1; /* reinits on demand */ - PL_lastscream = NULL; - - PL_regdummy = proto_perl->Iregdummy; PL_colorset = 0; /* reinits PL_colors[] */ /*PL_colors[6] = {0,0,0,0,0,0};*/ diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat index 1a0d0f1..e17a573 100644 --- a/t/porting/known_pod_issues.dat +++ b/t/porting/known_pod_issues.dat @@ -233,7 +233,7 @@ pod/perlgit.pod Verbatim line length including indents exceeds 80 by 14 pod/perlgpl.pod Verbatim line length including indents exceeds 80 by 50 pod/perlguts.pod ? Should you be using F<...> or maybe L<...> instead of 2 pod/perlguts.pod ? Should you be using L<...> instead of 1 -pod/perlguts.pod Verbatim line length including indents exceeds 80 by 26 +pod/perlguts.pod Verbatim line length including indents exceeds 80 by 25 pod/perlhack.pod ? Should you be using L<...> instead of 1 pod/perlhack.pod Verbatim line length including indents exceeds 80 by 1 pod/perlhacktips.pod Verbatim line length including indents exceeds 80 by 1 diff --git a/util.c b/util.c index 093b70e..fcfeda9 100644 --- a/util.c +++ b/util.c @@ -854,22 +854,56 @@ Perl_screaminstr(pTHX_ SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift { dVAR; register const unsigned char *big; - register I32 pos; + U32 pos = 0; /* hush a gcc warning */ register I32 previous; register I32 first; register const unsigned char *little; register I32 stop_pos; register const unsigned char *littleend; - I32 found = 0; + bool found = FALSE; + const MAGIC * mg; + const void *screamnext_raw = NULL; /* hush a gcc warning */ + bool cant_find = FALSE; /* hush a gcc warning */ PERL_ARGS_ASSERT_SCREAMINSTR; + assert(SvMAGICAL(bigstr)); + mg = mg_find(bigstr, PERL_MAGIC_study); + assert(mg); assert(SvTYPE(littlestr) == SVt_PVMG); assert(SvVALID(littlestr)); - if (*old_posp == -1 - ? (pos = PL_screamfirst[BmRARE(littlestr)]) < 0 - : (((pos = *old_posp), pos += PL_screamnext[pos]) == 0)) { + if (mg->mg_private == 1) { + const U8 *const screamfirst = (U8 *)mg->mg_ptr; + const U8 *const screamnext = screamfirst + 256; + + screamnext_raw = (const void *)screamnext; + + pos = *old_posp == -1 + ? screamfirst[BmRARE(littlestr)] : screamnext[*old_posp]; + cant_find = pos == (U8)~0; + } else if (mg->mg_private == 2) { + const U16 *const screamfirst = (U16 *)mg->mg_ptr; + const U16 *const screamnext = screamfirst + 256; + + screamnext_raw = (const void *)screamnext; + + pos = *old_posp == -1 + ? screamfirst[BmRARE(littlestr)] : screamnext[*old_posp]; + cant_find = pos == (U16)~0; + } else if (mg->mg_private == 4) { + const U32 *const screamfirst = (U32 *)mg->mg_ptr; + const U32 *const screamnext = screamfirst + 256; + + screamnext_raw = (const void *)screamnext; + + pos = *old_posp == -1 + ? screamfirst[BmRARE(littlestr)] : screamnext[*old_posp]; + cant_find = pos == (U32)~0; + } else + Perl_croak(aTHX_ "panic: unknown study size %u", mg->mg_private); + + if (cant_find) { cant_find: if ( BmRARE(littlestr) == '\n' && BmPREVIOUS(littlestr) == SvCUR(littlestr) - 1) { @@ -900,28 +934,59 @@ Perl_screaminstr(pTHX_ SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift #endif return NULL; } - while (pos < previous + start_shift) { - if (!(pos += PL_screamnext[pos])) - goto cant_find; + if (mg->mg_private == 1) { + const U8 *const screamnext = (const U8 *const) screamnext_raw; + while ((I32)pos < previous + start_shift) { + pos = screamnext[pos]; + if (pos == (U8)~0) + goto cant_find; + } + } else if (mg->mg_private == 2) { + const U16 *const screamnext = (const U16 *const) screamnext_raw; + while ((I32)pos < previous + start_shift) { + pos = screamnext[pos]; + if (pos == (U16)~0) + goto cant_find; + } + } else if (mg->mg_private == 4) { + const U32 *const screamnext = (const U32 *const) screamnext_raw; + while ((I32)pos < previous + start_shift) { + pos = screamnext[pos]; + if (pos == (U32)~0) + goto cant_find; + } } big -= previous; - do { - register const unsigned char *s, *x; - if (pos >= stop_pos) break; - if (big[pos] != first) - continue; - for (x=big+pos+1,s=little; s < littleend; /**/ ) { - if (*s++ != *x++) { - s--; - break; + while (1) { + if ((I32)pos >= stop_pos) break; + if (big[pos] == first) { + const unsigned char *s = little; + const unsigned char *x = big + pos + 1; + while (s < littleend) { + if (*s != *x++) + break; + ++s; + } + if (s == littleend) { + *old_posp = (I32)pos; + if (!last) return (char *)(big+pos); + found = TRUE; } } - if (s == littleend) { - *old_posp = pos; - if (!last) return (char *)(big+pos); - found = 1; + if (mg->mg_private == 1) { + pos = ((const U8 *const)screamnext_raw)[pos]; + if (pos == (U8)~0) + break; + } else if (mg->mg_private == 2) { + pos = ((const U16 *const)screamnext_raw)[pos]; + if (pos == (U16)~0) + break; + } else if (mg->mg_private == 4) { + pos = ((const U32 *const)screamnext_raw)[pos]; + if (pos == (U32)~0) + break; } - } while ( pos += PL_screamnext[pos] ); + }; if (last && found) return (char *)(big+(*old_posp)); check_tail: -- Perl5 Master Repository
