In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/accb4364d92e26c20e6a538fc04d1af52a8b94e2?hp=ea99f9ada5de06993fcf40478fcde588dd0c7be7>
- Log ----------------------------------------------------------------- commit accb4364d92e26c20e6a538fc04d1af52a8b94e2 Author: Karl Williamson <[email protected]> Date: Tue Feb 23 14:04:19 2016 -0700 Use less memory in compiling regexes This is at least a partial patch for [perl #127392], cutting the maximum memory used on my box from around 8600kB to 7800kB. For [perl #127568], which has been merged into #127392, the savings are even larger, about 37% Previously a large number of large mortal SVs could be created while compiling a single regex pattern, and their accumulated memory quickly added up. This changes things to not use so many mortals. M embed.fnc M embed.h M proto.h M regcomp.c commit f0c0c5adc5daeb0fbad1b23dca12ba5c46560a16 Author: Karl Williamson <[email protected]> Date: Tue Feb 23 14:00:23 2016 -0700 regcomp.c: Guard against corrupting inversion list SV I don't know of any cases where this happens, but in working on the next commit I triggered a problem with shrinking an inversion list so much that the required 0 UV at the beginning was freed. M embed.fnc M proto.h M regcomp.c ----------------------------------------------------------------------- Summary of changes: embed.fnc | 3 +- embed.h | 1 + proto.h | 5 ++- regcomp.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- 4 files changed, 110 insertions(+), 32 deletions(-) diff --git a/embed.fnc b/embed.fnc index 6063135..c35a815 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1526,10 +1526,11 @@ EiMRn |UV |invlist_max |NN SV* const invlist EiM |void |invlist_set_len|NN SV* const invlist|const UV len|const bool offset EiMRn |bool |invlist_is_iterating|NN SV* const invlist #ifndef PERL_EXT_RE_BUILD +EsM |void |invlist_replace_list|NN SV *dest|NN SV *src EiMRn |IV* |get_invlist_previous_index_addr|NN SV* invlist EiMn |void |invlist_set_previous_index|NN SV* const invlist|const IV index EiMRn |IV |invlist_previous_index|NN SV* const invlist -EiMn |void |invlist_trim |NN SV* const invlist +EiMn |void |invlist_trim |NN SV* invlist #endif EiMR |SV* |invlist_clone |NN SV* const invlist EiMRn |STRLEN*|get_invlist_iter_addr |NN SV* invlist diff --git a/embed.h b/embed.h index 4b01dbe..75275c4 100644 --- a/embed.h +++ b/embed.h @@ -949,6 +949,7 @@ # if defined(PERL_IN_REGCOMP_C) #define get_invlist_previous_index_addr S_get_invlist_previous_index_addr #define invlist_previous_index S_invlist_previous_index +#define invlist_replace_list(a,b) S_invlist_replace_list(aTHX_ a,b) #define invlist_set_previous_index S_invlist_set_previous_index #define invlist_trim S_invlist_trim # endif diff --git a/proto.h b/proto.h index f081d9e..9f970a3 100644 --- a/proto.h +++ b/proto.h @@ -3664,10 +3664,13 @@ PERL_STATIC_INLINE IV S_invlist_previous_index(SV* const invlist) #define PERL_ARGS_ASSERT_INVLIST_PREVIOUS_INDEX \ assert(invlist) +STATIC void S_invlist_replace_list(pTHX_ SV *dest, SV *src); +#define PERL_ARGS_ASSERT_INVLIST_REPLACE_LIST \ + assert(dest); assert(src) PERL_STATIC_INLINE void S_invlist_set_previous_index(SV* const invlist, const IV index); #define PERL_ARGS_ASSERT_INVLIST_SET_PREVIOUS_INDEX \ assert(invlist) -PERL_STATIC_INLINE void S_invlist_trim(SV* const invlist); +PERL_STATIC_INLINE void S_invlist_trim(SV* invlist); #define PERL_ARGS_ASSERT_INVLIST_TRIM \ assert(invlist) # endif diff --git a/regcomp.c b/regcomp.c index 5725a3a..7c2f419 100644 --- a/regcomp.c +++ b/regcomp.c @@ -8347,6 +8347,51 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset) #ifndef PERL_IN_XSUB_RE +STATIC void +S_invlist_replace_list(pTHX_ SV * dest, SV * src) +{ + /* Replaces the inversion list in 'src' with the one in 'dest'. It steals + * the list from 'src', so 'src' is made to have a NULL list. This is + * similar to what SvSetMagicSV() would do, if it were implemented on + * inversion lists, though this routine avoids a copy */ + + const UV src_len = _invlist_len(src); + const bool src_offset = *get_invlist_offset_addr(src); + const STRLEN src_byte_len = SvCUR(src); + char * array = SvPVX(src); + + const int oldtainted = TAINT_get; + + PERL_ARGS_ASSERT_INVLIST_REPLACE_LIST; + + assert(SvTYPE(src) == SVt_INVLIST); + assert(SvTYPE(dest) == SVt_INVLIST); + assert(! invlist_is_iterating(src)); + + /* Make sure it ends in the right place with a NUL, as our inversion list + * manipulations aren't careful to keep this true, but sv_usepvn_flags() + * asserts it */ + array[src_byte_len - 1] = '\0'; + + TAINT_NOT; /* Otherwise it breaks */ + sv_usepvn_flags(dest, + (char *) array, + src_byte_len - 1, + + /* This flag is documented to cause a copy to be avoided */ + SV_HAS_TRAILING_NUL); + TAINT_set(oldtainted); + SvPV_set(src, 0); + SvLEN_set(src, 0); + SvCUR_set(src, 0); + + /* Finish up copying over the other fields in an inversion list */ + *get_invlist_offset_addr(dest) = src_offset; + invlist_set_len(dest, src_len, src_offset); + *get_invlist_previous_index_addr(dest) = 0; + invlist_iterfinish(dest); +} + PERL_STATIC_INLINE IV* S_get_invlist_previous_index_addr(SV* invlist) { @@ -8382,15 +8427,18 @@ S_invlist_set_previous_index(SV* const invlist, const IV index) } PERL_STATIC_INLINE void -S_invlist_trim(SV* const invlist) +S_invlist_trim(SV* invlist) { PERL_ARGS_ASSERT_INVLIST_TRIM; assert(SvTYPE(invlist) == SVt_INVLIST); /* Change the length of the inversion list to how many entries it currently - * has */ - SvPV_shrink_to_cur((SV *) invlist); + * has. But don't shorten it so that it would free up the required + * initial 0 UV (and a trailing NUL byte) */ + if (SvCUR(invlist) > TO_INTERNAL_SIZE(1) + 1) { + SvPV_shrink_to_cur(invlist); + } } #endif /* ifndef PERL_IN_XSUB_RE */ @@ -8803,10 +8851,10 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, /* Take the union of two inversion lists and point <output> to it. *output * SHOULD BE DEFINED upon input, and if it points to one of the two lists, * the reference count to that list will be decremented if not already a - * temporary (mortal); otherwise *output will be made correspondingly - * mortal. The first list, <a>, may be NULL, in which case a copy of the - * second list is returned. If <complement_b> is TRUE, the union is taken - * of the complement (inversion) of <b> instead of b itself. + * temporary (mortal); otherwise just its contents will be modified to be + * the union. The first list, <a>, may be NULL, in which case a copy of + * the second list is returned. If <complement_b> is TRUE, the union is + * taken of the complement (inversion) of <b> instead of b itself. * * The basis for this comes from "Unicode Demystified" Chapter 13 by * Richard Gillam, published by Addison-Wesley, and explained at some @@ -9034,21 +9082,30 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, } } - /* We may be removing a reference to one of the inputs. If so, the output - * is made mortal if the input was. (Mortal SVs shouldn't have their ref - * count decremented) */ - if (a == *output || b == *output) { + if (a != *output && b != *output) { + *output = u; + } + else { + /* Here, the output is to be the same as one of the input scalars, + * hence replacing it. The simple thing to do is to free the input + * scalar, making it instead be the output one. But experience has + * shown [perl #127392] that if the input is a mortal, we can get a + * huge build-up of these during regex compilation before they get + * freed. So for that case, replace just the input's interior with + * the output's, and then free the output */ + assert(! invlist_is_iterating(*output)); - if ((SvTEMP(*output))) { - sv_2mortal(u); + + if (! SvTEMP(*output)) { + SvREFCNT_dec_NN(*output); + *output = u; } else { - SvREFCNT_dec_NN(*output); + invlist_replace_list(*output, u); + SvREFCNT_dec_NN(u); } } - *output = u; - return; } @@ -9059,11 +9116,11 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, /* Take the intersection of two inversion lists and point <i> to it. *i * SHOULD BE DEFINED upon input, and if it points to one of the two lists, * the reference count to that list will be decremented if not already a - * temporary (mortal); otherwise *i will be made correspondingly mortal. - * The first list, <a>, may be NULL, in which case an empty list is - * returned. If <complement_b> is TRUE, the result will be the - * intersection of <a> and the complement (or inversion) of <b> instead of - * <b> directly. + * temporary (mortal); otherwise just its contents will be modified to be + * the intersection. The first list, <a>, may be NULL, in which case an + * empty list is returned. If <complement_b> is TRUE, the result will be + * the intersection of <a> and the complement (or inversion) of <b> instead + * of <b> directly. * * The basis for this comes from "Unicode Demystified" Chapter 13 by * Richard Gillam, published by Addison-Wesley, and explained at some @@ -9275,21 +9332,37 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, } } - /* We may be removing a reference to one of the inputs. If so, the output - * is made mortal if the input was. (Mortal SVs shouldn't have their ref - * count decremented) */ - if (a == *i || b == *i) { + if (a != *i && b != *i) { + *i = r; + } + else { + /* Here, the output is to be the same as one of the input scalars, + * hence replacing it. The simple thing to do is to free the input + * scalar, making it instead be the output one. But experience has + * shown [perl #127392] that if the input is a mortal, we can get a + * huge build-up of these during regex compilation before they get + * freed. So for that case, replace just the input's interior with + * the output's, and then free the output. A short-cut in this case + * is if the output is empty, we can just set the input to be empty */ + assert(! invlist_is_iterating(*i)); - if (SvTEMP(*i)) { - sv_2mortal(r); + + if (! SvTEMP(*i)) { + SvREFCNT_dec_NN(*i); + *i = r; } else { - SvREFCNT_dec_NN(*i); + if (len_r) { + invlist_replace_list(*i, r); + } + else { + invlist_set_len(*i, 0, 0); + invlist_trim(*i); + } + SvREFCNT_dec_NN(r); } } - *i = r; - return; } -- Perl5 Master Repository
