In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/6f8848d55c2ea11b7ed7f8cf1afb2d6a28c76e4d?hp=77eddfe1ca901cdf5a01ed0f5b9f1db812410513>
- Log ----------------------------------------------------------------- commit 6f8848d55c2ea11b7ed7f8cf1afb2d6a28c76e4d Author: Tony Cook <[email protected]> Date: Wed Mar 2 16:19:43 2016 +1100 reduce number of calls to add_cp_to_invlist() Both S_get_ANYOF_cp_list_for_ssc() and S_put_charclass_bitmap_innards() have tight loops that iterate over a bitmap and calls add_cp_to_invlist() for each bit set. add_cp_to_invlist() is a fairly wrapper around _add_range_to_invlist() which create a new invlist to hold the range and then calls _invlist_union() to perform the add. This is moderately expensive. Instead of indirectly calling _add_range_to_invlist(), where it's simple to do so, instead look for ranges of bits set in the bitmaps and call _add_range_to_invlist() directly. This changed the cachegrind output from running: ./perl -e 'qr/_?[\W_]/;' from: ==23406== I refs: 1,752,149 ==23406== I1 misses: 5,405 ==23406== LLi misses: 3,588 ==23406== I1 miss rate: 0.30% ==23406== LLi miss rate: 0.20% ==23406== ==23406== D refs: 683,242 (414,061 rd + 269,181 wr) ==23406== D1 misses: 13,370 ( 7,646 rd + 5,724 wr) ==23406== LLd misses: 7,025 ( 3,417 rd + 3,608 wr) ==23406== D1 miss rate: 1.9% ( 1.8% + 2.1% ) ==23406== LLd miss rate: 1.0% ( 0.8% + 1.3% ) ==23406== ==23406== LL refs: 18,775 ( 13,051 rd + 5,724 wr) ==23406== LL misses: 10,613 ( 7,005 rd + 3,608 wr) ==23406== LL miss rate: 0.4% ( 0.3% + 1.3% ) to: ==25041== I refs: 1,189,412 ==25041== I1 misses: 5,218 ==25041== LLi misses: 3,598 ==25041== I1 miss rate: 0.43% ==25041== LLi miss rate: 0.30% ==25041== ==25041== D refs: 440,339 (283,047 rd + 157,292 wr) ==25041== D1 misses: 11,872 ( 7,257 rd + 4,615 wr) ==25041== LLd misses: 7,024 ( 3,417 rd + 3,607 wr) ==25041== D1 miss rate: 2.6% ( 2.5% + 2.9% ) ==25041== LLd miss rate: 1.5% ( 1.2% + 2.2% ) ==25041== ==25041== LL refs: 17,090 ( 12,475 rd + 4,615 wr) ==25041== LL misses: 10,622 ( 7,015 rd + 3,607 wr) ==25041== LL miss rate: 0.6% ( 0.4% + 2.2% ) ie. from 1.75 million instructions to 1.19 million instructions. ----------------------------------------------------------------------- Summary of changes: regcomp.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/regcomp.c b/regcomp.c index bbb998f..893c778 100644 --- a/regcomp.c +++ b/regcomp.c @@ -1423,7 +1423,12 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state, /* Add in the points from the bit map */ for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) { if (ANYOF_BITMAP_TEST(node, i)) { - invlist = add_cp_to_invlist(invlist, i); + unsigned int start = i++; + + for (; i < NUM_ANYOF_CODE_POINTS && ANYOF_BITMAP_TEST(node, i); ++i) { + /* empty */ + } + invlist = _add_range_to_invlist(invlist, start, i-1); new_node_has_latin1 = TRUE; } } @@ -19969,7 +19974,11 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, /* Accumulate the bit map into the unconditional match list */ for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) { if (BITMAP_TEST(bitmap, i)) { - invlist = add_cp_to_invlist(invlist, i); + int start = i++; + for (; i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i); i++) { + /* empty */ + } + invlist = _add_range_to_invlist(invlist, start, i-1); } } -- Perl5 Master Repository
