Author: njn Date: 2007-10-12 03:33:12 +0100 (Fri, 12 Oct 2007) New Revision: 6984
Log: Don't dup children of insignificant XPts in dup_XTree. Made many-xpts more than 10x faster. Modified: branches/MASSIF2/massif/ms_main.c Modified: branches/MASSIF2/massif/ms_main.c =================================================================== --- branches/MASSIF2/massif/ms_main.c 2007-10-11 08:46:56 UTC (rev 6983) +++ branches/MASSIF2/massif/ms_main.c 2007-10-12 02:33:12 UTC (rev 6984) @@ -54,6 +54,12 @@ // tinycc 0.45s ma: 7.4s (16.4x, -----) // many-xpts 0.05s ma:23.5s (470.6x, -----) // +// Don't dup children of insignificant XPts in dup_XTree. Made many-xpts +// more than 10x faster. +// heap 0.59s ma:20.3s (34.5x, -----) +// tinycc 0.49s ma: 7.6s (15.4x, -----) +// many-xpts 0.04s ma: 1.9s (46.2x, -----) +// // Todo: // - do snapshots on client requests // - C++ tests -- for each of the allocators, and overloaded versions of @@ -577,30 +583,55 @@ : 0); } +// Does the xpt account for >= 1% (or so) of total memory used? +static Bool is_significant_XPt(XPt* xpt, SizeT curr_total_szB) +{ + // clo_threshold is measured in hundredths of a percent of total size, + // ie. 10,000ths of total size. So clo_threshold=100 means that the + // threshold is 1% of total size. If curr_total_szB is zero, we consider + // every XPt significant. We also always consider the alloc_xpt to be + // significant. + tl_assert(xpt->curr_szB <= curr_total_szB); + return xpt == alloc_xpt || 0 == clo_threshold || + (0 != curr_total_szB && + // Nb: 10000 is a ULong to avoid possible overflow problems. + xpt->curr_szB * 10000ULL / curr_total_szB >= clo_threshold); +} + //------------------------------------------------------------// //--- XTree Operations ---// //------------------------------------------------------------// -// XXX: taking a full snapshot... could/should just snapshot the significant -// parts. Nb: then the amounts wouldn't add up, unless I represented the -// "insignificant places" in XPts. Might be worthwhile -- there can -// be a lot of zero nodes in the XTree... (simpler: ignore all zero nodes -// unless threshold=0?) -static XPt* dup_XTree(XPt* xpt, XPt* parent) +static XPt* dup_XTree(XPt* xpt, XPt* parent, SizeT total_szB) { Int i; XPt* dup_xpt = VG_(malloc)(sizeof(XPt)); dup_xpt->ip = xpt->ip; dup_xpt->curr_szB = xpt->curr_szB; dup_xpt->parent = parent; // Nb: not xpt->children! - dup_xpt->n_children = xpt->n_children; - dup_xpt->max_children = xpt->n_children; // Nb: don't copy max_children! - dup_xpt->children = VG_(malloc)(dup_xpt->max_children * sizeof(XPt*)); - for (i = 0; i < xpt->n_children; i++) { - dup_xpt->children[i] = dup_XTree(xpt->children[i], dup_xpt); + // If this node is not significant, there's no point duplicating its + // children. And not doing so can make a huge difference, eg. + // it speeds up massif/perf/many-xpts by over 10x. + if (!is_significant_XPt(xpt, total_szB)) { + dup_xpt->n_children = 0; + dup_xpt->max_children = 0; + dup_xpt->children = NULL; + } else { + dup_xpt->n_children = xpt->n_children; + dup_xpt->max_children = xpt->max_children; + // We copy n_children children (not max_children). If n_children==0, + // don't bother allocating an 'children' array in the dup. + if (xpt->n_children > 0) { + dup_xpt->children = VG_(malloc)(dup_xpt->n_children * sizeof(XPt*)); + for (i = 0; i < xpt->n_children; i++) { + dup_xpt->children[i] = + dup_XTree(xpt->children[i], dup_xpt, total_szB); + } + } else { + dup_xpt->children = NULL; + } } - n_dupd_xpts++; return dup_xpt; @@ -1175,7 +1206,9 @@ if (clo_heap) { snapshot->heap_szB = heap_szB; if (is_detailed) { - snapshot->alloc_xpt = dup_XTree(alloc_xpt, /*parent*/NULL); + // XXX: total_szB computed in various places -- factor it out + SizeT total_szB = heap_szB + clo_heap_admin*n_heap_blocks + stacks_szB; + snapshot->alloc_xpt = dup_XTree(alloc_xpt, /*parent*/NULL, total_szB); tl_assert(snapshot->alloc_xpt->curr_szB == heap_szB); } snapshot->heap_admin_szB = clo_heap_admin * n_heap_blocks; @@ -1710,21 +1743,6 @@ return mbuf; } -// Does the xpt account for >= 1% (or so) of total memory used? -static Bool is_significant_XPt(XPt* xpt, SizeT curr_total_szB) -{ - // clo_threshold is measured in hundredths of a percent of total size, - // ie. 10,000ths of total size. So clo_threshold=100 means that the - // threshold is 1% of total size. If curr_total_szB is zero, we consider - // every XPt significant. We also always consider the alloc_xpt to be - // significant. - tl_assert(xpt->curr_szB <= curr_total_szB); - return xpt == alloc_xpt || 0 == clo_threshold || - (0 != curr_total_szB && - // Nb: 10000 is a ULong to avoid possible overflow problems. - xpt->curr_szB * 10000ULL / curr_total_szB >= clo_threshold); -} - static void pp_snapshot_XPt(Int fd, XPt* xpt, Int depth, Char* depth_str, Int depth_str_len, SizeT curr_heap_szB, SizeT curr_total_szB) ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ _______________________________________________ Valgrind-developers mailing list Valgrind-developers@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/valgrind-developers