Author: njn
Date: 2007-10-12 03:33:12 +0100 (Fri, 12 Oct 2007)
New Revision: 6984

Log:
Don't dup children of insignificant XPts in dup_XTree.  Made many-xpts
more than 10x faster.

Modified:
   branches/MASSIF2/massif/ms_main.c


Modified: branches/MASSIF2/massif/ms_main.c
===================================================================
--- branches/MASSIF2/massif/ms_main.c   2007-10-11 08:46:56 UTC (rev 6983)
+++ branches/MASSIF2/massif/ms_main.c   2007-10-12 02:33:12 UTC (rev 6984)
@@ -54,6 +54,12 @@
 //   tinycc    0.45s  ma: 7.4s (16.4x, -----)
 //   many-xpts 0.05s  ma:23.5s (470.6x, -----)
 //
+// Don't dup children of insignificant XPts in dup_XTree.  Made many-xpts
+// more than 10x faster.
+//   heap      0.59s  ma:20.3s (34.5x, -----)
+//   tinycc    0.49s  ma: 7.6s (15.4x, -----)
+//   many-xpts 0.04s  ma: 1.9s (46.2x, -----)
+//
 // Todo:
 // - do snapshots on client requests
 // - C++ tests -- for each of the allocators, and overloaded versions of
@@ -577,30 +583,55 @@
           :                                    0);
 }
 
+// Does the xpt account for >= 1% (or so) of total memory used?
+static Bool is_significant_XPt(XPt* xpt, SizeT curr_total_szB)
+{
+   // clo_threshold is measured in hundredths of a percent of total size,
+   // ie. 10,000ths of total size.  So clo_threshold=100 means that the
+   // threshold is 1% of total size.  If curr_total_szB is zero, we consider
+   // every XPt significant.  We also always consider the alloc_xpt to be
+   // significant.
+   tl_assert(xpt->curr_szB <= curr_total_szB);
+   return xpt == alloc_xpt || 0 == clo_threshold ||
+      (0 != curr_total_szB &&
+           // Nb: 10000 is a ULong to avoid possible overflow problems.
+           xpt->curr_szB * 10000ULL / curr_total_szB >= clo_threshold);
+}
 
+
 //------------------------------------------------------------//
 //--- XTree Operations                                     ---//
 //------------------------------------------------------------//
 
-// XXX: taking a full snapshot... could/should just snapshot the significant
-// parts.  Nb: then the amounts wouldn't add up, unless I represented the
-// "insignificant places" in XPts.  Might be worthwhile -- there can
-// be a lot of zero nodes in the XTree... (simpler: ignore all zero nodes
-// unless threshold=0?)
-static XPt* dup_XTree(XPt* xpt, XPt* parent)
+static XPt* dup_XTree(XPt* xpt, XPt* parent, SizeT total_szB)
 {
    Int  i;
    XPt* dup_xpt = VG_(malloc)(sizeof(XPt));
    dup_xpt->ip           = xpt->ip;
    dup_xpt->curr_szB     = xpt->curr_szB;
    dup_xpt->parent       = parent;           // Nb: not xpt->children!
-   dup_xpt->n_children   = xpt->n_children;
-   dup_xpt->max_children = xpt->n_children;  // Nb: don't copy max_children!
-   dup_xpt->children     = VG_(malloc)(dup_xpt->max_children * sizeof(XPt*));
-   for (i = 0; i < xpt->n_children; i++) {
-      dup_xpt->children[i] = dup_XTree(xpt->children[i], dup_xpt);
+   // If this node is not significant, there's no point duplicating its
+   // children.  And not doing so can make a huge difference, eg.
+   // it speeds up massif/perf/many-xpts by over 10x.
+   if (!is_significant_XPt(xpt, total_szB)) {
+      dup_xpt->n_children   = 0;
+      dup_xpt->max_children = 0;
+      dup_xpt->children     = NULL;
+   } else {
+      dup_xpt->n_children   = xpt->n_children;
+      dup_xpt->max_children = xpt->max_children;
+      // We copy n_children children (not max_children).  If n_children==0,
+      // don't bother allocating an 'children' array in the dup.
+      if (xpt->n_children > 0) {
+         dup_xpt->children = VG_(malloc)(dup_xpt->n_children * sizeof(XPt*));
+         for (i = 0; i < xpt->n_children; i++) {
+            dup_xpt->children[i] =
+               dup_XTree(xpt->children[i], dup_xpt, total_szB);
+         }
+      } else {
+         dup_xpt->children = NULL;
+      }
    }
-
    n_dupd_xpts++;
 
    return dup_xpt;
@@ -1175,7 +1206,9 @@
    if (clo_heap) {
       snapshot->heap_szB = heap_szB;
       if (is_detailed) {
-         snapshot->alloc_xpt = dup_XTree(alloc_xpt, /*parent*/NULL);
+         // XXX: total_szB computed in various places -- factor it out
+         SizeT total_szB = heap_szB + clo_heap_admin*n_heap_blocks + 
stacks_szB;
+         snapshot->alloc_xpt = dup_XTree(alloc_xpt, /*parent*/NULL, total_szB);
          tl_assert(snapshot->alloc_xpt->curr_szB == heap_szB);
       }
       snapshot->heap_admin_szB = clo_heap_admin * n_heap_blocks;
@@ -1710,21 +1743,6 @@
    return mbuf;
 }
 
-// Does the xpt account for >= 1% (or so) of total memory used?
-static Bool is_significant_XPt(XPt* xpt, SizeT curr_total_szB)
-{
-   // clo_threshold is measured in hundredths of a percent of total size,
-   // ie. 10,000ths of total size.  So clo_threshold=100 means that the
-   // threshold is 1% of total size.  If curr_total_szB is zero, we consider
-   // every XPt significant.  We also always consider the alloc_xpt to be
-   // significant.
-   tl_assert(xpt->curr_szB <= curr_total_szB);
-   return xpt == alloc_xpt || 0 == clo_threshold ||
-      (0 != curr_total_szB &&
-           // Nb: 10000 is a ULong to avoid possible overflow problems.
-           xpt->curr_szB * 10000ULL / curr_total_szB >= clo_threshold);
-}
-
 static void pp_snapshot_XPt(Int fd, XPt* xpt, Int depth, Char* depth_str,
                             Int depth_str_len,
                             SizeT curr_heap_szB, SizeT curr_total_szB)


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
Valgrind-developers mailing list
Valgrind-developers@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/valgrind-developers

Reply via email to