Author: sewardj
Date: 2008-02-24 19:51:51 +0000 (Sun, 24 Feb 2008)
New Revision: 7452

Log:
Make some efforts to recover the large Dwarf3-reading performance loss
caused by r7435.  r7435 causes information about many more variables
than previously, to be recorded, especially in programs with a lot of
inlining.  This commit recovers some of that lossage by representing
address ranges for TempVars which turns over less memory.



Modified:
   branches/DATASYMS/coregrind/m_debuginfo/readdwarf3.c
   branches/DATASYMS/coregrind/m_debuginfo/storage.c


Modified: branches/DATASYMS/coregrind/m_debuginfo/readdwarf3.c
===================================================================
--- branches/DATASYMS/coregrind/m_debuginfo/readdwarf3.c        2008-02-24 
18:47:12 UTC (rev 7451)
+++ branches/DATASYMS/coregrind/m_debuginfo/readdwarf3.c        2008-02-24 
19:51:51 UTC (rev 7452)
@@ -84,17 +84,33 @@
    different DIEs (generally a declarer and a definer).  We punt on
    these.  Could do better here.
 
-   Improve performance.  The number of type entities that end up in
-   the list of TyAdmins rapidly becomes huge (eg, for
-   libQtGui.so.4.3.2 (amd64-linux, size 80729047 bytes), there are
-   786860 entries in the list).  Mostly this seems to be caused by g++
-   adding type DIEs for all the basic types once for each source file
-   contributing to the compilation unit, and for a large library they
-   add up quickly.  That causes both a lot of work for this reader
-   module, and also wastes vast amounts of memory storing this
-   duplicated information.  We could surely do a lot better here.
-*/
+   POTENTIAL PERFORMANCE IMPROVEMENTS:
 
+   The number of type entities that end up in the list of TyAdmins
+   rapidly becomes huge (eg, for libQtGui.so.4.3.2 (amd64-linux, size
+   80729047 bytes), there are 786860 entries in the list).  Mostly
+   this seems to be caused by g++ adding type DIEs for all the basic
+   types once for each source file contributing to the compilation
+   unit, and for a large library they add up quickly.  That causes
+   both a lot of work for this reader module, and also wastes vast
+   amounts of memory storing this duplicated information.  We could
+   surely do a lot better here.
+
+   Handle interaction between read_DIE and parse_{var,type}_DIE
+   better.  Currently read_DIE reads the entire DIE just to find where
+   the end is (and for debug printing), so that it can later reliably
+   move the cursor to the end regardless of what parse_{var,type}_DIE
+   do.  This means many DIEs (most, even?) are read twice.  It would
+   be smarter to make parse_{var,type}_DIE return a Bool indicating
+   whether or not they advanced the DIE cursor, and only if they
+   didn't should read_DIE itself read through the DIE.
+
+   More generally, reduce the amount of memory allocated and freed
+   while reading Dwarf3 type/variable information.  Even modest (20MB)
+   objects cause this module to allocate and free hundreds of
+   thousands of small blocks, and ML_(arena_malloc) and its various
+   groupies always show up at the top of performance profiles. */
+
 #include "pub_core_basics.h"
 #include "pub_core_libcbase.h"
 #include "pub_core_libcassert.h"
@@ -1005,7 +1021,19 @@
    struct _TempVar {
       struct _TempVar* next;
       UChar*  name; /* in DebugInfo's .strchunks */
-      XArray* ranges; /* of AddrRange.  UNIQUE PTR in AR_DINFO. */
+      /* Represent ranges economically.  nRanges is the number of
+         ranges.  Cases:
+         0: .rngOneMin .rngOneMax .manyRanges are all zero
+         1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
+         2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
+         This is merely an optimisation to avoid having to allocate
+         and free the XArray in the common (98%) of cases where there
+         is zero or one address ranges. */
+      UWord   nRanges;
+      Addr    rngOneMin;
+      Addr    rngOneMax;
+      XArray* rngMany; /* of AddrRange.  UNIQUE PTR in AR_DINFO. */
+      /* --- */
       Int     level;
       Type*   typeR;
       GExpr*  gexpr; /* for this variable */
@@ -1021,7 +1049,7 @@
    }
    TempVar;
 
-#define N_D3_VAR_STACK 16
+#define N_D3_VAR_STACK 24
 
 typedef
    struct {
@@ -1066,10 +1094,14 @@
          vg_assert(parser->fbGX[i] == NULL);
       }
       VG_(printf)(": ");
-      for (j = 0; j < VG_(sizeXA)( xa ); j++) {
-         AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
-         vg_assert(range);
-         VG_(printf)("[%p,%p] ", range->aMin, range->aMax);
+      if (VG_(sizeXA)( xa ) == 0) {
+         VG_(printf)("** empty PC range array **");
+      } else {
+         for (j = 0; j < VG_(sizeXA)( xa ); j++) {
+            AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
+            vg_assert(range);
+            VG_(printf)("[%p,%p] ", range->aMin, range->aMax);
+         }
       }
       VG_(printf)("\n");
    }
@@ -1493,7 +1525,7 @@
             will be the address ranges at the top of the varparser's
             stack. */
          GExpr*   fbGX = NULL;
-         Word     i;
+         Word     i, nRanges;
          XArray*  /* of AddrRange */ xa;
          TempVar* tv;
          /* Stack can't be empty; we put a dummy entry on it for the
@@ -1544,9 +1576,11 @@
             address space.  It is asserted elsewhere that level 0 
             always covers the entire address space. */
          xa = parser->ranges[external ? 0 : parser->sp];
+         nRanges = VG_(sizeXA)(xa);
+         vg_assert(nRanges >= 0);
+
          tv = ML_(dinfo_zalloc)( sizeof(TempVar) );
          tv->name   = name;
-         tv->ranges = xa;
          tv->level  = external ? 0 : parser->sp;
          tv->typeR  = typeR;
          tv->gexpr  = gexpr;
@@ -1555,13 +1589,37 @@
          tv->fLine  = lineNo;
          tv->dioff  = posn;
          tv->absOri = abs_ori;
-         tv->ranges = VG_(cloneXA)( xa ); /* free when 'tv' freed */
 
+         /* See explanation on definition of type TempVar for the
+            reason for this elaboration. */
+         tv->nRanges = nRanges;
+         tv->rngOneMin = 0;
+         tv->rngOneMax = 0;
+         tv->rngMany = NULL;
+         if (nRanges == 1) {
+            AddrRange* range = VG_(indexXA)(xa, 0);
+            tv->rngOneMin = range->aMin;
+            tv->rngOneMax = range->aMax;
+         }
+         else if (nRanges > 1) {
+            tv->rngMany = VG_(cloneXA)( xa ); /* free when 'tv' freed */
+         }
+
          tv->next  = *tempvars;
          *tempvars = tv;
 
          TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
                   VG_(sizeXA)(xa) );
+         /* collect stats on how effective the ->ranges special
+            casing is */
+         if (0) {
+           static Int ntot=0, ngt=0;
+           ntot++;
+           if (tv->rngMany) ngt++;
+           if (0 == (ntot % 100000))
+              VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
+         }
+
       }
 
       /* Here are some other weird cases seen in the wild:
@@ -2682,7 +2740,6 @@
    UWord dr_offset;
    Word  i;
    Bool td3 = di->trace_symtab;
-   XArray* /* of AddrRange */ xa;
    XArray* /* of TempVar* */ dioff_lookup_tab;
 
 #if 0
@@ -3121,45 +3178,67 @@
       vg_assert(varp->name);
       vg_assert(varp->typeR);
       vg_assert(varp->level >= 0);
-      vg_assert(varp->ranges);
 
       /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
          each address range in which the variable exists. */
-      xa = varp->ranges;
-      if (varp->level == 0)
-         vg_assert( VG_(sizeXA)(xa) == 1 );
-
-      /* Level 0 is the global address range.  So at level 0 we don't
-         want to bias pcMin/pcMax; but at all other levels we do since
-         those are derived from svmas in the Dwarf we're reading.  Be
-         paranoid ... */
       TRACE_D3("  ACQUIRE for range(s) ");
-      for (i = 0; i < VG_(sizeXA)( xa ); i++) {
-         AddrRange* range = VG_(indexXA)(xa, i);
-         Addr pcMin = range->aMin;
-         Addr pcMax = range->aMax;
-         vg_assert(pcMin <= pcMax);
-         if (varp->level == 0) {
-            vg_assert(pcMin == (Addr)0);
-            vg_assert(pcMax == ~(Addr)0);
-         } else {
-             /* vg_assert(pcMin > (Addr)0);
-                No .. we can legitimately expect to see ranges like 
-                0x0-0x11D (pre-biasing, of course). */
-             vg_assert(pcMax < ~(Addr)0);
-         }
+      { AddrRange  oneRange;
+        AddrRange* varPcRanges;
+        Word       nVarPcRanges;
+        /* Set up to iterate over address ranges, however
+           represented. */
+        if (varp->nRanges == 0 || varp->nRanges == 1) {
+           vg_assert(!varp->rngMany);
+           if (varp->nRanges == 0) {
+              vg_assert(varp->rngOneMin == 0);
+              vg_assert(varp->rngOneMax == 0);
+           }
+           nVarPcRanges = varp->nRanges;
+           oneRange.aMin = varp->rngOneMin;
+           oneRange.aMax = varp->rngOneMax;
+           varPcRanges = &oneRange;
+        } else {
+           vg_assert(varp->rngMany);
+           vg_assert(varp->rngOneMin == 0);
+           vg_assert(varp->rngOneMax == 0);
+           nVarPcRanges = VG_(sizeXA)(varp->rngMany);
+           vg_assert(nVarPcRanges >= 2);
+           vg_assert(nVarPcRanges == (Word)varp->nRanges);
+           varPcRanges = VG_(indexXA)(varp->rngMany, 0);
+        }
+        if (varp->level == 0)
+           vg_assert( nVarPcRanges == 1 );
+        /* and iterate */
+        for (i = 0; i < nVarPcRanges; i++) {
+           Addr pcMin = varPcRanges[i].aMin;
+           Addr pcMax = varPcRanges[i].aMax;
+           vg_assert(pcMin <= pcMax);
+           /* Level 0 is the global address range.  So at level 0 we
+              don't want to bias pcMin/pcMax; but at all other levels
+              we do since those are derived from svmas in the Dwarf
+              we're reading.  Be paranoid ... */
+           if (varp->level == 0) {
+              vg_assert(pcMin == (Addr)0);
+              vg_assert(pcMax == ~(Addr)0);
+           } else {
+              /* vg_assert(pcMin > (Addr)0);
+                 No .. we can legitimately expect to see ranges like 
+                 0x0-0x11D (pre-biasing, of course). */
+              vg_assert(pcMax < ~(Addr)0);
+           }
 
-         if (i > 0 && (i%2) == 0) TRACE_D3("\n                       ");
-         TRACE_D3("[%p,%p] ", pcMin, pcMax );
+           if (i > 0 && (i%2) == 0) TRACE_D3("\n                       ");
+           TRACE_D3("[%p,%p] ", pcMin, pcMax );
 
-         ML_(addVar)(
-            di, varp->level, 
-                pcMin + (varp->level==0 ? 0 : di->text_bias),
-                pcMax + (varp->level==0 ? 0 : di->text_bias), 
-                varp->name, (void*)varp->typeR,
-                varp->gexpr, varp->fbGX,
-                varp->fName, varp->fLine, td3 
-         );
+           ML_(addVar)(
+              di, varp->level, 
+                  pcMin + (varp->level==0 ? 0 : di->text_bias),
+                  pcMax + (varp->level==0 ? 0 : di->text_bias), 
+                  varp->name, (void*)varp->typeR,
+                  varp->gexpr, varp->fbGX,
+                  varp->fName, varp->fLine, td3 
+           );
+        }
       }
 
       TRACE_D3("\n\n");
@@ -3169,8 +3248,8 @@
    /* Now free all the TempVars */
    for (varp = tempvars; varp; varp = varp2) {
       varp2 = varp->next;
-      vg_assert(varp->ranges);
-      VG_(deleteXA)(varp->ranges);
+      if (varp->rngMany)
+         VG_(deleteXA)(varp->rngMany);
       ML_(dinfo_free)(varp);
    }
    tempvars = NULL;

Modified: branches/DATASYMS/coregrind/m_debuginfo/storage.c
===================================================================
--- branches/DATASYMS/coregrind/m_debuginfo/storage.c   2008-02-24 18:47:12 UTC 
(rev 7451)
+++ branches/DATASYMS/coregrind/m_debuginfo/storage.c   2008-02-24 19:51:51 UTC 
(rev 7452)
@@ -585,6 +585,17 @@
    vg_assert(first->aMin <= first->aMax);
    vg_assert(first->aMin <= aMin && aMin <= first->aMax);
 
+   /* Fast track common case, which is that the range specified for
+      the variable exactly coincides with one already-existing
+      range. */
+   if (first->aMin == aMin && first->aMax == aMax) {
+      vg_assert(first->vars);
+      VG_(addToXA)( first->vars, var );
+      return;
+   }
+
+   /* We have to get into splitting ranges, which is complex
+      and slow. */
    if (first->aMin < aMin) {
       DiAddrRange* nyu;
       /* Ok.  We'll have to split 'first'. */
@@ -670,6 +681,7 @@
    vg_assert(rangep->aMax == aMax);
 }
 
+
 /* Top-level place to call to add a variable description (as extracted
    from a DWARF3 .debug_info section. */
 void ML_(addVar)( struct _DebugInfo* di,


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Valgrind-developers mailing list
Valgrind-developers@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/valgrind-developers

Reply via email to