Author: sewardj
Date: 2007-10-12 22:55:30 +0100 (Fri, 12 Oct 2007)
New Revision: 6991

Log:
Performance enhancements:

* use a 2-way set associative cache, instead of direct-mapped
* make the cache larger
* apply inlining

Modified:
   branches/THRCHECK/thrcheck/tc_main.c


Modified: branches/THRCHECK/thrcheck/tc_main.c
===================================================================
--- branches/THRCHECK/thrcheck/tc_main.c        2007-10-12 20:27:33 UTC (rev 
6990)
+++ branches/THRCHECK/thrcheck/tc_main.c        2007-10-12 21:55:30 UTC (rev 
6991)
@@ -323,7 +323,7 @@
       UInt  dict[4]; /* can represent up to 4 diff values in the line */
       UChar ix2s[N_LINE_W8s/4]; /* array of N_LINE_W8s 2-bit dict indexes */
       /* if dict[0] == 0 then dict[1] is the index of the CacheLineF
-        to use */
+         to use */
    }
    CacheLineZ; /* compressed rep for a cache line */
 
@@ -354,7 +354,7 @@
    Each SecMap must hold a power-of-2 number of CacheLines.  Hence
    N_SECMAP_BITS must >= N_LINE_BITS.
 */
-#define N_SECMAP_BITS   12
+#define N_SECMAP_BITS   13
 #define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
 
 // # CacheLines held by a SecMap
@@ -365,8 +365,8 @@
       Bool mbHasLocks;  /* hint: any locks in range?  safe: True */
       Bool mbHasShared; /* hint: any ShM/ShR states in range?  safe: True */
       CacheLineZ  linesZ[N_SECMAP_ZLINES];
-      CacheLineF* linezF;
-      Int         linezF_size;
+      CacheLineF* linesF;
+      Int         linesF_size;
    }
    SecMap;
 
@@ -399,11 +399,11 @@
    tl_assert(itr->line_no >= 0 && itr->line_no < N_SECMAP_ZLINES);
    lineZ = &sm->linesZ[itr->line_no];
    if (lineZ->dict[0] == 0) {
-      tl_assert(sm->linezF);
-      tl_assert(sm->linezF_size > 0);
+      tl_assert(sm->linesF);
+      tl_assert(sm->linesF_size > 0);
       tl_assert(lineZ->dict[1] >= 0);
-      tl_assert(lineZ->dict[1] < sm->linezF_size);
-      lineF = &sm->linezF[ lineZ->dict[1] ];
+      tl_assert(lineZ->dict[1] < sm->linesF_size);
+      lineF = &sm->linesF[ lineZ->dict[1] ];
       tl_assert(lineF->inUse);
       tl_assert(itr->word_no >= 0 && itr->word_no < N_LINE_W8s);
       *pVal = &lineF->w32s[itr->word_no];
@@ -444,8 +444,10 @@
    with a bogus tag. */
 typedef
    struct {
-      CacheLine way0 [N_WAY_NENT];
+      CacheLine lyns0[N_WAY_NENT];
+      CacheLine lyns1[N_WAY_NENT];
       Addr      tags0[N_WAY_NENT];
+      Addr      tags1[N_WAY_NENT];
    }
    Cache;
 
@@ -1746,8 +1748,8 @@
       for (j = 0; j < N_LINE_W8s/4; j++)
          sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
    }
-   sm->linezF      = NULL;
-   sm->linezF_size = 0;
+   sm->linesF      = NULL;
+   sm->linesF_size = 0;
    stats__secmaps_allocd++;
    stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
    stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
@@ -2148,15 +2150,32 @@
 
    // check the cache
    for (i = 0; i < N_WAY_NENT; i++) {
-      CacheLine* cl  = &cache_shmem.way0[i];
-      Addr       tag =  cache_shmem.tags0[i];
-      if (tag == 1)
-         continue;
-      if (!is_valid_scache_tag(tag)) BAD("14");
-      if (!is_sane_CacheLine(cl)) BAD("15");
-      if (tag & (N_LINE_W8s-1)) BAD("16");
-      for (j = i+1; j < N_WAY_NENT; j++)
-         if (cache_shmem.tags0[j] == tag) BAD("17");
+      CacheLine* cl;
+      Addr       tag; 
+      /* way0, dude */
+      cl  = &cache_shmem.lyns0[i];
+      tag =  cache_shmem.tags0[i];
+      if (tag != 1) {
+         if (!is_valid_scache_tag(tag)) BAD("14-0");
+         if (!is_sane_CacheLine(cl)) BAD("15-0");
+         if (tag & (N_LINE_W8s-1)) BAD("16-0");
+         for (j = i+1; j < N_WAY_NENT; j++)
+            if (cache_shmem.tags0[j] == tag) BAD("17-0");
+      }
+      /* way1 */
+      cl  = &cache_shmem.lyns1[i];
+      tag =  cache_shmem.tags1[i];
+      if (tag != 1) {
+         if (!is_valid_scache_tag(tag)) BAD("14-1");
+         if (!is_sane_CacheLine(cl)) BAD("15-1");
+         if (tag & (N_LINE_W8s-1)) BAD("16-1");
+         for (j = i+1; j < N_WAY_NENT; j++)
+            if (cache_shmem.tags1[j] == tag) BAD("17-1");
+      }
+      /* and also */
+      if (cache_shmem.tags0[i] != 1 && cache_shmem.tags1[i] != 1
+          && cache_shmem.tags0[i] == cache_shmem.tags1[i])
+         BAD("18");
    }
 
    return;
@@ -2637,12 +2656,12 @@
    lineZ = &sm->linesZ[zix];
    if (lineZ->dict[0] == 0) {
       Int fix = lineZ->dict[1];
-      tl_assert(sm->linezF);
-      tl_assert(sm->linezF_size > 0);
-      tl_assert(fix >= 0 && fix < sm->linezF_size);
+      tl_assert(sm->linesF);
+      tl_assert(sm->linesF_size > 0);
+      tl_assert(fix >= 0 && fix < sm->linesF_size);
       *zp = NULL;
-      *fp = &sm->linezF[fix];
-      tl_assert(sm->linezF[fix].inUse);
+      *fp = &sm->linesF[fix];
+      tl_assert(sm->linesF[fix].inUse);
    } else {
       *zp = lineZ;
       *fp = NULL;
@@ -2666,10 +2685,10 @@
    lineF = NULL;
    if (lineZ->dict[0] == 0) {
       Word fix = lineZ->dict[1];
-      tl_assert(sm->linezF);
-      tl_assert(sm->linezF_size > 0);
-      tl_assert(fix >= 0 && fix < sm->linezF_size);
-      lineF = &sm->linezF[fix];
+      tl_assert(sm->linesF);
+      tl_assert(sm->linesF_size > 0);
+      tl_assert(fix >= 0 && fix < sm->linesF_size);
+      lineF = &sm->linesF[fix];
       tl_assert(lineF->inUse);
       lineZ = NULL;
    }
@@ -2696,10 +2715,10 @@
    /* If lineZ has an associated lineF, free it up. */
    if (lineZ->dict[0] == 0) {
       Word fix = lineZ->dict[1];
-      tl_assert(sm->linezF);
-      tl_assert(sm->linezF_size > 0);
-      tl_assert(fix >= 0 && fix < sm->linezF_size);
-      lineF = &sm->linezF[fix];
+      tl_assert(sm->linesF);
+      tl_assert(sm->linesF_size > 0);
+      tl_assert(fix >= 0 && fix < sm->linesF_size);
+      lineF = &sm->linesF[fix];
       tl_assert(lineF->inUse);
       lineF->inUse = False;
    }
@@ -2714,15 +2733,15 @@
    Word        i, new_size;
    CacheLineF* nyu;
 
-   if (sm->linezF) {
-      tl_assert(sm->linezF_size > 0);
+   if (sm->linesF) {
+      tl_assert(sm->linesF_size > 0);
    } else {
-      tl_assert(sm->linezF_size == 0);
+      tl_assert(sm->linesF_size == 0);
    }
 
-   if (sm->linezF) {
-      for (i = 0; i < sm->linezF_size; i++) {
-         if (!sm->linezF[i].inUse) {
+   if (sm->linesF) {
+      for (i = 0; i < sm->linesF_size; i++) {
+         if (!sm->linesF[i].inUse) {
             *fixp = (Word)i;
             return;
          }
@@ -2730,35 +2749,35 @@
    }
 
    /* No free F line found.  Expand existing array and try again. */
-   new_size = sm->linezF_size==0 ? 1 : 2 * sm->linezF_size;
+   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
    nyu      = tc_zalloc( new_size * sizeof(CacheLineF) );
    tl_assert(nyu);
 
-   stats__secmap_linesF_allocd += (new_size - sm->linezF_size);
-   stats__secmap_linesF_bytes  += (new_size - sm->linezF_size)
+   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
+   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                   * sizeof(CacheLineF);
 
    if (0)
    VG_(printf)("SM %p: expand F array from %d to %d\n", 
-               sm, (Int)sm->linezF_size, new_size);
+               sm, (Int)sm->linesF_size, new_size);
 
    for (i = 0; i < new_size; i++)
       nyu[i].inUse = False;
 
-   if (sm->linezF) {
-      for (i = 0; i < sm->linezF_size; i++) {
-         tl_assert(sm->linezF[i].inUse);
-         nyu[i] = sm->linezF[i];
+   if (sm->linesF) {
+      for (i = 0; i < sm->linesF_size; i++) {
+         tl_assert(sm->linesF[i].inUse);
+         nyu[i] = sm->linesF[i];
       }
-      VG_(memset)(sm->linezF, 0, sm->linezF_size * sizeof(CacheLineF) );
-      tc_free(sm->linezF);
+      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(CacheLineF) );
+      tc_free(sm->linesF);
    }
 
-   sm->linezF      = nyu;
-   sm->linezF_size = new_size;
+   sm->linesF      = nyu;
+   sm->linesF_size = new_size;
 
-   for (i = 0; i < sm->linezF_size; i++) {
-      if (!sm->linezF[i].inUse) {
+   for (i = 0; i < sm->linesF_size; i++) {
+      if (!sm->linesF[i].inUse) {
          *fixp = (Word)i;
          return;
       }
@@ -2803,6 +2822,7 @@
    return True;
 }
 
+__attribute__((unused))
 static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
 #define FMT "%08x\n"
@@ -2918,49 +2938,6 @@
 
 /* Write the cacheline 'wix' to backing store.  Where it ends up
    is determined by its tag field. */
-static void analyse ( UInt* ws, Word nWs )
-{
-
-static Word qq=0;
- static Word qqx[20];
-
- Word i, j, nDiff;
-
-  if (qq==0) { for (i = 0; i < 20; i++) qqx[i]=0; }
-  qq++;
-
-  nDiff = 1;
-  for (i = 1; i < nWs; i++) {
-    nDiff++;
-    for (j = 0; j < i; j++) {
-      if (ws[j] == ws[i])
-        break;
-    }
-    if (j < i)
-      nDiff--;
-  }
-
-  if (nDiff >= 19) nDiff=19;
-  qqx[nDiff]++;
-
-  if ((qq % 100000) == 0) {
-    tl_assert(qqx[0] == 0);
-    VG_(printf)("%lu   ", qq);
-    for (j = 1; j < 20; j++)
-      VG_(printf)("%lu ", qqx[j]);
-    VG_(printf)("\n");
-  }
-
-#if 0
-  if (nDiff >= 5) {
-  VG_(printf)("diff %ld\n", nDiff);
-  for (i = 0; i < nWs; i++)
-    VG_(printf)("%x ", ws[i]);
-  VG_(printf)("\n\n");
-  }
-#endif
-}
-
 static
 Bool sequentialise_into ( /*OUT*/UInt* dst, Word nDst, CacheLine* src )
 {
@@ -3039,7 +3016,7 @@
 }
 
 
-static void cacheline_wback ( UWord wix )
+static void cacheline_wback ( UWord way, UWord wix )
 {
    Word        i, j;
    Bool        anyShared = False;
@@ -3052,10 +3029,19 @@
    UInt        shvals[N_LINE_W8s];
    UInt        sv;
 
+   if (0)
+   VG_(printf)("scache wback way %d line %d\n", (Int)way, (Int)wix);
+
+   tl_assert(way >= 0 && way < 2);
    tl_assert(wix >= 0 && wix < N_WAY_NENT);
-   //VG_(printf)("scache wback line %d\n", wix);
 
-   tag = cache_shmem.tags0[wix];
+   if (way == 0) {
+      tag =  cache_shmem.tags0[wix];
+      cl  = &cache_shmem.lyns0[wix];
+   } else {
+      tag =  cache_shmem.tags1[wix];
+      cl  = &cache_shmem.lyns1[wix];
+   }
 
    /* The cache line may have been invalidated; if so, ignore it. */
    if (!is_valid_scache_tag(tag))
@@ -3073,7 +3059,6 @@
    lineZ = &sm->linesZ[zix];
 
    /* Generate the data to be stored */
-   cl = &cache_shmem.way0[wix];
    tl_assert(is_sane_CacheLine( cl ));
    anyShared = sequentialise_into( shvals, N_LINE_W8s, cl );
 
@@ -3105,10 +3090,10 @@
    if (i < N_LINE_W8s) {
       /* cannot use the compressed rep.  Use f rep instead. */
       alloc_F_for_writing( sm, &fix );
-      tl_assert(sm->linezF);
-      tl_assert(sm->linezF_size > 0);
-      tl_assert(fix >= 0 && fix < sm->linezF_size);
-      lineF = &sm->linezF[fix];
+      tl_assert(sm->linesF);
+      tl_assert(sm->linesF_size > 0);
+      tl_assert(fix >= 0 && fix < sm->linesF_size);
+      lineF = &sm->linesF[fix];
       tl_assert(!lineF->inUse);
       lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = 0;
       lineZ->dict[1] = (UInt)fix;
@@ -3130,17 +3115,27 @@
    associated with 'wix' is assumed to have already been filled in;
    hence that is used to determine where in the backing store to read
    from. */
-static void cacheline_fetch ( UWord wix )
+static void cacheline_fetch ( UWord way, UWord wix )
 {
-   Word    i;
-   Addr    tag;
+   Word        i;
+   Addr        tag;
+   CacheLine*  cl;
    CacheLineZ* lineZ;
    CacheLineF* lineF;
-   //VG_(printf)("scache fetch line %d\n", wix);
 
+   if (0)
+   VG_(printf)("scache fetch way %d line %d\n", (Int)way, (Int)wix);
+
+   tl_assert(way >= 0 && way < 2);
    tl_assert(wix >= 0 && wix < N_WAY_NENT);
 
-   tag = cache_shmem.tags0[wix];
+   if (way == 0) {
+      tag =  cache_shmem.tags0[wix];
+      cl  = &cache_shmem.lyns0[wix];
+   } else {
+      tag =  cache_shmem.tags1[wix];
+      cl  = &cache_shmem.lyns1[wix];
+   }
 
    /* reject nonsense requests */
    tl_assert(is_valid_scache_tag(tag));
@@ -3155,7 +3150,7 @@
    if (lineF) {
       tl_assert(lineF->inUse);
       for (i = 0; i < N_LINE_W8s; i++) {
-         cache_shmem.way0[wix].w8[i] = lineF->w32s[i];
+         cl->w8[i] = lineF->w32s[i];
       }
       stats__cache_F_fetches++;
    } else {
@@ -3165,18 +3160,19 @@
          tl_assert(ix >= 0 && ix <= 3);
          sv = lineZ->dict[ix];
          tl_assert(sv != 0);
-         cache_shmem.way0[wix].w8[i] = sv;
+         cl->w8[i] = sv;
       }
       stats__cache_Z_fetches++;
    }
-   cacheline_normalise( &cache_shmem.way0[wix] );
+   cacheline_normalise( cl );
 }
 
 static void shmem__flush_scache ( void ) {
    Word wix;
    if (0) VG_(printf)("scache flush\n");
    for (wix = 0; wix < N_WAY_NENT; wix++) {
-      cacheline_wback( wix );
+      cacheline_wback( 0, wix );
+      cacheline_wback( 1, wix );
    }
    stats__cache_flushes++;
 }
@@ -3184,8 +3180,10 @@
    Word wix;
    if (0) VG_(printf)("scache inval\n");
    tl_assert(!is_valid_scache_tag(1));
-   for (wix = 0; wix < N_WAY_NENT; wix++)
+   for (wix = 0; wix < N_WAY_NENT; wix++) {
       cache_shmem.tags0[wix] = 1/*INVALID*/;
+      cache_shmem.tags1[wix] = 1/*INVALID*/;
+   }
    stats__cache_invals++;
 }
 
@@ -3201,28 +3199,62 @@
 static inline UWord get_cacheline_offset ( Addr a ) {
    return (UWord)(a & (N_LINE_W8s - 1));
 }
-static CacheLine* get_cacheline ( Addr a )
+
+static CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
+static inline CacheLine* get_cacheline ( Addr a )
 {
    /* tag is 'a' with the in-line offset masked out, 
       eg a[31]..a[4] 0000 */
-   Addr  tag = a & ~(N_LINE_W8s - 1);
-   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
+   Addr       tag = a & ~(N_LINE_W8s - 1);
+   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
    stats__cache_totrefs++;
+   /* Check both Ways */
    if (LIKELY(tag == cache_shmem.tags0[wix]))
-      return &cache_shmem.way0[wix];
+      return &cache_shmem.lyns0[wix];
+   if (LIKELY(tag == cache_shmem.tags1[wix]))
+      return &cache_shmem.lyns1[wix];
+   return get_cacheline_MISS( a );
+}
+
+static CacheLine* get_cacheline_MISS ( Addr a )
+{
+   /* tag is 'a' with the in-line offset masked out, 
+      eg a[31]..a[4] 0000 */
+   static UWord seed = 0;
+
+   CacheLine* cl;
+   Addr*      tag_old_p;
+   UWord      way;
+   Addr       tag = a & ~(N_LINE_W8s - 1);
+   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
+
+   /* Check both Ways */
+   tl_assert(tag != cache_shmem.tags0[wix]);
+   tl_assert(tag != cache_shmem.tags1[wix]);
+
    /* Dump the old line into the backing store. */
    stats__cache_totmisses++;
-   if (is_valid_scache_tag( cache_shmem.tags0[wix] )) {
-      //if (!is_sane_CacheLine( &cache_shmem .way0[wix]))
-      //pp_CacheLine( &cache_shmem .way0[wix]);
-      tl_assert(is_sane_CacheLine( &cache_shmem .way0[wix] ));
-      cacheline_wback( wix );
+
+   /* arbitrarily choose the way to dump (not very scientific) */
+   way = seed++ & 1;
+
+   if (way == 0) {
+     cl        = &cache_shmem.lyns0[wix];
+     tag_old_p = &cache_shmem.tags0[wix];
+   } else {
+     cl        = &cache_shmem.lyns1[wix];
+     tag_old_p = &cache_shmem.tags1[wix];
    }
+
+   if (is_valid_scache_tag( *tag_old_p )) {
+      tl_assert(is_sane_CacheLine( cl ));
+      cacheline_wback( way, wix );
+   }
    /* and reload the new one */
-   cache_shmem.tags0[wix] = tag;
-   cacheline_fetch( wix );
-   tl_assert(is_sane_CacheLine( &cache_shmem. way0[wix] ));
-   return &cache_shmem.way0[wix];
+   *tag_old_p = tag;
+   cacheline_fetch( way, wix );
+   tl_assert(is_sane_CacheLine( cl ));
+   return cl;
 }
 
 /////////////////////////////vvvvvvvvvvvvvvvvvvvvvvvvvvvvv
@@ -5220,6 +5252,7 @@
    }
 }
 
+__attribute__((unused))
 static void laog__sanity_check ( void ) {
    Word i, ws_size;
    Word* ws_words;
@@ -6862,11 +6895,11 @@
                   stats__secmaps_allocd,
                   stats__secmap_ga_space_covered);
       VG_(printf)("  linesZ: %10lu allocd (%10lu bytes occupied)\n",
-                 stats__secmap_linesZ_allocd,
-                 stats__secmap_linesZ_bytes);
+                  stats__secmap_linesZ_allocd,
+                  stats__secmap_linesZ_bytes);
       VG_(printf)("  linesF: %10lu allocd (%10lu bytes occupied)\n",
-                 stats__secmap_linesF_allocd,
-                 stats__secmap_linesF_bytes);
+                  stats__secmap_linesF_allocd,
+                  stats__secmap_linesF_bytes);
       VG_(printf)(" secmaps: %10lu iterator steppings\n",
                   stats__secmap_iterator_steppings);
 
@@ -6882,32 +6915,32 @@
 
       VG_(printf)("\n");
       VG_(printf)("   cline: %10lu normalises\n",
-                 stats__cline_normalises );
+                  stats__cline_normalises );
       VG_(printf)("   cline:  reads 8/4/2/1: %10lu %10lu %10lu %10lu\n",
-                 stats__cline_read8s,
-                 stats__cline_read4s,
-                 stats__cline_read2s,
+                  stats__cline_read8s,
+                  stats__cline_read4s,
+                  stats__cline_read2s,
                   stats__cline_read1s );
       VG_(printf)("   cline: writes 8/4/2/1: %10lu %10lu %10lu %10lu\n",
-                 stats__cline_write8s,
-                 stats__cline_write4s,
-                 stats__cline_write2s,
+                  stats__cline_write8s,
+                  stats__cline_write4s,
+                  stats__cline_write2s,
                   stats__cline_write1s );
       VG_(printf)("   cline:   sets 8/4/2/1: %10lu %10lu %10lu %10lu\n",
-                 stats__cline_set8s,
-                 stats__cline_set4s,
-                 stats__cline_set2s,
+                  stats__cline_set8s,
+                  stats__cline_set4s,
+                  stats__cline_set2s,
                   stats__cline_set1s );
       VG_(printf)("   cline: get1s %lu, copy1s %lu\n",
-                 stats__cline_get1s, stats__cline_copy1s );
+                  stats__cline_get1s, stats__cline_copy1s );
 
       VG_(printf)("   cline:    splits: 8to4 %10lu, 4to2 %10lu, 2to1 %10lu\n",
-                stats__cline_8to4splits,
-                stats__cline_4to2splits,
+                 stats__cline_8to4splits,
+                 stats__cline_4to2splits,
                  stats__cline_2to1splits );
       VG_(printf)("   cline: pulldowns: 8to4 %10lu, 4to2 %10lu, 2to1 %10lu\n",
-                stats__cline_8to4pulldown,
-                stats__cline_4to2pulldown,
+                 stats__cline_8to4pulldown,
+                 stats__cline_4to2pulldown,
                  stats__cline_2to1pulldown );
       VG_(printf)("\n");
    }


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
Valgrind-developers mailing list
Valgrind-developers@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/valgrind-developers

Reply via email to