Author: sewardj
Date: 2007-10-12 22:55:30 +0100 (Fri, 12 Oct 2007)
New Revision: 6991
Log: Performance enhancements: * use a 2-way set associative cache, instead of direct-mapped * make the cache larger * apply inlining Modified: branches/THRCHECK/thrcheck/tc_main.c Modified: branches/THRCHECK/thrcheck/tc_main.c =================================================================== --- branches/THRCHECK/thrcheck/tc_main.c 2007-10-12 20:27:33 UTC (rev 6990) +++ branches/THRCHECK/thrcheck/tc_main.c 2007-10-12 21:55:30 UTC (rev 6991) @@ -323,7 +323,7 @@ UInt dict[4]; /* can represent up to 4 diff values in the line */ UChar ix2s[N_LINE_W8s/4]; /* array of N_LINE_W8s 2-bit dict indexes */ /* if dict[0] == 0 then dict[1] is the index of the CacheLineF - to use */ + to use */ } CacheLineZ; /* compressed rep for a cache line */ @@ -354,7 +354,7 @@ Each SecMap must hold a power-of-2 number of CacheLines. Hence N_SECMAP_BITS must >= N_LINE_BITS. */ -#define N_SECMAP_BITS 12 +#define N_SECMAP_BITS 13 #define N_SECMAP_ARANGE (1 << N_SECMAP_BITS) // # CacheLines held by a SecMap @@ -365,8 +365,8 @@ Bool mbHasLocks; /* hint: any locks in range? safe: True */ Bool mbHasShared; /* hint: any ShM/ShR states in range? safe: True */ CacheLineZ linesZ[N_SECMAP_ZLINES]; - CacheLineF* linezF; - Int linezF_size; + CacheLineF* linesF; + Int linesF_size; } SecMap; @@ -399,11 +399,11 @@ tl_assert(itr->line_no >= 0 && itr->line_no < N_SECMAP_ZLINES); lineZ = &sm->linesZ[itr->line_no]; if (lineZ->dict[0] == 0) { - tl_assert(sm->linezF); - tl_assert(sm->linezF_size > 0); + tl_assert(sm->linesF); + tl_assert(sm->linesF_size > 0); tl_assert(lineZ->dict[1] >= 0); - tl_assert(lineZ->dict[1] < sm->linezF_size); - lineF = &sm->linezF[ lineZ->dict[1] ]; + tl_assert(lineZ->dict[1] < sm->linesF_size); + lineF = &sm->linesF[ lineZ->dict[1] ]; tl_assert(lineF->inUse); tl_assert(itr->word_no >= 0 && itr->word_no < N_LINE_W8s); *pVal = &lineF->w32s[itr->word_no]; @@ -444,8 +444,10 @@ with a bogus tag. 
*/ typedef struct { - CacheLine way0 [N_WAY_NENT]; + CacheLine lyns0[N_WAY_NENT]; + CacheLine lyns1[N_WAY_NENT]; Addr tags0[N_WAY_NENT]; + Addr tags1[N_WAY_NENT]; } Cache; @@ -1746,8 +1748,8 @@ for (j = 0; j < N_LINE_W8s/4; j++) sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */ } - sm->linezF = NULL; - sm->linezF_size = 0; + sm->linesF = NULL; + sm->linesF_size = 0; stats__secmaps_allocd++; stats__secmap_ga_space_covered += N_SECMAP_ARANGE; stats__secmap_linesZ_allocd += N_SECMAP_ZLINES; @@ -2148,15 +2150,32 @@ // check the cache for (i = 0; i < N_WAY_NENT; i++) { - CacheLine* cl = &cache_shmem.way0[i]; - Addr tag = cache_shmem.tags0[i]; - if (tag == 1) - continue; - if (!is_valid_scache_tag(tag)) BAD("14"); - if (!is_sane_CacheLine(cl)) BAD("15"); - if (tag & (N_LINE_W8s-1)) BAD("16"); - for (j = i+1; j < N_WAY_NENT; j++) - if (cache_shmem.tags0[j] == tag) BAD("17"); + CacheLine* cl; + Addr tag; + /* way0, dude */ + cl = &cache_shmem.lyns0[i]; + tag = cache_shmem.tags0[i]; + if (tag != 1) { + if (!is_valid_scache_tag(tag)) BAD("14-0"); + if (!is_sane_CacheLine(cl)) BAD("15-0"); + if (tag & (N_LINE_W8s-1)) BAD("16-0"); + for (j = i+1; j < N_WAY_NENT; j++) + if (cache_shmem.tags0[j] == tag) BAD("17-0"); + } + /* way1 */ + cl = &cache_shmem.lyns1[i]; + tag = cache_shmem.tags1[i]; + if (tag != 1) { + if (!is_valid_scache_tag(tag)) BAD("14-1"); + if (!is_sane_CacheLine(cl)) BAD("15-1"); + if (tag & (N_LINE_W8s-1)) BAD("16-1"); + for (j = i+1; j < N_WAY_NENT; j++) + if (cache_shmem.tags1[j] == tag) BAD("17-1"); + } + /* and also */ + if (cache_shmem.tags0[i] != 1 && cache_shmem.tags1[i] != 1 + && cache_shmem.tags0[i] == cache_shmem.tags1[i]) + BAD("18"); } return; @@ -2637,12 +2656,12 @@ lineZ = &sm->linesZ[zix]; if (lineZ->dict[0] == 0) { Int fix = lineZ->dict[1]; - tl_assert(sm->linezF); - tl_assert(sm->linezF_size > 0); - tl_assert(fix >= 0 && fix < sm->linezF_size); + tl_assert(sm->linesF); + tl_assert(sm->linesF_size > 0); + tl_assert(fix >= 0 && fix < 
sm->linesF_size); *zp = NULL; - *fp = &sm->linezF[fix]; - tl_assert(sm->linezF[fix].inUse); + *fp = &sm->linesF[fix]; + tl_assert(sm->linesF[fix].inUse); } else { *zp = lineZ; *fp = NULL; @@ -2666,10 +2685,10 @@ lineF = NULL; if (lineZ->dict[0] == 0) { Word fix = lineZ->dict[1]; - tl_assert(sm->linezF); - tl_assert(sm->linezF_size > 0); - tl_assert(fix >= 0 && fix < sm->linezF_size); - lineF = &sm->linezF[fix]; + tl_assert(sm->linesF); + tl_assert(sm->linesF_size > 0); + tl_assert(fix >= 0 && fix < sm->linesF_size); + lineF = &sm->linesF[fix]; tl_assert(lineF->inUse); lineZ = NULL; } @@ -2696,10 +2715,10 @@ /* If lineZ has an associated lineF, free it up. */ if (lineZ->dict[0] == 0) { Word fix = lineZ->dict[1]; - tl_assert(sm->linezF); - tl_assert(sm->linezF_size > 0); - tl_assert(fix >= 0 && fix < sm->linezF_size); - lineF = &sm->linezF[fix]; + tl_assert(sm->linesF); + tl_assert(sm->linesF_size > 0); + tl_assert(fix >= 0 && fix < sm->linesF_size); + lineF = &sm->linesF[fix]; tl_assert(lineF->inUse); lineF->inUse = False; } @@ -2714,15 +2733,15 @@ Word i, new_size; CacheLineF* nyu; - if (sm->linezF) { - tl_assert(sm->linezF_size > 0); + if (sm->linesF) { + tl_assert(sm->linesF_size > 0); } else { - tl_assert(sm->linezF_size == 0); + tl_assert(sm->linesF_size == 0); } - if (sm->linezF) { - for (i = 0; i < sm->linezF_size; i++) { - if (!sm->linezF[i].inUse) { + if (sm->linesF) { + for (i = 0; i < sm->linesF_size; i++) { + if (!sm->linesF[i].inUse) { *fixp = (Word)i; return; } @@ -2730,35 +2749,35 @@ } /* No free F line found. Expand existing array and try again. */ - new_size = sm->linezF_size==0 ? 1 : 2 * sm->linezF_size; + new_size = sm->linesF_size==0 ? 
1 : 2 * sm->linesF_size; nyu = tc_zalloc( new_size * sizeof(CacheLineF) ); tl_assert(nyu); - stats__secmap_linesF_allocd += (new_size - sm->linezF_size); - stats__secmap_linesF_bytes += (new_size - sm->linezF_size) + stats__secmap_linesF_allocd += (new_size - sm->linesF_size); + stats__secmap_linesF_bytes += (new_size - sm->linesF_size) * sizeof(CacheLineF); if (0) VG_(printf)("SM %p: expand F array from %d to %d\n", - sm, (Int)sm->linezF_size, new_size); + sm, (Int)sm->linesF_size, new_size); for (i = 0; i < new_size; i++) nyu[i].inUse = False; - if (sm->linezF) { - for (i = 0; i < sm->linezF_size; i++) { - tl_assert(sm->linezF[i].inUse); - nyu[i] = sm->linezF[i]; + if (sm->linesF) { + for (i = 0; i < sm->linesF_size; i++) { + tl_assert(sm->linesF[i].inUse); + nyu[i] = sm->linesF[i]; } - VG_(memset)(sm->linezF, 0, sm->linezF_size * sizeof(CacheLineF) ); - tc_free(sm->linezF); + VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(CacheLineF) ); + tc_free(sm->linesF); } - sm->linezF = nyu; - sm->linezF_size = new_size; + sm->linesF = nyu; + sm->linesF_size = new_size; - for (i = 0; i < sm->linezF_size; i++) { - if (!sm->linezF[i].inUse) { + for (i = 0; i < sm->linesF_size; i++) { + if (!sm->linesF[i].inUse) { *fixp = (Word)i; return; } @@ -2803,6 +2822,7 @@ return True; } +__attribute__((unused)) static void pp_CacheLine ( CacheLine* cl ) { Word i; #define FMT "%08x\n" @@ -2918,49 +2938,6 @@ /* Write the cacheline 'wix' to backing store. Where it ends up is determined by its tag field. 
*/ -static void analyse ( UInt* ws, Word nWs ) -{ - -static Word qq=0; - static Word qqx[20]; - - Word i, j, nDiff; - - if (qq==0) { for (i = 0; i < 20; i++) qqx[i]=0; } - qq++; - - nDiff = 1; - for (i = 1; i < nWs; i++) { - nDiff++; - for (j = 0; j < i; j++) { - if (ws[j] == ws[i]) - break; - } - if (j < i) - nDiff--; - } - - if (nDiff >= 19) nDiff=19; - qqx[nDiff]++; - - if ((qq % 100000) == 0) { - tl_assert(qqx[0] == 0); - VG_(printf)("%lu ", qq); - for (j = 1; j < 20; j++) - VG_(printf)("%lu ", qqx[j]); - VG_(printf)("\n"); - } - -#if 0 - if (nDiff >= 5) { - VG_(printf)("diff %ld\n", nDiff); - for (i = 0; i < nWs; i++) - VG_(printf)("%x ", ws[i]); - VG_(printf)("\n\n"); - } -#endif -} - static Bool sequentialise_into ( /*OUT*/UInt* dst, Word nDst, CacheLine* src ) { @@ -3039,7 +3016,7 @@ } -static void cacheline_wback ( UWord wix ) +static void cacheline_wback ( UWord way, UWord wix ) { Word i, j; Bool anyShared = False; @@ -3052,10 +3029,19 @@ UInt shvals[N_LINE_W8s]; UInt sv; + if (0) + VG_(printf)("scache wback way %d line %d\n", (Int)way, (Int)wix); + + tl_assert(way >= 0 && way < 2); tl_assert(wix >= 0 && wix < N_WAY_NENT); - //VG_(printf)("scache wback line %d\n", wix); - tag = cache_shmem.tags0[wix]; + if (way == 0) { + tag = cache_shmem.tags0[wix]; + cl = &cache_shmem.lyns0[wix]; + } else { + tag = cache_shmem.tags1[wix]; + cl = &cache_shmem.lyns1[wix]; + } /* The cache line may have been invalidated; if so, ignore it. */ if (!is_valid_scache_tag(tag)) @@ -3073,7 +3059,6 @@ lineZ = &sm->linesZ[zix]; /* Generate the data to be stored */ - cl = &cache_shmem.way0[wix]; tl_assert(is_sane_CacheLine( cl )); anyShared = sequentialise_into( shvals, N_LINE_W8s, cl ); @@ -3105,10 +3090,10 @@ if (i < N_LINE_W8s) { /* cannot use the compressed rep. Use f rep instead. 
*/ alloc_F_for_writing( sm, &fix ); - tl_assert(sm->linezF); - tl_assert(sm->linezF_size > 0); - tl_assert(fix >= 0 && fix < sm->linezF_size); - lineF = &sm->linezF[fix]; + tl_assert(sm->linesF); + tl_assert(sm->linesF_size > 0); + tl_assert(fix >= 0 && fix < sm->linesF_size); + lineF = &sm->linesF[fix]; tl_assert(!lineF->inUse); lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = 0; lineZ->dict[1] = (UInt)fix; @@ -3130,17 +3115,27 @@ associated with 'wix' is assumed to have already been filled in; hence that is used to determine where in the backing store to read from. */ -static void cacheline_fetch ( UWord wix ) +static void cacheline_fetch ( UWord way, UWord wix ) { - Word i; - Addr tag; + Word i; + Addr tag; + CacheLine* cl; CacheLineZ* lineZ; CacheLineF* lineF; - //VG_(printf)("scache fetch line %d\n", wix); + if (0) + VG_(printf)("scache fetch way %d line %d\n", (Int)way, (Int)wix); + + tl_assert(way >= 0 && way < 2); tl_assert(wix >= 0 && wix < N_WAY_NENT); - tag = cache_shmem.tags0[wix]; + if (way == 0) { + tag = cache_shmem.tags0[wix]; + cl = &cache_shmem.lyns0[wix]; + } else { + tag = cache_shmem.tags1[wix]; + cl = &cache_shmem.lyns1[wix]; + } /* reject nonsense requests */ tl_assert(is_valid_scache_tag(tag)); @@ -3155,7 +3150,7 @@ if (lineF) { tl_assert(lineF->inUse); for (i = 0; i < N_LINE_W8s; i++) { - cache_shmem.way0[wix].w8[i] = lineF->w32s[i]; + cl->w8[i] = lineF->w32s[i]; } stats__cache_F_fetches++; } else { @@ -3165,18 +3160,19 @@ tl_assert(ix >= 0 && ix <= 3); sv = lineZ->dict[ix]; tl_assert(sv != 0); - cache_shmem.way0[wix].w8[i] = sv; + cl->w8[i] = sv; } stats__cache_Z_fetches++; } - cacheline_normalise( &cache_shmem.way0[wix] ); + cacheline_normalise( cl ); } static void shmem__flush_scache ( void ) { Word wix; if (0) VG_(printf)("scache flush\n"); for (wix = 0; wix < N_WAY_NENT; wix++) { - cacheline_wback( wix ); + cacheline_wback( 0, wix ); + cacheline_wback( 1, wix ); } stats__cache_flushes++; } @@ -3184,8 +3180,10 @@ Word wix; if (0) 
VG_(printf)("scache inval\n"); tl_assert(!is_valid_scache_tag(1)); - for (wix = 0; wix < N_WAY_NENT; wix++) + for (wix = 0; wix < N_WAY_NENT; wix++) { cache_shmem.tags0[wix] = 1/*INVALID*/; + cache_shmem.tags1[wix] = 1/*INVALID*/; + } stats__cache_invals++; } @@ -3201,28 +3199,62 @@ static inline UWord get_cacheline_offset ( Addr a ) { return (UWord)(a & (N_LINE_W8s - 1)); } -static CacheLine* get_cacheline ( Addr a ) + +static CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */ +static inline CacheLine* get_cacheline ( Addr a ) { /* tag is 'a' with the in-line offset masked out, eg a[31]..a[4] 0000 */ - Addr tag = a & ~(N_LINE_W8s - 1); - UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); + Addr tag = a & ~(N_LINE_W8s - 1); + UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); stats__cache_totrefs++; + /* Check both Ways */ if (LIKELY(tag == cache_shmem.tags0[wix])) - return &cache_shmem.way0[wix]; + return &cache_shmem.lyns0[wix]; + if (LIKELY(tag == cache_shmem.tags1[wix])) + return &cache_shmem.lyns1[wix]; + return get_cacheline_MISS( a ); +} + +static CacheLine* get_cacheline_MISS ( Addr a ) +{ + /* tag is 'a' with the in-line offset masked out, + eg a[31]..a[4] 0000 */ + static UWord seed = 0; + + CacheLine* cl; + Addr* tag_old_p; + UWord way; + Addr tag = a & ~(N_LINE_W8s - 1); + UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); + + /* Check both Ways */ + tl_assert(tag != cache_shmem.tags0[wix]); + tl_assert(tag != cache_shmem.tags1[wix]); + /* Dump the old line into the backing store. 
*/ stats__cache_totmisses++; - if (is_valid_scache_tag( cache_shmem.tags0[wix] )) { - //if (!is_sane_CacheLine( &cache_shmem .way0[wix])) - //pp_CacheLine( &cache_shmem .way0[wix]); - tl_assert(is_sane_CacheLine( &cache_shmem .way0[wix] )); - cacheline_wback( wix ); + + /* arbitrarily choose the way to dump (not very scientific) */ + way = seed++ & 1; + + if (way == 0) { + cl = &cache_shmem.lyns0[wix]; + tag_old_p = &cache_shmem.tags0[wix]; + } else { + cl = &cache_shmem.lyns1[wix]; + tag_old_p = &cache_shmem.tags1[wix]; } + + if (is_valid_scache_tag( *tag_old_p )) { + tl_assert(is_sane_CacheLine( cl )); + cacheline_wback( way, wix ); + } /* and reload the new one */ - cache_shmem.tags0[wix] = tag; - cacheline_fetch( wix ); - tl_assert(is_sane_CacheLine( &cache_shmem. way0[wix] )); - return &cache_shmem.way0[wix]; + *tag_old_p = tag; + cacheline_fetch( way, wix ); + tl_assert(is_sane_CacheLine( cl )); + return cl; } /////////////////////////////vvvvvvvvvvvvvvvvvvvvvvvvvvvvv @@ -5220,6 +5252,7 @@ } } +__attribute__((unused)) static void laog__sanity_check ( void ) { Word i, ws_size; Word* ws_words; @@ -6862,11 +6895,11 @@ stats__secmaps_allocd, stats__secmap_ga_space_covered); VG_(printf)(" linesZ: %10lu allocd (%10lu bytes occupied)\n", - stats__secmap_linesZ_allocd, - stats__secmap_linesZ_bytes); + stats__secmap_linesZ_allocd, + stats__secmap_linesZ_bytes); VG_(printf)(" linesF: %10lu allocd (%10lu bytes occupied)\n", - stats__secmap_linesF_allocd, - stats__secmap_linesF_bytes); + stats__secmap_linesF_allocd, + stats__secmap_linesF_bytes); VG_(printf)(" secmaps: %10lu iterator steppings\n", stats__secmap_iterator_steppings); @@ -6882,32 +6915,32 @@ VG_(printf)("\n"); VG_(printf)(" cline: %10lu normalises\n", - stats__cline_normalises ); + stats__cline_normalises ); VG_(printf)(" cline: reads 8/4/2/1: %10lu %10lu %10lu %10lu\n", - stats__cline_read8s, - stats__cline_read4s, - stats__cline_read2s, + stats__cline_read8s, + stats__cline_read4s, + stats__cline_read2s, 
stats__cline_read1s ); VG_(printf)(" cline: writes 8/4/2/1: %10lu %10lu %10lu %10lu\n", - stats__cline_write8s, - stats__cline_write4s, - stats__cline_write2s, + stats__cline_write8s, + stats__cline_write4s, + stats__cline_write2s, stats__cline_write1s ); VG_(printf)(" cline: sets 8/4/2/1: %10lu %10lu %10lu %10lu\n", - stats__cline_set8s, - stats__cline_set4s, - stats__cline_set2s, + stats__cline_set8s, + stats__cline_set4s, + stats__cline_set2s, stats__cline_set1s ); VG_(printf)(" cline: get1s %lu, copy1s %lu\n", - stats__cline_get1s, stats__cline_copy1s ); + stats__cline_get1s, stats__cline_copy1s ); VG_(printf)(" cline: splits: 8to4 %10lu, 4to2 %10lu, 2to1 %10lu\n", - stats__cline_8to4splits, - stats__cline_4to2splits, + stats__cline_8to4splits, + stats__cline_4to2splits, stats__cline_2to1splits ); VG_(printf)(" cline: pulldowns: 8to4 %10lu, 4to2 %10lu, 2to1 %10lu\n", - stats__cline_8to4pulldown, - stats__cline_4to2pulldown, + stats__cline_8to4pulldown, + stats__cline_4to2pulldown, stats__cline_2to1pulldown ); VG_(printf)("\n"); } ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ _______________________________________________ Valgrind-developers mailing list Valgrind-developers@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/valgrind-developers