Author: sewardj Date: 2008-03-06 03:21:34 +0000 (Thu, 06 Mar 2008) New Revision: 7575
Log: Rewrite the shadow-value-cache writeback mechanism so as to be less obviously stupid. This improves performance of Helgrind by more than 10% on one test. Modified: branches/HGDEV/helgrind/hg_main.c Modified: branches/HGDEV/helgrind/hg_main.c =================================================================== --- branches/HGDEV/helgrind/hg_main.c 2008-03-05 23:40:41 UTC (rev 7574) +++ branches/HGDEV/helgrind/hg_main.c 2008-03-06 03:21:34 UTC (rev 7575) @@ -1120,10 +1120,10 @@ return ss; } static inline LockSet get_SHVAL_LS (SVal sv) { - LockSet ls; - ls = sv & ((1ULL << LOCK_SET_BITS) - 1); - tl_assert(LS_valid(ls)); - return ls; + LockSet ls; + ls = sv & ((1ULL << LOCK_SET_BITS) - 1); + tl_assert(LS_valid(ls)); + return ls; } static inline Bool is_SHVAL_RW (SVal sv) { @@ -3906,87 +3906,80 @@ } -static -SVal* sequentialise_tree ( /*MOD*/SVal* dst, /*OUT*/Bool* anyShared, - UShort descr, SVal* tree ) { - SVal* dst0 = dst; - *anyShared = False; +typedef struct { UChar count; SVal sval; } CountedSVal; -# define PUT(_n,_v) \ - do { Word i; \ - if (is_SHVAL_Shared(_v)) \ - *anyShared = True; \ - for (i = 0; i < (_n); i++) \ - *dst++ = (_v); \ - } while (0) - - /* byte 0 */ - if (descr & TREE_DESCR_64) PUT(8, tree[0]); else - if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else - if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else - if (descr & TREE_DESCR_8_0) PUT(1, tree[0]); - /* byte 1 */ - if (descr & TREE_DESCR_8_1) PUT(1, tree[1]); - /* byte 2 */ - if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else - if (descr & TREE_DESCR_8_2) PUT(1, tree[2]); - /* byte 3 */ - if (descr & TREE_DESCR_8_3) PUT(1, tree[3]); - /* byte 4 */ - if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else - if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else - if (descr & TREE_DESCR_8_4) PUT(1, tree[4]); - /* byte 5 */ - if (descr & TREE_DESCR_8_5) PUT(1, tree[5]); - /* byte 6 */ - if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else - if (descr & TREE_DESCR_8_6) PUT(1, tree[6]); - /* byte 7 */ - if (descr & TREE_DESCR_8_7) PUT(1, tree[7]); - -# undef PUT - - tl_assert( (((Char*)dst) - ((Char*)dst0)) == 8 * sizeof(SVal) ); - return dst; -} - /* Write the cacheline 'wix' to backing store. Where it ends up is determined by its tag field. */ static -Bool sequentialise_CacheLine ( /*OUT*/SVal* dst, Word nDst, CacheLine* src ) +Bool sequentialise_CacheLine ( /*OUT*/CountedSVal* dst, + /*OUT*/Word* dstUsedP, + Word nDst, CacheLine* src ) { - Word tno, cloff; + Word tno, cloff, dstUsed; Bool anyShared = False; - SVal* dst0 = dst; + tl_assert(nDst == N_LINE_ARANGE); + dstUsed = 0; + for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) { UShort descr = src->descrs[tno]; SVal* tree = &src->svals[cloff]; - Bool bTmp = False; - dst = sequentialise_tree ( dst, &bTmp, descr, tree ); - anyShared |= bTmp; + + /* sequentialise the tree described by (descr,tree). */ +# define PUT(_n,_v) \ + do { if (is_SHVAL_Shared(_v)) \ + anyShared = True; \ + dst[dstUsed ].count = (_n); \ + dst[dstUsed++].sval = (_v); \ + } while (0) + + /* byte 0 */ + if (descr & TREE_DESCR_64) PUT(8, tree[0]); else + if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else + if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else + if (descr & TREE_DESCR_8_0) PUT(1, tree[0]); + /* byte 1 */ + if (descr & TREE_DESCR_8_1) PUT(1, tree[1]); + /* byte 2 */ + if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else + if (descr & TREE_DESCR_8_2) PUT(1, tree[2]); + /* byte 3 */ + if (descr & TREE_DESCR_8_3) PUT(1, tree[3]); + /* byte 4 */ + if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else + if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else + if (descr & TREE_DESCR_8_4) PUT(1, tree[4]); + /* byte 5 */ + if (descr & TREE_DESCR_8_5) PUT(1, tree[5]); + /* byte 6 */ + if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else + if (descr & TREE_DESCR_8_6) PUT(1, tree[6]); + /* byte 7 */ + if (descr & TREE_DESCR_8_7) PUT(1, tree[7]); + +# undef PUT + /* END sequentialise the tree described by (descr,tree). */ + } tl_assert(cloff == N_LINE_ARANGE); + tl_assert(dstUsed <= nDst); - /* Assert we wrote N_LINE_ARANGE shadow values. */ - tl_assert( ((HChar*)dst) - ((HChar*)dst0) - == (Word)(nDst * sizeof(SVal)) ); - + *dstUsedP = dstUsed; return anyShared; } static __attribute__((noinline)) void cacheline_wback ( UWord wix ) { - Word i, j; + Word i, j, k, m; Bool anyShared = False; Addr tag; SecMap* sm; CacheLine* cl; CacheLineZ* lineZ; CacheLineF* lineF; - Word zix, fix; - SVal shvals[N_LINE_ARANGE]; + Word zix, fix, csvalsUsed; + CountedSVal csvals[N_LINE_ARANGE]; SVal sv; if (0) @@ -4015,28 +4008,43 @@ /* Generate the data to be stored */ if (SCE_CACHELINE) tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ - anyShared = sequentialise_CacheLine( shvals, N_LINE_ARANGE, cl ); + csvalsUsed = -1; + anyShared = sequentialise_CacheLine( csvals, &csvalsUsed, + N_LINE_ARANGE, cl ); + tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE); + if (0) VG_(printf)("%lu ", csvalsUsed); + lineZ->dict[0] = lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = 0; - for (i = 0; i < N_LINE_ARANGE; i++) { + /* i indexes actual shadow values, k is cursor in csvals */ + i = 0; + for (k = 0; k < csvalsUsed; k++) { - sv = shvals[i]; + sv = csvals[k].sval; + if (SCE_SVALS) + tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8); + /* do we already have it? */ for (j = 0; j < 4; j++) { if (sv == lineZ->dict[j]) goto dict_ok; } + /* no. look for a free slot. */ for (j = 0; j < 4; j++) { if (lineZ->dict[j] == 0) break; } tl_assert(j >= 0 && j <= 4); if (j == 4) break; /* we'll have to use the f rep */ - tl_assert(is_SHVAL_valid(sv)); + if (SCE_SVALS) + tl_assert(is_SHVAL_valid(sv)); lineZ->dict[j] = sv; dict_ok: - write_twobit_array( lineZ->ix2s, i, j ); + for (m = csvals[k].count; m > 0; m--) { + write_twobit_array( lineZ->ix2s, i, j ); + i++; + } } @@ -4053,11 +4061,19 @@ lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = 0; lineZ->dict[1] = (SVal)fix; lineF->inUse = True; - for (i = 0; i < N_LINE_ARANGE; i++) { - sv = shvals[i]; - tl_assert(is_SHVAL_valid(sv)); - lineF->w32s[i] = sv; + i = 0; + for (k = 0; k < csvalsUsed; k++) { + if (SCE_SVALS) + tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8); + sv = csvals[k].sval; + if (SCE_SVALS) + tl_assert(is_SHVAL_valid(sv)); + for (m = csvals[k].count; m > 0; m--) { + lineF->w32s[i] = sv; + i++; + } } + tl_assert(i == N_LINE_ARANGE); stats__cache_F_wbacks++; } else { stats__cache_Z_wbacks++; ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ _______________________________________________ Valgrind-developers mailing list Valgrind-developers@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/valgrind-developers