Author: sewardj
Date: 2008-03-04 20:32:23 +0000 (Tue, 04 Mar 2008)
New Revision: 7565
Log:
Use a hash table instead of a self-organising list to cache
happens-before results.  (Konstantin Serebryany)

Modified:
   branches/HGDEV/helgrind/hg_main.c

Modified: branches/HGDEV/helgrind/hg_main.c
===================================================================
--- branches/HGDEV/helgrind/hg_main.c   2008-03-04 19:58:43 UTC (rev 7564)
+++ branches/HGDEV/helgrind/hg_main.c   2008-03-04 20:32:23 UTC (rev 7565)
@@ -38,7 +38,13 @@
    - Consider what to do about BHL all over again
 
    - Consider what to do about last-lock-lossage mechanism.
-     Should it be removed?
+     Should it be removed?
+
+   - get rid of SVal-cache dirty bits?  Basically pointless; almost
+     all lines become dirty and have to be written back.  Quantify.
+
+   - get rid of 64-bit mod in happens_before hash calculation
+     (is very expensive on 32-bit platforms)
 */
 
 #include "pub_tool_basics.h"
@@ -1439,7 +1445,7 @@
 
 /* fwds */
 static void shmem__invalidate_scache ( void );
-static void hbefore__invalidate_cache ( void );
+static void hbefore__invalidate_htable ( void );
 static void shmem__set_mbHasLocks ( Addr a, Bool b );
 static Bool shmem__get_mbHasLocks ( Addr a );
 static void shadow_mem_set8 ( Thread* uu_thr_acc, Addr a, SVal svNew );
@@ -1468,7 +1474,7 @@
    /* re <=: < on 64-bit platforms, == on 32-bit ones */
    tl_assert(sizeof(SegmentID) <= sizeof(Word));
    tl_assert(sizeof(Segment*) == sizeof(Word));
-   hbefore__invalidate_cache();
+   hbefore__invalidate_htable();
 
    tl_assert(sizeof(Addr) == sizeof(Word));
    tl_assert(map_locks == NULL);
@@ -1998,9 +2004,7 @@
 /*------------ searching the happens-before graph ------------*/
 
 static UWord stats__hbefore_queries   = 0; // total # queries
-static UWord stats__hbefore_cache0s   = 0; // hits at cache[0]
-static UWord stats__hbefore_cacheNs   = 0; // hits at cache[> 0]
-static UWord stats__hbefore_probes    = 0; // # checks in cache
+static UWord stats__hbefore_hits      = 0; // hits at hash table
 static UWord stats__hbefore_gsearches = 0; // # searches in graph
 static UWord stats__hbefore_gsearchFs = 0; // # fast searches in graph
 static UWord stats__hbefore_invals    = 0; // # cache invals
@@ -2121,55 +2125,56 @@
    return reachable;
 }
 
-/*--------------- the happens_before cache ---------------*/
+/*--------------- the happens_before hash table ---------------*/
 
-#define HBEFORE__N_CACHE 64
-typedef
-   struct { SegmentID segid1; SegmentID segid2; Bool result; }
-   HBeforeCacheEnt;
-static HBeforeCacheEnt hbefore__cache[HBEFORE__N_CACHE];
-static void hbefore__invalidate_cache ( void )
-{
-   Int i;
-   SegmentID bogus = 0;
-   tl_assert(!SEG_id_is_sane(bogus));
+// HBEFORE__N_HTABLE should be a prime number
+// #define HBEFORE__N_HTABLE 104729
+// #define HBEFORE__N_HTABLE 399989
+// #define HBEFORE__N_HTABLE 49999
+ #define HBEFORE__N_HTABLE 19997
+
+
+// Simple closed hash table with prime size.
+// Each entry is a 64-bit value:
+//   bits 0-31:  SegmentID-2
+//   bits 32-62: SegmentID-1
+//   bit 63:     happens-before(SegmentID-1, SegmentID-2)
+static ULong hbefore__hash_table[HBEFORE__N_HTABLE];
+static void hbefore__invalidate_htable ( void )
+{
    stats__hbefore_invals++;
-   for (i = 0; i < HBEFORE__N_CACHE; i++) {
-      hbefore__cache[i].segid1 = bogus;
-      hbefore__cache[i].segid2 = bogus;
-      hbefore__cache[i].result = False;
-   }
+   VG_(memset)(hbefore__hash_table, 0, sizeof(hbefore__hash_table));
 }
 
+
+
 static Bool happens_before ( SegmentID segid1, SegmentID segid2 )
 {
+   ULong seg_1_and_2 = ((ULong)segid1) << 32 | (ULong)(segid2);
    Bool hbG, hbV;
-   Int i, j, iNSERT_POINT;
    Segment *seg1, *seg2;
 
    tl_assert(SEG_id_is_sane(segid1));
   tl_assert(SEG_id_is_sane(segid2));
    tl_assert(segid1 != segid2);
 
    stats__hbefore_queries++;
-   stats__hbefore_probes++;
-   if (segid1 == hbefore__cache[0].segid1
-       && segid2 == hbefore__cache[0].segid2) {
-      stats__hbefore_cache0s++;
-      return hbefore__cache[0].result;
-   }
-   for (i = 1; i < HBEFORE__N_CACHE; i++) {
-      stats__hbefore_probes++;
-      if (segid1 == hbefore__cache[i].segid1
-          && segid2 == hbefore__cache[i].segid2) {
-         /* Found it.  Move it 1 step closer to the front. */
-         HBeforeCacheEnt tmp = hbefore__cache[i];
-         hbefore__cache[i]   = hbefore__cache[i-1];
-         hbefore__cache[i-1] = tmp;
-         stats__hbefore_cacheNs++;
-         return tmp.result;
+   { // try hash table
+      // Hmm, % on ULong is OK on 64-bit machine (32ish cycles on Core2)
+      // but really bad on 32-bit (there is a call to umoddi3 and that
+      // can be many tens of instructions)
+      ULong cached = hbefore__hash_table[seg_1_and_2 % HBEFORE__N_HTABLE];
+      if (cached == seg_1_and_2) {
+         stats__hbefore_hits++;
+         return False;
       }
+
+      if (cached == (seg_1_and_2 | (1ULL << 63))) {
+         stats__hbefore_hits++;
+         return True;
+      }
    }
+
    /* Not found.  Search the graph and add an entry to the cache. */
    stats__hbefore_gsearches++;
 
@@ -2200,18 +2205,12 @@
    }
    tl_assert(hbV == hbG);
 
-   iNSERT_POINT = (1*HBEFORE__N_CACHE)/4 - 1;
-   /* if (iNSERT_POINT > 4) iNSERT_POINT = 4; */
-
-   for (j = HBEFORE__N_CACHE-1; j > iNSERT_POINT; j--) {
-      hbefore__cache[j] = hbefore__cache[j-1];
+   { // remember the results in the hash table
+      ULong cache = hbG ? (seg_1_and_2 | (1ULL << 63)) : seg_1_and_2;
+      hbefore__hash_table[seg_1_and_2 % HBEFORE__N_HTABLE] = cache;
    }
-   hbefore__cache[iNSERT_POINT].segid1 = segid1;
-   hbefore__cache[iNSERT_POINT].segid2 = segid2;
-   hbefore__cache[iNSERT_POINT].result = hbG;
-
    if (0)
-   VG_(printf)("hb %d %d\n", (Int)segid1-(1<<24), (Int)segid2-(1<<24));
+   VG_(printf)("hb %d %d\n", (Int)segid1, (Int)segid2);
 
    return hbG;
 }
@@ -8414,15 +8413,13 @@
 
       VG_(printf)("\n");
       VG_(printf)(" hbefore: %,10lu queries\n",        stats__hbefore_queries);
-      VG_(printf)(" hbefore: %,10lu cache 0 hits\n",   stats__hbefore_cache0s);
-      VG_(printf)(" hbefore: %,10lu cache > 0 hits\n", stats__hbefore_cacheNs);
+      VG_(printf)(" hbefore: %,10lu hash table hits\n", stats__hbefore_hits);
      VG_(printf)(" hbefore: %,10lu graph searches\n", stats__hbefore_gsearches);
      VG_(printf)(" hbefore: %,10lu of which slow\n",
                   stats__hbefore_gsearches - stats__hbefore_gsearchFs);
       VG_(printf)(" hbefore: %,10lu stack high water mark\n",
                   stats__hbefore_stk_hwm);
       VG_(printf)(" hbefore: %,10lu cache invals\n",   stats__hbefore_invals);
-      VG_(printf)(" hbefore: %,10lu probes\n",         stats__hbefore_probes);
       VG_(printf)("\n");
 
       VG_(printf)(" segments: %,8lu Segment objects allocated\n",
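
For readers following the patch, here is a minimal standalone sketch of the
caching scheme it introduces: a single-probe closed hash table whose 64-bit
slots pack both segment IDs and the query's answer, so a lookup is one load
and at most two compares, and a collision is simply overwritten on the next
miss.  The sketch uses plain C types rather than Valgrind's ULong/SegmentID,
and graph_search is a hypothetical stand-in for Helgrind's real search of
the happens-before graph.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define N_HTABLE 19997   /* prime table size, as in the patch */

/* One slot per cached query: bits 0-31 hold the second segment ID,
   bits 32-62 the first (so it must fit in 31 bits), and bit 63 the
   cached happens-before result.  Zero marks an empty slot, which is
   safe only because segment IDs are assumed never to be zero. */
static uint64_t htable[N_HTABLE];

static void htable_invalidate(void)
{
   memset(htable, 0, sizeof(htable));
}

/* Hypothetical stand-in for the real graph search; any deterministic
   relation will do for the purposes of this sketch. */
static bool graph_search(uint32_t segid1, uint32_t segid2)
{
   return segid1 < segid2;
}

static bool happens_before_cached(uint32_t segid1, uint32_t segid2)
{
   uint64_t key    = ((uint64_t)segid1 << 32) | (uint64_t)segid2;
   uint64_t cached = htable[key % N_HTABLE];

   /* Single probe: the slot either holds exactly this query, with the
      answer in bit 63, or something else, in which case we fall
      through to the slow path and overwrite the slot. */
   if (cached == key)
      return false;
   if (cached == (key | (1ULL << 63)))
      return true;

   bool hb = graph_search(segid1, segid2);
   htable[key % N_HTABLE] = hb ? (key | (1ULL << 63)) : key;
   return hb;
}

int main(void)
{
   htable_invalidate();
   printf("%d\n", (int)happens_before_cached(3, 7));  /* miss: slow path  */
   printf("%d\n", (int)happens_before_cached(3, 7));  /* hit: one probe   */
   return 0;
}

Unlike the old move-to-front list, this makes the hit path constant time and
branch-light, at the cost of forgetting one cached result per collision.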
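The new TODO item about the 64-bit mod (echoed by the umoddi3 comment inside
happens_before) is the remaining cost on 32-bit hosts, where
seg_1_and_2 % HBEFORE__N_HTABLE becomes a library call.  One possible way to
get rid of it, which is not what this patch does, is to give up the
prime-sized table and index a power-of-two table with a cheap 32-bit mix of
the key; the table size and mixing constant below are illustrative only.

#include <stdint.h>

#define N_HTABLE_LOG2 15                  /* 32768 slots; illustrative */
#define N_HTABLE      (1u << N_HTABLE_LOG2)

/* Fold the 64-bit (SegmentID-1, SegmentID-2) key into a table index
   using only 32-bit operations: XOR the two halves, multiply by an odd
   constant (Fibonacci hashing, roughly 2^32 divided by the golden
   ratio), and keep the top bits.  No 64-bit division or modulo. */
static inline uint32_t key_to_index(uint64_t key)
{
   uint32_t x = (uint32_t)key ^ (uint32_t)(key >> 32);
   x *= 0x9E3779B9u;
   return x >> (32 - N_HTABLE_LOG2);
}

In the patch's terms this would replace seg_1_and_2 % HBEFORE__N_HTABLE with
key_to_index(seg_1_and_2); the multiply spreads clustered segment IDs across
the table, so the usual objection to power-of-two table sizes (clustering in
the low-order bits) does not apply.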