Re: [Mesa-dev] [PATCH 26/26] glsl: Add Bloom filter to get_static_name
On Monday, July 14, 2014 03:48:58 PM Ian Romanick wrote: From: Ian Romanick ian.d.roman...@intel.com The use of the Bloom filter rejects the vast majority of strings that are not in the table before expensive comparisons are performed. This Bloom filter uses two hashes. One is explicit (calculate_hash in the code), and the other is implicit. The implicit hash is just the length of the name, and the filter is the switch-statement in get_static_name_do_not_call that rejects name with lengths that are not in the table. No change Valgrind massif results for a trimmed apitrace of dota2. NOTE: This patch could probably get squashed into the previous patch. I kept it separate because I thought it would make things easier to review. Signed-off-by: Ian Romanick ian.d.roman...@intel.com --- src/glsl/common_variable_names.py | 113 +- 1 file changed, 112 insertions(+), 1 deletion(-) diff --git a/src/glsl/common_variable_names.py b/src/glsl/common_variable_names.py index 7435a12..ee5571c 100644 --- a/src/glsl/common_variable_names.py +++ b/src/glsl/common_variable_names.py @@ -119,6 +119,46 @@ name_really_is_not_in_static_names(const char *name) } #endif /* DEBUG */ +static uint32_t +calculate_hash(const char *str) +{ +uint32_t hash = 5381; + +while (*str != '\\0') { +hash = (hash * 33) + *str; +str++; +} + +return hash; +} + +/** + * Check the Bloom filter for the name + */ +static bool +name_is_in_Bloom_filter(const char *name) +{ + static const uint32_t filter[${len(filter)}] = { +% for i in range(0,len(filter), 4): + ${{:#010x}.format(filter[i+0])}, ${{:#010x}.format(filter[i+1])}, ${{:#010x}.format(filter[i+2])}, ${{:#010x}.format(filter[i+3])}, + % endfor + }; + + const uint32_t h = calculate_hash(name); + + if (h ${min(all_hash)}) + return false; + + if (h ${max(all_hash)}) + return false; + + const unsigned bit = (h - ${min(all_hash)}) % ${len(filter) * 32}; + const unsigned i = bit / 32; + const unsigned j = bit % 32; + + return (filter[i] (1U j)) != 0; +} + /** * Search the static name table for the specified name * @@ -129,6 +169,11 @@ name_really_is_not_in_static_names(const char *name) static const char * get_static_name_do_not_call(const char *name) { + /* Do the trivial rejection test before anything. +*/ + if (!name_is_in_Bloom_filter(name)) + return NULL; + const unsigned len = strlen(name); const ${index_type} *table = NULL; unsigned table_size = 0; @@ -188,6 +233,14 @@ get_static_name(const char *name) } +def calculate_hash(str): +hash = 5381 + +for c in str: +hash = ((hash * 33) + ord(c)) 0x0 + +return hash + common_names.sort() # Partiation the list of names into sublists of names with the same length @@ -213,8 +266,66 @@ elif base (1 16): else: index_type = uint32_t +all_hash = [] +for name in common_names: + all_hash.append(calculate_hash(name)) simplify this to: all_hash = [calculate_hash(n) for n in common_names] + +# Experimentally determined that 8,192 bits is sufficient for this dataset. +# This was determined by: +# +# 1. Modify the generated code to log a message on entry to get_static_name. +# +# 2. Modify the generated code to log a message when name_is_in_Bloom_filter +# returns true. +# +# 3. Modifiy the generated code to log a message each time a name is rejected +# due to its length. This is the 'default' case in the switch-statement. +# This is an implicit hash that is part of the Bloom filter. +# +# 4. Modify the generated code to log a message each time a name has actually +# been tested (using strcmp) and was not in the table. This means logging at +# the end of get_static_name_do_not_call and inside the switch-statement where +# lonely names are handled. +# +# 5. Run a ton of shaders through (e.g., shader-db) and collect the output. +# Count the number of times each message occurred. +# +# Two hash functions were tried. The current one and Murmur2. Exhaustive +# testing over shader-db produced the following results. There were a total +# of 6,749,837 calls to get_static_name in each run. The # in the column +# header refers messages mentioned in the list above. +# +#Bits Current hash Murmur2 +##2 / #3 / #4 #2 / #3 / #4 +# 128 5249223 / 262597 / 4826172 763090 / 285531 / 317105 +# 256 4955982 / 162087 / 4633441 508512 / 152491 / 195567 +# 512334736 / 52 / 63130 381691 / 98277 / 122960 +#1024236521 / 43809 / 32258 326657 / 69346 / 96857 +#2048174275 / 1533 / 12288 258885 / 49537 / 48894 +#4096171153 /341 / 10358 185632 /682 / 24496 +#8192
Re: [Mesa-dev] [PATCH 26/26] glsl: Add Bloom filter to get_static_name
On 07/21/2014 01:56 PM, Dylan Baker wrote: On Monday, July 14, 2014 03:48:58 PM Ian Romanick wrote: From: Ian Romanick ian.d.roman...@intel.com The use of the Bloom filter rejects the vast majority of strings that are not in the table before expensive comparisons are performed. This Bloom filter uses two hashes. One is explicit (calculate_hash in the code), and the other is implicit. The implicit hash is just the length of the name, and the filter is the switch-statement in get_static_name_do_not_call that rejects name with lengths that are not in the table. No change Valgrind massif results for a trimmed apitrace of dota2. NOTE: This patch could probably get squashed into the previous patch. I kept it separate because I thought it would make things easier to review. Signed-off-by: Ian Romanick ian.d.roman...@intel.com --- src/glsl/common_variable_names.py | 113 +- 1 file changed, 112 insertions(+), 1 deletion(-) diff --git a/src/glsl/common_variable_names.py b/src/glsl/common_variable_names.py index 7435a12..ee5571c 100644 --- a/src/glsl/common_variable_names.py +++ b/src/glsl/common_variable_names.py @@ -119,6 +119,46 @@ name_really_is_not_in_static_names(const char *name) } #endif /* DEBUG */ +static uint32_t +calculate_hash(const char *str) +{ +uint32_t hash = 5381; + +while (*str != '\\0') { +hash = (hash * 33) + *str; +str++; +} + +return hash; +} + +/** + * Check the Bloom filter for the name + */ +static bool +name_is_in_Bloom_filter(const char *name) +{ + static const uint32_t filter[${len(filter)}] = { +% for i in range(0,len(filter), 4): + ${{:#010x}.format(filter[i+0])}, ${{:#010x}.format(filter[i+1])}, ${{:#010x}.format(filter[i+2])}, ${{:#010x}.format(filter[i+3])}, + % endfor + }; + + const uint32_t h = calculate_hash(name); + + if (h ${min(all_hash)}) + return false; + + if (h ${max(all_hash)}) + return false; + + const unsigned bit = (h - ${min(all_hash)}) % ${len(filter) * 32}; + const unsigned i = bit / 32; + const unsigned j = bit % 32; + + return (filter[i] (1U j)) != 0; +} + /** * Search the static name table for the specified name * @@ -129,6 +169,11 @@ name_really_is_not_in_static_names(const char *name) static const char * get_static_name_do_not_call(const char *name) { + /* Do the trivial rejection test before anything. +*/ + if (!name_is_in_Bloom_filter(name)) + return NULL; + const unsigned len = strlen(name); const ${index_type} *table = NULL; unsigned table_size = 0; @@ -188,6 +233,14 @@ get_static_name(const char *name) } +def calculate_hash(str): +hash = 5381 + +for c in str: +hash = ((hash * 33) + ord(c)) 0x0 + +return hash + common_names.sort() # Partiation the list of names into sublists of names with the same length @@ -213,8 +266,66 @@ elif base (1 16): else: index_type = uint32_t +all_hash = [] +for name in common_names: + all_hash.append(calculate_hash(name)) simplify this to: all_hash = [calculate_hash(n) for n in common_names] I learned about list comprehensions right after doing all this code. :) + +# Experimentally determined that 8,192 bits is sufficient for this dataset. +# This was determined by: +# +# 1. Modify the generated code to log a message on entry to get_static_name. +# +# 2. Modify the generated code to log a message when name_is_in_Bloom_filter +# returns true. +# +# 3. Modifiy the generated code to log a message each time a name is rejected +# due to its length. This is the 'default' case in the switch-statement. +# This is an implicit hash that is part of the Bloom filter. +# +# 4. Modify the generated code to log a message each time a name has actually +# been tested (using strcmp) and was not in the table. This means logging at +# the end of get_static_name_do_not_call and inside the switch-statement where +# lonely names are handled. +# +# 5. Run a ton of shaders through (e.g., shader-db) and collect the output. +# Count the number of times each message occurred. +# +# Two hash functions were tried. The current one and Murmur2. Exhaustive +# testing over shader-db produced the following results. There were a total +# of 6,749,837 calls to get_static_name in each run. The # in the column +# header refers messages mentioned in the list above. +# +#Bits Current hash Murmur2 +##2 / #3 / #4 #2 / #3 / #4 +# 128 5249223 / 262597 / 4826172 763090 / 285531 / 317105 +# 256 4955982 / 162087 / 4633441 508512 / 152491 / 195567 +# 512334736 / 52 / 63130 381691 / 98277 / 122960 +#1024236521 / 43809 / 32258 326657 / 69346 / 96857 +#2048174275 / 1533 / 12288
Re: [Mesa-dev] [PATCH 26/26] glsl: Add Bloom filter to get_static_name
On Mon, Jul 21, 2014 at 5:36 PM, Ian Romanick i...@freedesktop.org wrote: On 07/21/2014 01:56 PM, Dylan Baker wrote: 3) this is a silly function, all you need is: filter = range(0, filter_bits / 32) But that will be different... that's the same as filter = [0, 1, 2, 3, ...] but I want filter = [0, 0, 0, 0, ...] So... filter = [0 for i in xrange(0, filter_bits / 32)] should do the trick? That would work. Another fun trick: [0] * (filter_bits / 32) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev