From: Ian Romanick <ian.d.roman...@intel.com> Valgrind massif results for a trimmed apitrace of dota2:
n time(i) total(B) useful-heap(B) extra-heap(B) stacks(B) Before (32-bit): 52 40,521,071,734 66,157,928 61,054,870 5,103,058 0 After (32-bit): 79 40,523,892,028 65,999,288 60,937,396 5,061,892 0 Before (64-bit): 48 37,089,379,412 92,630,712 85,454,539 7,176,173 0 After (64-bit): 71 37,091,183,117 92,433,072 85,309,100 7,123,972 0 A real savings of 114KiB on 32-bit and 142KiB on 64-bit. Signed-off-by: Ian Romanick <ian.d.roman...@intel.com> --- src/glsl/.gitignore | 1 + src/glsl/Makefile.am | 10 ++ src/glsl/common_variable_names.py | 220 ++++++++++++++++++++++++++++++++++++++ src/glsl/ir.cpp | 32 ++++-- src/glsl/ir.h | 9 +- 5 files changed, 256 insertions(+), 16 deletions(-) create mode 100644 src/glsl/common_variable_names.py diff --git a/src/glsl/.gitignore b/src/glsl/.gitignore index 43720f6..16e7e81 100644 --- a/src/glsl/.gitignore +++ b/src/glsl/.gitignore @@ -1,3 +1,4 @@ +get_static_name.h glsl_compiler glsl_lexer.cpp glsl_parser.cpp diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am index 00261fd..c9de9f8 100644 --- a/src/glsl/Makefile.am +++ b/src/glsl/Makefile.am @@ -149,6 +149,15 @@ glsl_test_SOURCES = \ glsl_test_LDADD = libglsl.la +# I don't understand why automake recognizes that glsl_parser_extras.lo +# depends on glsl_parser.h, but it cannot recognize that ir.lo depends on +# get_static_name.h. + +ir.lo: get_static_name.h + +get_static_name.h: common_variable_names.py + $(PYTHON2) $(GLSL_SRCDIR)/common_variable_names.py > $(GLSL_BUILDDIR)/get_static_name.h + # We write our own rules for yacc and lex below. We'd rather use automake, # but automake makes it especially difficult for a number of reasons: # @@ -205,6 +214,7 @@ BUILT_SOURCES = \ glcpp/glcpp-parse.c \ glcpp/glcpp-lex.c CLEANFILES = \ + get_static_name.h \ glcpp/glcpp-parse.h \ glsl_parser.h \ $(BUILT_SOURCES) diff --git a/src/glsl/common_variable_names.py b/src/glsl/common_variable_names.py new file mode 100644 index 0000000..7435a12 --- /dev/null +++ b/src/glsl/common_variable_names.py @@ -0,0 +1,220 @@ +# Copyright (c) 2014 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +common_names = [ + "atomic_temp_var", + "color", + "diffuse", + "factor", + "fucxadd", + "fucxmul", + "gl_ClipVertex", + "gl_Color", + "gl_FogFragCoord", + "gl_FragColor", + "gl_FragData", + "gl_FragCoord", + "gl_FrontColor", + "gl_FrontFacing", + "gl_FrontSecondaryColor", + "gl_in_TexCoord0", + "gl_ModelViewProjectionMatrixTranspose", + "gl_MultiTexCoord0", + "gl_out_FragData0", + "gl_out_TexCoord0", + "gl_Position", + "gl_TexCoord", + "gl_Vertex", + "io_vLink0", + "oT0", + "oT1", + "oT2", + "oT3", + "oT4", + "oT5", + "oT6", + "oT7", + "pos", + "r10", + "r11", + "r12", + "sampler0", + "sampler1", + "sampler2", + "sampler3", + "sampler4", + "sampler5", + "sampler6", + "sampler7", + "sampler8", + "va_r", + "vcbones", + "vcscreen", + "vLit", + "vTempPos", + "v_vColor", + "v_vNormal", + "v_vPositionView", + "v_vTexcoordLight" + ] + +template = """static const char static_names[] = + "compiler_temp\\0" + % for name in common_names: + "${name}\\0" + % endfor + ; + + % for s in sized_lists: + % if len(sized_lists[s]) != 1: +static const ${index_type} table_${s}[] = { + % for name in sized_lists[s]: + ${location_dict[name]}, + % endfor +}; + + % endif + % endfor +#ifdef DEBUG +/** + * Slowly search the table for the specified string. + * + * Used for debugging the code generator. + */ +static bool +name_really_is_not_in_static_names(const char *name) +{ + const char *ptr = static_names + strlen("compiler_temp") + 1; + + assert(ptr[0] != 0); + + while (ptr[0] != 0) { + const unsigned len = strlen(ptr); + + if (strcmp(name, ptr) == 0) + return false; + + ptr += len + 1; + } + + return true; +} +#endif /* DEBUG */ + +/** + * Search the static name table for the specified name + * + * \\warning + * As the name implies, \\b do \\b not call this function directly. Instead, + * call \\c get_static_name. + */ +static const char * +get_static_name_do_not_call(const char *name) +{ + const unsigned len = strlen(name); + const ${index_type} *table = NULL; + unsigned table_size = 0; + + assert(strcmp("compiler_temp", &static_names[0]) == 0); + assert(len >= (sizeof(void *) - 1)); + + switch (len) { + % for s in sized_lists: + case ${s}: + % if len(sized_lists[s]) != 1: + table = table_${s}; + table_size = ARRAY_SIZE(table_${s}); + break; + % else: + /* ${sized_lists[s][0]} */ + return strcmp(name, &static_names[${location_dict[sized_lists[s][0]]}]) == 0 + ? &static_names[${location_dict[sized_lists[s][0]]}] : NULL; + % endif + % endfor + default: + return NULL; + } + + /* The tables are pretty small, so the extra overhead of using bsearch is + * likely to be much slower than the open-coded linear search. + */ + for (unsigned i = 0; i < table_size; i++) { + if (strcmp(name, &static_names[table[i]]) == 0) + return &static_names[table[i]]; + } + + return NULL; +} + +/** + * Search the static name table for the specified name + * + * In debug builds, this will perform additional sanity checking. + */ +static const char * +get_static_name(const char *name) +{ +#ifdef DEBUG + const char *const ptr = get_static_name_do_not_call(name); + + if (ptr != NULL) { + assert(strcmp(ptr, name) == 0); + return ptr; + } + + assert(name_really_is_not_in_static_names(name)); + return NULL; +#else + return get_static_name_do_not_call(name); +#endif +} +""" + +common_names.sort() + +# Partiation the list of names into sublists of names with the same length +sized_lists = {} +for name in common_names: + if len(name) in sized_lists: + sized_lists[len(name)].append(name) + else: + sized_lists[len(name)] = [name] + +# Create a dictionary that matches each name with the location of its first +# character in the static string table. +location_dict = {} +base = len("compiler_temp") + 1 +for name in common_names: + location_dict[name] = base + base = base + len(name) + 1 + +if base < (1 << 8): + index_type = "uint8_t" +elif base < (1 << 16): + index_type = "uint16_t" +else: + index_type = "uint32_t" + +from mako.template import Template +print Template(template).render(location_dict = location_dict, + sized_lists = sized_lists, + common_names = common_names, + index_type = index_type) diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 4b22439..543718d 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -1532,10 +1532,15 @@ ir_swizzle::variable_referenced() const return this->val->variable_referenced(); } +#include "get_static_name.h" -bool ir_variable::temporaries_allocate_names = false; +/* Note: static_names is defined in get_static_name.h. + */ +const char *const ir_variable::static_names_begin = &static_names[0]; +const char *const ir_variable::static_names_end = + &static_names[ARRAY_SIZE(static_names)]; -const char ir_variable::tmp_name[] = "compiler_temp"; +bool ir_variable::temporaries_allocate_names = false; ir_variable::ir_variable(const struct glsl_type *type, const char *name, ir_variable_mode mode) @@ -1546,26 +1551,31 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name, if (mode == ir_var_temporary && !ir_variable::temporaries_allocate_names) name = NULL; - /* The ir_variable clone method may call this constructor with name set to - * tmp_name. - */ assert(name != NULL || mode == ir_var_temporary || mode == ir_var_function_in || mode == ir_var_function_out || mode == ir_var_function_inout); - assert(name != ir_variable::tmp_name - || mode == ir_var_temporary); - if (mode == ir_var_temporary - && (name == NULL || name == ir_variable::tmp_name)) { - this->name = ir_variable::tmp_name; + + /* If the supplied name is in the table of statically allocated names, just + * re-use the pointer. Don't bother searching the table for it. This can + * occur when called by the ir_variable clone method. + */ + if (name >= ir_variable::static_names_begin + && name < ir_variable::static_names_end) { + this->name = name; + } else if (mode == ir_var_temporary && name == NULL) { + this->name = ir_variable::static_names_begin; } else if (name == NULL) { this->padding[0] = 0; this->name = (char *) this->padding; } else if (strlen(name) < sizeof(this->padding)) { this->name = strcpy((char *) this->padding, name); } else { - this->name = ralloc_strdup(this, name); + const char *const static_name = get_static_name(name); + + this->name = (static_name == NULL) + ? ralloc_strdup(this, name) : static_name; } this->u.max_ifc_array_access = NULL; diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 770fe60..4088f80 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -598,7 +598,8 @@ public: inline bool is_name_ralloced() const { - return this->name != ir_variable::tmp_name + return !(this->name >= ir_variable::static_names_begin + && this->name < ir_variable::static_names_end) && this->name != (char *) this->padding; } @@ -909,10 +910,8 @@ private: */ const glsl_type *interface_type; - /** - * Name used for anonymous compiler temporaries - */ - static const char tmp_name[]; + static const char *const static_names_begin; + static const char *const static_names_end; public: /** -- 1.8.1.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev