18. okt. 2016 00.07 skrev "Jan Ziak" <0xe2.0x9a.0...@gmail.com>: > > This patch replaces the ir_variable_refcount_entry's linked-list > with an array-list. > > The array-list has local storage which does not require ANY additional > allocations if the list has small number of elements. The size of this > storage is configurable for each variable. > > Benchmark results for "./run -1 shaders" from shader-db[1]: > > - The total number of executed instructions goes down from 64.184 to 63.797 > giga-instructions when Mesa is compiled with "gcc -O0 ..." > - In the call tree starting at function do_dead_code(): > - the number of calls to malloc() is reduced by about 10% > - the number of calls to free() is reduced by about 30% > > [1] git://anongit.freedesktop.org/mesa/shader-db
Nice find. I would not be surprised if there are more cases of inappropriate data structures being used, causing overhead. I can't quite tell, as Gmail tends to mangle whitespace stuff, but it looks like there might be some style issues with not everything following the three-space indent, no tabs policy that mesa tries to stick to. On the subject of this patch, some thoughts that came to mind: I think it would be preferred if the implementation was in C. Using this in NIR, which tries to be C only, might be a possibility? I also seem to recall that I reviewed some patches that Jason wrote back a good while ago for NIR that implemented a dynamically resizing array of sorts that might be of interest. Just some random thoughts. -Thomas > > Signed-off-by: Jan Ziak (http://atom-symbol.net) <0xe2.0x9a.0...@gmail.com > > --- > src/compiler/glsl/ir_variable_refcount.cpp | 14 +-- > src/compiler/glsl/ir_variable_refcount.h | 8 +- > src/compiler/glsl/opt_dead_code.cpp | 19 ++-- > src/util/fast_list.h | 167 +++++++++++++++++++++++++++++ > 4 files changed, 176 insertions(+), 32 deletions(-) > > diff --git a/src/compiler/glsl/ir_variable_refcount.cpp b/src/compiler/glsl/ir_variable_refcount.cpp > index 8306be1..94d6edc 100644 > --- a/src/compiler/glsl/ir_variable_refcount.cpp > +++ b/src/compiler/glsl/ir_variable_refcount.cpp > @@ -46,15 +46,6 @@ static void > free_entry(struct hash_entry *entry) > { > ir_variable_refcount_entry *ivre = (ir_variable_refcount_entry *) entry->data; > - > - /* Free assignment list */ > - exec_node *n; > - while ((n = ivre->assign_list.pop_head()) != NULL) { > - struct assignment_entry *assignment_entry = > - exec_node_data(struct assignment_entry, n, link); > - free(assignment_entry); > - } > - > delete ivre; > } > > @@ -142,10 +133,7 @@ ir_variable_refcount_visitor::visit_leave(ir_assignment *ir) > */ > assert(entry->referenced_count >= entry->assigned_count); > if (entry->referenced_count == entry->assigned_count) { > - struct assignment_entry *assignment_entry = > - (struct assignment_entry *)calloc(1, sizeof(*assignment_entry)); > - assignment_entry->assign = ir; > - entry->assign_list.push_head(&assignment_entry->link); > + entry->assign_list.add(ir); > } > } > > diff --git a/src/compiler/glsl/ir_variable_refcount.h b/src/compiler/glsl/ir_variable_refcount.h > index 08a11c0..c3ec5fe 100644 > --- a/src/compiler/glsl/ir_variable_refcount.h > +++ b/src/compiler/glsl/ir_variable_refcount.h > @@ -32,11 +32,7 @@ > #include "ir.h" > #include "ir_visitor.h" > #include "compiler/glsl_types.h" > - > -struct assignment_entry { > - exec_node link; > - ir_assignment *assign; > -}; > +#include "util/fast_list.h" > > class ir_variable_refcount_entry > { > @@ -50,7 +46,7 @@ public: > * This is intended to be used for dead code optimisation and may > * not be a complete list. > */ > - exec_list assign_list; > + arraylist<ir_assignment*,4> assign_list; > > /** Number of times the variable is referenced, including assignments. */ > unsigned referenced_count; > diff --git a/src/compiler/glsl/opt_dead_code.cpp b/src/compiler/glsl/opt_dead_code.cpp > index 75e668a..06e8c3d 100644 > --- a/src/compiler/glsl/opt_dead_code.cpp > +++ b/src/compiler/glsl/opt_dead_code.cpp > @@ -52,7 +52,7 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned) > > struct hash_entry *e; > hash_table_foreach(v.ht, e) { > - ir_variable_refcount_entry *entry = (ir_variable_refcount_entry *)e->data; > + ir_variable_refcount_entry *const entry = (ir_variable_refcount_entry *)e->data; > > /* Since each assignment is a reference, the refereneced count must be > * greater than or equal to the assignment count. If they are equal, > @@ -89,7 +89,7 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned) > if (entry->var->data.always_active_io) > continue; > > - if (!entry->assign_list.is_empty()) { > + if (!entry->assign_list.empty()) { > /* Remove all the dead assignments to the variable we found. > * Don't do so if it's a shader or function output, though. > */ > @@ -98,26 +98,19 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned) > entry->var->data.mode != ir_var_shader_out && > entry->var->data.mode != ir_var_shader_storage) { > > - while (!entry->assign_list.is_empty()) { > - struct assignment_entry *assignment_entry = > - exec_node_data(struct assignment_entry, > - entry->assign_list.get_head_raw(), link); > - > - assignment_entry->assign->remove(); > - > + for(ir_assignment *assign : entry->assign_list) { > + assign->remove(); > if (debug) { > printf("Removed assignment to %s@%p\n", > entry->var->name, (void *) entry->var); > } > - > - assignment_entry->link.remove(); > - free(assignment_entry); > } > + entry->assign_list.clear(); > progress = true; > } > } > > - if (entry->assign_list.is_empty()) { > + if (entry->assign_list.empty()) { > /* If there are no assignments or references to the variable left, > * then we can remove its declaration. > */ > diff --git a/src/util/fast_list.h b/src/util/fast_list.h > new file mode 100644 > index 0000000..4abd965 > --- /dev/null > +++ b/src/util/fast_list.h > @@ -0,0 +1,167 @@ > +/* > + * Copyright © 2016 Jan Ziak > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + * > + * Authors: > + * Jan Ziak (http://atom-symbol.net) <0xe2.0x9x.0...@gmail.com> > + */ > + > +/** > + * @file fast_list.h > + * > + * High-performance C++ list implementation. > + */ > + > +#ifndef _UTIL_FAST_LIST_H > +#define _UTIL_FAST_LIST_H > + > +#include <new> > +#include <stdlib.h> > +#include <string.h> > + > +/** > + * A high-performance array list. > + * > + * The list doesn't support elements whose behavior depends on the memory address of the element. > + * > + * @param T List element type. > + * > + * @param N The number of statically allocated elements to achieve the highest overall performance. > + * N is typically 0, 2, 4 or 8 depending on the runtime usage pattern of a particular list > + * variable in the source code. > + */ > +template<typename T, size_t N> > +struct arraylist > +{ > + size_t len; // Length > + size_t cap; // Capacity (size of memory allocated to 'elem') > + T *elem; // List elements > + uint8_t local[N*sizeof(T)]; // Fast local preallocated storage for lists with length 0..N > + > + arraylist() { > + len = 0; > + cap = N; > + elem = (N ? (T*)local : NULL); > + } > + > + ~arraylist() { > + if(!__has_trivial_destructor(T)) { > + for(size_t i=0; i<len; i++) { > + elem[i].~T(); > + } > + } > + if((N && elem != (T*)local) && elem) { > + free(elem); > + } > + } > + > + arraylist(const arraylist<T,N> &a) = delete; > + void operator=(const arraylist<T,N> &a) = delete; > + > + bool empty() const { > + return len==0; > + } > + > + void clear() { > + if(!__has_trivial_destructor(T)) { > + for(size_t i=0; i<len; i++) { > + elem[i].~T(); > + } > + } > + len = 0; > + } > + > + arraylist<T,N>& add(const T &e) { > + if(len == cap) { > + enlarge1(); > + } > + new (&elem[len++]) T(e); > + return *this; > + } > + > + T& operator[](size_t i) { return elem[i]; } > + const T& operator[](size_t i) const { return elem[i]; } > + > + /* > + * C++ iterators: > + */ > + > + struct const_iterator_t { > + const T *const elem; > + size_t i; > + const_iterator_t(const T *_elem, size_t _i) : elem(_elem), i(_i) {} > + const T& operator*() const { return elem[i]; } > + void operator++() { i++; } > + bool operator!=(const const_iterator_t &b) const { return i < b.i; } > + }; > + struct iterator_t { > + T *const elem; > + size_t i; > + iterator_t(T *_elem, size_t _i) : elem(_elem), i(_i) {} > + T& operator*() const { return elem[i]; } > + void operator++() { i++; } > + bool operator!=(const iterator_t &b) const { return i < b.i; } > + }; > + > + const_iterator_t const_begin() const { return const_iterator_t(elem, 0); } > + const_iterator_t const_end() const { return const_iterator_t(elem, len); } > + > + const_iterator_t begin() const { return const_iterator_t(elem, 0); } > + const_iterator_t end() const { return const_iterator_t(elem, len); } > + > + iterator_t begin() { return iterator_t(elem, 0); } > + iterator_t end() { return iterator_t(elem, len); } > + > + /* > + * Methods for std::vector<T> compatibility: > + */ > + > + arraylist<T,N>& push_back(const T &e) { return add(e); } > + > +private: > + void enlarge1() { > + if(N) { > + cap *= 2; > + if(elem == (T*)local) { > + elem = (T*)malloc(cap*sizeof(T)); > + if(!elem) { > + abort(); > + } > + memcpy(elem, local, len*sizeof(T)); > + } > + else { > + elem = (T*)realloc(elem, cap*sizeof(T)); > + if(!elem) { > + abort(); > + } > + } > + } > + else { > + cap = (cap ? 2*cap : 4); > + elem = (T*)realloc(elem, cap*sizeof(T)); > + if(!elem) { > + abort(); > + } > + } > + } > +}; > + > +#endif /* _UTIL_FAST_LIST_H */ > \ No newline at end of file > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev