Hi Jan,

On 18.10.2016 00:07, Jan Ziak wrote:
This patch replaces the ir_variable_refcount_entry's linked-list
with an array-list.

The array-list has local storage which does not require ANY additional
allocations if the list has small number of elements. The size of this
storage is configurable for each variable.

Benchmark results for "./run -1 shaders" from shader-db[1]:

- The total number of executed instructions goes down from 64.184 to 63.797
   giga-instructions when Mesa is compiled with "gcc -O0 ..."
- In the call tree starting at function do_dead_code():
   - the number of calls to malloc() is reduced by about 10%
   - the number of calls to free() is reduced by about 30%

[1] git://anongit.freedesktop.org/mesa/shader-db

Signed-off-by: Jan Ziak (http://atom-symbol.net) <0xe2.0x9a.0...@gmail.com>
---
  src/compiler/glsl/ir_variable_refcount.cpp |  14 +--
  src/compiler/glsl/ir_variable_refcount.h   |   8 +-
  src/compiler/glsl/opt_dead_code.cpp        |  19 ++--
  src/util/fast_list.h                       | 167 +++++++++++++++++++++++++++++
  4 files changed, 176 insertions(+), 32 deletions(-)

diff --git a/src/compiler/glsl/ir_variable_refcount.cpp 
b/src/compiler/glsl/ir_variable_refcount.cpp
index 8306be1..94d6edc 100644
--- a/src/compiler/glsl/ir_variable_refcount.cpp
+++ b/src/compiler/glsl/ir_variable_refcount.cpp
@@ -46,15 +46,6 @@ static void
  free_entry(struct hash_entry *entry)
  {
     ir_variable_refcount_entry *ivre = (ir_variable_refcount_entry *) 
entry->data;
-
-   /* Free assignment list */
-   exec_node *n;
-   while ((n = ivre->assign_list.pop_head()) != NULL) {
-      struct assignment_entry *assignment_entry =
-         exec_node_data(struct assignment_entry, n, link);
-      free(assignment_entry);
-   }
-
     delete ivre;
  }
@@ -142,10 +133,7 @@ ir_variable_refcount_visitor::visit_leave(ir_assignment *ir)
         */
        assert(entry->referenced_count >= entry->assigned_count);
        if (entry->referenced_count == entry->assigned_count) {
-         struct assignment_entry *assignment_entry =
-            (struct assignment_entry *)calloc(1, sizeof(*assignment_entry));
-         assignment_entry->assign = ir;
-         entry->assign_list.push_head(&assignment_entry->link);
+         entry->assign_list.add(ir);
        }
     }
diff --git a/src/compiler/glsl/ir_variable_refcount.h b/src/compiler/glsl/ir_variable_refcount.h
index 08a11c0..c3ec5fe 100644
--- a/src/compiler/glsl/ir_variable_refcount.h
+++ b/src/compiler/glsl/ir_variable_refcount.h
@@ -32,11 +32,7 @@
  #include "ir.h"
  #include "ir_visitor.h"
  #include "compiler/glsl_types.h"
-
-struct assignment_entry {
-   exec_node link;
-   ir_assignment *assign;
-};
+#include "util/fast_list.h"
class ir_variable_refcount_entry
  {
@@ -50,7 +46,7 @@ public:
      * This is intended to be used for dead code optimisation and may
      * not be a complete list.
      */
-   exec_list assign_list;
+   arraylist<ir_assignment*,4> assign_list;
/** Number of times the variable is referenced, including assignments. */
     unsigned referenced_count;
diff --git a/src/compiler/glsl/opt_dead_code.cpp 
b/src/compiler/glsl/opt_dead_code.cpp
index 75e668a..06e8c3d 100644
--- a/src/compiler/glsl/opt_dead_code.cpp
+++ b/src/compiler/glsl/opt_dead_code.cpp
@@ -52,7 +52,7 @@ do_dead_code(exec_list *instructions, bool 
uniform_locations_assigned)
struct hash_entry *e;
     hash_table_foreach(v.ht, e) {
-      ir_variable_refcount_entry *entry = (ir_variable_refcount_entry 
*)e->data;
+      ir_variable_refcount_entry *const entry = (ir_variable_refcount_entry 
*)e->data;
/* Since each assignment is a reference, the refereneced count must be
         * greater than or equal to the assignment count.  If they are equal,
@@ -89,7 +89,7 @@ do_dead_code(exec_list *instructions, bool 
uniform_locations_assigned)
        if (entry->var->data.always_active_io)
           continue;
- if (!entry->assign_list.is_empty()) {
+      if (!entry->assign_list.empty()) {
         /* Remove all the dead assignments to the variable we found.
          * Don't do so if it's a shader or function output, though.
          */
@@ -98,26 +98,19 @@ do_dead_code(exec_list *instructions, bool 
uniform_locations_assigned)
               entry->var->data.mode != ir_var_shader_out &&
               entry->var->data.mode != ir_var_shader_storage) {
- while (!entry->assign_list.is_empty()) {
-               struct assignment_entry *assignment_entry =
-                  exec_node_data(struct assignment_entry,
-                                 entry->assign_list.get_head_raw(), link);
-
-              assignment_entry->assign->remove();
-
+            for(ir_assignment *assign : entry->assign_list) {
The original code separates control flow instructions as for or while with a space before the brace, aka "for (...". This applies for all the code.
+              assign->remove();
               if (debug) {
                  printf("Removed assignment to %s@%p\n",
                         entry->var->name, (void *) entry->var);
                 }
-
-               assignment_entry->link.remove();
-               free(assignment_entry);
              }
+            entry->assign_list.clear();
              progress = true;
         }
        }
- if (entry->assign_list.is_empty()) {
+      if (entry->assign_list.empty()) {
         /* If there are no assignments or references to the variable left,
          * then we can remove its declaration.
          */
diff --git a/src/util/fast_list.h b/src/util/fast_list.h
new file mode 100644
index 0000000..4abd965
--- /dev/null
+++ b/src/util/fast_list.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright © 2016 Jan Ziak
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jan Ziak (http://atom-symbol.net) <0xe2.0x9x.0...@gmail.com>
+ */
+
+/**
+ * @file fast_list.h
+ *
+ * High-performance C++ list implementation.
+ */
+
+#ifndef _UTIL_FAST_LIST_H
+#define _UTIL_FAST_LIST_H
+
+#include <new>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * A high-performance array list.
+ *
+ * The list doesn't support elements whose behavior depends on the memory 
address of the element.
+ *
+ * @param T  List element type.
+ *
+ * @param N  The number of statically allocated elements to achieve the 
highest overall performance.
+ *           N is typically 0, 2, 4 or 8 depending on the runtime usage 
pattern of a particular list
+ *           variable in the source code.
+ */
+template<typename T, size_t N>
+struct arraylist
+{
+   size_t len;  // Length
+   size_t cap;  // Capacity (size of memory allocated to 'elem')
+   T *elem;     // List elements
+   uint8_t local[N*sizeof(T)];  // Fast local preallocated storage for lists 
with length 0..N
+
+   arraylist() {
+      len = 0;
+      cap = N;
+      elem = (N ? (T*)local : NULL);
You might want to use nullptr if you go for C++11 anyway (As i assume from the range based loops above)
+   }
+
+   ~arraylist() {
+      if(!__has_trivial_destructor(T)) {
+         for(size_t i=0; i<len; i++) {
Please separate operators by whitespace aka "i < len"
+            elem[i].~T();
+         }
+      }
+      if((N && elem != (T*)local) && elem) {
+         free(elem);
+      }
+   }
+
+   arraylist(const arraylist<T,N> &a) = delete;
+   void operator=(const arraylist<T,N> &a) = delete;
+
+   bool empty() const {
+      return len==0;
same
+   }
+
+   void clear() {
+      if(!__has_trivial_destructor(T)) {
+         for(size_t i=0; i<len; i++) {
same
+            elem[i].~T();
+         }
+      }
+      len = 0;
+   }
+
+   arraylist<T,N>& add(const T &e) {
+      if(len == cap) {
+         enlarge1();
+      }
+      new (&elem[len++]) T(e);
+      return *this;
+   }
+
+   T& operator[](size_t i) { return elem[i]; }
+   const T& operator[](size_t i) const { return elem[i]; }
+
+   /*
+    * C++ iterators:
+    */
+
+   struct const_iterator_t {
+      const T *const elem;
+      size_t i;
+      const_iterator_t(const T *_elem, size_t _i) : elem(_elem), i(_i) {}
+      const T& operator*() const { return elem[i]; }
+      void operator++() { i++; }
+      bool operator!=(const const_iterator_t &b) const { return i < b.i; }
+   };
+   struct iterator_t {
+      T *const elem;
+      size_t i;
+      iterator_t(T *_elem, size_t _i) : elem(_elem), i(_i) {}
+      T& operator*() const { return elem[i]; }
+      void operator++() { i++; }
+      bool operator!=(const iterator_t &b) const { return i < b.i; }
+   };
+
+   const_iterator_t const_begin() const { return const_iterator_t(elem, 0); }
+   const_iterator_t const_end() const { return const_iterator_t(elem, len); }
The stl knows those as cbegin() and cend(), maybe stick with the standard names?
--Michael
+
+   const_iterator_t begin() const { return const_iterator_t(elem, 0); }
+   const_iterator_t end() const { return const_iterator_t(elem, len); }
+
+   iterator_t begin() { return iterator_t(elem, 0); }
+   iterator_t end() { return iterator_t(elem, len); }
+
+   /*
+    * Methods for std::vector<T> compatibility:
+    */
+
+   arraylist<T,N>& push_back(const T &e) { return add(e); }
+
+private:
+   void enlarge1() {
+      if(N) {
+         cap *= 2;
+         if(elem == (T*)local) {
+            elem = (T*)malloc(cap*sizeof(T));
+            if(!elem) {
+               abort();
+            }
+            memcpy(elem, local, len*sizeof(T));
+         }
+         else {
+            elem = (T*)realloc(elem, cap*sizeof(T));
+            if(!elem) {
+               abort();
+            }
+         }
+      }
+      else {
+         cap = (cap ? 2*cap : 4);
+         elem = (T*)realloc(elem, cap*sizeof(T));
+         if(!elem) {
+            abort();
+         }
+      }
+   }
+};
+
+#endif /* _UTIL_FAST_LIST_H */
\ No newline at end of file
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to