This pass looks for variables with vector or array-of-vector types and
narrows each variable's type to only the components that are actually used.
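
For example, given a variable of which only two components ever carry
defined data (illustrative GLSL, not taken from a real shader):

   vec4 v;
   v.x = a;
   v.z = b;
   gl_FragColor = vec4(v.x, v.z, 0.0, 1.0);

the pass rewrites v to a vec2 and compacts every load and store of it
accordingly.  Components that are written but never read, or read but
never written, are dropped, and variables left with no components are
removed entirely.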
---
 src/compiler/nir/nir.h            |   1 +
 src/compiler/nir/nir_split_vars.c | 530 +++++++++++++++++++++++++++++-
 2 files changed, 523 insertions(+), 8 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index c6ed5bb5358..ca437743ff8 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2611,6 +2611,7 @@ bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes);
 bool nir_split_var_copies(nir_shader *shader);
 bool nir_split_per_member_structs(nir_shader *shader);
 bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes);
+bool nir_narrow_vec_vars(nir_shader *shader, nir_variable_mode modes);
 
 bool nir_lower_returns_impl(nir_function_impl *impl);
 bool nir_lower_returns(nir_shader *shader);
diff --git a/src/compiler/nir/nir_split_vars.c b/src/compiler/nir/nir_split_vars.c
index 394ed2be622..8a981dd341d 100644
--- a/src/compiler/nir/nir_split_vars.c
+++ b/src/compiler/nir/nir_split_vars.c
@@ -25,6 +25,9 @@
 #include "nir_builder.h"
 #include "nir_deref.h"
 
+/* Needed for _mesa_bitcount() */
+#include "main/macros.h"
+
 struct split_var_state {
    void *mem_ctx;
 
@@ -46,15 +49,28 @@ struct field {
 };
 
 static const struct glsl_type *
-wrap_type_in_array(const struct glsl_type *type,
-                   const struct glsl_type *array_type)
+wrap_type_in_matrix_or_array(const struct glsl_type *type,
+                             const struct glsl_type *array_type)
 {
-   if (!glsl_type_is_array(array_type))
+   if (glsl_type_is_array(array_type)) {
+      const struct glsl_type *elem_type =
+         wrap_type_in_matrix_or_array(type, glsl_get_array_element(array_type));
+      return glsl_array_type(elem_type, glsl_get_length(array_type));
+   } else if (glsl_type_is_matrix(array_type)) {
+      if (glsl_type_is_scalar(type)) {
+         /* This can happen if we reduce the number of rows in a matrix to 1.
+          * Just use an array type in this case.
+          */
+         return glsl_array_type(type, glsl_get_matrix_columns(array_type));
+      } else {
+         assert(glsl_type_is_vector(type));
+         return glsl_matrix_type(glsl_get_base_type(type),
+                                 glsl_get_components(type),
+                                 glsl_get_matrix_columns(array_type));
+      }
+   } else {
       return type;
-
-   const struct glsl_type *elem_type =
-      wrap_type_in_array(type, glsl_get_array_element(array_type));
-   return glsl_array_type(elem_type, glsl_get_length(array_type));
+   }
 }
 
 static void
@@ -86,7 +102,7 @@ init_field_for_type(struct field *field, struct field *parent,
    } else {
       const struct glsl_type *var_type = type;
       for (struct field *f = field->parent; f; f = f->parent)
-         var_type = wrap_type_in_array(var_type, f->type);
+         var_type = wrap_type_in_matrix_or_array(var_type, f->type);
 
       nir_variable_mode mode = state->base_var->data.mode;
       if (mode == nir_var_local) {
@@ -689,6 +705,8 @@ split_array_derefs_impl(nir_function_impl *impl,
                /* This level is split, just advance to the next element */
                assert(p->deref_type == nir_deref_type_array);
                unsigned idx = nir_src_as_const_value(p->arr.index)->u32[0];
+               if (idx >= glsl_get_length(tail_elem->type))
+                  idx = 0; /* Out-of-bounds access is undefined; any element will do */
                tail_elem = &tail_elem->children[idx];
             } else {
                /* This level isn't split, build a deref */
@@ -794,3 +812,499 @@ nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
 
    return progress;
 }
+
+struct vec_var_usage {
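+   /* Components read and (non-self-referentially) written anywhere in the
+    * shader; see get_non_self_referential_store_comps().
+    */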
+   nir_component_mask_t comps_read;
+   nir_component_mask_t comps_written;
+
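+   /* Components we keep: comps_read & comps_written, possibly grown by the
+    * copy fixed-point in narrow_vec_var_list().
+    */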
+   nir_component_mask_t comps_kept;
+
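+   /* Set of variables this one is copied to or from. */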
+   struct set *vars_copied;
+};
+
+static struct vec_var_usage *
+get_vec_var_usage(nir_variable *var,
+                  struct hash_table *var_usage_map,
+                  bool add_usage_entry, void *mem_ctx)
+{
+   struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
+   if (entry)
+      return entry->data;
+
+   if (!add_usage_entry)
+      return NULL;
+
+   struct vec_var_usage *usage = rzalloc(mem_ctx, struct vec_var_usage);
+   _mesa_hash_table_insert(var_usage_map, var, usage);
+
+   return usage;
+}
+
+static void
+mark_deref_used(nir_deref_instr *deref,
+                nir_component_mask_t comps_read,
+                nir_component_mask_t comps_written,
+                nir_deref_instr *copy_deref,
+                struct hash_table *var_usage_map,
+                nir_variable_mode modes,
+                void *mem_ctx)
+{
+   if (!(deref->mode & modes))
+      return;
+
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   const struct glsl_type *vec_type = glsl_without_array_or_matrix(var->type);
+   if (!glsl_type_is_vector_or_scalar(vec_type))
+      return;
+
+   unsigned num_components = glsl_get_components(vec_type);
+
+   struct vec_var_usage *usage =
+      get_vec_var_usage(var, var_usage_map, true, mem_ctx);
+
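+   /* Callers may pass ~0; mask to the components the vector actually has. */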
+   usage->comps_read |= comps_read & ((1u << num_components) - 1);
+   usage->comps_written |= comps_written & ((1u << num_components) - 1);
+
+   if (copy_deref) {
+      if (usage->vars_copied == NULL) {
+         usage->vars_copied = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+                                               _mesa_key_pointer_equal);
+      }
+      _mesa_set_add(usage->vars_copied,
+                    nir_deref_instr_get_variable(copy_deref));
+   }
+}
+
+static bool
+src_is_load_deref(nir_src src, nir_src deref_src)
+{
+   assert(src.is_ssa);
+   assert(deref_src.is_ssa);
+
+   if (src.ssa->parent_instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *load = nir_instr_as_intrinsic(src.ssa->parent_instr);
+   if (load->intrinsic != nir_intrinsic_load_deref)
+      return false;
+
+   assert(load->src[0].is_ssa);
+
+   return load->src[0].ssa == deref_src.ssa;
+}
+
+/* Returns all non-self-referential components of a store instruction.  A
+ * component is self-referential if it comes from the same component of a load
+ * instruction on the same deref.  If the only data in a particular component
+ * of a variable came directly from that component then it's undefined.  The
+ * only way to get defined data into a component of a variable is for it to
+ * get written there by something outside or from a different component.
+ *
+ * This is a fairly common pattern in shaders that come from either GLSL IR or
+ * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
+ * load-vec-store.
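+ *
+ * For example, a single-component write such as "v.x = foo" on a vec4 v
+ * typically shows up as (illustrative pseudo-NIR):
+ *
+ *    vec4 tmp = load_deref(v)
+ *    store_deref(v, vec4(foo, tmp.y, tmp.z, tmp.w), writemask = xyzw)
+ *
+ * Only the x component carries new data; y, z, and w merely copy themselves
+ * back, so we don't count them as written.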
+ */
+static nir_component_mask_t
+get_non_self_referential_store_comps(nir_intrinsic_instr *store)
+{
+   nir_component_mask_t comps = nir_intrinsic_write_mask(store);
+
+   assert(store->src[1].is_ssa);
+   nir_instr *src_instr = store->src[1].ssa->parent_instr;
+   if (src_instr->type != nir_instr_type_alu)
+      return comps;
+
+   nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
+
+   if (src_alu->op == nir_op_imov ||
+       src_alu->op == nir_op_fmov) {
+      /* If it's just a swizzle of a load from the same deref, discount any
+       * channels that don't move in the swizzle.
+       */
+      if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
+         for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
+            if (src_alu->src[0].swizzle[i] == i)
+               comps &= ~(1u << i);
+         }
+      }
+   } else if (src_alu->op == nir_op_vec2 ||
+              src_alu->op == nir_op_vec3 ||
+              src_alu->op == nir_op_vec4) {
+      /* If it's a vec, discount any channels that are just loads from the
+       * same deref put in the same spot.
+       */
+      for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
+         if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
+             src_alu->src[i].swizzle[0] == i)
+            comps &= ~(1u << i);
+      }
+   }
+
+   return comps;
+}
+
+static void
+find_used_components_impl(nir_function_impl *impl,
+                          struct hash_table *var_usage_map,
+                          nir_variable_mode modes,
+                          void *mem_ctx)
+{
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_load_deref:
+            mark_deref_used(nir_src_as_deref(intrin->src[0]),
+                            nir_ssa_def_components_read(&intrin->dest.ssa), 0,
+                            NULL, var_usage_map, modes, mem_ctx);
+            break;
+
+         case nir_intrinsic_store_deref:
+            mark_deref_used(nir_src_as_deref(intrin->src[0]),
+                            0, get_non_self_referential_store_comps(intrin),
+                            NULL, var_usage_map, modes, mem_ctx);
+            break;
+
+         case nir_intrinsic_copy_deref: {
+            /* Just mark everything used for copies. */
+            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+            nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
+            mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
+            mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
+            break;
+         }
+
+         default:
+            break;
+         }
+      }
+   }
+}
+
+static bool
+narrow_vec_var_list(struct exec_list *vars,
+                    struct hash_table *var_usage_map)
+{
+   /* Initialize the components kept field of each variable.  This is the
+    * AND of the components written and components read.  If a component is
+    * written but never read, it's dead.  If it is read but never written,
+    * then all values read are undefined garbage and we may as well not read
+    * them.
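+    *
+    * For example, if a vec4 has comps_read = xy and comps_written = yz,
+    * only y is kept: x is undefined garbage and z is dead.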
+    */
+   nir_foreach_variable(var, vars) {
+      struct vec_var_usage *usage =
+         get_vec_var_usage(var, var_usage_map, false, NULL);
+      if (usage) {
+         assert(usage->comps_kept == 0);
+         usage->comps_kept = usage->comps_read & usage->comps_written;
+      }
+   }
+
+   /* For variable copies to work, the source and the destination must have
+    * the same data type.  To satisfy this, we run a little fixed-point
+    * algorithm that transitively ensures we keep enough components for this
+    * to hold across all copies.
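+    *
+    * For example, if variable a is copied to variable b, a keeps only .x,
+    * and b keeps only .y, the loop below grows both to .xy so the copy
+    * stays type-correct.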
+    */
+   bool fp_progress;
+   do {
+      fp_progress = false;
+      nir_foreach_variable(var, vars) {
+         struct vec_var_usage *var_usage =
+            get_vec_var_usage(var, var_usage_map, false, NULL);
+         if (!var_usage || !var_usage->vars_copied)
+            continue;
+
+         const unsigned var_num_components =
+            glsl_get_components(glsl_without_array_or_matrix(var->type));
+
+         struct set_entry *copy_entry;
+         set_foreach(var_usage->vars_copied, copy_entry) {
+            struct vec_var_usage *copy_usage =
+               get_vec_var_usage((void *)copy_entry->key,
+                                 var_usage_map, false, NULL);
+
+            nir_component_mask_t copy_comps;
+            if (!copy_usage) {
+               /* If the copy doesn't have usage information, assume the worst
+                * case that it uses everything.
+                */
+               copy_comps = (1 << var_num_components) - 1;
+            } else {
+               copy_comps = copy_usage->comps_kept;
+            }
+
+            if ((var_usage->comps_kept | copy_comps) != var_usage->comps_kept) {
+               var_usage->comps_kept |= copy_comps;
+               fp_progress = true;
+            }
+         }
+      }
+   } while (fp_progress);
+
+   bool vars_narrowed = false;
+   nir_foreach_variable_safe(var, vars) {
+      struct vec_var_usage *usage =
+         get_vec_var_usage(var, var_usage_map, false, NULL);
+      if (!usage)
+         continue;
+
+      const struct glsl_type *vec_type =
+         glsl_without_array_or_matrix(var->type);
+      assert(usage->comps_kept < (1u << glsl_get_components(vec_type)));
+
+      if (usage->comps_kept == (1u << glsl_get_components(vec_type)) - 1) {
+         /* This variable doesn't need to be narrowed.  Remove it from the
+          * hash table so later passes will ignore it.
+          */
+         _mesa_hash_table_remove_key(var_usage_map, var);
+         continue;
+      }
+
+      if (usage->comps_kept == 0) {
+         /* This variable is dead, remove it */
+         exec_node_remove(&var->node);
+      } else {
+         const struct glsl_type *new_vec_type =
+            glsl_vector_type(glsl_get_base_type(vec_type),
+                             _mesa_bitcount(usage->comps_kept));
+         var->type = wrap_type_in_matrix_or_array(new_vec_type, var->type);
+      }
+      vars_narrowed = true;
+   }
+
+   return vars_narrowed;
+}
+
+static bool
+deref_is_dead(nir_deref_instr *deref,
+              struct hash_table *var_usage_map,
+              nir_variable_mode modes)
+{
+   if (!(deref->mode & modes))
+      return false;
+
+   struct vec_var_usage *usage =
+      get_vec_var_usage(nir_deref_instr_get_variable(deref),
+                        var_usage_map, false, NULL);
+   if (!usage)
+      return false;
+
+   return usage->comps_kept == 0;
+}
+
+static void
+narrow_vec_var_access_impl(nir_function_impl *impl,
+                           struct hash_table *var_usage_map,
+                           nir_variable_mode modes)
+{
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         switch (instr->type) {
+         case nir_instr_type_deref: {
+            nir_deref_instr *deref = nir_instr_as_deref(instr);
+            if (!(deref->mode & modes))
+               break;
+
+            /* Clean up any dead derefs we find lying around.  They may refer
+             * to variables we're planning to narrow.
+             */
+            if (nir_deref_instr_remove_if_unused(deref))
+               break;
+
+            /* We don't need to check if this is one of the derefs we're
+             * narrowing because this is a no-op if it isn't.  The worst that
+             * could happen is that we accidentally fix an invalid deref.
+             */
+            if (deref->deref_type == nir_deref_type_var) {
+               deref->type = deref->var->type;
+            } else if (deref->deref_type == nir_deref_type_array ||
+                       deref->deref_type == nir_deref_type_array_wildcard) {
+               nir_deref_instr *parent = nir_deref_instr_parent(deref);
+               assert(glsl_type_is_array(parent->type) ||
+                      glsl_type_is_matrix(parent->type));
+               deref->type = glsl_get_array_element(parent->type);
+            }
+            break;
+         }
+
+         case nir_instr_type_intrinsic: {
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+            /* If we have a copy whose source or destination has been deleted
+             * because we determined the variable was dead, then we just
+             * delete the copy instruction.  If the source variable was dead
+             * then it was writing undefined garbage anyway and if it's the
+             * destination variable that's dead then the write isn't needed.
+             */
+            if (intrin->intrinsic == nir_intrinsic_copy_deref) {
+               nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+               nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
+               if (deref_is_dead(dst, var_usage_map, modes) ||
+                   deref_is_dead(src, var_usage_map, modes)) {
+                  nir_instr_remove(&intrin->instr);
+                  nir_deref_instr_remove_if_unused(dst);
+                  nir_deref_instr_remove_if_unused(src);
+               }
+               continue;
+            }
+
+            if (intrin->intrinsic != nir_intrinsic_load_deref &&
+                intrin->intrinsic != nir_intrinsic_store_deref)
+               continue;
+
+            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+            if (!(deref->mode & modes))
+               continue;
+
+            struct vec_var_usage *usage =
+               get_vec_var_usage(nir_deref_instr_get_variable(deref),
+                                 var_usage_map, false, NULL);
+            if (!usage)
+               continue;
+
+            if (usage->comps_kept == 0) {
+               if (intrin->intrinsic == nir_intrinsic_load_deref) {
+                  nir_ssa_def *u =
+                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
+                                       intrin->dest.ssa.bit_size);
+                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                           nir_src_for_ssa(u));
+               }
+               nir_instr_remove(&intrin->instr);
+               nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[0]));
+               continue;
+            }
+
+            if (intrin->intrinsic == nir_intrinsic_load_deref) {
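+               /* Narrow the load to just the kept components and swizzle the
+                * narrow result back out to the original width so existing
+                * users keep their old component indices.  For example, if
+                * only x and z of a vec4 are kept, the load becomes a vec2
+                * and the swizzle maps x -> 0 and z -> 1 (dropped channels
+                * may map anywhere since nothing reads them).
+                */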
+               unsigned swizzle[NIR_MAX_VEC_COMPONENTS] = { 0, };
+               unsigned c = 0;
+               for (unsigned i = 0; i < intrin->num_components; i++) {
+                  if (usage->comps_kept & (1u << i))
+                     swizzle[i] = c++;
+               }
+
+               b.cursor = nir_after_instr(&intrin->instr);
+
+               nir_ssa_def *swizzled =
+                  nir_swizzle(&b, &intrin->dest.ssa, swizzle,
+                              intrin->num_components, false);
+
+               nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
+                                              nir_src_for_ssa(swizzled),
+                                              swizzled->parent_instr);
+
+               /* The SSA def is now only used by the swizzle.  It's safe to
+                * shrink the number of components.
+                */
+               assert(list_is_singular(&intrin->dest.ssa.uses));
+               intrin->num_components = c;
+               intrin->dest.ssa.num_components = c;
+            } else {
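+               /* Compact the stored value: gather the kept components into a
+                * narrow vector and remap the write mask to match.  For
+                * example, a vec4 store with write mask yz where only y and z
+                * are kept becomes a vec2 store with write mask xy.
+                */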
+               nir_component_mask_t write_mask =
+                  nir_intrinsic_write_mask(intrin);
+
+               unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
+               nir_component_mask_t new_write_mask = 0;
+               unsigned c = 0;
+               for (unsigned i = 0; i < intrin->num_components; i++) {
+                  if (usage->comps_kept & (1u << i)) {
+                     swizzle[c] = i;
+                     if (write_mask & (1u << i))
+                        new_write_mask |= 1u << c;
+                     c++;
+                  }
+               }
+
+               b.cursor = nir_before_instr(&intrin->instr);
+
+               nir_ssa_def *swizzled =
+                  nir_swizzle(&b, intrin->src[1].ssa, swizzle, c, false);
+
+               /* Rewrite to use the compacted source */
+               nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
+                                     nir_src_for_ssa(swizzled));
+               nir_intrinsic_set_write_mask(intrin, new_write_mask);
+               intrin->num_components = c;
+            }
+            break;
+         }
+
+         default:
+            break;
+         }
+      }
+   }
+}
+
+/** Attempt to narrow (arrays of) vectors by removing unused components
+ *
+ * This pass looks at variables which contain a vector, or an array or matrix
+ * (possibly with multiple array dimensions) of vectors, and attempts to
+ * narrow the vector type to only the components that are actually used.
+ */
+bool
+nir_narrow_vec_vars(nir_shader *shader, nir_variable_mode modes)
+{
+   assert((modes & (nir_var_global | nir_var_local)) == modes);
+
+   void *mem_ctx = ralloc_context(NULL);
+
+   struct hash_table *var_usage_map =
+      _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                              _mesa_key_pointer_equal);
+
+   bool has_vars_to_narrow = false;
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      /* Don't even bother crawling the IR if we don't have any variables.
+       * Given that this pass deletes any unused variables, it's likely that
+       * we will be in this scenario eventually.
+       */
+      if (!exec_list_is_empty(&shader->globals) ||
+          !exec_list_is_empty(&function->impl->locals)) {
+         has_vars_to_narrow = true;
+         find_used_components_impl(function->impl, var_usage_map,
+                                   modes, mem_ctx);
+      }
+   }
+   if (!has_vars_to_narrow) {
+      ralloc_free(mem_ctx);
+      return false;
+   }
+
+   bool globals_narrowed = false;
+   if (modes & nir_var_global)
+      globals_narrowed = narrow_vec_var_list(&shader->globals, var_usage_map);
+
+   bool progress = false;
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      bool locals_narrowed = false;
+      if (modes & nir_var_local) {
+         locals_narrowed = narrow_vec_var_list(&function->impl->locals,
+                                               var_usage_map);
+      }
+
+      if (globals_narrowed || locals_narrowed) {
+         narrow_vec_var_access_impl(function->impl, var_usage_map, modes);
+
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+         progress = true;
+      }
+   }
+
+   ralloc_free(mem_ctx);
+
+   return progress;
+}
-- 
2.17.1
