from:"Timothy Arceri"

Re: [Mesa-dev] [PATCH] ac/nir_to_llvm: fix interpolateAt* for arrays

2019-01-18 Thread Timothy Arceri


On 19/1/19 10:29 am, Bas Nieuwenhuizen wrote:

On Sat, Jan 19, 2019 at 12:27 AM Bas Nieuwenhuizen
 wrote:


On Sat, Jan 19, 2019 at 12:17 AM Timothy Arceri  wrote:




On 19/1/19 9:36 am, Bas Nieuwenhuizen wrote:

On Thu, Jan 10, 2019 at 6:59 AM Timothy Arceri  wrote:


This builds on the recent interpolate fix by Rhys ee8488ea3b99.

This doesn't handle arrays of structs but I've got a feeling those
might be broken even for radeonsi tgsi (we currently have no tests).

This fixes the arb_gpu_shader5 interpolateAt* tests that contain
arrays.

Fixes: ee8488ea3b99 ("ac/nir,radv,radeonsi/nir: use correct indices for 
interpolation intrinsics")
---
   src/amd/common/ac_nir_to_llvm.c | 80 +
   1 file changed, 61 insertions(+), 19 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 5023b96f92..00011a439d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2830,15 +2830,16 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,
   const nir_intrinsic_instr *instr)
   {
  LLVMValueRef result[4];
-   LLVMValueRef interp_param, attr_number;
+   LLVMValueRef interp_param;
  unsigned location;
  unsigned chan;
  LLVMValueRef src_c0 = NULL;
  LLVMValueRef src_c1 = NULL;
  LLVMValueRef src0 = NULL;

-   nir_variable *var = 
nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
-   int input_index = ctx->abi->fs_input_attr_indices[var->data.location - 
VARYING_SLOT_VAR0];
+   nir_deref_instr *deref_instr = 
nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+   nir_variable *var = nir_deref_instr_get_variable(deref_instr);
+   int input_base = ctx->abi->fs_input_attr_indices[var->data.location - 
VARYING_SLOT_VAR0];
  switch (instr->intrinsic) {
  case nir_intrinsic_interp_deref_at_centroid:
  location = INTERP_CENTROID;
@@ -2868,7 +2869,6 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,
  src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
  }
  interp_param = ctx->abi->lookup_interp_param(ctx->abi, 
var->data.interpolation, location);
-   attr_number = LLVMConstInt(ctx->ac.i32, input_index, false);

  if (location == INTERP_CENTER) {
  LLVMValueRef ij_out[2];
@@ -2906,26 +2906,68 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,

  }

+   LLVMValueRef array_idx = ctx->ac.i32_0;
+   while(deref_instr->deref_type != nir_deref_type_var) {
+   if (deref_instr->deref_type == nir_deref_type_array) {
+   unsigned array_size = 
glsl_get_aoa_size(deref_instr->type);
+   if (!array_size)
+   array_size = 1;
+
+   LLVMValueRef offset;
+   nir_const_value *const_value = 
nir_src_as_const_value(deref_instr->arr.index);
+   if (const_value) {
+   offset = LLVMConstInt(ctx->ac.i32, array_size * 
const_value->u32[0], false);
+   } else {
+   LLVMValueRef indirect = get_src(ctx, 
deref_instr->arr.index);
+
+   offset = LLVMBuildMul(ctx->ac.builder, indirect,
+ LLVMConstInt(ctx->ac.i32, 
array_size, false), "");
+   }
+
+   array_idx = LLVMBuildAdd(ctx->ac.builder, array_idx, offset, 
"");
+   deref_instr = nir_src_as_deref(deref_instr->parent);
+   } else if (deref_instr->deref_type == nir_deref_type_struct) {
+   /* TODO: Probably need to do more here to support 
arrays of structs etc */
+   deref_instr = nir_src_as_deref(deref_instr->parent);


If we don't have confidence this works can we just have it go to the
unreachable below. IIRC spirv->nir also lowered struct inputs so I'm
not even sure we would encounter this.


This will work for structs, just probably not for arrays of structs. We
do need struct handling for radeonsi so I'd rather leave this as is.


Actually, how does this work for structs? I find it suspicious we don't
care about which member is taken?


Yeah, you're right. It seems the piglit tests are too simple and always use
the first member.


I think I will fall through to the unreachable() as you suggested for 
now. Then I'll write some better tests before adding proper struct support.


Thanks for the review.







Otherwise,

Reviewed-by: Bas Nieuwenhuizen 


+   } else {
+   unreachable("Unsupported deref type");
+   }
+

Re: [Mesa-dev] [PATCH] ac/nir_to_llvm: fix interpolateAt* for arrays

2019-01-18 Thread Timothy Arceri




On 19/1/19 9:36 am, Bas Nieuwenhuizen wrote:

On Thu, Jan 10, 2019 at 6:59 AM Timothy Arceri  wrote:


This builds on the recent interpolate fix by Rhys ee8488ea3b99.

This doesn't handle arrays of structs but I've got a feeling those
might be broken even for radeonsi tgsi (we currently have no tests).

This fixes the arb_gpu_shader5 interpolateAt* tests that contain
arrays.

Fixes: ee8488ea3b99 ("ac/nir,radv,radeonsi/nir: use correct indices for 
interpolation intrinsics")
---
  src/amd/common/ac_nir_to_llvm.c | 80 +
  1 file changed, 61 insertions(+), 19 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 5023b96f92..00011a439d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2830,15 +2830,16 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,
  const nir_intrinsic_instr *instr)
  {
 LLVMValueRef result[4];
-   LLVMValueRef interp_param, attr_number;
+   LLVMValueRef interp_param;
 unsigned location;
 unsigned chan;
 LLVMValueRef src_c0 = NULL;
 LLVMValueRef src_c1 = NULL;
 LLVMValueRef src0 = NULL;

-   nir_variable *var = 
nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
-   int input_index = ctx->abi->fs_input_attr_indices[var->data.location - 
VARYING_SLOT_VAR0];
+   nir_deref_instr *deref_instr = 
nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+   nir_variable *var = nir_deref_instr_get_variable(deref_instr);
+   int input_base = ctx->abi->fs_input_attr_indices[var->data.location - 
VARYING_SLOT_VAR0];
 switch (instr->intrinsic) {
 case nir_intrinsic_interp_deref_at_centroid:
 location = INTERP_CENTROID;
@@ -2868,7 +2869,6 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,
 src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
 }
 interp_param = ctx->abi->lookup_interp_param(ctx->abi, 
var->data.interpolation, location);
-   attr_number = LLVMConstInt(ctx->ac.i32, input_index, false);

 if (location == INTERP_CENTER) {
 LLVMValueRef ij_out[2];
@@ -2906,26 +2906,68 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,

 }

+   LLVMValueRef array_idx = ctx->ac.i32_0;
+   while(deref_instr->deref_type != nir_deref_type_var) {
+   if (deref_instr->deref_type == nir_deref_type_array) {
+   unsigned array_size = 
glsl_get_aoa_size(deref_instr->type);
+   if (!array_size)
+   array_size = 1;
+
+   LLVMValueRef offset;
+   nir_const_value *const_value = 
nir_src_as_const_value(deref_instr->arr.index);
+   if (const_value) {
+   offset = LLVMConstInt(ctx->ac.i32, array_size * 
const_value->u32[0], false);
+   } else {
+   LLVMValueRef indirect = get_src(ctx, 
deref_instr->arr.index);
+
+   offset = LLVMBuildMul(ctx->ac.builder, indirect,
+ LLVMConstInt(ctx->ac.i32, 
array_size, false), "");
+   }
+
+   array_idx = LLVMBuildAdd(ctx->ac.builder, array_idx, offset, 
"");
+   deref_instr = nir_src_as_deref(deref_instr->parent);
+   } else if (deref_instr->deref_type == nir_deref_type_struct) {
+   /* TODO: Probably need to do more here to support 
arrays of structs etc */
+   deref_instr = nir_src_as_deref(deref_instr->parent);


If we don't have confidence this works can we just have it go to the
unreachable below. IIRC spirv->nir also lowered struct inputs so I'm
not even sure we would encounter this.


This will work for structs, just probably not for arrays of structs. We 
do need struct handling for radeonsi so I'd rather leave this as is.




Otherwise,

Reviewed-by: Bas Nieuwenhuizen 


+   } else {
+   unreachable("Unsupported deref type");
+   }
+
+   }
+
+   unsigned input_array_size = glsl_get_aoa_size(var->type);
+   if (!input_array_size)
+   input_array_size = 1;
+
 for (chan = 0; chan < 4; chan++) {
+   LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->ac.f32, 
input_array_size));
 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, 
false);

-   if (interp_param) {
-   interp_param = LLVMBuildBitCast(ctx->ac.builder,
+   for (unsigned idx = 0; idx < input_

Re: [Mesa-dev] [PATCH] ac/nir_to_llvm: fix interpolateAt* for arrays

2019-01-18 Thread Timothy Arceri


Ping!

On 10/1/19 4:59 pm, Timothy Arceri wrote:

This builds on the recent interpolate fix by Rhys ee8488ea3b99.

This doesn't handle arrays of structs but I've got a feeling those
might be broken even for radeonsi tgsi (we currently have no tests).

This fixes the arb_gpu_shader5 interpolateAt* tests that contain
arrays.

Fixes: ee8488ea3b99 ("ac/nir,radv,radeonsi/nir: use correct indices for 
interpolation intrinsics")
---
  src/amd/common/ac_nir_to_llvm.c | 80 +
  1 file changed, 61 insertions(+), 19 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 5023b96f92..00011a439d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2830,15 +2830,16 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,
 const nir_intrinsic_instr *instr)
  {
LLVMValueRef result[4];
-   LLVMValueRef interp_param, attr_number;
+   LLVMValueRef interp_param;
unsigned location;
unsigned chan;
LLVMValueRef src_c0 = NULL;
LLVMValueRef src_c1 = NULL;
LLVMValueRef src0 = NULL;
  
-	nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));

-   int input_index = ctx->abi->fs_input_attr_indices[var->data.location - 
VARYING_SLOT_VAR0];
+   nir_deref_instr *deref_instr = 
nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+   nir_variable *var = nir_deref_instr_get_variable(deref_instr);
+   int input_base = ctx->abi->fs_input_attr_indices[var->data.location - 
VARYING_SLOT_VAR0];
switch (instr->intrinsic) {
case nir_intrinsic_interp_deref_at_centroid:
location = INTERP_CENTROID;
@@ -2868,7 +2869,6 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,
src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
}
interp_param = ctx->abi->lookup_interp_param(ctx->abi, 
var->data.interpolation, location);
-   attr_number = LLVMConstInt(ctx->ac.i32, input_index, false);
  
  	if (location == INTERP_CENTER) {

LLVMValueRef ij_out[2];
@@ -2906,26 +2906,68 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,
  
  	}
  
+	LLVMValueRef array_idx = ctx->ac.i32_0;

+   while(deref_instr->deref_type != nir_deref_type_var) {
+   if (deref_instr->deref_type == nir_deref_type_array) {
+   unsigned array_size = 
glsl_get_aoa_size(deref_instr->type);
+   if (!array_size)
+   array_size = 1;
+
+   LLVMValueRef offset;
+   nir_const_value *const_value = 
nir_src_as_const_value(deref_instr->arr.index);
+   if (const_value) {
+   offset = LLVMConstInt(ctx->ac.i32, array_size * 
const_value->u32[0], false);
+   } else {
+   LLVMValueRef indirect = get_src(ctx, 
deref_instr->arr.index);
+
+   offset = LLVMBuildMul(ctx->ac.builder, indirect,
+ LLVMConstInt(ctx->ac.i32, 
array_size, false), "");
+   }
+
+   array_idx = LLVMBuildAdd(ctx->ac.builder, array_idx, offset, 
"");
+   deref_instr = nir_src_as_deref(deref_instr->parent);
+   } else if (deref_instr->deref_type == nir_deref_type_struct) {
+   /* TODO: Probably need to do more here to support 
arrays of structs etc */
+   deref_instr = nir_src_as_deref(deref_instr->parent);
+   } else {
+   unreachable("Unsupported deref type");
+   }
+
+   }
+
+   unsigned input_array_size = glsl_get_aoa_size(var->type);
+   if (!input_array_size)
+   input_array_size = 1;
+
for (chan = 0; chan < 4; chan++) {
+   LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->ac.f32, 
input_array_size));
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
  
-		if (interp_param) {

-   interp_param = LLVMBuildBitCast(ctx->ac.builder,
+   for (unsigned idx = 0; idx < input_array_size; ++idx) {
+   LLVMValueRef v, attr_number;
+
+   attr_number = LLVMConstInt(ctx->ac.i32, input_base + 
idx, false);
+   if (interp_param) {
+   interp_param = LLVMBuildBitCast(ctx->ac.builder,
interp_param, ctx->ac.v2f32, 
"");
-   LLVMValueRef i = LLVMBuildExtractElement(
-

[Mesa-dev] [PATCH 1/2] glsl: don't skip GLSL IR opts on first-time compiles

2019-01-16 Thread Timothy Arceri

This basically reverts c2bc0aa7b188.

By running the opts we reduce memory use in Team Fortress 2
from 1.5GB -> 1.3GB from start-up to game menu.

This will likely increase Deus Ex start up times as per commit
c2bc0aa7b188. However currently 32bit games like Team Fortress 2
can run out of memory on low memory systems, so that seems more
important.
---
 src/compiler/glsl/glsl_parser_extras.cpp | 16 +---
 src/compiler/glsl/shader_cache.cpp   | 17 -
 src/mesa/main/mtypes.h   |  3 +--
 3 files changed, 2 insertions(+), 34 deletions(-)

diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
b/src/compiler/glsl/glsl_parser_extras.cpp
index 2048a7f900..200df7759b 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -2090,14 +2090,6 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct 
gl_shader *shader,
*/
   if (shader->CompileStatus == COMPILE_SUCCESS)
  return;
-
-  if (shader->CompileStatus == COMPILED_NO_OPTS) {
- opt_shader_and_create_symbol_table(ctx,
-NULL, /* source_symbols */
-shader);
- shader->CompileStatus = COMPILE_SUCCESS;
- return;
-  }
}
 
struct _mesa_glsl_parse_state *state =
@@ -2153,13 +2145,7 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct 
gl_shader *shader,
if (!state->error && !shader->ir->is_empty()) {
   assign_subroutine_indexes(state);
   lower_subroutine(shader->ir, state);
-
-  if (!ctx->Cache || force_recompile)
- opt_shader_and_create_symbol_table(ctx, state->symbols, shader);
-  else {
- reparent_ir(shader->ir, shader->ir);
- shader->CompileStatus = COMPILED_NO_OPTS;
-  }
+  opt_shader_and_create_symbol_table(ctx, state->symbols, shader);
}
 
if (!force_recompile) {
diff --git a/src/compiler/glsl/shader_cache.cpp 
b/src/compiler/glsl/shader_cache.cpp
index 31d0aa6296..879511a9d7 100644
--- a/src/compiler/glsl/shader_cache.cpp
+++ b/src/compiler/glsl/shader_cache.cpp
@@ -264,23 +264,6 @@ shader_cache_read_program_metadata(struct gl_context *ctx,
/* This is used to flag a shader retrieved from cache */
prog->data->LinkStatus = LINKING_SKIPPED;
 
-   /* Since the program load was successful, CompileStatus of all shaders at
-* this point should normally be compile_skipped. However because of how
-* the eviction works, it may happen that some of the individual shader keys
-* have been evicted, resulting in unnecessary recompiles on this load, so
-* mark them again to skip such recompiles next time.
-*/
-   char sha1_buf[41];
-   for (unsigned i = 0; i < prog->NumShaders; i++) {
-  if (prog->Shaders[i]->CompileStatus == COMPILED_NO_OPTS) {
- disk_cache_put_key(cache, prog->Shaders[i]->sha1);
- if (ctx->_Shader->Flags & GLSL_CACHE_INFO) {
-_mesa_sha1_format(sha1_buf, prog->Shaders[i]->sha1);
-fprintf(stderr, "re-marking shader: %s\n", sha1_buf);
- }
-  }
-   }
-
free (buffer);
 
return true;
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 241c2b92f7..0fdeba4732 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2576,8 +2576,7 @@ enum gl_compile_status
 {
COMPILE_FAILURE = 0,
COMPILE_SUCCESS,
-   COMPILE_SKIPPED,
-   COMPILED_NO_OPTS
+   COMPILE_SKIPPED
 };
 
 /**
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] glsl: be much more aggressive when skipping shader compilation

2019-01-16 Thread Timothy Arceri

Currently we only add a cache key for a shader once it is linked.
However games like Team Fortress 2 compile a whole bunch of shaders
which are never actually linked. These compiled shaders can take
up a bunch of memory.

This patch changes things so that we add the key for the shader to
the cache as soon as it is compiled. This means on a warm cache we
can avoid the wasted memory from these shaders. Worst case scenario
is we need to compile the shaders at link time but this can happen
anyway if the shader has been evicted from the cache.

Reduces memory use in Team Fortress 2 from 1.3GB -> 770MB on a
warm cache from start up to the game menu.
---
 src/compiler/glsl/glsl_parser_extras.cpp | 9 +
 src/compiler/glsl/shader_cache.cpp   | 7 +--
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
b/src/compiler/glsl/glsl_parser_extras.cpp
index 200df7759b..655399a812 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -2155,6 +2155,15 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct 
gl_shader *shader,
 
delete state->symbols;
ralloc_free(state);
+
+   if (ctx->Cache) {
+  char sha1_buf[41];
+  disk_cache_put_key(ctx->Cache, shader->sha1);
+  if (ctx->_Shader->Flags & GLSL_CACHE_INFO) {
+ _mesa_sha1_format(sha1_buf, shader->sha1);
+ fprintf(stderr, "marking shader: %s\n", sha1_buf);
+  }
+   }
 }
 
 } /* extern "C" */
diff --git a/src/compiler/glsl/shader_cache.cpp 
b/src/compiler/glsl/shader_cache.cpp
index 879511a9d7..581098b88f 100644
--- a/src/compiler/glsl/shader_cache.cpp
+++ b/src/compiler/glsl/shader_cache.cpp
@@ -121,20 +121,15 @@ shader_cache_write_program_metadata(struct gl_context 
*ctx,
if (!cache_item_metadata.keys)
   goto fail;
 
-   char sha1_buf[41];
for (unsigned i = 0; i < prog->NumShaders; i++) {
-  disk_cache_put_key(cache, prog->Shaders[i]->sha1);
   memcpy(cache_item_metadata.keys[i], prog->Shaders[i]->sha1,
  sizeof(cache_key));
-  if (ctx->_Shader->Flags & GLSL_CACHE_INFO) {
- _mesa_sha1_format(sha1_buf, prog->Shaders[i]->sha1);
- fprintf(stderr, "marking shader: %s\n", sha1_buf);
-  }
}
 
disk_cache_put(cache, prog->data->sha1, metadata.data, metadata.size,
   _item_metadata);
 
+   char sha1_buf[41];
if (ctx->_Shader->Flags & GLSL_CACHE_INFO) {
   _mesa_sha1_format(sha1_buf, prog->data->sha1);
   fprintf(stderr, "putting program metadata in cache: %s\n", sha1_buf);
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/5] radeonsi/nir: add missing pieces for bindless image/sampler support

2019-01-16 Thread Timothy Arceri


On 17/1/19 9:41 am, Marek Olšák wrote:
A bindless handle is a temporary (coming from a shader input, UBO, SSBO, 
image, or inline constant), so the bindless code shouldn't have any 
awareness of where it's coming from.


This is a fair point, I should move this to nir_to_llvm where we know
what type of handle we are dealing with. I'll rework this.




Patches 1-4:

Reviewed-by: Marek Olšák mailto:marek.ol...@amd.com>>


Thanks!



Marek

On Wed, Jan 9, 2019 at 3:54 AM Timothy Arceri <mailto:tarc...@itsqueeze.com>> wrote:


---
  src/gallium/drivers/radeonsi/si_shader_nir.c | 33 +++-
  1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 2c95c62d99..256ef28bb1 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -912,6 +912,28 @@ si_nir_lookup_interp_param(struct ac_shader_abi
*abi,
                 LLVMGetParam(ctx->main_fn, interp_param_idx) : NULL;
  }

+static LLVMValueRef
+get_bindless_index(struct ac_shader_abi *abi,
+                  struct si_shader_context *ctx, unsigned base_index,
+                  unsigned constant_index, LLVMValueRef dynamic_index)
+{
+       LLVMValueRef offset = LLVMConstInt(ctx->i32, base_index * 4, 0);
+       LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder,
dynamic_index,
+                                         LLVMConstInt(ctx->ac.i32,
constant_index, 0), "");
+
+       /* Bindless uniforms are 64bit so multiple index by 8 */
+       index = LLVMBuildMul(ctx->ac.builder, index,
LLVMConstInt(ctx->i32, 8, 0), "");
+       offset = LLVMBuildAdd(ctx->ac.builder, offset, index, "");
+
+       LLVMValueRef ubo_index = abi->load_ubo(abi, ctx->ac.i32_0);
+
+       LLVMValueRef ret = ac_build_buffer_load(>ac, ubo_index,
1, NULL, offset,
+                                               NULL, 0, false,
false, true, true);
+
+       return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->i32, "");
+}
+
+
  static LLVMValueRef
  si_nir_load_sampler_desc(struct ac_shader_abi *abi,
                          unsigned descriptor_set, unsigned base_index,
@@ -937,8 +959,17 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
                 LLVMValueRef list =
                         LLVMGetParam(ctx->main_fn,
ctx->param_bindless_samplers_and_images);

-               /* dynamic_index is the bindless handle */
+               dynamic_index = dynamic_index ? dynamic_index :
ctx->ac.i32_0;
+               dynamic_index = get_bindless_index(abi, ctx, base_index,
+                                                  constant_index,
dynamic_index);
+
                 if (image) {
+                       /* For simplicity, bindless image
descriptors use fixed
+                        * 16-dword slots for now.
+                        */
+                       dynamic_index =
LLVMBuildMul(ctx->ac.builder, dynamic_index,
+                                            LLVMConstInt(ctx->i32,
2, 0), "");
+
                         return si_load_image_desc(ctx, list,
dynamic_index, desc_type,
                                                   dcc_off, true);
                 }
-- 
2.20.1


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org <mailto:mesa-dev@lists.freedesktop.org>
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Thoughts after hitting 100 merge requests?

2019-01-13 Thread Timothy Arceri

To be honest I have mixed feelings about using Gitlab merge requests. As
Jason mentions below, the discussions feature is a nice way to avoid the
mess that can happen with mailing list replies.


However I seem to find myself fighting with the interface more than
should be necessary. Most of this is probably just that it takes time to
get used to. But the single most annoying thing is having to expand the
code comment when looking at a commit. Especially during review you
always want to view the comment. It would be so much nicer if these
were always expanded by default. Is there a setting for this somewhere?


Tim

On 12/1/19 4:05 am, Jason Ekstrand wrote:
I'm putting my own thoughts in a reply for some reason.  Here's what 
I've seen.


  1. I really like GitLab "discussions".  It provides a very good way 
for both the author and the reviewers to keep track of what review 
comments have been dealt with and what comments are still outstanding.


  2. GitLab is currently missing a good way to comment on commit 
messages which makes giving review tags rather painful.  There is a 
GitLab issue opened about this: 
https://gitlab.com/gitlab-org/gitlab-ce/issues/38602


  3. GitLab has a bug regarding per-commit comments where they tend to 
get lost while you're looking at the commit itself: 
https://gitlab.com/gitlab-org/gitlab-ce/issues/53175


  4. At least two of those merge requests were small bug fixes by brand 
new contributors who I've never seen on the mailing list.


  5. There's no way with gitlab for Reviewed-by tags to get 
automatically applied as part of the merging process.  This makes 
merging a bit more manual than it needs to be but is really no worse 
than it was before.


Ok, there you have my thoughts.  I'd be happy to hear others.

--Jason

On Fri, Jan 11, 2019 at 10:57 AM Jason Ekstrand wrote:


All,

The mesa project has now hit 100 merge requests (36 are still
open).  I (and I'm sure others) would be curious to hear people's
initial thoughts on the process.  What's working well?  What's not
working?  Is it total fail and should we go back to mailing lists?

--Jason


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] ac/nir_to_llvm: fix interpolateAt* for arrays

2019-01-09 Thread Timothy Arceri

This builds on the recent interpolate fix by Rhys ee8488ea3b99.

This doesn't handle arrays of structs but I've got a feeling those
might be broken even for radeonsi tgsi (we currently have no tests).

This fixes the arb_gpu_shader5 interpolateAt* tests that contain
arrays.

Fixes: ee8488ea3b99 ("ac/nir,radv,radeonsi/nir: use correct indices for 
interpolation intrinsics")
---
 src/amd/common/ac_nir_to_llvm.c | 80 +
 1 file changed, 61 insertions(+), 19 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 5023b96f92..00011a439d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2830,15 +2830,16 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,
 const nir_intrinsic_instr *instr)
 {
LLVMValueRef result[4];
-   LLVMValueRef interp_param, attr_number;
+   LLVMValueRef interp_param;
unsigned location;
unsigned chan;
LLVMValueRef src_c0 = NULL;
LLVMValueRef src_c1 = NULL;
LLVMValueRef src0 = NULL;
 
-   nir_variable *var = 
nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
-   int input_index = ctx->abi->fs_input_attr_indices[var->data.location - 
VARYING_SLOT_VAR0];
+   nir_deref_instr *deref_instr = 
nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+   nir_variable *var = nir_deref_instr_get_variable(deref_instr);
+   int input_base = ctx->abi->fs_input_attr_indices[var->data.location - 
VARYING_SLOT_VAR0];
switch (instr->intrinsic) {
case nir_intrinsic_interp_deref_at_centroid:
location = INTERP_CENTROID;
@@ -2868,7 +2869,6 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,
src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
}
interp_param = ctx->abi->lookup_interp_param(ctx->abi, 
var->data.interpolation, location);
-   attr_number = LLVMConstInt(ctx->ac.i32, input_index, false);
 
if (location == INTERP_CENTER) {
LLVMValueRef ij_out[2];
@@ -2906,26 +2906,68 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,
 
}
 
+   LLVMValueRef array_idx = ctx->ac.i32_0;
+   while(deref_instr->deref_type != nir_deref_type_var) {
+   if (deref_instr->deref_type == nir_deref_type_array) {
+   unsigned array_size = 
glsl_get_aoa_size(deref_instr->type);
+   if (!array_size)
+   array_size = 1;
+
+   LLVMValueRef offset;
+   nir_const_value *const_value = 
nir_src_as_const_value(deref_instr->arr.index);
+   if (const_value) {
+   offset = LLVMConstInt(ctx->ac.i32, array_size * 
const_value->u32[0], false);
+   } else {
+   LLVMValueRef indirect = get_src(ctx, 
deref_instr->arr.index);
+
+   offset = LLVMBuildMul(ctx->ac.builder, indirect,
+ LLVMConstInt(ctx->ac.i32, 
array_size, false), "");
+   }
+
+   array_idx = LLVMBuildAdd(ctx->ac.builder, array_idx, 
offset, "");
+   deref_instr = nir_src_as_deref(deref_instr->parent);
+   } else if (deref_instr->deref_type == nir_deref_type_struct) {
+   /* TODO: Probably need to do more here to support 
arrays of structs etc */
+   deref_instr = nir_src_as_deref(deref_instr->parent);
+   } else {
+   unreachable("Unsupported deref type");
+   }
+
+   }
+
+   unsigned input_array_size = glsl_get_aoa_size(var->type);
+   if (!input_array_size)
+   input_array_size = 1;
+
for (chan = 0; chan < 4; chan++) {
+   LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->ac.f32, 
input_array_size));
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
 
-   if (interp_param) {
-   interp_param = LLVMBuildBitCast(ctx->ac.builder,
+   for (unsigned idx = 0; idx < input_array_size; ++idx) {
+   LLVMValueRef v, attr_number;
+
+   attr_number = LLVMConstInt(ctx->ac.i32, input_base + 
idx, false);
+   if (interp_param) {
+   interp_param = LLVMBuildBitCast(ctx->ac.builder,
interp_param, 
ctx->ac.v2f32, "");
-   LLVMValueRef i = LLVMBuildExtractElement(
-   ctx->ac.builder, interp_param, ctx->ac.i32_0, 
"");
-   LLVMValueRef j = LLVMBuildExtractElement(
-   ctx->ac.builder, interp_param,

[Mesa-dev] [PATCH 1/5] ac/nir_to_llvm: fix type handling in image code

2019-01-09 Thread Timothy Arceri

The current code only strips off arrays and cannot find the type
for images that are struct members.

Instead of trying to get the image type from the variable, we just
get it directly from the deref instruction.
---
 src/amd/common/ac_nir_to_llvm.c | 27 ---
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 89c7617529..633cc0aa06 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2212,10 +2212,10 @@ static LLVMValueRef 
adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
return sample_index;
 }
 
-static nir_variable *get_image_variable(const nir_intrinsic_instr *instr)
+static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr)
 {
assert(instr->src[0].is_ssa);
-   return 
nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
+   return nir_instr_as_deref(instr->src[0].ssa->parent_instr);
 }
 
 static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx,
@@ -2230,7 +2230,7 @@ static void get_image_coords(struct ac_nir_context *ctx,
 const nir_intrinsic_instr *instr,
 struct ac_image_args *args)
 {
-   const struct glsl_type *type = 
glsl_without_array(get_image_variable(instr)->type);
+   const struct glsl_type *type = get_image_deref(instr)->type;
 
LLVMValueRef src0 = get_src(ctx, instr->src[1]);
LLVMValueRef masks[] = {
@@ -2339,10 +2339,9 @@ static LLVMValueRef visit_image_load(struct 
ac_nir_context *ctx,
 const nir_intrinsic_instr *instr)
 {
LLVMValueRef res;
-   const nir_variable *var = get_image_variable(instr);
-   const struct glsl_type *type = var->type;
-
-   type = glsl_without_array(type);
+   const nir_deref_instr *image_deref = get_image_deref(instr);
+   const struct glsl_type *type = image_deref->type;
+   const nir_variable *var = nir_deref_instr_get_variable(image_deref);
 
const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
if (dim == GLSL_SAMPLER_DIM_BUF) {
@@ -2383,8 +2382,9 @@ static void visit_image_store(struct ac_nir_context *ctx,
  nir_intrinsic_instr *instr)
 {
LLVMValueRef params[8];
-   const nir_variable *var = get_image_variable(instr);
-   const struct glsl_type *type = glsl_without_array(var->type);
+   const nir_deref_instr *image_deref = get_image_deref(instr);
+   const struct glsl_type *type = image_deref->type;
+   const nir_variable *var = nir_deref_instr_get_variable(image_deref);
const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
LLVMValueRef glc = ctx->ac.i1false;
bool force_glc = ctx->ac.chip_class == SI;
@@ -2441,13 +2441,12 @@ static LLVMValueRef visit_image_atomic(struct 
ac_nir_context *ctx,
 {
LLVMValueRef params[7];
int param_count = 0;
-   const nir_variable *var = get_image_variable(instr);
+   const struct glsl_type *type = get_image_deref(instr)->type;
 
bool cmpswap = instr->intrinsic == 
nir_intrinsic_image_deref_atomic_comp_swap;
const char *atomic_name;
char intrinsic_name[64];
enum ac_atomic_op atomic_subop;
-   const struct glsl_type *type = glsl_without_array(var->type);
MAYBE_UNUSED int length;
 
bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT;
@@ -2533,8 +2532,7 @@ static LLVMValueRef visit_image_atomic(struct 
ac_nir_context *ctx,
 static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
 {
-   const nir_variable *var = get_image_variable(instr);
-   const struct glsl_type *type = glsl_without_array(var->type);
+   const struct glsl_type *type = get_image_deref(instr)->type;
 
struct ac_image_args args = { 0 };
args.dim = get_ac_sampler_dim(&ctx->ac, glsl_get_sampler_dim(type),
@@ -2552,8 +2550,7 @@ static LLVMValueRef visit_image_size(struct 
ac_nir_context *ctx,
 const nir_intrinsic_instr *instr)
 {
LLVMValueRef res;
-   const nir_variable *var = get_image_variable(instr);
-   const struct glsl_type *type = glsl_without_array(var->type);
+   const struct glsl_type *type = get_image_deref(instr)->type;
 
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
return get_buffer_size(ctx, get_image_descriptor(ctx, instr, 
AC_DESC_BUFFER, false), true);
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/5] ac/nir_to_llvm: fix regression in bindless support

2019-01-09 Thread Timothy Arceri

This wasn't ported over when deref support was implemented.
---
 src/amd/common/ac_nir_to_llvm.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 633cc0aa06..4f7b2e4dc2 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3284,7 +3284,12 @@ static LLVMValueRef get_sampler_desc(struct 
ac_nir_context *ctx,
deref_instr = nir_src_as_deref(deref_instr->parent);
}
descriptor_set = deref_instr->var->data.descriptor_set;
-   base_index = deref_instr->var->data.binding;
+
+   if (deref_instr->var->data.bindless) {
+   base_index = deref_instr->var->data.driver_location;
+   bindless = true;
+   } else
+   base_index = deref_instr->var->data.binding;
}
 
return ctx->abi->load_sampler_desc(ctx->abi,
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/5] radeonsi/nir: add missing pieces for bindless image/sampler support

2019-01-09 Thread Timothy Arceri

---
 src/gallium/drivers/radeonsi/si_shader_nir.c | 33 +++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 2c95c62d99..256ef28bb1 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -912,6 +912,28 @@ si_nir_lookup_interp_param(struct ac_shader_abi *abi,
LLVMGetParam(ctx->main_fn, interp_param_idx) : NULL;
 }
 
+static LLVMValueRef
+get_bindless_index(struct ac_shader_abi *abi,
+  struct si_shader_context *ctx, unsigned base_index,
+  unsigned constant_index, LLVMValueRef dynamic_index)
+{
+   LLVMValueRef offset = LLVMConstInt(ctx->i32, base_index * 4, 0);
+   LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
+ LLVMConstInt(ctx->ac.i32, 
constant_index, 0), "");
+
+   /* Bindless uniforms are 64bit so multiply index by 8 */
+   index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->i32, 8, 
0), "");
+   offset = LLVMBuildAdd(ctx->ac.builder, offset, index, "");
+
+   LLVMValueRef ubo_index = abi->load_ubo(abi, ctx->ac.i32_0);
+
+   LLVMValueRef ret = ac_build_buffer_load(&ctx->ac, ubo_index, 1, NULL, 
offset,
+   NULL, 0, false, false, true, 
true);
+
+   return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->i32, "");
+}
+
+
 static LLVMValueRef
 si_nir_load_sampler_desc(struct ac_shader_abi *abi,
 unsigned descriptor_set, unsigned base_index,
@@ -937,8 +959,17 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
LLVMValueRef list =
LLVMGetParam(ctx->main_fn, 
ctx->param_bindless_samplers_and_images);
 
-   /* dynamic_index is the bindless handle */
+   dynamic_index = dynamic_index ? dynamic_index : ctx->ac.i32_0;
+   dynamic_index = get_bindless_index(abi, ctx, base_index,
+  constant_index, 
dynamic_index);
+
if (image) {
+   /* For simplicity, bindless image descriptors use fixed
+* 16-dword slots for now.
+*/
+   dynamic_index = LLVMBuildMul(ctx->ac.builder, 
dynamic_index,
+LLVMConstInt(ctx->i32, 2, 0), "");
+
return si_load_image_desc(ctx, list, dynamic_index, 
desc_type,
  dcc_off, true);
}
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] Partial bindless support for radeonsi NIR

2019-01-09 Thread Timothy Arceri

We still need core NIR support for various features such as
image/sampler in/outs etc. But this gets us a step closer.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/5] radeonsi/nir: get correct type for images inside structs

2019-01-09 Thread Timothy Arceri

---
 src/gallium/drivers/radeonsi/si_shader_nir.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 64acf41679..2c95c62d99 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -163,11 +163,12 @@ static void scan_instruction(struct tgsi_shader_info 
*info,
break;
}
case nir_intrinsic_image_deref_store: {
+   const nir_deref_instr *image_deref = 
nir_instr_as_deref(intr->src[0].ssa->parent_instr);
nir_variable *var = intrinsic_get_var(intr);
if (var->data.bindless) {
info->uses_bindless_images = true;
 
-   if (glsl_get_sampler_dim(var->type) == 
GLSL_SAMPLER_DIM_BUF)
+   if (glsl_get_sampler_dim(image_deref->type) == 
GLSL_SAMPLER_DIM_BUF)
info->uses_bindless_buffer_store = true;
else
info->uses_bindless_image_store = true;
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/5] ac/nir_to_llvm: add support for structs to get_sampler_desc()

2019-01-09 Thread Timothy Arceri

---
 src/amd/common/ac_nir_to_llvm.c | 45 +++--
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 4f7b2e4dc2..99eb6fb573 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3261,27 +3261,34 @@ static LLVMValueRef get_sampler_desc(struct 
ac_nir_context *ctx,
base_index = tex_instr->sampler_index;
} else {
while(deref_instr->deref_type != nir_deref_type_var) {
-   unsigned array_size = 
glsl_get_aoa_size(deref_instr->type);
-   if (!array_size)
-   array_size = 1;
-
-   assert(deref_instr->deref_type == nir_deref_type_array);
-   nir_const_value *const_value = 
nir_src_as_const_value(deref_instr->arr.index);
-   if (const_value) {
-   constant_index += array_size * 
const_value->u32[0];
-   } else {
-   LLVMValueRef indirect = get_src(ctx, 
deref_instr->arr.index);
-
-   indirect = LLVMBuildMul(ctx->ac.builder, 
indirect,
-   LLVMConstInt(ctx->ac.i32, array_size, 
false), "");
+   if (deref_instr->deref_type == nir_deref_type_array) {
+   unsigned array_size = 
glsl_get_aoa_size(deref_instr->type);
+   if (!array_size)
+   array_size = 1;
+
+   nir_const_value *const_value = 
nir_src_as_const_value(deref_instr->arr.index);
+   if (const_value) {
+   constant_index += array_size * 
const_value->u32[0];
+   } else {
+   LLVMValueRef indirect = get_src(ctx, 
deref_instr->arr.index);
+
+   indirect = 
LLVMBuildMul(ctx->ac.builder, indirect,
+   LLVMConstInt(ctx->ac.i32, 
array_size, false), "");
+
+   if (!index)
+   index = indirect;
+   else
+   index = 
LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
+   }
 
-if (!index)
-   index = indirect;
-   else
-   index = LLVMBuildAdd(ctx->ac.builder, 
index, indirect, "");
+   deref_instr = 
nir_src_as_deref(deref_instr->parent);
+   } else if (deref_instr->deref_type == 
nir_deref_type_struct) {
+   unsigned sidx = deref_instr->strct.index;
+   deref_instr = 
nir_src_as_deref(deref_instr->parent);
+   constant_index += 
glsl_get_record_location_offset(deref_instr->type, sidx);
+   } else {
+   unreachable("Unsupported deref type");
}
-
-   deref_instr = nir_src_as_deref(deref_instr->parent);
}
descriptor_set = deref_instr->var->data.descriptor_set;
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ac/nir_to_llvm: add missing parentheses in get_inst_tessfactor_writemask()

2019-01-07 Thread Timothy Arceri


On 8/1/19 10:50 am, Bas Nieuwenhuizen wrote:

I think this got fixed by
https://gitlab.freedesktop.org/mesa/mesa/commit/be6cee51c06dc72ac159bd75b4201c61952515bd
already?


ok thanks. Hadn't pulled that one in yet.



On Tue, Jan 8, 2019 at 12:28 AM Timothy Arceri  wrote:


Cc: Marek Olšák 
---
  src/amd/common/ac_nir_to_llvm.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 84dbe17457..c9e11142eb 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4086,9 +4086,9 @@ get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
 unsigned num_comps = intrin->dest.ssa.num_components;

 if (location == VARYING_SLOT_TESS_LEVEL_INNER)
-   writemask = ((1 << num_comps + 1) - 1) << first_component;
+   writemask = ((1 << (num_comps + 1)) - 1) << first_component;
 else if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
-   writemask = (((1 << num_comps + 1) - 1) << first_component) << 
4;
+   writemask = (((1 << (num_comps + 1)) - 1) << first_component) 
<< 4;

 return writemask;
  }
--
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] glsl/linker: specify proper direction in location aliasing error

2019-01-07 Thread Timothy Arceri


Reviewed-by: Timothy Arceri 

On 8/1/19 1:11 am, Andres Gomez wrote:

The check for location aliasing was always assuming output variables
but this validation is also called for input variables.

Fixes: e2abb75b0e4 ("glsl/linker: validate explicit locations for SSO programs")
Cc: Iago Toral Quiroga 
Signed-off-by: Andres Gomez 
---
  src/compiler/glsl/link_varyings.cpp | 15 ++-
  1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 52e493cb599..3969c0120b3 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -481,9 +481,10 @@ check_location_aliasing(struct explicit_location_info 
explicit_locations[][4],
  /* Component aliasing is not alloed */
  if (comp >= component && comp < last_comp) {
 linker_error(prog,
-"%s shader has multiple outputs explicitly "
+"%s shader has multiple %sputs explicitly "
  "assigned to location %d and component %d\n",
  _mesa_shader_stage_to_string(stage),
+var->data.mode == ir_var_shader_in ? "in" : "out",
  location, comp);
 return false;
  } else {
@@ -502,10 +503,12 @@ check_location_aliasing(struct explicit_location_info 
explicit_locations[][4],
  
 if (info->interpolation != interpolation) {

linker_error(prog,
-   "%s shader has multiple outputs at explicit "
+   "%s shader has multiple %sputs at explicit "
 "location %u with different interpolation "
 "settings\n",
-   _mesa_shader_stage_to_string(stage), location);
+   _mesa_shader_stage_to_string(stage),
+   var->data.mode == ir_var_shader_in ?
+   "in" : "out", location);
return false;
 }
  
@@ -513,9 +516,11 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4],

 info->sample != sample ||
 info->patch != patch) {
linker_error(prog,
-   "%s shader has multiple outputs at explicit "
+   "%s shader has multiple %sputs at explicit "
 "location %u with different aux storage\n",
-   _mesa_shader_stage_to_string(stage), location);
+   _mesa_shader_stage_to_string(stage),
+   var->data.mode == ir_var_shader_in ?
+   "in" : "out", location);
return false;
 }
  }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] ac/nir_to_llvm: add missing parentheses in get_inst_tessfactor_writemask()

2019-01-07 Thread Timothy Arceri

Cc: Marek Olšák 
---
 src/amd/common/ac_nir_to_llvm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 84dbe17457..c9e11142eb 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4086,9 +4086,9 @@ get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
unsigned num_comps = intrin->dest.ssa.num_components;
 
if (location == VARYING_SLOT_TESS_LEVEL_INNER)
-   writemask = ((1 << num_comps + 1) - 1) << first_component;
+   writemask = ((1 << (num_comps + 1)) - 1) << first_component;
else if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
-   writemask = (((1 << num_comps + 1) - 1) << first_component) << 
4;
+   writemask = (((1 << (num_comps + 1)) - 1) << first_component) 
<< 4;
 
return writemask;
 }
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/3] ac/nir_to_llvm: add ac_are_tessfactors_def_in_all_invocs()

2019-01-07 Thread Timothy Arceri


On 8/1/19 6:43 am, Marek Olšák wrote:


On Mon, Dec 17, 2018 at 8:18 PM Timothy Arceri <mailto:tarc...@itsqueeze.com>> wrote:


The following patch will use this with the radeonsi NIR backend
but I've added it to ac so we can use it with RADV in future.

This is a NIR implementation of the tgsi function
tgsi_scan_tess_ctrl().
---
  src/amd/common/ac_nir_to_llvm.c | 161 
  src/amd/common/ac_nir_to_llvm.h |   2 +
  2 files changed, 163 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c
b/src/amd/common/ac_nir_to_llvm.c
index 4294956de1..055940b75f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4063,3 +4063,164 @@ ac_lower_indirect_derefs(struct nir_shader
*nir, enum chip_class chip_class)

         nir_lower_indirect_derefs(nir, indirect_mask);
  }
+
+static unsigned
+get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
+{
+       if (intrin->intrinsic != nir_intrinsic_store_deref)
+               return 0;
+
+       nir_variable *var =
+ 
  nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0]));

+
+       if (var->data.mode != nir_var_shader_out)
+               return 0;
+
+       unsigned writemask = 0;
+       const int location = var->data.location;
+       unsigned first_component = var->data.location_frac;
+       unsigned num_comps = intrin->dest.ssa.num_components;
+
+       if (location == VARYING_SLOT_TESS_LEVEL_INNER)
+               writemask = ((1 << num_comps + 1) - 1) <<
first_component;


Parentheses are missing in "1 << num_comps + 1".

+       else if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
+               writemask = (((1 << num_comps + 1) - 1) <<
first_component) << 4;


Same here.


Good catch. I did test this code when writing it ... maybe these are 
scalars when we see them here. Anyway, I'll fix this and send a 
patch shortly.




Marek

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] glsl: correct typo in GLSL compilation error message

2019-01-07 Thread Timothy Arceri


On 8/1/19 2:51 am, Andres Gomez wrote:

On Mon, 2019-01-07 at 16:48 +0100, Erik Faye-Lund wrote:

On Mon, 2019-01-07 at 15:50 +0200, Andres Gomez wrote:

Correct a typo introduced by
037f68d81e1 ("glsl: apply align layout qualifier rules to block
offsets")

Cc: Timothy Arceri 
Signed-off-by: Andres Gomez 


This should probably have this tag:

Fixes: 037f68d81e1 "glsl: apply align layout qualifier rules to block
 offsets"


Didn't think it was important enough to add the tag which will pull
into the stable releases ...

... it will also be more traceable with it added so, I suppose I will
do so.


Seems fine to go into stable :)

Reviewed-by: Timothy Arceri 

Thanks!




With that added:

Reviewed-by: Erik Faye-Lund 


Thanks! ☺


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] nir: avoid uninitialized variable warning

2019-01-06 Thread Timothy Arceri

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109231
---
 src/compiler/nir/nir_loop_analyze.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 259f02a854..6deb6cb962 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -252,7 +252,7 @@ compute_induction_information(loop_info_state *state)
 nir_phi_instr *src_phi =
nir_instr_as_phi(src_var->def->parent_instr);
 
-nir_op alu_op;
+nir_op alu_op = nir_num_opcodes; /* avoid uninitialized warning */
 nir_ssa_def *alu_srcs[2] = {0};
 nir_foreach_phi_src(src2, src_phi) {
nir_loop_variable *src_var2 =
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 23/25] radeonsi: factor si_query_buffer logic out of si_query_hw

2019-01-02 Thread Timothy Arceri

This commit seems to cause bad stuttering in the Batman Arkham City 
benchmark.


On 7/12/18 1:00 am, Nicolai Hähnle wrote:

From: Nicolai Hähnle 

This is a move towards using composition instead of inheritance for
different query types.

This change weakens out-of-memory error reporting somewhat, though this
should be acceptable since we didn't consistently report such errors in
the first place.
---
  src/gallium/drivers/radeonsi/si_perfcounter.c |   8 +-
  src/gallium/drivers/radeonsi/si_query.c   | 177 +-
  src/gallium/drivers/radeonsi/si_query.h   |  17 +-
  src/gallium/drivers/radeonsi/si_texture.c |   7 +-
  4 files changed, 99 insertions(+), 110 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c 
b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 0b3d8f89273..f0d10c054c4 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -761,23 +761,22 @@ static void si_pc_query_destroy(struct si_screen *sscreen,
struct si_query_group *group = query->groups;
query->groups = group->next;
FREE(group);
}
  
  	FREE(query->counters);
  
  	si_query_hw_destroy(sscreen, rquery);

  }
  
-static bool si_pc_query_prepare_buffer(struct si_screen *screen,

-  struct si_query_hw *hwquery,
-  struct r600_resource *buffer)
+static bool si_pc_query_prepare_buffer(struct si_context *ctx,
+  struct si_query_buffer *qbuf)
  {
/* no-op */
return true;
  }
  
  static void si_pc_query_emit_start(struct si_context *sctx,

   struct si_query_hw *hwquery,
   struct r600_resource *buffer, uint64_t va)
  {
struct si_query_pc *query = (struct si_query_pc *)hwquery;
@@ -1055,23 +1054,20 @@ struct pipe_query *si_create_batch_query(struct 
pipe_context *ctx,
counter->base = group->result_base + j;
counter->stride = group->num_counters;
  
  		counter->qwords = 1;

if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0)
counter->qwords = screen->info.max_se;
if (group->instance < 0)
counter->qwords *= block->num_instances;
}
  
-	if (!si_query_hw_init(screen, &query->b))

-   goto error;
-
return (struct pipe_query *)query;
  
  error:

si_pc_query_destroy(screen, &query->b.b);
return NULL;
  }
  
  static bool si_init_block_names(struct si_screen *screen,

struct si_pc_block *block)
  {
diff --git a/src/gallium/drivers/radeonsi/si_query.c 
b/src/gallium/drivers/radeonsi/si_query.c
index 479a1bbf2c4..5b0fba0ed92 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -514,86 +514,129 @@ static struct pipe_query *si_query_sw_create(unsigned 
query_type)
query = CALLOC_STRUCT(si_query_sw);
if (!query)
return NULL;
  
  	query->b.type = query_type;

query->b.ops = &sw_query_ops;
  
  	return (struct pipe_query *)query;

  }
  
-void si_query_hw_destroy(struct si_screen *sscreen,

-struct si_query *rquery)
+void si_query_buffer_destroy(struct si_screen *sscreen, struct si_query_buffer 
*buffer)
  {
-   struct si_query_hw *query = (struct si_query_hw *)rquery;
-   struct si_query_buffer *prev = query->buffer.previous;
+   struct si_query_buffer *prev = buffer->previous;
  
  	/* Release all query buffers. */

while (prev) {
struct si_query_buffer *qbuf = prev;
prev = prev->previous;
r600_resource_reference(&qbuf->buf, NULL);
FREE(qbuf);
}
  
-	r600_resource_reference(&query->buffer.buf, NULL);

-   r600_resource_reference(&query->workaround_buf, NULL);
-   FREE(rquery);
+   r600_resource_reference(&buffer->buf, NULL);
+}
+
+void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer 
*buffer)
+{
+   /* Discard all query buffers except for the oldest. */
+   while (buffer->previous) {
+   struct si_query_buffer *qbuf = buffer->previous;
+   buffer->previous = qbuf->previous;
+
+   r600_resource_reference(&buffer->buf, NULL);
+   buffer->buf = qbuf->buf; /* move ownership */
+   FREE(qbuf);
+   }
+   buffer->results_end = 0;
+
+   /* Discard even the oldest buffer if it can't be mapped without a 
stall. */
+   if (buffer->buf &&
+   (si_rings_is_buffer_referenced(sctx, buffer->buf->buf, 
RADEON_USAGE_READWRITE) ||
+!sctx->ws->buffer_wait(buffer->buf->buf, 0, 
RADEON_USAGE_READWRITE))) {
+   r600_resource_reference(&buffer->buf, NULL);
+   }
  }
  
-static struct r600_resource *si_new_query_buffer(struct si_screen

[Mesa-dev] [PATCH] st/glsl: refactor st_link_nir()

2019-01-02 Thread Timothy Arceri

The functional change here is moving the nir_lower_io_to_scalar_early()
calls inside st_nir_link_shaders() and moving the st_nir_opts() call
after the call to nir_lower_io_arrays_to_elements().

This fixes a bug with the following piglit test due to the current code
not cleaning up dead code after we lower arrays. This was causing an
assert in the new duplicate varyings link time opt introduced in
70be9afccb23.

tests/spec/glsl-1.10/execution/vsfs-unused-array-member.shader_test

Moving the nir_lower_io_to_scalar_early() calls also allows us to tidy
up the code a little and merge some loops.
---
 src/mesa/state_tracker/st_glsl_to_nir.cpp | 52 +++
 1 file changed, 16 insertions(+), 36 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index af83a341e9..cbce4661e9 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -586,8 +586,16 @@ st_nir_get_mesa_program(struct gl_context *ctx,
 static void
 st_nir_link_shaders(nir_shader **producer, nir_shader **consumer, bool scalar)
 {
+   if (scalar) {
+  NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
+  NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
+   }
+
nir_lower_io_arrays_to_elements(*producer, *consumer);
 
+   st_nir_opts(*producer, scalar);
+   st_nir_opts(*consumer, scalar);
+
if (nir_link_opt_varyings(*producer, *consumer))
   st_nir_opts(*consumer, scalar);
 
@@ -663,51 +671,23 @@ st_link_nir(struct gl_context *ctx,
struct pipe_screen *screen = st->pipe->screen;
bool is_scalar[MESA_SHADER_STAGES];
 
-   /* Determine scalar property of each shader stage */
+   unsigned last_stage = 0;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
   struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
-  enum pipe_shader_type type;
-
   if (shader == NULL)
  continue;
 
-  type = pipe_shader_type_from_mesa(shader->Stage);
-  is_scalar[i] = screen->get_shader_param(screen, type, 
PIPE_SHADER_CAP_SCALAR_ISA);
-   }
-
-   /* Determine first and last stage. */
-   unsigned first = MESA_SHADER_STAGES;
-   unsigned last = 0;
-   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-  if (!shader_program->_LinkedShaders[i])
- continue;
-  if (first == MESA_SHADER_STAGES)
- first = i;
-  last = i;
-   }
-
-   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-  struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
-  if (shader == NULL)
- continue;
+  /* Determine scalar property of each shader stage */
+  enum pipe_shader_type type = pipe_shader_type_from_mesa(shader->Stage);
+  is_scalar[i] = screen->get_shader_param(screen, type,
+  PIPE_SHADER_CAP_SCALAR_ISA);
 
   st_nir_get_mesa_program(ctx, shader_program, shader);
-
-  nir_variable_mode mask = (nir_variable_mode) 0;
-  if (i != first)
- mask = (nir_variable_mode)(mask | nir_var_shader_in);
-
-  if (i != last)
- mask = (nir_variable_mode)(mask | nir_var_shader_out);
-
-  nir_shader *nir = shader->Program->nir;
+  last_stage = i;
 
   if (is_scalar[i]) {
- NIR_PASS_V(nir, nir_lower_io_to_scalar_early, mask);
- NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
+ NIR_PASS_V(shader->Program->nir, nir_lower_load_const_to_scalar);
   }
-
-  st_nir_opts(nir, is_scalar[i]);
}
 
/* Linking the stages in the opposite order (from fragment to vertex)
@@ -715,7 +695,7 @@ st_link_nir(struct gl_context *ctx,
 * are eliminated if they are (transitively) not used in a later
 * stage.
 */
-   int next = last;
+   int next = last_stage;
for (int i = next - 1; i >= 0; i--) {
   struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
   if (shader == NULL)
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] nir: simplify does_varying_match()

2019-01-01 Thread Timothy Arceri

---
 src/compiler/nir/nir_linking_helpers.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index 97816ccc58..be6b9dd85c 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -702,11 +702,8 @@ nir_link_xfb_varyings(nir_shader *producer, nir_shader 
*consumer)
 static bool
 does_varying_match(nir_variable *out_var, nir_variable *in_var)
 {
-   if (in_var->data.location == out_var->data.location &&
-   in_var->data.location_frac == out_var->data.location_frac)
-  return true;
-
-   return false;
+   return in_var->data.location == out_var->data.location &&
+  in_var->data.location_frac == out_var->data.location_frac;
 }
 
 static nir_variable *
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] nir: make use of does_varying_match() helper

2019-01-01 Thread Timothy Arceri

---
 src/compiler/nir/nir_linking_helpers.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index abbd7e1f5c..97816ccc58 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -772,8 +772,7 @@ replace_constant_input(nir_shader *shader, 
nir_intrinsic_instr *store_intr)
 
  nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
 
- if (in_var->data.location != out_var->data.location ||
- in_var->data.location_frac != out_var->data.location_frac)
+ if (!does_varying_match(out_var, in_var))
 continue;
 
  b.cursor = nir_before_instr(instr);
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] nir: make nir_opt_remove_phis_impl() static

2019-01-01 Thread Timothy Arceri

---
 src/compiler/nir/nir.h | 1 -
 src/compiler/nir/nir_opt_remove_phis.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 4b8de4bb01..94d6578620 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3221,7 +3221,6 @@ bool nir_opt_move_load_ubo(nir_shader *shader);
 bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
  bool indirect_load_ok, bool expensive_alu_ok);
 
-bool nir_opt_remove_phis_impl(nir_function_impl *impl);
 bool nir_opt_remove_phis(nir_shader *shader);
 
 bool nir_opt_shrink_load(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_remove_phis.c 
b/src/compiler/nir/nir_opt_remove_phis.c
index e2d3994c49..d7ca2fe717 100644
--- a/src/compiler/nir/nir_opt_remove_phis.c
+++ b/src/compiler/nir/nir_opt_remove_phis.c
@@ -139,7 +139,7 @@ remove_phis_block(nir_block *block, nir_builder *b)
return progress;
 }
 
-bool
+static bool
 nir_opt_remove_phis_impl(nir_function_impl *impl)
 {
bool progress = false;
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 5/5] nir: link time opt duplicate varyings

2018-12-19 Thread Timothy Arceri

If we are outputting the same value to more than one output
component, rewrite the inputs to read from a single component.

This will allow the duplicate varying components to be optimised
away by the existing opts.

shader-db results i965 (SKL):

total instructions in shared programs: 12869230 -> 12860886 (-0.06%)
instructions in affected programs: 322601 -> 314257 (-2.59%)
helped: 3080
HURT: 8

total cycles in shared programs: 317792574 -> 317730593 (-0.02%)
cycles in affected programs: 2584925 -> 2522944 (-2.40%)
helped: 2975
HURT: 477

shader-db results radeonsi (VEGA):

SGPRS: 31576 -> 31664 (0.28 %)
VGPRS: 17484 -> 17064 (-2.40 %)
Spilled SGPRs: 184 -> 167 (-9.24 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 583340 -> 569368 (-2.40 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 6162 -> 6270 (1.75 %)
Wait states: 0 -> 0 (0.00 %)

vkpipeline-db results RADV (VEGA):

Totals from affected shaders:
SGPRS: 14880 -> 15080 (1.34 %)
VGPRS: 10872 -> 10888 (0.15 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 674016 -> 668396 (-0.83 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 2708 -> 2704 (-0.15 %)
Wait states: 0 -> 0 (0.00 %)

V2: bunch of tidy ups suggested by Jason
---
 src/compiler/nir/nir_linking_helpers.c | 83 ++
 1 file changed, 83 insertions(+)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index 2a3e6f407c..b74f57e6d8 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -699,6 +699,21 @@ nir_link_xfb_varyings(nir_shader *producer, nir_shader 
*consumer)
}
 }
 
+static nir_variable *
+get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
+{
+   nir_foreach_variable(var, &consumer->inputs) {
+  if (var->data.location == out_var->data.location &&
+  var->data.location_frac == out_var->data.location_frac &&
+  var->data.interpolation == out_var->data.interpolation &&
+  get_interp_loc(var) == get_interp_loc(out_var)) {
+ return var;
+  }
+   }
+
+   return NULL;
+}
+
 static bool
 can_replace_varying(nir_variable *out_var)
 {
@@ -774,6 +789,54 @@ replace_constant_input(nir_shader *shader, 
nir_intrinsic_instr *store_intr)
return progress;
 }
 
+static bool
+replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
+ nir_intrinsic_instr *dup_store_intr)
+{
+   assert(input_var);
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   nir_variable *dup_out_var =
+  nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
+
+   bool progress = false;
+   nir_foreach_block(block, impl) {
+  nir_foreach_instr(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+continue;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_load_deref)
+continue;
+
+ nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
+ if (in_deref->mode != nir_var_shader_in)
+continue;
+
+ nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
+
+ if (in_var->data.location != dup_out_var->data.location ||
+ in_var->data.location_frac != dup_out_var->data.location_frac ||
+ in_var->data.interpolation != input_var->data.interpolation ||
+ get_interp_loc(in_var) != get_interp_loc(input_var))
+continue;
+
+ b.cursor = nir_before_instr(instr);
+
+ nir_ssa_def *load = nir_load_var(&b, input_var);
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));
+
+ progress = true;
+  }
+   }
+
+   return progress;
+}
+
 bool
 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
 {
@@ -787,6 +850,10 @@ nir_link_opt_varyings(nir_shader *producer, nir_shader 
*consumer)
 
nir_function_impl *impl = nir_shader_get_entrypoint(producer);
 
+   struct hash_table *varying_values =
+  _mesa_hash_table_create(NULL,  _mesa_hash_pointer,
+  _mesa_key_pointer_equal);
+
/* If we find a store in the last block of the producer we can be sure this
 * is the only possible value for this output.
 */
@@ -810,8 +877,24 @@ nir_link_opt_varyings(nir_shader *producer, nir_shader 
*consumer)
 
   if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
  progress |= replace_constant_input(consumer, intr);
+  } else {
+ struct hash_entry *entry =
+   _mesa_hash_table_search(varying_values, intr->src[1].ssa);
+ if (entry) {
+progress |= replace_duplicate_input(consumer,
+

[Mesa-dev] [PATCH v2 3/5] nir: add can_replace_varying() helper

2018-12-19 Thread Timothy Arceri

This will be reused by the following patch.

Reviewed-by: Marek Olšák 
---
 src/compiler/nir/nir_linking_helpers.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index 4f25c09d00..f3125a9880 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -700,8 +700,7 @@ nir_link_xfb_varyings(nir_shader *producer, nir_shader 
*consumer)
 }
 
 static bool
-try_replace_constant_input(nir_shader *shader,
-   nir_intrinsic_instr *store_intr)
+can_replace_varying(nir_intrinsic_instr *store_intr)
 {
nir_deref_instr *out_deref = nir_src_as_deref(store_intr->src[0]);
if (out_deref->mode != nir_var_shader_out)
@@ -728,11 +727,24 @@ try_replace_constant_input(nir_shader *shader,
out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
   return false;
 
+   return true;
+}
+
+static bool
+try_replace_constant_input(nir_shader *shader,
+   nir_intrinsic_instr *store_intr)
+{
+   if (!can_replace_varying(store_intr))
+  return false;
+
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
 
nir_builder b;
nir_builder_init(&b, impl);
 
+   nir_variable *out_var =
+  nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
+
bool progress = false;
nir_foreach_block(block, impl) {
   nir_foreach_instr(instr, block) {
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 1/5] st/glsl_to_nir: call nir_lower_load_const_to_scalar() in the st

2018-12-19 Thread Timothy Arceri

This will help the new opt introduced in the following patches
allowing us to remove extra duplicate varyings.

Reviewed-by: Marek Olšák 
---
 src/gallium/drivers/radeonsi/si_shader_nir.c | 2 --
 src/mesa/state_tracker/st_glsl_to_nir.cpp| 4 +++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 0a692277f6..3883337b00 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -823,8 +823,6 @@ si_lower_nir(struct si_shader_selector* sel)
 
ac_lower_indirect_derefs(sel->nir, sel->screen->info.chip_class);
 
-   NIR_PASS_V(sel->nir, nir_lower_load_const_to_scalar);
-
bool progress;
do {
progress = false;
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index ed9f643e89..5176756433 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -702,8 +702,10 @@ st_link_nir(struct gl_context *ctx,
 
   nir_shader *nir = shader->Program->nir;
 
-  if (is_scalar[i])
+  if (is_scalar[i]) {
  NIR_PASS_V(nir, nir_lower_io_to_scalar_early, mask);
+ NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
+  }
 
   st_nir_opts(nir, is_scalar[i]);
}
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 2/5] nir: rename nir_link_constant_varyings() nir_link_opt_varyings()

2018-12-19 Thread Timothy Arceri

The following patches will add support for an additional
optimisation so this function will no longer just optimise varying
constants.

Reviewed-by: Marek Olšák 
---
 src/amd/vulkan/radv_pipeline.c| 4 ++--
 src/compiler/nir/nir.h| 2 +-
 src/compiler/nir/nir_linking_helpers.c| 2 +-
 src/intel/compiler/brw_nir.c  | 2 +-
 src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 3561d17aab..7bc0bd9994 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1814,8 +1814,8 @@ radv_link_shaders(struct radv_pipeline *pipeline, 
nir_shader **shaders)
nir_lower_io_arrays_to_elements(ordered_shaders[i],
ordered_shaders[i - 1]);
 
-   if (nir_link_constant_varyings(ordered_shaders[i],
-  ordered_shaders[i - 1]))
+   if (nir_link_opt_varyings(ordered_shaders[i],
+ ordered_shaders[i - 1]))
radv_optimize_nir(ordered_shaders[i - 1], false, false);
 
nir_remove_dead_variables(ordered_shaders[i],
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 65b51ead9f..4b8de4bb01 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2886,7 +2886,7 @@ bool nir_remove_unused_io_vars(nir_shader *shader, struct 
exec_list *var_list,
 void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
   bool default_to_smooth_interp);
 void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer);
-bool nir_link_constant_varyings(nir_shader *producer, nir_shader *consumer);
+bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer);
 
 typedef enum {
/* If set, this forces all non-flat fragment shader inputs to be
diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index 222a558b06..4f25c09d00 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -773,7 +773,7 @@ try_replace_constant_input(nir_shader *shader,
 }
 
 bool
-nir_link_constant_varyings(nir_shader *producer, nir_shader *consumer)
+nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
 {
/* TODO: Add support for more shader stage combinations */
if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index ab88a5f1fc..42ecc34668 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -771,7 +771,7 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
   *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false);
}
 
-   if (nir_link_constant_varyings(*producer, *consumer))
+   if (nir_link_opt_varyings(*producer, *consumer))
   *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false);
 
NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 5176756433..71aeec9486 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -588,7 +588,7 @@ st_nir_link_shaders(nir_shader **producer, nir_shader 
**consumer, bool scalar)
 {
nir_lower_io_arrays_to_elements(*producer, *consumer);
 
-   if (nir_link_constant_varyings(*producer, *consumer))
+   if (nir_link_opt_varyings(*producer, *consumer))
   st_nir_opts(*consumer, scalar);
 
NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 4/5] nir: rework nir_link_opt_varyings()

2018-12-19 Thread Timothy Arceri

This just cleans things up a little and makes things safer for
derefs.
---
 src/compiler/nir/nir_linking_helpers.c | 28 +++---
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index f3125a9880..2a3e6f407c 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -700,14 +700,8 @@ nir_link_xfb_varyings(nir_shader *producer, nir_shader 
*consumer)
 }
 
 static bool
-can_replace_varying(nir_intrinsic_instr *store_intr)
+can_replace_varying(nir_variable *out_var)
 {
-   nir_deref_instr *out_deref = nir_src_as_deref(store_intr->src[0]);
-   if (out_deref->mode != nir_var_shader_out)
-  return false;
-
-   nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
-
/* Skip types that require more complex handling.
 * TODO: add support for these types.
 */
@@ -720,7 +714,7 @@ can_replace_varying(nir_intrinsic_instr *store_intr)
/* Limit this pass to scalars for now to keep things simple. Most varyings
 * should have been lowered to scalars at this point anyway.
 */
-   if (store_intr->num_components != 1)
+   if (!glsl_type_is_scalar(out_var->type))
   return false;
 
if (out_var->data.location < VARYING_SLOT_VAR0 ||
@@ -731,12 +725,8 @@ can_replace_varying(nir_intrinsic_instr *store_intr)
 }
 
 static bool
-try_replace_constant_input(nir_shader *shader,
-   nir_intrinsic_instr *store_intr)
+replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
 {
-   if (!can_replace_varying(store_intr))
-  return false;
-
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
 
nir_builder b;
@@ -810,11 +800,17 @@ nir_link_opt_varyings(nir_shader *producer, nir_shader 
*consumer)
   if (intr->intrinsic != nir_intrinsic_store_deref)
  continue;
 
-  if (intr->src[1].ssa->parent_instr->type != nir_instr_type_load_const) {
+  nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
+  if (out_deref->mode != nir_var_shader_out)
+ continue;
+
+  nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
+  if (!can_replace_varying(out_var))
  continue;
-  }
 
-  progress |= try_replace_constant_input(consumer, intr);
+  if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
+ progress |= replace_constant_input(consumer, intr);
+  }
}
 
return progress;
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir: create 32-bit bcsel for 32-bit conditions

2018-12-18 Thread Timothy Arceri


Reviewed-by: Timothy Arceri 

On 18/12/18 3:16 am, Rhys Perry wrote:

Signed-off-by: Rhys Perry 
---
  src/compiler/nir/nir_opt_peephole_select.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_opt_peephole_select.c 
b/src/compiler/nir/nir_opt_peephole_select.c
index ad9d0abec0..241627ed99 100644
--- a/src/compiler/nir/nir_opt_peephole_select.c
+++ b/src/compiler/nir/nir_opt_peephole_select.c
@@ -205,7 +205,9 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader 
*shader,
   break;
  
nir_phi_instr *phi = nir_instr_as_phi(instr);

-  nir_alu_instr *sel = nir_alu_instr_create(shader, nir_op_bcsel);
+  nir_op sel_op = nir_src_bit_size(if_stmt->condition) == 1 ?
+  nir_op_bcsel : nir_op_b32csel;
+  nir_alu_instr *sel = nir_alu_instr_create(shader, sel_op);
nir_src_copy(&sel->src[0].src, &if_stmt->condition, sel);
/* Splat the condition to all channels */
memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 29/29] nir/algebraic: Add some optimizations for D3D-style Booleans

2018-12-17 Thread Timothy Arceri




On 7/12/18 6:45 am, Jason Ekstrand wrote:

D3D Booleans use a 32-bit 0/-1 representation.  Because this previously
matched NIR exactly, we didn't have to really optimize for it.  Now that
we have 1-bit Booleans, we need some specific optimizations to chew
through the D3D12-style Booleans.

Shader-db results on Kaby Lake:

 total instructions in shared programs: 15136811 -> 14967944 (-1.12%)
 instructions in affected programs: 2457021 -> 2288154 (-6.87%)
 helped: 8318
 HURT: 10

 total cycles in shared programs: 373544524 -> 359701825 (-3.71%)
 cycles in affected programs: 151029683 -> 137186984 (-9.17%)
 helped: 7749
 HURT: 682

 total loops in shared programs: 4431 -> 4399 (-0.72%)
 loops in affected programs: 32 -> 0
 helped: 21
 HURT: 0

 total spills in shared programs: 10290 -> 10051 (-2.32%)
 spills in affected programs: 2532 -> 2293 (-9.44%)
 helped: 18
 HURT: 18

 total fills in shared programs: 22203 -> 21732 (-2.12%)
 fills in affected programs: 3319 -> 2848 (-14.19%)
 helped: 18
 HURT: 18

Note that a large chunk of the improvement is fixing regressions caused by
switching to 1-bit Booleans.  Previously, our ability to optimize D3D
booleans was improved by using the D3D representation directly in NIR.
Now that NIR does 1-bit bools, we need a few more optimizations.

Reviewed-by: Bas Nieuwenhuizen 
---
  src/compiler/nir/nir_opt_algebraic.py | 13 +
  1 file changed, 13 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 3c8af4692b5..506d45e55b5 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -534,6 +534,19 @@ optimizations = [
 (('bcsel', a, b, b), b),
 (('fcsel', a, b, b), b),
  
+   # D3D Boolean emulation

+   (('bcsel', a, -1, 0), ('ineg', ('b2i', 'a@1'))),
+   (('bcsel', a, 0, -1), ('ineg', ('b2i', ('inot', a)))),
+   (('iand', ('ineg', ('b2i', 'a@1')), ('ineg', ('b2i', 'b@1'))),
+    ('ineg', ('b2i', ('iand', a, b)))),
+   (('ior', ('ineg', ('b2i','a@1')), ('ineg', ('b2i', 'b@1'))),
+    ('ineg', ('b2i', ('ior', a, b)))),
+   (('ieq', ('ineg', ('b2i', 'a@1')), 0), ('inot', a)),
+   (('ieq', ('ineg', ('b2i', 'a@1')), -1), a),
+   (('ine', ('ineg', ('b2i', 'a@1')), 0), a),
+   (('ine', ('ineg', ('b2i', 'a@1')), -1), ('inot', a)),
+   (('iand', ('ineg', ('b2i', a)), '1.0@32'), ('b2f', a)),


Hi Jason,

It seems the '1.0@32' matching has been broken somewhere along the line 
in the recent rewrites. See my comments in the RADV bug for more info.


https://bugs.freedesktop.org/show_bug.cgi?id=109075




+
 # Conversions
 (('i2b32', ('b2i', 'a@32')), a),
 (('f2i', ('ftrunc', a)), ('f2i', a)),


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 7/7] nir/dead_write_vars: Get modes directly from derefs

2018-12-17 Thread Timothy Arceri


Series:

Reviewed-by: Timothy Arceri 

On 18/12/18 3:44 pm, Jason Ekstrand wrote:

Instead of going all the way back to the variable, just look at the
deref.  The modes are guaranteed to be the same by nir_validate whenever
the variable can be found.  This fixes clear_unused_for_modes for
derefs that don't have an accessible variable.
---
  src/compiler/nir/nir_opt_dead_write_vars.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_opt_dead_write_vars.c 
b/src/compiler/nir/nir_opt_dead_write_vars.c
index d43bf5c1193..2ae5f78b791 100644
--- a/src/compiler/nir/nir_opt_dead_write_vars.c
+++ b/src/compiler/nir/nir_opt_dead_write_vars.c
@@ -56,8 +56,7 @@ static void
  clear_unused_for_modes(struct util_dynarray *unused_writes, nir_variable_mode 
modes)
  {
 util_dynarray_foreach_reverse(unused_writes, struct write_entry, entry) {
-  nir_variable *var = nir_deref_instr_get_variable(entry->dst);
-  if (var->data.mode & modes)
+  if (entry->dst->mode & modes)
   *entry = util_dynarray_pop(unused_writes, struct write_entry);
 }
  }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] ac/nir_to_llvm: add ac_are_tessfactors_def_in_all_invocs()

2018-12-17 Thread Timothy Arceri

The following patch will use this with the radeonsi NIR backend
but I've added it to ac so we can use it with RADV in future.

This is a NIR implementation of the tgsi function
tgsi_scan_tess_ctrl().
---
 src/amd/common/ac_nir_to_llvm.c | 161 
 src/amd/common/ac_nir_to_llvm.h |   2 +
 2 files changed, 163 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 4294956de1..055940b75f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4063,3 +4063,164 @@ ac_lower_indirect_derefs(struct nir_shader *nir, enum 
chip_class chip_class)
 
nir_lower_indirect_derefs(nir, indirect_mask);
 }
+
+static unsigned
+get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
+{
+   if (intrin->intrinsic != nir_intrinsic_store_deref)
+   return 0;
+
+   nir_variable *var =
+   nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0]));
+
+   if (var->data.mode != nir_var_shader_out)
+   return 0;
+
+   unsigned writemask = 0;
+   const int location = var->data.location;
+   unsigned first_component = var->data.location_frac;
+   unsigned num_comps = intrin->dest.ssa.num_components;
+
+   if (location == VARYING_SLOT_TESS_LEVEL_INNER)
+   writemask = ((1 << num_comps + 1) - 1) << first_component;
+   else if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
+   writemask = (((1 << num_comps + 1) - 1) << first_component) << 
4;
+
+   return writemask;
+}
+
+static void
+scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask,
+  unsigned *cond_block_tf_writemask,
+  bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf)
+{
+   switch (cf_node->type) {
+   case nir_cf_node_block: {
+   nir_block *block = nir_cf_node_as_block(cf_node);
+   nir_foreach_instr(instr, block) {
+   if (instr->type != nir_instr_type_intrinsic)
+   continue;
+
+   nir_intrinsic_instr *intrin = 
nir_instr_as_intrinsic(instr);
+   if (intrin->intrinsic == nir_intrinsic_barrier) {
+
+   /* If we find a barrier in nested control flow 
put this in the
+* too hard basket. In GLSL this is not 
possible but it is in
+* SPIR-V.
+*/
+   if (is_nested_cf) {
+   *tessfactors_are_def_in_all_invocs = 
false;
+   return;
+   }
+
+   /* The following case must be prevented:
+*gl_TessLevelInner = ...;
+*barrier();
+*if (gl_InvocationID == 1)
+*   gl_TessLevelInner = ...;
+*
+* If you consider disjoint code segments 
separated by barriers, each
+* such segment that writes tess factor 
channels should write the same
+* channels in all codepaths within that 
segment.
+*/
+   if (upper_block_tf_writemask || 
cond_block_tf_writemask) {
+   /* Accumulate the result: */
+   *tessfactors_are_def_in_all_invocs &=
+   !(*cond_block_tf_writemask & 
~(*upper_block_tf_writemask));
+
+   /* Analyze the next code segment from 
scratch. */
+   *upper_block_tf_writemask = 0;
+   *cond_block_tf_writemask = 0;
+   }
+   } else
+   *upper_block_tf_writemask |= 
get_inst_tessfactor_writemask(intrin);
+   }
+
+   break;
+   }
+   case nir_cf_node_if: {
+   unsigned then_tessfactor_writemask = 0;
+   unsigned else_tessfactor_writemask = 0;
+
+   nir_if *if_stmt = nir_cf_node_as_if(cf_node);
+   foreach_list_typed(nir_cf_node, nested_node, node, 
&if_stmt->then_list) {
+   scan_tess_ctrl(nested_node, &then_tessfactor_writemask,
+  cond_block_tf_writemask,
+  tessfactors_are_def_in_all_invocs, true);
+   }
+
+   foreach_list_typed(nir_cf_node, nested_node, node, 
&if_stmt->else_list) {
+   scan_tess_ctrl(nested_node, &else_tessfactor_writemask,
+  cond_block_tf_writemask,
+

[Mesa-dev] [PATCH 3/3] radeonsi: make use of ac_are_tessfactors_def_in_all_invocs()

2018-12-17 Thread Timothy Arceri

---
 src/gallium/drivers/radeonsi/si_shader_nir.c | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index b81bea00b8..931d41245d 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -285,14 +285,8 @@ void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
if (nir->info.stage != MESA_SHADER_TESS_CTRL)
return;
 
-   /* Initial value = true. Here the pass will accumulate results from
-* multiple segments surrounded by barriers. If tess factors aren't
-* written at all, it's a shader bug and we don't care if this will be
-* true.
-*/
-   out->tessfactors_are_def_in_all_invocs = true;
-
-   /* TODO: Implement scanning of tess factors, see tgsi backend. */
+   out->tessfactors_are_def_in_all_invocs =
+   ac_are_tessfactors_def_in_all_invocs(nir);
 }
 
 void si_nir_scan_shader(const struct nir_shader *nir,
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] radeonsi: remove unrequired param in si_nir_scan_tess_ctrl()

2018-12-17 Thread Timothy Arceri

---
 src/gallium/drivers/radeonsi/si_shader.h| 1 -
 src/gallium/drivers/radeonsi/si_shader_nir.c| 1 -
 src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +-
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index f71e601574..cdb57958dd 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -708,7 +708,6 @@ const char *si_get_shader_name(const struct si_shader 
*shader, unsigned processo
 void si_nir_scan_shader(const struct nir_shader *nir,
struct tgsi_shader_info *info);
 void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
-  const struct tgsi_shader_info *info,
   struct tgsi_tessctrl_info *out);
 void si_lower_nir(struct si_shader_selector *sel);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 660b5bc356..b81bea00b8 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -278,7 +278,6 @@ static void scan_instruction(struct tgsi_shader_info *info,
 }
 
 void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
-  const struct tgsi_shader_info *info,
   struct tgsi_tessctrl_info *out)
 {
memset(out, 0, sizeof(*out));
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index de00df60ae..2d5d163247 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2238,7 +2238,7 @@ static void *si_create_shader_selector(struct 
pipe_context *ctx,
sel->nir = state->ir.nir;
 
si_nir_scan_shader(sel->nir, &sel->info);
-   si_nir_scan_tess_ctrl(sel->nir, &sel->info, &sel->tcs_info);
+   si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
 
si_lower_nir(sel);
}
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] nir: link time opt duplicate varyings

2018-12-14 Thread Timothy Arceri


On 15/12/18 1:16 pm, Timothy Arceri wrote:

On 15/12/18 7:32 am, Marek Olšák wrote:

For patches 1-3:

Reviewed-by: Marek Olšák <marek.ol...@amd.com>




Thanks!


I'm not so knowledgeable to be able to comment on patch 4.

Does it also merge varyings such as (x,y,undef,undef) and 
(x,undef,undef,w)? There is a game which outputs (x,y,z,undef) and 
(x,y,undef,undef) where the vertex shader is a SSO.


No. All of the link-time optimisations in NIR are currently only applied 
at actual link-time. I've often thought about adding a way to apply 
these to SSO shaders but this is unlikely something I will end up 
working on since our main focus these days is on Vulkan and there we 
always link the entire pipeline so it's not an issue.




Just to be clear. The existing NIR link-time optimisations we have will 
already convert (x,y,undef,undef) and (x,undef,undef,w) to (x,y,x,w).


This pass just takes things further, eliminating components holding the 
same value if we can detect it.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] nir: link time opt duplicate varyings

2018-12-14 Thread Timothy Arceri


On 15/12/18 7:32 am, Marek Olšák wrote:

For patches 1-3:

Reviewed-by: Marek Olšák <marek.ol...@amd.com>



Thanks!


I'm not so knowledgeable to be able to comment on patch 4.

Does it also merge varyings such as (x,y,undef,undef) and 
(x,undef,undef,w)? There is a game which outputs (x,y,z,undef) and 
(x,y,undef,undef) where the vertex shader is a SSO.


No. All of the link-time optimisations in NIR are currently only applied 
at actual link-time. I've often thought about adding a way to apply 
these to SSO shaders but this is unlikely something I will end up 
working on since our main focus these days is on Vulkan and there we 
always link the entire pipeline so it's not an issue.




Marek

On Mon, Dec 10, 2018 at 11:28 PM Timothy Arceri <tarc...@itsqueeze.com> wrote:


If we are outputting the same value to more than one output
component rewrite the inputs to read from a single component.

This will allow the duplicate varying components to be optimised
away by the existing opts.

shader-db results i965 (SKL):

total instructions in shared programs: 12869230 -> 12860886 (-0.06%)
instructions in affected programs: 322601 -> 314257 (-2.59%)
helped: 3080
HURT: 8

total cycles in shared programs: 317792574 -> 317730593 (-0.02%)
cycles in affected programs: 2584925 -> 2522944 (-2.40%)
helped: 2975
HURT: 477

shader-db results radeonsi (VEGA):

Totals from affected shaders:
SGPRS: 30960 -> 31056 (0.31 %)
VGPRS: 17052 -> 16672 (-2.23 %)
Spilled SGPRs: 184 -> 167 (-9.24 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 562532 -> 549404 (-2.33 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 6011 -> 6110 (1.65 %)
Wait states: 0 -> 0 (0.00 %)

vkpipeline-db results RADV (VEGA):

Totals from affected shaders:
SGPRS: 14880 -> 15080 (1.34 %)
VGPRS: 10872 -> 10888 (0.15 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 674016 -> 668396 (-0.83 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 2708 -> 2704 (-0.15 %)
Wait states: 0 -> 0 (0.00 %)
---
  src/compiler/nir/nir_linking_helpers.c | 95 ++
  1 file changed, 95 insertions(+)

diff --git a/src/compiler/nir/nir_linking_helpers.c
b/src/compiler/nir/nir_linking_helpers.c
index 37644d339f..bdfa7b8c4d 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -700,6 +700,27 @@ nir_link_xfb_varyings(nir_shader *producer,
nir_shader *consumer)
     }
  }

+static nir_variable *
+get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
+{
+   nir_variable *input_var = NULL;
+   nir_foreach_variable(var, &consumer->inputs) {
+      if (var->data.location >= VARYING_SLOT_VAR0 &&
+          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
+
+         if (var->data.location == out_var->data.location &&
+             var->data.location_frac == out_var->data.location_frac &&
+             var->data.interpolation == out_var->data.interpolation &&
+             get_interp_loc(var) == get_interp_loc(out_var)) {
+            input_var = var;
+            break;
+         }
+      }
+   }
+
+   return input_var;
+}
+
  static bool
  can_replace_varying(nir_variable *out_var, nir_intrinsic_instr
*store_intr)
  {
@@ -782,6 +803,57 @@ try_replace_constant_input(nir_shader *shader,
     return progress;
  }

+static bool
+try_replace_duplicate_input(nir_shader *shader, nir_variable
*input_var,
+                            nir_intrinsic_instr *dup_store_intr)
+{
+   assert(input_var);
+
+   nir_variable *dup_out_var =
+ 
nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

+
+   if (!can_replace_varying(dup_out_var, dup_store_intr))
+      return false;
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   bool progress = false;
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         if (intr->intrinsic != nir_intrinsic_load_deref)
+            continue;
+
+         nir_variable *in_var =
+   
nir_deref_instr_get_variable(nir_src_as_deref(intr-&g

[Mesa-dev] [PATCH 1/2] tgsi/scan: fix loop exit point in tgsi_scan_tess_ctrl()

2018-12-13 Thread Timothy Arceri

This just happened not to crash/assert because all loops have at
least 1 if-statement and due to a second bug we end up matching
the same ENDIF to exit both the iteration over the if-statement
and the loop.

The second bug is fixed in the following patch.

Fixes: 386d165d8d09 ("tgsi/scan: add a new pass that analyzes tess factor 
writes")
---

No changes in shader-db. Didn't run against piglit.

 src/gallium/auxiliary/tgsi/tgsi_scan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c 
b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 4ca84902dd..6e2e23822c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -1171,7 +1171,7 @@ tgsi_scan_tess_ctrl(const struct tgsi_token *tokens,
 
   case TGSI_OPCODE_BGNLOOP:
  cond_block_tf_writemask |=
-get_block_tessfactor_writemask(info, &parse, TGSI_OPCODE_ENDIF);
+get_block_tessfactor_writemask(info, &parse, TGSI_OPCODE_ENDLOOP);
  continue;
 
   case TGSI_OPCODE_BARRIER:
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] tgsi/scan: correctly walk instructions in tgsi_scan_tess_ctrl()

2018-12-13 Thread Timothy Arceri

The previous code used a do while loop and continues after walking
a nested loop/if-statement. This means we end up evaluating the
last instruction from the nested block against the while condition
and potentially exit early if it matches the exit condition of the
outer block.

Fixes: 386d165d8d09 ("tgsi/scan: add a new pass that analyzes tess factor 
writes")
---
No changes in shader-db. Didn't run against piglit.

 src/gallium/auxiliary/tgsi/tgsi_scan.c | 72 +++---
 1 file changed, 43 insertions(+), 29 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c 
b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 6e2e23822c..d56844bdc2 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -1028,11 +1028,12 @@ get_block_tessfactor_writemask(const struct 
tgsi_shader_info *info,
struct tgsi_full_instruction *inst;
unsigned writemask = 0;
 
-   do {
-  tgsi_parse_token(parse);
-  assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
-  inst = &parse->FullToken.FullInstruction;
-  check_no_subroutines(inst);
+   tgsi_parse_token(parse);
+   assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
+   inst = &parse->FullToken.FullInstruction;
+   check_no_subroutines(inst);
+
+   while (inst->Instruction.Opcode != end_opcode) {
 
   /* Recursively process nested blocks. */
   switch (inst->Instruction.Opcode) {
@@ -1040,20 +1041,26 @@ get_block_tessfactor_writemask(const struct 
tgsi_shader_info *info,
   case TGSI_OPCODE_UIF:
  writemask |=
 get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDIF);
- continue;
+ break;
 
   case TGSI_OPCODE_BGNLOOP:
  writemask |=
 get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDLOOP);
- continue;
+ break;
 
   case TGSI_OPCODE_BARRIER:
  unreachable("nested BARRIER is illegal");
- continue;
+ break;
+
+  default:
+ writemask |= get_inst_tessfactor_writemask(info, inst);
   }
 
-  writemask |= get_inst_tessfactor_writemask(info, inst);
-   } while (inst->Instruction.Opcode != end_opcode);
+  tgsi_parse_token(parse);
+  assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
+  inst = &parse->FullToken.FullInstruction;
+  check_no_subroutines(inst);
+   }
 
return writemask;
 }
@@ -1067,18 +1074,20 @@ get_if_block_tessfactor_writemask(const struct 
tgsi_shader_info *info,
struct tgsi_full_instruction *inst;
unsigned then_tessfactor_writemask = 0;
unsigned else_tessfactor_writemask = 0;
+   unsigned writemask;
bool is_then = true;
 
-   do {
-  tgsi_parse_token(parse);
-  assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
-  inst = &parse->FullToken.FullInstruction;
-  check_no_subroutines(inst);
+   tgsi_parse_token(parse);
+   assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
+   inst = &parse->FullToken.FullInstruction;
+   check_no_subroutines(inst);
+
+   while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF) {
 
   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_ELSE:
  is_then = false;
- continue;
+ break;
 
   /* Recursively process nested blocks. */
   case TGSI_OPCODE_IF:
@@ -1087,28 +1096,33 @@ get_if_block_tessfactor_writemask(const struct 
tgsi_shader_info *info,
is_then ? 
&then_tessfactor_writemask :
  
&else_tessfactor_writemask,
cond_block_tf_writemask);
- continue;
+ break;
 
   case TGSI_OPCODE_BGNLOOP:
  *cond_block_tf_writemask |=
 get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDLOOP);
- continue;
+ break;
 
   case TGSI_OPCODE_BARRIER:
  unreachable("nested BARRIER is illegal");
- continue;
-  }
-
-  /* Process an instruction in the current block. */
-  unsigned writemask = get_inst_tessfactor_writemask(info, inst);
+ break;
+  default:
+ /* Process an instruction in the current block. */
+ writemask = get_inst_tessfactor_writemask(info, inst);
 
-  if (writemask) {
- if (is_then)
-then_tessfactor_writemask |= writemask;
- else
-else_tessfactor_writemask |= writemask;
+ if (writemask) {
+if (is_then)
+   then_tessfactor_writemask |= writemask;
+else
+   else_tessfactor_writemask |= writemask;
+ }
   }
-   } while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF);
+
+  tgsi_parse_token(parse);
+  assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
+  inst = &parse->FullToken.FullInstruction;
+  check_no_subroutines(inst);
+   }
 
if (then_tessfactor_writemask || else_tessfactor_writemask) {

Re: [Mesa-dev] [PATCH] spirv/nir: adjust location assignment for the case of arrays of blocks

2018-12-13 Thread Timothy Arceri




On 13/12/18 11:11 pm, Alejandro Piñeiro wrote:

This is needed due to how the types get rearranged after the struct
splitting.

So for example, this array of blocks:

   layout(location = 0) out block {
 vec4 v;
 vec3 v2;
   } x[2];

Would be split into two nir variables with the following types:
   * vec4 v[2]
   * vec3 v2[2]

So we need to take into account the length of the array to avoid
the locations overlapping one another.
---


Hi Jason,

again, sending in advance patches, just in case you are working on the
same.

I was able to fix the location overlapping without all those crazy
ideas about lowering array of blocks into individual blocks, by just
adjusting the locations as this patch shows.

FWIW, the resulting locations are equivalent to those that we get with
GLSL IR, that results on a similar splitting.

With this change I got the following working:
* SPIR-V simple arrays of blocks input/outputs

* The arrays of blocks inputs/outputs + interpolator qualifiers
  test I mentioned to you last week [1] when run its SPIR-V
  equivalent.

* SPIR-V xfb tests using arrays of blocks, where the xfb offset are
  assigned to all block members.

* SPIR-V xfb tests using arrays of blocks, where the xfb offset is
  assigned to just one member, so just that member is captured,
  although as many times as the array length (yes! afaiu by spec
  that needs to work)

So now, the only pending thing is a cleanup and send the series to
review. Specifically, I think that this series can be put on top of
current master instead of the arb_gl_spirv. Will try that and send a
final series this week or early next week.

BR


[1] 
https://github.com/Igalia/piglit/blob/master/tests/spec/glsl-1.50/execution/interface-block-interpolation-array.shader_test



  src/compiler/spirv/vtn_variables.c | 14 +-
  1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index a8f2fdfa534..87386cee42f 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -1672,6 +1672,14 @@ add_missing_member_locations(struct vtn_variable *var,
glsl_get_length(glsl_without_array(var->type->type));
 int location = var->base_location;
  
+   /* To know if it is a interface block we can't ask directly for

+* var->type->block because on the case of arrays of blocks, block is set
+* on the array_element.
+*/
+   bool is_array_block = var->var->interface_type != NULL &&
+  glsl_type_is_array(var->type->type);
+   int adjustment = is_array_block ? glsl_get_length(var->type->type) : 1;
+


I think you probably want to use glsl_get_aoa_size() here instead of 
glsl_get_length() ?



 for (unsigned i = 0; i < length; i++) {
/* From the Vulkan spec:
 *
@@ -1702,8 +1710,12 @@ add_missing_member_locations(struct vtn_variable *var,
const struct glsl_type *member_type =
   glsl_get_struct_field(glsl_without_array(var->type->type), i);
  
+  /* For arrays of interface blocks we can't just add the attribute slots

+   * of a member type due how the splitting would rearrange the types, so
+   * we need to adjust for the array length in that case.
+   */
location +=
- glsl_count_attribute_slots(member_type, is_vertex_input);
+ glsl_count_attribute_slots(member_type, is_vertex_input) * adjustment;
 }
  }
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir: remove unused variable

2018-12-13 Thread Timothy Arceri

I don't think I've seen the warning so it's likely I use this (or 
remove it) in a future patch. But feel free to push this for now.


On 14/12/18 1:25 am, Alejandro Piñeiro wrote:

To avoid the following warning:
./src/compiler/nir/nir_loop_analyze.c:807:16: warning: unused variable ‘ns’ 
[-Wunused-variable]
 nir_shader *ns = impl->function->shader;
---

Perhaps this is solved on any of the loop analysis patches pending to
be reviewed, but just in case, sending it.

  src/compiler/nir/nir_loop_analyze.c | 1 -
  1 file changed, 1 deletion(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 3de45401975..259f02a854e 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -803,7 +803,6 @@ get_loop_info(loop_info_state *state, nir_function_impl 
*impl)
 /* Run through each of the terminators and try to compute a trip-count */
 find_trip_count(state);
  
-   nir_shader *ns = impl->function->shader;

 nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
if (force_unroll_heuristics(state, block)) {
   state->loop->info->force_unroll = true;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/4] nir: add if opt opt_if_loop_last_continue()

2018-12-13 Thread Timothy Arceri


On 13/12/18 8:10 pm, Samuel Pitoiset wrote:

This introduces crashes for

dEQP-VK.spirv_assembly.instruction.graphics.selection_block_order.out_of_order_frag 

dEQP-VK.spirv_assembly.instruction.graphics.selection_block_order.out_of_order_geom 

dEQP-VK.spirv_assembly.instruction.graphics.selection_block_order.out_of_order_tessc 

dEQP-VK.spirv_assembly.instruction.graphics.selection_block_order.out_of_order_tesse 

dEQP-VK.spirv_assembly.instruction.graphics.selection_block_order.out_of_order_vert 



Test case 
'dEQP-VK.spirv_assembly.instruction.graphics.selection_block_order.out_of_order_frag'.. 

deqp-vk: nir/nir_control_flow.c:553: stitch_blocks: Assertion 
`exec_list_is_empty(>instr_list)' failed.

Aborted (core dumped)

Did you run CTS?


Yes, I'd run it through Intel's CI countless times. I've sent a fix; it seems 
the ordering of optimisations on RADV vs Intel's drivers causes us to 
trip up on this one where the other drivers did not.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] nir: fix opt_if_loop_last_continue()

2018-12-13 Thread Timothy Arceri

The pass did not correctly handle loops ending in:

if ssa_7 {
block block_8:
/* preds: block_7 */
continue
/* succs: block_1 */
} else {
block block_9:
/* preds: block_7 */
break
/* succs: block_11 */
}

The break will get eliminated by another opt but if this pass gets
called first (as it does on RADV) we ended up inserting
instructions after the break.

Fixes: 5921a19d4b0c ("nir: add if opt opt_if_loop_last_continue()")
---
 src/compiler/nir/nir_opt_if.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 691448a96e..c21ac9219f 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -318,9 +318,13 @@ opt_if_loop_last_continue(nir_loop *loop)
nir_cf_extract(, nir_after_cf_node(if_node),
 nir_after_block(last_block));
if (then_ends_in_continue) {
-  nir_cf_reinsert(, nir_after_cf_list(>else_list));
+  nir_cursor last_blk_cursor = nir_after_cf_list(>else_list);
+  nir_cf_reinsert(,
+  nir_after_block_before_jump(last_blk_cursor.block));
} else {
-  nir_cf_reinsert(, nir_after_cf_list(>then_list));
+  nir_cursor last_blk_cursor = nir_after_cf_list(>then_list);
+  nir_cf_reinsert(,
+  nir_after_block_before_jump(last_blk_cursor.block));
}
 
/* In order to avoid running nir_lower_regs_to_ssa_impl() every time an if
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 06/10] nir: simplify the loop analysis trip count code a little

2018-12-12 Thread Timothy Arceri

Here we create a helper is_supported_terminator_condition()
and use that rather than embedding all the trip count code
inside a switch.

The new helper will also be used in a following patch.
---
 src/compiler/nir/nir_loop_analyze.c | 176 +++-
 1 file changed, 95 insertions(+), 81 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 0b37da6569..9a1962cfd8 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -672,6 +672,26 @@ calculate_iterations(nir_const_value *initial, 
nir_const_value *step,
return -1;
 }
 
+static bool
+is_supported_terminator_condition(nir_alu_instr *alu)
+{
+   switch (alu->op) {
+   case nir_op_fge:
+   case nir_op_ige:
+   case nir_op_uge:
+   case nir_op_flt:
+   case nir_op_ilt:
+   case nir_op_ult:
+   case nir_op_feq:
+   case nir_op_ieq:
+   case nir_op_fne:
+   case nir_op_ine:
+  return true;
+   default:
+  return false;
+   }
+}
+
 /* Run through each of the terminators of the loop and try to infer a possible
  * trip-count. We need to check them all, and set the lowest trip-count as the
  * trip-count of our loop. If one of the terminators has an undecidable
@@ -703,100 +723,94 @@ find_trip_count(loop_info_state *state)
   nir_loop_variable *limit = NULL;
   bool limit_rhs = true;
 
-  switch (alu->op) {
-  case nir_op_fge:  case nir_op_ige:  case nir_op_uge:
-  case nir_op_flt:  case nir_op_ilt:  case nir_op_ult:
-  case nir_op_feq:  case nir_op_ieq:
-  case nir_op_fne:  case nir_op_ine:
-
- /* We assume that the limit is the "right" operand */
- basic_ind = get_loop_var(alu->src[0].src.ssa, state);
- limit = get_loop_var(alu->src[1].src.ssa, state);
-
- if (basic_ind->type != basic_induction) {
-/* We had it the wrong way, flip things around */
-basic_ind = get_loop_var(alu->src[1].src.ssa, state);
-limit = get_loop_var(alu->src[0].src.ssa, state);
-limit_rhs = false;
-terminator->induction_rhs = true;
- }
+  if (!is_supported_terminator_condition(alu)) {
+ trip_count_known = false;
+ continue;
+  }
 
- /* The comparison has to have a basic induction variable for us to be
-  * able to find trip counts.
-  */
- if (basic_ind->type != basic_induction) {
-trip_count_known = false;
-continue;
- }
+  /* We assume that the limit is the "right" operand */
+  basic_ind = get_loop_var(alu->src[0].src.ssa, state);
+  limit = get_loop_var(alu->src[1].src.ssa, state);
 
- /* Attempt to find a constant limit for the loop */
- nir_const_value limit_val;
- if (is_var_constant(limit)) {
-limit_val =
-   nir_instr_as_load_const(limit->def->parent_instr)->value;
- } else {
-trip_count_known = false;
-
-if (!try_find_limit_of_alu(limit, &limit_val, terminator, state)) {
-   /* Guess loop limit based on array access */
-   if (!guess_loop_limit(state, &limit_val, basic_ind)) {
-  continue;
-   }
+  if (basic_ind->type != basic_induction) {
+ /* We had it the wrong way, flip things around */
+ basic_ind = get_loop_var(alu->src[1].src.ssa, state);
+ limit = get_loop_var(alu->src[0].src.ssa, state);
+ limit_rhs = false;
+ terminator->induction_rhs = true;
+  }
+
+  /* The comparison has to have a basic induction variable for us to be
+   * able to find trip counts.
+   */
+  if (basic_ind->type != basic_induction) {
+ trip_count_known = false;
+ continue;
+  }
 
-   guessed_trip_count = true;
+  /* Attempt to find a constant limit for the loop */
+  nir_const_value limit_val;
+  if (is_var_constant(limit)) {
+ limit_val =
+nir_instr_as_load_const(limit->def->parent_instr)->value;
+  } else {
+ trip_count_known = false;
+
+ if (!try_find_limit_of_alu(limit, &limit_val, terminator, state)) {
+/* Guess loop limit based on array access */
+if (!guess_loop_limit(state, &limit_val, basic_ind)) {
+   continue;
 }
- }
 
- /* We have determined that we have the following constants:
-  * (With the typical int i = 0; i < x; i++; as an example)
-  *- Upper limit.
-  *- Starting value
-  *- Step / iteration size
-  * Thats all thats needed to calculate the trip-count
-  */
+guessed_trip_count = true;
+ }
+  }
 
- nir_const_value initial_val =
-nir_instr_as_load_const(basic_ind->ind->def_outside_loop->
-   def->parent_instr)->value;
+  /* We have determined that we have the

[Mesa-dev] [PATCH v2 09/10] nir: pass nir_op to calculate_iterations()

2018-12-12 Thread Timothy Arceri

Rather than getting this from the alu instruction this allows us
some flexibility. In the following pass we instead pass the
inverse op.
---
 src/compiler/nir/nir_loop_analyze.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 0352a6fba5..cf2655aa20 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -597,7 +597,8 @@ test_iterations(int32_t iter_int, nir_const_value *step,
 static int
 calculate_iterations(nir_const_value *initial, nir_const_value *step,
  nir_const_value *limit, nir_loop_variable *alu_def,
- nir_alu_instr *cond_alu, bool limit_rhs, bool invert_cond)
+ nir_alu_instr *cond_alu, nir_op alu_op, bool limit_rhs,
+ bool invert_cond)
 {
assert(initial != NULL && step != NULL && limit != NULL);
 
@@ -612,10 +613,10 @@ calculate_iterations(nir_const_value *initial, 
nir_const_value *step,
nir_alu_type induction_base_type =
   nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type);
if (induction_base_type == nir_type_int || induction_base_type == 
nir_type_uint) {
-  
assert(nir_alu_type_get_base_type(nir_op_infos[cond_alu->op].input_types[1]) == 
nir_type_int ||
- 
nir_alu_type_get_base_type(nir_op_infos[cond_alu->op].input_types[1]) == 
nir_type_uint);
+  assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) 
== nir_type_int ||
+ nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) 
== nir_type_uint);
} else {
-  
assert(nir_alu_type_get_base_type(nir_op_infos[cond_alu->op].input_types[0]) ==
+  assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[0]) ==
  induction_base_type);
}
 
@@ -639,7 +640,7 @@ calculate_iterations(nir_const_value *initial, 
nir_const_value *step,
   trip_offset = 1;
}
 
-   int iter_int = get_iteration(cond_alu->op, initial, step, limit);
+   int iter_int = get_iteration(alu_op, initial, step, limit);
 
/* If iter_int is negative the loop is ill-formed or is the conditional is
 * unsigned with a huge iteration count so don't bother going any further.
@@ -662,7 +663,7 @@ calculate_iterations(nir_const_value *initial, 
nir_const_value *step,
for (int bias = -1; bias <= 1; bias++) {
   const int iter_bias = iter_int + bias;
 
-  if (test_iterations(iter_bias, step, limit, cond_alu->op, bit_size,
+  if (test_iterations(iter_bias, step, limit, alu_op, bit_size,
   induction_base_type, initial,
   limit_rhs, invert_cond)) {
  return iter_bias > 0 ? iter_bias - trip_offset : iter_bias;
@@ -769,6 +770,8 @@ find_trip_count(loop_info_state *state)
   }
 
   nir_alu_instr *alu = nir_instr_as_alu(terminator->conditional_instr);
+  nir_op alu_op = alu->op;
+
   if (!is_supported_terminator_condition(alu)) {
  trip_count_known = false;
  continue;
@@ -825,7 +828,7 @@ find_trip_count(loop_info_state *state)
   int iterations = calculate_iterations(_val, _val,
 _val,
 basic_ind->ind->alu_def, alu,
-limit_rhs,
+alu_op, limit_rhs,
 terminator->continue_from_then);
 
   /* Where we not able to calculate the iteration count */
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 01/10] nir: add guess trip count support to loop analysis

2018-12-12 Thread Timothy Arceri

This detects an induction variable used as an array index to guess
the trip count of the loop. This enables us to do a partial
unroll of the loop, which can eventually result in the loop being
eliminated.

v2: check if the induction var is used to index more than a single
array and if so get the size of the smallest array.
---
 src/compiler/nir/nir.h  |  4 ++
 src/compiler/nir/nir_loop_analyze.c | 88 +++--
 2 files changed, 86 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 5d9c96fe11..b886b83325 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1879,6 +1879,7 @@ typedef struct {
nir_block *continue_from_block;
 
bool continue_from_then;
+   bool induction_rhs;
 
struct list_head loop_terminator_link;
 } nir_loop_terminator;
@@ -1887,6 +1888,9 @@ typedef struct {
/* Number of instructions in the loop */
unsigned num_instructions;
 
+   /* Guessed trip count based on array indexing */
+   unsigned guessed_trip_count;
+
/* Maximum number of times the loop is run (if known) */
unsigned max_trip_count;
 
diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 3de4540197..c46c491963 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -426,6 +426,57 @@ find_array_access_via_induction(loop_info_state *state,
return 0;
 }
 
+static bool
+guess_loop_limit(loop_info_state *state, nir_const_value *limit_val,
+ nir_loop_variable *basic_ind)
+{
+   unsigned min_array_size = 0;
+
+   nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
+  nir_foreach_instr(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ /* Check for arrays variably-indexed by a loop induction variable. */
+ if (intrin->intrinsic == nir_intrinsic_load_deref ||
+ intrin->intrinsic == nir_intrinsic_store_deref ||
+ intrin->intrinsic == nir_intrinsic_copy_deref) {
+
+nir_loop_variable *array_idx = NULL;
+unsigned array_size =
+   find_array_access_via_induction(state,
+   
nir_src_as_deref(intrin->src[0]),
+   &array_idx);
+if (basic_ind == array_idx &&
+(min_array_size == 0 || min_array_size > array_size)) {
+   min_array_size = array_size;
+}
+
+if (intrin->intrinsic != nir_intrinsic_copy_deref)
+   continue;
+
+array_size =
+   find_array_access_via_induction(state,
+   
nir_src_as_deref(intrin->src[1]),
+   &array_idx);
+if (basic_ind == array_idx &&
+(min_array_size == 0 || min_array_size > array_size)) {
+   min_array_size = array_size;
+}
+ }
+  }
+   }
+
+   if (min_array_size) {
+  limit_val->i32[0] = min_array_size;
+  return true;
+   }
+
+   return false;
+}
+
 static int32_t
 get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step,
   nir_const_value *limit)
@@ -602,6 +653,7 @@ static void
 find_trip_count(loop_info_state *state)
 {
bool trip_count_known = true;
+   bool guessed_trip_count = false;
nir_loop_terminator *limiting_terminator = NULL;
int max_trip_count = -1;
 
@@ -637,16 +689,33 @@ find_trip_count(loop_info_state *state)
 basic_ind = get_loop_var(alu->src[1].src.ssa, state);
 limit = get_loop_var(alu->src[0].src.ssa, state);
 limit_rhs = false;
+terminator->induction_rhs = true;
  }
 
- /* The comparison has to have a basic induction variable
-  * and a constant for us to be able to find trip counts
+ /* The comparison has to have a basic induction variable for us to be
+  * able to find trip counts.
   */
- if (basic_ind->type != basic_induction || !is_var_constant(limit)) {
+ if (basic_ind->type != basic_induction) {
 trip_count_known = false;
 continue;
  }
 
+ /* Attempt to find a constant limit for the loop */
+ nir_const_value limit_val;
+ if (is_var_constant(limit)) {
+limit_val =
+   nir_instr_as_load_const(limit->def->parent_instr)->value;
+ } else {
+trip_count_known = false;
+
+/* Guess loop limit based on array access */
+if (!guess_loop_limit(state, &limit_val, basic_ind)) {
+   continue;
+}
+
+guessed_trip_count = true;
+ }
+
  /* We have determined that we have the following constants:
   * (With the typical int i = 0; i < x;

[Mesa-dev] [PATCH v2 05/10] nir: unroll some loops with a variable limit

2018-12-12 Thread Timothy Arceri

Some loops can have a single terminator but the exact trip
count is still unknown. For example:

   for (int i = 0; i < imin(x, 4); i++)
  ...

Shader-db results radeonsi (all affected are from Tropico 5):

Totals from affected shaders:
SGPRS: 200 -> 208 (4.00 %)
VGPRS: 164 -> 148 (-9.76 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 7208 -> 8672 (20.31 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 23 -> 27 (17.39 %)
Wait states: 0 -> 0 (0.00 %)

vkpipeline-db results RADV (Unrolls some Skyrim VR shaders):

Totals from affected shaders:
SGPRS: 304 -> 304 (0.00 %)
VGPRS: 300 -> 292 (-2.67 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 15516 -> 26388 (70.07 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 29 -> 29 (0.00 %)
Wait states: 0 -> 0 (0.00 %)

v2: fix bug where last iteration would get optimised away by
mistake.
---
 src/compiler/nir/nir_opt_loop_unroll.c | 55 ++
 1 file changed, 55 insertions(+)

diff --git a/src/compiler/nir/nir_opt_loop_unroll.c 
b/src/compiler/nir/nir_opt_loop_unroll.c
index 9630e0738a..70e6c67bde 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -460,6 +460,55 @@ complex_unroll(nir_loop *loop, nir_loop_terminator 
*unlimit_term,
_mesa_hash_table_destroy(remap_table, NULL);
 }
 
+/**
+ * Unroll loops where we only have a single terminator but the exact trip
+ * count is unknown. For example:
+ *
+ *for (int i = 0; i < imin(x, 4); i++)
+ *   ...
+ */
+static void
+complex_unroll_single_terminator(nir_loop *loop)
+{
+   assert(list_length(&loop->info->loop_terminator_list) == 1);
+   assert(loop->info->limiting_terminator);
+   assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif,
+ 
loop->info->limiting_terminator->break_block));
+
+   nir_loop_terminator *terminator = loop->info->limiting_terminator;
+
+   loop_prepare_for_unroll(loop);
+
+   /* Pluck out the loop header */
+   nir_cf_list lp_header;
+   nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
+  nir_before_cf_node(&terminator->nif->cf_node));
+
+   struct hash_table *remap_table =
+  _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+  _mesa_key_pointer_equal);
+
+   /* We need to clone the loop one extra time in order to clone the lcssa
+* vars for the last iteration (they are inside the following ifs break
+* branch). We leave other passes to clean up this redundant if.
+*/
+   unsigned num_times_to_clone = loop->info->max_trip_count + 1;
+
+   nir_cf_list lp_body;
+   nir_cf_node *unroll_loc =
+  complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
+   remap_table, num_times_to_clone);
+
+   /* Delete the original loop header and body */
+   nir_cf_delete(&lp_header);
+   nir_cf_delete(&lp_body);
+
+   /* The original loop has been replaced so remove it. */
+   nir_cf_node_remove(&loop->cf_node);
+
+   _mesa_hash_table_destroy(remap_table, NULL);
+}
+
 /* Unrolls the classic wrapper loops e.g
  *
  *do {
@@ -856,6 +905,12 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool 
*has_nested_loop_out)
 }
 progress = true;
  }
+
+ if (num_lt == 1) {
+assert(loop->info->limiting_terminator->exact_trip_count_unknown);
+complex_unroll_single_terminator(loop);
+progress = true;
+ }
   }
}
 
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 02/10] nir: add new partially_unrolled bool to nir_loop

2018-12-12 Thread Timothy Arceri

In order to stop continuously partially unrolling the same loop
we add the bool partially_unrolled to nir_loop, we add it here
rather than in nir_loop_info because nir_loop_info is only set
via loop analysis and is intended to be cleared before each
analysis. Also nir_loop_info is never cloned.
---
 src/compiler/nir/nir.h   | 1 +
 src/compiler/nir/nir_clone.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index b886b83325..055d4d30d3 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1918,6 +1918,7 @@ typedef struct {
struct exec_list body; /** < list of nir_cf_node */
 
nir_loop_info *info;
+   bool partially_unrolled;
 } nir_loop;
 
 /**
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index 989c5051a5..b229094679 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -548,6 +548,7 @@ static nir_loop *
 clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop)
 {
nir_loop *nloop = nir_loop_create(state->ns);
+   nloop->partially_unrolled = loop->partially_unrolled;
 
 nir_cf_node_insert_end(cf_list, &nloop->cf_node);
 
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 07/10] nir: add helper to return inversion op of a comparision

2018-12-12 Thread Timothy Arceri

This will be used to help find the trip count of loops that look
like the following:

   while (a < x && i < 8) {
  ...
  i++;
   }

Where the NIR will end up looking something like this:

   vec1 32 ssa_0 = load_const (0x00000000 /* 0.000000 */)
   vec1 32 ssa_1 = load_const (0x00000008 /* 0.000000 */)
   loop {
  ...
  vec1 32 ssa_28 = ige ssa_26, ssa_3
  vec1 32 ssa_29 = ige ssa_27, ssa_1
  vec1 32 ssa_30 = iadd ssa_29, ssa_28
  vec1 ssa_31 = ieq ssa_30, ssa_0
  if ssa_31 {
 ...
 break
  } else {
 ...
  }
  ...
   }

So in order to find the trip count we need to find the inverse of
ige.
---
 src/compiler/nir/nir_loop_analyze.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 9a1962cfd8..b96c723f2a 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -672,6 +672,35 @@ calculate_iterations(nir_const_value *initial, 
nir_const_value *step,
return -1;
 }
 
+static nir_op
+inverse_comparision(nir_alu_instr *alu)
+{
+   switch (alu->op) {
+   case nir_op_fge:
+  return nir_op_flt;
+   case nir_op_ige:
+  return nir_op_ilt;
+   case nir_op_uge:
+  return nir_op_ult;
+   case nir_op_flt:
+  return nir_op_fge;
+   case nir_op_ilt:
+  return nir_op_ige;
+   case nir_op_ult:
+  return nir_op_uge;
+   case nir_op_feq:
+  return nir_op_fne;
+   case nir_op_ieq:
+  return nir_op_ine;
+   case nir_op_fne:
+  return nir_op_feq;
+   case nir_op_ine:
+  return nir_op_ieq;
+   default:
+  unreachable("Unsuported comparision!");
+   }
+}
+
 static bool
 is_supported_terminator_condition(nir_alu_instr *alu)
 {
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 03/10] nir: add partial loop unrolling support

2018-12-12 Thread Timothy Arceri

This adds partial loop unrolling support and makes use of a
guessed trip count based on array access.

The code is written so that we could use partial unrolling
more generally, but for now it's only used when we have guessed
the trip count.

We use partial unrolling for this guessed trip count because it's
possible any out of bounds array access doesn't otherwise affect
the shader, e.g. the stores/loads to/from the array are unused. So
we insert a copy of the loop in the innermost continue branch of
the unrolled loop. Later on it's possible for nir_opt_dead_cf()
to then remove the loop in some cases.

A Renderdoc capture from the Rise of the Tomb Raider benchmark,
reports the following change in an affected compute shader:

GPU duration: 350 -> 325 microseconds

shader-db results radeonsi VEGA (NIR backend):

Totals from affected shaders:
SGPRS: 1120 -> 928 (-17.14 %)
VGPRS: 768 -> 516 (-32.81 %)
Spilled SGPRs: 666 -> 157 (-76.43 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 44072 -> 51880 (17.72 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 108 -> 147 (36.11 %)
Wait states: 0 -> 0 (0.00 %)

shader-db results i965 SKL:

total instructions in shared programs: 13098265 -> 13103359 (0.04%)
instructions in affected programs: 5126 -> 10220 (99.38%)
helped: 0
HURT: 21

total cycles in shared programs: 332039949 -> 331985622 (-0.02%)
cycles in affected programs: 289252 -> 234925 (-18.78%)
helped: 12
HURT: 9

vkpipeline-db results VEGA:

Totals from affected shaders:
SGPRS: 184 -> 184 (0.00 %)
VGPRS: 448 -> 448 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 26460 -> 25092 (-5.17 %) bytes
LDS: 6 -> 6 (0.00 %) blocks
Max Waves: 5 -> 5 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
---
 src/compiler/nir/nir_opt_loop_unroll.c | 206 -
 1 file changed, 198 insertions(+), 8 deletions(-)

diff --git a/src/compiler/nir/nir_opt_loop_unroll.c 
b/src/compiler/nir/nir_opt_loop_unroll.c
index 8406880204..d8df619b32 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -556,19 +556,200 @@ wrapper_unroll(nir_loop *loop)
return true;
 }
 
+static bool
+is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref,
+unsigned trip_count)
+{
+   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
+  if (d->deref_type != nir_deref_type_array)
+ continue;
+
+  nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr);
+  nir_src src = term->induction_rhs ? alu->src[1].src : alu->src[0].src;
+  if (!nir_srcs_equal(d->arr.index, src))
+ continue;
+
+  nir_deref_instr *parent = nir_deref_instr_parent(d);
+  assert(glsl_type_is_array(parent->type) ||
+ glsl_type_is_matrix(parent->type));
+
+  /* We have already unrolled the loop and the new one will be imbedded in
+   * the innermost continue branch. So unless the array is greater than
+   * the trip count any iteration over the loop will be an out of bounds
+   * access of the array.
+   */
+  return glsl_get_length(parent->type) <= trip_count;
+   }
+
+   return false;
+}
+
+/* If we know an array access is going to be out of bounds remove or replace
+ * the access with an undef. This can later result in the entire loop being
+ * removed by nir_opt_dead_cf().
+ */
+static void
+remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop,
+   nir_loop_terminator *term,
+   nir_cf_list *lp_header,
+   nir_cf_list *lp_body,
+   unsigned trip_count)
+{
+   if (!loop->info->guessed_trip_count)
+  return;
+
+   /* Temporarily recreate the original loop so we can alter it */
+   nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop)));
+   nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop)));
+
+   nir_builder b;
+   nir_builder_init(&b, nir_cf_node_get_function(&loop->cf_node));
+
+   nir_foreach_block_in_cf_node(block, &loop->cf_node) {
+  nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ /* Check for arrays variably-indexed by a loop induction variable.
+  * If this access is out of bounds remove the instruction or replace
+  * its use with an undefined instruction.
+  * If the loop is no longer useful we leave if for the appropriate
+  * pass to clean it up for us.
+  */
+ if (intrin->intrinsic == nir_intrinsic_load_deref ||
+ intrin->intrinsic == nir_intrinsic_store_deref ||
+ intrin->intrinsic ==

[Mesa-dev] [PATCH v2 08/10] nir: add get_induction_and_limit_vars() helper to loop analysis

2018-12-12 Thread Timothy Arceri

This helps make find_trip_count() a little easier to follow but
will also be used by a following patch.
---
 src/compiler/nir/nir_loop_analyze.c | 41 ++---
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index b96c723f2a..0352a6fba5 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -721,6 +721,27 @@ is_supported_terminator_condition(nir_alu_instr *alu)
}
 }
 
+static bool
+get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable **ind,
+ nir_loop_variable **limit,
+ loop_info_state *state)
+{
+   bool limit_rhs = true;
+
+   /* We assume that the limit is the "right" operand */
+   *ind = get_loop_var(alu->src[0].src.ssa, state);
+   *limit = get_loop_var(alu->src[1].src.ssa, state);
+
+   if ((*ind)->type != basic_induction) {
+  /* We had it the wrong way, flip things around */
+  *ind = get_loop_var(alu->src[1].src.ssa, state);
+  *limit = get_loop_var(alu->src[0].src.ssa, state);
+  limit_rhs = false;
+   }
+
+   return limit_rhs;
+}
+
 /* Run through each of the terminators of the loop and try to infer a possible
  * trip-count. We need to check them all, and set the lowest trip-count as the
  * trip-count of our loop. If one of the terminators has an undecidable
@@ -748,26 +769,16 @@ find_trip_count(loop_info_state *state)
   }
 
   nir_alu_instr *alu = nir_instr_as_alu(terminator->conditional_instr);
-  nir_loop_variable *basic_ind = NULL;
-  nir_loop_variable *limit = NULL;
-  bool limit_rhs = true;
-
   if (!is_supported_terminator_condition(alu)) {
  trip_count_known = false;
  continue;
   }
 
-  /* We assume that the limit is the "right" operand */
-  basic_ind = get_loop_var(alu->src[0].src.ssa, state);
-  limit = get_loop_var(alu->src[1].src.ssa, state);
-
-  if (basic_ind->type != basic_induction) {
- /* We had it the wrong way, flip things around */
- basic_ind = get_loop_var(alu->src[1].src.ssa, state);
- limit = get_loop_var(alu->src[0].src.ssa, state);
- limit_rhs = false;
- terminator->induction_rhs = true;
-  }
+  nir_loop_variable *basic_ind;
+  nir_loop_variable *limit;
+  bool limit_rhs = get_induction_and_limit_vars(alu, &basic_ind, &limit,
+state);
+  terminator->induction_rhs = !limit_rhs;
 
   /* The comparison has to have a basic induction variable for us to be
* able to find trip counts.
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 10/10] nir: find induction/limit vars in iand instructions

2018-12-12 Thread Timothy Arceri

This will be used to help find the trip count of loops that look
like the following:

   while (a < x && i < 8) {
  ...
  i++;
   }

Where the NIR will end up looking something like this:

   vec1 32 ssa_0 = load_const (0x00000000 /* 0.000000 */)
   vec1 32 ssa_1 = load_const (0x00000008 /* 0.000000 */)
   loop {
  ...
  vec1 32 ssa_28 = ige ssa_26, ssa_3
  vec1 32 ssa_29 = ige ssa_27, ssa_1
  vec1 32 ssa_30 = iadd ssa_29, ssa_28
  vec1 ssa_31 = ieq ssa_30, ssa_0
  if ssa_31 {
 ...
 break
  } else {
 ...
  }
  ...
   }

On RADV this unrolls a bunch of loops in F1-2017 shaders.

Totals from affected shaders:
SGPRS: 4112 -> 4032 (-1.95 %)
VGPRS: 4076 -> 3996 (-1.96 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 510184 -> 589868 (15.62 %) bytes
LDS: 2 -> 2 (0.00 %) blocks
Max Waves: 200 -> 202 (1.00 %)
Wait states: 0 -> 0 (0.00 %)

It also unrolls a couple of loops in shader-db on radeonsi.

Totals from affected shaders:
SGPRS: 128 -> 128 (0.00 %)
VGPRS: 64 -> 64 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 6880 -> 9504 (38.14 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 16 -> 16 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
---
 src/compiler/nir/nir_loop_analyze.c | 71 -
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index cf2655aa20..4045af4109 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -743,6 +743,59 @@ get_induction_and_limit_vars(nir_alu_instr *alu, 
nir_loop_variable **ind,
return limit_rhs;
 }
 
+static void
+try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
+ nir_loop_variable **ind,
+ nir_loop_variable **limit,
+ bool *limit_rhs,
+ loop_info_state *state)
+{
+   assert((*alu)->op == nir_op_ieq);
+
+   nir_ssa_def *iand_def = (*alu)->src[0].src.ssa;
+   nir_ssa_def *zero_def = (*alu)->src[1].src.ssa;
+
+   if (iand_def->parent_instr->type != nir_instr_type_alu ||
+   zero_def->parent_instr->type != nir_instr_type_load_const) {
+
+  /* Maybe we had it the wrong way, flip things around */
+  iand_def = (*alu)->src[1].src.ssa;
+  zero_def = (*alu)->src[0].src.ssa;
+
+  /* If we still didn't find what we need then return */
+  if (iand_def->parent_instr->type != nir_instr_type_alu ||
+  zero_def->parent_instr->type != nir_instr_type_load_const)
+ return;
+   }
+
+   /* If the loop is not breaking on (x && y) == 0 then return */
+   nir_alu_instr *iand = nir_instr_as_alu(iand_def->parent_instr);
+   nir_const_value zero =
+  nir_instr_as_load_const(zero_def->parent_instr)->value;
+   if (iand->op != nir_op_iand || zero.i32[0] != 0)
+  return;
+
+   /* Check if iand src is a terminator condition and try get induction var
+* and trip limit var.
+*/
+   nir_ssa_def *src = iand->src[0].src.ssa;
+   if (src->parent_instr->type == nir_instr_type_alu) {
+  *alu = nir_instr_as_alu(src->parent_instr);
+  if (is_supported_terminator_condition(*alu))
+ *limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state);
+   }
+
+   /* Try the other iand src if needed */
+   if ((*ind)->type != basic_induction) {
+  src = iand->src[1].src.ssa;
+  if (src->parent_instr->type == nir_instr_type_alu) {
+ *alu = nir_instr_as_alu(src->parent_instr);
+ if (is_supported_terminator_condition(*alu))
+*limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state);
+  }
+   }
+}
+
 /* Run through each of the terminators of the loop and try to infer a possible
  * trip-count. We need to check them all, and set the lowest trip-count as the
  * trip-count of our loop. If one of the terminators has an undecidable
@@ -781,7 +834,21 @@ find_trip_count(loop_info_state *state)
   nir_loop_variable *limit;
   bool limit_rhs = get_induction_and_limit_vars(alu, &basic_ind, &limit,
 state);
-  terminator->induction_rhs = !limit_rhs;
+
+  if (basic_ind->type != basic_induction && alu->op == nir_op_ieq) {
+ trip_count_known = false;
+ terminator->exact_trip_count_unknown = true;
+
+ try_find_trip_count_vars_in_iand(&alu, &basic_ind, &limit,
+  &limit_rhs, state);
+
+ /* The loop is exiting on (x && y) == 0 so we need to get the
+  * inverse of x or y (i.e. which ever contained the induction var) in
+  * order to compute the trip count.
+  */
+ if (basic_ind->type == basic_induction)
+

[Mesa-dev] [PATCH v2 04/10] nir: calculate trip count for more loops

2018-12-12 Thread Timothy Arceri

This adds support to loop analysis for loops where the induction
variable is compared to the result of min(variable, constant).

For example:

   for (int i = 0; i < imin(x, 4); i++)
  ...

We add a new bool to the loop terminator struct in order to
differentiate terminators with this exit condition.
---
 src/compiler/nir/nir.h | 11 +++
 src/compiler/nir/nir_loop_analyze.c| 41 ++
 src/compiler/nir/nir_opt_loop_unroll.c |  3 +-
 3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 055d4d30d3..4bbc750bd6 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1881,6 +1881,17 @@ typedef struct {
bool continue_from_then;
bool induction_rhs;
 
+   /* This is true if the terminators exact trip count is unknown. For
+* example:
+*
+*for (int i = 0; i < imin(x, 4); i++)
+*   ...
+*
+* Here loop analysis would have set a max_trip_count of 4 however we dont
+* know for sure that this is the exact trip count.
+*/
+   bool exact_trip_count_unknown;
+
struct list_head loop_terminator_link;
 } nir_loop_terminator;
 
diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index c46c491963..0b37da6569 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -477,6 +477,35 @@ guess_loop_limit(loop_info_state *state, nir_const_value 
*limit_val,
return false;
 }
 
+static bool
+try_find_limit_of_alu(nir_loop_variable *limit, nir_const_value *limit_val,
+  nir_loop_terminator *terminator, loop_info_state *state)
+{
+   if(!is_var_alu(limit))
+  return false;
+
+   nir_alu_instr *limit_alu = nir_instr_as_alu(limit->def->parent_instr);
+
+   if (limit_alu->op == nir_op_imin ||
+   limit_alu->op == nir_op_fmin) {
+  limit = get_loop_var(limit_alu->src[0].src.ssa, state);
+
+  if (!is_var_constant(limit))
+ limit = get_loop_var(limit_alu->src[1].src.ssa, state);
+
+  if (!is_var_constant(limit))
+ return false;
+
+  *limit_val = nir_instr_as_load_const(limit->def->parent_instr)->value;
+
+  terminator->exact_trip_count_unknown = true;
+
+  return true;
+   }
+
+   return false;
+}
+
 static int32_t
 get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step,
   nir_const_value *limit)
@@ -708,12 +737,14 @@ find_trip_count(loop_info_state *state)
  } else {
 trip_count_known = false;
 
-/* Guess loop limit based on array access */
-if (!guess_loop_limit(state, &limit_val, basic_ind)) {
-   continue;
-}
+if (!try_find_limit_of_alu(limit, &limit_val, terminator, state)) {
+   /* Guess loop limit based on array access */
+   if (!guess_loop_limit(state, &limit_val, basic_ind)) {
+  continue;
+   }
 
-guessed_trip_count = true;
+   guessed_trip_count = true;
+}
  }
 
  /* We have determined that we have the following constants:
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c 
b/src/compiler/nir/nir_opt_loop_unroll.c
index d8df619b32..9630e0738a 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -830,7 +830,8 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool 
*has_nested_loop_out)
   } else {
  /* Attempt to unroll loops with two terminators. */
  unsigned num_lt = list_length(&loop->info->loop_terminator_list);
- if (num_lt == 2) {
+ if (num_lt == 2 &&
+ !loop->info->limiting_terminator->exact_trip_count_unknown) {
 bool limiting_term_second = true;
 nir_loop_terminator *terminator =
list_first_entry(&loop->info->loop_terminator_list,
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] V2 More loop unrolling

2018-12-12 Thread Timothy Arceri

V2:
 - When guessing trip count in patch 1 check if the induction var
   is used in more than a single loop and get the smallest array
   size if so (Suggested by Jason).
 - A bunch of reviewed patches have been pushed

Some piglit tests:

https://patchwork.freedesktop.org/series/53712/ 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/4] nir: rename nir_link_constant_varyings() nir_link_opt_varyings()

2018-12-10 Thread Timothy Arceri

The following patches will add support for an additional
optimisation so this function will no longer just optimise varying
constants.
---
 src/amd/vulkan/radv_pipeline.c| 4 ++--
 src/compiler/nir/nir.h| 2 +-
 src/compiler/nir/nir_linking_helpers.c| 2 +-
 src/intel/compiler/brw_nir.c  | 2 +-
 src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 33076cc2bd..69970fbff7 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1814,8 +1814,8 @@ radv_link_shaders(struct radv_pipeline *pipeline, 
nir_shader **shaders)
nir_lower_io_arrays_to_elements(ordered_shaders[i],
ordered_shaders[i - 1]);
 
-   if (nir_link_constant_varyings(ordered_shaders[i],
-  ordered_shaders[i - 1]))
+   if (nir_link_opt_varyings(ordered_shaders[i],
+ ordered_shaders[i - 1]))
radv_optimize_nir(ordered_shaders[i - 1], false, false);
 
nir_remove_dead_variables(ordered_shaders[i],
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 79b51f9c00..a8906da351 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2879,7 +2879,7 @@ bool nir_remove_unused_io_vars(nir_shader *shader, struct 
exec_list *var_list,
 void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
   bool default_to_smooth_interp);
 void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer);
-bool nir_link_constant_varyings(nir_shader *producer, nir_shader *consumer);
+bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer);
 
 typedef enum {
/* If set, this forces all non-flat fragment shader inputs to be
diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index 8bd4acc2ee..b447729c60 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -774,7 +774,7 @@ try_replace_constant_input(nir_shader *shader,
 }
 
 bool
-nir_link_constant_varyings(nir_shader *producer, nir_shader *consumer)
+nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
 {
/* TODO: Add support for more shader stage combinations */
if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index aa6788b9fe..e3c7bcf42c 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -742,7 +742,7 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
   *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false);
}
 
-   if (nir_link_constant_varyings(*producer, *consumer))
+   if (nir_link_opt_varyings(*producer, *consumer))
   *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false);
 
NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 39179f86a1..aedaef9f8e 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -588,7 +588,7 @@ st_nir_link_shaders(nir_shader **producer, nir_shader 
**consumer, bool scalar)
 {
nir_lower_io_arrays_to_elements(*producer, *consumer);
 
-   if (nir_link_constant_varyings(*producer, *consumer))
+   if (nir_link_opt_varyings(*producer, *consumer))
   st_nir_opts(*consumer, scalar);
 
NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/4] nir: add can_replace_varying() helper

2018-12-10 Thread Timothy Arceri

This will be reused by the following patch.
---
 src/compiler/nir/nir_linking_helpers.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index b447729c60..37644d339f 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -701,12 +701,8 @@ nir_link_xfb_varyings(nir_shader *producer, nir_shader 
*consumer)
 }
 
 static bool
-try_replace_constant_input(nir_shader *shader,
-   nir_intrinsic_instr *store_intr)
+can_replace_varying(nir_variable *out_var, nir_intrinsic_instr *store_intr)
 {
-   nir_variable *out_var =
-  nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
-
if (out_var->data.mode != nir_var_shader_out)
   return false;
 
@@ -729,6 +725,19 @@ try_replace_constant_input(nir_shader *shader,
out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
   return false;
 
+   return true;
+}
+
+static bool
+try_replace_constant_input(nir_shader *shader,
+   nir_intrinsic_instr *store_intr)
+{
+   nir_variable *out_var =
+  nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
+
+   if (!can_replace_varying(out_var, store_intr))
+  return false;
+
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
 
nir_builder b;
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/4] st/glsl_to_nir: call nir_lower_load_const_to_scalar() in the st

2018-12-10 Thread Timothy Arceri

This will help the new opt introduced in the following patches
allowing us to remove extra duplicate varyings.
---
 src/gallium/drivers/radeonsi/si_shader_nir.c | 2 --
 src/mesa/state_tracker/st_glsl_to_nir.cpp| 4 +++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index e7ba282b07..660b5bc356 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -823,8 +823,6 @@ si_lower_nir(struct si_shader_selector* sel)
 
ac_lower_indirect_derefs(sel->nir, sel->screen->info.chip_class);
 
-   NIR_PASS_V(sel->nir, nir_lower_load_const_to_scalar);
-
bool progress;
do {
progress = false;
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 7406e26e2f..39179f86a1 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -702,8 +702,10 @@ st_link_nir(struct gl_context *ctx,
 
   nir_shader *nir = shader->Program->nir;
 
-  if (is_scalar[i])
+  if (is_scalar[i]) {
  NIR_PASS_V(nir, nir_lower_io_to_scalar_early, mask);
+ NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
+  }
 
   st_nir_opts(nir, is_scalar[i]);
}
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/4] nir: link time opt duplicate varyings

2018-12-10 Thread Timothy Arceri

If we are outputting the same value to more than one output
component rewrite the inputs to read from a single component.

This will allow the duplicate varying components to be optimised
away by the existing opts.

shader-db results i965 (SKL):

total instructions in shared programs: 12869230 -> 12860886 (-0.06%)
instructions in affected programs: 322601 -> 314257 (-2.59%)
helped: 3080
HURT: 8

total cycles in shared programs: 317792574 -> 317730593 (-0.02%)
cycles in affected programs: 2584925 -> 2522944 (-2.40%)
helped: 2975
HURT: 477

shader-db results radeonsi (VEGA):

Totals from affected shaders:
SGPRS: 30960 -> 31056 (0.31 %)
VGPRS: 17052 -> 16672 (-2.23 %)
Spilled SGPRs: 184 -> 167 (-9.24 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 562532 -> 549404 (-2.33 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 6011 -> 6110 (1.65 %)
Wait states: 0 -> 0 (0.00 %)

vkpipeline-db results RADV (VEGA):

Totals from affected shaders:
SGPRS: 14880 -> 15080 (1.34 %)
VGPRS: 10872 -> 10888 (0.15 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 674016 -> 668396 (-0.83 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 2708 -> 2704 (-0.15 %)
Wait states: 0 -> 0 (0.00 %)
---
 src/compiler/nir/nir_linking_helpers.c | 95 ++
 1 file changed, 95 insertions(+)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index 37644d339f..bdfa7b8c4d 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -700,6 +700,27 @@ nir_link_xfb_varyings(nir_shader *producer, nir_shader 
*consumer)
}
 }
 
+static nir_variable *
+get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
+{
+   nir_variable *input_var = NULL;
+   nir_foreach_variable(var, &consumer->inputs) {
+  if (var->data.location >= VARYING_SLOT_VAR0 &&
+  var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
+
+ if (var->data.location == out_var->data.location &&
+ var->data.location_frac == out_var->data.location_frac &&
+ var->data.interpolation == out_var->data.interpolation &&
+ get_interp_loc(var) == get_interp_loc(out_var)) {
+input_var = var;
+break;
+ }
+  }
+   }
+
+   return input_var;
+}
+
 static bool
 can_replace_varying(nir_variable *out_var, nir_intrinsic_instr *store_intr)
 {
@@ -782,6 +803,57 @@ try_replace_constant_input(nir_shader *shader,
return progress;
 }
 
+static bool
+try_replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
+nir_intrinsic_instr *dup_store_intr)
+{
+   assert(input_var);
+
+   nir_variable *dup_out_var =
+  nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
+
+   if (!can_replace_varying(dup_out_var, dup_store_intr))
+  return false;
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   bool progress = false;
+   nir_foreach_block(block, impl) {
+  nir_foreach_instr(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+continue;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_load_deref)
+continue;
+
+ nir_variable *in_var =
+nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
+
+ if (in_var->data.mode != nir_var_shader_in)
+continue;
+
+ if (in_var->data.location != dup_out_var->data.location ||
+ in_var->data.location_frac != dup_out_var->data.location_frac ||
+ in_var->data.interpolation != input_var->data.interpolation ||
+ get_interp_loc(in_var) != get_interp_loc(input_var))
+continue;
+
+ b.cursor = nir_before_instr(instr);
+
+ nir_ssa_def *load = nir_load_var(&b, input_var);
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));
+
+ progress = true;
+  }
+   }
+
+   return progress;
+}
+
 bool
 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
 {
@@ -795,6 +867,10 @@ nir_link_opt_varyings(nir_shader *producer, nir_shader 
*consumer)
 
nir_function_impl *impl = nir_shader_get_entrypoint(producer);
 
+   struct hash_table *varying_values =
+  _mesa_hash_table_create(NULL,  _mesa_hash_pointer,
+  _mesa_key_pointer_equal);
+
/* If we find a store in the last block of the producer we can be sure this
 * is the only possible value for this output.
 */
@@ -809,11 +885,30 @@ nir_link_opt_varyings(nir_shader *producer, nir_shader 
*consumer)
  continue;
 
   if (intr->src[1].ssa->parent_instr->type != nir_instr_type_load_const) {
+ struct hash_entry

Re: [Mesa-dev] [PATCH] mesa: add EXT_debug_label support

2018-12-10 Thread Timothy Arceri


On 11/12/18 11:35 am, Ian Romanick wrote:

It seems like someone already sent out patches to implement this, and we
decided to not take it for some reason.  Maybe it was Rob?



I discovered a thread from the beginning of 2017 titled "feature.txt & 
EXT_debug_label extension". But couldn't find any implementation.


There was a reply from yourself, but it seems incorrect to me:

"I checked both extensions, and they're not "just" aliases.  The EXT adds
a single function with an enum to select the kind of object.  The KHR
adds a function per kind of object.  It would be easy enough to add, but
it seems more valuable to suggest the developer use the more broadly
supported extension."


On 12/10/18 4:08 PM, Timothy Arceri wrote:

KHR_debug already provides superior functionality but this
extension is still in use and adding support for it seems fairly
harmless. For example it seems to be used by Unity as seen in the
Parkitect trace attached to Mesa bug #108919.
---
  src/mapi/glapi/gen/gl_API.xml| 17 +
  src/mesa/main/extensions_table.h |  1 +
  src/mesa/main/objectlabel.c  |  6 ++
  3 files changed, 24 insertions(+)

diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index f1def8090d..75423c4edb 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -12973,6 +12973,23 @@
  
  
  
+

+  


Since these are just aliases, I don't think any changes needed in
dispatch-sanity... but did you run 'make check' anyway? :)



Yes :) Passed as expected.



+
+
+
+
+  
+
+  
+
+
+
+
+
+  
+
+
  http://www.w3.org/2001/XInclude"/>
  
  

diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index dad38124d5..b68f6781c4 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -217,6 +217,7 @@ EXT(EXT_compiled_vertex_array   , dummy_true
  EXT(EXT_compressed_ETC1_RGB8_sub_texture, 
OES_compressed_ETC1_RGB8_texture   ,  x ,  x , ES1, ES2, 2014)
  EXT(EXT_copy_image  , OES_copy_image  
   ,  x ,  x ,  x ,  30, 2014)
  EXT(EXT_copy_texture, dummy_true  
   , GLL,  x ,  x ,  x , 1995)
+EXT(EXT_debug_label , dummy_true   
  , GLL, GLC,  x ,  x , 2013)
  EXT(EXT_depth_bounds_test   , EXT_depth_bounds_test   
   , GLL, GLC,  x ,  x , 2002)
  EXT(EXT_discard_framebuffer , dummy_true  
   ,  x ,  x , ES1, ES2, 2009)
  EXT(EXT_disjoint_timer_query, EXT_disjoint_timer_query
   ,  x ,  x ,  x , ES2, 2016)
diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c
index 1e3022ee54..9d4cc1871e 100644
--- a/src/mesa/main/objectlabel.c
+++ b/src/mesa/main/objectlabel.c
@@ -139,6 +139,7 @@ get_label_pointer(struct gl_context *ctx, GLenum 
identifier, GLuint name,
  
 switch (identifier) {

 case GL_BUFFER:
+   case GL_BUFFER_OBJECT_EXT:
{
   struct gl_buffer_object *bufObj = _mesa_lookup_bufferobj(ctx, name);
   if (bufObj)
@@ -146,6 +147,7 @@ get_label_pointer(struct gl_context *ctx, GLenum 
identifier, GLuint name,
}
break;
 case GL_SHADER:
+   case GL_SHADER_OBJECT_EXT:
{
   struct gl_shader *shader = _mesa_lookup_shader(ctx, name);
   if (shader)
@@ -153,6 +155,7 @@ get_label_pointer(struct gl_context *ctx, GLenum 
identifier, GLuint name,
}
break;
 case GL_PROGRAM:
+   case GL_PROGRAM_OBJECT_EXT:
{
   struct gl_shader_program *program =
  _mesa_lookup_shader_program(ctx, name);
@@ -161,6 +164,7 @@ get_label_pointer(struct gl_context *ctx, GLenum 
identifier, GLuint name,
}
break;
 case GL_VERTEX_ARRAY:
+   case GL_VERTEX_ARRAY_OBJECT_EXT:
{
   struct gl_vertex_array_object *obj = _mesa_lookup_vao(ctx, name);
   if (obj)
@@ -168,6 +172,7 @@ get_label_pointer(struct gl_context *ctx, GLenum 
identifier, GLuint name,
}
break;
 case GL_QUERY:
+   case GL_QUERY_OBJECT_EXT:
{
   struct gl_query_object *query = _mesa_lookup_query_object(ctx, name);
   if (query)
@@ -225,6 +230,7 @@ get_label_pointer(struct gl_context *ctx, GLenum 
identifier, GLuint name,
}
break;
 case GL_PROGRAM_PIPELINE:
+   case GL_PROGRAM_PIPELINE_OBJECT_EXT:
{
   struct gl_pipeline_object *pipe =
  _mesa_lookup_pipeline_object(ctx, name);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] mesa: add EXT_debug_label support

2018-12-10 Thread Timothy Arceri

KHR_debug already provides superior functionality but this
extension is still in use and adding support for it seems fairly
harmless. For example it seems to be used by Unity as seen in the
Parkitect trace attached to Mesa bug #108919.
---
 src/mapi/glapi/gen/gl_API.xml| 17 +
 src/mesa/main/extensions_table.h |  1 +
 src/mesa/main/objectlabel.c  |  6 ++
 3 files changed, 24 insertions(+)

diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index f1def8090d..75423c4edb 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -12973,6 +12973,23 @@
 
 
 
+
+  
+
+
+
+
+  
+
+  
+
+
+
+
+
+  
+
+
 http://www.w3.org/2001/XInclude"/>
 
 
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index dad38124d5..b68f6781c4 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -217,6 +217,7 @@ EXT(EXT_compiled_vertex_array   , dummy_true
 EXT(EXT_compressed_ETC1_RGB8_sub_texture, OES_compressed_ETC1_RGB8_texture 
  ,  x ,  x , ES1, ES2, 2014)
 EXT(EXT_copy_image  , OES_copy_image   
  ,  x ,  x ,  x ,  30, 2014)
 EXT(EXT_copy_texture, dummy_true   
  , GLL,  x ,  x ,  x , 1995)
+EXT(EXT_debug_label , dummy_true   
  , GLL, GLC,  x ,  x , 2013)
 EXT(EXT_depth_bounds_test   , EXT_depth_bounds_test
  , GLL, GLC,  x ,  x , 2002)
 EXT(EXT_discard_framebuffer , dummy_true   
  ,  x ,  x , ES1, ES2, 2009)
 EXT(EXT_disjoint_timer_query, EXT_disjoint_timer_query 
  ,  x ,  x ,  x , ES2, 2016)
diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c
index 1e3022ee54..9d4cc1871e 100644
--- a/src/mesa/main/objectlabel.c
+++ b/src/mesa/main/objectlabel.c
@@ -139,6 +139,7 @@ get_label_pointer(struct gl_context *ctx, GLenum 
identifier, GLuint name,
 
switch (identifier) {
case GL_BUFFER:
+   case GL_BUFFER_OBJECT_EXT:
   {
  struct gl_buffer_object *bufObj = _mesa_lookup_bufferobj(ctx, name);
  if (bufObj)
@@ -146,6 +147,7 @@ get_label_pointer(struct gl_context *ctx, GLenum 
identifier, GLuint name,
   }
   break;
case GL_SHADER:
+   case GL_SHADER_OBJECT_EXT:
   {
  struct gl_shader *shader = _mesa_lookup_shader(ctx, name);
  if (shader)
@@ -153,6 +155,7 @@ get_label_pointer(struct gl_context *ctx, GLenum 
identifier, GLuint name,
   }
   break;
case GL_PROGRAM:
+   case GL_PROGRAM_OBJECT_EXT:
   {
  struct gl_shader_program *program =
 _mesa_lookup_shader_program(ctx, name);
@@ -161,6 +164,7 @@ get_label_pointer(struct gl_context *ctx, GLenum 
identifier, GLuint name,
   }
   break;
case GL_VERTEX_ARRAY:
+   case GL_VERTEX_ARRAY_OBJECT_EXT:
   {
  struct gl_vertex_array_object *obj = _mesa_lookup_vao(ctx, name);
  if (obj)
@@ -168,6 +172,7 @@ get_label_pointer(struct gl_context *ctx, GLenum 
identifier, GLuint name,
   }
   break;
case GL_QUERY:
+   case GL_QUERY_OBJECT_EXT:
   {
  struct gl_query_object *query = _mesa_lookup_query_object(ctx, name);
  if (query)
@@ -225,6 +230,7 @@ get_label_pointer(struct gl_context *ctx, GLenum 
identifier, GLuint name,
   }
   break;
case GL_PROGRAM_PIPELINE:
+   case GL_PROGRAM_PIPELINE_OBJECT_EXT:
   {
  struct gl_pipeline_object *pipe =
 _mesa_lookup_pipeline_object(ctx, name);
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 3/4] nir: rewrite varying component packing

2018-12-09 Thread Timothy Arceri

Sorry please ignore this for now. I've realised there is a bug here 
where we could end up packing components in only one of the shaders but 
not the other. For example if we have an array on one side but just a 
bunch of individual varyings on the other (which is legal I believe). 
I'll send a version 3 to fix this.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] nir: fixed some missed varying compaction opportunities

2018-12-09 Thread Timothy Arceri


I'd much rather land the first 3 patches from this series if possible.

https://patchwork.freedesktop.org/series/53800/

I've confirmed it packs the shaders you were looking at as expected once 
your patch 2 is applied. The series makes this code much more flexible 
(for future improvements) and easier to follow.


On 9/12/18 5:28 am, Rob Clark wrote:

Previously, if we had a .z or .w component that could be compacted
to .y, we could overlook that opportunity.

Signed-off-by: Rob Clark 
---
  src/compiler/nir/nir_linking_helpers.c | 30 --
  1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index 1ab9c095657..ce368a3c132 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -431,12 +431,30 @@ compact_components(nir_shader *producer, nir_shader 
*consumer, uint8_t *comps,
   uint8_t interp = get_interp_type(var, type, 
default_to_smooth_interp);
   for (; cursor[interp] < 32; cursor[interp]++) {
  uint8_t cursor_used_comps = comps[cursor[interp]];
+uint8_t unused_comps = ~cursor_used_comps;
  
-/* We couldn't find anywhere to pack the varying continue on. */

-if (cursor[interp] == location &&
-(var->data.location_frac == 0 ||
- cursor_used_comps & ((1 << (var->data.location_frac)) - 1)))
-   break;
+/* Don't search beyond our current location, we are just trying
+ * to pack later varyings to lower positions:
+ */
+if (cursor[interp] == location) {
+   if (var->data.location_frac == 0)
+  break;
+
+   /* If not already aligned to slot, see if we can shift it up.
+* Note that if we get this far it is a scalar so we know that
+* shifting this var to any other open position won't conflict
+* with it's current position.
+*/
+   unsigned p = ffs(unused_comps & 0xf);
+   if (!p)
+  break;
+
+   /* ffs returns 1 for bit zero: */
+   p--;
+
+   if (p >= var->data.location_frac)
+  break;
+}
  
  /* We can only pack varyings with matching interpolation types */

  if (interp_type[cursor[interp]] != interp)
@@ -460,8 +478,6 @@ compact_components(nir_shader *producer, nir_shader 
*consumer, uint8_t *comps,
  if (!cursor_used_comps)
 continue;
  
-uint8_t unused_comps = ~cursor_used_comps;

-
  for (unsigned i = 0; i < 4; i++) {
 uint8_t new_var_comps = 1 << i;
 if (unused_comps & new_var_comps) {


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC v2 4/4] nir: add crude live range analysis to nir_compact_varyings()

2018-12-09 Thread Timothy Arceri

Using Rob's packing fix for the st I'm actually getting results for 
radeonsi now but they are pretty mixed for this patch:


Totals from affected shaders:
SGPRS: 35992 -> 35520 (-1.31 %)
VGPRS: 20688 -> 20808 (0.58 %)
Spilled SGPRs: 1926 -> 1996 (3.63 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 1053168 -> 1055452 (0.22 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 4636 -> 4616 (-0.43 %)
Wait states: 0 -> 0 (0.00 %)

On 10/12/18 11:31 am, Timothy Arceri wrote:

vkpipeline-db results RADV (VEGA):

Totals from affected shaders:
SGPRS: 27168 -> 27872 (2.59 %)
VGPRS: 24180 -> 24056 (-0.51 %)
Spilled SGPRs: 28 -> 24 (-14.29 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 1584936 -> 1585552 (0.04 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 3804 -> 3824 (0.53 %)
Wait states: 0 -> 0 (0.00 %)
---
  src/compiler/nir/nir_linking_helpers.c | 45 ++
  1 file changed, 45 insertions(+)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index 8bd4acc2ee..badda80979 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -415,6 +415,8 @@ remap_slots_and_components(struct exec_list *var_list, 
gl_shader_stage stage,
  
  struct varying_component {

 nir_variable *var;
+   unsigned first_block_use;
+   unsigned last_block_use;
 uint8_t interp_type;
 uint8_t interp_loc;
 bool is_patch;
@@ -441,10 +443,36 @@ cmp_varying_component(const void *comp1_v, const void 
*comp2_v)
 if (comp1->interp_loc != comp2->interp_loc)
return comp1->interp_loc - comp2->interp_loc;
  
+   /* Attempt to reduce register pressure with crude live range analysis */

+   if (comp1->first_block_use != comp2->first_block_use)
+  return comp1->first_block_use - comp2->first_block_use;
+   if (comp1->last_block_use != comp2->last_block_use)
+  return comp1->last_block_use - comp2->last_block_use;
+
 /* If everything else matches just use the original location to sort */
 return comp1->var->data.location - comp2->var->data.location;
  }
  
+static void

+set_block_use(struct varying_component *vc_info, nir_src *src,
+  bool is_if_condition)
+{
+   nir_block *blk
+  = nir_cursor_current_block(nir_before_src(src, is_if_condition));
+
+   if (vc_info->initialised) {
+  if (vc_info->first_block_use > blk->index)
+ vc_info->first_block_use = blk->index;
+
+  if (vc_info->last_block_use < blk->index)
+ vc_info->last_block_use = blk->index;
+   } else {
+  vc_info->first_block_use = blk->index;
+  vc_info->last_block_use = blk->index;
+  vc_info->initialised = true;
+   }
+}
+
  static void
  gather_varying_component_info(nir_shader *consumer,
struct varying_component **varying_comp_info,
@@ -533,6 +561,14 @@ gather_varying_component_info(nir_shader *consumer,
  vc_info->interp_loc = get_interp_loc(in_var);
  vc_info->is_patch = in_var->data.patch;
   }
+
+ nir_foreach_use(src, >dest.ssa) {
+set_block_use(vc_info, src, false);
+ }
+
+ nir_foreach_if_use(src, >dest.ssa) {
+set_block_use(vc_info, src, true);
+ }
}
 }
  }
@@ -651,6 +687,12 @@ void
  nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
   bool default_to_smooth_interp)
  {
+   nir_function_impl *p_impl = nir_shader_get_entrypoint(producer);
+   nir_function_impl *c_impl = nir_shader_get_entrypoint(consumer);
+
+   nir_metadata_require(p_impl, nir_metadata_block_index);
+   nir_metadata_require(c_impl, nir_metadata_block_index);
+
 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
 assert(consumer->info.stage != MESA_SHADER_VERTEX);
  
@@ -665,6 +707,9 @@ nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
  
 compact_components(producer, consumer, unmoveable_comps,

default_to_smooth_interp);
+
+   nir_metadata_preserve(p_impl, nir_metadata_block_index);
+   nir_metadata_preserve(c_impl, nir_metadata_block_index);
  }
  
  /*



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 3/4] nir: rewrite varying component packing

2018-12-09 Thread Timothy Arceri

There are three reasons for the rewrite.

1. Adding support for packing tess patch varyings in a sane way.

2. Making use of qsort allowing the code to be much easier to
   follow.

3. Allowing us to add a crude live range analysis for deciding
   which components should be packed together. This support will
   be added in a future patch.

v2: pack moveable components with the unmoveable components. The
new pass is now functionally the same as the old pass besides
the new support for packing patches.
---
 src/compiler/nir/nir_linking_helpers.c | 305 -
 1 file changed, 196 insertions(+), 109 deletions(-)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index c26582ddec..8bd4acc2ee 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -247,22 +247,20 @@ is_packing_supported_for_type(const struct glsl_type 
*type)
return true;
 }
 
+/* Packing arrays and dual slot varyings is difficult so to avoid complex
+ * algorithms this function marks these components as unmoveable.
+ */
 static void
-get_slot_component_masks_and_interp_types(struct exec_list *var_list,
-  uint8_t *comps,
-  uint8_t *interp_type,
-  uint8_t *interp_loc,
-  gl_shader_stage stage,
-  bool default_to_smooth_interp)
+get_unmoveable_components_masks(struct exec_list *var_list, uint8_t *comps,
+gl_shader_stage stage,
+bool default_to_smooth_interp)
 {
nir_foreach_variable_safe(var, var_list) {
   assert(var->data.location >= 0);
 
-  /* Only remap things that aren't built-ins.
-   * TODO: add TES patch support.
-   */
+  /* Only remap things that aren't built-ins. */
   if (var->data.location >= VARYING_SLOT_VAR0 &&
-  var->data.location - VARYING_SLOT_VAR0 < 32) {
+  var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
 
  const struct glsl_type *type = var->type;
  if (nir_is_per_vertex_io(var, stage)) {
@@ -270,6 +268,12 @@ get_slot_component_masks_and_interp_types(struct exec_list 
*var_list,
 type = glsl_get_array_element(type);
  }
 
+ /* If we can pack this varying then don't mark the components as
+  * used.
+  */
+ if (is_packing_supported_for_type(type))
+continue;
+
  unsigned location = var->data.location - VARYING_SLOT_VAR0;
  unsigned elements =
 glsl_get_vector_elements(glsl_without_array(type));
@@ -278,10 +282,6 @@ get_slot_component_masks_and_interp_types(struct exec_list 
*var_list,
  unsigned slots = glsl_count_attribute_slots(type, false);
  unsigned comps_slot2 = 0;
  for (unsigned i = 0; i < slots; i++) {
-interp_type[location + i] =
-   get_interp_type(var, type, default_to_smooth_interp);
-interp_loc[location + i] = get_interp_loc(var);
-
 if (dual_slot) {
if (i & 1) {
   comps[location + i] |= ((1 << comps_slot2) - 1);
@@ -413,32 +413,55 @@ remap_slots_and_components(struct exec_list *var_list, 
gl_shader_stage stage,
*p_out_slots_read = out_slots_read_tmp[1];
 }
 
-/* If there are empty components in the slot compact the remaining components
- * as close to component 0 as possible. This will make it easier to fill the
- * empty components with components from a different slot in a following pass.
- */
-static void
-compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
-   uint8_t *interp_type, uint8_t *interp_loc,
-   bool default_to_smooth_interp)
+struct varying_component {
+   nir_variable *var;
+   uint8_t interp_type;
+   uint8_t interp_loc;
+   bool is_patch;
+   bool initialised;
+};
+
+static int
+cmp_varying_component(const void *comp1_v, const void *comp2_v)
 {
-   struct exec_list *input_list = >inputs;
-   struct exec_list *output_list = >outputs;
-   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
+   struct varying_component *comp1 = (struct varying_component *) comp1_v;
+   struct varying_component *comp2 = (struct varying_component *) comp2_v;
 
-   /* Create a cursor for each interpolation type */
-   unsigned cursor[4] = {0};
+   /* We want patches to be order at the end of the array */
+   if (comp1->is_patch != comp2->is_patch)
+  return comp1->is_patch ? 1 : -1;
 
-   /* We only need to pass over one stage and we choose the consumer as it 
seems
-* to cause a larger reduction in instruction counts (tested on i965).
+   /* We can only pack varyings with matching interpolation types so group
+* them together.
 */
-   nir_foreach_variable(var,

[Mesa-dev] [RFC v2 4/4] nir: add crude live range analysis to nir_compact_varyings()

2018-12-09 Thread Timothy Arceri

vkpipeline-db results RADV (VEGA):

Totals from affected shaders:
SGPRS: 27168 -> 27872 (2.59 %)
VGPRS: 24180 -> 24056 (-0.51 %)
Spilled SGPRs: 28 -> 24 (-14.29 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 1584936 -> 1585552 (0.04 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 3804 -> 3824 (0.53 %)
Wait states: 0 -> 0 (0.00 %)
---
 src/compiler/nir/nir_linking_helpers.c | 45 ++
 1 file changed, 45 insertions(+)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index 8bd4acc2ee..badda80979 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -415,6 +415,8 @@ remap_slots_and_components(struct exec_list *var_list, 
gl_shader_stage stage,
 
 struct varying_component {
nir_variable *var;
+   unsigned first_block_use;
+   unsigned last_block_use;
uint8_t interp_type;
uint8_t interp_loc;
bool is_patch;
@@ -441,10 +443,36 @@ cmp_varying_component(const void *comp1_v, const void 
*comp2_v)
if (comp1->interp_loc != comp2->interp_loc)
   return comp1->interp_loc - comp2->interp_loc;
 
+   /* Attempt to reduce register pressure with crude live range analysis */
+   if (comp1->first_block_use != comp2->first_block_use)
+  return comp1->first_block_use - comp2->first_block_use;
+   if (comp1->last_block_use != comp2->last_block_use)
+  return comp1->last_block_use - comp2->last_block_use;
+
/* If everything else matches just use the original location to sort */
return comp1->var->data.location - comp2->var->data.location;
 }
 
+static void
+set_block_use(struct varying_component *vc_info, nir_src *src,
+  bool is_if_condition)
+{
+   nir_block *blk
+  = nir_cursor_current_block(nir_before_src(src, is_if_condition));
+
+   if (vc_info->initialised) {
+  if (vc_info->first_block_use > blk->index)
+ vc_info->first_block_use = blk->index;
+
+  if (vc_info->last_block_use < blk->index)
+ vc_info->last_block_use = blk->index;
+   } else {
+  vc_info->first_block_use = blk->index;
+  vc_info->last_block_use = blk->index;
+  vc_info->initialised = true;
+   }
+}
+
 static void
 gather_varying_component_info(nir_shader *consumer,
   struct varying_component **varying_comp_info,
@@ -533,6 +561,14 @@ gather_varying_component_info(nir_shader *consumer,
 vc_info->interp_loc = get_interp_loc(in_var);
 vc_info->is_patch = in_var->data.patch;
  }
+
+ nir_foreach_use(src, >dest.ssa) {
+set_block_use(vc_info, src, false);
+ }
+
+ nir_foreach_if_use(src, >dest.ssa) {
+set_block_use(vc_info, src, true);
+ }
   }
}
 }
@@ -651,6 +687,12 @@ void
 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
  bool default_to_smooth_interp)
 {
+   nir_function_impl *p_impl = nir_shader_get_entrypoint(producer);
+   nir_function_impl *c_impl = nir_shader_get_entrypoint(consumer);
+
+   nir_metadata_require(p_impl, nir_metadata_block_index);
+   nir_metadata_require(c_impl, nir_metadata_block_index);
+
assert(producer->info.stage != MESA_SHADER_FRAGMENT);
assert(consumer->info.stage != MESA_SHADER_VERTEX);
 
@@ -665,6 +707,9 @@ nir_compact_varyings(nir_shader *producer, nir_shader 
*consumer,
 
compact_components(producer, consumer, unmoveable_comps,
   default_to_smooth_interp);
+
+   nir_metadata_preserve(p_impl, nir_metadata_block_index);
+   nir_metadata_preserve(c_impl, nir_metadata_block_index);
 }
 
 /*
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 1/4] nir: add support for marking used patches when packing varyings

2018-12-09 Thread Timothy Arceri

This adds support needed for marking the varyings as used but we
don't actually support packing patches in this patch.
---
 src/compiler/nir/nir_linking_helpers.c | 73 ++
 1 file changed, 51 insertions(+), 22 deletions(-)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index a05890ada4..845aba5c87 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -289,15 +289,35 @@ struct varying_loc
uint32_t location;
 };
 
+static void
+mark_all_slots_used(nir_variable *var, uint64_t *slots_used,
+uint64_t slots_used_mask, unsigned num_slots)
+{
+   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
+
+   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
+  (((uint64_t)1 << num_slots) - 1) << (var->data.location - loc_offset);
+}
+
+static void
+mark_used_slots(nir_variable *var, uint64_t *slots_used, unsigned offset)
+{
+   unsigned loc_offset = offset - (var->data.patch ? VARYING_SLOT_PATCH0 : 0);
+
+   slots_used[var->data.patch ? 1 : 0] |= (uint64_t)1 << (var->data.location + 
loc_offset);
+}
+
 static void
 remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
struct varying_loc (*remap)[4],
-   uint64_t *slots_used, uint64_t *out_slots_read)
+   uint64_t *slots_used, uint64_t *out_slots_read,
+   uint32_t *p_slots_used, uint32_t *p_out_slots_read)
  {
-   uint64_t out_slots_read_tmp = 0;
+   uint64_t out_slots_read_tmp[2] = {0};
+   uint64_t slots_used_tmp[2] = {0};
 
/* We don't touch builtins so just copy the bitmask */
-   uint64_t slots_used_tmp =
+   slots_used_tmp[0] =
   *slots_used & (((uint64_t)1 << (VARYING_SLOT_VAR0 - 1)) - 1);
 
nir_foreach_variable(var, var_list) {
@@ -305,8 +325,8 @@ remap_slots_and_components(struct exec_list *var_list, 
gl_shader_stage stage,
 
   /* Only remap things that aren't built-ins */
   if (var->data.location >= VARYING_SLOT_VAR0 &&
-  var->data.location - VARYING_SLOT_VAR0 < 32) {
- assert(var->data.location - VARYING_SLOT_VAR0 < 32);
+  var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
+ assert(var->data.location - VARYING_SLOT_VAR0 < 
MAX_VARYINGS_INCL_PATCH);
 
  const struct glsl_type *type = var->type;
  if (nir_is_per_vertex_io(var, stage)) {
@@ -321,11 +341,17 @@ remap_slots_and_components(struct exec_list *var_list, 
gl_shader_stage stage,
  unsigned location = var->data.location - VARYING_SLOT_VAR0;
  struct varying_loc *new_loc = 
[location][var->data.location_frac];
 
- uint64_t slots = (((uint64_t)1 << num_slots) - 1) << 
var->data.location;
- if (slots & *slots_used)
+ unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
+ uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
+ uint64_t outs_used =
+var->data.patch ? *p_out_slots_read : *out_slots_read;
+ uint64_t slots =
+(((uint64_t)1 << num_slots) - 1) << (var->data.location - 
loc_offset);
+
+ if (slots & used)
 used_across_stages = true;
 
- if (slots & *out_slots_read)
+ if (slots & outs_used)
 outputs_read = true;
 
  if (new_loc->location) {
@@ -339,30 +365,29 @@ remap_slots_and_components(struct exec_list *var_list, 
gl_shader_stage stage,
  * otherwise we will mess up the mask for things like partially
  * marked arrays.
  */
-if (used_across_stages) {
-   slots_used_tmp |=
-  *slots_used & (((uint64_t)1 << num_slots) - 1) << 
var->data.location;
-}
+if (used_across_stages)
+   mark_all_slots_used(var, slots_used_tmp, used, num_slots);
 
 if (outputs_read) {
-   out_slots_read_tmp |=
-  *out_slots_read & (((uint64_t)1 << num_slots) - 1) << 
var->data.location;
+   mark_all_slots_used(var, out_slots_read_tmp, outs_used,
+   num_slots);
 }
-
  } else {
 for (unsigned i = 0; i < num_slots; i++) {
if (used_across_stages)
-  slots_used_tmp |= (uint64_t)1 << (var->data.location + i);
+  mark_used_slots(var, slots_used_tmp, i);
 
if (outputs_read)
-  out_slots_read_tmp |= (uint64_t)1 << (var->data.location + 
i);
+  mark_used_slots(var, out_slots_read_tmp, i);
 }
  }
   }
}
 
-   *slots_used = slots_used_tmp;
-   *out_slots_read = out_slots_read_tmp;
+   *slots_used = slots_used_tmp[0];
+   *out_slots_read = out_slots_read_tmp[0];
+   *p_slots_used = slots_used_tmp[1];
+   *p_out_slots_read = out_slots_read_tmp[1];
 }
 
 /* If

[Mesa-dev] [PATCH v2 2/4] nir: add is_packing_supported_for_type() helper

2018-12-09 Thread Timothy Arceri

This will be used in the following patches to determine if we
support packing the components of a varying.
---
 src/compiler/nir/nir_linking_helpers.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index 845aba5c87..c26582ddec 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -224,6 +224,29 @@ get_interp_loc(nir_variable *var)
   return INTERPOLATE_LOC_CENTER;
 }
 
+static bool
+is_packing_supported_for_type(const struct glsl_type *type)
+{
+   /* Skip types that require more complex packing handling.
+* TODO: add support for these types?
+*/
+   if (glsl_type_is_array(type) ||
+   glsl_type_is_dual_slot(type) ||
+   glsl_type_is_matrix(type) ||
+   glsl_type_is_struct(type) ||
+   glsl_type_is_64bit(type))
+  return false;
+
+   /* We ignore complex types above and all other vector types should
+* have been split into scalar variables by the lower_io_to_scalar
+* pass. The only exeption should by OpenGL xfb varyings.
+*/
+   if (glsl_get_vector_elements(type) != 1)
+  return false;
+
+   return true;
+}
+
 static void
 get_slot_component_masks_and_interp_types(struct exec_list *var_list,
   uint8_t *comps,
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/3] mesa/st/nir: fix missing nir_compact_varyings

2018-12-09 Thread Timothy Arceri



On 9/12/18 5:28 am, Rob Clark wrote:

Not entirely sure when this changed, but it seems like
LinkedTransformFeedback is (usually?) populated,


Yeah it looks like this code was wrong when introduced. I also recall 
somebody complaining the performance dropped in Shadow of Mordor with 
Eric's fix, which makes a little more sense now.


Looking over the code in link_varying.cpp what happens is we always 
create LinkedTransformFeedback for the last vertex stage. Which means we 
have not been packing the fragment shader inputs since this fix was 
introduced :( Maybe update the commit message to make this a little clearer.


Also please add:

Fixes: dbd52585fa9f ("st/nir: Disable varying packing when doing 
transform feedback.")


With those changes patches 1-2 are:

Reviewed-by: Timothy Arceri 

Thanks for looking into this.

 even if

NumVaryings is zero.  So make the check about whether it
is safe to nir_compact_varyings() a bit more complete.

Signed-off-by: Rob Clark 
---
  src/mesa/state_tracker/st_glsl_to_nir.cpp | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index d0475fb538a..7406e26e2f8 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -758,7 +758,8 @@ st_link_nir(struct gl_context *ctx,
* the pipe_stream_output->output_register field is based on the
* pre-compacted driver_locations.
*/
- if (!prev_shader->sh.LinkedTransformFeedback)
+ if (!(prev_shader->sh.LinkedTransformFeedback &&
+   prev_shader->sh.LinkedTransformFeedback->NumVarying > 0))
  
nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir,
nir, ctx->API != API_OPENGL_COMPAT);
}


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 07/20] nir: add guess trip count support to loop analysis

2018-12-07 Thread Timothy Arceri


On 8/12/18 11:16 am, Jason Ekstrand wrote:
On Thu, Dec 6, 2018 at 9:08 PM Timothy Arceri <tarc...@itsqueeze.com> wrote:


This detects an induction variable used as an array index to guess
the trip count of the loop. This enables us to do a partial
unroll of the loop, which can eventually result in the loop being
eliminated.
---
  src/compiler/nir/nir.h              |  4 ++
  src/compiler/nir/nir_loop_analyze.c | 78 ++---
  2 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ce4a81fbe1..a40e5a1418 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1878,6 +1878,7 @@ typedef struct {
     nir_block *continue_from_block;

     bool continue_from_then;
+   bool induction_rhs;

     struct list_head loop_terminator_link;
  } nir_loop_terminator;
@@ -1886,6 +1887,9 @@ typedef struct {
     /* Number of instructions in the loop */
     unsigned num_instructions;

+   /* Guessed trip count based on array indexing */
+   unsigned guessed_trip_count;
+
     /* Maximum number of times the loop is run (if known) */
     unsigned max_trip_count;

diff --git a/src/compiler/nir/nir_loop_analyze.c
b/src/compiler/nir/nir_loop_analyze.c
index eef224e4d5..ffcf2a3c27 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -382,6 +382,50 @@ find_array_access_via_induction(loop_info_state
*state,
     return 0;
  }

+static bool
+guess_loop_limit(loop_info_state *state, nir_const_value *limit_val,
+                 nir_loop_variable *basic_ind)
+{
+   nir_foreach_block_in_cf_node(block, >loop->cf_node) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         /* Check for arrays variably-indexed by a loop induction
variable. */
+         if (intrin->intrinsic == nir_intrinsic_load_deref ||
+             intrin->intrinsic == nir_intrinsic_store_deref ||
+             intrin->intrinsic == nir_intrinsic_copy_deref) {
+
+            nir_loop_variable *array_idx = NULL;
+            unsigned array_size =
+               find_array_access_via_induction(state,
+ 
  nir_src_as_deref(intrin->src[0]),

+                                               _idx);
+            if (basic_ind == array_idx) {
+               limit_val->i32[0] = array_size;
+               return true;


What if it's used for multiple array accesses of different lengths?  
This just takes the first one.  It seems like we could be smarter.


Yeah I guess so. I'll have another go at this.



+            }
+
+            if (intrin->intrinsic != nir_intrinsic_copy_deref)
+               continue;
+
+            array_size =
+               find_array_access_via_induction(state,
+ 
  nir_src_as_deref(intrin->src[1]),

+                                               _idx);
+            if (basic_ind == array_idx) {
+               limit_val->i32[0] = array_size;
+               return true;
+            }
+         }
+      }
+   }
+
+   return false;
+}
+
  static int32_t
  get_iteration(nir_op cond_op, nir_const_value *initial,
nir_const_value *step,
                nir_const_value *limit)
@@ -558,6 +602,7 @@ static void
  find_trip_count(loop_info_state *state)
  {
     bool trip_count_known = true;
+   bool guessed_trip_count = false;
     nir_loop_terminator *limiting_terminator = NULL;
     int max_trip_count = -1;

@@ -593,16 +638,33 @@ find_trip_count(loop_info_state *state)
              basic_ind = get_loop_var(alu->src[1].src.ssa, state);
              limit = get_loop_var(alu->src[0].src.ssa, state);
              limit_rhs = false;
+            terminator->induction_rhs = true;
           }

-         /* The comparison has to have a basic induction variable
-          * and a constant for us to be able to find trip counts
+         /* The comparison has to have a basic induction variable
for us to be
+          * able to find trip counts.
            */
-         if (basic_ind->type != basic_induction ||
!is_var_constant(limit)) {
+         if (basic_ind->type != basic_induction) {
              trip_count_known = false;
              continue;
           }

+         /* Attempt to find a constant limit for the loop */
+         nir_const_value limit_val;
+

[Mesa-dev] [PATCH 08/20] nir: add new partially_unrolled bool to nir_loop

2018-12-06 Thread Timothy Arceri

In order to stop continuously partially unrolling the same loop
we add the bool partially_unrolled to nir_loop. We add it here
rather than in nir_loop_info because nir_loop_info is only set
via loop analysis and is intended to be cleared before each
analysis. Also nir_loop_info is never cloned.
---
 src/compiler/nir/nir.h   | 1 +
 src/compiler/nir/nir_clone.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index a40e5a1418..bf015bb53a 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1917,6 +1917,7 @@ typedef struct {
struct exec_list body; /** < list of nir_cf_node */
 
nir_loop_info *info;
+   bool partially_unrolled;
 } nir_loop;
 
 /**
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index 989c5051a5..b229094679 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -548,6 +548,7 @@ static nir_loop *
 clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop)
 {
nir_loop *nloop = nir_loop_create(state->ns);
+   nloop->partially_unrolled = loop->partially_unrolled;
 
nir_cf_node_insert_end(cf_list, >cf_node);
 
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 10/20] nir: calculate trip count for more loops

2018-12-06 Thread Timothy Arceri

This adds support to loop analysis for loops where the induction
variable is compared to the result of min(variable, constant).

For example:

   for (int i = 0; i < imin(x, 4); i++)
  ...

We add a new bool to the loop terminator struct in order to
differentiate terminators with this exit condition.
---
 src/compiler/nir/nir.h | 11 +++
 src/compiler/nir/nir_loop_analyze.c| 41 ++
 src/compiler/nir/nir_opt_loop_unroll.c |  3 +-
 3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index bf015bb53a..f31e91a3c0 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1880,6 +1880,17 @@ typedef struct {
bool continue_from_then;
bool induction_rhs;
 
+   /* This is true if the terminators exact trip count is unknown. For
+* example:
+*
+*for (int i = 0; i < imin(x, 4); i++)
+*   ...
+*
+* Here loop analysis would have set a max_trip_count of 4 however we dont
+* know for sure that this is the exact trip count.
+*/
+   bool exact_trip_count_unknown;
+
struct list_head loop_terminator_link;
 } nir_loop_terminator;
 
diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index ffcf2a3c27..b003b1f198 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -426,6 +426,35 @@ guess_loop_limit(loop_info_state *state, nir_const_value 
*limit_val,
return false;
 }
 
+static bool
+try_find_limit_of_alu(nir_loop_variable *limit, nir_const_value *limit_val,
+  nir_loop_terminator *terminator, loop_info_state *state)
+{
+   if(!is_var_alu(limit))
+  return false;
+
+   nir_alu_instr *limit_alu = nir_instr_as_alu(limit->def->parent_instr);
+
+   if (limit_alu->op == nir_op_imin ||
+   limit_alu->op == nir_op_fmin) {
+  limit = get_loop_var(limit_alu->src[0].src.ssa, state);
+
+  if (!is_var_constant(limit))
+ limit = get_loop_var(limit_alu->src[1].src.ssa, state);
+
+  if (!is_var_constant(limit))
+ return false;
+
+  *limit_val = nir_instr_as_load_const(limit->def->parent_instr)->value;
+
+  terminator->exact_trip_count_unknown = true;
+
+  return true;
+   }
+
+   return false;
+}
+
 static int32_t
 get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step,
   nir_const_value *limit)
@@ -657,12 +686,14 @@ find_trip_count(loop_info_state *state)
  } else {
 trip_count_known = false;
 
-/* Guess loop limit based on array access */
-if (!guess_loop_limit(state, _val, basic_ind)) {
-   continue;
-}
+if (!try_find_limit_of_alu(limit, _val, terminator, state)) {
+   /* Guess loop limit based on array access */
+   if (!guess_loop_limit(state, _val, basic_ind)) {
+  continue;
+   }
 
-guessed_trip_count = true;
+   guessed_trip_count = true;
+}
  }
 
  /* We have determined that we have the following constants:
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c 
b/src/compiler/nir/nir_opt_loop_unroll.c
index d8df619b32..9630e0738a 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -830,7 +830,8 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool 
*has_nested_loop_out)
   } else {
  /* Attempt to unroll loops with two terminators. */
  unsigned num_lt = list_length(>info->loop_terminator_list);
- if (num_lt == 2) {
+ if (num_lt == 2 &&
+ !loop->info->limiting_terminator->exact_trip_count_unknown) {
 bool limiting_term_second = true;
 nir_loop_terminator *terminator =
list_first_entry(>info->loop_terminator_list,
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 17/20] nir: add helper to return inversion op of a comparision

2018-12-06 Thread Timothy Arceri

This will be used to help find the trip count of loops that look
like the following:

   while (a < x && i < 8) {
  ...
  i++;
   }

Where the NIR will end up looking something like this:

   vec1 32 ssa_0 = load_const (0x00000000 /* 0.000000 */)
   vec1 32 ssa_1 = load_const (0x00000008 /* 0.000000 */)
   loop {
  ...
  vec1 32 ssa_28 = ige ssa_26, ssa_3
  vec1 32 ssa_29 = ige ssa_27, ssa_1
  vec1 32 ssa_30 = iadd ssa_29, ssa_28
  vec1 ssa_31 = ieq ssa_30, ssa_0
  if ssa_31 {
 ...
 break
  } else {
 ...
  }
  ...
   }

So in order to find the trip count we need to find the inverse of
ige.
---
 src/compiler/nir/nir_loop_analyze.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 5446e7a120..2dd7dd7b20 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -665,6 +665,35 @@ calculate_iterations(nir_const_value *initial, 
nir_const_value *step,
return -1;
 }
 
+static nir_op
+inverse_comparision(nir_alu_instr *alu)
+{
+   switch (alu->op) {
+   case nir_op_fge:
+  return nir_op_flt;
+   case nir_op_ige:
+  return nir_op_ilt;
+   case nir_op_uge:
+  return nir_op_ult;
+   case nir_op_flt:
+  return nir_op_fge;
+   case nir_op_ilt:
+  return nir_op_ige;
+   case nir_op_ult:
+  return nir_op_uge;
+   case nir_op_feq:
+  return nir_op_fne;
+   case nir_op_ieq:
+  return nir_op_ine;
+   case nir_op_fne:
+  return nir_op_feq;
+   case nir_op_ine:
+  return nir_op_ieq;
+   default:
+  unreachable("Unsuported comparision!");
+   }
+}
+
 static bool
 is_supported_terminator_condition(nir_alu_instr *alu)
 {
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 14/20] nir: reword code comment

2018-12-06 Thread Timothy Arceri

Reviewed-by: Thomas Helland 
---
 src/compiler/nir/nir_loop_analyze.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index fbaa638884..ef69422c12 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -239,8 +239,8 @@ compute_induction_information(loop_info_state *state)
   nir_foreach_phi_src(src, phi) {
  nir_loop_variable *src_var = get_loop_var(src->src.ssa, state);
 
- /* If one of the sources is in a conditional or nested block then
-  * panic.
+ /* If one of the sources is in an if branch or nested loop then don't
+  * attempt to go any further.
   */
  if (src_var->in_if_branch || src_var->in_nested_loop)
 break;
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 13/20] nir: in loop analysis track actual control flow type

2018-12-06 Thread Timothy Arceri

This will allow us to improve analysis to find more induction
variables.

Reviewed-by: Thomas Helland 
---
 src/compiler/nir/nir_loop_analyze.c | 34 ++---
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index b003b1f198..fbaa638884 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -49,8 +49,11 @@ typedef struct {
/* If this is of type basic_induction */
struct nir_basic_induction_var *ind;
 
-   /* True if variable is in an if branch or a nested loop */
-   bool in_control_flow;
+   /* True if variable is in an if branch */
+   bool in_if_branch;
+
+   /* True if variable is in a nested loop */
+   bool in_nested_loop;
 
 } nir_loop_variable;
 
@@ -83,7 +86,8 @@ get_loop_var(nir_ssa_def *value, loop_info_state *state)
 
 typedef struct {
loop_info_state *state;
-   bool in_control_flow;
+   bool in_if_branch;
+   bool in_nested_loop;
 } init_loop_state;
 
 static bool
@@ -92,8 +96,10 @@ init_loop_def(nir_ssa_def *def, void *void_init_loop_state)
init_loop_state *loop_init_state = void_init_loop_state;
nir_loop_variable *var = get_loop_var(def, loop_init_state->state);
 
-   if (loop_init_state->in_control_flow) {
-  var->in_control_flow = true;
+   if (loop_init_state->in_nested_loop) {
+  var->in_nested_loop = true;
+   } else if (loop_init_state->in_if_branch) {
+  var->in_if_branch = true;
} else {
   /* Add to the tail of the list. That way we start at the beginning of
* the defs in the loop instead of the end when walking the list. This
@@ -110,9 +116,10 @@ init_loop_def(nir_ssa_def *def, void *void_init_loop_state)
 
 static bool
 init_loop_block(nir_block *block, loop_info_state *state,
-bool in_control_flow)
+bool in_if_branch, bool in_nested_loop)
 {
-   init_loop_state init_state = {.in_control_flow = in_control_flow,
+   init_loop_state init_state = {.in_if_branch = in_if_branch,
+ .in_nested_loop = in_nested_loop,
  .state = state };
 
nir_foreach_instr(instr, block) {
@@ -198,7 +205,7 @@ compute_invariance_information(loop_info_state *state)
 */
list_for_each_entry_safe(nir_loop_variable, var, >process_list,
 process_link) {
-  assert(!var->in_control_flow);
+  assert(!var->in_if_branch && !var->in_nested_loop);
 
   if (mark_invariant(var->def, state))
  list_del(>process_link);
@@ -216,7 +223,8 @@ compute_induction_information(loop_info_state *state)
* things in nested loops or conditionals should have been removed from
* the list by compute_invariance_information().
*/
-  assert(!var->in_control_flow && var->type != invariant);
+  assert(!var->in_if_branch && !var->in_nested_loop &&
+ var->type != invariant);
 
   /* We are only interested in checking phis for the basic induction
* variable case as its simple to detect. All basic induction variables
@@ -234,7 +242,7 @@ compute_induction_information(loop_info_state *state)
  /* If one of the sources is in a conditional or nested block then
   * panic.
   */
- if (src_var->in_control_flow)
+ if (src_var->in_if_branch || src_var->in_nested_loop)
 break;
 
  if (!src_var->in_loop) {
@@ -814,17 +822,17 @@ get_loop_info(loop_info_state *state, nir_function_impl 
*impl)
   switch (node->type) {
 
   case nir_cf_node_block:
- init_loop_block(nir_cf_node_as_block(node), state, false);
+ init_loop_block(nir_cf_node_as_block(node), state, false, false);
  break;
 
   case nir_cf_node_if:
  nir_foreach_block_in_cf_node(block, node)
-init_loop_block(block, state, true);
+init_loop_block(block, state, true, false);
  break;
 
   case nir_cf_node_loop:
  nir_foreach_block_in_cf_node(block, node) {
-init_loop_block(block, state, true);
+init_loop_block(block, state, false, true);
  }
  break;
 
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 16/20] nir: simplify the loop analysis trip count code a little

2018-12-06 Thread Timothy Arceri

Here we create a helper is_supported_terminator_condition()
and use that rather than embedding all the trip count code
inside a switch.

The new helper will also be used in a following patch.
---
 src/compiler/nir/nir_loop_analyze.c | 172 +++-
 1 file changed, 93 insertions(+), 79 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index be74105594..5446e7a120 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -665,6 +665,26 @@ calculate_iterations(nir_const_value *initial, 
nir_const_value *step,
return -1;
 }
 
+static bool
+is_supported_terminator_condition(nir_alu_instr *alu)
+{
+   switch (alu->op) {
+   case nir_op_fge:
+   case nir_op_ige:
+   case nir_op_uge:
+   case nir_op_flt:
+   case nir_op_ilt:
+   case nir_op_ult:
+   case nir_op_feq:
+   case nir_op_ieq:
+   case nir_op_fne:
+   case nir_op_ine:
+  return true;
+   default:
+  return false;
+   }
+}
+
 /* Run through each of the terminators of the loop and try to infer a possible
  * trip-count. We need to check them all, and set the lowest trip-count as the
  * trip-count of our loop. If one of the terminators has an undecidable
@@ -696,97 +716,91 @@ find_trip_count(loop_info_state *state)
   nir_loop_variable *limit = NULL;
   bool limit_rhs = true;
 
-  switch (alu->op) {
-  case nir_op_fge:  case nir_op_ige:  case nir_op_uge:
-  case nir_op_flt:  case nir_op_ilt:  case nir_op_ult:
-  case nir_op_feq:  case nir_op_ieq:
-  case nir_op_fne:  case nir_op_ine:
-
- /* We assume that the limit is the "right" operand */
- basic_ind = get_loop_var(alu->src[0].src.ssa, state);
- limit = get_loop_var(alu->src[1].src.ssa, state);
-
- if (basic_ind->type != basic_induction) {
-/* We had it the wrong way, flip things around */
-basic_ind = get_loop_var(alu->src[1].src.ssa, state);
-limit = get_loop_var(alu->src[0].src.ssa, state);
-limit_rhs = false;
-terminator->induction_rhs = true;
- }
+  if (!is_supported_terminator_condition(alu)) {
+ trip_count_known = false;
+ continue;
+  }
 
- /* The comparison has to have a basic induction variable for us to be
-  * able to find trip counts.
-  */
- if (basic_ind->type != basic_induction) {
-trip_count_known = false;
-continue;
- }
+  /* We assume that the limit is the "right" operand */
+  basic_ind = get_loop_var(alu->src[0].src.ssa, state);
+  limit = get_loop_var(alu->src[1].src.ssa, state);
 
- /* Attempt to find a constant limit for the loop */
- nir_const_value limit_val;
- if (is_var_constant(limit)) {
-limit_val =
-   nir_instr_as_load_const(limit->def->parent_instr)->value;
- } else {
-trip_count_known = false;
-
-if (!try_find_limit_of_alu(limit, _val, terminator, state)) {
-   /* Guess loop limit based on array access */
-   if (!guess_loop_limit(state, _val, basic_ind)) {
-  continue;
-   }
+  if (basic_ind->type != basic_induction) {
+ /* We had it the wrong way, flip things around */
+ basic_ind = get_loop_var(alu->src[1].src.ssa, state);
+ limit = get_loop_var(alu->src[0].src.ssa, state);
+ limit_rhs = false;
+ terminator->induction_rhs = true;
+  }
 
-   guessed_trip_count = true;
-}
- }
+  /* The comparison has to have a basic induction variable for us to be
+   * able to find trip counts.
+   */
+  if (basic_ind->type != basic_induction) {
+ trip_count_known = false;
+ continue;
+  }
 
- /* We have determined that we have the following constants:
-  * (With the typical int i = 0; i < x; i++; as an example)
-  *- Upper limit.
-  *- Starting value
-  *- Step / iteration size
-  * Thats all thats needed to calculate the trip-count
-  */
+  /* Attempt to find a constant limit for the loop */
+  nir_const_value limit_val;
+  if (is_var_constant(limit)) {
+ limit_val =
+nir_instr_as_load_const(limit->def->parent_instr)->value;
+  } else {
+ trip_count_known = false;
 
- nir_const_value initial_val =
-nir_instr_as_load_const(basic_ind->ind->def_outside_loop->
-   def->parent_instr)->value;
+ if (!try_find_limit_of_alu(limit, _val, terminator, state)) {
+/* Guess loop limit based on array access */
+if (!guess_loop_limit(state, _val, basic_ind)) {
+   continue;
+}
 
- nir_const_value step_val =
-

[Mesa-dev] [PATCH 15/20] nir: detect more induction variables

2018-12-06 Thread Timothy Arceri

This allows loop analysis to detect induction variables that
are incremented in both branches of an if rather than in a main
loop block. For example:

   loop {
  block block_1:
  /* preds: block_0 block_7 */
  vec1 32 ssa_8 = phi block_0: ssa_4, block_7: ssa_20
  vec1 32 ssa_9 = phi block_0: ssa_0, block_7: ssa_4
  vec1 32 ssa_10 = phi block_0: ssa_1, block_7: ssa_4
  vec1 32 ssa_11 = phi block_0: ssa_2, block_7: ssa_21
  vec1 32 ssa_12 = phi block_0: ssa_3, block_7: ssa_22
  vec4 32 ssa_13 = vec4 ssa_12, ssa_11, ssa_10, ssa_9
  vec1 32 ssa_14 = ige ssa_8, ssa_5
  /* succs: block_2 block_3 */
  if ssa_14 {
 block block_2:
 /* preds: block_1 */
 break
 /* succs: block_8 */
  } else {
 block block_3:
 /* preds: block_1 */
 /* succs: block_4 */
  }
  block block_4:
  /* preds: block_3 */
  vec1 32 ssa_15 = ilt ssa_6, ssa_8
  /* succs: block_5 block_6 */
  if ssa_15 {
 block block_5:
 /* preds: block_4 */
 vec1 32 ssa_16 = iadd ssa_8, ssa_7
 vec1 32 ssa_17 = load_const (0x3f800000 /* 1.000000 */)
 /* succs: block_7 */
  } else {
 block block_6:
 /* preds: block_4 */
 vec1 32 ssa_18 = iadd ssa_8, ssa_7
 vec1 32 ssa_19 = load_const (0x3f800000 /* 1.000000 */)
 /* succs: block_7 */
  }
  block block_7:
  /* preds: block_5 block_6 */
  vec1 32 ssa_20 = phi block_5: ssa_16, block_6: ssa_18
  vec1 32 ssa_21 = phi block_5: ssa_17, block_6: ssa_4
  vec1 32 ssa_22 = phi block_5: ssa_4, block_6: ssa_19
  /* succs: block_1 */
   }

GCM could move the addition out of the if for us (making this
patch unnecessary), but unfortunately we still cannot enable the
GCM pass without regressions.

This unrolls a loop in Rise of The Tomb Raider.

vkpipeline-db results (VEGA):

Totals from affected shaders:
SGPRS: 88 -> 96 (9.09 %)
VGPRS: 56 -> 52 (-7.14 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 2168 -> 4560 (110.33 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 4 -> 4 (0.00 %)
Wait states: 0 -> 0 (0.00 %)

Reviewed-by: Thomas Helland 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=32211
---
 src/compiler/nir/nir_loop_analyze.c | 36 +
 1 file changed, 36 insertions(+)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index ef69422c12..be74105594 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -245,6 +245,42 @@ compute_induction_information(loop_info_state *state)
  if (src_var->in_if_branch || src_var->in_nested_loop)
 break;
 
+ /* Detect inductions variables that are incremented in both branches
+  * of an unnested if rather than in a loop block.
+  */
+ if (is_var_phi(src_var)) {
+nir_phi_instr *src_phi =
+   nir_instr_as_phi(src_var->def->parent_instr);
+
+nir_op alu_op;
+nir_ssa_def *alu_srcs[2] = {0};
+nir_foreach_phi_src(src2, src_phi) {
+   nir_loop_variable *src_var2 =
+  get_loop_var(src2->src.ssa, state);
+
+   if (!src_var2->in_if_branch || !is_var_alu(src_var2))
+  break;
+
+   nir_alu_instr *alu =
+  nir_instr_as_alu(src_var2->def->parent_instr);
+   if (nir_op_infos[alu->op].num_inputs != 2)
+  break;
+
+   if (alu->src[0].src.ssa == alu_srcs[0] &&
+   alu->src[1].src.ssa == alu_srcs[1] &&
+   alu->op == alu_op) {
+  /* Both branches perform the same calculation so we can use
+   * one of them to find the induction variable.
+   */
+  src_var = src_var2;
+   } else {
+  alu_srcs[0] = alu->src[0].src.ssa;
+  alu_srcs[1] = alu->src[1].src.ssa;
+  alu_op = alu->op;
+   }
+}
+ }
+
  if (!src_var->in_loop) {
 biv->def_outside_loop = src_var;
  } else if (is_var_alu(src_var)) {
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 11/20] nir: unroll some loops with a variable limit

2018-12-06 Thread Timothy Arceri

Some loops have a single terminator but the exact trip count is
still unknown. For example:

   for (int i = 0; i < imin(x, 4); i++)
  ...

Shader-db results radeonsi (all affected are from Tropico 5):

Totals from affected shaders:
SGPRS: 200 -> 208 (4.00 %)
VGPRS: 164 -> 148 (-9.76 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 7208 -> 8672 (20.31 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 23 -> 27 (17.39 %)
Wait states: 0 -> 0 (0.00 %)

vkpipeline-db results RADV (Unrolls some Skyrim VR shaders):

Totals from affected shaders:
SGPRS: 304 -> 304 (0.00 %)
VGPRS: 300 -> 292 (-2.67 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 15516 -> 26388 (70.07 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 29 -> 29 (0.00 %)
Wait states: 0 -> 0 (0.00 %)

v2: fix bug where last iteration would get optimised away by
mistake.
---
 src/compiler/nir/nir_opt_loop_unroll.c | 55 ++
 1 file changed, 55 insertions(+)

diff --git a/src/compiler/nir/nir_opt_loop_unroll.c 
b/src/compiler/nir/nir_opt_loop_unroll.c
index 9630e0738a..70e6c67bde 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -460,6 +460,55 @@ complex_unroll(nir_loop *loop, nir_loop_terminator 
*unlimit_term,
_mesa_hash_table_destroy(remap_table, NULL);
 }
 
+/**
+ * Unroll loops where we only have a single terminator but the exact trip
+ * count is unknown. For example:
+ *
+ *for (int i = 0; i < imin(x, 4); i++)
+ *   ...
+ */
+static void
+complex_unroll_single_terminator(nir_loop *loop)
+{
+   assert(list_length(>info->loop_terminator_list) == 1);
+   assert(loop->info->limiting_terminator);
+   assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif,
+ 
loop->info->limiting_terminator->break_block));
+
+   nir_loop_terminator *terminator = loop->info->limiting_terminator;
+
+   loop_prepare_for_unroll(loop);
+
+   /* Pluck out the loop header */
+   nir_cf_list lp_header;
+   nir_cf_extract(_header, nir_before_block(nir_loop_first_block(loop)),
+  nir_before_cf_node(>nif->cf_node));
+
+   struct hash_table *remap_table =
+  _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+  _mesa_key_pointer_equal);
+
+   /* We need to clone the loop one extra time in order to clone the lcssa
+* vars for the last iteration (they are inside the following ifs break
+* branch). We leave other passes to clean up this redundant if.
+*/
+   unsigned num_times_to_clone = loop->info->max_trip_count + 1;
+
+   nir_cf_list lp_body;
+   nir_cf_node *unroll_loc =
+  complex_unroll_loop_body(loop, terminator, _header, _body,
+   remap_table, num_times_to_clone);
+
+   /* Delete the original loop header and body */
+   nir_cf_delete(_header);
+   nir_cf_delete(_body);
+
+   /* The original loop has been replaced so remove it. */
+   nir_cf_node_remove(>cf_node);
+
+   _mesa_hash_table_destroy(remap_table, NULL);
+}
+
 /* Unrolls the classic wrapper loops e.g
  *
  *do {
@@ -856,6 +905,12 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool 
*has_nested_loop_out)
 }
 progress = true;
  }
+
+ if (num_lt == 1) {
+assert(loop->info->limiting_terminator->exact_trip_count_unknown);
+complex_unroll_single_terminator(loop);
+progress = true;
+ }
   }
}
 
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 19/20] nir: pass nir_op to calculate_iterations()

2018-12-06 Thread Timothy Arceri

Rather than getting the op from the alu instruction, passing it
explicitly gives us some flexibility. In the following patch we
instead pass the inverse op.
---
 src/compiler/nir/nir_loop_analyze.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index fab58144ea..ea20db9dbf 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -590,7 +590,8 @@ test_iterations(int32_t iter_int, nir_const_value *step,
 static int
 calculate_iterations(nir_const_value *initial, nir_const_value *step,
  nir_const_value *limit, nir_loop_variable *alu_def,
- nir_alu_instr *cond_alu, bool limit_rhs, bool invert_cond)
+ nir_alu_instr *cond_alu, nir_op alu_op, bool limit_rhs,
+ bool invert_cond)
 {
assert(initial != NULL && step != NULL && limit != NULL);
 
@@ -605,10 +606,10 @@ calculate_iterations(nir_const_value *initial, 
nir_const_value *step,
nir_alu_type induction_base_type =
   nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type);
if (induction_base_type == nir_type_int || induction_base_type == 
nir_type_uint) {
-  
assert(nir_alu_type_get_base_type(nir_op_infos[cond_alu->op].input_types[1]) == 
nir_type_int ||
- 
nir_alu_type_get_base_type(nir_op_infos[cond_alu->op].input_types[1]) == 
nir_type_uint);
+  assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) 
== nir_type_int ||
+ nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) 
== nir_type_uint);
} else {
-  
assert(nir_alu_type_get_base_type(nir_op_infos[cond_alu->op].input_types[0]) ==
+  assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[0]) ==
  induction_base_type);
}
 
@@ -632,7 +633,7 @@ calculate_iterations(nir_const_value *initial, 
nir_const_value *step,
   trip_offset = 1;
}
 
-   int iter_int = get_iteration(cond_alu->op, initial, step, limit);
+   int iter_int = get_iteration(alu_op, initial, step, limit);
 
/* If iter_int is negative the loop is ill-formed or is the conditional is
 * unsigned with a huge iteration count so don't bother going any further.
@@ -655,7 +656,7 @@ calculate_iterations(nir_const_value *initial, 
nir_const_value *step,
for (int bias = -1; bias <= 1; bias++) {
   const int iter_bias = iter_int + bias;
 
-  if (test_iterations(iter_bias, step, limit, cond_alu->op, bit_size,
+  if (test_iterations(iter_bias, step, limit, alu_op, bit_size,
   induction_base_type, initial,
   limit_rhs, invert_cond)) {
  return iter_bias > 0 ? iter_bias - trip_offset : iter_bias;
@@ -762,6 +763,8 @@ find_trip_count(loop_info_state *state)
   }
 
   nir_alu_instr *alu = nir_instr_as_alu(terminator->conditional_instr);
+  nir_op alu_op = alu->op;
+
   if (!is_supported_terminator_condition(alu)) {
  trip_count_known = false;
  continue;
@@ -818,7 +821,7 @@ find_trip_count(loop_info_state *state)
   int iterations = calculate_iterations(_val, _val,
 _val,
 basic_ind->ind->alu_def, alu,
-limit_rhs,
+alu_op, limit_rhs,
 terminator->continue_from_then);
 
   /* Where we not able to calculate the iteration count */
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 06/20] nir: rework force_unroll_array_access()

2018-12-06 Thread Timothy Arceri

Here we rework force_unroll_array_access() so that we can reuse
the induction variable detection in a following patch.

Reviewed-by: Thomas Helland 
---
 src/compiler/nir/nir_loop_analyze.c | 49 -
 1 file changed, 35 insertions(+), 14 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 700d1fe552..eef224e4d5 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -350,6 +350,38 @@ find_loop_terminators(loop_info_state *state)
return success;
 }
 
+/* This function looks for an array access within a loop that uses an
+ * induction variable for the array index. If found it returns the size of the
+ * array, otherwise 0 is returned. If we find an induction var we pass it back
+ * to the caller via array_index_out.
+ */
+static unsigned
+find_array_access_via_induction(loop_info_state *state,
+nir_deref_instr *deref,
+nir_loop_variable **array_index_out)
+{
+   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
+  if (d->deref_type != nir_deref_type_array)
+ continue;
+
+  assert(d->arr.index.is_ssa);
+  nir_loop_variable *array_index = get_loop_var(d->arr.index.ssa, state);
+
+  if (array_index->type != basic_induction)
+ continue;
+
+  if (array_index_out)
+ *array_index_out = array_index;
+
+  nir_deref_instr *parent = nir_deref_instr_parent(d);
+  assert(glsl_type_is_array_or_matrix(parent->type));
+
+  return glsl_get_length(parent->type);
+   }
+
+   return 0;
+}
+
 static int32_t
 get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step,
   nir_const_value *limit)
@@ -626,20 +658,9 @@ find_trip_count(loop_info_state *state)
 static bool
 force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref)
 {
-   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
-  if (d->deref_type != nir_deref_type_array)
- continue;
-
-  assert(d->arr.index.is_ssa);
-  nir_loop_variable *array_index = get_loop_var(d->arr.index.ssa, state);
-
-  if (array_index->type != basic_induction)
- continue;
-
-  nir_deref_instr *parent = nir_deref_instr_parent(d);
-  assert(glsl_type_is_array(parent->type) ||
- glsl_type_is_matrix(parent->type));
-  if (glsl_get_length(parent->type) == state->loop->info->max_trip_count)
+   unsigned array_size = find_array_access_via_induction(state, deref, NULL);
+   if (array_size) {
+  if (array_size == state->loop->info->max_trip_count)
  return true;
 
   if (deref->mode & state->indirect_mask)
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 09/20] nir: add partial loop unrolling support

2018-12-06 Thread Timothy Arceri

This adds partial loop unrolling support and makes use of a
guessed trip count based on array access.

The code is written so that we could use partial unrolling
more generally, but for now it's only used when we have guessed
the trip count.

We use partial unrolling for this guessed trip count because it's
possible that an out-of-bounds array access doesn't otherwise affect
the shader, e.g. the stores/loads to/from the array are unused. So
we insert a copy of the loop in the innermost continue branch of
the unrolled loop. Later on it's possible for nir_opt_dead_cf()
to then remove the loop in some cases.

A Renderdoc capture from the Rise of the Tomb Raider benchmark,
reports the following change in an affected compute shader:

GPU duration: 350 -> 325 microseconds

shader-db results radeonsi VEGA (NIR backend):

Totals from affected shaders:
SGPRS: 1120 -> 928 (-17.14 %)
VGPRS: 768 -> 516 (-32.81 %)
Spilled SGPRs: 666 -> 157 (-76.43 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 44072 -> 51880 (17.72 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 108 -> 147 (36.11 %)
Wait states: 0 -> 0 (0.00 %)

shader-db results i965 SKL:

total instructions in shared programs: 13098265 -> 13103359 (0.04%)
instructions in affected programs: 5126 -> 10220 (99.38%)
helped: 0
HURT: 21

total cycles in shared programs: 332039949 -> 331985622 (-0.02%)
cycles in affected programs: 289252 -> 234925 (-18.78%)
helped: 12
HURT: 9

vkpipeline-db results VEGA:

Totals from affected shaders:
SGPRS: 184 -> 184 (0.00 %)
VGPRS: 448 -> 448 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 26460 -> 25092 (-5.17 %) bytes
LDS: 6 -> 6 (0.00 %) blocks
Max Waves: 5 -> 5 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
---
 src/compiler/nir/nir_opt_loop_unroll.c | 206 -
 1 file changed, 198 insertions(+), 8 deletions(-)

diff --git a/src/compiler/nir/nir_opt_loop_unroll.c 
b/src/compiler/nir/nir_opt_loop_unroll.c
index 8406880204..d8df619b32 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -556,19 +556,200 @@ wrapper_unroll(nir_loop *loop)
return true;
 }
 
+static bool
+is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref,
+unsigned trip_count)
+{
+   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
+  if (d->deref_type != nir_deref_type_array)
+ continue;
+
+  nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr);
+  nir_src src = term->induction_rhs ? alu->src[1].src : alu->src[0].src;
+  if (!nir_srcs_equal(d->arr.index, src))
+ continue;
+
+  nir_deref_instr *parent = nir_deref_instr_parent(d);
+  assert(glsl_type_is_array(parent->type) ||
+ glsl_type_is_matrix(parent->type));
+
+  /* We have already unrolled the loop and the new one will be imbedded in
+   * the innermost continue branch. So unless the array is greater than
+   * the trip count any iteration over the loop will be an out of bounds
+   * access of the array.
+   */
+  return glsl_get_length(parent->type) <= trip_count;
+   }
+
+   return false;
+}
+
+/* If we know an array access is going to be out of bounds remove or replace
+ * the access with an undef. This can later result in the entire loop being
+ * removed by nir_opt_dead_cf().
+ */
+static void
+remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop,
+   nir_loop_terminator *term,
+   nir_cf_list *lp_header,
+   nir_cf_list *lp_body,
+   unsigned trip_count)
+{
+   if (!loop->info->guessed_trip_count)
+  return;
+
+   /* Temporarily recreate the original loop so we can alter it */
+   nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop)));
+   nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop)));
+
+   nir_builder b;
+   nir_builder_init(, nir_cf_node_get_function(>cf_node));
+
+   nir_foreach_block_in_cf_node(block, >cf_node) {
+  nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ /* Check for arrays variably-indexed by a loop induction variable.
+  * If this access is out of bounds remove the instruction or replace
+  * its use with an undefined instruction.
+  * If the loop is no longer useful we leave if for the appropriate
+  * pass to clean it up for us.
+  */
+ if (intrin->intrinsic == nir_intrinsic_load_deref ||
+ intrin->intrinsic == nir_intrinsic_store_deref ||
+ intrin->intrinsic ==

[Mesa-dev] [PATCH 20/20] nir: find induction/limit vars in iand instructions

2018-12-06 Thread Timothy Arceri

This will be used to help find the trip count of loops that look
like the following:

   while (a < x && i < 8) {
  ...
  i++;
   }

Where the NIR will end up looking something like this:

   vec1 32 ssa_0 = load_const (0x00000000 /* 0.000000 */)
   vec1 32 ssa_1 = load_const (0x00000008 /* 0.000000 */)
   loop {
  ...
  vec1 32 ssa_28 = ige ssa_26, ssa_3
  vec1 32 ssa_29 = ige ssa_27, ssa_1
  vec1 32 ssa_30 = iadd ssa_29, ssa_28
  vec1 ssa_31 = ieq ssa_30, ssa_0
  if ssa_31 {
 ...
 break
  } else {
 ...
  }
  ...
   }

On RADV this unrolls a bunch of loops in F1-2017 shaders.

Totals from affected shaders:
SGPRS: 4112 -> 4032 (-1.95 %)
VGPRS: 4076 -> 3996 (-1.96 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 510184 -> 589868 (15.62 %) bytes
LDS: 2 -> 2 (0.00 %) blocks
Max Waves: 200 -> 202 (1.00 %)
Wait states: 0 -> 0 (0.00 %)

It also unrolls a couple of loops in shader-db on radeonsi.

Totals from affected shaders:
SGPRS: 128 -> 128 (0.00 %)
VGPRS: 64 -> 64 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 6880 -> 9504 (38.14 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 16 -> 16 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
---
 src/compiler/nir/nir_loop_analyze.c | 71 -
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index ea20db9dbf..27f4ee427c 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -736,6 +736,59 @@ get_induction_and_limit_vars(nir_alu_instr *alu, 
nir_loop_variable **ind,
return limit_rhs;
 }
 
+static void
+try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
+ nir_loop_variable **ind,
+ nir_loop_variable **limit,
+ bool *limit_rhs,
+ loop_info_state *state)
+{
+   assert((*alu)->op == nir_op_ieq);
+
+   nir_ssa_def *iand_def = (*alu)->src[0].src.ssa;
+   nir_ssa_def *zero_def = (*alu)->src[1].src.ssa;
+
+   if (iand_def->parent_instr->type != nir_instr_type_alu ||
+   zero_def->parent_instr->type != nir_instr_type_load_const) {
+
+  /* Maybe we had it the wrong way, flip things around */
+  iand_def = (*alu)->src[1].src.ssa;
+  zero_def = (*alu)->src[0].src.ssa;
+
+  /* If we still didn't find what we need then return */
+  if (iand_def->parent_instr->type != nir_instr_type_alu ||
+  zero_def->parent_instr->type != nir_instr_type_load_const)
+ return;
+   }
+
+   /* If the loop is not breaking on (x && y) == 0 then return */
+   nir_alu_instr *iand = nir_instr_as_alu(iand_def->parent_instr);
+   nir_const_value zero =
+  nir_instr_as_load_const(zero_def->parent_instr)->value;
+   if (iand->op != nir_op_iand || zero.i32[0] != 0)
+  return;
+
+   /* Check if iand src is a terminator condition and try to get induction var
+* and trip limit var.
+*/
+   nir_ssa_def *src = iand->src[0].src.ssa;
+   if (src->parent_instr->type == nir_instr_type_alu) {
+  *alu = nir_instr_as_alu(src->parent_instr);
+  if (is_supported_terminator_condition(*alu))
+ *limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state);
+   }
+
+   /* Try the other iand src if needed */
+   if ((*ind)->type != basic_induction) {
+  src = iand->src[1].src.ssa;
+  if (src->parent_instr->type == nir_instr_type_alu) {
+ *alu = nir_instr_as_alu(src->parent_instr);
+ if (is_supported_terminator_condition(*alu))
+*limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state);
+  }
+   }
+}
+
 /* Run through each of the terminators of the loop and try to infer a possible
  * trip-count. We need to check them all, and set the lowest trip-count as the
  * trip-count of our loop. If one of the terminators has an undecidable
@@ -774,7 +827,21 @@ find_trip_count(loop_info_state *state)
   nir_loop_variable *limit;
   bool limit_rhs = get_induction_and_limit_vars(alu, _ind, ,
 state);
-  terminator->induction_rhs = !limit_rhs;
+
+  if (basic_ind->type != basic_induction && alu->op == nir_op_ieq) {
+ trip_count_known = false;
+ terminator->exact_trip_count_unknown = true;
+
+ try_find_trip_count_vars_in_iand(, _ind, ,
+  _rhs, state);
+
+ /* The loop is exiting on (x && y) == 0 so we need to get the
+  * inverse of x or y (i.e. whichever contained the induction var) in
+  * order to compute the trip count.
+  */
+ if (basic_ind->type == basic_induction)
+

[Mesa-dev] [PATCH 18/20] nir: add get_induction_and_limit_vars() helper to loop analysis

2018-12-06 Thread Timothy Arceri

This helps make find_trip_count() a little easier to follow but
will also be used by a following patch.
---
 src/compiler/nir/nir_loop_analyze.c | 41 ++---
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 2dd7dd7b20..fab58144ea 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -714,6 +714,27 @@ is_supported_terminator_condition(nir_alu_instr *alu)
}
 }
 
+static bool
+get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable **ind,
+ nir_loop_variable **limit,
+ loop_info_state *state)
+{
+   bool limit_rhs = true;
+
+   /* We assume that the limit is the "right" operand */
+   *ind = get_loop_var(alu->src[0].src.ssa, state);
+   *limit = get_loop_var(alu->src[1].src.ssa, state);
+
+   if ((*ind)->type != basic_induction) {
+  /* We had it the wrong way, flip things around */
+  *ind = get_loop_var(alu->src[1].src.ssa, state);
+  *limit = get_loop_var(alu->src[0].src.ssa, state);
+  limit_rhs = false;
+   }
+
+   return limit_rhs;
+}
+
 /* Run through each of the terminators of the loop and try to infer a possible
  * trip-count. We need to check them all, and set the lowest trip-count as the
  * trip-count of our loop. If one of the terminators has an undecidable
@@ -741,26 +762,16 @@ find_trip_count(loop_info_state *state)
   }
 
   nir_alu_instr *alu = nir_instr_as_alu(terminator->conditional_instr);
-  nir_loop_variable *basic_ind = NULL;
-  nir_loop_variable *limit = NULL;
-  bool limit_rhs = true;
-
   if (!is_supported_terminator_condition(alu)) {
  trip_count_known = false;
  continue;
   }
 
-  /* We assume that the limit is the "right" operand */
-  basic_ind = get_loop_var(alu->src[0].src.ssa, state);
-  limit = get_loop_var(alu->src[1].src.ssa, state);
-
-  if (basic_ind->type != basic_induction) {
- /* We had it the wrong way, flip things around */
- basic_ind = get_loop_var(alu->src[1].src.ssa, state);
- limit = get_loop_var(alu->src[0].src.ssa, state);
- limit_rhs = false;
- terminator->induction_rhs = true;
-  }
+  nir_loop_variable *basic_ind;
+  nir_loop_variable *limit;
+  bool limit_rhs = get_induction_and_limit_vars(alu, _ind, ,
+state);
+  terminator->induction_rhs = !limit_rhs;
 
   /* The comparison has to have a basic induction variable for us to be
* able to find trip counts.
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 12/20] nir: add if opt opt_if_loop_last_continue()

2018-12-06 Thread Timothy Arceri

From: Danylo Piliaiev 

Removing the last continue can allow more loops to unroll. Also
inserting code into the if branch can allow the various if opts
to progress further.

The insertion of some loops into the if branch also reduces VGPR
use in some shaders.

vkpipeline-db results (VEGA):

Totals from affected shaders:
SGPRS: 6552 -> 6576 (0.37 %)
VGPRS: 6544 -> 6532 (-0.18 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 481952 -> 478032 (-0.81 %) bytes
LDS: 13 -> 13 (0.00 %) blocks
Max Waves: 241 -> 242 (0.41 %)
Wait states: 0 -> 0 (0.00 %)

Shader-db results radeonsi (VEGA):

Totals from affected shaders:
SGPRS: 168 -> 168 (0.00 %)
VGPRS: 144 -> 140 (-2.78 %)
Spilled SGPRs: 157 -> 157 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 8524 -> 8488 (-0.42 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 7 -> 7 (0.00 %)
Wait states: 0 -> 0 (0.00 %)

v2: (Timothy Arceri):
- allow for continues in either branch
- move any trailing loops inside the if as well as blocks.
- leave nir_opt_trivial_continues() to actually remove the
  continue.

Reviewed-by: Thomas Helland 
Signed-off-by: Timothy Arceri 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=32211
---
 src/compiler/nir/nir_opt_if.c | 95 +++
 1 file changed, 95 insertions(+)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index dd488b1787..4a9dffb782 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -263,6 +263,100 @@ rewrite_phi_predecessor_blocks(nir_if *nif,
}
 }
 
+static bool
+nir_block_ends_in_continue(nir_block *block)
+{
+   if (exec_list_is_empty(>instr_list))
+  return false;
+
+   nir_instr *instr = nir_block_last_instr(block);
+   return instr->type == nir_instr_type_jump &&
+  nir_instr_as_jump(instr)->type == nir_jump_continue;
+}
+
+/**
+ * This optimization turns:
+ *
+ * loop {
+ *...
+ *if (cond) {
+ *   do_work_1();
+ *   continue;
+ *} else {
+ *}
+ *do_work_2();
+ * }
+ *
+ * into:
+ *
+ * loop {
+ *...
+ *if (cond) {
+ *   do_work_1();
+ *   continue;
+ *} else {
+ *   do_work_2();
+ *}
+ * }
+ *
+ * The continue should then be removed by nir_opt_trivial_continues() and the
+ * loop can potentially be unrolled.
+ *
+ * Note: do_work_2() is only ever blocks and nested loops. We could also nest
+ * other if-statements in the branch which would allow further continues to
+ * be removed. However in practice this can result in increased register
+ * pressure.
+ */
+static bool
+opt_if_loop_last_continue(nir_loop *loop)
+{
+   /* Get the last if-statement in the loop */
+   nir_block *last_block = nir_loop_last_block(loop);
+   nir_cf_node *if_node = nir_cf_node_prev(_block->cf_node);
+   while (if_node) {
+  if (if_node->type == nir_cf_node_if)
+ break;
+
+  if_node = nir_cf_node_prev(if_node);
+   }
+
+   if (!if_node || if_node->type != nir_cf_node_if)
+  return false;
+
+   nir_if *nif = nir_cf_node_as_if(if_node);
+   nir_block *then_block = nir_if_last_then_block(nif);
+   nir_block *else_block = nir_if_last_else_block(nif);
+
+   bool then_ends_in_continue = nir_block_ends_in_continue(then_block);
+   bool else_ends_in_continue = nir_block_ends_in_continue(else_block);
+
+   /* If both branches end in a continue do nothing, this should be handled
+* by nir_opt_dead_cf().
+*/
+   if (then_ends_in_continue && else_ends_in_continue)
+  return false;
+
+   if (!then_ends_in_continue && !else_ends_in_continue)
+  return false;
+
+   /* Move the last block of the loop inside the last if-statement */
+   nir_cf_list tmp;
+   nir_cf_extract(, nir_after_cf_node(if_node),
+nir_after_block(last_block));
+   if (then_ends_in_continue) {
+  nir_cf_reinsert(, nir_after_cf_list(>else_list));
+   } else {
+  nir_cf_reinsert(, nir_after_cf_list(>then_list));
+   }
+
+   /* In order to avoid running nir_lower_regs_to_ssa_impl() every time an if
+* opt makes progress we leave nir_opt_trivial_continues() to remove the
+* continue now that the end of the loop has been simplified.
+*/
+
+   return true;
+}
+
 /**
  * This optimization turns:
  *
@@ -700,6 +794,7 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
  nir_loop *loop = nir_cf_node_as_loop(cf_node);
  progress |= opt_if_cf_list(b, >body);
  progress |= opt_peel_loop_initial_if(loop);
+ progress |= opt_if_loop_last_continue(loop);
  break;
   }
 
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 05/20] nir: factor out some of the complex loop unroll code to a helper

2018-12-06 Thread Timothy Arceri

Reviewed-by: Thomas Helland 
---
 src/compiler/nir/nir_opt_loop_unroll.c | 115 ++---
 1 file changed, 64 insertions(+), 51 deletions(-)

diff --git a/src/compiler/nir/nir_opt_loop_unroll.c 
b/src/compiler/nir/nir_opt_loop_unroll.c
index c267c185b6..8406880204 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -236,6 +236,65 @@ get_complex_unroll_insert_location(nir_cf_node *node, bool 
continue_from_then)
}
 }
 
+static nir_cf_node *
+complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term,
+ nir_cf_list *lp_header, nir_cf_list *lp_body,
+ struct hash_table *remap_table,
+ unsigned num_times_to_clone)
+{
+   /* In the terminator that we have no trip count for move everything after
+* the terminator into the continue from branch.
+*/
+   nir_cf_list loop_end;
+   nir_cf_extract(_end, nir_after_cf_node(_term->nif->cf_node),
+  nir_after_block(nir_loop_last_block(loop)));
+   move_cf_list_into_loop_term(_end, unlimit_term);
+
+   /* Pluck out the loop body. */
+   nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
+  nir_after_block(nir_loop_last_block(loop)));
+
+   /* Set unroll_loc to the loop as we will insert the unrolled loop before it
+*/
+   nir_cf_node *unroll_loc = >cf_node;
+
+   /* Temp list to store the cloned loop as we unroll */
+   nir_cf_list unrolled_lp_body;
+
+   for (unsigned i = 0; i < num_times_to_clone; i++) {
+
+  nir_cursor cursor =
+ get_complex_unroll_insert_location(unroll_loc,
+unlimit_term->continue_from_then);
+
+  /* Clone loop header and insert in if branch */
+  nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent,
+ cursor, remap_table);
+
+  cursor =
+ get_complex_unroll_insert_location(unroll_loc,
+unlimit_term->continue_from_then);
+
+  /* Clone loop body */
+  nir_cf_list_clone(_lp_body, lp_body, loop->cf_node.parent,
+remap_table);
+
+  unroll_loc = exec_node_data(nir_cf_node,
+  exec_list_get_tail(_lp_body.list),
+  node);
+  assert(unroll_loc->type == nir_cf_node_block &&
+ 
exec_list_is_empty(_cf_node_as_block(unroll_loc)->instr_list));
+
+  /* Get the unrolled if node */
+  unroll_loc = nir_cf_node_prev(unroll_loc);
+
+  /* Insert unrolled loop body */
+  nir_cf_reinsert(_lp_body, cursor);
+   }
+
+   return unroll_loc;
+}
+
 /**
  * Unroll a loop with two exists when the trip count of one of the exits is
  * unknown.  If continue_from_then is true, the loop is repeated only when the
@@ -358,60 +417,14 @@ complex_unroll(nir_loop *loop, nir_loop_terminator 
*unlimit_term,
   num_times_to_clone = loop->info->max_trip_count;
}
 
-   /* In the terminator that we have no trip count for move everything after
-* the terminator into the continue from branch.
-*/
-   nir_cf_list loop_end;
-   nir_cf_extract(_end, nir_after_cf_node(_term->nif->cf_node),
-  nir_after_block(nir_loop_last_block(loop)));
-   move_cf_list_into_loop_term(_end, unlimit_term);
-
-   /* Pluck out the loop body. */
-   nir_cf_list loop_body;
-   nir_cf_extract(_body, nir_before_block(nir_loop_first_block(loop)),
-  nir_after_block(nir_loop_last_block(loop)));
-
struct hash_table *remap_table =
   _mesa_hash_table_create(NULL, _mesa_hash_pointer,
   _mesa_key_pointer_equal);
 
-   /* Set unroll_loc to the loop as we will insert the unrolled loop before it
-*/
-   nir_cf_node *unroll_loc = >cf_node;
-
-   /* Temp lists to store the cloned loop as we unroll */
-   nir_cf_list unrolled_lp_body;
-
-   for (unsigned i = 0; i < num_times_to_clone; i++) {
-
-  nir_cursor cursor =
- get_complex_unroll_insert_location(unroll_loc,
-unlimit_term->continue_from_then);
-
-  /* Clone loop header and insert in if branch */
-  nir_cf_list_clone_and_reinsert(_header, loop->cf_node.parent,
- cursor, remap_table);
-
-  cursor =
- get_complex_unroll_insert_location(unroll_loc,
-unlimit_term->continue_from_then);
-
-  /* Clone loop body */
-  nir_cf_list_clone(_lp_body, _body, loop->cf_node.parent,
-remap_table);
-
-  unroll_loc = exec_node_data(nir_cf_node,
-  exec_list_get_tail(_lp_body.list),
-  node);
-  assert(unroll_loc->type == nir_cf_node_block &&
- 
exec_list_is_empty(_cf_node_as_block(unroll_loc)->instr_list));
-
-  /* Get the unrolled if

[Mesa-dev] [PATCH 07/20] nir: add guess trip count support to loop analysis

2018-12-06 Thread Timothy Arceri

This detects an induction variable used as an array index to guess
the trip count of the loop. This enables us to do a partial
unroll of the loop, which can eventually result in the loop being
eliminated.
---
 src/compiler/nir/nir.h  |  4 ++
 src/compiler/nir/nir_loop_analyze.c | 78 ++---
 2 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ce4a81fbe1..a40e5a1418 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1878,6 +1878,7 @@ typedef struct {
nir_block *continue_from_block;
 
bool continue_from_then;
+   bool induction_rhs;
 
struct list_head loop_terminator_link;
 } nir_loop_terminator;
@@ -1886,6 +1887,9 @@ typedef struct {
/* Number of instructions in the loop */
unsigned num_instructions;
 
+   /* Guessed trip count based on array indexing */
+   unsigned guessed_trip_count;
+
/* Maximum number of times the loop is run (if known) */
unsigned max_trip_count;
 
diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index eef224e4d5..ffcf2a3c27 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -382,6 +382,50 @@ find_array_access_via_induction(loop_info_state *state,
return 0;
 }
 
+static bool
+guess_loop_limit(loop_info_state *state, nir_const_value *limit_val,
+ nir_loop_variable *basic_ind)
+{
+   nir_foreach_block_in_cf_node(block, >loop->cf_node) {
+  nir_foreach_instr(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ /* Check for arrays variably-indexed by a loop induction variable. */
+ if (intrin->intrinsic == nir_intrinsic_load_deref ||
+ intrin->intrinsic == nir_intrinsic_store_deref ||
+ intrin->intrinsic == nir_intrinsic_copy_deref) {
+
+nir_loop_variable *array_idx = NULL;
+unsigned array_size =
+   find_array_access_via_induction(state,
+   
nir_src_as_deref(intrin->src[0]),
+   _idx);
+if (basic_ind == array_idx) {
+   limit_val->i32[0] = array_size;
+   return true;
+}
+
+if (intrin->intrinsic != nir_intrinsic_copy_deref)
+   continue;
+
+array_size =
+   find_array_access_via_induction(state,
+   
nir_src_as_deref(intrin->src[1]),
+   _idx);
+if (basic_ind == array_idx) {
+   limit_val->i32[0] = array_size;
+   return true;
+}
+ }
+  }
+   }
+
+   return false;
+}
+
 static int32_t
 get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step,
   nir_const_value *limit)
@@ -558,6 +602,7 @@ static void
 find_trip_count(loop_info_state *state)
 {
bool trip_count_known = true;
+   bool guessed_trip_count = false;
nir_loop_terminator *limiting_terminator = NULL;
int max_trip_count = -1;
 
@@ -593,16 +638,33 @@ find_trip_count(loop_info_state *state)
 basic_ind = get_loop_var(alu->src[1].src.ssa, state);
 limit = get_loop_var(alu->src[0].src.ssa, state);
 limit_rhs = false;
+terminator->induction_rhs = true;
  }
 
- /* The comparison has to have a basic induction variable
-  * and a constant for us to be able to find trip counts
+ /* The comparison has to have a basic induction variable for us to be
+  * able to find trip counts.
   */
- if (basic_ind->type != basic_induction || !is_var_constant(limit)) {
+ if (basic_ind->type != basic_induction) {
 trip_count_known = false;
 continue;
  }
 
+ /* Attempt to find a constant limit for the loop */
+ nir_const_value limit_val;
+ if (is_var_constant(limit)) {
+limit_val =
+   nir_instr_as_load_const(limit->def->parent_instr)->value;
+ } else {
+trip_count_known = false;
+
+/* Guess loop limit based on array access */
+if (!guess_loop_limit(state, _val, basic_ind)) {
+   continue;
+}
+
+guessed_trip_count = true;
+ }
+
  /* We have determined that we have the following constants:
   * (With the typical int i = 0; i < x; i++; as an example)
   *- Upper limit.
@@ -619,9 +681,6 @@ find_trip_count(loop_info_state *state)
 nir_instr_as_load_const(basic_ind->ind->invariant->def->
parent_instr)->value;
 
- nir_const_value limit_val =
-

[Mesa-dev] [PATCH 03/20] nir: add a new nir_cf_list_clone_and_reinsert() helper

2018-12-06 Thread Timothy Arceri

Reviewed-by: Thomas Helland 
---
 src/compiler/nir/nir_control_flow.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/compiler/nir/nir_control_flow.h 
b/src/compiler/nir/nir_control_flow.h
index 2ea460e5df..9111b30a29 100644
--- a/src/compiler/nir/nir_control_flow.h
+++ b/src/compiler/nir/nir_control_flow.h
@@ -145,6 +145,16 @@ void nir_cf_delete(nir_cf_list *cf_list);
 void nir_cf_list_clone(nir_cf_list *dst, nir_cf_list *src, nir_cf_node *parent,
struct hash_table *remap_table);
 
+static inline void
+nir_cf_list_clone_and_reinsert(nir_cf_list *src_list, nir_cf_node *parent,
+   nir_cursor cursor,
+   struct hash_table *remap_table)
+{
+   nir_cf_list list;
+   nir_cf_list_clone(, src_list, parent, remap_table);
+   nir_cf_reinsert(, cursor);
+}
+
 static inline void
 nir_cf_list_extract(nir_cf_list *extracted, struct exec_list *cf_list)
 {
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 04/20] nir: make use of new nir_cf_list_clone_and_reinsert() helper

2018-12-06 Thread Timothy Arceri

Reviewed-by: Thomas Helland 
---
 src/compiler/nir/nir_opt_loop_unroll.c | 76 ++
 1 file changed, 28 insertions(+), 48 deletions(-)

diff --git a/src/compiler/nir/nir_opt_loop_unroll.c 
b/src/compiler/nir/nir_opt_loop_unroll.c
index 0e9966320b..c267c185b6 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -169,32 +169,21 @@ simple_unroll(nir_loop *loop)
   _mesa_hash_table_create(NULL, _mesa_hash_pointer,
   _mesa_key_pointer_equal);
 
-   /* Clone the loop header */
-   nir_cf_list cloned_header;
-   nir_cf_list_clone(_header, _header, loop->cf_node.parent,
- remap_table);
+   /* Clone the loop header and insert before the loop */
+   nir_cf_list_clone_and_reinsert(_header, loop->cf_node.parent,
+  nir_before_cf_node(>cf_node),
+  remap_table);
 
-   /* Insert cloned loop header before the loop */
-   nir_cf_reinsert(_header, nir_before_cf_node(>cf_node));
-
-   /* Temp list to store the cloned loop body as we unroll */
-   nir_cf_list unrolled_lp_body;
-
-   /* Clone loop header and append to the loop body */
for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
-  /* Clone loop body */
-  nir_cf_list_clone(_lp_body, _body, loop->cf_node.parent,
-remap_table);
-
-  /* Insert unrolled loop body before the loop */
-  nir_cf_reinsert(_lp_body, nir_before_cf_node(>cf_node));
-
-  /* Clone loop header */
-  nir_cf_list_clone(_header, _header, loop->cf_node.parent,
-remap_table);
-
-  /* Insert loop header after loop body */
-  nir_cf_reinsert(_header, nir_before_cf_node(>cf_node));
+  /* Clone loop body and insert before the loop */
+  nir_cf_list_clone_and_reinsert(_body, loop->cf_node.parent,
+ nir_before_cf_node(>cf_node),
+ remap_table);
+
+  /* Clone loop header and insert after loop body */
+  nir_cf_list_clone_and_reinsert(_header, loop->cf_node.parent,
+ nir_before_cf_node(>cf_node),
+ remap_table);
}
 
/* Remove the break from the loop terminator and add instructions from
@@ -207,11 +196,9 @@ simple_unroll(nir_loop *loop)
   nir_after_block(limiting_term->break_block));
 
/* Clone so things get properly remapped */
-   nir_cf_list cloned_break_list;
-   nir_cf_list_clone(_break_list, _list, loop->cf_node.parent,
- remap_table);
-
-   nir_cf_reinsert(_break_list, nir_before_cf_node(>cf_node));
+   nir_cf_list_clone_and_reinsert(_list, loop->cf_node.parent,
+  nir_before_cf_node(>cf_node),
+  remap_table);
 
/* Remove the loop */
nir_cf_node_remove(>cf_node);
@@ -394,19 +381,16 @@ complex_unroll(nir_loop *loop, nir_loop_terminator 
*unlimit_term,
 
/* Temp lists to store the cloned loop as we unroll */
nir_cf_list unrolled_lp_body;
-   nir_cf_list cloned_header;
 
for (unsigned i = 0; i < num_times_to_clone; i++) {
-  /* Clone loop header */
-  nir_cf_list_clone(_header, _header, loop->cf_node.parent,
-remap_table);
 
   nir_cursor cursor =
  get_complex_unroll_insert_location(unroll_loc,
 unlimit_term->continue_from_then);
 
-  /* Insert cloned loop header */
-  nir_cf_reinsert(_header, cursor);
+  /* Clone loop header and insert in if branch */
+  nir_cf_list_clone_and_reinsert(_header, loop->cf_node.parent,
+ cursor, remap_table);
 
   cursor =
  get_complex_unroll_insert_location(unroll_loc,
@@ -432,28 +416,24 @@ complex_unroll(nir_loop *loop, nir_loop_terminator 
*unlimit_term,
if (!limiting_term_second) {
   assert(unroll_loc->type == nir_cf_node_if);
 
-  nir_cf_list_clone(_header, _header, loop->cf_node.parent,
-remap_table);
-
   nir_cursor cursor =
  get_complex_unroll_insert_location(unroll_loc,
 unlimit_term->continue_from_then);
 
-  /* Insert cloned loop header */
-  nir_cf_reinsert(_header, cursor);
-
-  /* Clone so things get properly remapped, and insert break block from
-   * the limiting terminator.
-   */
-  nir_cf_list cloned_break_blk;
-  nir_cf_list_clone(_break_blk, _break_list,
-loop->cf_node.parent, remap_table);
+  /* Clone loop header and insert in if branch */
+  nir_cf_list_clone_and_reinsert(_header, loop->cf_node.parent,
+ cursor, remap_table);
 
   cursor =
  get_complex_unroll_insert_location(unroll_loc,

[Mesa-dev] [PATCH 01/20] nir: small tidy ups for nir_loop_analyze()

2018-12-06 Thread Timothy Arceri

Reviewed-by: Thomas Helland 
---
 src/compiler/nir/nir_loop_analyze.c | 31 ++---
 1 file changed, 10 insertions(+), 21 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 9c3fd2f286..c779383b36 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -624,8 +624,7 @@ find_trip_count(loop_info_state *state)
 }
 
 static bool
-force_unroll_array_access(loop_info_state *state, nir_shader *ns,
-  nir_deref_instr *deref)
+force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref)
 {
for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
   if (d->deref_type != nir_deref_type_array)
@@ -640,23 +639,18 @@ force_unroll_array_access(loop_info_state *state, 
nir_shader *ns,
   nir_deref_instr *parent = nir_deref_instr_parent(d);
   assert(glsl_type_is_array(parent->type) ||
  glsl_type_is_matrix(parent->type));
-  if (glsl_get_length(parent->type) == state->loop->info->trip_count) {
- state->loop->info->force_unroll = true;
+  if (glsl_get_length(parent->type) == state->loop->info->trip_count)
  return true;
-  }
 
-  if (deref->mode & state->indirect_mask) {
- state->loop->info->force_unroll = true;
+  if (deref->mode & state->indirect_mask)
  return true;
-  }
}
 
return false;
 }
 
 static bool
-force_unroll_heuristics(loop_info_state *state, nir_shader *ns,
-nir_block *block)
+force_unroll_heuristics(loop_info_state *state, nir_block *block)
 {
nir_foreach_instr(instr, block) {
   if (instr->type != nir_instr_type_intrinsic)
@@ -670,12 +664,12 @@ force_unroll_heuristics(loop_info_state *state, 
nir_shader *ns,
   if (intrin->intrinsic == nir_intrinsic_load_deref ||
   intrin->intrinsic == nir_intrinsic_store_deref ||
   intrin->intrinsic == nir_intrinsic_copy_deref) {
- if (force_unroll_array_access(state, ns,
+ if (force_unroll_array_access(state,
nir_src_as_deref(intrin->src[0])))
 return true;
 
  if (intrin->intrinsic == nir_intrinsic_copy_deref &&
- force_unroll_array_access(state, ns,
+ force_unroll_array_access(state,
nir_src_as_deref(intrin->src[1])))
 return true;
   }
@@ -745,15 +739,10 @@ get_loop_info(loop_info_state *state, nir_function_impl 
*impl)
find_trip_count(state);
 
nir_shader *ns = impl->function->shader;
-   foreach_list_typed_safe(nir_cf_node, node, node, >loop->body) {
-  if (node->type == nir_cf_node_block) {
- if (force_unroll_heuristics(state, ns, nir_cf_node_as_block(node)))
-break;
-  } else {
- nir_foreach_block_in_cf_node(block, node) {
-if (force_unroll_heuristics(state, ns, block))
-   break;
- }
+   nir_foreach_block_in_cf_node(block, >loop->cf_node) {
+  if (force_unroll_heuristics(state, block)) {
+ state->loop->info->force_unroll = true;
+ break;
   }
}
 }
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 02/20] nir: clarify some nir_loop_info member names

2018-12-06 Thread Timothy Arceri

Following commits will introduce additional fields such as
guessed_trip_count. Renaming these will help avoid confusion
as our unrolling feature set grows.

Reviewed-by: Thomas Helland 
---
 src/compiler/nir/nir.h |  8 +---
 src/compiler/nir/nir_loop_analyze.c| 14 +++---
 src/compiler/nir/nir_opt_loop_unroll.c | 14 +++---
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index db935c8496..ce4a81fbe1 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1886,9 +1886,11 @@ typedef struct {
/* Number of instructions in the loop */
unsigned num_instructions;
 
-   /* How many times the loop is run (if known) */
-   unsigned trip_count;
-   bool is_trip_count_known;
+   /* Maximum number of times the loop is run (if known) */
+   unsigned max_trip_count;
+
+   /* Do we know the exact number of times the loop will be run */
+   bool exact_trip_count_known;
 
/* Unroll the loop regardless of its size */
bool force_unroll;
diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index c779383b36..700d1fe552 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -527,7 +527,7 @@ find_trip_count(loop_info_state *state)
 {
bool trip_count_known = true;
nir_loop_terminator *limiting_terminator = NULL;
-   int min_trip_count = -1;
+   int max_trip_count = -1;
 
list_for_each_entry(nir_loop_terminator, terminator,
>loop->info->loop_terminator_list,
@@ -606,8 +606,8 @@ find_trip_count(loop_info_state *state)
   * iterations than previously (we have identified a more limiting
   * terminator) set the trip count and limiting terminator.
   */
- if (min_trip_count == -1 || iterations < min_trip_count) {
-min_trip_count = iterations;
+ if (max_trip_count == -1 || iterations < max_trip_count) {
+max_trip_count = iterations;
 limiting_terminator = terminator;
  }
  break;
@@ -617,9 +617,9 @@ find_trip_count(loop_info_state *state)
   }
}
 
-   state->loop->info->is_trip_count_known = trip_count_known;
-   if (min_trip_count > -1)
-  state->loop->info->trip_count = min_trip_count;
+   state->loop->info->exact_trip_count_known = trip_count_known;
+   if (max_trip_count > -1)
+  state->loop->info->max_trip_count = max_trip_count;
state->loop->info->limiting_terminator = limiting_terminator;
 }
 
@@ -639,7 +639,7 @@ force_unroll_array_access(loop_info_state *state, 
nir_deref_instr *deref)
   nir_deref_instr *parent = nir_deref_instr_parent(d);
   assert(glsl_type_is_array(parent->type) ||
  glsl_type_is_matrix(parent->type));
-  if (glsl_get_length(parent->type) == state->loop->info->trip_count)
+  if (glsl_get_length(parent->type) == state->loop->info->max_trip_count)
  return true;
 
   if (deref->mode & state->indirect_mask)
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c 
b/src/compiler/nir/nir_opt_loop_unroll.c
index ea2012e292..0e9966320b 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -181,7 +181,7 @@ simple_unroll(nir_loop *loop)
nir_cf_list unrolled_lp_body;
 
/* Clone loop header and append to the loop body */
-   for (unsigned i = 0; i < loop->info->trip_count; i++) {
+   for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
   /* Clone loop body */
   nir_cf_list_clone(_lp_body, _body, loop->cf_node.parent,
 remap_table);
@@ -340,7 +340,7 @@ complex_unroll(nir_loop *loop, nir_loop_terminator 
*unlimit_term,
* trip count == 1 we execute the code above the break twice and the
* code below it once so we need clone things twice and so on.
*/
-  num_times_to_clone = loop->info->trip_count + 1;
+  num_times_to_clone = loop->info->max_trip_count + 1;
} else {
   /* Pluck out the loop header */
   nir_cf_extract(_header, nir_before_block(header_blk),
@@ -368,7 +368,7 @@ complex_unroll(nir_loop *loop, nir_loop_terminator 
*unlimit_term,
 
   nir_cf_node_remove(_term->nif->cf_node);
 
-  num_times_to_clone = loop->info->trip_count;
+  num_times_to_clone = loop->info->max_trip_count;
}
 
/* In the terminator that we have no trip count for move everything after
@@ -568,14 +568,14 @@ is_loop_small_enough_to_unroll(nir_shader *shader, 
nir_loop_info *li)
 {
unsigned max_iter = shader->options->max_unroll_iterations;
 
-   if (li->trip_count > max_iter)
+   if (li->max_trip_count > max_iter)
   return false;
 
if (li->force_unroll)
   return true;
 
bool loop_not_too_large =
-  li->num_instructions * li->trip_count <= max_iter * LOOP_UNROLL_LIMIT;
+  li->num_instructions * li->max_trip_count <= max_iter * 
LOOP_UNROLL_LIMIT;
 
return

[Mesa-dev] More loop unrolling

2018-12-06 Thread Timothy Arceri

This is three series combined. I've sent the first two previously
(patch 1-11 & patch 12-15) and they have been partially reviewed
by Thomas. Please see the previous sends of those series for cover
letters.

There is a small bug fix in patch 11 that was discovered by some
new piglit tests [1]. Otherwise those series only contain small
changes suggested by Thomas during review.

Patches 16-20 are new and improve loop analysis so we can unroll
more loops with the unroll function introduced in patch 11.

[1] https://patchwork.freedesktop.org/series/53712/


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 1/3] nir: add glsl_replace_vector_type()

2018-12-03 Thread Timothy Arceri


Ping! on the series
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Partial loop unrolling support v2

2018-12-03 Thread Timothy Arceri


Ping!

On 20/11/18 7:14 pm, Timothy Arceri wrote:

This series adds support for partial loop unrolling for loops with an
unknown trip count. The new partial_unroll function allows the caller
to specify how many times the loop should be unrolled and then the
loop is inserted in the innermost continue branch of the unrolled loop.

For now we only do partial unrolling for loops where we can guess the
iteration count based on array access that uses an induction variable
as its index (see patch 7).

Patches 1-6 are tidy-ups/refactors.

Patch 7 adds a method for guessing the trip count for the loop.

Patch 8-9 add partial unrolling support and also support for removing
redundant load/stores from the remaining loop when considering if
we would be accessing an array out of bounds. Eliminating these out of
bounds accesses allows the loop to be completely removed in
some cases.

Patch 10-11 are support for full unrolling of some loops pointed
out to me by Jason.

v2:
- added patch 2 to clarify some loop info member names and hopefully
   make the code easier to follow now that the number of unrolling
   scenarios are growing.
- added patches 10-11 these do full unrolling rather than partial
   unrolling but make use of some of the refactors done by this series.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] nir: add support for marking used patches when packing varyings

2018-12-03 Thread Timothy Arceri

This adds support needed for marking the varyings as used but we
don't actually support packing patches in this patch.
---
 src/compiler/nir/nir_linking_helpers.c | 73 ++
 1 file changed, 51 insertions(+), 22 deletions(-)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index a05890ada4..845aba5c87 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -289,15 +289,35 @@ struct varying_loc
uint32_t location;
 };
 
+static void
+mark_all_slots_used(nir_variable *var, uint64_t *slots_used,
+uint64_t slots_used_mask, unsigned num_slots)
+{
+   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
+
+   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
+  (((uint64_t)1 << num_slots) - 1) << (var->data.location - loc_offset);
+}
+
+static void
+mark_used_slots(nir_variable *var, uint64_t *slots_used, unsigned offset)
+{
+   unsigned loc_offset = offset - (var->data.patch ? VARYING_SLOT_PATCH0 : 0);
+
+   slots_used[var->data.patch ? 1 : 0] |= (uint64_t)1 << (var->data.location + 
loc_offset);
+}
+
 static void
 remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
struct varying_loc (*remap)[4],
-   uint64_t *slots_used, uint64_t *out_slots_read)
+   uint64_t *slots_used, uint64_t *out_slots_read,
+   uint32_t *p_slots_used, uint32_t *p_out_slots_read)
  {
-   uint64_t out_slots_read_tmp = 0;
+   uint64_t out_slots_read_tmp[2] = {0};
+   uint64_t slots_used_tmp[2] = {0};
 
/* We don't touch builtins so just copy the bitmask */
-   uint64_t slots_used_tmp =
+   slots_used_tmp[0] =
   *slots_used & (((uint64_t)1 << (VARYING_SLOT_VAR0 - 1)) - 1);
 
nir_foreach_variable(var, var_list) {
@@ -305,8 +325,8 @@ remap_slots_and_components(struct exec_list *var_list, 
gl_shader_stage stage,
 
   /* Only remap things that aren't built-ins */
   if (var->data.location >= VARYING_SLOT_VAR0 &&
-  var->data.location - VARYING_SLOT_VAR0 < 32) {
- assert(var->data.location - VARYING_SLOT_VAR0 < 32);
+  var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
+ assert(var->data.location - VARYING_SLOT_VAR0 < 
MAX_VARYINGS_INCL_PATCH);
 
  const struct glsl_type *type = var->type;
  if (nir_is_per_vertex_io(var, stage)) {
@@ -321,11 +341,17 @@ remap_slots_and_components(struct exec_list *var_list, 
gl_shader_stage stage,
  unsigned location = var->data.location - VARYING_SLOT_VAR0;
  struct varying_loc *new_loc = 
[location][var->data.location_frac];
 
- uint64_t slots = (((uint64_t)1 << num_slots) - 1) << 
var->data.location;
- if (slots & *slots_used)
+ unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
+ uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
+ uint64_t outs_used =
+var->data.patch ? *p_out_slots_read : *out_slots_read;
+ uint64_t slots =
+(((uint64_t)1 << num_slots) - 1) << (var->data.location - 
loc_offset);
+
+ if (slots & used)
 used_across_stages = true;
 
- if (slots & *out_slots_read)
+ if (slots & outs_used)
 outputs_read = true;
 
  if (new_loc->location) {
@@ -339,30 +365,29 @@ remap_slots_and_components(struct exec_list *var_list, 
gl_shader_stage stage,
  * otherwise we will mess up the mask for things like partially
  * marked arrays.
  */
-if (used_across_stages) {
-   slots_used_tmp |=
-  *slots_used & (((uint64_t)1 << num_slots) - 1) << 
var->data.location;
-}
+if (used_across_stages)
+   mark_all_slots_used(var, slots_used_tmp, used, num_slots);
 
 if (outputs_read) {
-   out_slots_read_tmp |=
-  *out_slots_read & (((uint64_t)1 << num_slots) - 1) << 
var->data.location;
+   mark_all_slots_used(var, out_slots_read_tmp, outs_used,
+   num_slots);
 }
-
  } else {
 for (unsigned i = 0; i < num_slots; i++) {
if (used_across_stages)
-  slots_used_tmp |= (uint64_t)1 << (var->data.location + i);
+  mark_used_slots(var, slots_used_tmp, i);
 
if (outputs_read)
-  out_slots_read_tmp |= (uint64_t)1 << (var->data.location + 
i);
+  mark_used_slots(var, out_slots_read_tmp, i);
 }
  }
   }
}
 
-   *slots_used = slots_used_tmp;
-   *out_slots_read = out_slots_read_tmp;
+   *slots_used = slots_used_tmp[0];
+   *out_slots_read = out_slots_read_tmp[0];
+   *p_slots_used = slots_used_tmp[1];
+   *p_out_slots_read = out_slots_read_tmp[1];
 }
 
 /* If

[Mesa-dev] [RFC 2/2] nir: rewrite varying component packing

2018-12-03 Thread Timothy Arceri

There are three reasons for the rewrite.

1. Adding support for packing tess patch varyings in a sane way.

2. Making use of qsort allowing the code to be much easier to
   follow.

3. Adding a crude live range analysis for deciding which components
   should be packed together.

We could simplify get_unmoveable_components_masks() a bit more as
we just skip these slots if we come across them but I've left it
for now in case we want to pack components in these slots in future.

vkshader-db RADV (VEGA):

Totals from affected shaders:
SGPRS: 103384 -> 103880 (0.48 %)
VGPRS: 70384 -> 70072 (-0.44 %)
Spilled SGPRs: 116 -> 112 (-3.45 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 4875040 -> 4905848 (0.63 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 18777 -> 18861 (0.45 %)
Wait states: 0 -> 0 (0.00 %)
---
 src/compiler/nir/nir_linking_helpers.c | 368 +
 1 file changed, 259 insertions(+), 109 deletions(-)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index 845aba5c87..bf31509cf0 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -224,22 +224,43 @@ get_interp_loc(nir_variable *var)
   return INTERPOLATE_LOC_CENTER;
 }
 
+static bool
+is_packing_supported_for_type(const struct glsl_type *type)
+{
+   /* Skip types that require more complex packing handling.
+* TODO: add support for these types?
+*/
+   if (glsl_type_is_array(type) ||
+   glsl_type_is_dual_slot(type) ||
+   glsl_type_is_matrix(type) ||
+   glsl_type_is_struct(type) ||
+   glsl_type_is_64bit(type))
+  return false;
+
+   /* We ignore complex types above and all other vector types should
+* have been split into scalar variables by the lower_io_to_scalar
+* pass. The only exception should be OpenGL xfb varyings.
+*/
+   if (glsl_get_vector_elements(type) != 1)
+  return false;
+
+   return true;
+}
+
+/* If we cannot pack a component this function marks the components we cannot
+ * move.
+ */
 static void
-get_slot_component_masks_and_interp_types(struct exec_list *var_list,
-  uint8_t *comps,
-  uint8_t *interp_type,
-  uint8_t *interp_loc,
-  gl_shader_stage stage,
-  bool default_to_smooth_interp)
+get_unmoveable_components_masks(struct exec_list *var_list, uint8_t *comps,
+gl_shader_stage stage,
+bool default_to_smooth_interp)
 {
nir_foreach_variable_safe(var, var_list) {
   assert(var->data.location >= 0);
 
-  /* Only remap things that aren't built-ins.
-   * TODO: add TES patch support.
-   */
+  /* Only remap things that aren't built-ins. */
   if (var->data.location >= VARYING_SLOT_VAR0 &&
-  var->data.location - VARYING_SLOT_VAR0 < 32) {
+  var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
 
  const struct glsl_type *type = var->type;
  if (nir_is_per_vertex_io(var, stage)) {
@@ -247,6 +268,12 @@ get_slot_component_masks_and_interp_types(struct exec_list 
*var_list,
 type = glsl_get_array_element(type);
  }
 
+ /* If we can pack this varying then don't mark the components as
+  * used.
+  */
+ if (is_packing_supported_for_type(type))
+continue;
+
  unsigned location = var->data.location - VARYING_SLOT_VAR0;
  unsigned elements =
 glsl_get_vector_elements(glsl_without_array(type));
@@ -255,10 +282,6 @@ get_slot_component_masks_and_interp_types(struct exec_list 
*var_list,
  unsigned slots = glsl_count_attribute_slots(type, false);
  unsigned comps_slot2 = 0;
  for (unsigned i = 0; i < slots; i++) {
-interp_type[location + i] =
-   get_interp_type(var, type, default_to_smooth_interp);
-interp_loc[location + i] = get_interp_loc(var);
-
 if (dual_slot) {
if (i & 1) {
   comps[location + i] |= ((1 << comps_slot2) - 1);
@@ -390,32 +413,83 @@ remap_slots_and_components(struct exec_list *var_list, 
gl_shader_stage stage,
*p_out_slots_read = out_slots_read_tmp[1];
 }
 
-/* If there are empty components in the slot compact the remaining components
- * as close to component 0 as possible. This will make it easier to fill the
- * empty components with components from a different slot in a following pass.
- */
+struct varying_component {
+   nir_variable *var;
+   unsigned first_block_use;
+   unsigned last_block_use;
+   uint8_t interp_type;
+   uint8_t interp_loc;
+   bool is_patch;
+   bool initialised;
+};
+
+static int
+cmp_varying_component(const void *comp1_v, const

Re: [Mesa-dev] Representing explicit memory layouts in NIR

2018-11-30 Thread Timothy Arceri


On 1/12/18 9:11 am, Jason Ekstrand wrote:

All,

This week, I've been working on trying to move UBO and SSBO access in 
NIR over to deref instructions.  I'm hoping that this will allow us to 
start doing alias analysis and copy-propagation on it.  The passes we 
have in NIR *should* be able to work with SSBOs as long as 
nir_compare_derefs does the right thing.


# A story about derefs

In that effort, I've run into a bit of a snag with how to represent the 
layout information.  What we get in from SPIR-V for Vulkan is a byte 
offset for every struct member and a byte stride for every array (and 
pointer in the OpPtrAccessChain case).  For matrices, there is an 
additional RowMajor boolean we need to track somewhere.  With OpenCL 
memory access, you don't get these decorations but it's trivial to 
translate the OpenCL layout (It's the same as C) to offset/stride when 
creating the type.  I've come up with three different ways to represent 
the information and they all have their own downsides:


## 1. Put the information on the glsl_type similar to how it's done in 
SPIR-V


This has the advantage of being fairly non-invasive to glsl_type.  A lot 
of the fields we need are already there and the only real change is to 
allow array types to have strides.  The downside is that the information 
is often not where you want.  Arrays and structs are ok but, for 
matrices, you have to go fishing all the way back to the struct type to 
get the RowMajor and MatrixStride decorations.  (Thanks, SPIR-V...)  
While this seems like a local annoyance, it actually destroys basically 
all the advantages of having the information on the type and makes 
lower_io a real pain.


## 2. Put the information on the type but do it properly

In this version, we would put the matrix stride and RowMajor decoration 
directly on the matrix type.  One obvious advantage here is that it 
means no fishing for matrix type information.  Another is that, by 
having the types specialized like this, the only way to change layouts 
mid-deref-chain would be to have a cast.  Option 1 doesn't provide this 
because matrix types are the same regardless of whether or not they're 
declared RowMajor in the struct.  The downside to this option is that it 
requires glsl_type surgery to make it work.  More on that in a bit.


## 3. Put the information directly on the deref

Instead of putting the stride/offset information on the type, we just 
put it on the deref as we build the deref chain.  This is easy enough to 
do in spirv_to_nir and someone could also do it easily enough as a 
lowering pass based on a type_size function.  This has the advantage of 
simplicity because you don't have to modify glsl_type at all and 
lowering is stupid-easy because all the information you need is right 
there on the deref.  The downside, however, is that your alias analysis 
is potentially harder because you don't have the nice guarantee that you 
don't see a layout change without a type cast.  The other downside is 
that we can't ever use copy_deref with anything bigger than a vector 
because you don't know the sizes of any types and, unless spirv_to_nir 
puts the offset/stride information on the deref, there's no way to 
reconstruct it.


I've prototyped both 1 and 3 so far and I definitely like 3 better than 
1 but it's not great.  I haven't prototyped 2 yet due to the issue 
mentioned with glsl_type.


Between 2 and 3, I really don't know how much we actually lose in terms 
of our ability to do alias analysis.  I've written the alias analysis 
for 3 and it isn't too bad.  I'm also not sure how much we would 
actually lose from not being able to express whole-array or 
whole-struct copies.  However, without a good reason otherwise, option 2 
really seems like it's the best of all worlds


# glsl_type surgery

You want a good reason, eh?  You should have known this was coming...

The problem with option 2 above is that it requires significant 
glsl_type surgery to do it.  Putting decorations on matrices violates 
one of the core principles of glsl_type, namely that all fundamental 
types: scalars, vectors, matrices, images, and samplers are singletons.  
Other types such as structs and arrays we build on-the-fly and cache 
as-needed.  In order to do what we need for option 2 above, you have to 
at least drop this for matrices and possibly vectors (the columns of a 
row-major mat4 are vectors with a stride of 16).  Again, I see two options:


## A. Major rework of the guts of glsl_type

Basically, get rid of the static singletons and just use the build 
on-the-fly and cache model for everything.  This would mean that mat4 == 
mat4 is no longer guaranteed unless you know a priori that none of your 
types are decorated with layout information.  It would also be, not only 
a pile of work, but a single mega-patch.  I don't know of any way to 
make that change without just ripping it all up and putting it back 
together.


Do we really need to throw away the singleton

Re: [Mesa-dev] [PATCH 4/4] nir: detect more induction variables

2018-11-30 Thread Timothy Arceri


On 30/11/18 5:38 pm, Thomas Helland wrote:

Den ons. 28. nov. 2018 kl. 10:23 skrev Timothy Arceri :


On 28/11/18 6:52 pm, Thomas Helland wrote:

Den ons. 28. nov. 2018 kl. 04:26 skrev Timothy Arceri :


This allows loop analysis to detect induction variables that
are incremented in both branches of an if rather than in a main
loop block. For example:

 loop {
block block_1:
/* preds: block_0 block_7 */
vec1 32 ssa_8 = phi block_0: ssa_4, block_7: ssa_20
vec1 32 ssa_9 = phi block_0: ssa_0, block_7: ssa_4
vec1 32 ssa_10 = phi block_0: ssa_1, block_7: ssa_4
vec1 32 ssa_11 = phi block_0: ssa_2, block_7: ssa_21
vec1 32 ssa_12 = phi block_0: ssa_3, block_7: ssa_22
vec4 32 ssa_13 = vec4 ssa_12, ssa_11, ssa_10, ssa_9
vec1 32 ssa_14 = ige ssa_8, ssa_5
/* succs: block_2 block_3 */
if ssa_14 {
   block block_2:
   /* preds: block_1 */
   break
   /* succs: block_8 */
} else {
   block block_3:
   /* preds: block_1 */
   /* succs: block_4 */
}
block block_4:
/* preds: block_3 */
vec1 32 ssa_15 = ilt ssa_6, ssa_8
/* succs: block_5 block_6 */
if ssa_15 {
   block block_5:
   /* preds: block_4 */
   vec1 32 ssa_16 = iadd ssa_8, ssa_7
   vec1 32 ssa_17 = load_const (0x3f800000 /* 1.000000 */)
   /* succs: block_7 */
} else {
   block block_6:
   /* preds: block_4 */
   vec1 32 ssa_18 = iadd ssa_8, ssa_7
   vec1 32 ssa_19 = load_const (0x3f800000 /* 1.000000 */)
   /* succs: block_7 */
}
block block_7:
/* preds: block_5 block_6 */
vec1 32 ssa_20 = phi block_5: ssa_16, block_6: ssa_18
vec1 32 ssa_21 = phi block_5: ssa_17, block_6: ssa_4
vec1 32 ssa_22 = phi block_5: ssa_4, block_6: ssa_19
/* succs: block_1 */
 }

Unfortunately GCM could move the addition out of the if for us
(making this patch unrequired) but we still cannot enable the GCM
pass without regressions.



Just some questions / suggestions from my side for now.
I'll try to take a closer look at the patch later today.

While GCM would be nice, to me it seems that adding an
if-opt instead, that pulls common code from both branches
of an if out of the if on a more general basis, would get us
this, plus a bunch of other benefits? As far as I can see there
should never be negative impacts from pulling common code
out like that, but I might be wrong. Did you look into that?
I bet you did, I'm just interested in how that worked out.


I didn't attempt this because pulling code out of the ifs can increase
register pressure. This is one of the problems we have with the GCM pass
currently, so for now I chose a more conservative approach.



Yeah, of course. I'm being dumb. It looks better in source code,
but as long as it does not lead to other optimizations it will only
cause the live range of the add to intersect with that of the branch
condition. The same amount of instructions will be executed
either way.



Since GCM is not yet where we want it to be, maybe we'd
want to implement LICM? That obviously does not come
into play with what this patch addresses, but it might help
get a more accurate estimate of the cost/benefit of unrolling?
(Invariant computations that will be CSE'd will not be
counted multiple times). This might already be accounted
for by counting the invariant computations only once?


No we don't do anything like this currently. The GCM pass can pull
things out of loops also, but again we hit register pressure issues with
that pass.

As far as I'm aware reducing invariants is not where we get most of our
wins from with unrolling. Removing indirect array access, improving
opportunities for constant folding (and a bunch of other passes), being
able to evaluate the unfolded loop with the surrounding code etc all
result in greater benefits.

With the limits we place on making sure we don't unroll large loops that
are going to cause register use issues, nobody has yet been able to show
that always unrolling loops is causing any harm, and it's certainly been
shown to help :)


Thanks for taking the time with my stupidity =) I'll try to take a look at
these patches later tonight =)


All valid questions :) Thanks for taking a look.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] nir: add a compiler option for disabling float comparison simplifications

2018-11-29 Thread Timothy Arceri

On Thu, Nov 29, 2018, at 7:47 AM, Connor Abbott wrote:
> On Thu, Nov 29, 2018 at 4:22 PM Jason Ekstrand  wrote:
> >
> > Can you provide some context for this?  Those rules are already flagged 
> > "inexact" (that's what the ~ means) so they won't apply to anything that's 
> > "precise" or "invariant".
> 
> I think the concern is that this isn't allowed in SPIR-V, even without
> exact or invariant. We even go out of our way to do the correct thing
> in the frontend by inserting an "&& a == a" or "|| a != a", but then
> opt_algebraic removes it with another rule and then this rule can flip
> it from ordered to unordered. The spec says that operations don't have
> to produce NaN, but it doesn't say anything on comparisons other than
> the generic "everything must follow IEEE rules" and an entry in the
> table that says "produces correct results." Then again, I can't find
> anything in GLSL allowing these transforms either, so maybe we just
> need to get rid of them.

FYI here are the shader-db results (SKL) from removing them:

total instructions in shared programs: 12858124 -> 12889104 (0.24%)
instructions in affected programs: 1687380 -> 1718360 (1.84%)
helped: 2
HURT: 7073

total cycles in shared programs: 317838109 -> 318266406 (0.13%)
cycles in affected programs: 62285268 -> 62713565 (0.69%)
helped: 1017
HURT: 6552

total loops in shared programs: 3808 -> 3808 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0

total spills in shared programs: 6793 -> 6785 (-0.12%)
spills in affected programs: 166 -> 158 (-4.82%)
helped: 2
HURT: 0

total fills in shared programs: 9561 -> 9541 (-0.21%)
fills in affected programs: 852 -> 832 (-2.35%)
helped: 2
HURT: 3

LOST:   0
GAINED: 1 

> 
> >
> > On Thu, Nov 29, 2018 at 9:18 AM Samuel Pitoiset  
> > wrote:
> >>
> >> It's correct in GLSL because the behaviour is undefined in
> >> presence of NaNs. But this seems incorrect in Vulkan.
> >>
> >> Signed-off-by: Samuel Pitoiset 
> >> ---
> >>  src/compiler/nir/nir.h| 6 ++
> >>  src/compiler/nir/nir_opt_algebraic.py | 8 
> >>  2 files changed, 10 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> >> index db935c8496b..4107c293962 100644
> >> --- a/src/compiler/nir/nir.h
> >> +++ b/src/compiler/nir/nir.h
> >> @@ -2188,6 +2188,12 @@ typedef struct nir_shader_compiler_options {
> >> /* Set if nir_lower_wpos_ytransform() should also invert 
> >> gl_PointCoord. */
> >> bool lower_wpos_pntc;
> >>
> >> +   /* If false, lower ~inot(flt(a,b)) -> fge(a,b) and variants.
> >> +* In presence of NaNs, this is correct in GLSL because the 
> >> behaviour is
> >> +* undefined. In Vulkan, doing these transformations is incorrect.
> >> +*/
> >> +   bool exact_float_comparisons;
> >> +
> >> /**
> >>  * Should nir_lower_io() create load_interpolated_input intrinsics?
> >>  *
> >> diff --git a/src/compiler/nir/nir_opt_algebraic.py 
> >> b/src/compiler/nir/nir_opt_algebraic.py
> >> index f2a7be0c403..3750874407b 100644
> >> --- a/src/compiler/nir/nir_opt_algebraic.py
> >> +++ b/src/compiler/nir/nir_opt_algebraic.py
> >> @@ -154,10 +154,10 @@ optimizations = [
> >> (('ishl', ('imul', a, '#b'), '#c'), ('imul', a, ('ishl', b, c))),
> >>
> >> # Comparison simplifications
> >> -   (('~inot', ('flt', a, b)), ('fge', a, b)),
> >> -   (('~inot', ('fge', a, b)), ('flt', a, b)),
> >> -   (('~inot', ('feq', a, b)), ('fne', a, b)),
> >> -   (('~inot', ('fne', a, b)), ('feq', a, b)),
> >> +   (('~inot', ('flt', a, b)), ('fge', a, b), 
> >> '!options->exact_float_comparisons'),
> >> +   (('~inot', ('fge', a, b)), ('flt', a, b), 
> >> '!options->exact_float_comparisons'),
> >> +   (('~inot', ('feq', a, b)), ('fne', a, b), 
> >> '!options->exact_float_comparisons'),
> >> +   (('~inot', ('fne', a, b)), ('feq', a, b), 
> >> '!options->exact_float_comparisons'),
> >
> >
> > The feq/fne one is actually completely safe.  fne is defined to be !feq 
> > even when NaN is considered.
> >
> > --Jasoan
> >
> >>
> >> (('inot', ('ilt', a, b)), ('ige', a, b)),
> >> (('inot', ('ult', a, b)), ('uge', a, b)),
> >> (('inot', ('ige', a, b)), ('ilt', a, b)),
> >> --
> >> 2.19.2
> >>
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] nir: detect more induction variables

2018-11-28 Thread Timothy Arceri


On 28/11/18 6:52 pm, Thomas Helland wrote:

Den ons. 28. nov. 2018 kl. 04:26 skrev Timothy Arceri :


This allows loop analysis to detect induction variables that
are incremented in both branches of an if rather than in a main
loop block. For example:

loop {
   block block_1:
   /* preds: block_0 block_7 */
   vec1 32 ssa_8 = phi block_0: ssa_4, block_7: ssa_20
   vec1 32 ssa_9 = phi block_0: ssa_0, block_7: ssa_4
   vec1 32 ssa_10 = phi block_0: ssa_1, block_7: ssa_4
   vec1 32 ssa_11 = phi block_0: ssa_2, block_7: ssa_21
   vec1 32 ssa_12 = phi block_0: ssa_3, block_7: ssa_22
   vec4 32 ssa_13 = vec4 ssa_12, ssa_11, ssa_10, ssa_9
   vec1 32 ssa_14 = ige ssa_8, ssa_5
   /* succs: block_2 block_3 */
   if ssa_14 {
  block block_2:
  /* preds: block_1 */
  break
  /* succs: block_8 */
   } else {
  block block_3:
  /* preds: block_1 */
  /* succs: block_4 */
   }
   block block_4:
   /* preds: block_3 */
   vec1 32 ssa_15 = ilt ssa_6, ssa_8
   /* succs: block_5 block_6 */
   if ssa_15 {
  block block_5:
  /* preds: block_4 */
  vec1 32 ssa_16 = iadd ssa_8, ssa_7
  vec1 32 ssa_17 = load_const (0x3f800000 /* 1.000000 */)
  /* succs: block_7 */
   } else {
  block block_6:
  /* preds: block_4 */
  vec1 32 ssa_18 = iadd ssa_8, ssa_7
  vec1 32 ssa_19 = load_const (0x3f800000 /* 1.000000 */)
  /* succs: block_7 */
   }
   block block_7:
   /* preds: block_5 block_6 */
   vec1 32 ssa_20 = phi block_5: ssa_16, block_6: ssa_18
   vec1 32 ssa_21 = phi block_5: ssa_17, block_6: ssa_4
   vec1 32 ssa_22 = phi block_5: ssa_4, block_6: ssa_19
   /* succs: block_1 */
}

Unfortunately GCM could move the addition out of the if for us
(making this patch unrequired) but we still cannot enable the GCM
pass without regressions.



Just some questions / suggestions from my side for now.
I'll try to take a closer look at the patch later today.

While GCM would be nice, to me it seems that adding an
if-opt instead, that pulls common code from both branches
of an if out of the if on a more general basis, would get us
this, plus a bunch of other benefits? As far as I can see there
should never be negative impacts from pulling common code
out like that, but I might be wrong. Did you look into that?
I bet you did, I'm just interested in how that worked out.


I didn't attempt this because pulling code out of the ifs can increase 
register pressure. This is one of the problems we have with the GCM pass 
currently, so for now I chose a more conservative approach.




Since GCM is not yet where we want it to be, maybe we'd
want to implement LICM? That obviously does not come
into play with what this patch addresses, but it might help
get a more accurate estimate of the cost/benefit of unrolling?
(Invariant computations that will be CSE'd will not be
counted multiple times). This might already be accounted
for by counting the invariant computations only once?


No we don't do anything like this currently. The GCM pass can pull 
things out of loops also, but again we hit register pressure issues with 
that pass.


As far as I'm aware reducing invariants is not where we get most of our 
wins from with unrolling. Removing indirect array access, improving 
opportunities for constant folding (and a bunch of other passes), being 
able to evaluate the unfolded loop with the surrounding code etc all 
result in greater benefits.


With the limits we place on making sure we don't unroll large loops that 
are going to cause register use issues, nobody has yet been able to show 
that always unrolling loops is causing any harm, and it's certainly been 
shown to help :)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/4] nir: in loop analysis track actual control flow type

2018-11-27 Thread Timothy Arceri

This will allow us to improve analysis to find more induction
variables.
---
 src/compiler/nir/nir_loop_analyze.c | 34 ++---
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 587e9d7865..c804a66ac4 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -49,8 +49,11 @@ typedef struct {
/* If this is of type basic_induction */
struct nir_basic_induction_var *ind;
 
-   /* True if variable is in an if branch or a nested loop */
-   bool in_control_flow;
+   /* True if variable is in an if branch */
+   bool in_if_branch;
+
+   /* True if variable is in a nested loop */
+   bool in_nested_loop;
 
 } nir_loop_variable;
 
@@ -83,7 +86,8 @@ get_loop_var(nir_ssa_def *value, loop_info_state *state)
 
 typedef struct {
loop_info_state *state;
-   bool in_control_flow;
+   bool in_if_branch;
+   bool in_nested_loop;
 } init_loop_state;
 
 static bool
@@ -92,8 +96,10 @@ init_loop_def(nir_ssa_def *def, void *void_init_loop_state)
init_loop_state *loop_init_state = void_init_loop_state;
nir_loop_variable *var = get_loop_var(def, loop_init_state->state);
 
-   if (loop_init_state->in_control_flow) {
-  var->in_control_flow = true;
+   if (loop_init_state->in_nested_loop) {
+  var->in_nested_loop = true;
+   } else if (loop_init_state->in_if_branch) {
+  var->in_if_branch = true;
} else {
   /* Add to the tail of the list. That way we start at the beginning of
* the defs in the loop instead of the end when walking the list. This
@@ -110,9 +116,10 @@ init_loop_def(nir_ssa_def *def, void *void_init_loop_state)
 
 static bool
 init_loop_block(nir_block *block, loop_info_state *state,
-bool in_control_flow)
+bool in_if_branch, bool in_nested_loop)
 {
-   init_loop_state init_state = {.in_control_flow = in_control_flow,
+   init_loop_state init_state = {.in_if_branch = in_if_branch,
+ .in_nested_loop = in_nested_loop,
  .state = state };
 
nir_foreach_instr(instr, block) {
@@ -198,7 +205,7 @@ compute_invariance_information(loop_info_state *state)
 */
list_for_each_entry_safe(nir_loop_variable, var, >process_list,
 process_link) {
-  assert(!var->in_control_flow);
+  assert(!var->in_if_branch && !var->in_nested_loop);
 
   if (mark_invariant(var->def, state))
  list_del(>process_link);
@@ -216,7 +223,8 @@ compute_induction_information(loop_info_state *state)
* things in nested loops or conditionals should have been removed from
* the list by compute_invariance_information().
*/
-  assert(!var->in_control_flow && var->type != invariant);
+  assert(!var->in_if_branch && !var->in_nested_loop &&
+ var->type != invariant);
 
   /* We are only interested in checking phis for the basic induction
* variable case as its simple to detect. All basic induction variables
@@ -234,7 +242,7 @@ compute_induction_information(loop_info_state *state)
  /* If one of the sources is in a conditional or nested block then
   * panic.
   */
- if (src_var->in_control_flow)
+ if (src_var->in_if_branch || src_var->in_nested_loop)
 break;
 
  if (!src_var->in_loop) {
@@ -833,17 +841,17 @@ get_loop_info(loop_info_state *state, nir_function_impl 
*impl)
   switch (node->type) {
 
   case nir_cf_node_block:
- init_loop_block(nir_cf_node_as_block(node), state, false);
+ init_loop_block(nir_cf_node_as_block(node), state, false, false);
  break;
 
   case nir_cf_node_if:
  nir_foreach_block_in_cf_node(block, node)
-init_loop_block(block, state, true);
+init_loop_block(block, state, true, false);
  break;
 
   case nir_cf_node_loop:
  nir_foreach_block_in_cf_node(block, node) {
-init_loop_block(block, state, true);
+init_loop_block(block, state, false, true);
  }
  break;
 
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

< 1 2 3 4 5 6 7 8 9 10 >

201 - 300 of 6672 matches

Mail list logo