Re: [Mesa-dev] [PATCH v2] radeonsi/nir: gather buffers declared more accurately and use const fast path

2018-04-01 Thread Marek Olšák
On Fri, Mar 30, 2018 at 4:18 AM, Timothy Arceri 
wrote:

> For now we skip SI && HAVE_LLVM < 0x0600 for simplicity. We also skip
> setting the more accurate masks for some builtin uniforms for now as
> it causes some piglit regressions.
> ---
>  src/gallium/drivers/radeonsi/si_shader.c |  7 +++
>  src/gallium/drivers/radeonsi/si_shader_nir.c | 90
> ++--
>  2 files changed, 91 insertions(+), 6 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
> b/src/gallium/drivers/radeonsi/si_shader.c
> index 714415edba7..13b588eb68f 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2374,8 +2374,15 @@ static LLVMValueRef load_const_buffer_desc(struct
> si_shader_context *ctx, int i)
>  static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef
> index)
>  {
> struct si_shader_context *ctx = si_shader_context_from_abi(abi);
> +   struct si_shader_selector *sel = ctx->shader->selector;
> +
> LLVMValueRef ptr = LLVMGetParam(ctx->main_fn,
> ctx->param_const_and_shader_buffers);
>
> +   if (sel->info.const_buffers_declared == 1 &&
> +   sel->info.shader_buffers_declared == 0) {
> +   return load_const_buffer_desc_fast_path(ctx);
> +   }
> +
> index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
> index = LLVMBuildAdd(ctx->ac.builder, index,
>  LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS,
> 0), "");
> diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c
> b/src/gallium/drivers/radeonsi/si_shader_nir.c
> index 437eefc54d0..e116c23815b 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_nir.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
> @@ -600,23 +600,98 @@ void si_nir_scan_shader(const struct nir_shader *nir,
>
> info->num_outputs = num_outputs;
>
> +   struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer,
> +  _mesa_key_pointer_equal);
> +
> +   unsigned ubo_idx = 1;
> nir_foreach_variable(variable, &nir->uniforms) {
> const struct glsl_type *type = variable->type;
> enum glsl_base_type base_type =
> glsl_get_base_type(glsl_without_array(type));
> unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));
>
> +   /* Gather buffers declared bitmasks. Note: radeonsi doesn't
> +* really use the mask (other than ubo_idx == 1 for regular
> +* uniforms); it's really only used for getting the buffer
> count
> +* so we don't need to worry about the ordering.
> +*/
> +   if (variable->interface_type != NULL) {
> +   if (variable->data.mode == nir_var_uniform) {
> +
> +   unsigned block_count;
> +   if (base_type != GLSL_TYPE_INTERFACE) {
> +   struct set_entry *entry =
> +   _mesa_set_search(ubo_set,
> variable->interface_type);
> +
> +   /* Check if we have already
> processed
> +* a member from this ubo.
> +*/
> +   if (entry)
> +   continue;
> +
> +   block_count = 1;
> +   } else {
> +   block_count = aoa_size;
> +   }
> +
> +   info->const_buffers_declared |=
> u_bit_consecutive(ubo_idx, block_count);
> +   ubo_idx += block_count;
> +
> +   _mesa_set_add(ubo_set,
> variable->interface_type);
> +   }
> +
> +   if (variable->data.mode == nir_var_shader_storage)
> {
> +   /* TODO: make this more accurate */
> +   info->shader_buffers_declared =
> +   u_bit_consecutive(0,
> SI_NUM_SHADER_BUFFERS);
> +   }
> +
> +   continue;
> +   }
> +
> /* We rely on the fact that nir_lower_samplers_as_deref has
>  * eliminated struct dereferences.
>  */
> -   if (base_type == GLSL_TYPE_SAMPLER)
> +   if (base_type == GLSL_TYPE_SAMPLER) {
> info->samplers_declared |=
> u_bit_consecutive(variable->data.binding,
> aoa_size);
> -   else if (base_type == GLSL_TYPE_IMAGE)
> +
> +   if (variable->data.bindless) {
> +   info->const_buffers_declared |= 1;

[Mesa-dev] [PATCH v2] radeonsi/nir: gather buffers declared more accurately and use const fast path

2018-03-30 Thread Timothy Arceri
For now we skip SI && HAVE_LLVM < 0x0600 for simplicity. We also skip
setting the more accurate masks for some builtin uniforms for now as
it causes some piglit regressions.
---
 src/gallium/drivers/radeonsi/si_shader.c |  7 +++
 src/gallium/drivers/radeonsi/si_shader_nir.c | 90 ++--
 2 files changed, 91 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 714415edba7..13b588eb68f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2374,8 +2374,15 @@ static LLVMValueRef load_const_buffer_desc(struct 
si_shader_context *ctx, int i)
 static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
 {
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+   struct si_shader_selector *sel = ctx->shader->selector;
+
LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, 
ctx->param_const_and_shader_buffers);
 
+   if (sel->info.const_buffers_declared == 1 &&
+   sel->info.shader_buffers_declared == 0) {
+   return load_const_buffer_desc_fast_path(ctx);
+   }
+
index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
index = LLVMBuildAdd(ctx->ac.builder, index,
 LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), 
"");
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 437eefc54d0..e116c23815b 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -600,23 +600,98 @@ void si_nir_scan_shader(const struct nir_shader *nir,
 
info->num_outputs = num_outputs;
 
+   struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer,
+  _mesa_key_pointer_equal);
+
+   unsigned ubo_idx = 1;
nir_foreach_variable(variable, &nir->uniforms) {
const struct glsl_type *type = variable->type;
enum glsl_base_type base_type =
glsl_get_base_type(glsl_without_array(type));
unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));
 
+   /* Gather buffers declared bitmasks. Note: radeonsi doesn't
+* really use the mask (other than ubo_idx == 1 for regular
+* uniforms); it's really only used for getting the buffer count
+* so we don't need to worry about the ordering.
+*/
+   if (variable->interface_type != NULL) {
+   if (variable->data.mode == nir_var_uniform) {
+
+   unsigned block_count;
+   if (base_type != GLSL_TYPE_INTERFACE) {
+   struct set_entry *entry =
+   _mesa_set_search(ubo_set, 
variable->interface_type);
+
+   /* Check if we have already processed
+* a member from this ubo.
+*/
+   if (entry)
+   continue;
+
+   block_count = 1;
+   } else {
+   block_count = aoa_size;
+   }
+
+   info->const_buffers_declared |= 
u_bit_consecutive(ubo_idx, block_count);
+   ubo_idx += block_count;
+
+   _mesa_set_add(ubo_set, 
variable->interface_type);
+   }
+
+   if (variable->data.mode == nir_var_shader_storage) {
+   /* TODO: make this more accurate */
+   info->shader_buffers_declared =
+   u_bit_consecutive(0, 
SI_NUM_SHADER_BUFFERS);
+   }
+
+   continue;
+   }
+
/* We rely on the fact that nir_lower_samplers_as_deref has
 * eliminated struct dereferences.
 */
-   if (base_type == GLSL_TYPE_SAMPLER)
+   if (base_type == GLSL_TYPE_SAMPLER) {
info->samplers_declared |=
u_bit_consecutive(variable->data.binding, 
aoa_size);
-   else if (base_type == GLSL_TYPE_IMAGE)
+
+   if (variable->data.bindless) {
+   info->const_buffers_declared |= 1;
+   info->const_file_max[0] +=
+   glsl_count_attribute_slots(type, false);
+   }
+   } else if (base_type == GLSL_TYPE_IMAGE) {
info->images_declared |=