Re: [Mesa-dev] [PATCH 2/2] [rfc] radv: inline push constants where possible. (v2)

2018-01-12 Thread Alex Smith
Looks like it's working fine here now. One comment inline below.

On 12 January 2018 at 02:43, Dave Airlie  wrote:

> From: Dave Airlie 
>
> Instead of putting the push constants into the upload buffer,
> if we have space in the sgprs we can upload the per-stage
> constants into the shaders directly.
>
> This saves a few reads from memory in the meta shaders,
> we should also be able to inline other objects like
> descriptors.
>
> v2: fixup 16->available_sgprs (Samuel)
> fixup dynamic offsets. (Alex)
> bump to 12.
> handle push consts > 32 better, avoid F1 2017 crash
>
> TODO: proper vega support (Samuel)
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/common/ac_nir_to_llvm.c  | 102 ++
> +
>  src/amd/common/ac_nir_to_llvm.h  |   5 ++
>  src/amd/common/ac_shader_info.c  |   5 +-
>  src/amd/common/ac_shader_info.h  |   1 +
>  src/amd/vulkan/radv_cmd_buffer.c |  75 +---
>  5 files changed, 159 insertions(+), 29 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_
> llvm.c
> index 6c578de3aca..00ad76a82f7 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -92,6 +92,7 @@ struct nir_to_llvm_context {
> LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
> LLVMValueRef ring_offsets;
> LLVMValueRef push_constants;
> +   LLVMValueRef inline_push_consts[AC_UD_MAX_INLINE_PUSH_CONST];
> LLVMValueRef view_index;
> LLVMValueRef num_work_groups;
> LLVMValueRef workgroup_ids[3];
> @@ -243,7 +244,7 @@ static void set_llvm_calling_convention(LLVMValueRef
> func,
> LLVMSetFunctionCallConv(func, calling_conv);
>  }
>
> -#define MAX_ARGS 23
> +#define MAX_ARGS 32
>  struct arg_info {
> LLVMTypeRef types[MAX_ARGS];
> LLVMValueRef *assign[MAX_ARGS];
> @@ -538,6 +539,8 @@ struct user_sgpr_info {
> bool need_ring_offsets;
> uint8_t sgpr_count;
> bool indirect_all_descriptor_sets;
> +   uint8_t base_inline_push_consts;
> +   uint8_t num_inline_push_consts;
>  };
>
>  static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
> @@ -609,8 +612,49 @@ static void allocate_user_sgprs(struct
> nir_to_llvm_context *ctx,
> } else {
> user_sgpr_info->sgpr_count += 
> util_bitcount(ctx->shader_info->info.desc_set_used_mask)
> * 2;
> }
> +
> +   if (ctx->shader_info->info.loads_push_constants) {
> +   uint32_t remaining_sgprs = available_sgprs -
> user_sgpr_info->sgpr_count;
> +   if (!ctx->shader_info->info.has_indirect_push_constants &&
> +   !ctx->shader_info->info.loads_dynamic_offsets)
> +   remaining_sgprs += 2;
> +
> +   if (ctx->options->layout->push_constant_size) {
> +   uint8_t num_32bit_push_consts =
> (ctx->shader_info->info.max_push_constant_used -
> +
> ctx->shader_info->info.min_push_constant_used) / 4;
> +
> +   if ((ctx->shader_info->info.min_push_constant_used
> / 4) <= 63 &&
> +   (ctx->shader_info->info.max_push_constant_used
> / 4) <= 63) {
> +   user_sgpr_info->base_inline_push_consts =
> ctx->shader_info->info.min_push_constant_used / 4;
> +
> +   if (num_32bit_push_consts <
> remaining_sgprs) {
> +   user_sgpr_info->num_inline_push_consts
> = num_32bit_push_consts;
> +   if (!ctx->shader_info->info.has_
> indirect_push_constants)
> +
>  ctx->shader_info->info.loads_push_constants = false;
> +   } else {
> +   user_sgpr_info->num_inline_push_consts
> = remaining_sgprs;
> +   }
> +
> +   if (user_sgpr_info->num_inline_push_consts
> > AC_UD_MAX_INLINE_PUSH_CONST)
> +   user_sgpr_info->num_inline_push_consts
> = AC_UD_MAX_INLINE_PUSH_CONST;
>

This is done after possibly setting loads_push_constants to false, if this
happens shouldn't that still be true?

With that fixed, for the series:

Reviewed-by: Alex Smith 

Thanks,
Alex


> +   }
> +   }
> +   }
>  }
>
> +static void
> +declare_inline_push_consts(struct nir_to_llvm_context *ctx,
> +  gl_shader_stage stage,
> +  const struct user_sgpr_info *user_sgpr_info,
> +  struct arg_info *args)
> +{
> +   ctx->shader_info->inline_push_const_mask = (1ULL <<
> user_sgpr_info->num_inline_push_consts) - 1;
> +   ctx->shader_info->inline_push_const_base =
> user_sgpr_info->base_inline_push_consts;
> +
> +   for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts;
> i++)
> +   

[Mesa-dev] [PATCH 2/2] [rfc] radv: inline push constants where possible. (v2)

2018-01-11 Thread Dave Airlie
From: Dave Airlie 

Instead of putting the push constants into the upload buffer,
if we have space in the sgprs we can upload the per-stage
constants into the shaders directly.

This saves a few reads from memory in the meta shaders,
we should also be able to inline other objects like
descriptors.

v2: fixup 16->available_sgprs (Samuel)
fixup dynamic offsets. (Alex)
bump to 12.
handle push consts > 32 better, avoid F1 2017 crash

TODO: proper vega support (Samuel)

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_nir_to_llvm.c  | 102 +++
 src/amd/common/ac_nir_to_llvm.h  |   5 ++
 src/amd/common/ac_shader_info.c  |   5 +-
 src/amd/common/ac_shader_info.h  |   1 +
 src/amd/vulkan/radv_cmd_buffer.c |  75 +---
 5 files changed, 159 insertions(+), 29 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 6c578de3aca..00ad76a82f7 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -92,6 +92,7 @@ struct nir_to_llvm_context {
LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
LLVMValueRef ring_offsets;
LLVMValueRef push_constants;
+   LLVMValueRef inline_push_consts[AC_UD_MAX_INLINE_PUSH_CONST];
LLVMValueRef view_index;
LLVMValueRef num_work_groups;
LLVMValueRef workgroup_ids[3];
@@ -243,7 +244,7 @@ static void set_llvm_calling_convention(LLVMValueRef func,
LLVMSetFunctionCallConv(func, calling_conv);
 }
 
-#define MAX_ARGS 23
+#define MAX_ARGS 32
 struct arg_info {
LLVMTypeRef types[MAX_ARGS];
LLVMValueRef *assign[MAX_ARGS];
@@ -538,6 +539,8 @@ struct user_sgpr_info {
bool need_ring_offsets;
uint8_t sgpr_count;
bool indirect_all_descriptor_sets;
+   uint8_t base_inline_push_consts;
+   uint8_t num_inline_push_consts;
 };
 
 static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
@@ -609,8 +612,49 @@ static void allocate_user_sgprs(struct nir_to_llvm_context 
*ctx,
} else {
user_sgpr_info->sgpr_count += 
util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
}
+
+   if (ctx->shader_info->info.loads_push_constants) {
+   uint32_t remaining_sgprs = available_sgprs - 
user_sgpr_info->sgpr_count;
+   if (!ctx->shader_info->info.has_indirect_push_constants &&
+   !ctx->shader_info->info.loads_dynamic_offsets)
+   remaining_sgprs += 2;
+
+   if (ctx->options->layout->push_constant_size) {
+   uint8_t num_32bit_push_consts = 
(ctx->shader_info->info.max_push_constant_used -
+
ctx->shader_info->info.min_push_constant_used) / 4;
+
+   if ((ctx->shader_info->info.min_push_constant_used / 4) 
<= 63 &&
+   (ctx->shader_info->info.max_push_constant_used / 4) 
<= 63) {
+   user_sgpr_info->base_inline_push_consts = 
ctx->shader_info->info.min_push_constant_used / 4;
+
+   if (num_32bit_push_consts < remaining_sgprs) {
+   user_sgpr_info->num_inline_push_consts 
= num_32bit_push_consts;
+   if 
(!ctx->shader_info->info.has_indirect_push_constants)
+   
ctx->shader_info->info.loads_push_constants = false;
+   } else {
+   user_sgpr_info->num_inline_push_consts 
= remaining_sgprs;
+   }
+
+   if (user_sgpr_info->num_inline_push_consts > 
AC_UD_MAX_INLINE_PUSH_CONST)
+   user_sgpr_info->num_inline_push_consts 
= AC_UD_MAX_INLINE_PUSH_CONST;
+   }
+   }
+   }
 }
 
+static void
+declare_inline_push_consts(struct nir_to_llvm_context *ctx,
+  gl_shader_stage stage,
+  const struct user_sgpr_info *user_sgpr_info,
+  struct arg_info *args)
+{
+   ctx->shader_info->inline_push_const_mask = (1ULL << 
user_sgpr_info->num_inline_push_consts) - 1;
+   ctx->shader_info->inline_push_const_base = 
user_sgpr_info->base_inline_push_consts;
+
+   for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts; i++)
+   add_arg(args, ARG_SGPR, ctx->ac.i32, 
>inline_push_consts[i]);
+
+}
 static void
 declare_global_input_sgprs(struct nir_to_llvm_context *ctx,
   gl_shader_stage stage,
@@ -640,10 +684,14 @@ declare_global_input_sgprs(struct nir_to_llvm_context 
*ctx,
add_array_arg(args, const_array(type, 32), desc_sets);
}
 
-   if (ctx->shader_info->info.loads_push_constants) {
+   if