Re: [Mesa-dev] [PATCH 2/2] [rfc] radv: inline push constants where possible. (v2)
Looks like it's working fine here now. One comment inline below. On 12 January 2018 at 02:43, Dave Airlie wrote: > From: Dave Airlie > > Instead of putting the push constants into the upload buffer, > if we have space in the sgprs we can upload the per-stage > constants into the shaders directly. > > This saves a few reads from memory in the meta shaders, > we should also be able to inline other objects like > descriptors. > > v2: fixup 16->available_sgprs (Samuel) > fixup dynamic offsets. (Alex) > bump to 12. > handle push consts > 32 better, avoid F1 2017 crash > > TODO: proper vega support (Samuel) > > Signed-off-by: Dave Airlie > --- > src/amd/common/ac_nir_to_llvm.c | 102 ++ > + > src/amd/common/ac_nir_to_llvm.h | 5 ++ > src/amd/common/ac_shader_info.c | 5 +- > src/amd/common/ac_shader_info.h | 1 + > src/amd/vulkan/radv_cmd_buffer.c | 75 +--- > 5 files changed, 159 insertions(+), 29 deletions(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_ > llvm.c > index 6c578de3aca..00ad76a82f7 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -92,6 +92,7 @@ struct nir_to_llvm_context { > LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; > LLVMValueRef ring_offsets; > LLVMValueRef push_constants; > + LLVMValueRef inline_push_consts[AC_UD_MAX_INLINE_PUSH_CONST]; > LLVMValueRef view_index; > LLVMValueRef num_work_groups; > LLVMValueRef workgroup_ids[3]; > @@ -243,7 +244,7 @@ static void set_llvm_calling_convention(LLVMValueRef > func, > LLVMSetFunctionCallConv(func, calling_conv); > } > > -#define MAX_ARGS 23 > +#define MAX_ARGS 32 > struct arg_info { > LLVMTypeRef types[MAX_ARGS]; > LLVMValueRef *assign[MAX_ARGS]; > @@ -538,6 +539,8 @@ struct user_sgpr_info { > bool need_ring_offsets; > uint8_t sgpr_count; > bool indirect_all_descriptor_sets; > + uint8_t base_inline_push_consts; > + uint8_t num_inline_push_consts; > }; > > static void allocate_user_sgprs(struct nir_to_llvm_context *ctx, > @@ -609,8 +612,49 
@@ static void allocate_user_sgprs(struct > nir_to_llvm_context *ctx, > } else { > user_sgpr_info->sgpr_count += > util_bitcount(ctx->shader_info->info.desc_set_used_mask) > * 2; > } > + > + if (ctx->shader_info->info.loads_push_constants) { > + uint32_t remaining_sgprs = available_sgprs - > user_sgpr_info->sgpr_count; > + if (!ctx->shader_info->info.has_indirect_push_constants && > + !ctx->shader_info->info.loads_dynamic_offsets) > + remaining_sgprs += 2; > + > + if (ctx->options->layout->push_constant_size) { > + uint8_t num_32bit_push_consts = > (ctx->shader_info->info.max_push_constant_used - > + > ctx->shader_info->info.min_push_constant_used) / 4; > + > + if ((ctx->shader_info->info.min_push_constant_used > / 4) <= 63 && > + (ctx->shader_info->info.max_push_constant_used > / 4) <= 63) { > + user_sgpr_info->base_inline_push_consts = > ctx->shader_info->info.min_push_constant_used / 4; > + > + if (num_32bit_push_consts < > remaining_sgprs) { > + user_sgpr_info->num_inline_push_consts > = num_32bit_push_consts; > + if (!ctx->shader_info->info.has_ > indirect_push_constants) > + > ctx->shader_info->info.loads_push_constants = false; > + } else { > + user_sgpr_info->num_inline_push_consts > = remaining_sgprs; > + } > + > + if (user_sgpr_info->num_inline_push_consts > > AC_UD_MAX_INLINE_PUSH_CONST) > + user_sgpr_info->num_inline_push_consts > = AC_UD_MAX_INLINE_PUSH_CONST; > This clamp is applied after loads_push_constants may already have been set to false; if the clamp actually reduces num_inline_push_consts, shouldn't loads_push_constants still be true? 
With that fixed, for the series: Reviewed-by: Alex Smith. Thanks, Alex. > + } > + } > + } > } > > +static void > +declare_inline_push_consts(struct nir_to_llvm_context *ctx, > + gl_shader_stage stage, > + const struct user_sgpr_info *user_sgpr_info, > + struct arg_info *args) > +{ > + ctx->shader_info->inline_push_const_mask = (1ULL << > user_sgpr_info->num_inline_push_consts) - 1; > + ctx->shader_info->inline_push_const_base = > user_sgpr_info->base_inline_push_consts; > + > + for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts; > i++) > +
[Mesa-dev] [PATCH 2/2] [rfc] radv: inline push constants where possible. (v2)
From: Dave AirlieInstead of putting the push constants into the upload buffer, if we have space in the sgprs we can upload the per-stage constants into the shaders directly. This saves a few reads from memory in the meta shaders, we should also be able to inline other objects like descriptors. v2: fixup 16->available_sgprs (Samuel) fixup dynamic offsets. (Alex) bump to 12. handle push consts > 32 better, avoid F1 2017 crash TODO: proper vega support (Samuel) Signed-off-by: Dave Airlie --- src/amd/common/ac_nir_to_llvm.c | 102 +++ src/amd/common/ac_nir_to_llvm.h | 5 ++ src/amd/common/ac_shader_info.c | 5 +- src/amd/common/ac_shader_info.h | 1 + src/amd/vulkan/radv_cmd_buffer.c | 75 +--- 5 files changed, 159 insertions(+), 29 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 6c578de3aca..00ad76a82f7 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -92,6 +92,7 @@ struct nir_to_llvm_context { LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; LLVMValueRef ring_offsets; LLVMValueRef push_constants; + LLVMValueRef inline_push_consts[AC_UD_MAX_INLINE_PUSH_CONST]; LLVMValueRef view_index; LLVMValueRef num_work_groups; LLVMValueRef workgroup_ids[3]; @@ -243,7 +244,7 @@ static void set_llvm_calling_convention(LLVMValueRef func, LLVMSetFunctionCallConv(func, calling_conv); } -#define MAX_ARGS 23 +#define MAX_ARGS 32 struct arg_info { LLVMTypeRef types[MAX_ARGS]; LLVMValueRef *assign[MAX_ARGS]; @@ -538,6 +539,8 @@ struct user_sgpr_info { bool need_ring_offsets; uint8_t sgpr_count; bool indirect_all_descriptor_sets; + uint8_t base_inline_push_consts; + uint8_t num_inline_push_consts; }; static void allocate_user_sgprs(struct nir_to_llvm_context *ctx, @@ -609,8 +612,49 @@ static void allocate_user_sgprs(struct nir_to_llvm_context *ctx, } else { user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2; } + + if (ctx->shader_info->info.loads_push_constants) { + 
uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count; + if (!ctx->shader_info->info.has_indirect_push_constants && + !ctx->shader_info->info.loads_dynamic_offsets) + remaining_sgprs += 2; + + if (ctx->options->layout->push_constant_size) { + uint8_t num_32bit_push_consts = (ctx->shader_info->info.max_push_constant_used - + ctx->shader_info->info.min_push_constant_used) / 4; + + if ((ctx->shader_info->info.min_push_constant_used / 4) <= 63 && + (ctx->shader_info->info.max_push_constant_used / 4) <= 63) { + user_sgpr_info->base_inline_push_consts = ctx->shader_info->info.min_push_constant_used / 4; + + if (num_32bit_push_consts < remaining_sgprs) { + user_sgpr_info->num_inline_push_consts = num_32bit_push_consts; + if (!ctx->shader_info->info.has_indirect_push_constants) + ctx->shader_info->info.loads_push_constants = false; + } else { + user_sgpr_info->num_inline_push_consts = remaining_sgprs; + } + + if (user_sgpr_info->num_inline_push_consts > AC_UD_MAX_INLINE_PUSH_CONST) + user_sgpr_info->num_inline_push_consts = AC_UD_MAX_INLINE_PUSH_CONST; + } + } + } } +static void +declare_inline_push_consts(struct nir_to_llvm_context *ctx, + gl_shader_stage stage, + const struct user_sgpr_info *user_sgpr_info, + struct arg_info *args) +{ + ctx->shader_info->inline_push_const_mask = (1ULL << user_sgpr_info->num_inline_push_consts) - 1; + ctx->shader_info->inline_push_const_base = user_sgpr_info->base_inline_push_consts; + + for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts; i++) + add_arg(args, ARG_SGPR, ctx->ac.i32, >inline_push_consts[i]); + +} static void declare_global_input_sgprs(struct nir_to_llvm_context *ctx, gl_shader_stage stage, @@ -640,10 +684,14 @@ declare_global_input_sgprs(struct nir_to_llvm_context *ctx, add_array_arg(args, const_array(type, 32), desc_sets); } - if (ctx->shader_info->info.loads_push_constants) { + if