From: Nicolai Hähnle <nicolai.haeh...@amd.com> Aligns the code a bit more with ac/nir, and simplifies the setup of ac_shader_abi. --- src/gallium/drivers/radeonsi/si_shader.c | 618 ++++++++++++++++--------------- 1 file changed, 320 insertions(+), 298 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 55d1232..28923e4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -54,20 +54,35 @@ static const char *scratch_rsrc_dword1_symbol = "SCRATCH_RSRC_DWORD1"; struct si_shader_output_values { LLVMValueRef values[4]; unsigned semantic_name; unsigned semantic_index; ubyte vertex_stream[4]; }; +/** + * Used to collect types and other info about arguments of the LLVM function + * before the function is created. + */ +struct si_function_info { + LLVMTypeRef types[100]; + unsigned num_sgpr_params; + unsigned num_params; +}; + +enum si_arg_regfile { + ARG_SGPR, + ARG_VGPR +}; + static void si_init_shader_ctx(struct si_shader_context *ctx, struct si_screen *sscreen, LLVMTargetMachineRef tm); static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data); static void si_dump_shader_key(unsigned processor, const struct si_shader *shader, FILE *f); @@ -97,20 +112,49 @@ static bool is_merged_shader(struct si_shader *shader) { if (shader->selector->screen->b.chip_class <= VI) return false; return shader->key.as_ls || shader->key.as_es || shader->selector->type == PIPE_SHADER_TESS_CTRL || shader->selector->type == PIPE_SHADER_GEOMETRY; } +static void si_init_function_info(struct si_function_info *fninfo) +{ + fninfo->num_params = 0; + fninfo->num_sgpr_params = 0; +} + +static unsigned add_arg(struct si_function_info *fninfo, + enum si_arg_regfile regfile, LLVMTypeRef type) +{ + assert(regfile != ARG_SGPR || fninfo->num_sgpr_params == fninfo->num_params); + + unsigned idx = fninfo->num_params++; + assert(idx < ARRAY_SIZE(fninfo->types)); + + if (regfile == ARG_SGPR) + fninfo->num_sgpr_params = fninfo->num_params; + + fninfo->types[idx] = type; + return idx; +} + +static void add_arg_checked(struct si_function_info *fninfo, + enum si_arg_regfile regfile, LLVMTypeRef type, + unsigned idx) +{ + MAYBE_UNUSED unsigned actual = add_arg(fninfo, regfile, type); + assert(actual == idx); +} + /** * Returns a unique index for a per-patch semantic name and index. The index * must be less than 32, so that a 32-bit bitmask of used inputs or outputs * can be calculated. */ unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index) { switch (semantic_name) { case TGSI_SEMANTIC_TESSOUTER: return 0; @@ -3935,30 +3979,30 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, } static const struct lp_build_tgsi_action interp_action = { .fetch_args = interp_fetch_args, .emit = build_interp_intrinsic, }; static void si_create_function(struct si_shader_context *ctx, const char *name, LLVMTypeRef *returns, unsigned num_returns, - LLVMTypeRef *params, unsigned num_params, - int last_sgpr, unsigned max_workgroup_size) + struct si_function_info *fninfo, + unsigned max_workgroup_size) { int i; si_llvm_create_func(ctx, name, returns, num_returns, - params, num_params); + fninfo->types, fninfo->num_params); ctx->return_value = LLVMGetUndef(ctx->return_type); - for (i = 0; i <= last_sgpr; ++i) { + for (i = 0; i < fninfo->num_sgpr_params; ++i) { LLVMValueRef P = LLVMGetParam(ctx->main_fn, i); /* The combination of: * - ByVal * - dereferenceable * - invariant.load * allows the optimization passes to move loads and reduces * SGPR spilling significantly. */ if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) { @@ -3989,40 +4033,39 @@ static void si_create_function(struct si_shader_context *ctx, "no-nans-fp-math", "true"); LLVMAddTargetDependentFunctionAttr(ctx->main_fn, "unsafe-fp-math", "true"); } } static void declare_streamout_params(struct si_shader_context *ctx, struct pipe_stream_output_info *so, - LLVMTypeRef *params, LLVMTypeRef i32, - unsigned *num_params) + struct si_function_info *fninfo) { int i; /* Streamout SGPRs. */ if (so->num_outputs) { if (ctx->type != PIPE_SHADER_TESS_EVAL) - params[ctx->param_streamout_config = (*num_params)++] = i32; + ctx->param_streamout_config = add_arg(fninfo, ARG_SGPR, ctx->ac.i32); else - ctx->param_streamout_config = *num_params - 1; + ctx->param_streamout_config = fninfo->num_params - 1; - params[ctx->param_streamout_write_index = (*num_params)++] = i32; + ctx->param_streamout_write_index = add_arg(fninfo, ARG_SGPR, ctx->ac.i32); } /* A streamout buffer offset is loaded if the stride is non-zero. */ for (i = 0; i < 4; i++) { if (!so->stride[i]) continue; - params[ctx->param_streamout_offset[i] = (*num_params)++] = i32; + ctx->param_streamout_offset[i] = add_arg(fninfo, ARG_SGPR, ctx->ac.i32); } } static unsigned llvm_get_type_size(LLVMTypeRef type) { LLVMTypeKind kind = LLVMGetTypeKind(type); switch (kind) { case LLVMIntegerTypeKind: return LLVMGetIntTypeWidth(type) / 8; @@ -4079,202 +4122,202 @@ static unsigned si_get_max_workgroup_size(const struct si_shader *shader) if (!max_work_group_size) { /* This is a variable group size compute shader, * compile it for the maximum possible group size. */ max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK; } return max_work_group_size; } static void declare_per_stage_desc_pointers(struct si_shader_context *ctx, - LLVMTypeRef *params, - unsigned *num_params, + struct si_function_info *fninfo, bool assign_params) { - params[(*num_params)++] = si_const_array(ctx->v4i32, - SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS); - params[(*num_params)++] = si_const_array(ctx->v8i32, - SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2); + unsigned const_and_shader_buffers = + add_arg(fninfo, ARG_SGPR, + si_const_array(ctx->v4i32, + SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS)); + unsigned samplers_and_images = + add_arg(fninfo, ARG_SGPR, + si_const_array(ctx->v8i32, + SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2)); if (assign_params) { - ctx->param_const_and_shader_buffers = *num_params - 2; - ctx->param_samplers_and_images = *num_params - 1; + ctx->param_const_and_shader_buffers = const_and_shader_buffers; + ctx->param_samplers_and_images = samplers_and_images; } } static void declare_default_desc_pointers(struct si_shader_context *ctx, - LLVMTypeRef *params, - unsigned *num_params) + struct si_function_info *fninfo) { - params[ctx->param_rw_buffers = (*num_params)++] = - si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS); - declare_per_stage_desc_pointers(ctx, params, num_params, true); + ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR, + si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS)); + declare_per_stage_desc_pointers(ctx, fninfo, true); } static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx, - LLVMTypeRef *params, - unsigned *num_params) + struct si_function_info *fninfo) { - params[ctx->param_vertex_buffers = (*num_params)++] = - si_const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS); - params[ctx->param_base_vertex = (*num_params)++] = ctx->i32; - params[ctx->param_start_instance = (*num_params)++] = ctx->i32; - params[ctx->param_draw_id = (*num_params)++] = ctx->i32; - params[ctx->param_vs_state_bits = (*num_params)++] = ctx->i32; + ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR, + si_const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS)); + ctx->param_base_vertex = add_arg(fninfo, ARG_SGPR, ctx->i32); + ctx->param_start_instance = add_arg(fninfo, ARG_SGPR, ctx->i32); + ctx->param_draw_id = add_arg(fninfo, ARG_SGPR, ctx->i32); + ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32); } static void declare_vs_input_vgprs(struct si_shader_context *ctx, - LLVMTypeRef *params, unsigned *num_params, + struct si_function_info *fninfo, unsigned *num_prolog_vgprs) { struct si_shader *shader = ctx->shader; - params[ctx->param_vertex_id = (*num_params)++] = ctx->i32; + ctx->param_vertex_id = add_arg(fninfo, ARG_VGPR, ctx->i32); if (shader->key.as_ls) { - params[ctx->param_rel_auto_id = (*num_params)++] = ctx->i32; - params[ctx->param_instance_id = (*num_params)++] = ctx->i32; + ctx->param_rel_auto_id = add_arg(fninfo, ARG_VGPR, ctx->i32); + ctx->param_instance_id = add_arg(fninfo, ARG_VGPR, ctx->i32); } else { - params[ctx->param_instance_id = (*num_params)++] = ctx->i32; - params[ctx->param_vs_prim_id = (*num_params)++] = ctx->i32; + ctx->param_instance_id = add_arg(fninfo, ARG_VGPR, ctx->i32); + ctx->param_vs_prim_id = add_arg(fninfo, ARG_VGPR, ctx->i32); } - params[(*num_params)++] = ctx->i32; /* unused */ + add_arg(fninfo, ARG_VGPR, ctx->i32); /* unused */ if (!shader->is_gs_copy_shader) { /* Vertex load indices. */ - ctx->param_vertex_index0 = (*num_params); + ctx->param_vertex_index0 = fninfo->num_params; for (unsigned i = 0; i < shader->selector->info.num_inputs; i++) - params[(*num_params)++] = ctx->i32; + add_arg(fninfo, ARG_VGPR, ctx->i32); *num_prolog_vgprs += shader->selector->info.num_inputs; } } static void declare_tes_input_vgprs(struct si_shader_context *ctx, - LLVMTypeRef *params, unsigned *num_params) + struct si_function_info *fninfo) { - params[ctx->param_tes_u = (*num_params)++] = ctx->f32; - params[ctx->param_tes_v = (*num_params)++] = ctx->f32; - params[ctx->param_tes_rel_patch_id = (*num_params)++] = ctx->i32; - params[ctx->param_tes_patch_id = (*num_params)++] = ctx->i32; + ctx->param_tes_u = add_arg(fninfo, ARG_VGPR, ctx->f32); + ctx->param_tes_v = add_arg(fninfo, ARG_VGPR, ctx->f32); + ctx->param_tes_rel_patch_id = add_arg(fninfo, ARG_VGPR, ctx->i32); + ctx->param_tes_patch_id = add_arg(fninfo, ARG_VGPR, ctx->i32); } enum { /* Convenient merged shader definitions. */ SI_SHADER_MERGED_VERTEX_TESSCTRL = PIPE_SHADER_TYPES, SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY, }; static void create_function(struct si_shader_context *ctx) { struct lp_build_tgsi_context *bld_base = &ctx->bld_base; struct gallivm_state *gallivm = &ctx->gallivm; struct si_shader *shader = ctx->shader; - LLVMTypeRef params[100]; /* just make it large enough */ + struct si_function_info fninfo; LLVMTypeRef returns[16+32*4]; - unsigned i, last_sgpr, num_params = 0, num_return_sgprs; + unsigned i, num_return_sgprs; unsigned num_returns = 0; unsigned num_prolog_vgprs = 0; unsigned type = ctx->type; + si_init_function_info(&fninfo); + /* Set MERGED shaders. */ if (ctx->screen->b.chip_class >= GFX9) { if (shader->key.as_ls || type == PIPE_SHADER_TESS_CTRL) type = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */ else if (shader->key.as_es || type == PIPE_SHADER_GEOMETRY) type = SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY; } LLVMTypeRef v3i32 = LLVMVectorType(ctx->i32, 3); switch (type) { case PIPE_SHADER_VERTEX: - declare_default_desc_pointers(ctx, params, &num_params); - declare_vs_specific_input_sgprs(ctx, params, &num_params); + declare_default_desc_pointers(ctx, &fninfo); + declare_vs_specific_input_sgprs(ctx, &fninfo); if (shader->key.as_es) { - params[ctx->param_es2gs_offset = num_params++] = ctx->i32; + ctx->param_es2gs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); } else if (shader->key.as_ls) { /* no extra parameters */ } else { - if (shader->is_gs_copy_shader) - num_params = ctx->param_rw_buffers + 1; + if (shader->is_gs_copy_shader) { + fninfo.num_params = ctx->param_rw_buffers + 1; + fninfo.num_sgpr_params = fninfo.num_params; + } /* The locations of the other parameters are assigned dynamically. */ declare_streamout_params(ctx, &shader->selector->so, - params, ctx->i32, &num_params); + &fninfo); } - last_sgpr = num_params-1; - /* VGPRs */ - declare_vs_input_vgprs(ctx, params, &num_params, - &num_prolog_vgprs); + declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs); break; case PIPE_SHADER_TESS_CTRL: /* SI-CI-VI */ - declare_default_desc_pointers(ctx, params, &num_params); - params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32; - params[ctx->param_tcs_out_lds_offsets = num_params++] = ctx->i32; - params[ctx->param_tcs_out_lds_layout = num_params++] = ctx->i32; - params[ctx->param_vs_state_bits = num_params++] = ctx->i32; - params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32; - params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32; - params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32; - params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32; - last_sgpr = num_params - 1; + declare_default_desc_pointers(ctx, &fninfo); + ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* VGPRs */ - params[ctx->param_tcs_patch_id = num_params++] = ctx->i32; - params[ctx->param_tcs_rel_ids = num_params++] = ctx->i32; + ctx->param_tcs_patch_id = add_arg(&fninfo, ARG_VGPR, ctx->i32); + ctx->param_tcs_rel_ids = add_arg(&fninfo, ARG_VGPR, ctx->i32); /* param_tcs_offchip_offset and param_tcs_factor_offset are * placed after the user SGPRs. */ for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ for (i = 0; i < 3; i++) returns[num_returns++] = ctx->f32; /* VGPRs */ break; case SI_SHADER_MERGED_VERTEX_TESSCTRL: /* Merged stages have 8 system SGPRs at the beginning. */ - params[ctx->param_rw_buffers = num_params++] = /* SPI_SHADER_USER_DATA_ADDR_LO_HS */ - si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS); - params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32; - params[ctx->param_merged_wave_info = num_params++] = ctx->i32; - params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32; - params[ctx->param_merged_scratch_offset = num_params++] = ctx->i32; - params[num_params++] = ctx->i32; /* unused */ - params[num_params++] = ctx->i32; /* unused */ - - params[num_params++] = ctx->i32; /* unused */ - params[num_params++] = ctx->i32; /* unused */ - declare_per_stage_desc_pointers(ctx, params, &num_params, + ctx->param_rw_buffers = /* SPI_SHADER_USER_DATA_ADDR_LO_HS */ + add_arg(&fninfo, ARG_SGPR, si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS)); + ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + declare_per_stage_desc_pointers(ctx, &fninfo, ctx->type == PIPE_SHADER_VERTEX); - declare_vs_specific_input_sgprs(ctx, params, &num_params); + declare_vs_specific_input_sgprs(ctx, &fninfo); - params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32; - params[ctx->param_tcs_out_lds_offsets = num_params++] = ctx->i32; - params[ctx->param_tcs_out_lds_layout = num_params++] = ctx->i32; - params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32; - params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32; - params[num_params++] = ctx->i32; /* unused */ + ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - declare_per_stage_desc_pointers(ctx, params, &num_params, + declare_per_stage_desc_pointers(ctx, &fninfo, ctx->type == PIPE_SHADER_TESS_CTRL); - last_sgpr = num_params - 1; /* VGPRs (first TCS, then VS) */ - params[ctx->param_tcs_patch_id = num_params++] = ctx->i32; - params[ctx->param_tcs_rel_ids = num_params++] = ctx->i32; + ctx->param_tcs_patch_id = add_arg(&fninfo, ARG_VGPR, ctx->i32); + ctx->param_tcs_rel_ids = add_arg(&fninfo, ARG_VGPR, ctx->i32); if (ctx->type == PIPE_SHADER_VERTEX) { - declare_vs_input_vgprs(ctx, params, &num_params, + declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs); /* LS return values are inputs to the TCS main shader part. */ for (i = 0; i < 8 + GFX9_TCS_NUM_USER_SGPR; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ for (i = 0; i < 2; i++) returns[num_returns++] = ctx->f32; /* VGPRs */ } else { /* TCS return values are inputs to the TCS epilog. * @@ -4284,145 +4327,141 @@ static void create_function(struct si_shader_context *ctx) */ for (i = 0; i <= 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ for (i = 0; i < 3; i++) returns[num_returns++] = ctx->f32; /* VGPRs */ } break; case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY: /* Merged stages have 8 system SGPRs at the beginning. */ - params[ctx->param_rw_buffers = num_params++] = /* SPI_SHADER_USER_DATA_ADDR_LO_GS */ - si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS); - params[ctx->param_gs2vs_offset = num_params++] = ctx->i32; - params[ctx->param_merged_wave_info = num_params++] = ctx->i32; - params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32; - params[ctx->param_merged_scratch_offset = num_params++] = ctx->i32; - params[num_params++] = ctx->i32; /* unused (SPI_SHADER_PGM_LO/HI_GS << 8) */ - params[num_params++] = ctx->i32; /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */ - - params[num_params++] = ctx->i32; /* unused */ - params[num_params++] = ctx->i32; /* unused */ - declare_per_stage_desc_pointers(ctx, params, &num_params, + ctx->param_rw_buffers = /* SPI_SHADER_USER_DATA_ADDR_LO_GS */ + add_arg(&fninfo, ARG_SGPR, si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS)); + ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_PGM_LO/HI_GS << 8) */ + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */ + + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + declare_per_stage_desc_pointers(ctx, &fninfo, (ctx->type == PIPE_SHADER_VERTEX || ctx->type == PIPE_SHADER_TESS_EVAL)); if (ctx->type == PIPE_SHADER_VERTEX) { - declare_vs_specific_input_sgprs(ctx, params, &num_params); + declare_vs_specific_input_sgprs(ctx, &fninfo); } else { /* TESS_EVAL (and also GEOMETRY): * Declare as many input SGPRs as the VS has. */ - params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32; - params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32; - params[num_params++] = ctx->i32; /* unused */ - params[num_params++] = ctx->i32; /* unused */ - params[num_params++] = ctx->i32; /* unused */ - params[ctx->param_vs_state_bits = num_params++] = ctx->i32; /* unused */ + ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ } - declare_per_stage_desc_pointers(ctx, params, &num_params, + declare_per_stage_desc_pointers(ctx, &fninfo, ctx->type == PIPE_SHADER_GEOMETRY); - last_sgpr = num_params - 1; /* VGPRs (first GS, then VS/TES) */ - params[ctx->param_gs_vtx01_offset = num_params++] = ctx->i32; - params[ctx->param_gs_vtx23_offset = num_params++] = ctx->i32; - params[ctx->param_gs_prim_id = num_params++] = ctx->i32; - params[ctx->param_gs_instance_id = num_params++] = ctx->i32; - params[ctx->param_gs_vtx45_offset = num_params++] = ctx->i32; + ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); + ctx->param_gs_vtx23_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); + ctx->param_gs_prim_id = add_arg(&fninfo, ARG_VGPR, ctx->i32); + ctx->param_gs_instance_id = add_arg(&fninfo, ARG_VGPR, ctx->i32); + ctx->param_gs_vtx45_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); if (ctx->type == PIPE_SHADER_VERTEX) { - declare_vs_input_vgprs(ctx, params, &num_params, + declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs); } else if (ctx->type == PIPE_SHADER_TESS_EVAL) { - declare_tes_input_vgprs(ctx, params, &num_params); + declare_tes_input_vgprs(ctx, &fninfo); } if (ctx->type == PIPE_SHADER_VERTEX || ctx->type == PIPE_SHADER_TESS_EVAL) { /* ES return values are inputs to GS. */ for (i = 0; i < 8 + GFX9_GS_NUM_USER_SGPR; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ for (i = 0; i < 5; i++) returns[num_returns++] = ctx->f32; /* VGPRs */ } break; case PIPE_SHADER_TESS_EVAL: - declare_default_desc_pointers(ctx, params, &num_params); - params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32; - params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32; + declare_default_desc_pointers(ctx, &fninfo); + ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); if (shader->key.as_es) { - params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32; - params[num_params++] = ctx->i32; - params[ctx->param_es2gs_offset = num_params++] = ctx->i32; + ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_es2gs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); } else { - params[num_params++] = ctx->i32; + add_arg(&fninfo, ARG_SGPR, ctx->i32); declare_streamout_params(ctx, &shader->selector->so, - params, ctx->i32, &num_params); - params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32; + &fninfo); + ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); } - last_sgpr = num_params - 1; /* VGPRs */ - declare_tes_input_vgprs(ctx, params, &num_params); + declare_tes_input_vgprs(ctx, &fninfo); break; case PIPE_SHADER_GEOMETRY: - declare_default_desc_pointers(ctx, params, &num_params); - params[ctx->param_gs2vs_offset = num_params++] = ctx->i32; - params[ctx->param_gs_wave_id = num_params++] = ctx->i32; - last_sgpr = num_params - 1; + declare_default_desc_pointers(ctx, &fninfo); + ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_gs_wave_id = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* VGPRs */ - params[ctx->param_gs_vtx0_offset = num_params++] = ctx->i32; - params[ctx->param_gs_vtx1_offset = num_params++] = ctx->i32; - params[ctx->param_gs_prim_id = num_params++] = ctx->i32; - params[ctx->param_gs_vtx2_offset = num_params++] = ctx->i32; - params[ctx->param_gs_vtx3_offset = num_params++] = ctx->i32; - params[ctx->param_gs_vtx4_offset = num_params++] = ctx->i32; - params[ctx->param_gs_vtx5_offset = num_params++] = ctx->i32; - params[ctx->param_gs_instance_id = num_params++] = ctx->i32; + ctx->param_gs_vtx0_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); + ctx->param_gs_vtx1_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); + ctx->param_gs_prim_id = add_arg(&fninfo, ARG_VGPR, ctx->i32); + ctx->param_gs_vtx2_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); + ctx->param_gs_vtx3_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); + ctx->param_gs_vtx4_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); + ctx->param_gs_vtx5_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); + ctx->param_gs_instance_id = add_arg(&fninfo, ARG_VGPR, ctx->i32); break; case PIPE_SHADER_FRAGMENT: - declare_default_desc_pointers(ctx, params, &num_params); - params[SI_PARAM_ALPHA_REF] = ctx->f32; - params[SI_PARAM_PRIM_MASK] = ctx->i32; - last_sgpr = SI_PARAM_PRIM_MASK; - params[SI_PARAM_PERSP_SAMPLE] = ctx->v2i32; - params[SI_PARAM_PERSP_CENTER] = ctx->v2i32; - params[SI_PARAM_PERSP_CENTROID] = ctx->v2i32; - params[SI_PARAM_PERSP_PULL_MODEL] = v3i32; - params[SI_PARAM_LINEAR_SAMPLE] = ctx->v2i32; - params[SI_PARAM_LINEAR_CENTER] = ctx->v2i32; - params[SI_PARAM_LINEAR_CENTROID] = ctx->v2i32; - params[SI_PARAM_LINE_STIPPLE_TEX] = ctx->f32; - params[SI_PARAM_POS_X_FLOAT] = ctx->f32; - params[SI_PARAM_POS_Y_FLOAT] = ctx->f32; - params[SI_PARAM_POS_Z_FLOAT] = ctx->f32; - params[SI_PARAM_POS_W_FLOAT] = ctx->f32; - params[SI_PARAM_FRONT_FACE] = ctx->i32; + declare_default_desc_pointers(ctx, &fninfo); + add_arg_checked(&fninfo, ARG_SGPR, ctx->f32, SI_PARAM_ALPHA_REF); + add_arg_checked(&fninfo, ARG_SGPR, ctx->i32, SI_PARAM_PRIM_MASK); + + add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_PERSP_SAMPLE); + add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_PERSP_CENTER); + add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_PERSP_CENTROID); + add_arg_checked(&fninfo, ARG_VGPR, v3i32, SI_PARAM_PERSP_PULL_MODEL); + add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_LINEAR_SAMPLE); + add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_LINEAR_CENTER); + add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_LINEAR_CENTROID); + add_arg_checked(&fninfo, ARG_VGPR, ctx->f32, SI_PARAM_LINE_STIPPLE_TEX); + add_arg_checked(&fninfo, ARG_VGPR, ctx->f32, SI_PARAM_POS_X_FLOAT); + add_arg_checked(&fninfo, ARG_VGPR, ctx->f32, SI_PARAM_POS_Y_FLOAT); + add_arg_checked(&fninfo, ARG_VGPR, ctx->f32, SI_PARAM_POS_Z_FLOAT); + add_arg_checked(&fninfo, ARG_VGPR, ctx->f32, SI_PARAM_POS_W_FLOAT); + add_arg_checked(&fninfo, ARG_VGPR, ctx->i32, SI_PARAM_FRONT_FACE); shader->info.face_vgpr_index = 20; - params[SI_PARAM_ANCILLARY] = ctx->i32; - params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32; - params[SI_PARAM_POS_FIXED_PT] = ctx->i32; - num_params = SI_PARAM_POS_FIXED_PT+1; + add_arg_checked(&fninfo, ARG_VGPR, ctx->i32, SI_PARAM_ANCILLARY); + add_arg_checked(&fninfo, ARG_VGPR, ctx->f32, SI_PARAM_SAMPLE_COVERAGE); + add_arg_checked(&fninfo, ARG_VGPR, ctx->i32, SI_PARAM_POS_FIXED_PT); /* Color inputs from the prolog. */ if (shader->selector->info.colors_read) { unsigned num_color_elements = util_bitcount(shader->selector->info.colors_read); - assert(num_params + num_color_elements <= ARRAY_SIZE(params)); + assert(fninfo.num_params + num_color_elements <= ARRAY_SIZE(fninfo.types)); for (i = 0; i < num_color_elements; i++) - params[num_params++] = ctx->f32; + add_arg(&fninfo, ARG_VGPR, ctx->f32); num_prolog_vgprs += num_color_elements; } /* Outputs for the epilog. */ num_return_sgprs = SI_SGPR_ALPHA_REF + 1; num_returns = num_return_sgprs + util_bitcount(shader->selector->info.colors_written) * 4 + shader->selector->info.writes_z + @@ -4434,69 +4473,65 @@ static void create_function(struct si_shader_context *ctx) num_return_sgprs + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1); for (i = 0; i < num_return_sgprs; i++) returns[i] = ctx->i32; for (; i < num_returns; i++) returns[i] = ctx->f32; break; case PIPE_SHADER_COMPUTE: - declare_default_desc_pointers(ctx, params, &num_params); + declare_default_desc_pointers(ctx, &fninfo); if (shader->selector->info.uses_grid_size) - params[ctx->param_grid_size = num_params++] = v3i32; + ctx->param_grid_size = add_arg(&fninfo, ARG_SGPR, v3i32); if (shader->selector->info.uses_block_size) - params[ctx->param_block_size = num_params++] = v3i32; + ctx->param_block_size = add_arg(&fninfo, ARG_SGPR, v3i32); for (i = 0; i < 3; i++) { ctx->param_block_id[i] = -1; if (shader->selector->info.uses_block_id[i]) - params[ctx->param_block_id[i] = num_params++] = ctx->i32; + ctx->param_block_id[i] = add_arg(&fninfo, ARG_SGPR, ctx->i32); } - last_sgpr = num_params - 1; - params[ctx->param_thread_id = num_params++] = v3i32; + ctx->param_thread_id = add_arg(&fninfo, ARG_VGPR, v3i32); break; default: assert(0 && "unimplemented shader"); return; } - assert(num_params <= ARRAY_SIZE(params)); - - si_create_function(ctx, "main", returns, num_returns, params, - num_params, last_sgpr, + si_create_function(ctx, "main", returns, num_returns, &fninfo, si_get_max_workgroup_size(shader)); /* Reserve register locations for VGPR inputs the PS prolog may need. */ if (ctx->type == PIPE_SHADER_FRAGMENT && ctx->separate_prolog) { si_llvm_add_attribute(ctx->main_fn, "InitialPSInputAddr", S_0286D0_PERSP_SAMPLE_ENA(1) | S_0286D0_PERSP_CENTER_ENA(1) | S_0286D0_PERSP_CENTROID_ENA(1) | S_0286D0_LINEAR_SAMPLE_ENA(1) | S_0286D0_LINEAR_CENTER_ENA(1) | S_0286D0_LINEAR_CENTROID_ENA(1) | S_0286D0_FRONT_FACE_ENA(1) | S_0286D0_POS_FIXED_PT_ENA(1)); } shader->info.num_input_sgprs = 0; shader->info.num_input_vgprs = 0; - for (i = 0; i <= last_sgpr; ++i) - shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4; + for (i = 0; i < fninfo.num_sgpr_params; ++i) + shader->info.num_input_sgprs += llvm_get_type_size(fninfo.types[i]) / 4; - for (; i < num_params; ++i) - shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4; + for (; i < fninfo.num_params; ++i) + shader->info.num_input_vgprs += llvm_get_type_size(fninfo.types[i]) / 4; assert(shader->info.num_input_vgprs >= num_prolog_vgprs); shader->info.num_input_vgprs -= num_prolog_vgprs; if (!ctx->screen->has_ds_bpermute && bld_base->info && (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 || bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 || bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 || bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 || @@ -5761,46 +5796,48 @@ static void si_get_ps_epilog_key(struct si_shader *shader, /** * Build the GS prolog function. Rotate the input vertices for triangle strips * with adjacency. */ static void si_build_gs_prolog_function(struct si_shader_context *ctx, union si_shader_part_key *key) { unsigned num_sgprs, num_vgprs; struct gallivm_state *gallivm = &ctx->gallivm; + struct si_function_info fninfo; LLVMBuilderRef builder = gallivm->builder; - LLVMTypeRef params[48]; /* 40 SGPRs (maximum) + some VGPRs */ LLVMTypeRef returns[48]; LLVMValueRef func, ret; + si_init_function_info(&fninfo); + if (ctx->screen->b.chip_class >= GFX9) { num_sgprs = 8 + GFX9_GS_NUM_USER_SGPR; num_vgprs = 5; /* ES inputs are not needed by GS */ } else { num_sgprs = GFX6_GS_NUM_USER_SGPR + 2; num_vgprs = 8; } for (unsigned i = 0; i < num_sgprs; ++i) { - params[i] = ctx->i32; + add_arg(&fninfo, ARG_SGPR, ctx->i32); returns[i] = ctx->i32; } for (unsigned i = 0; i < num_vgprs; ++i) { - params[num_sgprs + i] = ctx->i32; + add_arg(&fninfo, ARG_VGPR, ctx->i32); returns[num_sgprs + i] = ctx->f32; } /* Create the function. */ si_create_function(ctx, "gs_prolog", returns, num_sgprs + num_vgprs, - params, num_sgprs + num_vgprs, num_sgprs - 1, 0); + &fninfo, 0); func = ctx->main_fn; /* Set the full EXEC mask for the prolog, because we are only fiddling * with registers here. The main shader part will set the correct EXEC * mask. */ if (ctx->screen->b.chip_class >= GFX9 && !key->gs_prolog.is_monolithic) si_init_exec_full_mask(ctx); /* Copy inputs to outputs. This should be no-op, as the registers match, @@ -5886,97 +5923,91 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx, */ static void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *parts, unsigned num_parts, unsigned main_part, unsigned next_shader_first_part) { struct gallivm_state *gallivm = &ctx->gallivm; LLVMBuilderRef builder = ctx->gallivm.builder; /* PS epilog has one arg per color component */ - LLVMTypeRef param_types[48]; + struct si_function_info fninfo; LLVMValueRef initial[48], out[48]; LLVMTypeRef function_type; - unsigned num_params; + unsigned num_first_params; unsigned num_out, initial_num_out; MAYBE_UNUSED unsigned num_out_sgpr; /* used in debug checks */ MAYBE_UNUSED unsigned initial_num_out_sgpr; /* used in debug checks */ unsigned num_sgprs, num_vgprs; - unsigned last_sgpr_param; unsigned gprs; struct lp_build_if_state if_state; + si_init_function_info(&fninfo); + for (unsigned i = 0; i < num_parts; ++i) { lp_add_function_attr(parts[i], -1, LP_FUNC_ATTR_ALWAYSINLINE); LLVMSetLinkage(parts[i], LLVMPrivateLinkage); } /* The parameters of the wrapper function correspond to those of the * first part in terms of SGPRs and VGPRs, but we use the types of the * main part to get the right types. This is relevant for the * dereferenceable attribute on descriptor table pointers. */ num_sgprs = 0; num_vgprs = 0; function_type = LLVMGetElementType(LLVMTypeOf(parts[0])); - num_params = LLVMCountParamTypes(function_type); + num_first_params = LLVMCountParamTypes(function_type); - for (unsigned i = 0; i < num_params; ++i) { + for (unsigned i = 0; i < num_first_params; ++i) { LLVMValueRef param = LLVMGetParam(parts[0], i); if (ac_is_sgpr_param(param)) { assert(num_vgprs == 0); num_sgprs += llvm_get_type_size(LLVMTypeOf(param)) / 4; } else { num_vgprs += llvm_get_type_size(LLVMTypeOf(param)) / 4; } } - assert(num_vgprs + num_sgprs <= ARRAY_SIZE(param_types)); - num_params = 0; - last_sgpr_param = 0; gprs = 0; while (gprs < num_sgprs + num_vgprs) { - LLVMValueRef param = LLVMGetParam(parts[main_part], num_params); - unsigned size; + LLVMValueRef param = LLVMGetParam(parts[main_part], fninfo.num_params); + LLVMTypeRef type = LLVMTypeOf(param); + unsigned size = llvm_get_type_size(type) / 4; - param_types[num_params] = LLVMTypeOf(param); - if (gprs < num_sgprs) - last_sgpr_param = num_params; - size = llvm_get_type_size(param_types[num_params]) / 4; - num_params++; + add_arg(&fninfo, gprs < num_sgprs ? ARG_SGPR : ARG_VGPR, type); assert(ac_is_sgpr_param(param) == (gprs < num_sgprs)); assert(gprs + size <= num_sgprs + num_vgprs && (gprs >= num_sgprs || gprs + size <= num_sgprs)); gprs += size; } - si_create_function(ctx, "wrapper", NULL, 0, param_types, num_params, - last_sgpr_param, + si_create_function(ctx, "wrapper", NULL, 0, &fninfo, si_get_max_workgroup_size(ctx->shader)); if (is_merged_shader(ctx->shader)) si_init_exec_full_mask(ctx); /* Record the arguments of the function as if they were an output of * a previous part. */ num_out = 0; num_out_sgpr = 0; - for (unsigned i = 0; i < num_params; ++i) { + for (unsigned i = 0; i < fninfo.num_params; ++i) { LLVMValueRef param = LLVMGetParam(ctx->main_fn, i); LLVMTypeRef param_type = LLVMTypeOf(param); - LLVMTypeRef out_type = i <= last_sgpr_param ? ctx->i32 : ctx->f32; + LLVMTypeRef out_type = i < fninfo.num_sgpr_params ? ctx->i32 : ctx->f32; unsigned size = llvm_get_type_size(param_type) / 4; if (size == 1) { if (param_type != out_type) param = LLVMBuildBitCast(builder, param, out_type, ""); out[num_out++] = param; } else { LLVMTypeRef vector_type = LLVMVectorType(out_type, size); if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) { @@ -5985,37 +6016,35 @@ static void si_build_wrapper_function(struct si_shader_context *ctx, } if (param_type != vector_type) param = LLVMBuildBitCast(builder, param, vector_type, ""); for (unsigned j = 0; j < size; ++j) out[num_out++] = LLVMBuildExtractElement( builder, param, LLVMConstInt(ctx->i32, j, 0), ""); } - if (i <= last_sgpr_param) + if (i < fninfo.num_sgpr_params) num_out_sgpr = num_out; } memcpy(initial, out, sizeof(out)); initial_num_out = num_out; initial_num_out_sgpr = num_out_sgpr; /* Now chain the parts. */ for (unsigned part = 0; part < num_parts; ++part) { LLVMValueRef in[48]; LLVMValueRef ret; LLVMTypeRef ret_type; unsigned out_idx = 0; - - num_params = LLVMCountParams(parts[part]); - assert(num_params <= ARRAY_SIZE(param_types)); + unsigned num_params = LLVMCountParams(parts[part]); /* Merged shaders are executed conditionally depending * on the number of enabled threads passed in the input SGPRs. */ if (is_merged_shader(ctx->shader) && (part == 0 || part == next_shader_first_part)) { LLVMValueRef ena, count = initial[3]; /* The thread count for the 2nd shader is at bit-offset 8. */ if (part == next_shader_first_part) { count = LLVMBuildLShr(builder, count, @@ -6554,76 +6583,74 @@ static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx) * input_v2, * input_v3, * (VertexID + BaseVertex), * (InstanceID + StartInstance), * (InstanceID / 2 + StartInstance) */ static void si_build_vs_prolog_function(struct si_shader_context *ctx, union si_shader_part_key *key) { struct gallivm_state *gallivm = &ctx->gallivm; - LLVMTypeRef *params, *returns; + struct si_function_info fninfo; + LLVMTypeRef *returns; LLVMValueRef ret, func; - int last_sgpr, num_params, num_returns, i; + int num_returns, i; unsigned first_vs_vgpr = key->vs_prolog.num_input_sgprs + key->vs_prolog.num_merged_next_stage_vgprs; unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 4; unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs + num_input_vgprs; unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0; ctx->param_vertex_id = first_vs_vgpr; ctx->param_instance_id = first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1); + si_init_function_info(&fninfo); + /* 4 preloaded VGPRs + vertex load indices as prolog outputs */ - params = alloca(num_all_input_regs * sizeof(LLVMTypeRef)); returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) * sizeof(LLVMTypeRef)); - num_params = 0; num_returns = 0; /* Declare input and output SGPRs. */ - num_params = 0; for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) { - params[num_params++] = ctx->i32; + add_arg(&fninfo, ARG_SGPR, ctx->i32); returns[num_returns++] = ctx->i32; } - last_sgpr = num_params - 1; /* Preloaded VGPRs (outputs must be floats) */ for (i = 0; i < num_input_vgprs; i++) { - params[num_params++] = ctx->i32; + add_arg(&fninfo, ARG_VGPR, ctx->i32); returns[num_returns++] = ctx->f32; } /* Vertex load indices. */ for (i = 0; i <= key->vs_prolog.last_input; i++) returns[num_returns++] = ctx->f32; /* Create the function. */ - si_create_function(ctx, "vs_prolog", returns, num_returns, params, - num_params, last_sgpr, 0); + si_create_function(ctx, "vs_prolog", returns, num_returns, &fninfo, 0); func = ctx->main_fn; if (key->vs_prolog.num_merged_next_stage_vgprs && !key->vs_prolog.is_monolithic) si_init_exec_from_input(ctx, 3, 0); /* Copy inputs to outputs. This should be no-op, as the registers match, * but it will prevent the compiler from overwriting them unintentionally. */ ret = ctx->return_value; for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) { LLVMValueRef p = LLVMGetParam(func, i); ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, ""); } - for (; i < num_params; i++) { + for (; i < fninfo.num_params; i++) { LLVMValueRef p = LLVMGetParam(func, i); p = LLVMBuildBitCast(gallivm->builder, p, ctx->f32, ""); ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, ""); } /* Compute vertex load indices from instance divisors. */ LLVMValueRef instance_divisor_constbuf = NULL; if (key->vs_prolog.states.instance_divisor_is_fetched) { LLVMValueRef list = si_prolog_get_rw_buffers(ctx); @@ -6658,21 +6685,21 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, } else { /* VertexID + BaseVertex */ index = LLVMBuildAdd(gallivm->builder, LLVMGetParam(func, ctx->param_vertex_id), LLVMGetParam(func, user_sgpr_base + SI_SGPR_BASE_VERTEX), ""); } index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, ""); ret = LLVMBuildInsertValue(gallivm->builder, ret, index, - num_params++, ""); + fninfo.num_params + i, ""); } si_llvm_build_ret(ctx, ret); } static bool si_get_vs_prolog(struct si_screen *sscreen, LLVMTargetMachineRef tm, struct si_shader *shader, struct pipe_debug_callback *debug, struct si_shader *main_part, @@ -6711,74 +6738,75 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen, /** * Compile the TCS epilog function. This writes tesselation factors to memory * based on the output primitive type of the tesselator (determined by TES). */ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, union si_shader_part_key *key) { struct gallivm_state *gallivm = &ctx->gallivm; struct lp_build_tgsi_context *bld_base = &ctx->bld_base; - LLVMTypeRef params[32]; + struct si_function_info fninfo; LLVMValueRef func; - int last_sgpr, num_params = 0; + + si_init_function_info(&fninfo); if (ctx->screen->b.chip_class >= GFX9) { - params[num_params++] = ctx->i64; - params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32; - params[num_params++] = ctx->i32; /* wave info */ - params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32; - params[num_params++] = ctx->i32; - params[num_params++] = ctx->i32; - params[num_params++] = ctx->i32; - params[num_params++] = ctx->i64; - params[num_params++] = ctx->i64; - params[num_params++] = ctx->i64; - params[num_params++] = ctx->i64; - params[num_params++] = ctx->i32; - params[num_params++] = ctx->i32; - params[num_params++] = ctx->i32; - params[num_params++] = ctx->i32; - params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32; - params[num_params++] = ctx->i32; - params[num_params++] = ctx->i32; - params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32; - params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32; + add_arg(&fninfo, ARG_SGPR, ctx->i64); + ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* wave info */ + ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i64); + add_arg(&fninfo, ARG_SGPR, ctx->i64); + add_arg(&fninfo, ARG_SGPR, ctx->i64); + add_arg(&fninfo, ARG_SGPR, ctx->i64); + add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); } else { - params[num_params++] = ctx->i64; - params[num_params++] = ctx->i64; - params[num_params++] = ctx->i64; - params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32; - params[num_params++] = ctx->i32; - params[num_params++] = ctx->i32; - params[num_params++] = ctx->i32; - params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32; - params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32; - params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32; - params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32; - } - last_sgpr = num_params - 1; - - params[num_params++] = ctx->i32; /* patch index within the wave (REL_PATCH_ID) */ - params[num_params++] = ctx->i32; /* invocation ID within the patch */ - params[num_params++] = ctx->i32; /* LDS offset where tess factors should be loaded from */ + add_arg(&fninfo, ARG_SGPR, ctx->i64); + add_arg(&fninfo, ARG_SGPR, ctx->i64); + add_arg(&fninfo, ARG_SGPR, ctx->i64); + ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + } + + unsigned tess_factors_idx = + add_arg(&fninfo, ARG_VGPR, ctx->i32); /* patch index within the wave (REL_PATCH_ID) */ + add_arg(&fninfo, ARG_VGPR, ctx->i32); /* invocation ID within the patch */ + add_arg(&fninfo, ARG_VGPR, ctx->i32); /* LDS offset where tess factors should be loaded from */ /* Create the function. */ - si_create_function(ctx, "tcs_epilog", NULL, 0, params, num_params, last_sgpr, + si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo, ctx->screen->b.chip_class >= CIK ? 128 : 64); declare_lds_as_pointer(ctx); func = ctx->main_fn; si_write_tess_factors(bld_base, - LLVMGetParam(func, last_sgpr + 1), - LLVMGetParam(func, last_sgpr + 2), - LLVMGetParam(func, last_sgpr + 3)); + LLVMGetParam(func, tess_factors_idx), + LLVMGetParam(func, tess_factors_idx + 1), + LLVMGetParam(func, tess_factors_idx + 2)); LLVMBuildRetVoid(gallivm->builder); } /** * Select and compile (or reuse) TCS parts (epilog). */ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, LLVMTargetMachineRef tm, struct si_shader *shader, @@ -6850,56 +6878,51 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen, * - polygon stippling * * All preloaded SGPRs and VGPRs are passed through unmodified unless they are * overriden by other states. (e.g. per-sample interpolation) * Interpolated colors are stored after the preloaded VGPRs. */ static void si_build_ps_prolog_function(struct si_shader_context *ctx, union si_shader_part_key *key) { struct gallivm_state *gallivm = &ctx->gallivm; - LLVMTypeRef *params; + struct si_function_info fninfo; LLVMValueRef ret, func; - int last_sgpr, num_params, num_returns, i, num_color_channels; + int num_returns, i, num_color_channels; assert(si_need_ps_prolog(key)); - /* Number of inputs + 8 color elements. */ - params = alloca((key->ps_prolog.num_input_sgprs + - key->ps_prolog.num_input_vgprs + 8) * - sizeof(LLVMTypeRef)); + si_init_function_info(&fninfo); /* Declare inputs. */ - num_params = 0; for (i = 0; i < key->ps_prolog.num_input_sgprs; i++) - params[num_params++] = ctx->i32; - last_sgpr = num_params - 1; + add_arg(&fninfo, ARG_SGPR, ctx->i32); for (i = 0; i < key->ps_prolog.num_input_vgprs; i++) - params[num_params++] = ctx->f32; + add_arg(&fninfo, ARG_VGPR, ctx->f32); /* Declare outputs (same as inputs + add colors if needed) */ - num_returns = num_params; + num_returns = fninfo.num_params; num_color_channels = util_bitcount(key->ps_prolog.colors_read); for (i = 0; i < num_color_channels; i++) - params[num_returns++] = ctx->f32; + fninfo.types[num_returns++] = ctx->f32; /* Create the function. */ - si_create_function(ctx, "ps_prolog", params, num_returns, params, - num_params, last_sgpr, 0); + si_create_function(ctx, "ps_prolog", fninfo.types, num_returns, + &fninfo, 0); func = ctx->main_fn; /* Copy inputs to outputs. This should be no-op, as the registers match, * but it will prevent the compiler from overwriting them unintentionally. */ ret = ctx->return_value; - for (i = 0; i < num_params; i++) { + for (i = 0; i < fninfo.num_params; i++) { LLVMValueRef p = LLVMGetParam(func, i); ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, ""); } /* Polygon stippling. */ if (key->ps_prolog.states.poly_stipple) { /* POS_FIXED_PT is always last. */ unsigned pos = key->ps_prolog.num_input_sgprs + key->ps_prolog.num_input_vgprs - 1; LLVMValueRef list = si_prolog_get_rw_buffers(ctx); @@ -7018,20 +7041,21 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx, for (i = 0; i < 2; i++) ret = LLVMBuildInsertValue(gallivm->builder, ret, linear_center[i], base + 6 + i, ""); /* Overwrite LINEAR_CENTROID. */ for (i = 0; i < 2; i++) ret = LLVMBuildInsertValue(gallivm->builder, ret, linear_center[i], base + 10 + i, ""); } /* Interpolate colors. */ + unsigned color_out_idx = 0; for (i = 0; i < 2; i++) { unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf; unsigned face_vgpr = key->ps_prolog.num_input_sgprs + key->ps_prolog.face_vgpr_index; LLVMValueRef interp[2], color[4]; LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL; if (!writemask) continue; @@ -7059,21 +7083,21 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx, interp_fs_input(ctx, key->ps_prolog.color_attr_index[i], TGSI_SEMANTIC_COLOR, i, key->ps_prolog.num_interp_inputs, key->ps_prolog.colors_read, interp_ij, prim_mask, face, color); while (writemask) { unsigned chan = u_bit_scan(&writemask); ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan], - num_params++, ""); + fninfo.num_params + color_out_idx++, ""); } } /* Tell LLVM to insert WQM instruction sequence when needed. */ if (key->ps_prolog.wqm) { LLVMAddTargetDependentFunctionAttr(func, "amdgpu-ps-wqm-outputs", ""); } si_llvm_build_ret(ctx, ret); @@ -7081,57 +7105,55 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx, /** * Build the pixel shader epilog function. This handles everything that must be * emulated for pixel shader exports. (alpha-test, format conversions, etc) */ static void si_build_ps_epilog_function(struct si_shader_context *ctx, union si_shader_part_key *key) { struct gallivm_state *gallivm = &ctx->gallivm; struct lp_build_tgsi_context *bld_base = &ctx->bld_base; - LLVMTypeRef params[16+8*4+3]; + struct si_function_info fninfo; LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; - int last_sgpr, num_params = 0, i; + int i; struct si_ps_exports exp = {}; + si_init_function_info(&fninfo); + /* Declare input SGPRs. */ - params[ctx->param_rw_buffers = num_params++] = ctx->i64; - params[ctx->param_const_and_shader_buffers = num_params++] = ctx->i64; - params[ctx->param_samplers_and_images = num_params++] = ctx->i64; - assert(num_params == SI_PARAM_ALPHA_REF); - params[SI_PARAM_ALPHA_REF] = ctx->f32; - last_sgpr = SI_PARAM_ALPHA_REF; + ctx->param_rw_buffers = add_arg(&fninfo, ARG_SGPR, ctx->i64); + ctx->param_const_and_shader_buffers = add_arg(&fninfo, ARG_SGPR, ctx->i64); + ctx->param_samplers_and_images = add_arg(&fninfo, ARG_SGPR, ctx->i64); + add_arg_checked(&fninfo, ARG_SGPR, ctx->f32, SI_PARAM_ALPHA_REF); /* Declare input VGPRs. */ - num_params = (last_sgpr + 1) + + unsigned required_num_params = + fninfo.num_sgpr_params + util_bitcount(key->ps_epilog.colors_written) * 4 + key->ps_epilog.writes_z + key->ps_epilog.writes_stencil + key->ps_epilog.writes_samplemask; - num_params = MAX2(num_params, - last_sgpr + 1 + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1); - - assert(num_params <= ARRAY_SIZE(params)); + required_num_params = MAX2(required_num_params, + fninfo.num_sgpr_params + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1); - for (i = last_sgpr + 1; i < num_params; i++) - params[i] = ctx->f32; + while (fninfo.num_params < required_num_params) + add_arg(&fninfo, ARG_VGPR, ctx->f32); /* Create the function. */ - si_create_function(ctx, "ps_epilog", NULL, 0, params, num_params, - last_sgpr, 0); + si_create_function(ctx, "ps_epilog", NULL, 0, &fninfo, 0); /* Disable elimination of unused inputs. */ si_llvm_add_attribute(ctx->main_fn, "InitialPSInputAddr", 0xffffff); /* Process colors. */ - unsigned vgpr = last_sgpr + 1; + unsigned vgpr = fninfo.num_sgpr_params; unsigned colors_written = key->ps_epilog.colors_written; int last_color_export = -1; /* Find the last color export. */ if (!key->ps_epilog.writes_z && !key->ps_epilog.writes_stencil && !key->ps_epilog.writes_samplemask) { unsigned spi_format = key->ps_epilog.states.spi_shader_col_format; /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ @@ -7149,21 +7171,21 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx, } while (colors_written) { LLVMValueRef color[4]; int mrt = u_bit_scan(&colors_written); for (i = 0; i < 4; i++) color[i] = LLVMGetParam(ctx->main_fn, vgpr++); si_export_mrt_color(bld_base, color, mrt, - num_params - 1, + fninfo.num_params - 1, mrt == last_color_export, &exp); } /* Process depth, stencil, samplemask. */ if (key->ps_epilog.writes_z) depth = LLVMGetParam(ctx->main_fn, vgpr++); if (key->ps_epilog.writes_stencil) stencil = LLVMGetParam(ctx->main_fn, vgpr++); if (key->ps_epilog.writes_samplemask) samplemask = LLVMGetParam(ctx->main_fn, vgpr++); -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev