Module: Mesa Branch: main Commit: c44d5d61ce7251ea935723ef888f916f69cab9a1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c44d5d61ce7251ea935723ef888f916f69cab9a1
Author: Dave Airlie <[email protected]> Date: Thu May 5 14:27:01 2022 +1000 aco: remove radv vs prolog key from aco internals. This creates an aco specific key, and converts radv to it. Reviewed-by: Timur Kristóf <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16342> --- src/amd/compiler/aco_instruction_selection.cpp | 34 +++++++++++++------------- src/amd/compiler/aco_interface.cpp | 2 +- src/amd/compiler/aco_interface.h | 3 ++- src/amd/compiler/aco_ir.h | 3 +-- src/amd/compiler/aco_shader_info.h | 23 +++++++++++++++++ src/amd/vulkan/radv_aco_shader_info.h | 22 +++++++++++++++++ src/amd/vulkan/radv_shader.c | 4 ++- 7 files changed, 69 insertions(+), 22 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index d948ba1b0b7..bbbe0922c60 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -11683,7 +11683,7 @@ calc_nontrivial_instance_id(Builder& bld, const struct radv_shader_args* args, u } void -select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shader_config* config, +select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shader_config* config, const struct radv_nir_compiler_options* options, const struct aco_shader_info* info, const struct radv_shader_args* args, unsigned* num_preserved_sgprs) @@ -11710,7 +11710,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad bld.sopp(aco_opcode::s_setprio, -1u, 0x3u); uint32_t attrib_mask = BITFIELD_MASK(key->num_attributes); - bool has_nontrivial_divisors = key->state->nontrivial_divisors & attrib_mask; + bool has_nontrivial_divisors = key->state.nontrivial_divisors & attrib_mask; wait_imm lgkm_imm; lgkm_imm.lgkm = 0; @@ -11769,12 +11769,12 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad bool needs_instance_index = false; bool needs_start_instance = false; - u_foreach_bit(i, key->state->instance_rate_inputs & attrib_mask) + u_foreach_bit(i, key->state.instance_rate_inputs & attrib_mask) { - needs_instance_index |= key->state->divisors[i] == 1; - needs_start_instance |= key->state->divisors[i] == 0; + needs_instance_index |= key->state.divisors[i] == 1; + needs_start_instance |= key->state.divisors[i] == 0; } - bool needs_vertex_index = ~key->state->instance_rate_inputs & attrib_mask; + bool needs_vertex_index = ~key->state.instance_rate_inputs & attrib_mask; if (needs_vertex_index) bld.vadd32(Definition(vertex_index, v1), get_arg_fixed(args, args->ac.base_vertex), get_arg_fixed(args, args->ac.vertex_id), false, Operand(s2), true); @@ -11792,13 +11792,13 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad /* calculate index */ Operand fetch_index = Operand(vertex_index, v1); - if (key->state->instance_rate_inputs & (1u << loc)) { - uint32_t divisor = key->state->divisors[loc]; + if (key->state.instance_rate_inputs & (1u << loc)) { + uint32_t divisor = key->state.divisors[loc]; if (divisor) { fetch_index = instance_id; - if (key->state->nontrivial_divisors & (1u << loc)) { + if (key->state.nontrivial_divisors & (1u << loc)) { unsigned index = - util_bitcount(key->state->nontrivial_divisors & BITFIELD_MASK(loc)); + util_bitcount(key->state.nontrivial_divisors & BITFIELD_MASK(loc)); fetch_index = calc_nontrivial_instance_id( bld, args, index, instance_id, start_instance, prolog_input, nontrivial_tmp_vgpr0, nontrivial_tmp_vgpr1); @@ -11813,11 +11813,11 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad /* perform load */ PhysReg cur_desc = desc.advance(i * 16); if ((key->misaligned_mask & (1u << loc))) { - unsigned dfmt = key->state->formats[loc] & 0xf; - unsigned nfmt = key->state->formats[loc] >> 4; + unsigned dfmt = key->state.formats[loc] & 0xf; + unsigned nfmt = key->state.formats[loc] >> 4; const struct ac_data_format_info* vtx_info = ac_get_data_format_info(dfmt); for (unsigned j = 0; j < vtx_info->num_channels; j++) { - bool post_shuffle = key->state->post_shuffle & (1u << loc); + bool post_shuffle = key->state.post_shuffle & (1u << loc); unsigned offset = vtx_info->chan_byte_size * (post_shuffle && j < 3 ? 2 - j : j); /* Use MUBUF to workaround hangs for byte-aligned dword loads. The Vulkan spec @@ -11849,7 +11849,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad } } - if (key->state->alpha_adjust_lo | key->state->alpha_adjust_hi) { + if (key->state.alpha_adjust_lo | key->state.alpha_adjust_hi) { wait_imm vm_imm; vm_imm.vm = 0; bld.sopp(aco_opcode::s_waitcnt, -1, vm_imm.pack(program->chip_class)); @@ -11857,12 +11857,12 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW. * so we may need to fix it up. */ - u_foreach_bit(loc, (key->state->alpha_adjust_lo | key->state->alpha_adjust_hi)) + u_foreach_bit(loc, (key->state.alpha_adjust_lo | key->state.alpha_adjust_hi)) { PhysReg alpha(attributes_start.reg() + loc * 4u + 3); - unsigned alpha_adjust = (key->state->alpha_adjust_lo >> loc) & 0x1; - alpha_adjust |= ((key->state->alpha_adjust_hi >> loc) & 0x1) << 1; + unsigned alpha_adjust = (key->state.alpha_adjust_lo >> loc) & 0x1; + alpha_adjust |= ((key->state.alpha_adjust_hi >> loc) & 0x1) << 1; if (alpha_adjust == ALPHA_ADJUST_SSCALED) bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(alpha, v1), Operand(alpha, v1)); diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp index 8077d590855..a2f85f64d73 100644 --- a/src/amd/compiler/aco_interface.cpp +++ b/src/amd/compiler/aco_interface.cpp @@ -281,7 +281,7 @@ aco_compile_shader(const struct radv_nir_compiler_options* options, void aco_compile_vs_prolog(const struct radv_nir_compiler_options* options, const struct aco_shader_info* info, - const struct radv_vs_prolog_key* key, + const struct aco_vs_prolog_key* key, const struct radv_shader_args* args, struct radv_prolog_binary** binary) { diff --git a/src/amd/compiler/aco_interface.h b/src/amd/compiler/aco_interface.h index 02a19c03dc9..5f293beae56 100644 --- a/src/amd/compiler/aco_interface.h +++ b/src/amd/compiler/aco_interface.h @@ -33,6 +33,7 @@ extern "C" { struct ac_shader_config; struct aco_shader_info; +struct aco_vs_prolog_key; struct aco_compiler_statistic_info { char name[32]; @@ -50,7 +51,7 @@ void aco_compile_shader(const struct radv_nir_compiler_options* options, void aco_compile_vs_prolog(const struct radv_nir_compiler_options* options, const struct aco_shader_info* info, - const struct radv_vs_prolog_key* key, + const struct aco_vs_prolog_key* key, const struct radv_shader_args* args, struct radv_prolog_binary** binary); diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index e676068db61..a6dbbe0d902 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -38,7 +38,6 @@ #include <vector> struct radv_shader_args; -struct radv_vs_prolog_key; namespace aco { @@ -2168,7 +2167,7 @@ void select_trap_handler_shader(Program* program, struct nir_shader* shader, const struct radv_nir_compiler_options* options, const struct aco_shader_info* info, const struct radv_shader_args* args); -void select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, +void select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shader_config* config, const struct radv_nir_compiler_options* options, const struct aco_shader_info* info, diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h index 83a1c304499..61969ac4dad 100644 --- a/src/amd/compiler/aco_shader_info.h +++ b/src/amd/compiler/aco_shader_info.h @@ -35,6 +35,29 @@ extern "C" { #define ACO_MAX_SO_OUTPUTS 64 #define ACO_MAX_SO_BUFFERS 4 +#define ACO_MAX_VERTEX_ATTRIBS 32 + +struct aco_vs_input_state { + uint32_t instance_rate_inputs; + uint32_t nontrivial_divisors; + uint32_t post_shuffle; + /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes + * using bitwise arithmetic. + */ + uint32_t alpha_adjust_lo; + uint32_t alpha_adjust_hi; + + uint32_t divisors[ACO_MAX_VERTEX_ATTRIBS]; + uint8_t formats[ACO_MAX_VERTEX_ATTRIBS]; +}; + +struct aco_vs_prolog_key { + struct aco_vs_input_state state; + unsigned num_attributes; + uint32_t misaligned_mask; + bool is_ngg; + gl_shader_stage next_stage; +}; struct aco_vp_output_info { uint8_t vs_output_param_offset[VARYING_SLOT_MAX]; diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h index c69c69a86de..87f84f79a3e 100644 --- a/src/amd/vulkan/radv_aco_shader_info.h +++ b/src/amd/vulkan/radv_aco_shader_info.h @@ -103,8 +103,30 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, radv_aco_convert_shader_so_info(aco_info, radv); aco_info->gfx9_gs_ring_lds_size = radv->gs_ring_info.lds_size; } + +#define ASSIGN_VS_STATE_FIELD(x) aco_info->state.x = radv->state->x +#define ASSIGN_VS_STATE_FIELD_CP(x) memcpy(&aco_info->state.x, &radv->state->x, sizeof(radv->state->x)) +static inline void +radv_aco_convert_vs_prolog_key(struct aco_vs_prolog_key *aco_info, + const struct radv_vs_prolog_key *radv) +{ + ASSIGN_VS_STATE_FIELD(instance_rate_inputs); + ASSIGN_VS_STATE_FIELD(nontrivial_divisors); + ASSIGN_VS_STATE_FIELD(post_shuffle); + ASSIGN_VS_STATE_FIELD(alpha_adjust_lo); + ASSIGN_VS_STATE_FIELD(alpha_adjust_hi); + ASSIGN_VS_STATE_FIELD_CP(divisors); + ASSIGN_VS_STATE_FIELD_CP(formats); + ASSIGN_FIELD(num_attributes); + ASSIGN_FIELD(misaligned_mask); + ASSIGN_FIELD(is_ngg); + ASSIGN_FIELD(next_stage); +} +#undef ASSIGN_VS_STATE_FIELD +#undef ASSIGN_VS_STATE_FIELD_CP #undef ASSIGN_FIELD #undef ASSIGN_FIELD_CP #undef ASSIGN_OUTINFO + #endif diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index e17d6ff28e9..460fa0e0b7f 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -2186,8 +2186,10 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke struct radv_prolog_binary *binary = NULL; struct aco_shader_info ac_info; + struct aco_vs_prolog_key ac_key; radv_aco_convert_shader_info(&ac_info, &info); - aco_compile_vs_prolog(&options, &ac_info, key, &args, &binary); + radv_aco_convert_vs_prolog_key(&ac_key, key); + aco_compile_vs_prolog(&options, &ac_info, &ac_key, &args, &binary); struct radv_shader_prolog *prolog = upload_vs_prolog(device, binary, info.wave_size); if (prolog) { prolog->nontrivial_divisors = key->state->nontrivial_divisors;
