Module: Mesa Branch: main Commit: 55d21f2f12dd65a5e298fdb674227043ddd3da2b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=55d21f2f12dd65a5e298fdb674227043ddd3da2b
Author: Tatsuyuki Ishi <ishitatsuy...@gmail.com> Date: Tue Nov 7 11:12:34 2023 +0900 radv, aco: Inline struct aco_vs_input_state. Now that we no longer use the radv_vs_input_state pointer, we can simply inline all the state-related fields. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26023> --- src/amd/compiler/aco_instruction_selection.cpp | 31 +++++++++++++------------- src/amd/compiler/aco_shader_info.h | 8 +++---- src/amd/vulkan/radv_aco_shader_info.h | 16 ++++++------- src/amd/vulkan/radv_cmd_buffer.c | 14 ++++++------ src/amd/vulkan/radv_device.c | 6 ++--- src/amd/vulkan/radv_shader.c | 2 +- src/amd/vulkan/radv_shader.h | 22 +++++++++--------- 7 files changed, 46 insertions(+), 53 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 336fd9260f5..79625b6cf1f 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -12739,7 +12739,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh bld.sopp(aco_opcode::s_setprio, -1u, 0x3u); uint32_t attrib_mask = BITFIELD_MASK(pinfo->num_attributes); - bool has_nontrivial_divisors = pinfo->state.nontrivial_divisors; + bool has_nontrivial_divisors = pinfo->nontrivial_divisors; wait_imm lgkm_imm; lgkm_imm.lgkm = 0; @@ -12800,10 +12800,10 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh } bool needs_instance_index = - pinfo->state.instance_rate_inputs & - ~(pinfo->state.zero_divisors | pinfo->state.nontrivial_divisors); /* divisor is 1 */ - bool needs_start_instance = pinfo->state.instance_rate_inputs & pinfo->state.zero_divisors; - bool needs_vertex_index = ~pinfo->state.instance_rate_inputs & attrib_mask; + pinfo->instance_rate_inputs & + ~(pinfo->zero_divisors | pinfo->nontrivial_divisors); /* divisor is 1 */ + bool needs_start_instance = pinfo->instance_rate_inputs & pinfo->zero_divisors; + bool needs_vertex_index = ~pinfo->instance_rate_inputs & attrib_mask; if (needs_vertex_index) bld.vadd32(Definition(vertex_index, v1), get_arg_fixed(args, args->base_vertex), get_arg_fixed(args, args->vertex_id), false, Operand(s2), true); @@ -12821,12 +12821,11 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh /* calculate index */ Operand fetch_index = Operand(vertex_index, v1); - if (pinfo->state.instance_rate_inputs & (1u << loc)) { - if (!(pinfo->state.zero_divisors & (1u << loc))) { + if (pinfo->instance_rate_inputs & (1u << loc)) { + if (!(pinfo->zero_divisors & (1u << loc))) { fetch_index = instance_id; - if (pinfo->state.nontrivial_divisors & (1u << loc)) { - unsigned index = - util_bitcount(pinfo->state.nontrivial_divisors & BITFIELD_MASK(loc)); + if (pinfo->nontrivial_divisors & (1u << loc)) { + unsigned index = util_bitcount(pinfo->nontrivial_divisors & BITFIELD_MASK(loc)); fetch_index = calc_nontrivial_instance_id( bld, args, pinfo, index, instance_id, start_instance, prolog_input, nontrivial_tmp_vgpr0, nontrivial_tmp_vgpr1); @@ -12841,14 +12840,14 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh /* perform load */ PhysReg cur_desc = desc.advance(i * 16); if ((pinfo->misaligned_mask & (1u << loc))) { - const struct ac_vtx_format_info* vtx_info = &vtx_info_table[pinfo->state.formats[loc]]; + const struct ac_vtx_format_info* vtx_info = &vtx_info_table[pinfo->formats[loc]]; assert(vtx_info->has_hw_format & 0x1); unsigned dfmt = vtx_info->hw_format[0] & 0xf; unsigned nfmt = vtx_info->hw_format[0] >> 4; for (unsigned j = 0; j < vtx_info->num_channels; j++) { - bool post_shuffle = pinfo->state.post_shuffle & (1u << loc); + bool post_shuffle = pinfo->post_shuffle & (1u << loc); unsigned offset = vtx_info->chan_byte_size * (post_shuffle && j < 3 ? 2 - j : j); /* Use MUBUF to workaround hangs for byte-aligned dword loads. The Vulkan spec @@ -12894,7 +12893,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh } } - if (pinfo->state.alpha_adjust_lo | pinfo->state.alpha_adjust_hi) { + if (pinfo->alpha_adjust_lo | pinfo->alpha_adjust_hi) { wait_imm vm_imm; vm_imm.vm = 0; bld.sopp(aco_opcode::s_waitcnt, -1, vm_imm.pack(program->gfx_level)); @@ -12902,11 +12901,11 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW. * so we may need to fix it up. */ - u_foreach_bit (loc, (pinfo->state.alpha_adjust_lo | pinfo->state.alpha_adjust_hi)) { + u_foreach_bit (loc, (pinfo->alpha_adjust_lo | pinfo->alpha_adjust_hi)) { PhysReg alpha(attributes_start.reg() + loc * 4u + 3); - unsigned alpha_adjust = (pinfo->state.alpha_adjust_lo >> loc) & 0x1; - alpha_adjust |= ((pinfo->state.alpha_adjust_hi >> loc) & 0x1) << 1; + unsigned alpha_adjust = (pinfo->alpha_adjust_lo >> loc) & 0x1; + alpha_adjust |= ((pinfo->alpha_adjust_hi >> loc) & 0x1) << 1; if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED) bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(alpha, v1), Operand(alpha, v1)); diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h index 5fdb96fa833..493d6bf3336 100644 --- a/src/amd/compiler/aco_shader_info.h +++ b/src/amd/compiler/aco_shader_info.h @@ -41,7 +41,9 @@ extern "C" { #define ACO_MAX_VERTEX_ATTRIBS 32 #define ACO_MAX_VBS 32 -struct aco_vs_input_state { +struct aco_vs_prolog_info { + struct ac_arg inputs; + uint32_t instance_rate_inputs; uint32_t nontrivial_divisors; uint32_t zero_divisors; @@ -53,11 +55,7 @@ struct aco_vs_input_state { uint32_t alpha_adjust_hi; uint8_t formats[ACO_MAX_VERTEX_ATTRIBS]; -}; -struct aco_vs_prolog_info { - struct ac_arg inputs; - struct aco_vs_input_state state; unsigned num_attributes; uint32_t misaligned_mask; bool is_ngg; diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h index e75ba5826aa..8df3f31b3bd 100644 --- a/src/amd/vulkan/radv_aco_shader_info.h +++ b/src/amd/vulkan/radv_aco_shader_info.h @@ -72,19 +72,17 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv aco_info->next_stage_pc = radv_args->next_stage_pc; } -#define ASSIGN_VS_STATE_FIELD(x) aco_info->state.x = radv->state.x -#define ASSIGN_VS_STATE_FIELD_CP(x) memcpy(&aco_info->state.x, &radv->state.x, sizeof(radv->state.x)) static inline void radv_aco_convert_vs_prolog_key(struct aco_vs_prolog_info *aco_info, const struct radv_vs_prolog_key *radv, const struct radv_shader_args *radv_args) { - ASSIGN_VS_STATE_FIELD(instance_rate_inputs); - ASSIGN_VS_STATE_FIELD(nontrivial_divisors); - ASSIGN_VS_STATE_FIELD(zero_divisors); - ASSIGN_VS_STATE_FIELD(post_shuffle); - ASSIGN_VS_STATE_FIELD(alpha_adjust_lo); - ASSIGN_VS_STATE_FIELD(alpha_adjust_hi); - ASSIGN_VS_STATE_FIELD_CP(formats); + ASSIGN_FIELD(instance_rate_inputs); + ASSIGN_FIELD(nontrivial_divisors); + ASSIGN_FIELD(zero_divisors); + ASSIGN_FIELD(post_shuffle); + ASSIGN_FIELD(alpha_adjust_lo); + ASSIGN_FIELD(alpha_adjust_hi); + ASSIGN_FIELD_CP(formats); ASSIGN_FIELD(num_attributes); ASSIGN_FIELD(misaligned_mask); ASSIGN_FIELD(is_ngg); diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 19e0e55e45b..a1ced57bd15 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -3774,15 +3774,15 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v struct radv_vs_prolog_key key; memset(&key, 0, sizeof(key)); - key.state.instance_rate_inputs = instance_rate_inputs; - key.state.nontrivial_divisors = *nontrivial_divisors; - key.state.zero_divisors = zero_divisors; + key.instance_rate_inputs = instance_rate_inputs; + key.nontrivial_divisors = *nontrivial_divisors; + key.zero_divisors = zero_divisors; /* If the attribute is aligned, post shuffle is implemented using DST_SEL instead. */ - key.state.post_shuffle = state->post_shuffle & attribute_mask & misaligned_mask; - key.state.alpha_adjust_hi = state->alpha_adjust_hi & attribute_mask; - key.state.alpha_adjust_lo = state->alpha_adjust_lo & attribute_mask; + key.post_shuffle = state->post_shuffle & attribute_mask & misaligned_mask; + key.alpha_adjust_hi = state->alpha_adjust_hi & attribute_mask; + key.alpha_adjust_lo = state->alpha_adjust_lo & attribute_mask; u_foreach_bit (index, misaligned_mask) - key.state.formats[index] = state->formats[index]; + key.formats[index] = state->formats[index]; key.num_attributes = num_attributes; key.misaligned_mask = misaligned_mask; /* The instance ID input VGPR is placed differently when as_ls=true. */ diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 56a73c72b7c..8aef8e62125 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -176,7 +176,7 @@ radv_device_init_vs_prologs(struct radv_device *device) key.wave32 = device->physical_device->ge_wave_size == 32; for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) { - key.state.instance_rate_inputs = 0; + key.instance_rate_inputs = 0; key.num_attributes = i; device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key); @@ -188,14 +188,14 @@ radv_device_init_vs_prologs(struct radv_device *device) for (unsigned num_attributes = 1; num_attributes <= 16; num_attributes++) { for (unsigned count = 1; count <= num_attributes; count++) { for (unsigned start = 0; start <= (num_attributes - count); start++) { - key.state.instance_rate_inputs = u_bit_consecutive(start, count); + key.instance_rate_inputs = u_bit_consecutive(start, count); key.num_attributes = num_attributes; struct radv_shader_part *prolog = radv_create_vs_prolog(device, &key); if (!prolog) return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); - assert(idx == radv_instance_rate_prolog_index(num_attributes, key.state.instance_rate_inputs)); + assert(idx == radv_instance_rate_prolog_index(num_attributes, key.instance_rate_inputs)); device->instance_rate_vs_prologs[idx++] = prolog; } } diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index fec40cf7065..a73a7b2f73e 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -2581,7 +2581,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke if (!prolog) goto fail; - prolog->nontrivial_divisors = key->state.nontrivial_divisors; + prolog->nontrivial_divisors = key->nontrivial_divisors; if (options.dump_shader) { fprintf(stderr, "Vertex prolog"); diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index b62a7147d6b..5a451c71d5c 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -488,18 +488,16 @@ struct radv_vs_prolog_key { /* All the fields are pre-masked with BITFIELD_MASK(num_attributes). * Some of the fields are pre-masked by other conditions. See lookup_vs_prolog. */ - struct { - uint32_t instance_rate_inputs; - uint32_t nontrivial_divisors; - uint32_t zero_divisors; - uint32_t post_shuffle; - /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes - * using bitwise arithmetic. - */ - uint32_t alpha_adjust_lo; - uint32_t alpha_adjust_hi; - uint8_t formats[MAX_VERTEX_ATTRIBS]; - } state; + uint32_t instance_rate_inputs; + uint32_t nontrivial_divisors; + uint32_t zero_divisors; + uint32_t post_shuffle; + /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes + * using bitwise arithmetic. + */ + uint32_t alpha_adjust_lo; + uint32_t alpha_adjust_hi; + uint8_t formats[MAX_VERTEX_ATTRIBS]; unsigned num_attributes; uint32_t misaligned_mask; bool as_ls;