Module: Mesa
Branch: main
Commit: 55d21f2f12dd65a5e298fdb674227043ddd3da2b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=55d21f2f12dd65a5e298fdb674227043ddd3da2b

Author: Tatsuyuki Ishi <ishitatsuy...@gmail.com>
Date:   Tue Nov  7 11:12:34 2023 +0900

radv, aco: Inline struct aco_vs_input_state.

Now that we no longer use the radv_vs_input_state pointer, we can simply
inline all the state-related fields.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26023>

---

 src/amd/compiler/aco_instruction_selection.cpp | 31 +++++++++++++-------------
 src/amd/compiler/aco_shader_info.h             |  8 +++----
 src/amd/vulkan/radv_aco_shader_info.h          | 16 ++++++-------
 src/amd/vulkan/radv_cmd_buffer.c               | 14 ++++++------
 src/amd/vulkan/radv_device.c                   |  6 ++---
 src/amd/vulkan/radv_shader.c                   |  2 +-
 src/amd/vulkan/radv_shader.h                   | 22 +++++++++---------
 7 files changed, 46 insertions(+), 53 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 336fd9260f5..79625b6cf1f 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -12739,7 +12739,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
    bld.sopp(aco_opcode::s_setprio, -1u, 0x3u);
 
    uint32_t attrib_mask = BITFIELD_MASK(pinfo->num_attributes);
-   bool has_nontrivial_divisors = pinfo->state.nontrivial_divisors;
+   bool has_nontrivial_divisors = pinfo->nontrivial_divisors;
 
    wait_imm lgkm_imm;
    lgkm_imm.lgkm = 0;
@@ -12800,10 +12800,10 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
          }
 
          bool needs_instance_index =
-            pinfo->state.instance_rate_inputs &
-            ~(pinfo->state.zero_divisors | pinfo->state.nontrivial_divisors); /* divisor is 1 */
-         bool needs_start_instance = pinfo->state.instance_rate_inputs & pinfo->state.zero_divisors;
-         bool needs_vertex_index = ~pinfo->state.instance_rate_inputs & attrib_mask;
+            pinfo->instance_rate_inputs &
+            ~(pinfo->zero_divisors | pinfo->nontrivial_divisors); /* divisor is 1 */
+         bool needs_start_instance = pinfo->instance_rate_inputs & pinfo->zero_divisors;
+         bool needs_vertex_index = ~pinfo->instance_rate_inputs & attrib_mask;
          if (needs_vertex_index)
             bld.vadd32(Definition(vertex_index, v1), get_arg_fixed(args, args->base_vertex),
                        get_arg_fixed(args, args->vertex_id), false, Operand(s2), true);
@@ -12821,12 +12821,11 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
 
          /* calculate index */
          Operand fetch_index = Operand(vertex_index, v1);
-         if (pinfo->state.instance_rate_inputs & (1u << loc)) {
-            if (!(pinfo->state.zero_divisors & (1u << loc))) {
+         if (pinfo->instance_rate_inputs & (1u << loc)) {
+            if (!(pinfo->zero_divisors & (1u << loc))) {
                fetch_index = instance_id;
-               if (pinfo->state.nontrivial_divisors & (1u << loc)) {
-                  unsigned index =
-                     util_bitcount(pinfo->state.nontrivial_divisors & BITFIELD_MASK(loc));
+               if (pinfo->nontrivial_divisors & (1u << loc)) {
+                  unsigned index = util_bitcount(pinfo->nontrivial_divisors & BITFIELD_MASK(loc));
                   fetch_index = calc_nontrivial_instance_id(
                      bld, args, pinfo, index, instance_id, start_instance, prolog_input,
                      nontrivial_tmp_vgpr0, nontrivial_tmp_vgpr1);
@@ -12841,14 +12840,14 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
          /* perform load */
          PhysReg cur_desc = desc.advance(i * 16);
          if ((pinfo->misaligned_mask & (1u << loc))) {
-            const struct ac_vtx_format_info* vtx_info = &vtx_info_table[pinfo->state.formats[loc]];
+            const struct ac_vtx_format_info* vtx_info = &vtx_info_table[pinfo->formats[loc]];
 
             assert(vtx_info->has_hw_format & 0x1);
             unsigned dfmt = vtx_info->hw_format[0] & 0xf;
             unsigned nfmt = vtx_info->hw_format[0] >> 4;
 
             for (unsigned j = 0; j < vtx_info->num_channels; j++) {
-               bool post_shuffle = pinfo->state.post_shuffle & (1u << loc);
+               bool post_shuffle = pinfo->post_shuffle & (1u << loc);
                unsigned offset = vtx_info->chan_byte_size * (post_shuffle && j < 3 ? 2 - j : j);
 
                /* Use MUBUF to workaround hangs for byte-aligned dword loads. The Vulkan spec
@@ -12894,7 +12893,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
       }
    }
 
-   if (pinfo->state.alpha_adjust_lo | pinfo->state.alpha_adjust_hi) {
+   if (pinfo->alpha_adjust_lo | pinfo->alpha_adjust_hi) {
       wait_imm vm_imm;
       vm_imm.vm = 0;
       bld.sopp(aco_opcode::s_waitcnt, -1, vm_imm.pack(program->gfx_level));
@@ -12902,11 +12901,11 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
 
    /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
     * so we may need to fix it up. */
-   u_foreach_bit (loc, (pinfo->state.alpha_adjust_lo | pinfo->state.alpha_adjust_hi)) {
+   u_foreach_bit (loc, (pinfo->alpha_adjust_lo | pinfo->alpha_adjust_hi)) {
       PhysReg alpha(attributes_start.reg() + loc * 4u + 3);
 
-      unsigned alpha_adjust = (pinfo->state.alpha_adjust_lo >> loc) & 0x1;
-      alpha_adjust |= ((pinfo->state.alpha_adjust_hi >> loc) & 0x1) << 1;
+      unsigned alpha_adjust = (pinfo->alpha_adjust_lo >> loc) & 0x1;
+      alpha_adjust |= ((pinfo->alpha_adjust_hi >> loc) & 0x1) << 1;
 
       if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED)
          bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(alpha, v1), Operand(alpha, v1));
diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h
index 5fdb96fa833..493d6bf3336 100644
--- a/src/amd/compiler/aco_shader_info.h
+++ b/src/amd/compiler/aco_shader_info.h
@@ -41,7 +41,9 @@ extern "C" {
 #define ACO_MAX_VERTEX_ATTRIBS 32
 #define ACO_MAX_VBS            32
 
-struct aco_vs_input_state {
+struct aco_vs_prolog_info {
+   struct ac_arg inputs;
+
    uint32_t instance_rate_inputs;
    uint32_t nontrivial_divisors;
    uint32_t zero_divisors;
@@ -53,11 +55,7 @@ struct aco_vs_input_state {
    uint32_t alpha_adjust_hi;
 
    uint8_t formats[ACO_MAX_VERTEX_ATTRIBS];
-};
 
-struct aco_vs_prolog_info {
-   struct ac_arg inputs;
-   struct aco_vs_input_state state;
    unsigned num_attributes;
    uint32_t misaligned_mask;
    bool is_ngg;
diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h
index e75ba5826aa..8df3f31b3bd 100644
--- a/src/amd/vulkan/radv_aco_shader_info.h
+++ b/src/amd/vulkan/radv_aco_shader_info.h
@@ -72,19 +72,17 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv
    aco_info->next_stage_pc = radv_args->next_stage_pc;
 }
 
-#define ASSIGN_VS_STATE_FIELD(x)    aco_info->state.x = radv->state.x
-#define ASSIGN_VS_STATE_FIELD_CP(x) memcpy(&aco_info->state.x, &radv->state.x, sizeof(radv->state.x))
 static inline void
 radv_aco_convert_vs_prolog_key(struct aco_vs_prolog_info *aco_info, const struct radv_vs_prolog_key *radv,
                                const struct radv_shader_args *radv_args)
 {
-   ASSIGN_VS_STATE_FIELD(instance_rate_inputs);
-   ASSIGN_VS_STATE_FIELD(nontrivial_divisors);
-   ASSIGN_VS_STATE_FIELD(zero_divisors);
-   ASSIGN_VS_STATE_FIELD(post_shuffle);
-   ASSIGN_VS_STATE_FIELD(alpha_adjust_lo);
-   ASSIGN_VS_STATE_FIELD(alpha_adjust_hi);
-   ASSIGN_VS_STATE_FIELD_CP(formats);
+   ASSIGN_FIELD(instance_rate_inputs);
+   ASSIGN_FIELD(nontrivial_divisors);
+   ASSIGN_FIELD(zero_divisors);
+   ASSIGN_FIELD(post_shuffle);
+   ASSIGN_FIELD(alpha_adjust_lo);
+   ASSIGN_FIELD(alpha_adjust_hi);
+   ASSIGN_FIELD_CP(formats);
    ASSIGN_FIELD(num_attributes);
    ASSIGN_FIELD(misaligned_mask);
    ASSIGN_FIELD(is_ngg);
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 19e0e55e45b..a1ced57bd15 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3774,15 +3774,15 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v
 
    struct radv_vs_prolog_key key;
    memset(&key, 0, sizeof(key));
-   key.state.instance_rate_inputs = instance_rate_inputs;
-   key.state.nontrivial_divisors = *nontrivial_divisors;
-   key.state.zero_divisors = zero_divisors;
+   key.instance_rate_inputs = instance_rate_inputs;
+   key.nontrivial_divisors = *nontrivial_divisors;
+   key.zero_divisors = zero_divisors;
    /* If the attribute is aligned, post shuffle is implemented using DST_SEL instead. */
-   key.state.post_shuffle = state->post_shuffle & attribute_mask & misaligned_mask;
-   key.state.alpha_adjust_hi = state->alpha_adjust_hi & attribute_mask;
-   key.state.alpha_adjust_lo = state->alpha_adjust_lo & attribute_mask;
+   key.post_shuffle = state->post_shuffle & attribute_mask & misaligned_mask;
+   key.alpha_adjust_hi = state->alpha_adjust_hi & attribute_mask;
+   key.alpha_adjust_lo = state->alpha_adjust_lo & attribute_mask;
    u_foreach_bit (index, misaligned_mask)
-      key.state.formats[index] = state->formats[index];
+      key.formats[index] = state->formats[index];
    key.num_attributes = num_attributes;
    key.misaligned_mask = misaligned_mask;
    /* The instance ID input VGPR is placed differently when as_ls=true. */
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 56a73c72b7c..8aef8e62125 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -176,7 +176,7 @@ radv_device_init_vs_prologs(struct radv_device *device)
    key.wave32 = device->physical_device->ge_wave_size == 32;
 
    for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) {
-      key.state.instance_rate_inputs = 0;
+      key.instance_rate_inputs = 0;
       key.num_attributes = i;
 
       device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key);
@@ -188,14 +188,14 @@ radv_device_init_vs_prologs(struct radv_device *device)
    for (unsigned num_attributes = 1; num_attributes <= 16; num_attributes++) {
       for (unsigned count = 1; count <= num_attributes; count++) {
          for (unsigned start = 0; start <= (num_attributes - count); start++) {
-            key.state.instance_rate_inputs = u_bit_consecutive(start, count);
+            key.instance_rate_inputs = u_bit_consecutive(start, count);
             key.num_attributes = num_attributes;
 
             struct radv_shader_part *prolog = radv_create_vs_prolog(device, &key);
             if (!prolog)
                return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
 
-            assert(idx == radv_instance_rate_prolog_index(num_attributes, key.state.instance_rate_inputs));
+            assert(idx == radv_instance_rate_prolog_index(num_attributes, key.instance_rate_inputs));
             device->instance_rate_vs_prologs[idx++] = prolog;
          }
       }
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index fec40cf7065..a73a7b2f73e 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -2581,7 +2581,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke
    if (!prolog)
       goto fail;
 
-   prolog->nontrivial_divisors = key->state.nontrivial_divisors;
+   prolog->nontrivial_divisors = key->nontrivial_divisors;
 
    if (options.dump_shader) {
       fprintf(stderr, "Vertex prolog");
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index b62a7147d6b..5a451c71d5c 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -488,18 +488,16 @@ struct radv_vs_prolog_key {
    /* All the fields are pre-masked with BITFIELD_MASK(num_attributes).
     * Some of the fields are pre-masked by other conditions. See lookup_vs_prolog.
     */
-   struct {
-      uint32_t instance_rate_inputs;
-      uint32_t nontrivial_divisors;
-      uint32_t zero_divisors;
-      uint32_t post_shuffle;
-      /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes
-       * using bitwise arithmetic.
-       */
-      uint32_t alpha_adjust_lo;
-      uint32_t alpha_adjust_hi;
-      uint8_t formats[MAX_VERTEX_ATTRIBS];
-   } state;
+   uint32_t instance_rate_inputs;
+   uint32_t nontrivial_divisors;
+   uint32_t zero_divisors;
+   uint32_t post_shuffle;
+   /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes
+    * using bitwise arithmetic.
+    */
+   uint32_t alpha_adjust_lo;
+   uint32_t alpha_adjust_hi;
+   uint8_t formats[MAX_VERTEX_ATTRIBS];
    unsigned num_attributes;
    uint32_t misaligned_mask;
    bool as_ls;

Reply via email to