Module: Mesa
Branch: master
Commit: b37c91c12eb8fcdf763dbd98b17c33f98c63cae3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b37c91c12eb8fcdf763dbd98b17c33f98c63cae3

Author: Samuel Pitoiset <[email protected]>
Date:   Fri Nov 29 15:12:30 2019 +0100

radv: handle unaligned vertex fetches on GFX6/GFX10

The Vulkan spec doesn't say anything about vertex attribute alignment.

Fixes a test failure on GFX6 and a GPU hang on GFX10 with:
dEQP-VK.spirv_assembly.instruction.spirv1p4.entrypoint.tess_con_pc_entry_point

vkpipeline-db results on GFX10:
Totals from affected shaders:
SGPRS: 463772 -> 472972 (1.98 %)
VGPRS: 343208 -> 343752 (0.16 %)
Spilled SGPRs: 323 -> 336 (4.02 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Code Size: 13806200 -> 14164472 (2.60 %) bytes
Max Waves: 84021 -> 83755 (-0.32 %)

Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/2161
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>

---

 src/amd/vulkan/radv_nir_to_llvm.c | 133 ++++++++++++++++++++++++--------------
 1 file changed, 86 insertions(+), 47 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 9f44f1bd70d..9637271b1b6 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -1280,34 +1280,28 @@ adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
        return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, "");
 }
 
-static unsigned
-get_num_channels_from_data_format(unsigned data_format)
-{
-       switch (data_format) {
-       case V_008F0C_BUF_DATA_FORMAT_8:
-       case V_008F0C_BUF_DATA_FORMAT_16:
-       case V_008F0C_BUF_DATA_FORMAT_32:
-               return 1;
-       case V_008F0C_BUF_DATA_FORMAT_8_8:
-       case V_008F0C_BUF_DATA_FORMAT_16_16:
-       case V_008F0C_BUF_DATA_FORMAT_32_32:
-               return 2;
-       case V_008F0C_BUF_DATA_FORMAT_10_11_11:
-       case V_008F0C_BUF_DATA_FORMAT_11_11_10:
-       case V_008F0C_BUF_DATA_FORMAT_32_32_32:
-               return 3;
-       case V_008F0C_BUF_DATA_FORMAT_8_8_8_8:
-       case V_008F0C_BUF_DATA_FORMAT_10_10_10_2:
-       case V_008F0C_BUF_DATA_FORMAT_2_10_10_10:
-       case V_008F0C_BUF_DATA_FORMAT_16_16_16_16:
-       case V_008F0C_BUF_DATA_FORMAT_32_32_32_32:
-               return 4;
-       default:
-               break;
-       }
-
-       return 4;
-}
+static const struct vertex_format_info {
+       uint8_t vertex_byte_size;
+       uint8_t num_channels;
+       uint8_t chan_byte_size;
+       uint8_t chan_format;
+} vertex_format_table[] = {
+       {  0, 4, 0, V_008F0C_BUF_DATA_FORMAT_INVALID    },      /* 
BUF_DATA_FORMAT_INVALID      */
+       {  1, 1, 1, V_008F0C_BUF_DATA_FORMAT_8          },      /* 
BUF_DATA_FORMAT_8            */
+       {  2, 1, 2, V_008F0C_BUF_DATA_FORMAT_16         },      /* 
BUF_DATA_FORMAT_16           */
+       {  2, 2, 1, V_008F0C_BUF_DATA_FORMAT_8          },      /* 
BUF_DATA_FORMAT_8_8          */
+       {  4, 1, 4, V_008F0C_BUF_DATA_FORMAT_32         },      /* 
BUF_DATA_FORMAT_32           */
+       {  4, 2, 2, V_008F0C_BUF_DATA_FORMAT_16         },      /* 
BUF_DATA_FORMAT_16_16        */
+       {  4, 3, 0, V_008F0C_BUF_DATA_FORMAT_10_11_11   },      /* 
BUF_DATA_FORMAT_10_11_11     */
+       {  4, 3, 0, V_008F0C_BUF_DATA_FORMAT_11_11_10   },      /* 
BUF_DATA_FORMAT_11_11_10     */
+       {  4, 4, 0, V_008F0C_BUF_DATA_FORMAT_10_10_10_2 },      /* 
BUF_DATA_FORMAT_10_10_10_2   */
+       {  4, 4, 0, V_008F0C_BUF_DATA_FORMAT_2_10_10_10 },      /* 
BUF_DATA_FORMAT_2_10_10_10   */
+       {  4, 4, 1, V_008F0C_BUF_DATA_FORMAT_8          },      /* 
BUF_DATA_FORMAT_8_8_8_8      */
+       {  8, 2, 4, V_008F0C_BUF_DATA_FORMAT_32         },      /* 
BUF_DATA_FORMAT_32_32        */
+       {  8, 4, 2, V_008F0C_BUF_DATA_FORMAT_16         },      /* 
BUF_DATA_FORMAT_16_16_16_16  */
+       { 12, 3, 4, V_008F0C_BUF_DATA_FORMAT_32         },      /* 
BUF_DATA_FORMAT_32_32_32     */
+       { 16, 4, 4, V_008F0C_BUF_DATA_FORMAT_32         },      /* 
BUF_DATA_FORMAT_32_32_32_32  */
+};
 
 static LLVMValueRef
 radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx,
@@ -1393,11 +1387,13 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
                                                               
ctx->args->ac.base_vertex), "");
                }
 
+               assert(data_format < ARRAY_SIZE(vertex_format_table));
+               const struct vertex_format_info *vtx_info = 
&vertex_format_table[data_format];
+
                /* Adjust the number of channels to load based on the vertex
                 * attribute format.
                 */
-               unsigned num_format_channels = 
get_num_channels_from_data_format(data_format);
-               unsigned num_channels = MIN2(num_input_channels, 
num_format_channels);
+               unsigned num_channels = MIN2(num_input_channels, 
vtx_info->num_channels);
                unsigned attrib_binding = 
ctx->args->options->key.vs.vertex_attribute_bindings[attrib_index];
                unsigned attrib_offset = 
ctx->args->options->key.vs.vertex_attribute_offsets[attrib_index];
                unsigned attrib_stride = 
ctx->args->options->key.vs.vertex_attribute_strides[attrib_index];
@@ -1409,27 +1405,70 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
                        num_channels = MAX2(num_channels, 3);
                }
 
-               if (attrib_stride != 0 && attrib_offset > attrib_stride) {
-                       LLVMValueRef buffer_offset =
-                               LLVMConstInt(ctx->ac.i32,
-                                            attrib_offset / attrib_stride, 
false);
+               t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
+               t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
 
-                       buffer_index = LLVMBuildAdd(ctx->ac.builder,
-                                                   buffer_index,
-                                                   buffer_offset, "");
+               /* Perform per-channel vertex fetch operations if unaligned
+                * accesses are detected. Only GFX6 and GFX10 are affected.
+                */
+               bool unaligned_vertex_fetches = false;
+               if ((ctx->ac.chip_class == GFX6 || ctx->ac.chip_class == GFX10) 
&&
+                   vtx_info->chan_format != data_format &&
+                   ((attrib_offset % vtx_info->vertex_byte_size) ||
+                    (attrib_stride % vtx_info->vertex_byte_size)))
+                       unaligned_vertex_fetches = true;
+
+               if (unaligned_vertex_fetches) {
+                       unsigned chan_format = vtx_info->chan_format;
+                       LLVMValueRef values[4];
 
-                       attrib_offset = attrib_offset % attrib_stride;
-               }
+                       assert(ctx->ac.chip_class == GFX6 ||
+                              ctx->ac.chip_class == GFX10);
 
-               t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
-               t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
+                       for (unsigned chan  = 0; chan < num_channels; chan++) {
+                               unsigned chan_offset = attrib_offset + chan * 
vtx_info->chan_byte_size;
+                               LLVMValueRef chan_index = buffer_index;
+
+                               if (attrib_stride != 0 && chan_offset > 
attrib_stride) {
+                                       LLVMValueRef buffer_offset =
+                                               LLVMConstInt(ctx->ac.i32,
+                                                            chan_offset / 
attrib_stride, false);
 
-               input = ac_build_struct_tbuffer_load(&ctx->ac, t_list,
-                                                    buffer_index,
-                                                    LLVMConstInt(ctx->ac.i32, 
attrib_offset, false),
-                                                    ctx->ac.i32_0, 
ctx->ac.i32_0,
-                                                    num_channels,
-                                                    data_format, num_format, 
0, true);
+                                       chan_index = 
LLVMBuildAdd(ctx->ac.builder,
+                                                                 buffer_index,
+                                                                 
buffer_offset, "");
+
+                                       chan_offset = chan_offset % 
attrib_stride;
+                               }
+
+                               values[chan] = 
ac_build_struct_tbuffer_load(&ctx->ac, t_list,
+                                                                          
chan_index,
+                                                                          
LLVMConstInt(ctx->ac.i32, chan_offset, false),
+                                                                          
ctx->ac.i32_0, ctx->ac.i32_0, 1,
+                                                                          
chan_format, num_format, 0, true);
+                       }
+
+                       input = ac_build_gather_values(&ctx->ac, values, 
num_channels);
+               } else {
+                       if (attrib_stride != 0 && attrib_offset > 
attrib_stride) {
+                               LLVMValueRef buffer_offset =
+                                       LLVMConstInt(ctx->ac.i32,
+                                                    attrib_offset / 
attrib_stride, false);
+
+                               buffer_index = LLVMBuildAdd(ctx->ac.builder,
+                                                           buffer_index,
+                                                           buffer_offset, "");
+
+                               attrib_offset = attrib_offset % attrib_stride;
+                       }
+
+                       input = ac_build_struct_tbuffer_load(&ctx->ac, t_list,
+                                                            buffer_index,
+                                                            
LLVMConstInt(ctx->ac.i32, attrib_offset, false),
+                                                            ctx->ac.i32_0, 
ctx->ac.i32_0,
+                                                            num_channels,
+                                                            data_format, 
num_format, 0, true);
+               }
 
                if (ctx->args->options->key.vs.post_shuffle & (1 << 
attrib_index)) {
                        LLVMValueRef c[4];

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to