Module: Mesa Branch: main Commit: b3ea6c610363c26cfc461b92c7a002b94a2761fe URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b3ea6c610363c26cfc461b92c7a002b94a2761fe
Author: Timur Kristóf <[email protected]> Date: Sun Jan 23 18:35:12 2022 +0100 radv: Add task shader arguments. Mostly the same as for compute shaders, but with a few extras: task_ring_offsets: Same as what ring_offsets is to graphics shaders. Contains an address that points to a buffer that contains the ring buffer descriptors. task_ring_entry: Index that can be used to address the draw and payload rings. draw_id: Same meaning as in graphics shaders. task_ib_addr/task_ib_stride: Indirect buffer address and stride from the draw calls. These are used to emulate the firstTask feature of NV_mesh_shader. Signed-off-by: Timur Kristóf <[email protected]> Reviewed-by: Rhys Perry <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14929> --- src/amd/vulkan/radv_constants.h | 4 +++- src/amd/vulkan/radv_device.c | 6 +++++- src/amd/vulkan/radv_shader.h | 6 +++++- src/amd/vulkan/radv_shader_args.c | 42 +++++++++++++++++++++++++++++++++++++-- src/amd/vulkan/radv_shader_args.h | 7 +++++++ 5 files changed, 60 insertions(+), 5 deletions(-) diff --git a/src/amd/vulkan/radv_constants.h b/src/amd/vulkan/radv_constants.h index 5787f042316..4b6d3b9667b 100644 --- a/src/amd/vulkan/radv_constants.h +++ b/src/amd/vulkan/radv_constants.h @@ -74,7 +74,9 @@ #define RING_GSVS_GS 4 #define RING_HS_TESS_FACTOR 5 #define RING_HS_TESS_OFFCHIP 6 -#define RING_PS_SAMPLE_POSITIONS 7 +#define RING_TS_DRAW 7 +#define RING_TS_PAYLOAD 8 +#define RING_PS_SAMPLE_POSITIONS 9 /* max number of descriptor sets */ #define MAX_SETS 32 diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 9822deb815d..63de77db804 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -3707,6 +3707,10 @@ radv_fill_shader_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_ desc += 8; + /* Reserved for task shader rings. */ + + desc += 8; + if (add_sample_positions) { /* add sample positions after all rings */ memcpy(desc, queue->device->sample_locations_1x, 8); @@ -4004,7 +4008,7 @@ radv_update_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave add_sample_positions) { uint32_t size = 0; if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || add_sample_positions) { - size = 112; /* 2 dword + 2 padding + 4 dword * 6 */ + size = 144; /* 2 dword + 2 padding + 4 dword * 8 */ if (add_sample_positions) size += 128; /* 64+32+16+8 = 120 bytes */ } else if (scratch_bo) { diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index f843776f102..373e3410e78 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -146,7 +146,8 @@ enum radv_ud_index { AC_UD_NGG_CULLING_SETTINGS = 7, AC_UD_NGG_VIEWPORT = 8, AC_UD_FORCE_VRS_RATES = 9, - AC_UD_SHADER_START = 10, + AC_UD_TASK_RING_ENTRY = 10, + AC_UD_SHADER_START = 11, AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START, AC_UD_VS_BASE_VERTEX_START_INSTANCE, AC_UD_VS_PROLOG_INPUTS, @@ -155,6 +156,9 @@ enum radv_ud_index { AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START, AC_UD_CS_SBT_DESCRIPTORS, AC_UD_CS_RAY_LAUNCH_SIZE, + AC_UD_CS_TASK_RING_OFFSETS, + AC_UD_CS_TASK_DRAW_ID, + AC_UD_CS_TASK_IB, AC_UD_CS_MAX_UD, AC_UD_GS_MAX_UD, AC_UD_TCS_MAX_UD, diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index c4c95bf4d74..084c779934b 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -50,7 +50,8 @@ set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, uint8_ static void set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx) { - bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS; + bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS && + idx != AC_UD_CS_TASK_RING_OFFSETS; set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2); } @@ -157,18 +158,26 @@ allocate_user_sgprs(enum chip_class chip_class, const struct radv_shader_info *i /* 2 user sgprs will always be allocated for scratch/rings */ user_sgpr_count += 2; + if (stage == MESA_SHADER_TASK) + user_sgpr_count += 2; /* task descriptors */ + /* prolog inputs */ if (info->vs.has_prolog) user_sgpr_count += 2; switch (stage) { case MESA_SHADER_COMPUTE: + case MESA_SHADER_TASK: if (info->cs.uses_sbt) user_sgpr_count += 1; if (info->cs.uses_grid_size) user_sgpr_count += args->load_grid_size_from_user_sgpr ? 3 : 2; if (info->cs.uses_ray_launch_size) user_sgpr_count += 3; + if (info->vs.needs_draw_id) + user_sgpr_count += 1; + if (info->cs.uses_task_rings) + user_sgpr_count += 4; /* ring_entry, 2x ib_addr, ib_stride */ break; case MESA_SHADER_FRAGMENT: break; @@ -212,7 +221,8 @@ allocate_user_sgprs(enum chip_class chip_class, const struct radv_shader_info *i if (info->so.num_outputs) user_sgpr_count++; - uint32_t available_sgprs = chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16; + uint32_t available_sgprs = + chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16; uint32_t remaining_sgprs = available_sgprs - user_sgpr_count; uint32_t num_desc_set = util_bitcount(info->desc_set_used_mask); @@ -527,6 +537,9 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_ if (args->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets); } + if (stage == MESA_SHADER_TASK) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets); + } /* To ensure prologs match the main VS, VS specific input SGPRs have to be placed before other * sgprs. @@ -534,6 +547,7 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_ switch (stage) { case MESA_SHADER_COMPUTE: + case MESA_SHADER_TASK: declare_global_input_sgprs(info, &user_sgpr_info, args); if (info->cs.uses_sbt) { @@ -551,6 +565,16 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_ ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.ray_launch_size); } + if (info->vs.needs_draw_id) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id); + } + + if (info->cs.uses_task_rings) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.task_ring_entry); + ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->task_ib_addr); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->task_ib_stride); + } + for (int i = 0; i < 3; i++) { if (info->cs.uses_block_id[i]) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]); @@ -750,6 +774,9 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_ uint8_t user_sgpr_idx = 0; set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx); + if (stage == MESA_SHADER_TASK) { + set_loc_shader_ptr(args, AC_UD_CS_TASK_RING_OFFSETS, &user_sgpr_idx); + } /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */ @@ -765,6 +792,7 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_ switch (stage) { case MESA_SHADER_COMPUTE: + case MESA_SHADER_TASK: if (args->ac.sbt_descriptors.used) { set_loc_shader_ptr(args, AC_UD_CS_SBT_DESCRIPTORS, &user_sgpr_idx); } @@ -775,6 +803,16 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_ if (args->ac.ray_launch_size.used) { set_loc_shader(args, AC_UD_CS_RAY_LAUNCH_SIZE, &user_sgpr_idx, 3); } + if (args->ac.draw_id.used) { + set_loc_shader(args, AC_UD_CS_TASK_DRAW_ID, &user_sgpr_idx, 1); + } + if (args->ac.task_ring_entry.used) { + set_loc_shader(args, AC_UD_TASK_RING_ENTRY, &user_sgpr_idx, 1); + } + if (args->task_ib_addr.used) { + assert(args->task_ib_stride.used); + set_loc_shader(args, AC_UD_CS_TASK_IB, &user_sgpr_idx, 3); + } break; case MESA_SHADER_VERTEX: if (args->ac.view_index.used) diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h index ed202a09fae..b510c31d0ef 100644 --- a/src/amd/vulkan/radv_shader_args.h +++ b/src/amd/vulkan/radv_shader_args.h @@ -36,7 +36,10 @@ struct radv_shader_args { struct ac_shader_args ac; struct ac_arg descriptor_sets[MAX_SETS]; + /* User data 0/1. GFX: descriptor list, Compute: scratch BO */ struct ac_arg ring_offsets; + /* User data 2/3. same as the descriptor list above but for task shaders. */ + struct ac_arg task_ring_offsets; /* Streamout */ struct ac_arg streamout_buffers; @@ -47,6 +50,10 @@ struct radv_shader_args { struct ac_arg ngg_viewport_scale[2]; struct ac_arg ngg_viewport_translate[2]; + /* Task shaders */ + struct ac_arg task_ib_addr; + struct ac_arg task_ib_stride; + struct ac_arg prolog_inputs; struct ac_arg vs_inputs[MAX_VERTEX_ATTRIBS];
