Module: Mesa Branch: main Commit: 3d589b8b464828dc19c4e7d73f5cc078c24915e1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3d589b8b464828dc19c4e7d73f5cc078c24915e1
Author: Timur Kristóf <timur.kris...@gmail.com> Date: Fri Apr 9 16:56:57 2021 +0200 ac: Add new NIR pass to lower NGG VS/TES. Signed-off-by: Timur Kristóf <timur.kris...@gmail.com> Reviewed-by: Daniel Schürmann <dan...@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10740> --- src/amd/common/ac_nir.h | 17 +++ src/amd/common/ac_nir_lower_ngg.c | 272 ++++++++++++++++++++++++++++++++++++++ src/amd/common/meson.build | 1 + 3 files changed, 290 insertions(+) diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h index b3d4c6b56f7..59e0d307f8a 100644 --- a/src/amd/common/ac_nir.h +++ b/src/amd/common/ac_nir.h @@ -86,6 +86,23 @@ bool ac_nir_lower_indirect_derefs(nir_shader *shader, enum chip_class chip_class); +typedef struct +{ + bool can_cull; + bool passthrough; +} ac_nir_ngg_config; + +ac_nir_ngg_config +ac_nir_lower_ngg_nogs(nir_shader *shader, + unsigned max_num_es_vertices, + unsigned num_vertices_per_primitive, + unsigned max_workgroup_size, + unsigned wave_size, + bool consider_culling, + bool consider_passthrough, + bool export_prim_id, + bool provoking_vtx_last); + #ifdef __cplusplus } #endif diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c new file mode 100644 index 00000000000..14c60408e4d --- /dev/null +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -0,0 +1,272 @@ +/* + * Copyright © 2021 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "ac_nir.h" +#include "nir_builder.h" +#include "u_math.h" + +typedef struct +{ + nir_variable *position_value_var; + nir_variable *prim_exp_arg_var; + + bool passthrough; + bool export_prim_id; + bool early_prim_export; + unsigned max_num_waves; + unsigned num_vertices_per_primitives; + unsigned provoking_vtx_idx; + unsigned max_es_num_vertices; + unsigned total_lds_bytes; +} lower_ngg_nogs_state; + +static nir_ssa_def * +pervertex_lds_addr(nir_builder *b, nir_ssa_def *vertex_idx, unsigned per_vtx_bytes) +{ + return nir_imul_imm(b, vertex_idx, per_vtx_bytes); +} + +static nir_ssa_def * +emit_pack_ngg_prim_exp_arg(nir_builder *b, unsigned num_vertices_per_primitives, + nir_ssa_def *vertex_indices[3], nir_ssa_def *is_null_prim) +{ + nir_ssa_def *arg = vertex_indices[0]; + + for (unsigned i = 0; i < num_vertices_per_primitives; ++i) { + assert(vertex_indices[i]); + + if (i) + arg = nir_ior(b, arg, nir_ishl(b, vertex_indices[i], nir_imm_int(b, 10u * i))); + + if (b->shader->info.stage == MESA_SHADER_VERTEX) { + nir_ssa_def *edgeflag = nir_build_load_initial_edgeflag_amd(b, 32, nir_imm_int(b, i)); + arg = nir_ior(b, arg, nir_ishl(b, edgeflag, nir_imm_int(b, 10u * i + 9u))); + } + } + + if (is_null_prim) { + if (is_null_prim->bit_size == 1) + is_null_prim = nir_b2i32(b, is_null_prim); + assert(is_null_prim->bit_size == 32); + arg = nir_ior(b, arg, nir_ishl(b, is_null_prim, nir_imm_int(b, 31u))); + } + + return arg; +} + +static nir_ssa_def * +ngg_input_primitive_vertex_index(nir_builder *b, unsigned vertex) +{ + /* TODO: This is RADV specific. We'll need to refactor RADV and/or RadeonSI to match. */ + return nir_ubfe(b, nir_build_load_gs_vertex_offset_amd(b, .base = vertex / 2u * 2u), + nir_imm_int(b, (vertex % 2u) * 16u), nir_imm_int(b, 16u)); +} + +static nir_ssa_def * +emit_ngg_nogs_prim_exp_arg(nir_builder *b, lower_ngg_nogs_state *st) +{ + if (st->passthrough) { + assert(!st->export_prim_id || b->shader->info.stage != MESA_SHADER_VERTEX); + return nir_build_load_packed_passthrough_primitive_amd(b); + } else { + nir_ssa_def *vtx_idx[3] = {0}; + + vtx_idx[0] = ngg_input_primitive_vertex_index(b, 0); + vtx_idx[1] = st->num_vertices_per_primitives >= 2 + ? ngg_input_primitive_vertex_index(b, 1) + : nir_imm_zero(b, 1, 32); + vtx_idx[2] = st->num_vertices_per_primitives >= 3 + ? ngg_input_primitive_vertex_index(b, 2) + : nir_imm_zero(b, 1, 32); + + return emit_pack_ngg_prim_exp_arg(b, st->num_vertices_per_primitives, vtx_idx, NULL); + } +} + +static void +emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def *arg) +{ + nir_if *if_gs_thread = nir_push_if(b, nir_build_has_input_primitive_amd(b)); + { + if (!arg) + arg = emit_ngg_nogs_prim_exp_arg(b, st); + + if (st->export_prim_id && b->shader->info.stage == MESA_SHADER_VERTEX) { + /* Copy Primitive IDs from GS threads to the LDS address corresponding to the ES thread of the provoking vertex. */ + nir_ssa_def *prim_id = nir_build_load_primitive_id(b); + nir_ssa_def *provoking_vtx_idx = ngg_input_primitive_vertex_index(b, st->provoking_vtx_idx); + nir_ssa_def *addr = pervertex_lds_addr(b, provoking_vtx_idx, 4u); + + nir_build_store_shared(b, prim_id, addr, .write_mask = 1u, .align_mul = 4u); + } + + nir_build_export_primitive_amd(b, arg); + } + nir_pop_if(b, if_gs_thread); +} + +static void +emit_store_ngg_nogs_es_primitive_id(nir_builder *b) +{ + nir_ssa_def *prim_id = NULL; + + if (b->shader->info.stage == MESA_SHADER_VERTEX) { + /* Workgroup barrier - wait for GS threads to store primitive ID in LDS. */ + nir_scoped_barrier(b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_WORKGROUP, + .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_shared); + + /* LDS address where the primitive ID is stored */ + nir_ssa_def *thread_id_in_threadgroup = nir_build_load_local_invocation_index(b); + nir_ssa_def *addr = pervertex_lds_addr(b, thread_id_in_threadgroup, 4u); + + /* Load primitive ID from LDS */ + prim_id = nir_build_load_shared(b, 1, 32, addr, .align_mul = 4u); + } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) { + /* Just use tess eval primitive ID, which is the same as the patch ID. */ + prim_id = nir_build_load_primitive_id(b); + } + + nir_io_semantics io_sem = { + .location = VARYING_SLOT_PRIMITIVE_ID, + .num_slots = 1, + }; + + nir_build_store_output(b, prim_id, nir_imm_zero(b, 1, 32), + .base = io_sem.location, + .write_mask = 1u, .src_type = nir_type_uint32, .io_semantics = io_sem); +} + +ac_nir_ngg_config +ac_nir_lower_ngg_nogs(nir_shader *shader, + unsigned max_num_es_vertices, + unsigned num_vertices_per_primitives, + unsigned max_workgroup_size, + unsigned wave_size, + bool consider_culling, + bool consider_passthrough, + bool export_prim_id, + bool provoking_vtx_last) +{ + nir_function_impl *impl = nir_shader_get_entrypoint(shader); + assert(impl); + assert(max_num_es_vertices && max_workgroup_size && wave_size); + + bool can_cull = false; /* TODO */ + bool passthrough = consider_passthrough && !can_cull && + !(shader->info.stage == MESA_SHADER_VERTEX && export_prim_id); + + nir_variable *position_value_var = nir_local_variable_create(impl, glsl_vec4_type(), "position_value"); + nir_variable *prim_exp_arg_var = nir_local_variable_create(impl, glsl_uint_type(), "prim_exp_arg"); + + lower_ngg_nogs_state state = { + .passthrough = passthrough, + .export_prim_id = export_prim_id, + .early_prim_export = exec_list_is_singular(&impl->body), + .num_vertices_per_primitives = num_vertices_per_primitives, + .provoking_vtx_idx = provoking_vtx_last ? (num_vertices_per_primitives - 1) : 0, + .position_value_var = position_value_var, + .prim_exp_arg_var = prim_exp_arg_var, + .max_num_waves = DIV_ROUND_UP(max_workgroup_size, wave_size), + .max_es_num_vertices = max_num_es_vertices, + }; + + /* We need LDS space when VS needs to export the primitive ID. */ + if (shader->info.stage == MESA_SHADER_VERTEX && export_prim_id) + state.total_lds_bytes = max_num_es_vertices * 4u; + + nir_cf_list extracted; + nir_cf_extract(&extracted, nir_before_cf_list(&impl->body), nir_after_cf_list(&impl->body)); + + nir_builder builder; + nir_builder *b = &builder; /* This is to avoid the & */ + nir_builder_init(b, impl); + b->cursor = nir_before_cf_list(&impl->body); + + if (!can_cull) { + /* Allocate export space on wave 0 - confirm to the HW that we want to use all possible space */ + nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_build_load_subgroup_id(b), nir_imm_int(b, 0))); + { + nir_ssa_def *vtx_cnt = nir_build_load_workgroup_num_input_vertices_amd(b); + nir_ssa_def *prim_cnt = nir_build_load_workgroup_num_input_primitives_amd(b); + nir_build_alloc_vertices_and_primitives_amd(b, vtx_cnt, prim_cnt); + } + nir_pop_if(b, if_wave_0); + + /* Take care of early primitive export, otherwise just pack the primitive export argument */ + if (state.early_prim_export) + emit_ngg_nogs_prim_export(b, &state, NULL); + else + nir_store_var(b, prim_exp_arg_var, emit_ngg_nogs_prim_exp_arg(b, &state), 0x1u); + } else { + abort(); /* TODO */ + } + + nir_if *if_es_thread = nir_push_if(b, nir_build_has_input_vertex_amd(b)); + { + if (can_cull) { + nir_ssa_def *pos_val = nir_load_var(b, state.position_value_var); + nir_io_semantics io_sem = { .location = VARYING_SLOT_POS, .num_slots = 1 }; + nir_build_store_output(b, pos_val, nir_imm_int(b, 0), .base = VARYING_SLOT_POS, .component = 0, .io_semantics = io_sem, .write_mask = 0xfu); + } + + /* Run the actual shader */ + nir_cf_reinsert(&extracted, b->cursor); + b->cursor = nir_after_cf_list(&if_es_thread->then_list); + + /* Export all vertex attributes (except primitive ID) */ + nir_build_export_vertex_amd(b); + + /* Export primitive ID (in case of early primitive export or TES) */ + if (state.export_prim_id && (state.early_prim_export || shader->info.stage != MESA_SHADER_VERTEX)) + emit_store_ngg_nogs_es_primitive_id(b); + } + nir_pop_if(b, if_es_thread); + + /* Take care of late primitive export */ + if (!state.early_prim_export) { + emit_ngg_nogs_prim_export(b, &state, nir_load_var(b, prim_exp_arg_var)); + if (state.export_prim_id && shader->info.stage == MESA_SHADER_VERTEX) { + if_es_thread = nir_push_if(b, nir_build_has_input_vertex_amd(b)); + emit_store_ngg_nogs_es_primitive_id(b); + nir_pop_if(b, if_es_thread); + } + } + + nir_metadata_preserve(impl, nir_metadata_none); + nir_validate_shader(shader, "after emitting NGG VS/TES"); + + /* Cleanup */ + nir_lower_vars_to_ssa(shader); + nir_remove_dead_variables(shader, nir_var_function_temp, NULL); + nir_opt_undef(shader); + + shader->info.shared_size = state.total_lds_bytes; + + ac_nir_ngg_config ret = { + .can_cull = can_cull, + .passthrough = passthrough, + }; + + return ret; +} diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build index fc8d0fe7e49..e4a2b3368a3 100644 --- a/src/amd/common/meson.build +++ b/src/amd/common/meson.build @@ -91,6 +91,7 @@ amd_common_files = files( 'ac_nir.h', 'ac_nir_lower_esgs_io_to_mem.c', 'ac_nir_lower_tess_io_to_mem.c', + 'ac_nir_lower_ngg.c', 'amd_family.c', ) _______________________________________________ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit