Module: Mesa Branch: master Commit: 38da379b3eb99c65fa3b9e5eeb1361fde700dbd8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=38da379b3eb99c65fa3b9e5eeb1361fde700dbd8
Author: Timur Kristóf <timur.kris...@gmail.com> Date: Mon Jan 11 18:36:20 2021 +0100 aco: Note if rasterization can start early. When there are no param exports in an NGG (or legacy VS) shader, the NO_PC_EXPORT=1 is set by RADV, which means PS waves can launch before the current stage finishes. Signed-off-by: Timur Kristóf <timur.kris...@gmail.com> Reviewed-by: Rhys Perry <pendingchao...@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7868> --- src/amd/compiler/README-ISA.md | 19 +++++++++++++++++++ src/amd/compiler/aco_instruction_selection_setup.cpp | 9 +++++++++ src/amd/compiler/aco_ir.h | 1 + 3 files changed, 29 insertions(+) diff --git a/src/amd/compiler/README-ISA.md b/src/amd/compiler/README-ISA.md index 940d2728c07..678759ff25c 100644 --- a/src/amd/compiler/README-ISA.md +++ b/src/amd/compiler/README-ISA.md @@ -130,6 +130,25 @@ on what sort of addressing should be used, but it says that it "is equivalent to an `S_CBRANCH` with extra math", so the subvector loop handling in ACO is done according to the `s_cbranch` doc. +## RDNA early rasterization + +The ISA documentation says about `s_endpgm`: + +> The hardware implicitly executes S_WAITCNT 0 and S_WAITCNT_VSCNT 0 +> before executing this instruction. + +What the doc doesn't say is that in case of NGG (and legacy VS) when there +are no param exports, the driver sets `NO_PC_EXPORT=1` for optimal performance, +and when this is set, the hardware will start clipping and rasterization +as soon as it encounters a position export with `DONE=1`, without waiting +for the NGG (or VS) to finish. + +It can even launch PS waves before NGG (or VS) ends. + +When this happens, any store performed by a VS is not guaranteed +to be complete when PS tries to load it, so we need to manually +make sure to insert wait instructions before the position exports. 
+ # Hardware Bugs ## SMEM corrupts VCCZ on SI/CI diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 90a0bd9e3b0..e0c2b93d2b8 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -366,6 +366,15 @@ setup_vs_output_info(isel_context *ctx, nir_shader *nir, pos_written |= 1 << 3; outinfo->pos_exports = util_bitcount(pos_written); + + /* GFX10+ early rasterization: + * When there are no param exports in an NGG (or legacy VS) shader, + * RADV sets NO_PC_EXPORT=1, which means the HW will start clipping and rasterization + * as soon as it encounters a DONE pos export. When this happens, PS waves can launch + * before the NGG (or VS) waves finish. + */ + ctx->program->early_rast = ctx->program->chip_class >= GFX10 && + outinfo->param_exports == 0; } void diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index a97927b2036..f8aab8cef67 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1738,6 +1738,7 @@ public: bool xnack_enabled = false; bool sram_ecc_enabled = false; bool has_fast_fma32 = false; + bool early_rast = false; /* whether rasterization can start as soon as the 1st DONE pos export is encountered */ bool needs_vcc = false; bool needs_flat_scr = false; _______________________________________________ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit