Module: Mesa
Branch: master
Commit: 98ea523e007efa71adecfcce92a168efcf9b54dd
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=98ea523e007efa71adecfcce92a168efcf9b54dd
Author: Marek Olšák <[email protected]>
Date: Tue Feb 9 18:56:04 2021 -0500

radeonsi: for tess, determine the minimum num_patches before optimizing tg size

Doing these MINs at the end could have undone optimizations for the LDS
size and threadgroup size, so move the MINs up.

Reviewed-by: Zoltán Böszörményi <[email protected]>
Reviewed-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9028>

---

 src/gallium/drivers/radeonsi/si_state_draw.cpp | 32 +++++++++++++-------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index a7aa9612e6c..62f4f3e1422 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -271,6 +271,22 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
    unsigned max_verts_per_patch = MAX2(num_tcs_input_cp, num_tcs_output_cp);
    *num_patches = 256 / max_verts_per_patch;
 
+   /* Not necessary for correctness, but higher numbers are slower.
+    * The hardware can do more, but the radeonsi shader constant is
+    * limited to 6 bits.
+    */
+   *num_patches = MIN2(*num_patches, 64); /* e.g. 64 triangles in exactly 3 waves */
+
+   /* When distributed tessellation is unsupported, switch between SEs
+    * at a higher frequency to manually balance the workload between SEs.
+    */
+   if (!sctx->screen->info.has_distributed_tess && sctx->screen->info.max_se > 1)
+      *num_patches = MIN2(*num_patches, 16); /* recommended */
+
+   /* Make sure the output data fits in the offchip buffer */
+   *num_patches =
+      MIN2(*num_patches, (sctx->screen->tess_offchip_block_dw_size * 4) / output_patch_size);
+
    /* Make sure that the data fits in LDS. This assumes the shaders only
     * use LDS for the inputs and outputs.
     *
@@ -286,22 +302,6 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
    *num_patches = MAX2(*num_patches, 1);
    assert(*num_patches * lds_per_patch <= max_lds_size);
 
-   /* Make sure the output data fits in the offchip buffer */
-   *num_patches =
-      MIN2(*num_patches, (sctx->screen->tess_offchip_block_dw_size * 4) / output_patch_size);
-
-   /* Not necessary for correctness, but improves performance.
-    * The hardware can do more, but the radeonsi shader constant is
-    * limited to 6 bits.
-    */
-   *num_patches = MIN2(*num_patches, 64); /* triangles: 3 full waves */
-
-   /* When distributed tessellation is unsupported, switch between SEs
-    * at a higher frequency to compensate for it.
-    */
-   if (!sctx->screen->info.has_distributed_tess && sctx->screen->info.max_se > 1)
-      *num_patches = MIN2(*num_patches, 16); /* recommended */
-
    /* Make sure that vector lanes are reasonably occupied. It probably
     * doesn't matter much because this is LS-HS, and TES is likely to
     * occupy significantly more CUs.

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit
