From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_state_draw.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index e7f8389caf3..d61374e95ca 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -163,20 +163,28 @@ static bool si_emit_derived_tess_state(struct si_context *sctx,
 	/* Make sure the output data fits in the offchip buffer */
 	*num_patches = MIN2(*num_patches,
 			    (sctx->screen->tess_offchip_block_dw_size * 4) /
 			    output_patch_size);
 
 	/* Not necessary for correctness, but improves performance. The
 	 * specific value is taken from the proprietary driver.
 	 */
 	*num_patches = MIN2(*num_patches, 40);
 
+	/* Make sure that vector lanes are reasonably occupied. It probably
+	 * doesn't matter much because this is LS-HS, and TES is likely to
+	 * occupy significantly more CUs.
+	 */
+	unsigned temp_verts_per_tg = *num_patches * max_verts_per_patch;
+	if (temp_verts_per_tg > 64 && temp_verts_per_tg % 64 < 48)
+		*num_patches = (temp_verts_per_tg & ~63) / max_verts_per_patch;
+
 	if (sctx->chip_class == SI) {
 		/* SI bug workaround, related to power management. Limit LS-HS
 		 * threadgroups to only one wave.
 		 */
 		unsigned one_wave = 64 / max_verts_per_patch;
 		*num_patches = MIN2(*num_patches, one_wave);
 	}
 
 	/* The VGT HS block increments the patch ID unconditionally
 	 * within a single threadgroup. This results in incorrect
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev