From: Marek Olšák <marek.ol...@amd.com>

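For the maximum number of patches per threadgroup, AMDVLK uses 64
(distributed tessellation) and 16 (non-distributed tessellation).
radeonsi will use 63 and 16; 63 rather than 64 because the radeonsi shader
constant is limited to 6 bits.
* This might improve tessellation performance on Hawaii, Bonaire, Tahiti,
  and Pitcairn (they will use 16).
* I'm not sure if this matters for 1 SE configs, so the limit of 16 is only
  applied when there is more than 1 SE.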
---
 src/gallium/drivers/radeonsi/si_state_draw.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index d61374e95ca..b29135a1e68 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -158,24 +158,31 @@ static bool si_emit_derived_tess_state(struct si_context *sctx,
         */
        hardware_lds_size = 32768;
        *num_patches = MIN2(*num_patches, hardware_lds_size / (input_patch_size +
                                                               output_patch_size));
 
        /* Make sure the output data fits in the offchip buffer */
        *num_patches = MIN2(*num_patches,
                            (sctx->screen->tess_offchip_block_dw_size * 4) /
                            output_patch_size);
 
-       /* Not necessary for correctness, but improves performance. The
-        * specific value is taken from the proprietary driver.
+       /* Not necessary for correctness, but improves performance.
+        * The hardware can do more, but the radeonsi shader constant is
+        * limited to 6 bits.
         */
-       *num_patches = MIN2(*num_patches, 40);
+       *num_patches = MIN2(*num_patches, 63); /* triangles: 3 full waves except 3 lanes */
+
+       /* When distributed tessellation is unsupported, switch between SEs
+        * at a higher frequency to compensate for it.
+        */
+       if (!sctx->screen->has_distributed_tess && sctx->screen->info.max_se > 1)
+               *num_patches = MIN2(*num_patches, 16); /* recommended */
 
        /* Make sure that vector lanes are reasonably occupied. It probably
         * doesn't matter much because this is LS-HS, and TES is likely to
         * occupy significantly more CUs.
         */
        unsigned temp_verts_per_tg = *num_patches * max_verts_per_patch;
        if (temp_verts_per_tg > 64 && temp_verts_per_tg % 64 < 48)
                *num_patches = (temp_verts_per_tg & ~63) / max_verts_per_patch;
 
        if (sctx->chip_class == SI) {
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to