Mesa (main): radeonsi: disallow NGG fast launch on Navi1x because VGT_FLUSH makes it slower

GitLab Mirror Tue, 28 Sep 2021 10:58:47 -0700

Module: Mesa
Branch: main
Commit: ccbd551192ea08864da5ce88d51507572743747e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ccbd551192ea08864da5ce88d51507572743747e


Author: Marek Olšák <[email protected]>
Date:   Sun Sep 26 08:45:19 2021 -0400

radeonsi: disallow NGG fast launch on Navi1x because VGT_FLUSH makes it slower

This improves viewperf performance on Navi1x.

All Navi1x fast launch workarounds are removed, and all fast launch
codepaths are disabled on Navi1x; fast launch is now only used on
gfx10.3 and newer.

Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13048>

---

 src/gallium/drivers/radeonsi/si_gfx_cs.c       |  8 +++-----
 src/gallium/drivers/radeonsi/si_state_draw.cpp | 25 ++++++++++++-------------
 2 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c 
b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 007a52372ed..f44f8e1eb65 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -358,12 +358,10 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool 
first_cs)
                  SI_CONTEXT_INV_L2 | SI_CONTEXT_START_PIPELINE_STATS;
    ctx->pipeline_stats_enabled = -1;
 
-   /* We don't know if the last draw used NGG or NGG fast launch because it 
can be a different
-    * process. When switching NGG->legacy or NGG->FAST_LAUNCH, we need to 
flush VGT for certain
-    * hw generations.
+   /* We don't know if the last draw used NGG because it can be a different 
process.
+    * When switching NGG->legacy, we need to flush VGT for certain hw 
generations.
     */
-   if ((ctx->screen->info.has_vgt_flush_ngg_legacy_bug && !ctx->ngg) ||
-       (ctx->chip_class == GFX10 && ctx->ngg_culling & 
SI_NGG_CULL_GS_FAST_LAUNCH_ALL))
+   if (ctx->screen->info.has_vgt_flush_ngg_legacy_bug && !ctx->ngg)
       ctx->flags |= SI_CONTEXT_VGT_FLUSH;
 
    if (ctx->border_color_buffer) {
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp 
b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index 847515b454e..671966b1870 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -1601,7 +1601,8 @@ static void si_emit_draw_packets(struct si_context *sctx, 
const struct pipe_draw
          }
       } else {
          /* Set the index buffer for fast launch. The VS prolog will load the 
indices. */
-         if (NGG && sctx->ngg_culling & 
SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(~0)) {
+         if (GFX_VERSION >= GFX10_3 && NGG &&
+             sctx->ngg_culling & 
SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(~0)) {
             index_max_size = (indexbuf->width0 - index_offset) >> 
util_logbase2(original_index_size);
 
             radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, 
si_resource(indexbuf),
@@ -2173,11 +2174,14 @@ static void si_draw_vbo(struct pipe_context *ctx,
           * A draw must have at least 1 full primitive.
           * The fast launch doesn't work with tessellation.
           *
+          * Fast launch is disabled on Navi1x because enabling it requires 
VGT_FLUSH,
+          * which decreases performance by up to 10%. Only use fast launch on 
gfx10.3 and newer.
+          *
           * Since NGG fast launch is enabled by VGT_SHADER_STAGES_EN, which 
causes a context roll,
           * which decreases performance, decrease the frequency of switching 
it on/off using
           * a high vertex count threshold.
           */
-         if (!HAS_TESS && total_direct_count >= 8000 &&
+         if (GFX_VERSION >= GFX10_3 && !HAS_TESS && total_direct_count >= 8000 
&&
              !(sctx->screen->debug_flags & DBG(NO_FAST_LAUNCH))) {
             if (prim == PIPE_PRIM_TRIANGLES && !index_size) {
                ngg_culling |= SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST;
@@ -2208,19 +2212,14 @@ static void si_draw_vbo(struct pipe_context *ctx,
          return;
       }
 
-      /* Insert a VGT_FLUSH when enabling fast launch changes to prevent hangs.
-       * See issues #2418, #2426, #2434
-       *
-       * This is the setting that is used by the draw.
+      /* si_update_shaders can clear the ngg_culling settings if the shader 
compilation hasn't
+       * finished.
        */
-      if (GFX_VERSION >= GFX10) {
+      if (GFX_VERSION >= GFX10 && NGG) {
          uint8_t ngg_culling = si_get_vs_inline(sctx, HAS_TESS, 
HAS_GS)->current->key.opt.ngg_culling;
-         if (GFX_VERSION == GFX10 &&
-             !(old_ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL) &&
-             ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL)
-            sctx->flags |= SI_CONTEXT_VGT_FLUSH;
 
-         if (old_ngg_culling & 
SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(~0) &&
+         if (GFX_VERSION >= GFX10_3 &&
+             old_ngg_culling & 
SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(~0) &&
              !(ngg_culling & 
SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(~0))) {
             /* Need to re-set these, because we have bound an index buffer 
there. */
             sctx->shader_pointers_dirty |=
@@ -2235,7 +2234,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
    }
 
    /* ngg_culling can be changed after si_update_shaders above, so determine 
index_size here. */
-   if (GFX_VERSION >= GFX10 && NGG &&
+   if (GFX_VERSION >= GFX10_3 && NGG &&
        sctx->ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(~0))
       index_size = 0; /* The index buffer will be emulated. */

Mesa (main): radeonsi: disallow NGG fast launch on Navi1x because VGT_FLUSH makes it slower

Reply via email to