Module: Mesa
Branch: main
Commit: eb47e077826f99320c0ba98b694e003d3cc86a58
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=eb47e077826f99320c0ba98b694e003d3cc86a58

Author: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Date:   Thu Oct 26 09:56:37 2023 +0200

radv: remove NGG streamout support for RDNA1-2

This was useful for experimenting with it on RDNA2 and during RDNA3 bringup,
but the support is now rock solid on RDNA3, so there is no reason to keep the
RADV_PERFTEST=ngg_streamout option.

Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25903>

---

 docs/envvars.rst                      |  2 --
 src/amd/vulkan/radv_cmd_buffer.c      | 60 +++++++++++------------------------
 src/amd/vulkan/radv_debug.h           |  5 ++-
 src/amd/vulkan/radv_instance.c        |  1 -
 src/amd/vulkan/radv_physical_device.c |  3 +-
 src/amd/vulkan/radv_pipeline.c        |  2 --
 src/amd/vulkan/radv_private.h         |  5 ++-
 7 files changed, 23 insertions(+), 55 deletions(-)

diff --git a/docs/envvars.rst b/docs/envvars.rst
index 4be7a1fe1f2..1584cc7f3c2 100644
--- a/docs/envvars.rst
+++ b/docs/envvars.rst
@@ -1332,8 +1332,6 @@ RADV driver environment variables
       disable optimizations that get enabled when all VRAM is CPU visible.
    ``pswave32``
       enable wave32 for pixel shaders (GFX10+)
-   ``ngg_streamout``
-      enable NGG streamout
    ``nggc``
       enable NGG culling on GPUs where it's not enabled by default (GFX10.1 
only).
    ``sam``
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index d8a0ab5caf3..dfb011f10d7 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -6353,11 +6353,7 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer 
*cmd_buffer, const struct radv_
       cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
 
       if (cmd_buffer->device->physical_device->use_ngg_streamout) {
-         /* GFX11 only needs GDS OA for streamout. */
-         if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX11) {
-            cmd_buffer->gds_needed = true;
-         }
-
+         /* GFX11 needs GDS OA for streamout. */
          cmd_buffer->gds_oa_needed = true;
       }
    }
@@ -10961,7 +10957,6 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer 
commandBuffer, uint32_t firstC
    struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
    struct radv_streamout_state *so = &cmd_buffer->state.streamout;
    struct radv_shader_info *info = &cmd_buffer->state.last_vgt_shader->info;
-   unsigned last_target = util_last_bit(so->enabled_mask) - 1;
    struct radeon_cmdbuf *cs = cmd_buffer->cs;
 
    assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
@@ -11001,29 +10996,18 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer 
commandBuffer, uint32_t firstC
       }
 
       if (cmd_buffer->device->physical_device->use_ngg_streamout) {
-         if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) 
{
-            if (append) {
-               radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-               radeon_emit(
-                  cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | 
COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM);
-               radeon_emit(cs, va);
-               radeon_emit(cs, va >> 32);
-               radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + 
i);
-               radeon_emit(cs, 0);
-            } else {
-               /* The PKT3 CAM bit workaround seems needed for initializing 
this GDS register to zero. */
-               
radeon_set_perfctr_reg(cmd_buffer->device->physical_device->rad_info.gfx_level, 
cmd_buffer->qf, cs,
-                                      R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 + 
i * 4, 0);
-            }
-         } else {
-            radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
-            radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : 
V_411_DATA) | S_411_DST_SEL(V_411_GDS) |
-                               S_411_CP_SYNC(i == last_target));
+         if (append) {
+            radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+            radeon_emit(cs,
+                        COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | 
COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM);
             radeon_emit(cs, va);
             radeon_emit(cs, va >> 32);
-            radeon_emit(cs, 4 * i); /* destination in GDS */
+            radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
             radeon_emit(cs, 0);
-            radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) | 
S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
+         } else {
+            /* The PKT3 CAM bit workaround seems needed for initializing this 
GDS register to zero. */
+            
radeon_set_perfctr_reg(cmd_buffer->device->physical_device->rad_info.gfx_level, 
cmd_buffer->qf, cs,
+                                   R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 + i * 
4, 0);
          }
       } else {
          /* AMD GCN binds streamout buffers as shader resources.
@@ -11104,22 +11088,14 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer 
commandBuffer, uint32_t firstCou
       }
 
       if (cmd_buffer->device->physical_device->use_ngg_streamout) {
-         if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) 
{
-            if (append) {
-               radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-               radeon_emit(
-                  cs, COPY_DATA_SRC_SEL(COPY_DATA_REG) | 
COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
-               radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + 
i);
-               radeon_emit(cs, 0);
-               radeon_emit(cs, va);
-               radeon_emit(cs, va >> 32);
-            }
-         } else {
-            if (append) {
-               si_cs_emit_write_event_eop(cs, 
cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf,
-                                          V_028A90_PS_DONE, 0, 
EOP_DST_SEL_TC_L2, EOP_DATA_SEL_GDS, va,
-                                          EOP_DATA_GDS(i, 1), 0);
-            }
+         if (append) {
+            radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+            radeon_emit(cs,
+                        COPY_DATA_SRC_SEL(COPY_DATA_REG) | 
COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
+            radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
+            radeon_emit(cs, 0);
+            radeon_emit(cs, va);
+            radeon_emit(cs, va >> 32);
          }
       } else {
          if (append) {
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
index fb18bf0c8cb..12b5d1a9bbc 100644
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -85,9 +85,8 @@ enum {
    RADV_PERFTEST_NGGC = 1u << 8,
    RADV_PERFTEST_EMULATE_RT = 1u << 9,
    RADV_PERFTEST_RT_WAVE_64 = 1u << 10,
-   RADV_PERFTEST_NGG_STREAMOUT = 1u << 11,
-   RADV_PERFTEST_VIDEO_DECODE = 1u << 12,
-   RADV_PERFTEST_DMA_SHADERS = 1u << 13,
+   RADV_PERFTEST_VIDEO_DECODE = 1u << 11,
+   RADV_PERFTEST_DMA_SHADERS = 1u << 12,
 };
 
 bool radv_init_trace(struct radv_device *device);
diff --git a/src/amd/vulkan/radv_instance.c b/src/amd/vulkan/radv_instance.c
index 03d647ad5a5..7e9a60e2929 100644
--- a/src/amd/vulkan/radv_instance.c
+++ b/src/amd/vulkan/radv_instance.c
@@ -96,7 +96,6 @@ static const struct debug_control radv_perftest_options[] = 
{{"localbos", RADV_P
                                                              {"nggc", 
RADV_PERFTEST_NGGC},
                                                              {"emulate_rt", 
RADV_PERFTEST_EMULATE_RT},
                                                              {"rtwave64", 
RADV_PERFTEST_RT_WAVE_64},
-                                                             {"ngg_streamout", 
RADV_PERFTEST_NGG_STREAMOUT},
                                                              {"video_decode", 
RADV_PERFTEST_VIDEO_DECODE},
                                                              {"dmashaders", 
RADV_PERFTEST_DMA_SHADERS},
                                                              {NULL, 0}};
diff --git a/src/amd/vulkan/radv_physical_device.c 
b/src/amd/vulkan/radv_physical_device.c
index 541e13da935..38dcf6ce1e4 100644
--- a/src/amd/vulkan/radv_physical_device.c
+++ b/src/amd/vulkan/radv_physical_device.c
@@ -1882,8 +1882,7 @@ radv_physical_device_try_create(struct radv_instance 
*instance, drmDevicePtr drm
       (device->rad_info.gfx_level == GFX10_3 || 
(device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
       !(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);
 
-   device->use_ngg_streamout = device->use_ngg && (device->rad_info.gfx_level 
>= GFX11 ||
-                                                   
(device->instance->perftest_flags & RADV_PERFTEST_NGG_STREAMOUT));
+   device->use_ngg_streamout = device->rad_info.gfx_level >= GFX11;
 
    device->emulate_ngg_gs_query_pipeline_stat = device->use_ngg && 
device->rad_info.gfx_level < GFX11;
 
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index d18f5b04875..7ec93b7a73a 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -242,8 +242,6 @@ radv_get_hash_flags(const struct radv_device *device, bool 
stats)
       hash_flags |= RADV_HASH_SHADER_SPLIT_FMA;
    if (device->instance->debug_flags & RADV_DEBUG_NO_FMASK)
       hash_flags |= RADV_HASH_SHADER_NO_FMASK;
-   if (device->physical_device->use_ngg_streamout)
-      hash_flags |= RADV_HASH_SHADER_NGG_STREAMOUT;
    if (device->instance->debug_flags & RADV_DEBUG_NO_RT)
       hash_flags |= RADV_HASH_SHADER_NO_RT;
    if (device->instance->dual_color_blend_by_location)
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 5eb9102f8c0..c87cd06bebb 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -2174,9 +2174,8 @@ struct radv_event {
 #define RADV_HASH_SHADER_SPLIT_FMA       (1 << 17)
 #define RADV_HASH_SHADER_RT_WAVE64       (1 << 18)
 #define RADV_HASH_SHADER_NO_FMASK        (1 << 19)
-#define RADV_HASH_SHADER_NGG_STREAMOUT   (1 << 20)
-#define RADV_HASH_SHADER_NO_RT           (1 << 21)
-#define RADV_HASH_SHADER_DUAL_BLEND_MRT1 (1 << 22)
+#define RADV_HASH_SHADER_NO_RT           (1 << 20)
+#define RADV_HASH_SHADER_DUAL_BLEND_MRT1 (1 << 21)
 
 struct radv_pipeline_key;
 struct radv_ray_tracing_group;

Reply via email to