When WAVE_LIMIT is set, a submission will opt-in for SPI based resource
scheduling. Because this mechanism is cooperative, we must ensure that
all submissions have this field set, otherwise they will bypass resource
arbitration.

We always hardcode the field to its maximum value, instead of
attempting to calculate an approximate usage. In testing, there were no
benefits to using anything other than the maximum.

Signed-off-by: Andres Rodriguez <andre...@gmail.com>
---
 src/amd/vulkan/si_cmd_buffer.c          | 27 ++++++++++++++++++---------
 src/gallium/drivers/radeonsi/si_state.c | 21 ++++++++++++++-------
 2 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index de3b388..ac3dff8 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -179,7 +179,8 @@ si_emit_compute(struct radv_physical_device 
*physical_device,
        radeon_emit(cs, 0);
        radeon_emit(cs, 0);
 
-       radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 3);
+       radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
+                             S_00B854_WAVES_PER_SH(0x3));
        radeon_emit(cs, 0);
        /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1 */
        radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | 
S_00B858_SH1_CU_EN(0xffff));
@@ -432,11 +433,15 @@ si_emit_config(struct radv_physical_device 
*physical_device,
 
        if (physical_device->rad_info.chip_class >= CIK) {
                if (physical_device->rad_info.chip_class >= GFX9) {
-                       radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 
S_00B41C_CU_EN(0xffff));
+                       radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+                                         S_00B41C_CU_EN(0xffff) | 
S_00B41C_WAVE_LIMIT(0x3F));
                } else {
-                       radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 
S_00B51C_CU_EN(0xffff));
-                       radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 
0);
-                       radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 
S_00B31C_CU_EN(0xffff));
+                       radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+                                         S_00B51C_CU_EN(0xffff) | 
S_00B51C_WAVE_LIMIT(0x3F));
+                       radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+                                         S_00B41C_WAVE_LIMIT(0x3F));
+                       radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+                                         S_00B31C_CU_EN(0xffff) | 
S_00B31C_WAVE_LIMIT(0x3F));
                        /* If this is 0, Bonaire can hang even if GS isn't 
being used.
                         * Other chips are unaffected. These are suboptimal 
values,
                         * but we don't use on-chip GS.
@@ -445,7 +450,8 @@ si_emit_config(struct radv_physical_device *physical_device,
                                               S_028A44_ES_VERTS_PER_SUBGRP(64) 
|
                                               S_028A44_GS_PRIMS_PER_SUBGRP(4));
                }
-               radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 
S_00B21C_CU_EN(0xffff));
+               radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+                                 S_00B21C_CU_EN(0xffff) | 
S_00B21C_WAVE_LIMIT(0x3F));
 
                if (physical_device->rad_info.num_good_compute_units /
                    (physical_device->rad_info.max_se * 
physical_device->rad_info.max_sh_per_se) <= 4) {
@@ -455,7 +461,8 @@ si_emit_config(struct radv_physical_device *physical_device,
                         *
                         * LATE_ALLOC_VS = 2 is the highest safe number.
                         */
-                       radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 
S_00B118_CU_EN(0xffff));
+                       radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+                                         S_00B118_CU_EN(0xffff) | 
S_00B118_WAVE_LIMIT(0x3F) );
                        radeon_set_sh_reg(cs, 
R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
                } else {
                        /* Set LATE_ALLOC_VS == 31. It should be less than
@@ -463,11 +470,13 @@ si_emit_config(struct radv_physical_device 
*physical_device,
                         * - VS can't execute on CU0.
                         * - If HS writes outputs to LDS, LS can't execute on 
CU0.
                         */
-                       radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 
S_00B118_CU_EN(0xfffe));
+                       radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+                                         S_00B118_CU_EN(0xfffe) | 
S_00B118_WAVE_LIMIT(0x3F));
                        radeon_set_sh_reg(cs, 
R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
                }
 
-               radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 
S_00B01C_CU_EN(0xffff));
+               radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
+                                 S_00B01C_CU_EN(0xffff) | 
S_00B01C_WAVE_LIMIT(0x3F));
        }
 
        if (physical_device->rad_info.chip_class >= VI) {
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index ae45e1a..8e3717e 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4994,11 +4994,15 @@ static void si_init_config(struct si_context *sctx)
 
        if (sctx->b.chip_class >= CIK) {
                if (sctx->b.chip_class >= GFX9) {
-                       si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 
S_00B41C_CU_EN(0xffff));
+                       si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+                                      S_00B41C_CU_EN(0xffff) | 
S_00B41C_WAVE_LIMIT(0x3F));
                } else {
-                       si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 
S_00B51C_CU_EN(0xffff));
-                       si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 
0);
-                       si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 
S_00B31C_CU_EN(0xffff));
+                       si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+                                      S_00B51C_CU_EN(0xffff) | 
S_00B51C_WAVE_LIMIT(0x3F));
+                       si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+                                      S_00B41C_WAVE_LIMIT(0x3F));
+                       si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+                                      S_00B31C_CU_EN(0xffff) | 
S_00B31C_WAVE_LIMIT(0x3F));
 
                        /* If this is 0, Bonaire can hang even if GS isn't 
being used.
                         * Other chips are unaffected. These are suboptimal 
values,
@@ -5008,7 +5012,8 @@ static void si_init_config(struct si_context *sctx)
                                       S_028A44_ES_VERTS_PER_SUBGRP(64) |
                                       S_028A44_GS_PRIMS_PER_SUBGRP(4));
                }
-               si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 
S_00B21C_CU_EN(0xffff));
+               si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+                              S_00B21C_CU_EN(0xffff) | 
S_00B21C_WAVE_LIMIT(0x3F));
 
                /* Compute LATE_ALLOC_VS.LIMIT. */
                unsigned num_cu_per_sh = sscreen->b.info.num_good_compute_units 
/
@@ -5040,10 +5045,12 @@ static void si_init_config(struct si_context *sctx)
 
                /* VS can't execute on one CU if the limit is > 2. */
                si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
-                              S_00B118_CU_EN(late_alloc_limit > 2 ? 0xfffe : 
0xffff));
+                              S_00B118_CU_EN(late_alloc_limit > 2 ? 0xfffe : 
0xffff) |
+                              S_00B118_WAVE_LIMIT(0x3F));
                si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS,
                               S_00B11C_LIMIT(late_alloc_limit));
-               si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 
S_00B01C_CU_EN(0xffff));
+               si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
+                              S_00B01C_CU_EN(0xffff) | 
S_00B01C_WAVE_LIMIT(0x3F));
        }
 
        if (sctx->b.chip_class >= VI) {
-- 
2.9.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to