From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_compute.c | 9 ++++++---
 src/gallium/drivers/radeonsi/si_pipe.h    | 3 ++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 52a62dcb7fa..dc6f647d9a8 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -750,7 +750,8 @@ static void si_setup_tgsi_user_data(struct si_context *sctx,
 
 unsigned si_get_compute_resource_limits(struct si_screen *sscreen,
 					unsigned waves_per_threadgroup,
-					unsigned max_waves_per_sh)
+					unsigned max_waves_per_sh,
+					unsigned threadgroups_per_cu)
 {
 	unsigned compute_resource_limits =
 		S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
@@ -766,7 +767,9 @@ unsigned si_get_compute_resource_limits(struct si_screen *sscreen,
 		if (num_cu_per_se % 4 && waves_per_threadgroup == 1)
 			compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1);
 
-		compute_resource_limits |= S_00B854_WAVES_PER_SH(max_waves_per_sh);
+		assert(threadgroups_per_cu >= 1 && threadgroups_per_cu <= 8);
+		compute_resource_limits |= S_00B854_WAVES_PER_SH(max_waves_per_sh) |
+					   S_00B854_CU_GROUP_COUNT(threadgroups_per_cu - 1);
 	} else {
 		/* SI */
 		if (max_waves_per_sh) {
@@ -788,7 +791,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
 
 	radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
 			  si_get_compute_resource_limits(sscreen, waves_per_threadgroup,
-							 sctx->cs_max_waves_per_sh));
+							 sctx->cs_max_waves_per_sh, 1));
 
 	unsigned dispatch_initiator =
 		S_00B800_COMPUTE_SHADER_EN(1) |
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e4e731e913b..330cdfa0c12 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1294,7 +1294,8 @@ unsigned si_end_counter(struct si_screen *sscreen, unsigned type,
 void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf *cs);
 unsigned si_get_compute_resource_limits(struct si_screen *sscreen,
 					unsigned waves_per_threadgroup,
-					unsigned max_waves_per_sh);
+					unsigned max_waves_per_sh,
+					unsigned threadgroups_per_cu);
 void si_init_compute_functions(struct si_context *sctx);
 
 /* si_perfcounters.c */
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev