Module: Mesa
Branch: main
Commit: b48d183633e43911dbe0625d7e7253816ad53dcd
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b48d183633e43911dbe0625d7e7253816ad53dcd
Author: Marek Olšák <[email protected]> Date: Mon May 2 21:58:35 2022 -0400 ac/llvm: move the gfx6 optimization for TCS barriers into ac_build_s_barrier Reviewed-by: Pierre-Eric Pelloux-Prayer <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16304> --- src/amd/llvm/ac_llvm_build.c | 6 ++++++ src/amd/llvm/ac_nir_to_llvm.c | 15 ++------------- src/amd/llvm/ac_nir_to_llvm.h | 2 -- src/gallium/drivers/radeonsi/si_shader_llvm_tess.c | 7 +------ 4 files changed, 9 insertions(+), 21 deletions(-) diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index 90b372b89bd..06e348e7ae5 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -390,6 +390,12 @@ LLVMValueRef ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type, unsigne void ac_build_s_barrier(struct ac_llvm_context *ctx, gl_shader_stage stage) { + /* GFX6 only: s_barrier isn’t needed in TCS because an entire patch always fits into + * a single wave due to a bug workaround disallowing multi-wave HS workgroups. + */ + if (ctx->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) + return; + ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT); } diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 940a27605f9..71529579554 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -2951,17 +2951,6 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, const nir_intri return exit_waterfall(ctx, &wctx, res); } -void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage) -{ - /* GFX6 only: s_barrier isn’t needed in TCS because an entire patch always fits into - * a single wave due to a bug workaround disallowing multi-wave HS workgroups. 
- */ - if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) - return; - - ac_build_s_barrier(ac, stage); -} - static void emit_discard(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { LLVMValueRef cond; @@ -3935,11 +3924,11 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins ac_build_waitcnt(&ctx->ac, wait_flags); if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP) - ac_emit_barrier(&ctx->ac, ctx->stage); + ac_build_s_barrier(&ctx->ac, ctx->stage); break; } case nir_intrinsic_control_barrier: - ac_emit_barrier(&ctx->ac, ctx->stage); + ac_build_s_barrier(&ctx->ac, ctx->stage); break; case nir_intrinsic_shared_atomic_add: case nir_intrinsic_shared_atomic_imin: diff --git a/src/amd/llvm/ac_nir_to_llvm.h b/src/amd/llvm/ac_nir_to_llvm.h index f4ed35a0153..46b22bf1fee 100644 --- a/src/amd/llvm/ac_nir_to_llvm.h +++ b/src/amd/llvm/ac_nir_to_llvm.h @@ -54,6 +54,4 @@ void ac_handle_shader_output_decl(struct ac_llvm_context *ctx, struct ac_shader_ struct nir_shader *nir, struct nir_variable *variable, gl_shader_stage stage); -void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage); - #endif /* AC_NIR_TO_LLVM_H */ diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index ac831098a62..a401b6bbc4a 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -685,12 +685,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re /* Add a barrier before loading tess factors from LDS. */ if (!shader->key.ge.part.tcs.epilog.invoc0_tess_factors_are_def) { ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM); - - /* GFX6 only: s_barrier isn’t needed in TCS because an entire patch always fits into - * a single wave due to a bug workaround disallowing multi-wave HS workgroups. 
- */ - if (ctx->screen->info.chip_class != GFX6) - ac_build_s_barrier(&ctx->ac, ctx->stage); + ac_build_s_barrier(&ctx->ac, ctx->stage); } /* Do this only for invocation 0, because the tess levels are per-patch,
