Module: Mesa Branch: main Commit: 79009811a2c0493532fc4ddbacf01cad221e7d20 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=79009811a2c0493532fc4ddbacf01cad221e7d20
Author: Qiang Yu <[email protected]> Date: Wed Aug 30 10:53:55 2023 +0800 radeonsi: move use_aco to si_screen It's not per shader any more. Reviewed-by: Marek Olšák <[email protected]> Signed-off-by: Qiang Yu <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25632> --- src/gallium/drivers/radeonsi/si_nir_lower_abi.c | 4 +- src/gallium/drivers/radeonsi/si_pipe.c | 3 ++ src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_shader.c | 55 ++++++++----------------- src/gallium/drivers/radeonsi/si_shader.h | 3 -- 5 files changed, 23 insertions(+), 43 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index 9d15e17faa9..e8457bb405e 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -680,7 +680,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s break; case nir_intrinsic_load_tess_rel_patch_id_amd: /* LLVM need to replace patch id arg, so have to be done in LLVM backend. */ - if (!shader->use_aco) + if (!sel->screen->use_aco) return false; if (stage == MESA_SHADER_TESS_CTRL) { @@ -740,7 +740,7 @@ static bool lower_tex(nir_builder *b, nir_instr *instr, struct lower_abi_state * */ /* LLVM keep non-uniform sampler as index, so can't do this in NIR. */ - if (tex->is_shadow && gfx_level >= GFX8 && gfx_level <= GFX9 && s->shader->use_aco) { + if (tex->is_shadow && gfx_level >= GFX8 && gfx_level <= GFX9 && sel->screen->use_aco) { int samp_index = nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle); int comp_index = nir_tex_instr_src_index(tex, nir_tex_src_comparator); assert(samp_index >= 0 && comp_index >= 0); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 5390f3e2e8b..741a47a6d64 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1192,6 +1192,9 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, sscreen->info.has_dedicated_vram; } + /* ACO does not support compute cards yet. */ + sscreen->use_aco = (sscreen->debug_flags & DBG(USE_ACO)) && sscreen->info.has_graphics; + if (sscreen->debug_flags & DBG(NO_GFX)) sscreen->info.has_graphics = false; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 48bc6eed439..3bfaf6d426b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -581,6 +581,7 @@ struct si_screen { bool use_ngg_culling; bool allow_dcc_msaa_clear_to_reg_for_bpp[5]; /* indexed by log2(Bpp) */ bool always_allow_dcc_stores; + bool use_aco; struct { #define OPT_BOOL(name, dflt, description) bool name : 1; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 5d6e746455e..3c5e8b7954c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -389,7 +389,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) } /* GFX11 set FLAT_SCRATCH directly instead of using this arg. */ - if (shader->use_aco && sel->screen->info.gfx_level < GFX11) + if (sel->screen->use_aco && sel->screen->info.gfx_level < GFX11) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); /* VGPRs */ @@ -407,7 +407,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset); /* GFX11 set FLAT_SCRATCH directly instead of using this arg. */ - if (shader->use_aco && sel->screen->info.gfx_level < GFX11) + if (sel->screen->use_aco && sel->screen->info.gfx_level < GFX11) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); /* VGPRs */ @@ -473,7 +473,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) ac_add_return(&args->ac, AC_ARG_VGPR); /* VS outputs passed via VGPRs to TCS. */ - if (shader->key.ge.opt.same_patch_vertices && !shader->use_aco) { + if (shader->key.ge.opt.same_patch_vertices && !sel->screen->use_aco) { unsigned num_outputs = util_last_bit64(shader->selector->info.outputs_written); for (i = 0; i < num_outputs * 4; i++) ac_add_return(&args->ac, AC_ARG_VGPR); @@ -481,7 +481,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) } } else { /* TCS inputs are passed via VGPRs from VS. */ - if (shader->key.ge.opt.same_patch_vertices && !shader->use_aco) { + if (shader->key.ge.opt.same_patch_vertices && !sel->screen->use_aco) { unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->info.outputs_written); for (i = 0; i < num_inputs * 4; i++) ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); @@ -604,7 +604,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) } /* GFX11 set FLAT_SCRATCH directly instead of using this arg. */ - if (shader->use_aco && sel->screen->info.gfx_level < GFX11) + if (sel->screen->use_aco && sel->screen->info.gfx_level < GFX11) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); /* VGPRs */ @@ -618,7 +618,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id); /* GFX11 set FLAT_SCRATCH directly instead of using this arg. */ - if (shader->use_aco && sel->screen->info.gfx_level < GFX11) + if (sel->screen->use_aco && sel->screen->info.gfx_level < GFX11) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); /* VGPRs */ @@ -671,7 +671,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.pos_fixed_pt, SI_PARAM_POS_FIXED_PT); - if (shader->use_aco) { + if (sel->screen->use_aco) { ac_compact_ps_vgpr_args(&args->ac, shader->config.spi_ps_input_addr); /* GFX11 set FLAT_SCRATCH directly instead of using this arg. */ @@ -747,7 +747,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size); /* GFX11 set FLAT_SCRATCH directly instead of using this arg. */ - if (shader->use_aco && sel->screen->info.gfx_level < GFX11) + if (sel->screen->use_aco && sel->screen->info.gfx_level < GFX11) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); /* Hardware VGPRs. */ @@ -2264,7 +2264,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, NIR_PASS(progress, nir, ac_nir_lower_image_opcodes); /* LLVM does not work well with this, so is handled in llvm backend waterfall. */ - if (shader->use_aco && sel->info.has_non_uniform_tex_access) { + if (sel->screen->use_aco && sel->info.has_non_uniform_tex_access) { nir_lower_non_uniform_access_options options = { .types = nir_lower_non_uniform_texture_access, }; @@ -2348,7 +2348,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, ac_nir_lower_ps_options options = { .gfx_level = sel->screen->info.gfx_level, .family = sel->screen->info.family, - .use_aco = shader->use_aco, + .use_aco = sel->screen->use_aco, .uses_discard = si_shader_uses_discard(shader), .alpha_to_coverage_via_mrtz = key->ps.part.epilog.alpha_to_coverage_via_mrtz, .dual_src_blend_swizzle = key->ps.part.epilog.dual_src_blend_swizzle, @@ -2407,7 +2407,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, /* aco only accept scalar const, must be done after si_nir_late_opts() * which may generate vec const. */ - if (shader->use_aco) + if (sel->screen->use_aco) NIR_PASS_V(nir, nir_lower_load_const_to_scalar); /* This helps LLVM form VMEM clauses and thus get more GPU cache hits. @@ -2432,20 +2432,6 @@ void si_update_shader_binary_info(struct si_shader *shader, nir_shader *nir) shader->info.uses_vmem_sampler_or_bvh |= info.uses_vmem_sampler_or_bvh; } -static void si_determine_use_aco(struct si_shader *shader) -{ - const struct si_shader_selector *sel = shader->selector; - - if (!(sel->screen->debug_flags & DBG(USE_ACO))) - return; - - /* ACO does not support compute cards yet. */ - if (!sel->screen->info.has_graphics) - return; - - shader->use_aco = true; -} - /* Generate code for the hardware VS shader stage to go with a geometry shader */ static struct si_shader * si_nir_generate_gs_copy_shader(struct si_screen *sscreen, @@ -2506,8 +2492,6 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen, sscreen->options.vrs2x2, output_info); - si_determine_use_aco(shader); - struct si_shader_args args; si_init_shader_args(shader, &args); @@ -2517,7 +2501,7 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen, si_nir_opts(gs_selector->screen, nir, false); /* aco only accept scalar const */ - if (shader->use_aco) + if (sscreen->use_aco) NIR_PASS_V(nir, nir_lower_load_const_to_scalar); if (si_can_dump_shader(sscreen, MESA_SHADER_GEOMETRY, SI_DUMP_NIR)) { @@ -2525,7 +2509,7 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen, nir_print_shader(nir, stderr); } - bool ok = shader->use_aco ? + bool ok = sscreen->use_aco ? si_aco_compile_shader(shader, &args, nir, debug) : si_llvm_compile_shader(sscreen, compiler, shader, &args, debug, nir); @@ -2715,10 +2699,8 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi bool ret = true; struct si_shader_selector *sel = shader->selector; - si_determine_use_aco(shader); - /* ACO need spi_ps_input in advance to init args and used in compiler. */ - if (sel->stage == MESA_SHADER_FRAGMENT && shader->use_aco) + if (sel->stage == MESA_SHADER_FRAGMENT && sscreen->use_aco) si_set_spi_ps_input_config(shader); /* We need this info only when legacy GS. */ @@ -2782,7 +2764,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64)) float_mode &= ~V_00B028_FP_16_64_DENORMS; - ret = shader->use_aco ? + ret = sscreen->use_aco ? si_aco_compile_shader(shader, &args, nir, debug) : si_llvm_compile_shader(sscreen, compiler, shader, &args, debug, nir); if (!ret) @@ -2873,7 +2855,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi if (sel->screen->info.gfx_level < GFX11 && (sel->screen->info.family < CHIP_GFX940 || sel->screen->info.has_graphics) && !si_is_merged_shader(shader)) { - if (shader->use_aco) { + if (sscreen->use_aco) { /* When aco scratch_offset arg is added explicitly at the beginning. * After compile if no scratch used, reduce the input sgpr count. */ @@ -2945,9 +2927,7 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list, result = CALLOC_STRUCT(si_shader_part); result->key = *key; - bool use_aco = (sscreen->debug_flags & DBG(USE_ACO)) && sscreen->info.has_graphics; - - bool ok = use_aco ? + bool ok = sscreen->use_aco ? si_aco_build_shader_part(sscreen, stage, prolog, debug, name, result) : si_llvm_build_shader_part(sscreen, stage, prolog, compiler, debug, name, result); @@ -3514,7 +3494,6 @@ nir_shader *si_get_prev_stage_nir_shader(struct si_shader *shader, */ prev_shader->key.ge.opt.kill_outputs = 0; prev_shader->is_monolithic = true; - prev_shader->use_aco = shader->use_aco; si_init_shader_args(prev_shader, args); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index e6b82e2fff1..a4bfbc26444 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -886,9 +886,6 @@ struct si_shader { bool is_gs_copy_shader; uint8_t wave_size; - /* Use ACO for compilation. */ - bool use_aco; - /* The following data is all that's needed for binary shaders. */ struct si_shader_binary binary; struct ac_shader_config config;
