Module: Mesa
Branch: main
Commit: a00b50d820b5a293ba6f05fee631f02bf86138f4
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a00b50d820b5a293ba6f05fee631f02bf86138f4

Author: Georg Lehmann <[email protected]>
Date:   Sat Feb 18 13:37:48 2023 +0100

nir: change 16bit image dest folding option to per type

Reviewed-by: Marek Olšák <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21404>

---

 src/amd/vulkan/radv_pipeline.c               |  3 ++-
 src/compiler/nir/nir.h                       |  3 ++-
 src/compiler/nir/nir_lower_mediump.c         | 15 ++++++++++-----
 src/freedreno/ir3/ir3_nir.c                  |  4 +++-
 src/gallium/drivers/radeonsi/si_shader_nir.c |  3 ++-
 5 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 3e2dc6fde0f..7d8789c343d 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3297,7 +3297,8 @@ radv_postprocess_nir(struct radv_pipeline *pipeline,
          struct nir_fold_16bit_tex_image_options fold_16bit_options = {
             .rounding_mode = nir_rounding_mode_rtne,
             .fold_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int,
-            .fold_image_load_store_data = true,
+            .fold_image_dest_types = nir_type_float | nir_type_uint | nir_type_int,
+            .fold_image_store_data = true,
             .fold_image_srcs = !radv_use_llvm_for_stage(device, stage->stage),
             .fold_srcs_options_count = separate_g16 ? 2 : 1,
             .fold_srcs_options = fold_srcs_options,
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 621b8363cad..db0a536caa4 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -5566,7 +5566,8 @@ struct nir_fold_tex_srcs_options {
 struct nir_fold_16bit_tex_image_options {
    nir_rounding_mode rounding_mode;
    nir_alu_type fold_tex_dest_types;
-   bool fold_image_load_store_data;
+   nir_alu_type fold_image_dest_types;
+   bool fold_image_store_data;
    bool fold_image_srcs;
    unsigned fold_srcs_options_count;
    struct nir_fold_tex_srcs_options *fold_srcs_options;
diff --git a/src/compiler/nir/nir_lower_mediump.c b/src/compiler/nir/nir_lower_mediump.c
index c5597f8086d..c1baabf27ed 100644
--- a/src/compiler/nir/nir_lower_mediump.c
+++ b/src/compiler/nir/nir_lower_mediump.c
@@ -886,11 +886,14 @@ fold_16bit_destination(nir_ssa_def *ssa, nir_alu_type dest_type,
 }
 
 static bool
-fold_16bit_load_data(nir_builder *b, nir_intrinsic_instr *instr,
-                     unsigned exec_mode, nir_rounding_mode rdm)
+fold_16bit_image_dest(nir_intrinsic_instr *instr, unsigned exec_mode,
+                      nir_alu_type allowed_types, nir_rounding_mode rdm)
 {
    nir_alu_type dest_type = nir_intrinsic_dest_type(instr);
 
+   if (!(nir_alu_type_get_base_type(dest_type) & allowed_types))
+      return false;
+
    if (!fold_16bit_destination(&instr->dest.ssa, dest_type, exec_mode, rdm))
       return false;
 
@@ -1016,7 +1019,7 @@ fold_16bit_tex_image(nir_builder *b, nir_instr *instr, void *params)
       case nir_intrinsic_bindless_image_store:
       case nir_intrinsic_image_deref_store:
       case nir_intrinsic_image_store:
-         if (options->fold_image_load_store_data)
+         if (options->fold_image_store_data)
             progress |= fold_16bit_store_data(b, intrinsic);
          if (options->fold_image_srcs)
             progress |= fold_16bit_image_srcs(b, intrinsic, 4);
@@ -1024,8 +1027,10 @@ fold_16bit_tex_image(nir_builder *b, nir_instr *instr, void *params)
       case nir_intrinsic_bindless_image_load:
       case nir_intrinsic_image_deref_load:
       case nir_intrinsic_image_load:
-         if (options->fold_image_load_store_data)
-            progress |= fold_16bit_load_data(b, intrinsic, exec_mode, options->rounding_mode);
+         if (options->fold_image_dest_types)
+            progress |= fold_16bit_image_dest(intrinsic, exec_mode,
+                                              options->fold_image_dest_types,
+                                              options->rounding_mode);
          if (options->fold_image_srcs)
             progress |= fold_16bit_image_srcs(b, intrinsic, 3);
          break;
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 388017eb9be..59389bf5247 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -783,7 +783,9 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
          .rounding_mode = nir_rounding_mode_rtz,
          .fold_tex_dest_types = nir_type_float,
          /* blob dumps have no half regs on pixel 2's ldib or stib, so only enable for a6xx+. */
-         .fold_image_load_store_data = so->compiler->gen >= 6,
+         .fold_image_dest_types = so->compiler->gen >= 6 ?
+                                     nir_type_float | nir_type_uint | nir_type_int : 0,
+         .fold_image_store_data = so->compiler->gen >= 6,
          .fold_srcs_options_count = 1,
          .fold_srcs_options = &fold_srcs_options,
       };
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index f2d588a2fe0..12e7cfdc553 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -198,7 +198,8 @@ static void si_late_optimize_16bit_samplers(struct si_screen *sscreen, nir_shade
    struct nir_fold_16bit_tex_image_options fold_16bit_options = {
       .rounding_mode = nir_rounding_mode_rtne,
       .fold_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int,
-      .fold_image_load_store_data = true,
+      .fold_image_dest_types = nir_type_float | nir_type_uint | nir_type_int,
+      .fold_image_store_data = true,
       .fold_srcs_options_count = has_g16 ? 2 : 1,
       .fold_srcs_options = fold_srcs_options,
    };
