Module: Mesa
Branch: master
Commit: a2619b97f53a30d74920ee6b819e943291641f9d
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a2619b97f53a30d74920ee6b819e943291641f9d
Author: Rhys Perry <pendingchao...@gmail.com> Date: Wed Apr 7 19:17:46 2021 +0100 nir/lower_idiv: add options to use fp32 for 8-bit division lowering Signed-off-by: Rhys Perry <pendingchao...@gmail.com> Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10081> --- src/amd/vulkan/radv_pipeline.c | 6 ++++- src/broadcom/compiler/vir.c | 6 ++++- src/compiler/nir/nir.h | 31 +++++++++++++--------- src/compiler/nir/nir_lower_idiv.c | 19 ++++++------- src/freedreno/ir3/ir3_nir.c | 6 ++++- src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c | 6 ++++- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 6 ++++- src/gallium/drivers/r600/sfn/sfn_nir.cpp | 8 +++--- src/gallium/drivers/vc4/vc4_program.c | 6 ++++- src/panfrost/bifrost/bifrost_compile.c | 6 ++++- src/panfrost/midgard/midgard_compile.c | 6 ++++- 11 files changed, 74 insertions(+), 32 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 168ee087579..614b5598f4e 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3319,7 +3319,11 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device, /* TODO: Implement nir_op_uadd_sat with LLVM. 
*/ if (!radv_use_llvm_for_stage(device, i)) nir_opt_idiv_const(nir[i], 8); - nir_lower_idiv(nir[i], nir_lower_idiv_precise); + + nir_lower_idiv(nir[i], &(nir_lower_idiv_options){ + .imprecise_32bit_lowering = false, + .allow_fp16 = true, + }); nir_opt_sink(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies); nir_opt_move(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies); diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 5468364a6b0..335a5a8e314 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -1397,7 +1397,11 @@ v3d_attempt_compile(struct v3d_compile *c) NIR_PASS_V(c->s, v3d_nir_lower_io, c); NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c); NIR_PASS_V(c->s, v3d_nir_lower_image_load_store); - NIR_PASS_V(c->s, nir_lower_idiv, nir_lower_idiv_fast); + nir_lower_idiv_options idiv_options = { + .imprecise_32bit_lowering = true, + .allow_fp16 = true, + }; + NIR_PASS_V(c->s, nir_lower_idiv, &idiv_options); if (c->key->robust_buffer_access) { /* v3d_nir_lower_robust_buffer_access assumes constant buffer diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 60f4fe26b00..260d0ca2700 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -4855,19 +4855,26 @@ enum nir_lower_non_uniform_access_type { bool nir_lower_non_uniform_access(nir_shader *shader, enum nir_lower_non_uniform_access_type); -enum nir_lower_idiv_path { - /* This path is based on NV50LegalizeSSA::handleDIV(). It is the faster of - * the two but it is not exact in some cases (for example, 1091317713u / - * 1034u gives 5209173 instead of 1055432) */ - nir_lower_idiv_fast, - /* This path is based on AMDGPUTargetLowering::LowerUDIVREM() and - * AMDGPUTargetLowering::LowerSDIVREM(). It requires more instructions than - * the nv50 path and many of them are integer multiplications, so it is - * probably slower. It should always return the correct result, though. 
*/ - nir_lower_idiv_precise, -}; +typedef struct { + /* If true, a 32-bit division lowering based on NV50LegalizeSSA::handleDIV() + * is used. It is the faster of the two but it is not exact in some cases + * (for example, 1091317713u / 1034u gives 5209173 instead of 1055432). + * + * If false, a lowering based on AMDGPUTargetLowering::LowerUDIVREM() and + * AMDGPUTargetLowering::LowerSDIVREM() is used. It requires more + * instructions than the nv50 path and many of them are integer + * multiplications, so it is probably slower. It should always return the + * correct result, though. + */ + bool imprecise_32bit_lowering; + + /* Whether 16-bit floating point arithmetic should be allowed in 8-bit + * division lowering + */ + bool allow_fp16; +} nir_lower_idiv_options; -bool nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path); +bool nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options); typedef struct nir_input_attachment_options { bool use_fragcoord_sysval; diff --git a/src/compiler/nir/nir_lower_idiv.c b/src/compiler/nir/nir_lower_idiv.c index c2f58df6b8c..c477fd53e85 100644 --- a/src/compiler/nir/nir_lower_idiv.c +++ b/src/compiler/nir/nir_lower_idiv.c @@ -200,11 +200,12 @@ convert_instr_precise(nir_builder *bld, nir_op op, static nir_ssa_def * convert_instr_small(nir_builder *b, nir_op op, - nir_ssa_def *numer, nir_ssa_def *denom) + nir_ssa_def *numer, nir_ssa_def *denom, + const nir_lower_idiv_options *options) { unsigned sz = numer->bit_size; nir_alu_type int_type = nir_op_infos[op].output_type | sz; - nir_alu_type float_type = nir_type_float | (sz * 2); + nir_alu_type float_type = nir_type_float | (options->allow_fp16 ? 
sz * 2 : 32); nir_ssa_def *p = nir_type_convert(b, numer, int_type, float_type); nir_ssa_def *q = nir_type_convert(b, denom, int_type, float_type); @@ -240,18 +241,18 @@ convert_instr_small(nir_builder *b, nir_op op, static nir_ssa_def * lower_idiv(nir_builder *b, nir_instr *instr, void *_data) { - enum nir_lower_idiv_path *path = _data; + const nir_lower_idiv_options *options = _data; nir_alu_instr *alu = nir_instr_as_alu(instr); nir_ssa_def *numer = nir_ssa_for_alu_src(b, alu, 0); nir_ssa_def *denom = nir_ssa_for_alu_src(b, alu, 1); if (numer->bit_size < 32) - return convert_instr_small(b, alu->op, numer, denom); - else if (*path == nir_lower_idiv_precise) - return convert_instr_precise(b, alu->op, numer, denom); - else + return convert_instr_small(b, alu->op, numer, denom, options); + else if (options->imprecise_32bit_lowering) return convert_instr(b, alu->op, numer, denom); + else + return convert_instr_precise(b, alu->op, numer, denom); } static bool @@ -278,10 +279,10 @@ inst_is_idiv(const nir_instr *instr, UNUSED const void *_state) } bool -nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path) +nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options) { return nir_shader_lower_instructions(shader, inst_is_idiv, lower_idiv, - &path); + (void *)options); } diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index ead0d1a85a9..101fa004335 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -320,7 +320,11 @@ ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s) /* do idiv lowering after first opt loop to get a chance to propagate * constants for divide by immed power-of-two: */ - const bool idiv_progress = OPT(s, nir_lower_idiv, nir_lower_idiv_fast); + nir_lower_idiv_options idiv_options = { + .imprecise_32bit_lowering = true, + .allow_fp16 = true, + }; + const bool idiv_progress = OPT(s, nir_lower_idiv, &idiv_options); if (idiv_progress) ir3_optimize_loop(s); diff --git 
a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c index 81f4c3696d5..88c22819184 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c @@ -1113,7 +1113,11 @@ etna_compile_shader_nir(struct etna_shader_variant *v) NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_all, UINT32_MAX); NIR_PASS_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u }); NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs); - NIR_PASS_V(s, nir_lower_idiv, nir_lower_idiv_fast); + nir_lower_idiv_options idiv_options = { + .imprecise_32bit_lowering = true, + .allow_fp16 = true, + }; + NIR_PASS_V(s, nir_lower_idiv, &idiv_options); etna_optimize_loop(s); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 7ca3a5ded38..1d47f34968e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -3139,7 +3139,11 @@ Converter::run() /*TODO: improve this lowering/optimisation loop so that we can use * nir_opt_idiv_const effectively before this. */ - NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_precise); + nir_lower_idiv_options idiv_options = { + .imprecise_32bit_lowering = false, + .allow_fp16 = true, + }; + NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options); do { progress = false; diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index 8dcd6c0b22e..b421f838c9f 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -863,9 +863,11 @@ int r600_shader_from_nir(struct r600_context *rctx, NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa); NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa); - NIR_PASS_V(sel->nir, nir_lower_idiv, - sel->nir->info.stage == MESA_SHADER_COMPUTE ? 
- nir_lower_idiv_precise : nir_lower_idiv_fast); + nir_lower_idiv_options idiv_options = { + .imprecise_32bit_lowering = sel->nir->info.stage != MESA_SHADER_COMPUTE, + .allow_fp16 = true, + }; + NIR_PASS_V(sel->nir, nir_lower_idiv, &idiv_options); NIR_PASS_V(sel->nir, r600_lower_alu); NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index e9fc802b3a6..007af9427ad 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2316,7 +2316,11 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, NIR_PASS_V(c->s, vc4_nir_lower_io, c); NIR_PASS_V(c->s, vc4_nir_lower_txf_ms, c); - NIR_PASS_V(c->s, nir_lower_idiv, nir_lower_idiv_fast); + nir_lower_idiv_options idiv_options = { + .imprecise_32bit_lowering = true, + .allow_fp16 = true, + }; + NIR_PASS_V(c->s, nir_lower_idiv, &idiv_options); vc4_optimize_nir(c->s); diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 53ae4082157..e045081910a 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -2834,7 +2834,11 @@ bi_optimize_nir(nir_shader *nir) NIR_PASS(progress, nir, nir_lower_int64); - NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast); + nir_lower_idiv_options idiv_options = { + .imprecise_32bit_lowering = true, + .allow_fp16 = true, + }; + NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options); NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options); NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL); diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 283afe63518..9e4e6061c36 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -296,7 +296,11 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend) (nir->options->lower_flrp64 ? 
64 : 0); NIR_PASS(progress, nir, nir_lower_regs_to_ssa); - NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast); + nir_lower_idiv_options idiv_options = { + .imprecise_32bit_lowering = true, + .allow_fp16 = true, + }; + NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options); nir_lower_tex_options lower_tex_options = { .lower_txs_lod = true, _______________________________________________ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit