Mesa (master): nir/lower_idiv: add options to use fp32 for 8-bit division lowering

GitLab Mirror Mon, 12 Apr 2021 09:45:12 -0700

Module: Mesa
Branch: master
Commit: a2619b97f53a30d74920ee6b819e943291641f9d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a2619b97f53a30d74920ee6b819e943291641f9d


Author: Rhys Perry <pendingchao...@gmail.com>
Date:   Wed Apr  7 19:17:46 2021 +0100

nir/lower_idiv: add options to use fp32 for 8-bit division lowering

Signed-off-by: Rhys Perry <pendingchao...@gmail.com>
Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10081>

---

 src/amd/vulkan/radv_pipeline.c                     |  6 ++++-
 src/broadcom/compiler/vir.c                        |  6 ++++-
 src/compiler/nir/nir.h                             | 31 +++++++++++++---------
 src/compiler/nir/nir_lower_idiv.c                  | 19 ++++++-------
 src/freedreno/ir3/ir3_nir.c                        |  6 ++++-
 src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c |  6 ++++-
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   |  6 ++++-
 src/gallium/drivers/r600/sfn/sfn_nir.cpp           |  8 +++---
 src/gallium/drivers/vc4/vc4_program.c              |  6 ++++-
 src/panfrost/bifrost/bifrost_compile.c             |  6 ++++-
 src/panfrost/midgard/midgard_compile.c             |  6 ++++-
 11 files changed, 74 insertions(+), 32 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 168ee087579..614b5598f4e 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3319,7 +3319,11 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
          /* TODO: Implement nir_op_uadd_sat with LLVM. */
          if (!radv_use_llvm_for_stage(device, i))
             nir_opt_idiv_const(nir[i], 8);
-         nir_lower_idiv(nir[i], nir_lower_idiv_precise);
+
+         nir_lower_idiv(nir[i], &(nir_lower_idiv_options){
+                                   .imprecise_32bit_lowering = false,
+                                   .allow_fp16 = true,
+                                });
 
          nir_opt_sink(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
          nir_opt_move(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 5468364a6b0..335a5a8e314 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -1397,7 +1397,11 @@ v3d_attempt_compile(struct v3d_compile *c)
         NIR_PASS_V(c->s, v3d_nir_lower_io, c);
         NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
         NIR_PASS_V(c->s, v3d_nir_lower_image_load_store);
-        NIR_PASS_V(c->s, nir_lower_idiv, nir_lower_idiv_fast);
+        nir_lower_idiv_options idiv_options = {
+                .imprecise_32bit_lowering = true,
+                .allow_fp16 = true,
+        };
+        NIR_PASS_V(c->s, nir_lower_idiv, &idiv_options);
 
         if (c->key->robust_buffer_access) {
            /* v3d_nir_lower_robust_buffer_access assumes constant buffer
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 60f4fe26b00..260d0ca2700 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -4855,19 +4855,26 @@ enum nir_lower_non_uniform_access_type {
 bool nir_lower_non_uniform_access(nir_shader *shader,
                                   enum nir_lower_non_uniform_access_type);
 
-enum nir_lower_idiv_path {
-   /* This path is based on NV50LegalizeSSA::handleDIV(). It is the faster of
-    * the two but it is not exact in some cases (for example, 1091317713u /
-    * 1034u gives 5209173 instead of 1055432) */
-   nir_lower_idiv_fast,
-   /* This path is based on AMDGPUTargetLowering::LowerUDIVREM() and
-    * AMDGPUTargetLowering::LowerSDIVREM(). It requires more instructions than
-    * the nv50 path and many of them are integer multiplications, so it is
-    * probably slower. It should always return the correct result, though. */
-   nir_lower_idiv_precise,
-};
+typedef struct {
+   /* If true, a 32-bit division lowering based on NV50LegalizeSSA::handleDIV()
+    * is used. It is the faster of the two but it is not exact in some cases
+    * (for example, 1091317713u / 1034u gives 5209173 instead of 1055432).
+    *
+    * If false, a lowering based on AMDGPUTargetLowering::LowerUDIVREM() and
+    * AMDGPUTargetLowering::LowerSDIVREM() is used. It requires more
+    * instructions than the nv50 path and many of them are integer
+    * multiplications, so it is probably slower. It should always return the
+    * correct result, though.
+    */
+   bool imprecise_32bit_lowering;
+
+   /* Whether 16-bit floating point arithmetic should be allowed in 8-bit
+    * division lowering
+    */
+   bool allow_fp16;
+} nir_lower_idiv_options;
 
-bool nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path);
+bool nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options);
 
 typedef struct nir_input_attachment_options {
    bool use_fragcoord_sysval;
diff --git a/src/compiler/nir/nir_lower_idiv.c b/src/compiler/nir/nir_lower_idiv.c
index c2f58df6b8c..c477fd53e85 100644
--- a/src/compiler/nir/nir_lower_idiv.c
+++ b/src/compiler/nir/nir_lower_idiv.c
@@ -200,11 +200,12 @@ convert_instr_precise(nir_builder *bld, nir_op op,
 
 static nir_ssa_def *
 convert_instr_small(nir_builder *b, nir_op op,
-      nir_ssa_def *numer, nir_ssa_def *denom)
+      nir_ssa_def *numer, nir_ssa_def *denom,
+      const nir_lower_idiv_options *options)
 {
    unsigned sz = numer->bit_size;
    nir_alu_type int_type = nir_op_infos[op].output_type | sz;
-   nir_alu_type float_type = nir_type_float | (sz * 2);
+   nir_alu_type float_type = nir_type_float | (options->allow_fp16 ? sz * 2 : 32);
 
    nir_ssa_def *p = nir_type_convert(b, numer, int_type, float_type);
    nir_ssa_def *q = nir_type_convert(b, denom, int_type, float_type);
@@ -240,18 +241,18 @@ convert_instr_small(nir_builder *b, nir_op op,
 static nir_ssa_def *
 lower_idiv(nir_builder *b, nir_instr *instr, void *_data)
 {
-   enum nir_lower_idiv_path *path = _data;
+   const nir_lower_idiv_options *options = _data;
    nir_alu_instr *alu = nir_instr_as_alu(instr);
 
    nir_ssa_def *numer = nir_ssa_for_alu_src(b, alu, 0);
    nir_ssa_def *denom = nir_ssa_for_alu_src(b, alu, 1);
 
    if (numer->bit_size < 32)
-      return convert_instr_small(b, alu->op, numer, denom);
-   else if (*path == nir_lower_idiv_precise)
-      return convert_instr_precise(b, alu->op, numer, denom);
-   else
+      return convert_instr_small(b, alu->op, numer, denom, options);
+   else if (options->imprecise_32bit_lowering)
       return convert_instr(b, alu->op, numer, denom);
+   else
+      return convert_instr_precise(b, alu->op, numer, denom);
 }
 
 static bool
@@ -278,10 +279,10 @@ inst_is_idiv(const nir_instr *instr, UNUSED const void *_state)
 }
 
 bool
-nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path)
+nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options)
 {
    return nir_shader_lower_instructions(shader,
          inst_is_idiv,
          lower_idiv,
-         &path);
+         (void *)options);
 }
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index ead0d1a85a9..101fa004335 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -320,7 +320,11 @@ ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
        /* do idiv lowering after first opt loop to get a chance to propagate
         * constants for divide by immed power-of-two:
         */
-       const bool idiv_progress = OPT(s, nir_lower_idiv, nir_lower_idiv_fast);
+       nir_lower_idiv_options idiv_options = {
+               .imprecise_32bit_lowering = true,
+               .allow_fp16 = true,
+       };
+       const bool idiv_progress = OPT(s, nir_lower_idiv, &idiv_options);
 
        if (idiv_progress)
                ir3_optimize_loop(s);
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
index 81f4c3696d5..88c22819184 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
@@ -1113,7 +1113,11 @@ etna_compile_shader_nir(struct etna_shader_variant *v)
    NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_all, UINT32_MAX);
    NIR_PASS_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u });
    NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs);
-   NIR_PASS_V(s, nir_lower_idiv, nir_lower_idiv_fast);
+   nir_lower_idiv_options idiv_options = {
+      .imprecise_32bit_lowering = true,
+      .allow_fp16 = true,
+   };
+   NIR_PASS_V(s, nir_lower_idiv, &idiv_options);
 
    etna_optimize_loop(s);
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 7ca3a5ded38..1d47f34968e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -3139,7 +3139,11 @@ Converter::run()
    /*TODO: improve this lowering/optimisation loop so that we can use
     *      nir_opt_idiv_const effectively before this.
     */
-   NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_precise);
+   nir_lower_idiv_options idiv_options = {
+      .imprecise_32bit_lowering = false,
+      .allow_fp16 = true,
+   };
+   NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options);
 
    do {
       progress = false;
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
index 8dcd6c0b22e..b421f838c9f 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
@@ -863,9 +863,11 @@ int r600_shader_from_nir(struct r600_context *rctx,
 
    NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
    NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
-   NIR_PASS_V(sel->nir, nir_lower_idiv,
-              sel->nir->info.stage == MESA_SHADER_COMPUTE ?
-                 nir_lower_idiv_precise : nir_lower_idiv_fast);
+   nir_lower_idiv_options idiv_options = {
+      .imprecise_32bit_lowering = sel->nir->info.stage != MESA_SHADER_COMPUTE,
+      .allow_fp16 = true,
+   };
+   NIR_PASS_V(sel->nir, nir_lower_idiv, &idiv_options);
    NIR_PASS_V(sel->nir, r600_lower_alu);
    NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
 
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index e9fc802b3a6..007af9427ad 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2316,7 +2316,11 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
 
         NIR_PASS_V(c->s, vc4_nir_lower_io, c);
         NIR_PASS_V(c->s, vc4_nir_lower_txf_ms, c);
-        NIR_PASS_V(c->s, nir_lower_idiv, nir_lower_idiv_fast);
+        nir_lower_idiv_options idiv_options = {
+                .imprecise_32bit_lowering = true,
+                .allow_fp16 = true,
+        };
+        NIR_PASS_V(c->s, nir_lower_idiv, &idiv_options);
 
         vc4_optimize_nir(c->s);
 
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 53ae4082157..e045081910a 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -2834,7 +2834,11 @@ bi_optimize_nir(nir_shader *nir)
 
         NIR_PASS(progress, nir, nir_lower_int64);
 
-        NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);
+        nir_lower_idiv_options idiv_options = {
+                .imprecise_32bit_lowering = true,
+                .allow_fp16 = true,
+        };
+        NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options);
 
         NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);
         NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 283afe63518..9e4e6061c36 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -296,7 +296,11 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
                 (nir->options->lower_flrp64 ? 64 : 0);
 
         NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
-        NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);
+        nir_lower_idiv_options idiv_options = {
+                .imprecise_32bit_lowering = true,
+                .allow_fp16 = true,
+        };
+        NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options);
 
         nir_lower_tex_options lower_tex_options = {
                 .lower_txs_lod = true,

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): nir/lower_idiv: add options to use fp32 for 8-bit division lowering

Reply via email to