The nir_opt_algebraic rule

(('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),

can produce new fdiv operations, which need to be lowered on i965,
as we don't actually implement fdiv.  (Normally, we handle this in
GLSL IR's lower_instructions pass, but in the above case we introduce
an fdiv after that point.  So, make NIR do it for us.)

Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
Cc: mesa-sta...@lists.freedesktop.org
---
 src/glsl/nir/nir.h                       | 1 +
 src/glsl/nir/nir_opt_algebraic.py        | 1 +
 src/mesa/drivers/dri/i965/brw_shader.cpp | 1 +
 3 files changed, 3 insertions(+)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 4286738..fed8a97 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1440,6 +1440,7 @@ typedef struct nir_function {
 } nir_function;
 
 typedef struct nir_shader_compiler_options {
+   bool lower_fdiv;
    bool lower_ffma;
    bool lower_flrp;
    bool lower_fpow;
diff --git a/src/glsl/nir/nir_opt_algebraic.py 
b/src/glsl/nir/nir_opt_algebraic.py
index 1fdad3d..c553de5 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -183,6 +183,7 @@ optimizations = [
    (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
    # Division and reciprocal
    (('fdiv', 1.0, a), ('frcp', a)),
+   (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
    (('frcp', ('frcp', a)), a),
    (('frcp', ('fsqrt', a)), ('frsq', a)),
    (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index d4b6410..4bd24a7 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -97,6 +97,7 @@ brw_compiler_create(void *mem_ctx, const struct 
brw_device_info *devinfo)
    nir_shader_compiler_options *nir_options =
       rzalloc(compiler, nir_shader_compiler_options);
    nir_options->native_integers = true;
+   nir_options->lower_fdiv = true;
    /* In order to help allow for better CSE at the NIR level we tell NIR
     * to split all ffma instructions during opt_algebraic and we then
     * re-combine them as a later step.
-- 
2.6.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to