Module: Mesa
Branch: main
Commit: b4ca45911d234c2fd763ed60cbb74be804dccd31
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b4ca45911d234c2fd763ed60cbb74be804dccd31

Author: Pavel Ondračka <[email protected]>
Date:   Sat Jun 17 13:50:05 2023 +0200

nir_opt_algebraic: don't use i32csel without native integer support

Otherwise nir_lower_int_to_float (or specifically nir_gather_ssa_types)
will fail to recognize we already have float constants and converts them
again.

Example from spec/glsl-1.10/execution/vs-loop-array-index-unroll.shader_test
with r300 driver (after enabling has_fused_comp_and_csel).

impl main {
        block block_0:
        /* preds: */
        vec1 32 ssa_0 = load_const (0x00000000 = 0.000000)
        vec4 32 ssa_1 = intrinsic load_input (ssa_0) (base=0, component=0, 
dest_type=float32, io location=VERT_ATTRIB_POS slots=1)      /* gl_Vertex */
        vec3 32 ssa_2 = load_const (0x00000000, 0x3e800000, 0x3f800000) = 
(0.000000, 0.250000, 1.000000)
        vec3 32 ssa_3 = load_const (0x00000000, 0x3f000000, 0x3f800000) = 
(0.000000, 0.500000, 1.000000)
        vec3 32 ssa_4 = load_const (0x00000000, 0x3f400000, 0x3f800000) = 
(0.000000, 0.750000, 1.000000)
        vec2 32 ssa_5 = load_const (0x00000000, 0x3f800000) = (0.000000, 
1.000000)
        vec1 32 ssa_6 = load_const (0x3f800000 = 1.000000)
        vec1 32 ssa_7 = intrinsic load_ubo_vec4 (ssa_0, ssa_0) (access=0, 
base=0, component=0)
        vec4 32 ssa_8 = load_const (0x00000000, 0x00000001, 0x00000002, 
0x00000003) = (0.000000, 0.000000, 0.000000, 0.000000)
        vec4  1 ssa_9 = ilt ssa_8, ssa_7.xxxx
        vec3 32 ssa_10 = bcsel ssa_9.www, ssa_5.xyy, ssa_4
        vec3 32 ssa_11 = bcsel ssa_9.zzz, ssa_10, ssa_3
        vec3 32 ssa_12 = bcsel ssa_9.yyy, ssa_11, ssa_2
        vec3 32 ssa_15 = i32csel_gt ssa_7.xxx, ssa_12, ssa_6.xxx
        vec4 32 ssa_14 = fsat ssa_15.xyxz
        intrinsic store_output (ssa_14, ssa_0) (base=1, wrmask=xyzw, 
component=0, src_type=float32, io location=VARYING_SLOT_COL0 slots=1, xfb(), 
xfb2())       /* gl_FrontColor */
        intrinsic store_output (ssa_1, ssa_0) (base=0, wrmask=xyzw, 
component=0, src_type=float32, io location=VARYING_SLOT_POS slots=1, xfb(), 
xfb2()) /* gl_Position */
        /* succs: block_1 */
        block block_1:
}

and after nir_lower_int_to_float

impl main {
        block block_0:
        /* preds: */
        vec1 32 ssa_0 = load_const (0x00000000 = 0.000000)
        vec4 32 ssa_1 = intrinsic load_input (ssa_0) (base=0, component=0, 
dest_type=float32, io location=VERT_ATTRIB_POS slots=1)      /* gl_Vertex */
        vec3 32 ssa_2 = load_const (0x00000000, 0x4e7a0000, 0x4e7e0000) = 
(0.000000, 1048576000.000000, 1065353216.000000)
        vec3 32 ssa_3 = load_const (0x00000000, 0x4e7c0000, 0x4e7e0000) = 
(0.000000, 1056964608.000000, 1065353216.000000)
        vec3 32 ssa_4 = load_const (0x00000000, 0x4e7d0000, 0x4e7e0000) = 
(0.000000, 1061158912.000000, 1065353216.000000)
        vec2 32 ssa_5 = load_const (0x00000000, 0x4e7e0000) = (0.000000, 
1065353216.000000)
        vec1 32 ssa_6 = load_const (0x4e7e0000 = 1065353216.000000)
        vec1 32 ssa_7 = intrinsic load_ubo_vec4 (ssa_0, ssa_0) (access=0, 
base=0, component=0)
        vec4 32 ssa_8 = load_const (0x00000000, 0x3f800000, 0x40000000, 
0x40400000) = (0.000000, 1.000000, 2.000000, 3.000000)
        vec4  1 ssa_9 = flt ssa_8, ssa_7.xxxx
        vec3 32 ssa_10 = bcsel ssa_9.www, ssa_5.xyy, ssa_4
        vec3 32 ssa_11 = bcsel ssa_9.zzz, ssa_10, ssa_3
        vec3 32 ssa_12 = bcsel ssa_9.yyy, ssa_11, ssa_2
        vec3 32 ssa_13 = fcsel_gt ssa_7.xxx, ssa_12, ssa_6.xxx
        vec4 32 ssa_14 = fsat ssa_13.xyxz
        intrinsic store_output (ssa_14, ssa_0) (base=1, wrmask=xyzw, 
component=0, src_type=float32, io location=VARYING_SLOT_COL0 slots=1, xfb(), 
xfb2())       /* gl_FrontColor */
        intrinsic store_output (ssa_1, ssa_0) (base=0, wrmask=xyzw, 
component=0, src_type=float32, io location=VARYING_SLOT_POS slots=1, xfb(), 
xfb2()) /* gl_Position */
        /* succs: block_1 */
        block block_1:
}

Reviewed-by: Gert Wollny <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23704>

---

 src/compiler/nir/nir_opt_algebraic.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 55c052ace7f..11e8aa69d24 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -3189,10 +3189,10 @@ late_optimizations += [
   (('fcsel', ('sge', a, 0), b, c), ('fcsel_ge', a, b, c), 
"options->has_fused_comp_and_csel"),
   (('fcsel', ('sge', 0, a), b, c), ('fcsel_ge', ('fneg', a), b, c), 
"options->has_fused_comp_and_csel"),
 
-  (('bcsel', ('ilt', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, b, c), 
"options->has_fused_comp_and_csel"),
-  (('bcsel', ('ilt', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, c, b), 
"options->has_fused_comp_and_csel"),
-  (('bcsel', ('ige', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, b, c), 
"options->has_fused_comp_and_csel"),
-  (('bcsel', ('ige', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, c, b), 
"options->has_fused_comp_and_csel"),
+  (('bcsel', ('ilt', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, b, c), 
"options->has_fused_comp_and_csel && !options->no_integers"),
+  (('bcsel', ('ilt', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, c, b), 
"options->has_fused_comp_and_csel && !options->no_integers"),
+  (('bcsel', ('ige', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, b, c), 
"options->has_fused_comp_and_csel && !options->no_integers"),
+  (('bcsel', ('ige', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, c, b), 
"options->has_fused_comp_and_csel && !options->no_integers"),
 
   (('bcsel', ('flt', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, b, c), 
"options->has_fused_comp_and_csel"),
   (('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_gt', ('fneg', a), b, 
c), "options->has_fused_comp_and_csel"),

Reply via email to