Module: Mesa
Branch: master
Commit: aab0bfc648bf1be50b81a25224970015f1dc78b8
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=aab0bfc648bf1be50b81a25224970015f1dc78b8

Author: Roland Scheidegger <srol...@vmware.com>
Date:   Thu Nov  9 19:37:54 2017 +0100

r600: use min_dx10/max_dx10 instead of min/max

I believe this is the safe thing to do, especially now that the driver
actually generates NaNs for multiplies too.
The ISA docs are not very helpful here; however, the dx10 versions will pick
a non-NaN result over a NaN one (this is also the IEEE 754 behavior), whereas
the non-dx10 ones will pick the NaN (verified by the newly changed piglit
isinf-and-isnan test).
Other "modern" drivers will most likely do the same.
This was shown to make some difference for bug 103544, although it is not
required to fix it.

Reviewed-by: Dave Airlie <airl...@redhat.com>

---

 src/gallium/drivers/r600/r600_shader.c  | 13 +++++++------
 src/gallium/drivers/r600/sb/sb_expr.cpp |  2 ++
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 0fa2a1f0d1..805b3b6b3d 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -9175,8 +9175,9 @@ static const struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[]
        [TGSI_OPCODE_DP3]       = { ALU_OP2_DOT4_IEEE, tgsi_dp},
        [TGSI_OPCODE_DP4]       = { ALU_OP2_DOT4_IEEE, tgsi_dp},
        [TGSI_OPCODE_DST]       = { ALU_OP0_NOP, tgsi_opdst},
-       [TGSI_OPCODE_MIN]       = { ALU_OP2_MIN, tgsi_op2},
-       [TGSI_OPCODE_MAX]       = { ALU_OP2_MAX, tgsi_op2},
+       /* MIN_DX10 returns non-nan result if one src is NaN, MIN returns NaN */
+       [TGSI_OPCODE_MIN]       = { ALU_OP2_MIN_DX10, tgsi_op2},
+       [TGSI_OPCODE_MAX]       = { ALU_OP2_MAX_DX10, tgsi_op2},
        [TGSI_OPCODE_SLT]       = { ALU_OP2_SETGT, tgsi_op2_swap},
        [TGSI_OPCODE_SGE]       = { ALU_OP2_SETGE, tgsi_op2},
        [TGSI_OPCODE_MAD]       = { ALU_OP3_MULADD_IEEE, tgsi_op3},
@@ -9373,8 +9374,8 @@ static const struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] =
        [TGSI_OPCODE_DP3]       = { ALU_OP2_DOT4_IEEE, tgsi_dp},
        [TGSI_OPCODE_DP4]       = { ALU_OP2_DOT4_IEEE, tgsi_dp},
        [TGSI_OPCODE_DST]       = { ALU_OP0_NOP, tgsi_opdst},
-       [TGSI_OPCODE_MIN]       = { ALU_OP2_MIN, tgsi_op2},
-       [TGSI_OPCODE_MAX]       = { ALU_OP2_MAX, tgsi_op2},
+       [TGSI_OPCODE_MIN]       = { ALU_OP2_MIN_DX10, tgsi_op2},
+       [TGSI_OPCODE_MAX]       = { ALU_OP2_MAX_DX10, tgsi_op2},
        [TGSI_OPCODE_SLT]       = { ALU_OP2_SETGT, tgsi_op2_swap},
        [TGSI_OPCODE_SGE]       = { ALU_OP2_SETGE, tgsi_op2},
        [TGSI_OPCODE_MAD]       = { ALU_OP3_MULADD_IEEE, tgsi_op3},
@@ -9596,8 +9597,8 @@ static const struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] =
        [TGSI_OPCODE_DP3]       = { ALU_OP2_DOT4_IEEE, tgsi_dp},
        [TGSI_OPCODE_DP4]       = { ALU_OP2_DOT4_IEEE, tgsi_dp},
        [TGSI_OPCODE_DST]       = { ALU_OP0_NOP, tgsi_opdst},
-       [TGSI_OPCODE_MIN]       = { ALU_OP2_MIN, tgsi_op2},
-       [TGSI_OPCODE_MAX]       = { ALU_OP2_MAX, tgsi_op2},
+       [TGSI_OPCODE_MIN]       = { ALU_OP2_MIN_DX10, tgsi_op2},
+       [TGSI_OPCODE_MAX]       = { ALU_OP2_MAX_DX10, tgsi_op2},
        [TGSI_OPCODE_SLT]       = { ALU_OP2_SETGT, tgsi_op2_swap},
        [TGSI_OPCODE_SGE]       = { ALU_OP2_SETGE, tgsi_op2},
        [TGSI_OPCODE_MAD]       = { ALU_OP3_MULADD_IEEE, tgsi_op3},
diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp 
b/src/gallium/drivers/r600/sb/sb_expr.cpp
index 3dd3a4815b..7a5d62c8e8 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.cpp
+++ b/src/gallium/drivers/r600/sb/sb_expr.cpp
@@ -753,7 +753,9 @@ bool expr_handler::fold_alu_op2(alu_node& n) {
                                n.bc.src[0].abs == n.bc.src[1].abs) {
                        switch (n.bc.op) {
                        case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
+                       case ALU_OP2_MIN_DX10:
                        case ALU_OP2_MAX:
+                       case ALU_OP2_MAX_DX10:
                                convert_to_mov(n, v0, n.bc.src[0].neg, 
n.bc.src[0].abs);
                                return fold_alu_op1(n);
                        case ALU_OP2_ADD:  // (ADD x, x) => (MUL x, 2)

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to