Module: Mesa Branch: main Commit: 806cd2341c8b6756a4c8a9d9c6e61ece6a5e604b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=806cd2341c8b6756a4c8a9d9c6e61ece6a5e604b
Author: Ian Romanick <[email protected]> Date: Tue Feb 23 19:12:49 2021 -0800 nir/algebraic: Basic patterns for dot_4x8 v2: Add and modify patterns to let constant folding do better. v3: Remove '(is_not_zero)' from the patterns that try to combine addends. I honestly don't know why I had it there in the first place, and nothing in my deep git logs could help clue me in. Noticed by Alyssa. Remove patterns that detect open-coded udot_4x8. Suggested by Alyssa and Jason. Add missing sudot_4x8 patterns. Reviewed-by: Jason Ekstrand <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12142> --- src/compiler/nir/nir_opt_algebraic.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 001a86918d4..2f0a044e2a8 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -192,6 +192,35 @@ optimizations = [ # flrp(a, a + b, c) => a + flrp(0, b, c) => a + (b * c) (('~flrp', a, ('fadd(is_used_once)', a, b), c), ('fadd', ('fmul', b, c), a)), + + (('sdot_4x8_iadd', a, 0, b), b), + (('udot_4x8_uadd', a, 0, b), b), + (('sdot_4x8_iadd_sat', a, 0, b), b), + (('udot_4x8_uadd_sat', a, 0, b), b), + + # sudot_4x8_iadd is not commutative at all, so the patterns must be + # duplicated with zeros on each of the first positions. + (('sudot_4x8_iadd', a, 0, b), b), + (('sudot_4x8_iadd', 0, a, b), b), + (('sudot_4x8_iadd_sat', a, 0, b), b), + (('sudot_4x8_iadd_sat', 0, a, b), b), + + (('iadd', ('sdot_4x8_iadd(is_used_once)', a, b, '#c'), '#d'), ('sdot_4x8_iadd', a, b, ('iadd', c, d))), + (('iadd', ('udot_4x8_uadd(is_used_once)', a, b, '#c'), '#d'), ('udot_4x8_uadd', a, b, ('iadd', c, d))), + (('iadd', ('sudot_4x8_iadd(is_used_once)', a, b, '#c'), '#d'), ('sudot_4x8_iadd', a, b, ('iadd', c, d))), + + # Try to let constant folding eliminate the dot-product part. 
These are + # safe because the dot product cannot overflow 32 bits. + (('iadd', ('sdot_4x8_iadd', 'a(is_not_const)', b, 0), c), ('sdot_4x8_iadd', a, b, c)), + (('iadd', ('udot_4x8_uadd', 'a(is_not_const)', b, 0), c), ('udot_4x8_uadd', a, b, c)), + (('iadd', ('sudot_4x8_iadd', 'a(is_not_const)', b, 0), c), ('sudot_4x8_iadd', a, b, c)), + (('iadd', ('sudot_4x8_iadd', a, 'b(is_not_const)', 0), c), ('sudot_4x8_iadd', a, b, c)), + (('sdot_4x8_iadd', '#a', '#b', 'c(is_not_const)'), ('iadd', ('sdot_4x8_iadd', a, b, 0), c)), + (('udot_4x8_uadd', '#a', '#b', 'c(is_not_const)'), ('iadd', ('udot_4x8_uadd', a, b, 0), c)), + (('sudot_4x8_iadd', '#a', '#b', 'c(is_not_const)'), ('iadd', ('sudot_4x8_iadd', a, b, 0), c)), + (('sdot_4x8_iadd_sat', '#a', '#b', 'c(is_not_const)'), ('iadd_sat', ('sdot_4x8_iadd', a, b, 0), c), '!options->lower_add_sat'), + (('udot_4x8_uadd_sat', '#a', '#b', 'c(is_not_const)'), ('uadd_sat', ('udot_4x8_uadd', a, b, 0), c), '!options->lower_add_sat'), + (('sudot_4x8_iadd_sat', '#a', '#b', 'c(is_not_const)'), ('iadd_sat', ('sudot_4x8_iadd', a, b, 0), c), '!options->lower_add_sat'), ] # Float sizes
