Module: Mesa
Branch: main
Commit: 65e431e61a3bace7e50c11d699880ae860f76133
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=65e431e61a3bace7e50c11d699880ae860f76133
Author: Alyssa Rosenzweig <aly...@rosenzweig.io>
Date:   Sun Dec 3 21:30:45 2023 -0400

nir/lower_idiv: Optimize idiv sign calculation

Save a comparison, and move out the comparison to be more backend friendly.
Saves 2 instrs on AGX (as the remaining comparison now fuses with bcsel).

Results on AGX, all affected shaders in asphalt9.

total instructions in shared programs: 1813003 -> 1812611 (-0.02%)
instructions in affected programs: 119646 -> 119254 (-0.33%)
helped: 333
HURT: 0
Instructions are helped.

total bytes in shared programs: 11870344 -> 11867208 (-0.03%)
bytes in affected programs: 820888 -> 817752 (-0.38%)
helped: 333
HURT: 0
Bytes are helped.

and on Mali-G57:

total instructions in shared programs: 2677538 -> 2677205 (-0.01%)
instructions in affected programs: 206923 -> 206590 (-0.16%)
helped: 333
HURT: 0
Instructions are helped.

total cvt in shared programs: 14667.50 -> 14662.30 (-0.04%)
cvt in affected programs: 1953.64 -> 1948.44 (-0.27%)
helped: 333
HURT: 0
Cvt are helped.

total quadwords in shared programs: 1450664 -> 1450544 (<.01%)
quadwords in affected programs: 5064 -> 4944 (-2.37%)
helped: 15
HURT: 0
Quadwords are helped.

total threads in shared programs: 53282 -> 53309 (0.05%)
threads in affected programs: 27 -> 54 (100.00%)
helped: 27
HURT: 0
Threads are helped.
Signed-off-by: Alyssa Rosenzweig <aly...@rosenzweig.io> Reviewed-by: Faith Ekstrand <faith.ekstr...@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26489> --- src/compiler/nir/nir_lower_idiv.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/compiler/nir/nir_lower_idiv.c b/src/compiler/nir/nir_lower_idiv.c index 16acadc3715..c46bc3b6744 100644 --- a/src/compiler/nir/nir_lower_idiv.c +++ b/src/compiler/nir/nir_lower_idiv.c @@ -68,17 +68,20 @@ emit_udiv(nir_builder *bld, nir_def *numer, nir_def *denom, bool modulo) static nir_def * emit_idiv(nir_builder *bld, nir_def *numer, nir_def *denom, nir_op op) { - nir_def *lh_sign = nir_ilt_imm(bld, numer, 0); - nir_def *rh_sign = nir_ilt_imm(bld, denom, 0); - nir_def *lhs = nir_iabs(bld, numer); nir_def *rhs = nir_iabs(bld, denom); if (op == nir_op_idiv) { - nir_def *d_sign = nir_ixor(bld, lh_sign, rh_sign); + /* We want (numer < 0) ^ (denom < 0). This is the XOR of the sign bits, + * and since XOR is bitwise, that's the sign bit of the XOR. + */ + nir_def *d_sign = nir_ilt_imm(bld, nir_ixor(bld, numer, denom), 0); nir_def *res = emit_udiv(bld, lhs, rhs, false); return nir_bcsel(bld, d_sign, nir_ineg(bld, res), res); } else { + nir_def *lh_sign = nir_ilt_imm(bld, numer, 0); + nir_def *rh_sign = nir_ilt_imm(bld, denom, 0); + nir_def *res = emit_udiv(bld, lhs, rhs, true); res = nir_bcsel(bld, lh_sign, nir_ineg(bld, res), res); if (op == nir_op_imod) {