--- src/compiler/nir/nir_opt_algebraic.py | 70 +++++++++++++++++++++++++++ 1 file changed, 70 insertions(+)
# Reconstructed from a patch against src/compiler/nir/nir_opt_algebraic.py.
# `fexp2i`, `ldexp` and `optimizations` are defined elsewhere in that file.

# Clamp range (lowest, highest) applied to the ldexp exponent, keyed by the
# bit size of the floating-point operand.
#
# Rationale, worked through for 32-bit floats: the maximum possible range for
# a normal exponent is [-126, 127] and, throwing in denormals, you get a
# maximum range of [-149, 127].  This means that we can potentially have a
# swing of +/-276.  If you start with FLT_MAX, you actually have to do
# ldexp(FLT_MAX, -278) to get it to flush all the way to zero.  The GLSL
# spec, on the other hand, only requires that we handle an exponent value in
# the range [-126, 128].  Clamping to [-252, 254] is *mostly* correct: it
# allows you to create any value (including denorms if the hardware supports
# it) and to adjust the exponent of any normal value to anything you want.
#
# NOTE(review): each bound is twice the normal-exponent limit of its format
# (16-bit: -14/15, 32-bit: -126/127, 64-bit: -1022/1023), presumably so that
# each of the two half-exponents handed to fexp2i stays a normal exponent --
# confirm against fexp2i.
_LDEXP_EXP_CLAMP = {
    16: (-28, 30),
    32: (-252, 254),
    64: (-2044, 2046),
}


def _ldexp_with_mul(f, exp, bits, mul_op):
    """Build the lowered ldexp expression using `mul_op` for both multiplies.

    f      -- expression for the floating-point operand.
    exp    -- expression for the integer exponent.
    bits   -- bit size of `f`; must be 16, 32 or 64.
    mul_op -- name of the multiply opcode to use (e.g. 'fmul_rtne').

    Returns the nested tuple (mul_op, (mul_op, f, pow2_1), pow2_2), where
    pow2_1 and pow2_2 are the fexp2i() expressions for the two halves of the
    clamped exponent.

    Raises AssertionError for any other bit size.
    """
    # Raised explicitly rather than via `assert False` so the check is not
    # stripped when Python runs with -O (which would leave exp unclamped).
    if bits not in _LDEXP_EXP_CLAMP:
        raise AssertionError('unsupported ldexp bit size: %r' % (bits,))

    lowest, highest = _LDEXP_EXP_CLAMP[bits]
    exp = ('imin', ('imax', exp, lowest), highest)

    # Now we compute two powers of 2, one for exp/2 and one for exp-exp/2.
    # (We use ishr which isn't the same for -1, but the -1 case still works
    # since we use exp-exp/2 as the second exponent.)  While the spec
    # technically defines ldexp as f * 2.0^exp, simply multiplying once
    # doesn't work with denormals and doesn't allow for the full swing in
    # exponents that you can get with normalized values.  Instead, we create
    # two powers of two and multiply by them each in turn.  That way the
    # effective range of our exponent is doubled.
    pow2_1 = fexp2i(('ishr', exp, 1), bits)
    pow2_2 = fexp2i(('isub', exp, ('ishr', exp, 1)), bits)
    return (mul_op, (mul_op, f, pow2_1), pow2_2)


def ldexp_rtne(f, exp, bits):
    """Lower ldexp_rtne(f, exp) to two 'fmul_rtne' multiplies by powers of 2.

    Same construction as the plain ldexp() lowering, except that both
    multiplies use the 'fmul_rtne' opcode (presumably round-to-nearest-even;
    the opcode itself is defined outside this block).

    bits must be 16, 32 or 64; anything else raises AssertionError.
    """
    return _ldexp_with_mul(f, exp, bits, 'fmul_rtne')


def ldexp_rtz(f, exp, bits):
    """Lower ldexp_rtz(f, exp) to two 'fmul_rtz' multiplies by powers of 2.

    Same construction as the plain ldexp() lowering, except that both
    multiplies use the 'fmul_rtz' opcode (presumably round-toward-zero; the
    opcode itself is defined outside this block).

    bits must be 16, 32 or 64; anything else raises AssertionError.
    """
    return _ldexp_with_mul(f, exp, bits, 'fmul_rtz')


# Register the ldexp lowerings for every supported bit size.  The rounding-mode
# variants carry the same 'options->lower_ldexp' condition string as the plain
# ldexp entries.
optimizations += [
    (('ldexp@16', 'x', 'exp'), ldexp('x', 'exp', 16), 'options->lower_ldexp'),
    (('ldexp@32', 'x', 'exp'), ldexp('x', 'exp', 32), 'options->lower_ldexp'),
    (('ldexp@64', 'x', 'exp'), ldexp('x', 'exp', 64), 'options->lower_ldexp'),
    (('ldexp_rtne@16', 'x', 'exp'), ldexp_rtne('x', 'exp', 16), 'options->lower_ldexp'),
    (('ldexp_rtne@32', 'x', 'exp'), ldexp_rtne('x', 'exp', 32), 'options->lower_ldexp'),
    (('ldexp_rtne@64', 'x', 'exp'), ldexp_rtne('x', 'exp', 64), 'options->lower_ldexp'),
    (('ldexp_rtz@16', 'x', 'exp'), ldexp_rtz('x', 'exp', 16), 'options->lower_ldexp'),
    (('ldexp_rtz@32', 'x', 'exp'), ldexp_rtz('x', 'exp', 32), 'options->lower_ldexp'),
    (('ldexp_rtz@64', 'x', 'exp'), ldexp_rtz('x', 'exp', 64), 'options->lower_ldexp'),
]