Both are Reviewed-by: Connor Abbott <cwabbo...@gmail.com>
On Wed, Jan 13, 2016 at 2:25 PM, Matt Turner <matts...@gmail.com> wrote: > The OpenGL specification for bitfieldInsert() says: > > The result will be undefined if <offset> or <bits> is negative, or if > the sum of <offset> and <bits> is greater than the number of bits > used to store the operand. > > Therefore passing bits=32, offset=0 is legal and defined in GLSL. > > But the earlier SM5 bfi opcode is specified to accept a bitfield width > ranging from 0-31. As such, Intel and AMD instructions read only the low > 5 bits of the width operand, making them unable to implement the > GLSL-specified behavior directly. > > This commit fixes the lowering of bitfield_insert to handle the trivial > case of <bits> = 32 as > > bitfieldInsert: > bits > 31 ? insert : bfi(bfm(bits, offset), insert, base) > > Fixes: > ES31-CTS.shader_bitfield_operation.bitfieldInsert.uint_2 > ES31-CTS.shader_bitfield_operation.bitfieldInsert.uvec4_3 > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92595 > --- > These two patches replace 8/9 and 9/9 of the previous series. > The first 7 patches from it have been reviewed and committed. > > src/glsl/nir/nir_opcodes.py | 1 + > src/glsl/nir/nir_opt_algebraic.py | 6 +++++- > 2 files changed, 6 insertions(+), 1 deletion(-) > > diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py > index 1c65def..3e43438 100644 > --- a/src/glsl/nir/nir_opcodes.py > +++ b/src/glsl/nir/nir_opcodes.py > @@ -558,6 +558,7 @@ triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2") > opcode("bcsel", 0, tuint, [0, 0, 0], > [tbool, tuint, tuint], "", "src0 ? src1 : src2") > > +# SM5 bfi assembly > triop("bfi", tuint, """ > unsigned mask = src0, insert = src1, base = src2; > if (mask == 0) { > diff --git a/src/glsl/nir/nir_opt_algebraic.py > b/src/glsl/nir/nir_opt_algebraic.py > index 1eb044a..0d31e39 100644 > --- a/src/glsl/nir/nir_opt_algebraic.py > +++ b/src/glsl/nir/nir_opt_algebraic.py > @@ -225,9 +225,13 @@ optimizations = [ > > # Misc. 
lowering > (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), > 'options->lower_fmod'), > - (('bitfield_insert', a, b, c, d), ('bfi', ('bfm', d, c), b, a), > 'options->lower_bitfield_insert'), > (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), > 'options->lower_uadd_carry'), > (('usub_borrow', a, b), ('b2i', ('ult', a, b)), > 'options->lower_usub_borrow'), > + > + (('bitfield_insert', 'base', 'insert', 'offset', 'bits'), > + ('bcsel', ('ilt', 31, 'bits'), 'insert', > + ('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')), > + 'options->lower_bitfield_insert'), > ] > > # Add optimizations to handle the case where the result of a ternary is > -- > 2.4.9 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev