On Mon, Jan 11, 2016 at 5:48 PM, Matt Turner <[email protected]> wrote: > From: Kenneth Graunke <[email protected]> > > We would like to be able to combine > > result.x = bitfieldInsert(src0.x, src1.x, src2.x, src3.x); > result.y = bitfieldInsert(src0.y, src1.y, src2.y, src3.y); > result.z = bitfieldInsert(src0.z, src1.z, src2.z, src3.z); > result.w = bitfieldInsert(src0.w, src1.w, src2.w, src3.w); > > into a single ivec4 bitfieldInsert operation. This should be possible > with most drivers. > > This patch changes the offset and bits parameters from scalar ints > to ivecN or uvecN. The type of all four operands will be the same, > for simplicity. > > Signed-off-by: Kenneth Graunke <[email protected]> > Reviewed-by: Matt Turner <[email protected]> > --- > [mattst88] v2: > Use swizzle(expr, SWIZZLE_XXXX, type->vector_elements) instead of > swizzle_for_size(expr, type->vector_elements). The latter does not > provide the wanted operation of expanding a scalar. > > Expand arguments to bitfield_insert() in ldexp() lowering passes. > > src/glsl/builtin_functions.cpp | 8 +++++++- > src/glsl/ir.h | 1 - > src/glsl/ir_constant_expression.cpp | 6 +++--- > src/glsl/ir_validate.cpp | 5 +++-- > src/glsl/lower_instructions.cpp | 8 ++++---- > src/glsl/nir/nir_opcodes.py | 4 ++-- > 6 files changed, 19 insertions(+), 13 deletions(-) > > diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp > index 602852a..38383bc 100644 > --- a/src/glsl/builtin_functions.cpp > +++ b/src/glsl/builtin_functions.cpp > @@ -4902,13 +4902,19 @@ builtin_builder::_bitfieldExtract(const glsl_type > *type) > ir_function_signature * > builtin_builder::_bitfieldInsert(const glsl_type *type) > { > + bool is_uint = type->base_type == GLSL_TYPE_UINT; > ir_variable *base = in_var(type, "base"); > ir_variable *insert = in_var(type, "insert"); > ir_variable *offset = in_var(glsl_type::int_type, "offset"); > ir_variable *bits = in_var(glsl_type::int_type, "bits"); > MAKE_SIG(type, gpu_shader5_or_es31, 4, base, insert, offset, bits); > > - body.emit(ret(bitfield_insert(base, insert, offset, bits))); > + operand cast_offset = is_uint ? i2u(offset) : operand(offset); > + operand cast_bits = is_uint ? i2u(bits) : operand(bits); > + > + body.emit(ret(bitfield_insert(base, insert, > + swizzle(cast_offset, SWIZZLE_XXXX, type->vector_elements), > + swizzle(cast_bits, SWIZZLE_XXXX, type->vector_elements)))); > > return sig; > } > diff --git a/src/glsl/ir.h b/src/glsl/ir.h > index a2eb508..9af2fc1 100644 > --- a/src/glsl/ir.h > +++ b/src/glsl/ir.h > @@ -1710,7 +1710,6 @@ public: > operation == ir_triop_vector_insert || > operation == ir_quadop_vector || > /* TODO: these can't currently be vectorized */ > - operation == ir_quadop_bitfield_insert || > operation == ir_triop_bitfield_extract; > } > > diff --git a/src/glsl/ir_constant_expression.cpp > b/src/glsl/ir_constant_expression.cpp > index 38b6dd5..f5b5bd8 100644 > --- a/src/glsl/ir_constant_expression.cpp > +++ b/src/glsl/ir_constant_expression.cpp > @@ -1710,10 +1710,10 @@ ir_expression::constant_expression_value(struct > hash_table *variable_context) > } > > case ir_quadop_bitfield_insert: { > - int offset = op[2]->value.i[0]; > - int bits = op[3]->value.i[0]; > - > for (unsigned c = 0; c < components; c++) { > + int offset = op[2]->value.i[c]; > + int bits = op[3]->value.i[c]; > + > if (bits == 0) > data.u[c] = op[0]->value.u[c]; > else if (offset < 0 || bits < 0) > diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp > index a4d6182..fea9b76 100644 > --- a/src/glsl/ir_validate.cpp > +++ b/src/glsl/ir_validate.cpp > @@ -647,10 +647,11 @@ ir_validate::visit_leave(ir_expression *ir) > break; > > case ir_quadop_bitfield_insert: > + assert(ir->type->is_integer()); > assert(ir->operands[0]->type == ir->type); > assert(ir->operands[1]->type == ir->type); > - assert(ir->operands[2]->type == glsl_type::int_type); > - assert(ir->operands[3]->type == glsl_type::int_type); > + assert(ir->operands[2]->type == ir->type); > + assert(ir->operands[3]->type == ir->type);
This will break lower_packing_builtins::pack_uvec2_to_uint and the such. (In other news, I highly encourage you to set LOWER_PACK_USE_BFI/BFE). -ilia _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
