On 04/06/2014 11:49 PM, Kenneth Graunke wrote: > Integer shifts are basically always well supported and efficient; that > isn't always true of integer division, and sometimes even integer > multiplication isn't without issues. > > On some Intel hardware, INTDIV can't be used in SIMD16 mode. It also > doesn't support immediate operands (on any generation), while ASR can. > > On Haswell, this cuts the number of instructions in dolphin/efb2ram by > 7.94%. It also removes a single MOV in dolphin/realxfb (due to ASR > supporting immediates), and gains SIMD16 support (due to no INTDIV). > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/glsl/opt_algebraic.cpp | 70 > ++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 68 insertions(+), 2 deletions(-) > > diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp > index 9d55392..eccc2eb 100644 > --- a/src/glsl/opt_algebraic.cpp > +++ b/src/glsl/opt_algebraic.cpp > @@ -34,6 +34,7 @@ > #include "ir_optimization.h" > #include "ir_builder.h" > #include "glsl_types.h" > +#include "main/macros.h" > > using namespace ir_builder; > > @@ -68,6 +69,8 @@ public: > int op2); > ir_rvalue *swizzle_if_required(ir_expression *expr, > ir_rvalue *operand); > + ir_rvalue *convert_int_math_to_shifts(ir_expression *ir, > + ir_constant *op_const_array[4]); > > void *mem_ctx; > > @@ -185,6 +188,59 @@ ir_algebraic_visitor::reassociate_constant(ir_expression > *ir1, int const_index, > return false; > } > > +/** > + * Transform integer multiplication/division by a constant power-of-two > + * factor into shift instructions. > + */ > +ir_rvalue * > +ir_algebraic_visitor::convert_int_math_to_shifts(ir_expression *ir, > + ir_constant > *op_const_array[4]) > +{ > + /* This optimization only makes sense for GPUs with native integers. */ > + if (!native_integers) > + return NULL; > + > + assert(ir->operation == ir_binop_mul || ir->operation == ir_binop_div); > + > + /* Shifts only work for integer types. 
*/ > + if (!ir->type->is_integer()) > + return NULL;
Following the earlier conversation about converting division to shifts, I think the division case, at least, is only generally valid for unsigned operands: -1 / 2 => 0, but -1 >> 1 => -1. I don't know what the rules are for multiplication overflow... I think the result of int32_t(0x70000000)*2 is undefined, so the multiplication case should be fine. > + > + ir_constant *const_op; > + ir_rvalue *other_op; > + if (op_const_array[0]) { > + const_op = op_const_array[0]; > + other_op = ir->operands[1]; > + } else if (op_const_array[1]) { > + const_op = op_const_array[1]; > + other_op = ir->operands[0]; > + } else { > + /* If neither is a constant, we can't check for powers of two. */ > + return NULL; > + } > + > + ir_constant_data shift_data; > + for (int i = 0; i < const_op->type->vector_elements; i++) { > + if (const_op->type->base_type == GLSL_TYPE_INT && > + const_op->value.i[i] <= 0) { > + /* Negative values aren't powers of two. */ > + return NULL; > + } But... we could convert x*-4 into (-x)*4... assuming negation is free on integer operands. That seems like it would be good follow-on work... > + > + if (!is_power_of_two(const_op->value.u[i])) > + return NULL; > + > + shift_data.u[i] = ffs(const_op->value.u[i]) - 1; > + } > + > + ir_constant *shifts = new(mem_ctx) ir_constant(ir->type, &shift_data); > + > + if (ir->operation == ir_binop_mul) > + return lshift(other_op, shifts); > + else > + return rshift(other_op, shifts); > +} > + > /* When eliminating an expression and just returning one of its operands, > * we may need to swizzle that operand out to a vector if the expression was > * vector type. 
> @@ -389,7 +445,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) > return ir->operands[0]; > break; > > - case ir_binop_mul: > + case ir_binop_mul: { > if (is_vec_one(op_const[0])) > return ir->operands[1]; > if (is_vec_one(op_const[1])) > @@ -403,6 +459,9 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) > if (is_vec_negative_one(op_const[1])) > return neg(ir->operands[0]); > > + ir_rvalue *shift_expr = convert_int_math_to_shifts(ir, op_const); > + if (shift_expr) > + return shift_expr; > > /* Reassociate multiplication of constants so that we can do > * constant folding. > @@ -413,8 +472,9 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) > reassociate_constant(ir, 1, op_const[1], op_expr[0]); > > break; > + } > > - case ir_binop_div: > + case ir_binop_div: { > if (is_vec_one(op_const[0]) && ir->type->base_type == GLSL_TYPE_FLOAT) > { > return new(mem_ctx) ir_expression(ir_unop_rcp, > ir->operands[1]->type, > @@ -423,7 +483,13 @@ ir_algebraic_visitor::handle_expression(ir_expression > *ir) > } > if (is_vec_one(op_const[1])) > return ir->operands[0]; > + > + ir_rvalue *shift_expr = convert_int_math_to_shifts(ir, op_const); > + if (shift_expr) > + return shift_expr; > + > break; > + } > > case ir_binop_dot: > if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev