On 3 October 2013 10:59, Matt Turner <[email protected]> wrote: > v2: Check fixed_hw_reg.{file,nr} instead of dst.reg. > v3: Store the bool emitted_addc_or_subb in the class, not static. > --- > src/mesa/drivers/dri/i965/brw_fs.h | 3 + > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 104 > +++++++++++++++++++++++++++ > 2 files changed, 107 insertions(+) >
Reviewed-by: Paul Berry <[email protected]> > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index 6a53e59..c703c2b 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -345,6 +345,7 @@ public: > fs_reg src0, fs_reg src1); > bool try_emit_saturate(ir_expression *ir); > bool try_emit_mad(ir_expression *ir, int mul_arg); > + void try_combine_add_with_addc_subb(); > void try_replace_with_sel(); > void emit_bool_to_cond_code(ir_rvalue *condition); > void emit_if_gen6(ir_if *ir); > @@ -458,6 +459,8 @@ public: > > int force_uncompressed_stack; > int force_sechalf_stack; > + > + bool emitted_addc_or_subb; > }; > > /** > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index b8c30e6..8accbd6 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -313,6 +313,102 @@ fs_visitor::try_emit_mad(ir_expression *ir, int > mul_arg) > return true; > } > > +/** > + * The addition and carry in the uaddCarry() built-in function are > implemented > + * separately as ir_binop_add and ir_binop_carry respectively. i965 > generates > + * ADDC and a MOV from the accumulator for the carry. > + * > + * The generated code for uaddCarry(uint x, uint y, out uint carry) would > look > + * like this: > + * > + * addc null, x, y > + * mov carry, acc0 > + * add sum, x, y > + * > + * This peephole pass optimizes this into > + * > + * addc sum, x, y > + * mov carry, acc0 > + * > + * usubBorrow() works in the same fashion. > + */ > +void > +fs_visitor::try_combine_add_with_addc_subb() > +{ > + /* ADDC/SUBB was introduced in gen7. */ > + if (brw->gen < 7) > + return; > + > + fs_inst *add_inst = (fs_inst *) instructions.get_tail(); > + assert(add_inst->opcode == BRW_OPCODE_ADD); > + > + /* ADDC/SUBB only operates on UD. */ > + if (add_inst->dst.type != BRW_REGISTER_TYPE_UD || > + add_inst->src[0].type != BRW_REGISTER_TYPE_UD || > + add_inst->src[1].type != BRW_REGISTER_TYPE_UD) > + return; > + > + bool found = false; > + fs_inst *match = (fs_inst *) add_inst->prev; > + /* The ADDC should appear within 8 instructions of ADD for a vec4. SUBB > + * should appear farther away because of the extra MOV negates. > + */ > + for (int i = 0; i < 16; i++, match = (fs_inst *) match->prev) { > + if (match->is_head_sentinel()) > + return; > + > + /* Look for an ADDC/SUBB instruction whose destination is the null > + * register (ir_binop_carry emits ADDC with null destination; same > for > + * ir_binop_borrow with SUBB) and whose sources are identical to > those > + * of the ADD. > + */ > + if (match->opcode != BRW_OPCODE_ADDC && match->opcode != > BRW_OPCODE_SUBB) > + continue; > + > + /* Only look for newly emitted ADDC/SUBB with null destination. */ > + if (match->dst.file != HW_REG || > + match->dst.fixed_hw_reg.file != BRW_ARCHITECTURE_REGISTER_FILE > || > + match->dst.fixed_hw_reg.nr != BRW_ARF_NULL) > + continue; > + > + fs_reg *src0 = &add_inst->src[0]; > + fs_reg *src1 = &add_inst->src[1]; > + > + /* For SUBB, the ADD's second source will contain a negate modifier > + * which at this point will be in the form of a > + * > + * MOV dst, -src > + * > + * instruction, so src[1].file will be GRF, even if it's a uniform > push > + * constant. > + */ > + if (match->src[1].reg != add_inst->src[1].reg) { > + /* The negating MOV should be immediately before the ADD. */ > + fs_inst *mov_inst = (fs_inst *) add_inst->prev; > + if (mov_inst->opcode != BRW_OPCODE_MOV) > + continue; > + > + src1 = &mov_inst->src[0]; > + } > + > + /* If everything matches, we're done. */ > + if (match->src[0].file == src0->file && > + match->src[1].file == src1->file && > + match->src[0].reg == src0->reg && > + match->src[1].reg == src1->reg && > + match->src[0].reg_offset == src0->reg_offset && > + match->src[1].reg_offset == src1->reg_offset) { > + found = true; > + break; > + } > + } > + > + if (found) { > + match->dst = add_inst->dst; > + add_inst->remove(); > + } > +} > + > void > fs_visitor::visit(ir_expression *ir) > { > @@ -415,6 +511,8 @@ fs_visitor::visit(ir_expression *ir) > > case ir_binop_add: > emit(ADD(this->result, op[0], op[1])); > + if (emitted_addc_or_subb) > + try_combine_add_with_addc_subb(); > break; > case ir_binop_sub: > assert(!"not reached: should be handled by ir_sub_to_add_neg"); > @@ -451,6 +549,8 @@ fs_visitor::visit(ir_expression *ir) > if (brw->gen >= 7 && dispatch_width == 16) > fail("16-wide explicit accumulator operands unsupported\n"); > > + emitted_addc_or_subb = true; > + > struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD); > > emit(ADDC(reg_null_ud, op[0], op[1])); > @@ -461,6 +561,8 @@ fs_visitor::visit(ir_expression *ir) > if (brw->gen >= 7 && dispatch_width == 16) > fail("16-wide explicit accumulator operands unsupported\n"); > > + emitted_addc_or_subb = true; > + > struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD); > > emit(SUBB(reg_null_ud, op[0], op[1])); > @@ -2610,6 +2712,8 @@ fs_visitor::fs_visitor(struct brw_context *brw, > this->force_uncompressed_stack = 0; > this->force_sechalf_stack = 0; > > + this->emitted_addc_or_subb = false; > + > memset(&this->param_size, 0, sizeof(this->param_size)); > } > > -- > 1.8.3.2 > > _______________________________________________ > mesa-dev mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
