Improves nexuiz performance by 0.28% +/- 0.15% (n=5) on my gen6. No statistically significant performance difference on warsow (n=5). --- src/mesa/drivers/dri/i965/brw_fs.h | 1 + src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 6 ++++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 42 ++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 0 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 5fdc055..060aa36 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -529,6 +529,7 @@ public: fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); bool try_emit_saturate(ir_expression *ir); + bool try_emit_mad(ir_expression *ir, int mul_arg); void emit_bool_to_cond_code(ir_rvalue *condition); void emit_if_gen6(ir_if *ir); void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index b68d8cb..344a533 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -725,6 +725,12 @@ fs_visitor::generate_code() brw_set_acc_write_control(p, 0); break; + case BRW_OPCODE_MAD: + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MAD(p, dst, src[0], src[1], src[2]); + brw_set_access_mode(p, BRW_ALIGN_1); + break; + case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index ea8cd37..efa54b5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -182,6 +182,44 @@ fs_visitor::try_emit_saturate(ir_expression *ir) return true; } +bool +fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg) +{ + /* 3-src instructions were introduced in gen6. */ + if (intel->gen < 6) + return false; + + /* FINISHME: Can we do this for 16-wide at all? Breaks + * fs-mix-float-float-float at least. + */ + if (c->dispatch_width == 16) + return false; + + /* MAD can only handle floating-point data. 
*/ + if (ir->type != glsl_type::float_type) + return false; + + ir_rvalue *nonmul = ir->operands[1 - mul_arg]; + ir_expression *mul = ir->operands[mul_arg]->as_expression(); + + if (!mul || mul->operation != ir_binop_mul) + return false; + + nonmul->accept(this); + fs_reg src0 = this->result; + + mul->operands[0]->accept(this); + fs_reg src1 = this->result; + + mul->operands[1]->accept(this); + fs_reg src2 = this->result; + + this->result = fs_reg(this, ir->type); + emit(BRW_OPCODE_MAD, this->result, src0, src1, src2); + + return true; +} + void fs_visitor::visit(ir_expression *ir) { @@ -193,6 +231,10 @@ fs_visitor::visit(ir_expression *ir) if (try_emit_saturate(ir)) return; + if (ir->operation == ir_binop_add) { + if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1)) + return; + } for (operand = 0; operand < ir->get_num_operands(); operand++) { ir->operands[operand]->accept(this); -- 1.7.9 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev