From: Ian Romanick <[email protected]> Previously we would emit the comparison, emit an AND to mask off extra bits from the comparison result, then convert the result to float. Now, do the comparison, then use a cleverly constructed SEL to pick either 0.0f or 1.0f.
No piglit regressions on Ivybridge. NOTE: I have not yet tested actual application performance. I'll do that tomorrow. total instructions in shared programs: 1642311 -> 1639449 (-0.17%) instructions in affected programs: 136533 -> 133671 (-2.10%) GAINED: 0 LOST: 0 Programs that are affected appear to save between 1 and 5 instructions (just by skimming the output from shader-db report.py). Signed-off-by: Ian Romanick <[email protected]> --- src/mesa/drivers/dri/i965/brw_vec4.h | 1 + src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 47 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index c2bbd68..27e1e39 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -587,6 +587,7 @@ public: bool try_emit_sat(ir_expression *ir); bool try_emit_mad(ir_expression *ir); + bool try_emit_b2f_of_compare(ir_expression *ir); void resolve_ud_negate(src_reg *reg); src_reg get_timestamp(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 249072c..902f02d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1126,6 +1126,48 @@ vec4_visitor::try_emit_mad(ir_expression *ir) return true; } +bool +vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir) +{ + ir_expression *const cmp = ir->operands[0]->as_expression(); + + if (cmp == NULL) + return false; + + switch (cmp->operation) { + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: + break; + + default: + return false; + } + + cmp->operands[0]->accept(this); + const src_reg cmp_src0 = this->result; + + cmp->operands[1]->accept(this); + const src_reg cmp_src1 = this->result; + + this->result = src_reg(this, ir->type); + + emit(CMP(dst_reg(this->result), cmp_src0, cmp_src1, + 
brw_conditional_for_comparison(cmp->operation))); + + /* If the comparison is false, this->result will just happen to be zero. + */ + vec4_instruction *const inst = emit(BRW_OPCODE_SEL, dst_reg(this->result), + fix_3src_operand(src_reg(1.0f)), + this->result); + inst->predicate = BRW_PREDICATE_NORMAL; + + return true; +} + void vec4_visitor::emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1) @@ -1202,6 +1244,11 @@ vec4_visitor::visit(ir_expression *ir) return; } + if (ir->operation == ir_unop_b2f) { + if (try_emit_b2f_of_compare(ir)) + return; + } + for (operand = 0; operand < ir->get_num_operands(); operand++) { this->result.file = BAD_FILE; ir->operands[operand]->accept(this); -- 1.8.1.4 _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
