From: Ian Romanick <ian.d.roman...@intel.com> An ir_triop_csel like "ir_triop_csel condition value 0" could be implemented using a CMP and a SEL instruction, as is currently done. It could also be implemented using a CMP and an AND instruction using the compare result as one of the operands. This is basically how we implement the various b2i operations.
On i965 SEL can have at most one immediate parameter. If the non-zero parameter is also an immediate, it has to be pre-loaded into a GRF. With this optimization, that extra MOV can be saved. This is not applied on platforms that don't generate the full 32-bits from CMP (i.e., GEN5 and earlier) because this often (~800 cases in shader-db) results in an extra instruction being generated by resolve_bool_comparison. Shader-db results: Sandy Bridge (0x0116): total instructions in shared programs: 6832991 -> 6832973 (-0.00%) instructions in affected programs: 5303 -> 5285 (-0.34%) helped: 6 Ivy Bridge (0x0166): total instructions in shared programs: 6291668 -> 6291662 (-0.00%) instructions in affected programs: 4823 -> 4817 (-0.12%) helped: 3 HURT: 3 Haswell (0x0426): total instructions in shared programs: 5779054 -> 5779036 (-0.00%) instructions in affected programs: 4408 -> 4390 (-0.41%) helped: 6 Broadwell (0x162E): total instructions in shared programs: 6822848 -> 6822830 (-0.00%) instructions in affected programs: 4382 -> 4364 (-0.41%) helped: 6 No change on GM45, Iron Lake, or any platform with NIR. Signed-off-by: Ian Romanick <ian.d.roman...@intel.com> --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 21 ++++++++++++++++++++- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 19 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 1fbef5f..bde9492 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -707,7 +707,7 @@ fs_visitor::visit(ir_expression *ir) return; break; - case ir_triop_csel: + case ir_triop_csel: { /* After splitting an expression like 'v = csel(cond, vec4(a, b, c, 1), * vec4(d, e, f, 1))', there will be a 'v.w = csel(cond, 1, 1)'. Detect * this, and avoid emitting the spurious SEL. 
@@ -720,6 +720,24 @@ fs_visitor::visit(ir_expression *ir) if (try_opt_frontfacing_ternary(ir)) return; + /* If operands[2] (the "false" result) is zero, we can emit 'AND dst, + * operand0, condition' instead of a SEL. This is similar to the way + * ir_unop_b2f and ir_unop_b2i are implemented. + */ + ir_constant *const false_result = ir->operands[2]->as_constant(); + if (brw->gen >= 6 && false_result && false_result->is_zero()) { + ir->operands[0]->accept(this); + op[0] = this->result; + ir->operands[1]->accept(this); + op[1] = this->result; + + this->result = fs_reg(vgrf(ir->type)); + inst = emit(AND(retype(this->result, BRW_REGISTER_TYPE_D), + retype(op[1], BRW_REGISTER_TYPE_D), + retype(op[0], BRW_REGISTER_TYPE_D))); + return; + } + ir->operands[1]->accept(this); op[1] = this->result; ir->operands[2]->accept(this); @@ -731,6 +749,7 @@ fs_visitor::visit(ir_expression *ir) inst = emit(SEL(this->result, op[1], op[2])); inst->predicate = BRW_PREDICATE_NORMAL; return; + } case ir_unop_b2f: if (brw->gen <= 5 && try_emit_b2f_of_comparison(ir)) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index d5c6e9b..8ecc625 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1325,6 +1325,25 @@ vec4_visitor::visit(ir_expression *ir) src_reg result_src(result_dst); if (ir->operation == ir_triop_csel) { + /* If operands[2] (the "false" result) is zero, we can emit 'AND dst, + * operand0, condition' instead of a SEL. This is similar to the way + * ir_unop_b2f and ir_unop_b2i are implemented. 
+ */ + ir_constant *const false_result = ir->operands[2]->as_constant(); + if (brw->gen >= 6 && false_result && false_result->is_zero()) { + ir->operands[0]->accept(this); + op[0] = this->result; + ir->operands[1]->accept(this); + op[1] = this->result; + + inst = emit(AND(retype(result_dst, BRW_REGISTER_TYPE_D), + retype(op[1], BRW_REGISTER_TYPE_D), + retype(op[0], BRW_REGISTER_TYPE_D))); + + this->result = result_src; + return; + } + ir->operands[1]->accept(this); op[1] = this->result; ir->operands[2]->accept(this); -- 2.1.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev