Re: [Mesa-dev] [PATCH 6/9] i965/fs: Use the LRP instruction for ir_triop_lrp when possible.
On Tue, Feb 26, 2013 at 9:51 AM, Kenneth Graunke kenn...@whitecape.org wrote: diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 9ab18cc..6965d72 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -156,7 +156,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) SUB_TO_ADD_NEG | EXP_TO_EXP2 | LOG_TO_LOG2 | -LRP_TO_ARITH); +(stage == MESA_SHADER_FRAGMENT ? 0 : LRP_TO_ARITH)); Doesn't this need to include a gen check as well? Perhaps: Not really, since emit_lrp() has a gen check (and has to have it, since emit_lrp() will be used for fragment program's OPCODE_LRP). For GLSL, do we gain anything by lowering LRP on gen 6 in the compiler vs the backend, or vice versa? const int lrp_to_arith = 0; if (intel-gen 6 || stage != MESA_SHADER_FRAGMENT) lrp_to_arith = LRP_TO_ARITH; then use lrp_to_arith here. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/9] i965/fs: Use the LRP instruction for ir_triop_lrp when possible.
On 02/28/2013 11:20 AM, Matt Turner wrote: On Tue, Feb 26, 2013 at 9:51 AM, Kenneth Graunke kenn...@whitecape.org wrote: diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 9ab18cc..6965d72 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -156,7 +156,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) SUB_TO_ADD_NEG | EXP_TO_EXP2 | LOG_TO_LOG2 | -LRP_TO_ARITH); +(stage == MESA_SHADER_FRAGMENT ? 0 : LRP_TO_ARITH)); Doesn't this need to include a gen check as well? Perhaps: Not really, since emit_lrp() has a gen check (and has to have it, since emit_lrp() will be used for fragment program's OPCODE_LRP). For GLSL, do we gain anything by lowering LRP on gen 6 in the compiler vs the backend, or vice versa? Oh yeah, I didn't realize you made emit_lrp() handle both the LRP instruction and the raw math. So you're okay as is. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/9] i965/fs: Use the LRP instruction for ir_triop_lrp when possible.
On 02/19/2013 05:03 PM, Matt Turner wrote: From: Kenneth Graunke kenn...@whitecape.org v2 [mattst88]: - Add BRW_OPCODE_LRP to list of CSE-able expressions. - Fix op_var[] array size. - Rename arguments to emit_lrp to (x, y, a) to clear confusion. - Add LRP function to brw_fs.cpp/.h. - Corrected comment about LRP instruction arguments in emit_lrp Nice changes. Signed-off-by: Kenneth Graunke kenn...@whitecape.org Signed-off-by: Matt Turner matts...@gmail.com A few comments below...we'll need a v3 of this... Reviewed-by: Matt Turner matts...@gmail.com --- src/mesa/drivers/dri/i965/brw_fs.cpp |8 src/mesa/drivers/dri/i965/brw_fs.h |2 + .../dri/i965/brw_fs_channel_expressions.cpp| 16 - src/mesa/drivers/dri/i965/brw_fs_cse.cpp |1 + src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 15 +++-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 35 ++-- src/mesa/drivers/dri/i965/brw_shader.cpp |2 +- 7 files changed, 71 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index c1ccd92..bdb6616 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -146,6 +146,13 @@ fs_inst::fs_inst(enum opcode opcode, fs_reg dst, return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\ } +#define ALU3(op)\ + fs_inst *\ + fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)\ + {\ + return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\ + } + ALU1(NOT) ALU1(MOV) ALU1(FRC) @@ -161,6 +168,7 @@ ALU2(XOR) ALU2(SHL) ALU2(SHR) ALU2(ASR) +ALU3(LRP) /** Gen4 predicated IF. */ fs_inst * diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index d5ebd51..9c1b359 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -285,6 +285,7 @@ public: fs_inst *IF(fs_reg src0, fs_reg src1, uint32_t condition); fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1, uint32_t condition); + fs_inst *LRP(fs_reg dst, fs_reg a, fs_reg y, fs_reg x); fs_inst *DEP_RESOLVE_MOV(int grf); int type_size(const struct glsl_type *type); @@ -360,6 +361,7 @@ public: fs_reg fix_math_operand(fs_reg src); fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); + void emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a); void emit_minmax(uint32_t conditionalmod, fs_reg dst, fs_reg src0, fs_reg src1); bool try_emit_saturate(ir_expression *ir); diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index ea06225..30d8d9b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -135,7 +135,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) ir_expression *expr = ir-rhs-as_expression(); bool found_vector = false; unsigned int i, vector_elements = 1; - ir_variable *op_var[2]; + ir_variable *op_var[3]; if (!expr) return visit_continue; @@ -342,6 +342,20 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) assert(!not yet supported); break; + case ir_triop_lrp: + for (i = 0; i vector_elements; i++) { +ir_rvalue *op0 = get_element(op_var[0], i); +ir_rvalue *op1 = get_element(op_var[1], i); +ir_rvalue *op2 = get_element(op_var[2], i); + +assign(ir, i, new(mem_ctx) ir_expression(expr-operation, + element_type, + op0, + op1, + op2)); + } + break; + case ir_unop_pack_snorm_2x16: case ir_unop_pack_snorm_4x8: case ir_unop_pack_unorm_2x16: diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 70c143a..0b74d2e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -66,6 +66,7 @@ is_expression(const fs_inst *const inst) case BRW_OPCODE_LINE: case BRW_OPCODE_PLN: case BRW_OPCODE_MAD: + case BRW_OPCODE_LRP: case FS_OPCODE_CINTERP: case FS_OPCODE_LINTERP: return true; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 3d1f3b3..38d6332 100644 ---
Re: [Mesa-dev] [PATCH 6/9] i965/fs: Use the LRP instruction for ir_triop_lrp when possible.
On 02/26/2013 09:51 AM, Kenneth Graunke wrote: On 02/19/2013 05:03 PM, Matt Turner wrote: diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 9ab18cc..6965d72 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -156,7 +156,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) SUB_TO_ADD_NEG | EXP_TO_EXP2 | LOG_TO_LOG2 | - LRP_TO_ARITH); + (stage == MESA_SHADER_FRAGMENT ? 0 : LRP_TO_ARITH)); Doesn't this need to include a gen check as well? Perhaps: const int lrp_to_arith = 0; if (intel-gen 6 || stage != MESA_SHADER_FRAGMENT) lrp_to_arith = LRP_TO_ARITH; then use lrp_to_arith here. I like Ken's suggestion. This is also a place where I would use ?:, but I won't be too pushy about it. ;) const int lrp_to_arith = (intel-gen 6 || stage != MESA_SHADER_FRAGMENT) ? LRP_TO_ARITH : 0; If you take Ken's snippet, you'll have to remove const. /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, * if-statements need to be flattened. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/9] i965/fs: Use the LRP instruction for ir_triop_lrp when possible.
From: Kenneth Graunke kenn...@whitecape.org v2 [mattst88]: - Add BRW_OPCODE_LRP to list of CSE-able expressions. - Fix op_var[] array size. - Rename arguments to emit_lrp to (x, y, a) to clear confusion. - Add LRP function to brw_fs.cpp/.h. - Corrected comment about LRP instruction arguments in emit_lrp. Reviewed-by: Matt Turner matts...@gmail.com --- src/mesa/drivers/dri/i965/brw_fs.cpp |8 src/mesa/drivers/dri/i965/brw_fs.h |2 + .../dri/i965/brw_fs_channel_expressions.cpp| 16 - src/mesa/drivers/dri/i965/brw_fs_cse.cpp |1 + src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 15 +++-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 35 ++-- src/mesa/drivers/dri/i965/brw_shader.cpp |2 +- 7 files changed, 71 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index c1ccd92..bdb6616 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -146,6 +146,13 @@ fs_inst::fs_inst(enum opcode opcode, fs_reg dst, return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\ } +#define ALU3(op)\ + fs_inst *\ + fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)\ + {\ + return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\ + } + ALU1(NOT) ALU1(MOV) ALU1(FRC) @@ -161,6 +168,7 @@ ALU2(XOR) ALU2(SHL) ALU2(SHR) ALU2(ASR) +ALU3(LRP) /** Gen4 predicated IF. */ fs_inst * diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index d5ebd51..9c1b359 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -285,6 +285,7 @@ public: fs_inst *IF(fs_reg src0, fs_reg src1, uint32_t condition); fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1, uint32_t condition); + fs_inst *LRP(fs_reg dst, fs_reg a, fs_reg y, fs_reg x); fs_inst *DEP_RESOLVE_MOV(int grf); int type_size(const struct glsl_type *type); @@ -360,6 +361,7 @@ public: fs_reg fix_math_operand(fs_reg src); fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); + void emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a); void emit_minmax(uint32_t conditionalmod, fs_reg dst, fs_reg src0, fs_reg src1); bool try_emit_saturate(ir_expression *ir); diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index ea06225..30d8d9b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -135,7 +135,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) ir_expression *expr = ir-rhs-as_expression(); bool found_vector = false; unsigned int i, vector_elements = 1; - ir_variable *op_var[2]; + ir_variable *op_var[3]; if (!expr) return visit_continue; @@ -342,6 +342,20 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) assert(!not yet supported); break; + case ir_triop_lrp: + for (i = 0; i vector_elements; i++) { +ir_rvalue *op0 = get_element(op_var[0], i); +ir_rvalue *op1 = get_element(op_var[1], i); +ir_rvalue *op2 = get_element(op_var[2], i); + +assign(ir, i, new(mem_ctx) ir_expression(expr-operation, + element_type, + op0, + op1, + op2)); + } + break; + case ir_unop_pack_snorm_2x16: case ir_unop_pack_snorm_4x8: case ir_unop_pack_unorm_2x16: diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 70c143a..0b74d2e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -66,6 +66,7 @@ is_expression(const fs_inst *const inst) case BRW_OPCODE_LINE: case BRW_OPCODE_PLN: case BRW_OPCODE_MAD: + case BRW_OPCODE_LRP: case FS_OPCODE_CINTERP: case FS_OPCODE_LINTERP: return true; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 3d1f3b3..38d6332 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -1082,18 +1082,27 @@ fs_generator::generate_code(exec_list *instructions) break; case BRW_OPCODE_MAD: + case BRW_OPCODE_LRP: { + struct brw_instruction