[Mesa-dev] [PATCH 2/2] i965/vs: Use GRFs for pull constant offsets on gen7.
This allows the computation of the offset to get written directly into the message source. shader-db results: total instructions in shared programs: 3308390 -> 3283025 (-0.77%) instructions in affected programs: 442998 -> 417633 (-5.73%) No difference in GLB2.7 low res (n=9). --- src/mesa/drivers/dri/i965/brw_defines.h|1 + src/mesa/drivers/dri/i965/brw_shader.cpp |2 ++ src/mesa/drivers/dri/i965/brw_vec4.cpp |8 - src/mesa/drivers/dri/i965/brw_vec4.h |4 +++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp| 45 +++- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 18 +++--- 6 files changed, 56 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 3d07c36..a13f9dc 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -739,6 +739,7 @@ enum opcode { VS_OPCODE_SCRATCH_READ, VS_OPCODE_SCRATCH_WRITE, VS_OPCODE_PULL_CONSTANT_LOAD, + VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, }; #define BRW_PREDICATE_NONE 0 diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 1a52039..b3bd1b9 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -498,6 +498,8 @@ brw_instruction_name(enum opcode op) return "scratch_write"; case VS_OPCODE_PULL_CONSTANT_LOAD: return "pull_constant_load"; + case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: + return "pull_constant_load_gen7"; default: /* Yes, this leaks. 
It's in debug code, it should never occur, and if diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index e470ac8..67dd17a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -230,7 +230,13 @@ vec4_instruction::is_math() bool vec4_instruction::is_send_from_grf() { - return opcode == SHADER_OPCODE_SHADER_TIME_ADD; + switch (opcode) { + case SHADER_OPCODE_SHADER_TIME_ADD: + case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: + return true; + default: + return false; + } } bool diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 8f130e1..e286925 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -544,6 +544,10 @@ private: struct brw_reg dst, struct brw_reg index, struct brw_reg offset); + void generate_pull_constant_load_gen7(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg surf_index, + struct brw_reg offset); struct brw_context *brw; struct intel_context *intel; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index e378f7f..963901c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -558,27 +558,11 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst, struct brw_reg index, struct brw_reg offset) { + assert(intel->gen <= 7); assert(index.file == BRW_IMMEDIATE_VALUE && index.type == BRW_REGISTER_TYPE_UD); uint32_t surf_index = index.dw1.ud; - if (intel->gen == 7) { - gen6_resolve_implied_move(p, &offset, inst->base_mrf); - brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(p, insn, dst); - brw_set_src0(p, insn, offset); - brw_set_sampler_message(p, insn, - surf_index, - 0, /* LD message ignores sampler unit */ - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - 1, /* rlen */ - 1, /* mlen */ - false, /* no header */ - BRW_SAMPLER_SIMD_MODE_SIMD4X2, - 0); - return; 
- } - struct brw_reg header = brw_vec8_grf(0, 0); gen6_resolve_implied_move(p, &header, inst->base_mrf); @@ -614,6 +598,29 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst, } void +vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg surf_index, + struct brw_reg offset) +{ + assert(surf_index.file == BRW_IMMEDIATE_VALUE && + surf_index.type == BRW_REGISTER_TYPE_UD); + + brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); +
Re: [Mesa-dev] [PATCH 2/2] i965/vs: Use GRFs for pull constant offsets on gen7.
Reviewed-by: Matt Turner <matts...@gmail.com> ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] i965/vs: Use GRFs for pull constant offsets on gen7.
This allows the computation of the offset to get written directly into the message source. Improves performance of low-resolution GLB2.7 by 4.6% +/- 1.4% (n=11). --- src/mesa/drivers/dri/i965/brw_defines.h|1 + src/mesa/drivers/dri/i965/brw_shader.cpp |2 ++ src/mesa/drivers/dri/i965/brw_vec4.cpp |8 - src/mesa/drivers/dri/i965/brw_vec4.h |4 +++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp| 45 +++- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 18 +++--- 6 files changed, 56 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 3d07c36..a13f9dc 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -739,6 +739,7 @@ enum opcode { VS_OPCODE_SCRATCH_READ, VS_OPCODE_SCRATCH_WRITE, VS_OPCODE_PULL_CONSTANT_LOAD, + VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, }; #define BRW_PREDICATE_NONE 0 diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 1a52039..b3bd1b9 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -498,6 +498,8 @@ brw_instruction_name(enum opcode op) return "scratch_write"; case VS_OPCODE_PULL_CONSTANT_LOAD: return "pull_constant_load"; + case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: + return "pull_constant_load_gen7"; default: /* Yes, this leaks. 
It's in debug code, it should never occur, and if diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index c58fb44..1013aae 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -223,7 +223,13 @@ vec4_instruction::is_math() bool vec4_instruction::is_send_from_grf() { - return opcode == SHADER_OPCODE_SHADER_TIME_ADD; + switch (opcode) { + case SHADER_OPCODE_SHADER_TIME_ADD: + case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: + return true; + default: + return false; + } } bool diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 8f130e1..e286925 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -544,6 +544,10 @@ private: struct brw_reg dst, struct brw_reg index, struct brw_reg offset); + void generate_pull_constant_load_gen7(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg surf_index, + struct brw_reg offset); struct brw_context *brw; struct intel_context *intel; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index e378f7f..963901c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -558,27 +558,11 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst, struct brw_reg index, struct brw_reg offset) { + assert(intel->gen <= 7); assert(index.file == BRW_IMMEDIATE_VALUE && index.type == BRW_REGISTER_TYPE_UD); uint32_t surf_index = index.dw1.ud; - if (intel->gen == 7) { - gen6_resolve_implied_move(p, &offset, inst->base_mrf); - brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(p, insn, dst); - brw_set_src0(p, insn, offset); - brw_set_sampler_message(p, insn, - surf_index, - 0, /* LD message ignores sampler unit */ - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - 1, /* rlen */ - 1, /* mlen */ - false, /* no header */ - BRW_SAMPLER_SIMD_MODE_SIMD4X2, - 0); - return; 
- } - struct brw_reg header = brw_vec8_grf(0, 0); gen6_resolve_implied_move(p, &header, inst->base_mrf); @@ -614,6 +598,29 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst, } void +vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg surf_index, + struct brw_reg offset) +{ + assert(surf_index.file == BRW_IMMEDIATE_VALUE && + surf_index.type == BRW_REGISTER_TYPE_UD); + + brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, insn, dst); + brw_set_src0(p, insn, offset); + brw_set_sampler_message(p, insn, +