[Mesa-dev] [PATCH 2/2] i965/vs: Use GRFs for pull constant offsets on gen7.

2013-04-05 Thread Eric Anholt
This allows the computation of the offset to get written directly into the
message source.

shader-db results:
total instructions in shared programs: 3308390 - 3283025 (-0.77%)
instructions in affected programs: 442998 - 417633 (-5.73%)

No difference in GLB2.7 low res (n=9).
---
 src/mesa/drivers/dri/i965/brw_defines.h|1 +
 src/mesa/drivers/dri/i965/brw_shader.cpp   |2 ++
 src/mesa/drivers/dri/i965/brw_vec4.cpp |8 -
 src/mesa/drivers/dri/i965/brw_vec4.h   |4 +++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp|   45 +++-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |   18 +++---
 6 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 3d07c36..a13f9dc 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -739,6 +739,7 @@ enum opcode {
VS_OPCODE_SCRATCH_READ,
VS_OPCODE_SCRATCH_WRITE,
VS_OPCODE_PULL_CONSTANT_LOAD,
+   VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
 };
 
 #define BRW_PREDICATE_NONE 0
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 1a52039..b3bd1b9 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -498,6 +498,8 @@ brw_instruction_name(enum opcode op)
   return "scratch_write";
case VS_OPCODE_PULL_CONSTANT_LOAD:
   return "pull_constant_load";
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+  return "pull_constant_load_gen7";
 
default:
   /* Yes, this leaks.  It's in debug code, it should never occur, and if
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index e470ac8..67dd17a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -230,7 +230,13 @@ vec4_instruction::is_math()
 bool
 vec4_instruction::is_send_from_grf()
 {
-   return opcode == SHADER_OPCODE_SHADER_TIME_ADD;
+   switch (opcode) {
+   case SHADER_OPCODE_SHADER_TIME_ADD:
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+  return true;
+   default:
+  return false;
+   }
 }
 
 bool
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 8f130e1..e286925 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -544,6 +544,10 @@ private:
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
+   void generate_pull_constant_load_gen7(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg surf_index,
+ struct brw_reg offset);
 
struct brw_context *brw;
struct intel_context *intel;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index e378f7f..963901c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -558,27 +558,11 @@ 
vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
 struct brw_reg index,
 struct brw_reg offset)
 {
+   assert(intel->gen <= 7);
assert(index.file == BRW_IMMEDIATE_VALUE &&
  index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
 
-   if (intel->gen == 7) {
-  gen6_resolve_implied_move(p, offset, inst->base_mrf);
-  brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
-  brw_set_dest(p, insn, dst);
-  brw_set_src0(p, insn, offset);
-  brw_set_sampler_message(p, insn,
-  surf_index,
-  0, /* LD message ignores sampler unit */
-  GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
-  1, /* rlen */
-  1, /* mlen */
-  false, /* no header */
-  BRW_SAMPLER_SIMD_MODE_SIMD4X2,
-  0);
-  return;
-   }
-
struct brw_reg header = brw_vec8_grf(0, 0);
 
gen6_resolve_implied_move(p, header, inst->base_mrf);
@@ -614,6 +598,29 @@ 
vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
 }
 
 void
+vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg surf_index,
+ struct brw_reg offset)
+{
+   assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
+ surf_index.type == BRW_REGISTER_TYPE_UD);
+
+   brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+   

Re: [Mesa-dev] [PATCH 2/2] i965/vs: Use GRFs for pull constant offsets on gen7.

2013-04-05 Thread Matt Turner
Reviewed-by: Matt Turner <matts...@gmail.com>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965/vs: Use GRFs for pull constant offsets on gen7.

2013-04-04 Thread Eric Anholt
This allows the computation of the offset to get written directly into the
message source.  Improves performance of low-resolution GLB2.7 by 4.6% +/-
1.4% (n=11).
---
 src/mesa/drivers/dri/i965/brw_defines.h|1 +
 src/mesa/drivers/dri/i965/brw_shader.cpp   |2 ++
 src/mesa/drivers/dri/i965/brw_vec4.cpp |8 -
 src/mesa/drivers/dri/i965/brw_vec4.h   |4 +++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp|   45 +++-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |   18 +++---
 6 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 3d07c36..a13f9dc 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -739,6 +739,7 @@ enum opcode {
VS_OPCODE_SCRATCH_READ,
VS_OPCODE_SCRATCH_WRITE,
VS_OPCODE_PULL_CONSTANT_LOAD,
+   VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
 };
 
 #define BRW_PREDICATE_NONE 0
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 1a52039..b3bd1b9 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -498,6 +498,8 @@ brw_instruction_name(enum opcode op)
   return "scratch_write";
case VS_OPCODE_PULL_CONSTANT_LOAD:
   return "pull_constant_load";
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+  return "pull_constant_load_gen7";
 
default:
   /* Yes, this leaks.  It's in debug code, it should never occur, and if
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index c58fb44..1013aae 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -223,7 +223,13 @@ vec4_instruction::is_math()
 bool
 vec4_instruction::is_send_from_grf()
 {
-   return opcode == SHADER_OPCODE_SHADER_TIME_ADD;
+   switch (opcode) {
+   case SHADER_OPCODE_SHADER_TIME_ADD:
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+  return true;
+   default:
+  return false;
+   }
 }
 
 bool
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 8f130e1..e286925 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -544,6 +544,10 @@ private:
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
+   void generate_pull_constant_load_gen7(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg surf_index,
+ struct brw_reg offset);
 
struct brw_context *brw;
struct intel_context *intel;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index e378f7f..963901c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -558,27 +558,11 @@ 
vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
 struct brw_reg index,
 struct brw_reg offset)
 {
+   assert(intel->gen <= 7);
assert(index.file == BRW_IMMEDIATE_VALUE &&
  index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
 
-   if (intel->gen == 7) {
-  gen6_resolve_implied_move(p, offset, inst->base_mrf);
-  brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
-  brw_set_dest(p, insn, dst);
-  brw_set_src0(p, insn, offset);
-  brw_set_sampler_message(p, insn,
-  surf_index,
-  0, /* LD message ignores sampler unit */
-  GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
-  1, /* rlen */
-  1, /* mlen */
-  false, /* no header */
-  BRW_SAMPLER_SIMD_MODE_SIMD4X2,
-  0);
-  return;
-   }
-
struct brw_reg header = brw_vec8_grf(0, 0);
 
gen6_resolve_implied_move(p, header, inst->base_mrf);
@@ -614,6 +598,29 @@ 
vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
 }
 
 void
+vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg surf_index,
+ struct brw_reg offset)
+{
+   assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
+ surf_index.type == BRW_REGISTER_TYPE_UD);
+
+   brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, insn, dst);
+   brw_set_src0(p, insn, offset);
+   brw_set_sampler_message(p, insn,
+