Re: [Mesa-dev] [PATCH 07/37] i965/gen6/gs: Implement GS_OPCODE_FF_SYNC.

2014-09-02 Thread Iago Toral Quiroga
On lun, 2014-09-01 at 11:17 -0700, Kenneth Graunke wrote:
 On Thursday, August 14, 2014 01:11:39 PM Iago Toral Quiroga wrote:
  This implements the FF_SYNC message required in gen6  geometry shaders to
  get the initial URB handle.
  ---
   src/mesa/drivers/dri/i965/brw_defines.h  | 14 +
   src/mesa/drivers/dri/i965/brw_shader.cpp |  2 ++
   src/mesa/drivers/dri/i965/brw_vec4.h |  3 ++
   src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 40 
  
   4 files changed, 59 insertions(+)
  
  diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
  b/src/mesa/drivers/dri/i965/brw_defines.h
  index 3564041..125d728 100644
  --- a/src/mesa/drivers/dri/i965/brw_defines.h
  +++ b/src/mesa/drivers/dri/i965/brw_defines.h
  @@ -1002,6 +1002,20 @@ enum opcode {
   * - dst is the GRF for gl_InvocationID.
   */
  GS_OPCODE_GET_INSTANCE_ID,
  +
  +   /**
  +* Send a FF_SYNC message to allocate initial URB handles (gen6).
  +*
  +* - dst will hold the newly allocated VUE handle. It is expected to be
  +*   be initialized so that it can be used to as the FF_SYNC message 
  header
  +*   (that is, it won't do an implied move from R0).
  +*
  +* - src0 is a temporary that will be used as writeback register for the
  +*   FF_SYNC operation.
 
 I was originally a bit concerned that writing over a source register could 
 get your into trouble - say, optimization passes not noticing that you're 
 modifying src0, or the scheduler not noticing dependencies.  But, since you 
 allocate a temporary, and only use it in this one instruction, there's not 
 really any chance for optimizations to combine it with anything.
 
 So, I suppose this will work.
 
 Another way you might consider doing this...
 
 1. Add GS_OPCODE_FF_SYNC to vec4_visitor::implied_mrf_writes, and return 1.
This tells the scheduler etc. that you implicitly trash 1 MRF.
 2. Make the visitor set inst-base_mrf, and MOV base_mrf g0.
 3. Make the generator use brw_message_reg(inst-base_mrf) for the header MOVs.
 
 Then, src0 can be the number of primitives written, and you don't need src1.
 That seems simpler to me.
 
 brw_ff_sync will resolve your GRF (dst) to an MRF anyway; the above approach 
 would write directly into the MRF, saving you a GRF.
 
 Thoughts?

That looks like a better idea. I'll give it a go.
Thanks!

Iago

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/37] i965/gen6/gs: Implement GS_OPCODE_FF_SYNC.

2014-09-01 Thread Kenneth Graunke
On Thursday, August 14, 2014 01:11:39 PM Iago Toral Quiroga wrote:
 This implements the FF_SYNC message required in gen6  geometry shaders to
 get the initial URB handle.
 ---
  src/mesa/drivers/dri/i965/brw_defines.h  | 14 +
  src/mesa/drivers/dri/i965/brw_shader.cpp |  2 ++
  src/mesa/drivers/dri/i965/brw_vec4.h |  3 ++
  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 40 
 
  4 files changed, 59 insertions(+)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
 b/src/mesa/drivers/dri/i965/brw_defines.h
 index 3564041..125d728 100644
 --- a/src/mesa/drivers/dri/i965/brw_defines.h
 +++ b/src/mesa/drivers/dri/i965/brw_defines.h
 @@ -1002,6 +1002,20 @@ enum opcode {
  * - dst is the GRF for gl_InvocationID.
  */
 GS_OPCODE_GET_INSTANCE_ID,
 +
 +   /**
 +* Send a FF_SYNC message to allocate initial URB handles (gen6).
 +*
 +* - dst will hold the newly allocated VUE handle. It is expected to be
 +*   be initialized so that it can be used to as the FF_SYNC message 
 header
 +*   (that is, it won't do an implied move from R0).
 +*
 +* - src0 is a temporary that will be used as writeback register for the
 +*   FF_SYNC operation.

I was originally a bit concerned that writing over a source register could get 
your into trouble - say, optimization passes not noticing that you're modifying 
src0, or the scheduler not noticing dependencies.  But, since you allocate a 
temporary, and only use it in this one instruction, there's not really any 
chance for optimizations to combine it with anything.

So, I suppose this will work.

Another way you might consider doing this...

1. Add GS_OPCODE_FF_SYNC to vec4_visitor::implied_mrf_writes, and return 1.
   This tells the scheduler etc. that you implicitly trash 1 MRF.
2. Make the visitor set inst-base_mrf, and MOV base_mrf g0.
3. Make the generator use brw_message_reg(inst-base_mrf) for the header MOVs.

Then, src0 can be the number of primitives written, and you don't need src1.
That seems simpler to me.

brw_ff_sync will resolve your GRF (dst) to an MRF anyway; the above approach 
would write directly into the MRF, saving you a GRF.

Thoughts?

 +*
 +* - src1 is the number of primitives written.
 +*/
 +   GS_OPCODE_FF_SYNC,
  };
  
  enum brw_urb_write_flags {
 diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
 b/src/mesa/drivers/dri/i965/brw_shader.cpp
 index 0033135..5749061 100644
 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
 @@ -528,6 +528,8 @@ brw_instruction_name(enum opcode op)
return set_channel_masks;
 case GS_OPCODE_GET_INSTANCE_ID:
return get_instance_id;
 +   case GS_OPCODE_FF_SYNC:
 +  return ff_sync;
  
 default:
/* Yes, this leaks.  It's in debug code, it should never occur, and if
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
 b/src/mesa/drivers/dri/i965/brw_vec4.h
 index 67132c0..72fabdd 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4.h
 +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
 @@ -659,6 +659,9 @@ private:
 void generate_gs_prepare_channel_masks(struct brw_reg dst);
 void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg 
 src);
 void generate_gs_get_instance_id(struct brw_reg dst);
 +   void generate_gs_ff_sync(struct brw_reg dst,
 +struct brw_reg src0,
 +struct brw_reg src1);
 void generate_oword_dual_block_offsets(struct brw_reg m1,
 struct brw_reg index);
 void generate_scratch_write(vec4_instruction *inst,
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 index c63b47a..05f4892 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 @@ -621,6 +621,42 @@ vec4_generator::generate_gs_get_instance_id(struct 
 brw_reg dst)
  }
  
  void
 +vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
 +struct brw_reg src0,
 +struct brw_reg src1)
 +{
 +   /* We use dst to setup the ff_sync header, so we expect it to be
 +* initialized to R0 by the caller. Here we overwrite dword 0 (cleared
 +* for now since we are not doing transform feedback) and dword 1
 +* (to hold the number of primitives written).
 +*/
 +   brw_push_insn_state(p);
 +   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 +   brw_set_default_access_mode(p, BRW_ALIGN_1);
 +   brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0));
 +   brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0));
 +   brw_set_default_access_mode(p, BRW_ALIGN_16);
 +   brw_pop_insn_state(p);
 +
 +   /* Write allocated URB handle to temporary passed in src0 */
 +   brw_ff_sync(p,
 +   src0,
 +   0,
 +   dst,
 +   

[Mesa-dev] [PATCH 07/37] i965/gen6/gs: Implement GS_OPCODE_FF_SYNC.

2014-08-14 Thread Iago Toral Quiroga
This implements the FF_SYNC message required in gen6  geometry shaders to
get the initial URB handle.
---
 src/mesa/drivers/dri/i965/brw_defines.h  | 14 +
 src/mesa/drivers/dri/i965/brw_shader.cpp |  2 ++
 src/mesa/drivers/dri/i965/brw_vec4.h |  3 ++
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 40 
 4 files changed, 59 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 3564041..125d728 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1002,6 +1002,20 @@ enum opcode {
 * - dst is the GRF for gl_InvocationID.
 */
GS_OPCODE_GET_INSTANCE_ID,
+
+   /**
+* Send a FF_SYNC message to allocate initial URB handles (gen6).
+*
+* - dst will hold the newly allocated VUE handle. It is expected to be
+*   be initialized so that it can be used to as the FF_SYNC message header
+*   (that is, it won't do an implied move from R0).
+*
+* - src0 is a temporary that will be used as writeback register for the
+*   FF_SYNC operation.
+*
+* - src1 is the number of primitives written.
+*/
+   GS_OPCODE_FF_SYNC,
 };
 
 enum brw_urb_write_flags {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 0033135..5749061 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -528,6 +528,8 @@ brw_instruction_name(enum opcode op)
   return set_channel_masks;
case GS_OPCODE_GET_INSTANCE_ID:
   return get_instance_id;
+   case GS_OPCODE_FF_SYNC:
+  return ff_sync;
 
default:
   /* Yes, this leaks.  It's in debug code, it should never occur, and if
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 67132c0..72fabdd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -659,6 +659,9 @@ private:
void generate_gs_prepare_channel_masks(struct brw_reg dst);
void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
void generate_gs_get_instance_id(struct brw_reg dst);
+   void generate_gs_ff_sync(struct brw_reg dst,
+struct brw_reg src0,
+struct brw_reg src1);
void generate_oword_dual_block_offsets(struct brw_reg m1,
  struct brw_reg index);
void generate_scratch_write(vec4_instruction *inst,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index c63b47a..05f4892 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -621,6 +621,42 @@ vec4_generator::generate_gs_get_instance_id(struct brw_reg 
dst)
 }
 
 void
+vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
+struct brw_reg src0,
+struct brw_reg src1)
+{
+   /* We use dst to setup the ff_sync header, so we expect it to be
+* initialized to R0 by the caller. Here we overwrite dword 0 (cleared
+* for now since we are not doing transform feedback) and dword 1
+* (to hold the number of primitives written).
+*/
+   brw_push_insn_state(p);
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0));
+   brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0));
+   brw_set_default_access_mode(p, BRW_ALIGN_16);
+   brw_pop_insn_state(p);
+
+   /* Write allocated URB handle to temporary passed in src0 */
+   brw_ff_sync(p,
+   src0,
+   0,
+   dst,
+   1, /* allocate */
+   1, /* response length */
+   0 /* eot */);
+
+   /* Now put allocated urb handle in dst.0 */
+   brw_push_insn_state(p);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+   brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0));
+   brw_set_default_access_mode(p, BRW_ALIGN_16);
+   brw_pop_insn_state(p);
+}
+
+void
 vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
   struct brw_reg index)
 {
@@ -1198,6 +1234,10 @@ 
vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
   generate_gs_get_instance_id(dst);
   break;
 
+   case GS_OPCODE_FF_SYNC:
+  generate_gs_ff_sync(dst, src[0], src[1]);
+  break;
+
case SHADER_OPCODE_SHADER_TIME_ADD:
   brw_shader_time_add(p, src[0],
   prog_data-base.binding_table.shader_time_start);
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org