Re: [Mesa-dev] [PATCH 07/37] i965/gen6/gs: Implement GS_OPCODE_FF_SYNC.
On lun, 2014-09-01 at 11:17 -0700, Kenneth Graunke wrote: On Thursday, August 14, 2014 01:11:39 PM Iago Toral Quiroga wrote: This implements the FF_SYNC message required in gen6 geometry shaders to get the initial URB handle. --- src/mesa/drivers/dri/i965/brw_defines.h | 14 + src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++ src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++ src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 40 4 files changed, 59 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 3564041..125d728 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1002,6 +1002,20 @@ enum opcode { * - dst is the GRF for gl_InvocationID. */ GS_OPCODE_GET_INSTANCE_ID, + + /** +* Send a FF_SYNC message to allocate initial URB handles (gen6). +* +* - dst will hold the newly allocated VUE handle. It is expected to be +* be initialized so that it can be used to as the FF_SYNC message header +* (that is, it won't do an implied move from R0). +* +* - src0 is a temporary that will be used as writeback register for the +* FF_SYNC operation. I was originally a bit concerned that writing over a source register could get your into trouble - say, optimization passes not noticing that you're modifying src0, or the scheduler not noticing dependencies. But, since you allocate a temporary, and only use it in this one instruction, there's not really any chance for optimizations to combine it with anything. So, I suppose this will work. Another way you might consider doing this... 1. Add GS_OPCODE_FF_SYNC to vec4_visitor::implied_mrf_writes, and return 1. This tells the scheduler etc. that you implicitly trash 1 MRF. 2. Make the visitor set inst-base_mrf, and MOV base_mrf g0. 3. Make the generator use brw_message_reg(inst-base_mrf) for the header MOVs. Then, src0 can be the number of primitives written, and you don't need src1. That seems simpler to me. brw_ff_sync will resolve your GRF (dst) to an MRF anyway; the above approach would write directly into the MRF, saving you a GRF. Thoughts? That looks like a better idea. I'll give it a go. Thanks! Iago ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/37] i965/gen6/gs: Implement GS_OPCODE_FF_SYNC.
On Thursday, August 14, 2014 01:11:39 PM Iago Toral Quiroga wrote: This implements the FF_SYNC message required in gen6 geometry shaders to get the initial URB handle. --- src/mesa/drivers/dri/i965/brw_defines.h | 14 + src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++ src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++ src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 40 4 files changed, 59 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 3564041..125d728 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1002,6 +1002,20 @@ enum opcode { * - dst is the GRF for gl_InvocationID. */ GS_OPCODE_GET_INSTANCE_ID, + + /** +* Send a FF_SYNC message to allocate initial URB handles (gen6). +* +* - dst will hold the newly allocated VUE handle. It is expected to be +* be initialized so that it can be used to as the FF_SYNC message header +* (that is, it won't do an implied move from R0). +* +* - src0 is a temporary that will be used as writeback register for the +* FF_SYNC operation. I was originally a bit concerned that writing over a source register could get your into trouble - say, optimization passes not noticing that you're modifying src0, or the scheduler not noticing dependencies. But, since you allocate a temporary, and only use it in this one instruction, there's not really any chance for optimizations to combine it with anything. So, I suppose this will work. Another way you might consider doing this... 1. Add GS_OPCODE_FF_SYNC to vec4_visitor::implied_mrf_writes, and return 1. This tells the scheduler etc. that you implicitly trash 1 MRF. 2. Make the visitor set inst-base_mrf, and MOV base_mrf g0. 3. Make the generator use brw_message_reg(inst-base_mrf) for the header MOVs. Then, src0 can be the number of primitives written, and you don't need src1. That seems simpler to me. brw_ff_sync will resolve your GRF (dst) to an MRF anyway; the above approach would write directly into the MRF, saving you a GRF. Thoughts? +* +* - src1 is the number of primitives written. +*/ + GS_OPCODE_FF_SYNC, }; enum brw_urb_write_flags { diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 0033135..5749061 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -528,6 +528,8 @@ brw_instruction_name(enum opcode op) return set_channel_masks; case GS_OPCODE_GET_INSTANCE_ID: return get_instance_id; + case GS_OPCODE_FF_SYNC: + return ff_sync; default: /* Yes, this leaks. It's in debug code, it should never occur, and if diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 67132c0..72fabdd 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -659,6 +659,9 @@ private: void generate_gs_prepare_channel_masks(struct brw_reg dst); void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src); void generate_gs_get_instance_id(struct brw_reg dst); + void generate_gs_ff_sync(struct brw_reg dst, +struct brw_reg src0, +struct brw_reg src1); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); void generate_scratch_write(vec4_instruction *inst, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index c63b47a..05f4892 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -621,6 +621,42 @@ vec4_generator::generate_gs_get_instance_id(struct brw_reg dst) } void +vec4_generator::generate_gs_ff_sync(struct brw_reg dst, +struct brw_reg src0, +struct brw_reg src1) +{ + /* We use dst to setup the ff_sync header, so we expect it to be +* initialized to R0 by the caller. Here we overwrite dword 0 (cleared +* for now since we are not doing transform feedback) and dword 1 +* (to hold the number of primitives written). +*/ + brw_push_insn_state(p); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0)); + brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0)); + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); + + /* Write allocated URB handle to temporary passed in src0 */ + brw_ff_sync(p, + src0, + 0, + dst, +
[Mesa-dev] [PATCH 07/37] i965/gen6/gs: Implement GS_OPCODE_FF_SYNC.
This implements the FF_SYNC message required in gen6 geometry shaders to get the initial URB handle. --- src/mesa/drivers/dri/i965/brw_defines.h | 14 + src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++ src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++ src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 40 4 files changed, 59 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 3564041..125d728 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1002,6 +1002,20 @@ enum opcode { * - dst is the GRF for gl_InvocationID. */ GS_OPCODE_GET_INSTANCE_ID, + + /** +* Send a FF_SYNC message to allocate initial URB handles (gen6). +* +* - dst will hold the newly allocated VUE handle. It is expected to be +* be initialized so that it can be used to as the FF_SYNC message header +* (that is, it won't do an implied move from R0). +* +* - src0 is a temporary that will be used as writeback register for the +* FF_SYNC operation. +* +* - src1 is the number of primitives written. +*/ + GS_OPCODE_FF_SYNC, }; enum brw_urb_write_flags { diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 0033135..5749061 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -528,6 +528,8 @@ brw_instruction_name(enum opcode op) return set_channel_masks; case GS_OPCODE_GET_INSTANCE_ID: return get_instance_id; + case GS_OPCODE_FF_SYNC: + return ff_sync; default: /* Yes, this leaks. It's in debug code, it should never occur, and if diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 67132c0..72fabdd 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -659,6 +659,9 @@ private: void generate_gs_prepare_channel_masks(struct brw_reg dst); void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src); void generate_gs_get_instance_id(struct brw_reg dst); + void generate_gs_ff_sync(struct brw_reg dst, +struct brw_reg src0, +struct brw_reg src1); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); void generate_scratch_write(vec4_instruction *inst, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index c63b47a..05f4892 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -621,6 +621,42 @@ vec4_generator::generate_gs_get_instance_id(struct brw_reg dst) } void +vec4_generator::generate_gs_ff_sync(struct brw_reg dst, +struct brw_reg src0, +struct brw_reg src1) +{ + /* We use dst to setup the ff_sync header, so we expect it to be +* initialized to R0 by the caller. Here we overwrite dword 0 (cleared +* for now since we are not doing transform feedback) and dword 1 +* (to hold the number of primitives written). +*/ + brw_push_insn_state(p); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0)); + brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0)); + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); + + /* Write allocated URB handle to temporary passed in src0 */ + brw_ff_sync(p, + src0, + 0, + dst, + 1, /* allocate */ + 1, /* response length */ + 0 /* eot */); + + /* Now put allocated urb handle in dst.0 */ + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0)); + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); +} + +void vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index) { @@ -1198,6 +1234,10 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, generate_gs_get_instance_id(dst); break; + case GS_OPCODE_FF_SYNC: + generate_gs_ff_sync(dst, src[0], src[1]); + break; + case SHADER_OPCODE_SHADER_TIME_ADD: brw_shader_time_add(p, src[0], prog_data-base.binding_table.shader_time_start); -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org