Kenneth Graunke <kenn...@whitecape.org> writes: > On 08/26/2013 03:12 PM, Paul Berry wrote: >> --- >> src/mesa/drivers/dri/i965/brw_state.h | 41 ++++++++++ >> src/mesa/drivers/dri/i965/gen7_vs_state.c | 123 >> ++++++++++++++++++++---------- >> 2 files changed, 122 insertions(+), 42 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_state.h >> b/src/mesa/drivers/dri/i965/brw_state.h >> index b54338a..efef994 100644 >> --- a/src/mesa/drivers/dri/i965/brw_state.h >> +++ b/src/mesa/drivers/dri/i965/brw_state.h >> @@ -128,6 +128,38 @@ extern const struct brw_tracked_state gen7_wm_state; >> extern const struct brw_tracked_state haswell_cut_index; >> >> >> +/** >> + * Parameters that differ between Gen7 VS and GS state upload commands. >> + */ >> +struct gen7_vec4_upload_params >> +{ >> + /** >> + * Command used to set the binding table pointers for this stage. >> + */ >> + unsigned binding_table_pointers_cmd; >> + >> + /** >> + * Command used to set the sampler state pointers for this stage. >> + */ >> + unsigned sampler_state_pointers_cmd; >> + >> + /** >> + * Command used to send constants for this stage. >> + */ >> + unsigned constant_cmd; >> + >> + /** >> + * Command used to send state for this stage. >> + */ >> + unsigned state_cmd; >> + >> + /** >> + * Size of the state command for this stage. >> + */ >> + unsigned state_cmd_size; >> +}; >> + >> + >> /* brw_misc_state.c */ >> void brw_upload_invariant_state(struct brw_context *brw); >> uint32_t >> @@ -240,6 +272,15 @@ brw_vec4_upload_binding_table(struct brw_context *brw, >> struct brw_vec4_context_base *vec4_ctx, >> const struct brw_vec4_prog_data *prog_data); >> >> +/* gen7_vs_state.c */ >> +void >> +gen7_upload_vec4_state(struct brw_context *brw, >> + const struct gen7_vec4_upload_params *upload_params, >> + const struct brw_vec4_context_base *vec4_ctx, >> + bool active, bool alt_floating_point_mode, >> + const struct brw_vec4_prog_data *prog_data, >> + const unsigned *stage_specific_cmd_data); >> + >> #ifdef __cplusplus >> } >> #endif >> diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c >> b/src/mesa/drivers/dri/i965/gen7_vs_state.c >> index 30fe802..fd81112 100644 >> --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c >> +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c >> @@ -29,33 +29,31 @@ >> #include "program/prog_statevars.h" >> #include "intel_batchbuffer.h" >> >> -static void >> -upload_vs_state(struct brw_context *brw) >> -{ >> - struct gl_context *ctx = &brw->ctx; >> - const struct brw_vec4_context_base *vec4_ctx = &brw->vs.base; >> - uint32_t floating_point_mode = 0; >> - const int max_threads_shift = brw->is_haswell ? >> - HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT; >> >> - gen7_emit_vs_workaround_flush(brw); >> - >> - /* BRW_NEW_VS_BINDING_TABLE */ >> +void >> +gen7_upload_vec4_state(struct brw_context *brw, >> + const struct gen7_vec4_upload_params *upload_params, >> + const struct brw_vec4_context_base *vec4_ctx, >> + bool active, bool alt_floating_point_mode, >> + const struct brw_vec4_prog_data *prog_data, >> + const unsigned *stage_specific_cmd_data) >> +{ >> + /* BRW_NEW_*_BINDING_TABLE */ >> BEGIN_BATCH(2); >> - OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2)); >> + OUT_BATCH(upload_params->binding_table_pointers_cmd << 16 | (2 - 2)); >> OUT_BATCH(vec4_ctx->bind_bo_offset); >> ADVANCE_BATCH(); >> >> /* CACHE_NEW_SAMPLER */ >> BEGIN_BATCH(2); >> - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2)); >> + OUT_BATCH(upload_params->sampler_state_pointers_cmd << 16 | (2 - 2)); >> OUT_BATCH(vec4_ctx->sampler_offset); >> ADVANCE_BATCH(); >> >> - if (vec4_ctx->push_const_size == 0) { >> + if (!active || vec4_ctx->push_const_size == 0) { >> /* Disable the push constant buffers. */ >> BEGIN_BATCH(7); >> - OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2)); >> + OUT_BATCH(upload_params->constant_cmd << 16 | (7 - 2)); >> OUT_BATCH(0); >> OUT_BATCH(0); >> OUT_BATCH(0); >> @@ -65,10 +63,10 @@ upload_vs_state(struct brw_context *brw) >> ADVANCE_BATCH(); >> } else { >> BEGIN_BATCH(7); >> - OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2)); >> + OUT_BATCH(upload_params->constant_cmd << 16 | (7 - 2)); >> OUT_BATCH(vec4_ctx->push_const_size); >> OUT_BATCH(0); >> - /* Pointer to the VS constant buffer. Covered by the set of >> + /* Pointer to the stage's constant buffer. Covered by the set of >> * state flags from gen6_prepare_wm_contants >> */ >> OUT_BATCH(vec4_ctx->push_const_offset | GEN7_MOCS_L3); >> @@ -78,36 +76,77 @@ upload_vs_state(struct brw_context *brw) >> ADVANCE_BATCH(); >> } >> >> + BEGIN_BATCH(upload_params->state_cmd_size); >> + OUT_BATCH(upload_params->state_cmd << 16 | >> + (upload_params->state_cmd_size - 2)); >> + if (active) { >> + OUT_BATCH(vec4_ctx->prog_offset); >> + OUT_BATCH((alt_floating_point_mode ? GEN6_FLOATING_POINT_MODE_ALT >> + : GEN6_FLOATING_POINT_MODE_IEEE_754) | >> + ((ALIGN(vec4_ctx->sampler_count, 4)/4) << >> + GEN6_SAMPLER_COUNT_SHIFT)); >> + >> + if (prog_data->total_scratch) { >> + OUT_RELOC(vec4_ctx->scratch_bo, >> + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, >> + ffs(prog_data->total_scratch) - 11); >> + } else { >> + OUT_BATCH(0); >> + } >> + } else { >> + OUT_BATCH(0); /* prog_bo */ >> + OUT_BATCH((0 << GEN6_SAMPLER_COUNT_SHIFT) | >> + (0 << GEN6_BINDING_TABLE_ENTRY_COUNT_SHIFT)); >> + OUT_BATCH(0); /* scratch space base offset */ >> + } >> + for (int i = 0; i < upload_params->state_cmd_size - 4; ++i) >> + OUT_BATCH(stage_specific_cmd_data[i]); >> + ADVANCE_BATCH(); >> +} >> + >> + >> +static const struct gen7_vec4_upload_params vs_upload_params = { >> + .binding_table_pointers_cmd = _3DSTATE_BINDING_TABLE_POINTERS_VS, >> + .sampler_state_pointers_cmd = _3DSTATE_SAMPLER_STATE_POINTERS_VS, >> + .constant_cmd = _3DSTATE_CONSTANT_VS, >> + .state_cmd = _3DSTATE_VS, >> + .state_cmd_size = 6, >> +}; >> + >> + >> +static void >> +upload_vs_state(struct brw_context *brw) >> +{ >> + struct gl_context *ctx = &brw->ctx; >> + const struct brw_vec4_context_base *vec4_ctx = &brw->vs.base; >> + const int max_threads_shift = brw->is_haswell ? >> + HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT; >> + /* CACHE_NEW_VS_PROG */ >> + const struct brw_vec4_prog_data *prog_data = &brw->vs.prog_data->base; >> + >> + gen7_emit_vs_workaround_flush(brw); >> + >> /* Use ALT floating point mode for ARB vertex programs, because they >> * require 0^0 == 1. >> */ >> - if (ctx->Shader.CurrentVertexProgram == NULL) >> - floating_point_mode = GEN6_FLOATING_POINT_MODE_ALT; >> - >> - BEGIN_BATCH(6); >> - OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); >> - OUT_BATCH(vec4_ctx->prog_offset); >> - OUT_BATCH(floating_point_mode | >> - ((ALIGN(vec4_ctx->sampler_count, 4)/4) << >> - GEN6_SAMPLER_COUNT_SHIFT)); >> - >> - if (brw->vs.prog_data->base.total_scratch) { >> - OUT_RELOC(vec4_ctx->scratch_bo, >> - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, >> - ffs(brw->vs.prog_data->base.total_scratch) - 11); >> - } else { >> - OUT_BATCH(0); >> - } >> + bool alt_floating_point_mode = (ctx->Shader.CurrentVertexProgram == >> NULL); >> >> - OUT_BATCH((brw->vs.prog_data->base.dispatch_grf_start_reg << >> - GEN6_VS_DISPATCH_START_GRF_SHIFT) | >> - (brw->vs.prog_data->base.urb_read_length << >> GEN6_VS_URB_READ_LENGTH_SHIFT) | >> - (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); >> + unsigned stage_specific_cmd_data[2]; >> + stage_specific_cmd_data[0] = >> + (prog_data->dispatch_grf_start_reg << >> + GEN6_VS_DISPATCH_START_GRF_SHIFT) | >> + (prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | >> + (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT); >> + stage_specific_cmd_data[1] = >> + ((brw->max_vs_threads - 1) << max_threads_shift) | >> + GEN6_VS_STATISTICS_ENABLE | >> + GEN6_VS_ENABLE; >> >> - OUT_BATCH(((brw->max_vs_threads - 1) << max_threads_shift) | >> - GEN6_VS_STATISTICS_ENABLE | >> - GEN6_VS_ENABLE); >> - ADVANCE_BATCH(); >> + /* BRW_NEW_VS_BINDING_TABLE */ >> + /* CACHE_NEW_SAMPLER */ >> + gen7_upload_vec4_state(brw, &vs_upload_params, vec4_ctx, true /* active >> */, >> + alt_floating_point_mode, prog_data, >> + stage_specific_cmd_data); >> } >> >> const struct brw_tracked_state gen7_vs_state = { >> > > I can't quite put my finger on why, but I really dislike this code > reuse. It just seems really awkward to me.
The struct argument is what makes me cringe. If upload_vs_state() was: static void upload_vs_state(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; uint32_t floating_point_mode = 0; const int max_threads_shift = brw->is_haswell ? HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT; /* the VS has a special requirement for synchronization due to a bug * in its constant handling. */ gen7_emit_vs_workaround_flush(brw); emit_binding_table_pointers(_3DSTATE_BINDING_TABLE_POINTERS_VS, brw->vs.bind_bo_offset); emit_sampler(_3DSTATE_SAMPLER_STATE_POINTERS_VS, brw->vs.sampler_offset); emit_constants(3DSTATE_CONSTANT_VS, &brw->vs.stage_state); /* inline emit of 3DSTATE_VS just like we have today, since there's a * whole bunch of stage-specific stuff as is evident from the * alt_floating_point_mode, state_cmd, state_cmd_size, * stage_specific_cmd_data arguments. */ } that would share the definitely-common code without the contortions to make 3DSTATE_VS be "shared". Plus you get to fairly trivially reuse those 3 shared packets from blorp, which this patch didn't do today. (only "fairly" trivially because you'd want to have individual enable/disable packets for the constants to really trivially reuse from blorp).
pgpR_1TipF8EZ.pgp
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev