On Wed, Jun 1, 2016 at 3:04 PM, Jordan Justen <jordan.l.jus...@intel.com> wrote:
> Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com> > --- > src/intel/vulkan/anv_cmd_buffer.c | 7 +++---- > src/intel/vulkan/anv_private.h | 1 - > src/intel/vulkan/gen7_cmd_buffer.c | 2 +- > src/intel/vulkan/gen8_cmd_buffer.c | 2 +- > src/intel/vulkan/genX_cmd_buffer.c | 4 ++-- > src/intel/vulkan/genX_pipeline.c | 4 +--- > src/mesa/drivers/dri/i965/brw_compiler.h | 1 + > src/mesa/drivers/dri/i965/brw_fs.cpp | 15 ++++++++++++--- > src/mesa/drivers/dri/i965/gen7_cs_state.c | 32 > ++++++++++--------------------- > 9 files changed, 31 insertions(+), 37 deletions(-) > > diff --git a/src/intel/vulkan/anv_cmd_buffer.c > b/src/intel/vulkan/anv_cmd_buffer.c > index 4d0fd7c..63d096c 100644 > --- a/src/intel/vulkan/anv_cmd_buffer.c > +++ b/src/intel/vulkan/anv_cmd_buffer.c > @@ -1076,9 +1076,8 @@ anv_cmd_buffer_cs_push_constants(struct > anv_cmd_buffer *cmd_buffer) > if (reg_aligned_constant_size == 0) > return (struct anv_state) { .offset = 0 }; > > - const unsigned threads = pipeline->cs_thread_width_max; > const unsigned total_push_constants_size = > - reg_aligned_constant_size * threads; > + reg_aligned_constant_size * cs_prog_data->threads; > const unsigned push_constant_alignment = > cmd_buffer->device->info.gen < 8 ? 
32 : 64; > const unsigned aligned_total_push_constants_size = > @@ -1091,7 +1090,7 @@ anv_cmd_buffer_cs_push_constants(struct > anv_cmd_buffer *cmd_buffer) > /* Walk through the param array and fill the buffer with data */ > uint32_t *u32_map = state.map; > > - brw_cs_fill_local_id_payload(cs_prog_data, u32_map, threads, > + brw_cs_fill_local_id_payload(cs_prog_data, u32_map, > cs_prog_data->threads, > reg_aligned_constant_size); > > /* Setup uniform data for the first thread */ > @@ -1102,7 +1101,7 @@ anv_cmd_buffer_cs_push_constants(struct > anv_cmd_buffer *cmd_buffer) > > /* Copy uniform data from the first thread to every other thread */ > const size_t uniform_data_size = prog_data->nr_params * > sizeof(uint32_t); > - for (unsigned t = 1; t < threads; t++) { > + for (unsigned t = 1; t < cs_prog_data->threads; t++) { > memcpy(&u32_map[t * param_aligned_count + local_id_dwords], > &u32_map[local_id_dwords], > uniform_data_size); > diff --git a/src/intel/vulkan/anv_private.h > b/src/intel/vulkan/anv_private.h > index 7325f3f..26ffbd6 100644 > --- a/src/intel/vulkan/anv_private.h > +++ b/src/intel/vulkan/anv_private.h > @@ -1474,7 +1474,6 @@ struct anv_pipeline { > bool primitive_restart; > uint32_t topology; > > - uint32_t cs_thread_width_max; > Hooray! Less crap in the pipeline! 
> uint32_t cs_right_mask; > > struct { > diff --git a/src/intel/vulkan/gen7_cmd_buffer.c > b/src/intel/vulkan/gen7_cmd_buffer.c > index 331275e..40ab008 100644 > --- a/src/intel/vulkan/gen7_cmd_buffer.c > +++ b/src/intel/vulkan/gen7_cmd_buffer.c > @@ -271,7 +271,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer > *cmd_buffer) > .BarrierEnable = cs_prog_data->uses_barrier, > .SharedLocalMemorySize = slm_size, > .NumberofThreadsinGPGPUThreadGroup = > - pipeline->cs_thread_width_max); > + cs_prog_data->threads); > > const uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * > sizeof(uint32_t); > anv_batch_emit(&cmd_buffer->batch, > diff --git a/src/intel/vulkan/gen8_cmd_buffer.c > b/src/intel/vulkan/gen8_cmd_buffer.c > index 547fedd..e139e8a 100644 > --- a/src/intel/vulkan/gen8_cmd_buffer.c > +++ b/src/intel/vulkan/gen8_cmd_buffer.c > @@ -356,7 +356,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer > *cmd_buffer) > .BarrierEnable = cs_prog_data->uses_barrier, > .SharedLocalMemorySize = slm_size, > .NumberofThreadsinGPGPUThreadGroup = > - pipeline->cs_thread_width_max); > + cs_prog_data->threads); > > uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * > sizeof(uint32_t); > anv_batch_emit(&cmd_buffer->batch, > diff --git a/src/intel/vulkan/genX_cmd_buffer.c > b/src/intel/vulkan/genX_cmd_buffer.c > index e7d322c..d9acf58 100644 > --- a/src/intel/vulkan/genX_cmd_buffer.c > +++ b/src/intel/vulkan/genX_cmd_buffer.c > @@ -773,7 +773,7 @@ void genX(CmdDispatch)( > ggw.SIMDSize = prog_data->simd_size / 16; > ggw.ThreadDepthCounterMaximum = 0; > ggw.ThreadHeightCounterMaximum = 0; > - ggw.ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - > 1; > + ggw.ThreadWidthCounterMaximum = prog_data->threads - 1; > ggw.ThreadGroupIDXDimension = x; > ggw.ThreadGroupIDYDimension = y; > ggw.ThreadGroupIDZDimension = z; > @@ -874,7 +874,7 @@ void genX(CmdDispatchIndirect)( > ggw.SIMDSize = prog_data->simd_size / 16; > ggw.ThreadDepthCounterMaximum = 0; > 
ggw.ThreadHeightCounterMaximum = 0; > - ggw.ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - > 1; > + ggw.ThreadWidthCounterMaximum = prog_data->threads - 1; > ggw.RightExecutionMask = pipeline->cs_right_mask; > ggw.BottomExecutionMask = 0xffffffff; > } > diff --git a/src/intel/vulkan/genX_pipeline.c > b/src/intel/vulkan/genX_pipeline.c > index 918a9a4..1776577 100644 > --- a/src/intel/vulkan/genX_pipeline.c > +++ b/src/intel/vulkan/genX_pipeline.c > @@ -97,8 +97,6 @@ genX(compute_pipeline_create)( > > uint32_t group_size = cs_prog_data->local_size[0] * > cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; > - pipeline->cs_thread_width_max = > - DIV_ROUND_UP(group_size, cs_prog_data->simd_size); > uint32_t remainder = group_size & (cs_prog_data->simd_size - 1); > > if (remainder > 0) > @@ -107,7 +105,7 @@ genX(compute_pipeline_create)( > pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size); > > const uint32_t vfe_curbe_allocation = > - push_constant_regs * pipeline->cs_thread_width_max; > + push_constant_regs * cs_prog_data->threads; > > anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), vfe) { > vfe.ScratchSpaceBasePointer = > pipeline->scratch_start[MESA_SHADER_COMPUTE]; > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h > b/src/mesa/drivers/dri/i965/brw_compiler.h > index bed969c..f1f9e56 100644 > --- a/src/mesa/drivers/dri/i965/brw_compiler.h > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h > @@ -430,6 +430,7 @@ struct brw_cs_prog_data { > GLuint dispatch_grf_start_reg_16; > unsigned local_size[3]; > unsigned simd_size; > + unsigned threads; > bool uses_barrier; > bool uses_num_work_groups; > unsigned local_invocation_id_regs; > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index d83d9e0..7e5d583 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -6561,6 +6561,15 @@ fs_visitor::emit_cs_work_group_id_setup() > return reg; > } > 
> +static void > +cs_set_simd_size(struct brw_cs_prog_data *cs_prog_data, unsigned size) > +{ > + cs_prog_data->simd_size = size; > + unsigned group_size = cs_prog_data->local_size[0] * > + cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; > + cs_prog_data->threads = (group_size + size - 1) / size; > +} > + > const unsigned * > brw_compile_cs(const struct brw_compiler *compiler, void *log_data, > void *mem_ctx, > @@ -6617,7 +6626,7 @@ brw_compile_cs(const struct brw_compiler *compiler, > void *log_data, > fail_msg = v8.fail_msg; > } else { > cfg = v8.cfg; > - prog_data->simd_size = 8; > + cs_set_simd_size(prog_data, 8); > prog_data->base.dispatch_grf_start_reg = v8.payload.num_regs; > } > } > @@ -6642,7 +6651,7 @@ brw_compile_cs(const struct brw_compiler *compiler, > void *log_data, > } > } else { > cfg = v16.cfg; > - prog_data->simd_size = 16; > + cs_set_simd_size(prog_data, 16); > prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs; > } > } > @@ -6669,7 +6678,7 @@ brw_compile_cs(const struct brw_compiler *compiler, > void *log_data, > } > } else { > cfg = v32.cfg; > - prog_data->simd_size = 32; > + cs_set_simd_size(prog_data, 32); > } > } > > diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c > b/src/mesa/drivers/dri/i965/gen7_cs_state.c > index 7f484dd..619edfb 100644 > --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c > +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c > @@ -33,17 +33,6 @@ > #include "program/prog_statevars.h" > #include "compiler/glsl/ir_uniform.h" > > -static unsigned > -get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data) > -{ > - const unsigned simd_size = cs_prog_data->simd_size; > - unsigned group_size = cs_prog_data->local_size[0] * > - cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; > - > - return (group_size + simd_size - 1) / simd_size; > -} > - > - > static void > brw_upload_cs_state(struct brw_context *brw) > { > @@ -79,7 +68,6 @@ brw_upload_cs_state(struct brw_context *brw) > 
(prog_data->nr_params + local_id_dwords) * > sizeof(gl_constant_value); > unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, > 32); > unsigned push_constant_regs = reg_aligned_constant_size / 32; > - unsigned threads = get_cs_thread_count(cs_prog_data); > > uint32_t dwords = brw->gen < 8 ? 8 : 9; > BEGIN_BATCH(dwords); > @@ -129,7 +117,8 @@ brw_upload_cs_state(struct brw_context *brw) > * > * Note: The constant data is built in brw_upload_cs_push_constants > below. > */ > - const uint32_t vfe_curbe_allocation = push_constant_regs * threads; > + const uint32_t vfe_curbe_allocation = > + push_constant_regs * cs_prog_data->threads; > OUT_BATCH(SET_FIELD(vfe_urb_allocation, MEDIA_VFE_STATE_URB_ALLOC) | > SET_FIELD(vfe_curbe_allocation, > MEDIA_VFE_STATE_CURBE_ALLOC)); > OUT_BATCH(0); > @@ -141,7 +130,7 @@ brw_upload_cs_state(struct brw_context *brw) > BEGIN_BATCH(4); > OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2)); > OUT_BATCH(0); > - OUT_BATCH(ALIGN(reg_aligned_constant_size * threads, 64)); > + OUT_BATCH(ALIGN(reg_aligned_constant_size * cs_prog_data->threads, > 64)); > OUT_BATCH(stage_state->push_const_offset); > ADVANCE_BATCH(); > } > @@ -163,9 +152,9 @@ brw_upload_cs_state(struct brw_context *brw) > desc[dw++] = SET_FIELD(push_constant_regs, MEDIA_CURBE_READ_LENGTH); > const uint32_t media_threads = > brw->gen >= 8 ? 
> - SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) : > - SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT); > - assert(threads <= brw->max_cs_threads); > + SET_FIELD(cs_prog_data->threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) : > + SET_FIELD(cs_prog_data->threads, MEDIA_GPGPU_THREAD_COUNT); > + assert(cs_prog_data->threads <= brw->max_cs_threads); > > assert(prog_data->total_shared <= 64 * 1024); > uint32_t slm_size = 0; > @@ -247,21 +236,20 @@ brw_upload_cs_push_constants(struct brw_context *brw, > const unsigned param_aligned_count = > reg_aligned_constant_size / sizeof(*param); > > - unsigned threads = get_cs_thread_count(cs_prog_data); > - > param = (gl_constant_value*) > brw_state_batch(brw, type, > - ALIGN(reg_aligned_constant_size * threads, 64), > + ALIGN(reg_aligned_constant_size * > + cs_prog_data->threads, 64), > 64, &stage_state->push_const_offset); > assert(param); > > STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); > > - brw_cs_fill_local_id_payload(cs_prog_data, param, threads, > + brw_cs_fill_local_id_payload(cs_prog_data, param, > cs_prog_data->threads, > reg_aligned_constant_size); > > /* _NEW_PROGRAM_CONSTANTS */ > - for (t = 0; t < threads; t++) { > + for (t = 0; t < cs_prog_data->threads; t++) { > gl_constant_value *next_param = > &param[t * param_aligned_count + local_id_dwords]; > for (i = 0; i < prog_data->nr_params; i++) { > -- > 2.8.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev