On Sun, May 29, 2016 at 3:38 PM, Jordan Justen <jordan.l.jus...@intel.com> wrote:
> We need information about push constants in a few places for the GL > driver, and another couple places for the vulkan driver. > > When we add support for uploading both a common (cross-thread) set of > push constants, combined with the previous per-thread push constant > data, things are going to get even more complicated. To simplify > things, we add push constant info into the cs prog_data struct. > > The cross-thread constant support is added as of Haswell. To support > it we need to make sure all push constants with uniform values are > added to earlier registers. The register that varies per thread and > holds the thread invocation's unique local ID needs to be added last. > > Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com> > --- > src/mesa/drivers/dri/i965/brw_compiler.h | 12 +++++++ > src/mesa/drivers/dri/i965/brw_fs.cpp | 58 > ++++++++++++++++++++++++++++++++ > 2 files changed, 70 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h > b/src/mesa/drivers/dri/i965/brw_compiler.h > index f1f9e56..dda6297 100644 > --- a/src/mesa/drivers/dri/i965/brw_compiler.h > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h > @@ -424,6 +424,12 @@ struct brw_wm_prog_data { > int urb_setup[VARYING_SLOT_MAX]; > }; > > +struct brw_push_const_block { > + unsigned dwords; /* Dword count, not reg aligned */ > + unsigned regs; > + unsigned size; /* Bytes, register aligned */ > +}; > + > struct brw_cs_prog_data { > struct brw_stage_prog_data base; > > @@ -437,6 +443,12 @@ struct brw_cs_prog_data { > int thread_local_id_index; > > struct { > + struct brw_push_const_block cross_thread; > + struct brw_push_const_block per_thread; > + struct brw_push_const_block total; > + } push; > + > + struct { > /** @{ > * surface indices the CS-specific surfaces > */ > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 836ade0..bd37fbd 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ 
b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -6479,6 +6479,61 @@ fs_visitor::emit_cs_work_group_id_setup() > } > > static void > +fill_push_const_block_info(struct brw_push_const_block *block, unsigned > dwords) > +{ > + block->dwords = dwords; > + block->regs = DIV_ROUND_UP(dwords, 8); > + block->size = block->regs * 32; > +} > + > +static void > +cs_fill_push_const_info(const struct brw_device_info *devinfo, > + struct brw_cs_prog_data *cs_prog_data) > +{ > + const struct brw_stage_prog_data *prog_data = > + (struct brw_stage_prog_data*) cs_prog_data; > + bool fill_thread_id = > + cs_prog_data->thread_local_id_index >= 0 && > + cs_prog_data->thread_local_id_index < (int)prog_data->nr_params; > + bool cross_thread_supported = devinfo->gen > 7 || devinfo->is_haswell; > + > + /* The thread ID should be stored in the last param dword */ > + assert(prog_data->nr_params > 0 || !fill_thread_id); > + assert(!fill_thread_id || > + cs_prog_data->thread_local_id_index == > + (int)prog_data->nr_params - 1); > + > + unsigned cross_thread_dwords, per_thread_dwords; > + if (cross_thread_supported && fill_thread_id) { > + /* Fill all but the last register with cross-thread payload */ > + cross_thread_dwords = 8 * (cs_prog_data->thread_local_id_index / 8); > + per_thread_dwords = prog_data->nr_params - cross_thread_dwords; > + assert(per_thread_dwords > 0 && per_thread_dwords <= 8); >

If I understand you correctly here, you're putting the bottom registers (the dword count aligned down to a multiple of 8) into the cross-thread space and putting whatever is left, including the thread_local_id, into the per-thread space. Seems reasonable. I probably would have been lazier and burned a whole register on the local_id. :-)

> + } else if (cross_thread_supported && !fill_thread_id) { > + /* Fill all data using cross-thread payload */ > + cross_thread_dwords = prog_data->nr_params; > + per_thread_dwords = 0u; > + } else { > + cross_thread_dwords = 0u; > + per_thread_dwords = prog_data->nr_params; >

Mind putting the !cross_thread_supported case first?

> + } > + > + fill_push_const_block_info(&cs_prog_data->push.cross_thread, > cross_thread_dwords); > + fill_push_const_block_info(&cs_prog_data->push.per_thread, > per_thread_dwords); > + > + unsigned total_dwords = > + (cs_prog_data->push.per_thread.size * cs_prog_data->threads + > + cs_prog_data->push.cross_thread.size) / 4; > + fill_push_const_block_info(&cs_prog_data->push.total, total_dwords); > + > + assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 || > + cs_prog_data->push.per_thread.size == 0); > + assert(cs_prog_data->push.cross_thread.dwords + > + cs_prog_data->push.per_thread.dwords == > + prog_data->nr_params); > +} > + > +static void > cs_set_simd_size(struct brw_cs_prog_data *cs_prog_data, unsigned size) > { > cs_prog_data->simd_size = size; > @@ -6536,6 +6591,7 @@ brw_compile_cs(const struct brw_compiler *compiler, > void *log_data, > } else { > cfg = v8.cfg; > cs_set_simd_size(prog_data, 8); > + cs_fill_push_const_info(compiler->devinfo, prog_data); > prog_data->base.dispatch_grf_start_reg = v8.payload.num_regs; > } > } > @@ -6561,6 +6617,7 @@ brw_compile_cs(const struct brw_compiler *compiler, > void *log_data, > } else { > cfg = v16.cfg; > cs_set_simd_size(prog_data, 16); > + cs_fill_push_const_info(compiler->devinfo, prog_data); > prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs; > } > } > @@ -6588,6 +6645,7 @@ brw_compile_cs(const struct brw_compiler *compiler, > void *log_data, > } else { > cfg = v32.cfg; > cs_set_simd_size(prog_data, 32); > + cs_fill_push_const_info(compiler->devinfo, prog_data); > } > } > > -- > 2.8.1 > > _______________________________________________ > mesa-dev mailing list > 
mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev