On Mon, Sep 29, 2014 at 9:50 AM, Matt Turner <matts...@gmail.com> wrote:

> On Fri, Sep 26, 2014 at 12:24 PM, Jason Ekstrand <ja...@jlekstrand.net>
> wrote:
> > This commit reworks copy propagation a bit to support propagating the
> > copying of partial registers.  This comes up every time we have pull
> > constants because we do a pull constant read immediately followed by a
> move
> > to splat the one component of the out to 8 or 16-wide.  This allows us to
> > eliminate the copy and simply use the one component of the register.
> >
> > Shader DB results:
> >
> > total instructions in shared programs: 5044937 -> 5044428 (-0.01%)
> > instructions in affected programs:     66112 -> 65603 (-0.77%)
> > GAINED:                                0
> > LOST:                                  0
> >
> > Signed-off-by: Jason Ekstrand <jason.ekstr...@intel.com>
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs.h                 |  1 +
> >  .../drivers/dri/i965/brw_fs_copy_propagation.cpp   | 83
> ++++++++++++++++------
> >  2 files changed, 64 insertions(+), 20 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> b/src/mesa/drivers/dri/i965/brw_fs.h
> > index 50b5fc1..9b63114 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.h
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> > @@ -337,6 +337,7 @@ public:
> >     bool opt_cse_local(bblock_t *block);
> >     bool opt_copy_propagate();
> >     bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
> > +   bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
> >     bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block,
> >                                   exec_list *acp);
> >     void opt_drop_redundant_mov_to_flags();
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> > index e5816df..a97dc04 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> > @@ -277,24 +277,30 @@ is_logic_op(enum opcode opcode)
> >  bool
> >  fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
> >  {
> > +   if (inst->src[arg].file != GRF)
> > +      return false;
> > +
> >     if (entry->src.file == IMM)
> >        return false;
> > +   assert(entry->src.file == GRF || entry->src.file == UNIFORM);
> >
> >     if (entry->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
> >         inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD)
> >        return false;
> >
> > -   /* Bail if inst is reading more than entry is writing. */
> > -   if ((inst->regs_read(this, arg) * inst->src[arg].stride *
> > -        type_sz(inst->src[arg].type)) > type_sz(entry->dst.type))
> > +   assert(entry->dst.file == GRF);
> > +   if (inst->src[arg].reg != entry->dst.reg)
> >        return false;
> >
> > -   if (inst->src[arg].file != entry->dst.file ||
> > -       inst->src[arg].reg != entry->dst.reg ||
> > -       inst->src[arg].reg_offset != entry->dst.reg_offset ||
> > -       inst->src[arg].subreg_offset != entry->dst.subreg_offset) {
> > +   /* Bail if inst is reading a range that isn't contained in the range
> > +    * that entry is writing.
> > +    */
> > +   int reg_size = dispatch_width * sizeof(float);
> > +   if (inst->src[arg].reg_offset < entry->dst.reg_offset ||
> > +       (inst->src[arg].reg_offset * reg_size +
> inst->src[arg].subreg_offset +
> > +        inst->regs_read(this, arg) * inst->src[arg].stride * reg_size) >
> > +       (entry->dst.reg_offset + 1) * reg_size)
> >        return false;
> > -   }
> >
> >     /* See resolve_ud_negate() and comment in brw_fs_emit.cpp. */
> >     if (inst->conditional_mod &&
> > @@ -361,11 +367,39 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int
> arg, acp_entry *entry)
> >
> >     inst->src[arg].file = entry->src.file;
> >     inst->src[arg].reg = entry->src.reg;
> > -   inst->src[arg].reg_offset = entry->src.reg_offset;
> > -   inst->src[arg].subreg_offset = entry->src.subreg_offset;
> >     inst->src[arg].stride *= entry->src.stride;
> >     inst->saturate = inst->saturate || entry->saturate;
> >
> > +   switch (entry->src.file) {
> > +   case BAD_FILE:
> > +   case HW_REG:
> > +   case UNIFORM:
> > +      inst->src[arg].reg_offset = entry->src.reg_offset;
> > +      inst->src[arg].subreg_offset = entry->src.subreg_offset;
> > +      break;
> > +   case GRF:
> > +      {
> > +         /* In this case, we have to deal with mapping parts of vgrfs to
> > +          * other parts of vgrfs so we have to do some reg_offset magic.
> > +          */
> > +
> > +         /* Compute the offset of inst->src[arg] relative to inst->dst
> */
> > +         assert(entry->dst.subreg_offset == 0);
> > +         int rel_offset = inst->src[arg].reg_offset -
> entry->dst.reg_offset;
> > +         int rel_suboffset = inst->src[arg].subreg_offset;
> > +
> > +         /* Compute the final register offset (in bytes) */
> > +         int offset = entry->src.reg_offset * reg_size +
> entry->src.subreg_offset;
> > +         offset += rel_offset * reg_size + rel_suboffset;
> > +         inst->src[arg].reg_offset = offset / reg_size;
> > +         inst->src[arg].subreg_offset = offset % reg_size;
> > +      }
> > +      break;
> > +   default:
> > +      unreachable("Invalid register file");
> > +      break;
> > +   }
> > +
> >     if (!inst->src[arg].abs) {
> >        inst->src[arg].abs = entry->src.abs;
> >        inst->src[arg].negate ^= entry->src.negate;
> > @@ -375,9 +409,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int
> arg, acp_entry *entry)
> >  }
> >
> >
> > -static bool
> > -try_constant_propagate(struct brw_context *brw, fs_inst *inst,
> > -                       acp_entry *entry)
> > +bool
> > +fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
> >  {
> >     bool progress = false;
> >
> > @@ -385,12 +418,22 @@ try_constant_propagate(struct brw_context *brw,
> fs_inst *inst,
> >        return false;
> >
> >     for (int i = inst->sources - 1; i >= 0; i--) {
> > -      if (inst->src[i].file != entry->dst.file ||
> > -          inst->src[i].reg != entry->dst.reg ||
> > -          inst->src[i].reg_offset != entry->dst.reg_offset ||
> > -          inst->src[i].subreg_offset != entry->dst.subreg_offset ||
> > -          inst->src[i].type != entry->dst.type ||
> > -          inst->src[i].stride > 1)
> > +      if (inst->src[i].file != GRF)
> > +         continue;
> > +
> > +      assert(entry->dst.file == GRF);
> > +      if (inst->src[i].reg != entry->dst.reg ||
> > +          inst->src[i].type != entry->dst.type)
> > +         continue;
> > +
> > +      /* Bail if inst is reading a range that isn't contained in the
> range
> > +       * that entry is writing.
> > +       */
> > +      int reg_size = dispatch_width * sizeof(float);
> > +      if (inst->src[i].reg_offset < entry->dst.reg_offset ||
> > +          (inst->src[i].reg_offset * reg_size +
> inst->src[i].subreg_offset +
> > +           inst->regs_read(this, i) * inst->src[i].stride * reg_size) >
> > +          (entry->dst.reg_offset + 1) * reg_size)
> >           continue;
>
> Does this bit, the constant propagation code, actually benefit from
> this? I guess it'd let an immediate loaded with a mov(16) be constant
> propagated into two SIMD8 instructions? That seems plausible, I was
> just a little surprised at first.
>

Yes, that's exactly what it's for
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to