On Tue, Feb 12, 2019 at 5:57 AM Iago Toral Quiroga <ito...@igalia.com> wrote:
> At the very least we need it to handle HF too, since we are doing > constant propagation for MAD and LRP, which relies on this pass > to promote the immediates to GRF in the end, but ideally > we want it to support even more types so we can take advantage > of it to improve register pressure in some scenarios. > --- > .../compiler/brw_fs_combine_constants.cpp | 202 ++++++++++++++++-- > 1 file changed, 180 insertions(+), 22 deletions(-) > > diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp > b/src/intel/compiler/brw_fs_combine_constants.cpp > index 7343f77bb45..5d79f1a0826 100644 > --- a/src/intel/compiler/brw_fs_combine_constants.cpp > +++ b/src/intel/compiler/brw_fs_combine_constants.cpp > @@ -36,6 +36,7 @@ > > #include "brw_fs.h" > #include "brw_cfg.h" > +#include "util/half_float.h" > > using namespace brw; > > @@ -114,8 +115,17 @@ struct imm { > */ > exec_list *uses; > > - /** The immediate value. We currently only handle floats. */ > - float val; > + /** The immediate value */ > + union { > + char bytes[8]; > + float f; > + int32_t d; > + int16_t w; > + }; > + uint8_t size; > + > + /** When promoting half-float we need to account for certain > restrictions */ > + bool is_half_float; > > /** > * The GRF register and subregister number where we've decided to > store the > @@ -145,10 +155,11 @@ struct table { > }; > > static struct imm * > -find_imm(struct table *table, float val) > +find_imm(struct table *table, void *data, uint8_t size) > { > for (int i = 0; i < table->len; i++) { > - if (table->imm[i].val == val) { > + if (table->imm[i].size == size && > + !memcmp(table->imm[i].bytes, data, size)) { > return &table->imm[i]; > } > } > @@ -190,6 +201,96 @@ compare(const void *_a, const void *_b) > return a->first_use_ip - b->first_use_ip; > } > > +static bool > +get_constant_value(const struct gen_device_info *devinfo, > + const fs_inst *inst, uint32_t src_idx, > + void *out, brw_reg_type *out_type) > +{ > + const bool can_do_source_mods = inst->can_do_source_mods(devinfo); > + const fs_reg *src = &inst->src[src_idx]; > + > + *out_type = src->type; > + > + switch (*out_type) { > + case BRW_REGISTER_TYPE_F: { > + float val = !can_do_source_mods ? src->f : fabsf(src->f); > + memcpy(out, &val, 4); > + break; > + } > + case BRW_REGISTER_TYPE_HF: { > + uint16_t val = src->d & 0xffffu; > + if (can_do_source_mods) > + val = _mesa_float_to_half(fabsf(_mesa_half_to_float(val))); > + memcpy(out, &val, 2); > + break; > + } > + case BRW_REGISTER_TYPE_D: { > + int32_t val = !can_do_source_mods ? src->d : abs(src->d); > + memcpy(out, &val, 4); > + break; > + } > + case BRW_REGISTER_TYPE_UD: > + memcpy(out, &src->ud, 4); > + break; > + case BRW_REGISTER_TYPE_W: { > + int16_t val = src->d & 0xffffu; > + if (can_do_source_mods) > + val = abs(val); > + memcpy(out, &val, 2); > + break; > + } > + case BRW_REGISTER_TYPE_UW: > + memcpy(out, &src->ud, 2); > + break; > You could also throw in DF and Q types. This is probably sufficient for now though. > + default: > + return false; > + }; > + > + return true; > +} > + > +static struct brw_reg > +build_imm_reg_for_copy(struct imm *imm) > +{ > + switch (imm->size) { > + case 4: > + return brw_imm_d(imm->d); > + case 2: > + return brw_imm_w(imm->w); > + default: > + unreachable("not implemented"); > + } > +} > + > +static inline uint32_t > +get_alignment_for_imm(const struct imm *imm) > +{ > + if (imm->is_half_float) > + return 4; /* At least MAD seems to require this */ > + else > + return imm->size; > +} > + > +static bool > +needs_negate(const struct fs_reg *reg, const struct imm *imm) > +{ > + switch (reg->type) { > + case BRW_REGISTER_TYPE_F: > + return signbit(reg->f) != signbit(imm->f); > + case BRW_REGISTER_TYPE_D: > + return (reg->d < 0) != (imm->d < 0); > + case BRW_REGISTER_TYPE_HF: > + return (reg->d & 0x8000u) != (imm->w & 0x8000u); > + case BRW_REGISTER_TYPE_W: > + return ((reg->d & 0xffffu) < 0) != (imm->w < 0); > + case BRW_REGISTER_TYPE_UD: > + case BRW_REGISTER_TYPE_UW: > + return false; > + default: > + unreachable("not implemented"); > + }; > +} > + > bool > fs_visitor::opt_combine_constants() > { > @@ -214,13 +315,17 @@ fs_visitor::opt_combine_constants() > continue; > > for (int i = 0; i < inst->sources; i++) { > - if (inst->src[i].file != IMM || > - inst->src[i].type != BRW_REGISTER_TYPE_F) > + if (inst->src[i].file != IMM) > continue; > > - float val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f : > - fabs(inst->src[i].f); > - struct imm *imm = find_imm(&table, val); > + char data[8]; > + brw_reg_type type; > + if (!get_constant_value(devinfo, inst, i, data, &type)) > + continue; > + > + uint8_t size = type_sz(type); > + > + struct imm *imm = find_imm(&table, data, size); > > if (imm) { > bblock_t *intersection = cfg_t::intersect(block, imm->block); > @@ -237,7 +342,9 @@ fs_visitor::opt_combine_constants() > imm->inst = inst; > imm->uses = new(const_ctx) exec_list(); > imm->uses->push_tail(link(const_ctx, &inst->src[i])); > - imm->val = val; > + memcpy(imm->bytes, data, size); > + imm->size = size; > + imm->is_half_float = type == BRW_REGISTER_TYPE_HF; > In the "if (imm)" case above, I think we want to do if (type == BRW_REGISTER_TYPE_HF) imm->is_half_float = true; in case we get a constant that's used first as a uint16_t (which won't set is_half_float) and then later as a float16_t. > imm->uses_by_coissue = could_coissue(devinfo, inst); > imm->must_promote = must_promote_imm(devinfo, inst); > imm->first_use_ip = ip; > @@ -276,17 +383,40 @@ fs_visitor::opt_combine_constants() > */ > exec_node *n = (imm->inst ? imm->inst : > imm->block->last_non_control_flow_inst()->next); > - const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, > 0); > > - ibld.MOV(reg, brw_imm_f(imm->val)); > - imm->nr = reg.nr; > - imm->subreg_offset = reg.offset; > + /* From the BDW and CHV PRM, 3D Media GPGPU, Special Restrictions: > + * > + * "In Align16 mode, the channel selects and channel enables > apply to a > + * pair of half-floats, because these parameters are defined for > DWord > + * elements ONLY. This is applicable when both source and > destination > + * are half-floats." > + * > + * This means that Align16 instructions that use promoted HF > immediates > + * and use a <0,1,0>:HF region would read 2 HF slots instead of > + * replicating the single one we want. To avoid this, we always > populate > + * both HF slots within a DWord with the constant. > + */ > + const uint32_t width = devinfo->gen == 8 && imm->is_half_float ? 2 > : 1; > + const fs_builder ibld = bld.at(imm->block, > n).exec_all().group(width, 0); > + > + /* Put the immediate in an offset aligned to its size. Some > instructions > + * seem to have additional alignment requirements, so account for > that > + * too. > + */ > + reg.offset = ALIGN(reg.offset, get_alignment_for_imm(imm)); > > - reg.offset += sizeof(float); > - if (reg.offset == 8 * sizeof(float)) { > + /* Ensure we have enough space in the register to copy the > immediate */ > + struct brw_reg imm_reg = build_imm_reg_for_copy(imm); > + if (reg.offset + type_sz(imm_reg.type) > REG_SIZE) { > I think you need to multiply by width here as well. > reg.nr = alloc.allocate(1); > reg.offset = 0; > } > + > + ibld.MOV(retype(reg, imm_reg.type), imm_reg); > + imm->nr = reg.nr; > + imm->subreg_offset = reg.offset; > + > + reg.offset += imm->size; > Multiply by width. You can ignore the comment about DF and Q types if you'd like. With all the others addressed, this is Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net> > } > promoted_constants = table.len; > > @@ -294,13 +424,41 @@ fs_visitor::opt_combine_constants() > for (int i = 0; i < table.len; i++) { > foreach_list_typed(reg_link, link, link, table.imm[i].uses) { > fs_reg *reg = link->reg; > - assert((isnan(reg->f) && isnan(table.imm[i].val)) || > - fabsf(reg->f) == fabs(table.imm[i].val)); > +#ifdef DEBUG > + switch (reg->type) { > + case BRW_REGISTER_TYPE_F: > + assert((isnan(reg->f) && isnan(table.imm[i].f)) || > + (fabsf(reg->f) == fabsf(table.imm[i].f))); > + break; > + case BRW_REGISTER_TYPE_HF: > + assert((isnan(_mesa_half_to_float(reg->d & 0xffffu)) && > + isnan(_mesa_half_to_float(table.imm[i].w))) || > + (fabsf(_mesa_half_to_float(reg->d & 0xffffu)) == > + fabsf(_mesa_half_to_float(table.imm[i].w)))); > + break; > + case BRW_REGISTER_TYPE_D: > + assert(reg->type == BRW_REGISTER_TYPE_D && > + abs(reg->d) == abs(table.imm[i].d)); > + break; > + case BRW_REGISTER_TYPE_UD: > + assert(reg->d == table.imm[i].d); > + break; > + case BRW_REGISTER_TYPE_W: > + assert(abs((int16_t) (reg->d & 0xffff)) == table.imm[i].w); > + break; > + case BRW_REGISTER_TYPE_UW: > + assert(reg->type == BRW_REGISTER_TYPE_UW && > + (reg->ud & 0xffffu) == (uint16_t) table.imm[i].w); > + break; > + default: > + break; > + } > +#endif > > reg->file = VGRF; > reg->offset = table.imm[i].subreg_offset; > reg->stride = 0; > - reg->negate = signbit(reg->f) != signbit(table.imm[i].val); > + reg->negate = needs_negate(reg, &table.imm[i]); > reg->nr = table.imm[i].nr; > } > } > @@ -309,9 +467,9 @@ fs_visitor::opt_combine_constants() > for (int i = 0; i < table.len; i++) { > struct imm *imm = &table.imm[i]; > > - printf("%.3fF - block %3d, reg %3d sub %2d, Uses: (%2d, %2d), " > - "IP: %4d to %4d, length %4d\n", > - imm->val, > + printf("0x%016" PRIx64 " - block %3d, reg %3d sub %2d, " > + "Uses: (%2d, %2d), IP: %4d to %4d, length %4d\n", > + (uint64_t)(imm->d & BITFIELD64_MASK(imm->size * 8)), > imm->block->num, > imm->nr, > imm->subreg_offset, > -- > 2.17.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev