On Tue, May 24, 2016 at 12:18 AM, Francisco Jerez <curroje...@riseup.net> wrote:
> This generalizes the current fs_inst::force_sechalf flag to allow > specifying channel enable groups other than 0 or 8. At some point it > will likely make sense to fix the vec4 generator to support arbitrary > execution groups and then move the definition of fs_inst::group into > backend_instruction (e.g. so we can do FP64 in the VEC4 back-end). > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 20 > ++++++++------------ > src/mesa/drivers/dri/i965/brw_fs_builder.h | 14 +++++++++++--- > src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 4 ++-- > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 7 ++++--- > src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp | 2 +- > src/mesa/drivers/dri/i965/brw_ir_fs.h | 20 > +++++++++----------- > 6 files changed, 35 insertions(+), 32 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index a59cd3c..92caeaa 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -3638,7 +3638,7 @@ fs_visitor::lower_integer_multiplication() > mul->src[1].stride *= 2; > > } else if (devinfo->gen == 7 && !devinfo->is_haswell && > - inst->force_sechalf) { > + inst->group > 0) { > /* Among other things the quarter control bits influence which > * accumulator register is used by the hardware for > instructions > * that access the accumulator implicitly (e.g. MACH). A > @@ -3655,7 +3655,7 @@ fs_visitor::lower_integer_multiplication() > * to get the result masked correctly according to the current > * channel enables. > */ > - mach->force_sechalf = false; > + mach->group = 0; > mach->force_writemask_all = true; > mach->dst = ibld.vgrf(inst->dst.type); > ibld.MOV(inst->dst, mach->dst); > @@ -3791,8 +3791,8 @@ lower_fb_write_logical_send(const fs_builder &bld, > fs_inst *inst, > sample_mask.stride *= 2; > > bld.exec_all().annotate("FB write oMask") > - .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW), > - inst->force_sechalf), > + .MOV(horiz_offset(retype(sources[length], BRW_REGISTER_TYPE_UW), > + inst->group), > sample_mask); > length++; > } > @@ -5017,10 +5017,10 @@ fs_visitor::lower_simd_width() > * execution size of the builder to the highest of both for now > so > * we're sure that both cases can be handled. > */ > + const unsigned max_width = MAX2(inst->exec_size, lower_width); > const fs_builder ibld = bld.at(block, inst) > .exec_all(inst->force_writemask_all) > - .group(MAX2(inst->exec_size, > lower_width), > - inst->force_sechalf); > + .group(max_width, inst->group / > max_width); > > /* Split the copies in chunks of the execution width of either > the > * original or the lowered instruction, whichever is lower. > @@ -5352,12 +5352,8 @@ fs_visitor::dump_instruction(backend_instruction > *be_inst, FILE *file) > if (inst->force_writemask_all) > fprintf(file, "NoMask "); > > - if (dispatch_width == 16 && inst->exec_size == 8) { > - if (inst->force_sechalf) > - fprintf(file, "2ndhalf "); > - else > - fprintf(file, "1sthalf "); > - } > + if (inst->exec_size != dispatch_width) > + fprintf(file, "group%d ", inst->group); > > fprintf(file, "\n"); > } > diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h > b/src/mesa/drivers/dri/i965/brw_fs_builder.h > index b50dda4..c1d13a2 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h > +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h > @@ -72,7 +72,7 @@ namespace brw { > fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) : > shader(shader), block(block), cursor(inst), > _dispatch_width(inst->exec_size), > - _group(inst->force_sechalf ? 8 : 0), > + _group(inst->group), > force_writemask_all(inst->force_writemask_all) > { > annotation.str = inst->annotation; > @@ -168,6 +168,15 @@ namespace brw { > } > > /** > + * Get the channel group in use. > + */ > + unsigned > + group() const > + { > + return _group; > + } > + > + /** > * Allocate a virtual register of natural vector size (one for this > IR) > * and SIMD width. \p n gives the amount of space to allocate in > * dispatch_width units (which is just enough space for one logical > @@ -353,9 +362,8 @@ namespace brw { > assert(inst->exec_size <= 32); > assert(inst->exec_size == dispatch_width() || > force_writemask_all); > - assert(_group == 0 || _group == 8); > > - inst->force_sechalf = (_group == 8); > + inst->group = _group; > inst->force_writemask_all = force_writemask_all; > inst->annotation = annotation.str; > inst->ir = annotation.ir; > diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp > b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp > index 9c39106..159bf5d 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp > @@ -162,7 +162,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool > *negate) > return a->opcode == b->opcode && > a->force_writemask_all == b->force_writemask_all && > a->exec_size == b->exec_size && > - a->force_sechalf == b->force_sechalf && > + a->group == b->group && > a->saturate == b->saturate && > a->predicate == b->predicate && > a->predicate_inverse == b->predicate_inverse && > @@ -215,7 +215,7 @@ create_copy_instr(const fs_builder &bld, fs_inst > *inst, fs_reg src, bool negate) > copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size); > } else { > copy = bld.MOV(inst->dst, src); > - copy->force_sechalf = inst->force_sechalf; > + copy->group = inst->group; > copy->force_writemask_all = inst->force_writemask_all; > copy->src[0].negate = negate; > } > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index 804c639..914ec9b 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -212,7 +212,7 @@ fs_generator::fire_fb_write(fs_inst *inst, > if (inst->opcode == FS_OPCODE_REP_FB_WRITE) > msg_control = > BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED; > else if (prog_data->dual_src_blend) { > - if (!inst->force_sechalf) > + if (!inst->group) > msg_control = > BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01; > else > msg_control = > BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23; > This doesn't look right. I suppose it's correct for SIMD16-only and that you have a follow-on patch for SIMD32 but it seems wrong. If it's an easy one-liner, maybe just squash it in? > @@ -1071,7 +1071,7 @@ fs_generator::generate_scratch_write(fs_inst *inst, > struct brw_reg src) > brw_set_default_compression(p, lower_size > 8); > > for (unsigned i = 0; i < inst->exec_size / lower_size; i++) { > - brw_set_default_group(p, (inst->force_sechalf ? 8 : 0) + lower_size > * i); > + brw_set_default_group(p, inst->group + lower_size * i); > > brw_MOV(p, brw_uvec_mrf(lower_size, inst->base_mrf + 1, 0), > retype(offset(src, block_size * i), BRW_REGISTER_TYPE_UD)); > @@ -1615,7 +1615,7 @@ fs_generator::generate_code(const cfg_t *cfg, int > dispatch_width) > const bool compressed = > inst->dst.component_size(inst->exec_size) > REG_SIZE; > brw_set_default_compression(p, compressed); > - brw_set_default_group(p, inst->force_sechalf ? 8 : 0); > + brw_set_default_group(p, inst->group); > > for (unsigned int i = 0; i < inst->sources; i++) { > src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen, > @@ -1643,6 +1643,7 @@ fs_generator::generate_code(const cfg_t *cfg, int > dispatch_width) > brw_set_default_exec_size(p, cvt(inst->exec_size) - 1); > > assert(inst->force_writemask_all || inst->exec_size >= 8); > + assert(inst->force_writemask_all || inst->group % inst->exec_size > == 0); > assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen)); > assert(inst->mlen <= BRW_MAX_MSG_LENGTH); > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp > b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp > index 8613725..8cd897f 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp > @@ -163,7 +163,7 @@ fs_visitor::opt_peephole_sel() > /* Check that the MOVs are the right form. */ > if (!then_mov[i]->dst.equals(else_mov[i]->dst) || > then_mov[i]->exec_size != else_mov[i]->exec_size || > - then_mov[i]->force_sechalf != else_mov[i]->force_sechalf || > + then_mov[i]->group != else_mov[i]->group || > then_mov[i]->force_writemask_all != > else_mov[i]->force_writemask_all || > then_mov[i]->is_partial_write() || > else_mov[i]->is_partial_write() || > diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h > b/src/mesa/drivers/dri/i965/brw_ir_fs.h > index cdb4464..6a93e81 100644 > --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h > @@ -289,22 +289,20 @@ public: > */ > uint8_t exec_size; > > + /** > + * Channel group from the hardware execution and predication mask that > + * should be applied to the instruction. The subset of channel enable > + * signals (calculated from the EU control flow and predication state) > + * given by [group, group + exec_size[ will be used to mask GRF writes > and > If you're intending to indicate a half-open interval, I believe the notation more people will be familiar with is "[group, group + exec_size)" > + * any other side effects of the instruction. > + */ > + uint8_t group; > + > bool eot:1; > - bool force_sechalf:1; > bool pi_noperspective:1; /**< Pixel interpolator noperspective flag > */ > }; > > /** > - * Set second-half quarter control on \p inst. > - */ > -static inline fs_inst * > -set_sechalf(fs_inst *inst) > -{ > - inst->force_sechalf = true; > - return inst; > -} > - > -/** > * Make the execution of \p inst dependent on the evaluation of a possibly > * inverted predicate. > */ > -- > 2.7.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev