On Wed, Dec 9, 2015 at 8:33 PM, Matt Turner <[email protected]> wrote: > On Wed, Dec 9, 2015 at 8:23 PM, Jason Ekstrand <[email protected]> wrote: >> Instead of using reladdr, this commit changes the FS backend to emit a >> MOV_INDIRECT whenever we need an indirect uniform load. We also have to >> rework some of the other bits of the backend to handle this new form of >> uniform load. The obvious change is that demote_pull_constants now acts >> more like a lowering pass when it hits a MOV_INDIRECT. >> --- >> src/mesa/drivers/dri/i965/brw_fs.cpp | 72 >> +++++++++++++++++++------------- >> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 53 ++++++++++++++++++----- >> 2 files changed, 86 insertions(+), 39 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp >> b/src/mesa/drivers/dri/i965/brw_fs.cpp >> index bf446d2..7cc03c5 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp >> @@ -1945,8 +1945,8 @@ fs_visitor::assign_constant_locations() >> if (inst->src[i].file != UNIFORM) >> continue; >> >> - if (inst->src[i].reladdr) { >> - int uniform = inst->src[i].nr; >> + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) { >> + int uniform = inst->src[0].nr; >> >> /* If this array isn't already present in the pull constant >> buffer, >> * add it. >> @@ -2028,49 +2028,63 @@ fs_visitor::assign_constant_locations() >> void >> fs_visitor::demote_pull_constants() >> { >> - foreach_block_and_inst (block, fs_inst, inst, cfg) { >> + const unsigned index = >> stage_prog_data->binding_table.pull_constants_start; >> + >> + foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { >> + /* Set up the annotation tracking for new generated instructions. 
*/ >> + const fs_builder ibld(this, block, inst); >> + >> for (int i = 0; i < inst->sources; i++) { >> if (inst->src[i].file != UNIFORM) >> continue; >> >> - int pull_index; >> + /* We'll handle this case later */ >> + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) >> + continue; >> + >> unsigned location = inst->src[i].nr + inst->src[i].reg_offset; >> - if (location >= uniforms) /* Out of bounds access */ >> - pull_index = -1; >> - else >> - pull_index = pull_constant_loc[location]; >> + if (location >= uniforms) >> + continue; /* Out of bounds access */ >> + >> + int pull_index = pull_constant_loc[location]; >> >> if (pull_index == -1) >> continue; >> >> - /* Set up the annotation tracking for new generated instructions. >> */ >> - const fs_builder ibld(this, block, inst); >> - const unsigned index = >> stage_prog_data->binding_table.pull_constants_start; >> - fs_reg dst = vgrf(glsl_type::float_type); >> - >> assert(inst->src[i].stride == 0); >> >> - /* Generate a pull load into dst. */ >> - if (inst->src[i].reladdr) { >> - VARYING_PULL_CONSTANT_LOAD(ibld, dst, >> - brw_imm_ud(index), >> - *inst->src[i].reladdr, >> - pull_index * 4); >> - inst->src[i].reladdr = NULL; >> - inst->src[i].stride = 1; >> - } else { >> - const fs_builder ubld = ibld.exec_all().group(8, 0); >> - struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & >> ~15); >> - ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, >> - dst, brw_imm_ud(index), offset); >> - inst->src[i].set_smear(pull_index & 3); >> - } >> - brw_mark_surface_used(prog_data, index); >> + fs_reg dst = vgrf(glsl_type::float_type); >> + const fs_builder ubld = ibld.exec_all().group(8, 0); >> + struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & >> ~15); >> + ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, >> + dst, brw_imm_ud(index), offset); >> >> /* Rewrite the instruction to use the temporary VGRF. 
*/ >> inst->src[i].file = VGRF; >> inst->src[i].nr = dst.nr; >> inst->src[i].reg_offset = 0; >> + inst->src[i].set_smear(pull_index & 3); >> + >> + brw_mark_surface_used(prog_data, index); >> + } >> + >> + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && >> + inst->src[0].file == UNIFORM) { >> + >> + unsigned location = inst->src[0].nr + inst->src[0].reg_offset; >> + if (location >= uniforms) >> + continue; /* Out of bounds access */ >> + >> + int pull_index = pull_constant_loc[location]; >> + assert(pull_index >= 0); /* This had better be pull */ >> + >> + VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst, >> + brw_imm_ud(index), >> + inst->src[1], >> + pull_index * 4); >> + inst->remove(block); >> + >> + brw_mark_surface_used(prog_data, index); >> } >> } >> invalidate_live_intervals(); >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> index 15d9b1c..bf239c3 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> @@ -1136,6 +1136,8 @@ fs_visitor::get_nir_image_deref(const nir_deref_var >> *deref) >> { >> fs_reg image(UNIFORM, deref->var->data.driver_location / 4, >> BRW_REGISTER_TYPE_UD); >> + fs_reg indirect; >> + unsigned indirect_max = 0; >> >> for (const nir_deref *tail = &deref->deref; tail->child; >> tail = tail->child) { >> @@ -1147,7 +1149,7 @@ fs_visitor::get_nir_image_deref(const nir_deref_var >> *deref) >> image = offset(image, bld, base * element_size); >> >> if (deref_array->deref_array_type == nir_deref_array_type_indirect) { >> - fs_reg tmp = vgrf(glsl_type::int_type); >> + fs_reg tmp = vgrf(glsl_type::uint_type); >> >> if (devinfo->gen == 7 && !devinfo->is_haswell) { >> /* IVB hangs when trying to access an invalid surface index with >> @@ -1166,14 +1168,29 @@ fs_visitor::get_nir_image_deref(const nir_deref_var >> *deref) >> } >> >> bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4)); >> - if (image.reladdr) >> - bld.ADD(*image.reladdr, 
*image.reladdr, tmp); >> - else >> - image.reladdr = new(mem_ctx) fs_reg(tmp); >> + if (indirect.file == BAD_FILE) { >> + indirect = tmp; >> + indirect_max = type_size_scalar(tail->type) - >> BRW_IMAGE_PARAM_SIZE; >> + } else { >> + bld.ADD(indirect, indirect, tmp); >> + } >> } >> } >> >> - return image; >> + if (indirect.file == BAD_FILE) { >> + return image; >> + } else { >> + /* Emit a pile of MOVs to load the uniform into a temporary. The >> + * dead-code elimination pass will get rid of what we don't use. >> + */ >> + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE); >> + for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) { >> + bld.emit(SHADER_OPCODE_MOV_INDIRECT, >> + offset(tmp, bld, j), offset(image, bld, j), >> + indirect, brw_imm_ud((indirect_max + 1) * 4)); >> + } >> + return tmp; >> + } >> } >> >> void >> @@ -2302,12 +2319,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder >> &bld, nir_intrinsic_instr *instr >> /* Offsets are in bytes but they should always be multiples of 4 */ >> assert(const_offset->u[0] % 4 == 0); >> src.reg_offset = const_offset->u[0] / 4; >> + >> + for (unsigned j = 0; j < instr->num_components; j++) { >> + bld.MOV(offset(dest, bld, j), offset(src, bld, j)); >> + } >> } else { >> - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); >> - } >> + fs_reg indirect = retype(get_nir_src(instr->src[0]), >> + BRW_REGISTER_TYPE_UD); >> >> - for (unsigned j = 0; j < instr->num_components; j++) { >> - bld.MOV(offset(dest, bld, j), offset(src, bld, j)); >> + /* Se need to pass a size to the MOV_INDIRECT but we don't want it >> to > > s/Se/We/
Fixed. Thanks!
_______________________________________________
mesa-dev mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
