This effectively removes all offset calculations in ir3_compiler_nir::get_image_offset().
No regressions were observed on the affected tests from the Khronos CTS and piglit suites, compared to master. Collecting useful stats on the helps/hurts caused by this pass is still a work in progress. Very few shaders in the shader-db database exercise image store or image atomic ops, and of those that do, most require a higher version of GLSL than what freedreno supports, so they get skipped. There is ongoing work on writing/porting shaders to collect useful stats. So far, all shaders tested show no meaningful difference compared to master. --- src/freedreno/ir3/ir3_compiler_nir.c | 61 +++++++++++++--------------- src/freedreno/ir3/ir3_nir.c | 1 + 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index fd641735620..fe329db658c 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -548,6 +548,9 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) ir3_MADSH_M16(b, src[0], 0, src[1], 0, ir3_MULL_U(b, src[0], 0, src[1], 0), 0), 0); break; + case nir_op_imad: + dst[0] = ir3_MAD_S24(b, src[0], 0, src[1], 0, src[2], 0); + break; case nir_op_ineg: dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SNEG); break; @@ -1172,44 +1175,19 @@ get_image_type(const nir_variable *var) static struct ir3_instruction * get_image_offset(struct ir3_context *ctx, const nir_variable *var, - struct ir3_instruction * const *coords, bool byteoff) + struct ir3_instruction * const *coords) { struct ir3_block *b = ctx->block; - struct ir3_instruction *offset; - unsigned ncoords = get_image_coords(var, NULL); - - /* to calculate the byte offset (yes, uggg) we need (up to) three - * const values to know the bytes per pixel, and y and z stride: - */ - unsigned cb = regid(ctx->so->constbase.image_dims, 0) + - ctx->so->const_layout.image_dims.off[var->data.driver_location]; debug_assert(ctx->so->const_layout.image_dims.mask & (1 << var->data.driver_location)); - /* offset = coords.x * bytes_per_pixel: */ - offset = 
ir3_MUL_S(b, coords[0], 0, create_uniform(b, cb + 0), 0); - if (ncoords > 1) { - /* offset += coords.y * y_pitch: */ - offset = ir3_MAD_S24(b, create_uniform(b, cb + 1), 0, - coords[1], 0, offset, 0); - } - if (ncoords > 2) { - /* offset += coords.z * z_pitch: */ - offset = ir3_MAD_S24(b, create_uniform(b, cb + 2), 0, - coords[2], 0, offset, 0); - } - - if (!byteoff) { - /* Some cases, like atomics, seem to use dword offset instead - * of byte offsets.. blob just puts an extra shr.b in there - * in those cases: - */ - offset = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); - } - + /* ir3_nir_lower_sampler_io pass should have placed the final + * byte-offset (or dword offset for atomics) at the 4th component + * of the coordinate vector. + */ return ir3_create_collect(ctx, (struct ir3_instruction*[]){ - offset, + coords[3], create_immed(b, 0), }, 2); } @@ -1341,7 +1319,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) * src2 is 64b byte offset */ - offset = get_image_offset(ctx, var, coords, true); + offset = get_image_offset(ctx, var, coords); /* NOTE: stib seems to take byte offset, but stgb.typed can be used * too and takes a dword offset.. not quite sure yet why blob uses @@ -1443,7 +1421,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) */ src0 = ir3_get_src(ctx, &intr->src[3])[0]; src1 = ir3_create_collect(ctx, coords, ncoords); - src2 = get_image_offset(ctx, var, coords, false); + src2 = get_image_offset(ctx, var, coords); switch (intr->intrinsic) { case nir_intrinsic_image_deref_atomic_add: @@ -1612,6 +1590,23 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) } switch (intr->intrinsic) { + case nir_intrinsic_load_image_stride: { + idx = intr->const_index[0]; + + /* this is the index into image_dims offsets, which can take + * values 0, 1 or 2 (bpp, y-stride, z-stride respectively). 
+ */ + uint8_t off = intr->const_index[1]; + debug_assert(off <= 2); + + unsigned cb = regid(ctx->so->constbase.image_dims, 0) + + ctx->so->const_layout.image_dims.off[idx]; + debug_assert(ctx->so->const_layout.image_dims.mask & (1 << idx)); + + dst[0] = create_uniform(b, cb + off); + break; + } + case nir_intrinsic_load_uniform: idx = nir_intrinsic_base(intr); const_offset = nir_src_as_const_value(intr->src[0]); diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index d9fcf798b3d..68a0edb343c 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -160,6 +160,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, OPT_V(s, nir_opt_global_to_local); OPT_V(s, nir_lower_regs_to_ssa); + OPT_V(s, ir3_nir_lower_sampler_io); if (key) { if (s->info.stage == MESA_SHADER_VERTEX) { -- 2.20.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev