This effectively removes all offset calculations in
ir3_compiler_nir::get_image_offset().

No regressions observed on affected tests from Khronos CTS and piglit
suites, compared to master.

Collecting useful stats on helps/hurts caused by this pass is WIP. Very
few shaders in the shader-db database exercise image store or image
atomic ops, and of those that do, most require a higher GLSL version
than freedreno supports, so they get skipped.

There is ongoing work on writing/porting shaders to collect useful
stats. So far, all shaders tested show no meaningful difference
compared to master.
---
 src/freedreno/ir3/ir3_compiler_nir.c | 61 +++++++++++++---------------
 src/freedreno/ir3/ir3_nir.c          |  1 +
 2 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index fd641735620..fe329db658c 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -548,6 +548,9 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
                                        ir3_MADSH_M16(b, src[0], 0, src[1], 0,
                                                ir3_MULL_U(b, src[0], 0, 
src[1], 0), 0), 0);
                break;
+       case nir_op_imad:
+               dst[0] = ir3_MAD_S24(b, src[0], 0, src[1], 0, src[2], 0);
+               break;
        case nir_op_ineg:
                dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SNEG);
                break;
@@ -1172,44 +1175,19 @@ get_image_type(const nir_variable *var)
 
 static struct ir3_instruction *
 get_image_offset(struct ir3_context *ctx, const nir_variable *var,
-               struct ir3_instruction * const *coords, bool byteoff)
+               struct ir3_instruction * const *coords)
 {
        struct ir3_block *b = ctx->block;
-       struct ir3_instruction *offset;
-       unsigned ncoords = get_image_coords(var, NULL);
-
-       /* to calculate the byte offset (yes, uggg) we need (up to) three
-        * const values to know the bytes per pixel, and y and z stride:
-        */
-       unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
-               ctx->so->const_layout.image_dims.off[var->data.driver_location];
 
        debug_assert(ctx->so->const_layout.image_dims.mask &
                        (1 << var->data.driver_location));
 
-       /* offset = coords.x * bytes_per_pixel: */
-       offset = ir3_MUL_S(b, coords[0], 0, create_uniform(b, cb + 0), 0);
-       if (ncoords > 1) {
-               /* offset += coords.y * y_pitch: */
-               offset = ir3_MAD_S24(b, create_uniform(b, cb + 1), 0,
-                               coords[1], 0, offset, 0);
-       }
-       if (ncoords > 2) {
-               /* offset += coords.z * z_pitch: */
-               offset = ir3_MAD_S24(b, create_uniform(b, cb + 2), 0,
-                               coords[2], 0, offset, 0);
-       }
-
-       if (!byteoff) {
-               /* Some cases, like atomics, seem to use dword offset instead
-                * of byte offsets.. blob just puts an extra shr.b in there
-                * in those cases:
-                */
-               offset = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
-       }
-
+       /* ir3_nir_lower_sampler_io pass should have placed the final
+        * byte-offset (or dword offset for atomics) at the 4th component
+        * of the coordinate vector.
+        */
        return ir3_create_collect(ctx, (struct ir3_instruction*[]){
-               offset,
+               coords[3],
                create_immed(b, 0),
        }, 2);
 }
@@ -1341,7 +1319,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
         * src2 is 64b byte offset
         */
 
-       offset = get_image_offset(ctx, var, coords, true);
+       offset = get_image_offset(ctx, var, coords);
 
        /* NOTE: stib seems to take byte offset, but stgb.typed can be used
         * too and takes a dword offset.. not quite sure yet why blob uses
@@ -1443,7 +1421,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
         */
        src0 = ir3_get_src(ctx, &intr->src[3])[0];
        src1 = ir3_create_collect(ctx, coords, ncoords);
-       src2 = get_image_offset(ctx, var, coords, false);
+       src2 = get_image_offset(ctx, var, coords);
 
        switch (intr->intrinsic) {
        case nir_intrinsic_image_deref_atomic_add:
@@ -1612,6 +1590,23 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        }
 
        switch (intr->intrinsic) {
+       case nir_intrinsic_load_image_stride: {
+               idx = intr->const_index[0];
+
+               /* this is the index into image_dims offsets, which can take
+                * values 0, 1 or 2 (bpp, y-stride, z-stride respectively).
+                */
+               uint8_t off = intr->const_index[1];
+               debug_assert(off <= 2);
+
+               unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
+                       ctx->so->const_layout.image_dims.off[idx];
+               debug_assert(ctx->so->const_layout.image_dims.mask & (1 << 
idx));
+
+               dst[0] = create_uniform(b, cb + off);
+               break;
+       }
+
        case nir_intrinsic_load_uniform:
                idx = nir_intrinsic_base(intr);
                const_offset = nir_src_as_const_value(intr->src[0]);
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index d9fcf798b3d..68a0edb343c 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -160,6 +160,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 
        OPT_V(s, nir_opt_global_to_local);
        OPT_V(s, nir_lower_regs_to_ssa);
+       OPT_V(s, ir3_nir_lower_sampler_io);
 
        if (key) {
                if (s->info.stage == MESA_SHADER_VERTEX) {
-- 
2.20.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to