Module: Mesa
Branch: main
Commit: 0955fe8fe20d2ffb416394be80a12897d7a6d61c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=0955fe8fe20d2ffb416394be80a12897d7a6d61c

Author: Alyssa Rosenzweig <[email protected]>
Date:   Fri Oct 21 12:28:35 2022 -0400

panfrost: Use compute-based XFB on Midgard

Now we're back to a single transform feedback (XFB) implementation for all
hardware generations. Fixes:

   KHR-GLES31.core.draw_indirect.advanced-twoPasses-transformFeedback-arrays
   KHR-GLES31.core.draw_indirect.advanced-twoPasses-transformFeedback-elements

Cc: mesa-stable
Signed-off-by: Alyssa Rosenzweig <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19238>

---

 src/gallium/drivers/panfrost/pan_assemble.c        |   2 +-
 src/gallium/drivers/panfrost/pan_cmdstream.c       | 144 +++------------------
 src/panfrost/bifrost/bifrost_compile.c             |   2 +-
 src/panfrost/bifrost/meson.build                   |   1 -
 src/panfrost/midgard/midgard_compile.c             |  19 ++-
 src/panfrost/util/meson.build                      |   1 +
 src/panfrost/util/pan_ir.h                         |   1 +
 .../bi_lower_xfb.c => util/pan_lower_xfb.c}        |   9 +-
 8 files changed, 46 insertions(+), 133 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c
index 20707ff773d..e7bbe4ea9c8 100644
--- a/src/gallium/drivers/panfrost/pan_assemble.c
+++ b/src/gallium/drivers/panfrost/pan_assemble.c
@@ -49,7 +49,7 @@ panfrost_shader_compile(struct pipe_screen *pscreen,
 
         nir_shader *s = nir_shader_clone(NULL, ir);
 
-        if (dev->arch >= 6 && s->xfb_info && !s->info.internal) {
+        if (s->xfb_info && !s->info.internal) {
                 /* Create compute shader doing transform feedback */
                 nir_shader *xfb = nir_shader_clone(NULL, s);
                 xfb->info.name = ralloc_asprintf(xfb, "%s@xfb", xfb->info.name);
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 5a7347f408d..ad170821187 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -2016,9 +2016,14 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
         unsigned nr_bufs = ((so->nr_bufs + nr_images) * bufs_per_attrib) +
                            (PAN_ARCH >= 6 ? 1 : 0);
 
+        unsigned count = vs->info.attribute_count;
+
+        if (vs->xfb)
+                count = MAX2(count, vs->xfb->info.attribute_count);
+
 #if PAN_ARCH <= 5
         /* Midgard needs vertexid/instanceid handled specially */
-        bool special_vbufs = vs->info.attribute_count >= PAN_VERTEX_ID;
+        bool special_vbufs = count >= PAN_VERTEX_ID;
 
         if (special_vbufs)
                 nr_bufs += 2;
@@ -2033,8 +2038,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
                 pan_pool_alloc_desc_array(&batch->pool.base, nr_bufs,
                                           ATTRIBUTE_BUFFER);
         struct panfrost_ptr T =
-                pan_pool_alloc_desc_array(&batch->pool.base,
-                                          vs->info.attribute_count,
+                pan_pool_alloc_desc_array(&batch->pool.base, count,
                                           ATTRIBUTE);
 
         struct mali_attribute_buffer_packed *bufs =
@@ -2257,50 +2261,6 @@ panfrost_emit_varyings(struct panfrost_batch *batch,
         return ptr;
 }
 
-#if PAN_ARCH <= 5
-static void
-panfrost_emit_streamout(struct panfrost_batch *batch,
-                        struct mali_attribute_buffer_packed *slot,
-                        unsigned stride, unsigned count,
-                        struct pipe_stream_output_target *target)
-{
-        unsigned max_size = target->buffer_size;
-        unsigned expected_size = stride * count;
-
-        /* Grab the BO and bind it to the batch */
-        struct panfrost_resource *rsrc = pan_resource(target->buffer);
-        struct panfrost_bo *bo = rsrc->image.data.bo;
-
-        panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
-        panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT);
-
-        unsigned offset = panfrost_xfb_offset(stride, target);
-
-        pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
-                cfg.pointer = bo->ptr.gpu + (offset & ~63);
-                cfg.stride = stride;
-                cfg.size = MIN2(max_size, expected_size) + (offset & 63);
-
-                util_range_add(&rsrc->base, &rsrc->valid_buffer_range,
-                                offset, cfg.size);
-        }
-}
-
-/* Helpers for manipulating stream out information so we can pack varyings
- * accordingly. Compute the src_offset for a given captured varying */
-
-static struct pipe_stream_output *
-pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
-{
-        for (unsigned i = 0; i < info->num_outputs; ++i) {
-                if (info->output[i].register_index == loc)
-                        return &info->output[i];
-        }
-
-        unreachable("Varying not captured");
-}
-#endif
-
 /* Given a varying, figure out which index it corresponds to */
 
 static inline unsigned
@@ -2309,16 +2269,6 @@ pan_varying_index(unsigned present, enum pan_special_varying v)
         return util_bitcount(present & BITFIELD_MASK(v));
 }
 
-/* Get the base offset for XFB buffers, which by convention come after
- * everything else. Wrapper function for semantic reasons; by construction this
- * is just popcount. */
-
-static inline unsigned
-pan_xfb_base(unsigned present)
-{
-        return util_bitcount(present);
-}
-
 /* Determines which varying buffers are required */
 
 static inline unsigned
@@ -2474,10 +2424,6 @@ panfrost_emit_varying(const struct panfrost_device *dev,
                       enum pipe_format pipe_format,
                       unsigned present,
                       uint16_t point_sprite_mask,
-                      struct pipe_stream_output_info *xfb,
-                      uint64_t xfb_loc_mask,
-                      unsigned max_xfb,
-                      unsigned *xfb_offsets,
                       signed offset,
                       enum pan_special_varying pos_varying)
 {
@@ -2489,21 +2435,8 @@ panfrost_emit_varying(const struct panfrost_device *dev,
         gl_varying_slot loc = varying.location;
         mali_pixel_format format = dev->formats[pipe_format].hw;
 
-#if PAN_ARCH <= 5
-        struct pipe_stream_output *o = (xfb_loc_mask & BITFIELD64_BIT(loc)) ?
-                pan_get_so(xfb, loc) : NULL;
-#else
-        struct pipe_stream_output *o = NULL;
-#endif
-
         if (util_varying_is_point_coord(loc, point_sprite_mask)) {
                 pan_emit_vary_special(dev, out, present, PAN_VARY_PNTCOORD);
-        } else if (o && o->output_buffer < max_xfb) {
-                unsigned fixup_offset = xfb_offsets[o->output_buffer] & 63;
-
-                pan_emit_vary(dev, out,
-                                pan_xfb_base(present) + o->output_buffer,
-                                format, (o->dst_offset * 4) + fixup_offset);
         } else if (loc == VARYING_SLOT_POS) {
                 pan_emit_vary_special(dev, out, present, pos_varying);
         } else if (loc == VARYING_SLOT_PSIZ) {
@@ -2526,12 +2459,10 @@ panfrost_emit_varying_descs(
                 struct panfrost_pool *pool,
                 struct panfrost_shader_state *producer,
                 struct panfrost_shader_state *consumer,
-                struct panfrost_streamout *xfb,
                 uint16_t point_coord_mask,
                 struct pan_linkage *out)
 {
         struct panfrost_device *dev = pool->base.dev;
-        struct pipe_stream_output_info *xfb_info = &producer->stream_output;
         unsigned producer_count = producer->info.varyings.output_count;
         unsigned consumer_count = consumer->info.varyings.input_count;
 
@@ -2565,16 +2496,6 @@ panfrost_emit_varying_descs(
         out->stride = pan_assign_varyings(dev, &producer->info,
                         &consumer->info, offsets);
 
-        unsigned xfb_offsets[PIPE_MAX_SO_BUFFERS] = {0};
-
-        for (unsigned i = 0; i < xfb->num_targets; ++i) {
-                if (!xfb->targets[i])
-                        continue;
-
-                xfb_offsets[i] = panfrost_xfb_offset(xfb_info->stride[i] * 4,
-                                                     xfb->targets[i]);
-        }
-
         for (unsigned i = 0; i < producer_count; ++i) {
                 signed j = pan_find_vary(consumer->info.varyings.input,
                                 consumer->info.varyings.input_count,
@@ -2586,9 +2507,7 @@ panfrost_emit_varying_descs(
 
                 panfrost_emit_varying(dev, descs + i,
                                 producer->info.varyings.output[i], format,
-                                out->present, 0, &producer->stream_output,
-                                producer->so_mask, xfb->num_targets,
-                                xfb_offsets, offsets[i], PAN_VARY_POSITION);
+                                out->present, 0, offsets[i], PAN_VARY_POSITION);
         }
 
         for (unsigned i = 0; i < consumer_count; ++i) {
@@ -2602,9 +2521,7 @@ panfrost_emit_varying_descs(
                                 consumer->info.varyings.input[i],
                                 consumer->info.varyings.input[i].format,
                                 out->present, point_coord_mask,
-                                &producer->stream_output, producer->so_mask,
-                                xfb->num_targets, xfb_offsets, offset,
-                                PAN_VARY_FRAGCOORD);
+                                offset, PAN_VARY_FRAGCOORD);
         }
 }
 
@@ -2657,7 +2574,6 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
         /* In good conditions, we only need to link varyings once */
         bool prelink =
                 (point_coord_mask == 0) &&
-                (PAN_ARCH >= 6 || ctx->streamout.num_targets == 0) &&
                 !vs->info.separable &&
                 !fs->info.separable;
 
@@ -2670,43 +2586,24 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
                 struct panfrost_pool *pool =
                         prelink ? &ctx->descs : &batch->pool;
 
-                panfrost_emit_varying_descs(pool, vs, fs, &ctx->streamout, point_coord_mask, linkage);
+                panfrost_emit_varying_descs(pool, vs, fs, point_coord_mask, linkage);
         }
 
         unsigned present = linkage->present, stride = linkage->stride;
-        unsigned xfb_base = pan_xfb_base(present);
+        unsigned count = util_bitcount(present);
         struct panfrost_ptr T =
                 pan_pool_alloc_desc_array(&batch->pool.base,
-                                          xfb_base +
-                                          ctx->streamout.num_targets + 1,
+                                          count + 1,
                                           ATTRIBUTE_BUFFER);
         struct mali_attribute_buffer_packed *varyings =
                 (struct mali_attribute_buffer_packed *) T.cpu;
 
         if (buffer_count)
-                *buffer_count = xfb_base + ctx->streamout.num_targets;
+                *buffer_count = count;
 
 #if PAN_ARCH >= 6
         /* Suppress prefetch on Bifrost */
-        memset(varyings + xfb_base + ctx->streamout.num_targets, 0, sizeof(*varyings));
-#else
-        /* Emit the stream out buffers. We need enough room for all the
-         * vertices we emit across all instances */
-
-        struct pipe_stream_output_info *so = &vs->stream_output;
-
-        unsigned out_count = ctx->instance_count *
-                u_stream_outputs_for_vertices(ctx->active_prim, ctx->vertex_count);
-
-        for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
-                if (!ctx->streamout.targets[i])
-                        continue;
-
-                panfrost_emit_streamout(batch, &varyings[xfb_base + i],
-                                        so->stride[i] * 4,
-                                        out_count,
-                                        ctx->streamout.targets[i]);
-        }
+        memset(varyings + count, 0, sizeof(*varyings));
 #endif
 
         if (stride) {
@@ -3692,7 +3589,7 @@ panfrost_launch_xfb(struct panfrost_batch *batch,
 
         panfrost_pack_work_groups_compute(&invocation,
                         1, count, info->instance_count,
-                        1, 1, 1, false, false);
+                        1, 1, 1, PAN_ARCH <= 5, false);
 
         batch->uniform_buffers[PIPE_SHADER_VERTEX] =
                 panfrost_emit_const_buf(batch, PIPE_SHADER_VERTEX, NULL,
@@ -3701,9 +3598,12 @@ panfrost_launch_xfb(struct panfrost_batch *batch,
         panfrost_draw_emit_vertex(batch, info, &invocation, 0, 0,
                                   attribs, attrib_bufs, t.cpu);
 #endif
-        panfrost_add_job(&batch->pool.base, &batch->scoreboard,
-                        MALI_JOB_TYPE_COMPUTE, true, false,
-                        0, 0, &t, false);
+        enum mali_job_type job_type = MALI_JOB_TYPE_COMPUTE;
+#if PAN_ARCH <= 5
+        job_type = MALI_JOB_TYPE_VERTEX;
+#endif
+        panfrost_add_job(&batch->pool.base, &batch->scoreboard, job_type,
+                         true, false, 0, 0, &t, false);
 
         ctx->shader[PIPE_SHADER_VERTEX] = saved_vs;
         batch->rsd[PIPE_SHADER_VERTEX] = saved_rsd;
@@ -3837,14 +3737,12 @@ panfrost_direct_draw(struct panfrost_batch *batch,
         panfrost_update_shader_state(batch, PIPE_SHADER_FRAGMENT);
         panfrost_clean_state_3d(ctx);
 
-#if PAN_ARCH >= 6
         if (vs->xfb) {
 #if PAN_ARCH >= 9
                 mali_ptr attribs = 0, attrib_bufs = 0;
 #endif
                 panfrost_launch_xfb(batch, info, attribs, attrib_bufs, draw->count);
         }
-#endif
 
         /* Increment transform feedback offsets */
         panfrost_update_streamout_offsets(ctx);
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 2a696137c9a..edd9b4996e8 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -4962,7 +4962,7 @@ bi_finalize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
                 NIR_PASS_V(nir, nir_io_add_const_offset_to_base,
                            nir_var_shader_in | nir_var_shader_out);
                 NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
-                NIR_PASS_V(nir, bifrost_nir_lower_xfb);
+                NIR_PASS_V(nir, pan_lower_xfb);
         }
 
         bi_optimize_nir(nir, gpu_id, is_blend);
diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build
index e6f79ac963a..63d1560cc8d 100644
--- a/src/panfrost/bifrost/meson.build
+++ b/src/panfrost/bifrost/meson.build
@@ -29,7 +29,6 @@ libpanfrost_bifrost_files = files(
   'bi_liveness.c',
   'bi_lower_divergent_indirects.c',
   'bi_lower_swizzle.c',
-  'bi_lower_xfb.c',
   'bi_print.c',
   'bi_opt_constant_fold.c',
   'bi_opt_copy_prop.c',
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 5732dc254e5..94668b4235b 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -362,6 +362,13 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend, bool is_blit)
         NIR_PASS(progress, nir, pan_lower_helper_invocation);
         NIR_PASS(progress, nir, pan_lower_sample_pos);
 
+        if (nir->xfb_info != NULL && nir->info.has_transform_feedback_varyings) {
+                NIR_PASS_V(nir, nir_io_add_const_offset_to_base,
+                           nir_var_shader_in | nir_var_shader_out);
+                NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
+                NIR_PASS_V(nir, pan_lower_xfb);
+        }
+
         NIR_PASS(progress, nir, midgard_nir_lower_algebraic_early);
         NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL);
 
@@ -2101,9 +2108,14 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                 emit_global(ctx, &instr->instr, false, reg, &instr->src[1], seg);
                 break;
 
-        case nir_intrinsic_load_first_vertex:
         case nir_intrinsic_load_ssbo_address:
+        case nir_intrinsic_load_xfb_address:
+                emit_sysval_read(ctx, &instr->instr, 2, 0);
+                break;
+
+        case nir_intrinsic_load_first_vertex:
         case nir_intrinsic_load_work_dim:
+        case nir_intrinsic_load_num_vertices:
                 emit_sysval_read(ctx, &instr->instr, 1, 0);
                 break;
 
@@ -2112,6 +2124,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                 break;
 
         case nir_intrinsic_load_base_instance:
+        case nir_intrinsic_get_ssbo_size:
                 emit_sysval_read(ctx, &instr->instr, 1, 8);
                 break;
 
@@ -2119,10 +2132,6 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                 emit_sysval_read(ctx, &instr->instr, 2, 0);
                 break;
 
-        case nir_intrinsic_get_ssbo_size:
-                emit_sysval_read(ctx, &instr->instr, 1, 8);
-                break;
-
         case nir_intrinsic_load_viewport_scale:
         case nir_intrinsic_load_viewport_offset:
         case nir_intrinsic_load_num_workgroups:
diff --git a/src/panfrost/util/meson.build b/src/panfrost/util/meson.build
index 75f9024eafb..34858723feb 100644
--- a/src/panfrost/util/meson.build
+++ b/src/panfrost/util/meson.build
@@ -30,6 +30,7 @@ libpanfrost_util_files = files(
   'pan_lower_helper_invocation.c',
   'pan_lower_sample_position.c',
   'pan_lower_writeout.c',
+  'pan_lower_xfb.c',
   'pan_lower_64bit_intrin.c',
   'pan_sysval.c',
 )
diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h
index 4e1732cf80a..98b4decd117 100644
--- a/src/panfrost/util/pan_ir.h
+++ b/src/panfrost/util/pan_ir.h
@@ -507,6 +507,7 @@ bool pan_nir_lower_64bit_intrin(nir_shader *shader);
 
 bool pan_lower_helper_invocation(nir_shader *shader);
 bool pan_lower_sample_pos(nir_shader *shader);
+bool pan_lower_xfb(nir_shader *nir);
 
 /*
  * Helper returning the subgroup size. Generally, this is equal to the number of
diff --git a/src/panfrost/bifrost/bi_lower_xfb.c b/src/panfrost/util/pan_lower_xfb.c
similarity index 93%
rename from src/panfrost/bifrost/bi_lower_xfb.c
rename to src/panfrost/util/pan_lower_xfb.c
index 35daeb3a55c..e9620b2e760 100644
--- a/src/panfrost/bifrost/bi_lower_xfb.c
+++ b/src/panfrost/util/pan_lower_xfb.c
@@ -21,7 +21,9 @@
  * SOFTWARE.
  */
 
-#include "bifrost_nir.h"
+
+#include "pan_ir.h"
+#include "compiler/nir/nir_builder.h"
 
 static void
 lower_xfb_output(nir_builder *b, nir_intrinsic_instr *intr,
@@ -42,6 +44,9 @@ lower_xfb_output(nir_builder *b, nir_intrinsic_instr *intr,
                             nir_load_num_vertices(b)),
                 nir_load_vertex_id_zero_base(b));
 
+        BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
+        BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
+
         nir_ssa_def *buf = nir_load_xfb_address(b, 64, .base = buffer);
         nir_ssa_def *addr =
                 nir_iadd(b, buf, nir_u2u64(b,
@@ -87,7 +92,7 @@ lower_xfb(nir_builder *b, nir_instr *instr, UNUSED void *data)
 }
 
 bool
-bifrost_nir_lower_xfb(nir_shader *nir)
+pan_lower_xfb(nir_shader *nir)
 {
         return nir_shader_instructions_pass(nir, lower_xfb,
                                             nir_metadata_block_index |

Reply via email to