For now this is not enabled by default, but it can be enabled (on a3xx/a4xx)
with FD_MESA_DEBUG=reorder.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
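Note: the core change is that each batch now tracks the batches it depends on
and flushes those before flushing itself.  Below is a minimal, self-contained
C sketch of just that ordering pattern; the names (toy_batch,
toy_batch_add_dep, MAX_DEPS) are made up for illustration, and the real code
uses util/set plus fd_batch_reference() rather than a fixed-size array.

#include <stdbool.h>
#include <stdio.h>

#define MAX_DEPS 8

struct toy_batch {
    unsigned seqno;
    bool needs_flush;
    struct toy_batch *deps[MAX_DEPS];   /* batches that must flush first */
    unsigned num_deps;
};

/* Record a dependency, skipping duplicates (the driver uses a hash set and
 * takes a reference on the dependency instead).
 */
static void
toy_batch_add_dep(struct toy_batch *batch, struct toy_batch *dep)
{
    for (unsigned i = 0; i < batch->num_deps; i++)
        if (batch->deps[i] == dep)
            return;
    if (batch->num_deps < MAX_DEPS)
        batch->deps[batch->num_deps++] = dep;
}

/* Flush dependencies before "submitting" this batch.  needs_flush is cleared
 * before recursing so a re-entrant call cannot loop.
 */
static void
toy_batch_flush(struct toy_batch *batch)
{
    if (!batch->needs_flush)
        return;
    batch->needs_flush = false;
    for (unsigned i = 0; i < batch->num_deps; i++)
        toy_batch_flush(batch->deps[i]);
    batch->num_deps = 0;
    printf("submit batch %u\n", batch->seqno);
}

int
main(void)
{
    struct toy_batch a = { .seqno = 1, .needs_flush = true };
    struct toy_batch b = { .seqno = 2, .needs_flush = true };

    toy_batch_add_dep(&b, &a);  /* b reads something that a writes */
    toy_batch_flush(&b);        /* prints: submit batch 1, then submit batch 2 */
    return 0;
}

Running the sketch prints "submit batch 1" before "submit batch 2", which is
the ordering the dependency set is there to enforce.
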
 src/gallium/drivers/freedreno/freedreno_batch.c    | 168 ++++++++++++++++++---
 src/gallium/drivers/freedreno/freedreno_batch.h    |   1 +
 src/gallium/drivers/freedreno/freedreno_context.c  |  38 ++---
 src/gallium/drivers/freedreno/freedreno_context.h  |   2 -
 src/gallium/drivers/freedreno/freedreno_query_hw.c |   2 +-
 src/gallium/drivers/freedreno/freedreno_resource.c |   6 +-
 src/gallium/drivers/freedreno/freedreno_resource.h |   1 +
 src/gallium/drivers/freedreno/freedreno_screen.c   |   9 ++
 src/gallium/drivers/freedreno/freedreno_screen.h   |   2 +
 src/gallium/drivers/freedreno/freedreno_state.c    |  15 +-
 src/gallium/drivers/freedreno/freedreno_util.h     |   1 +
 11 files changed, 188 insertions(+), 57 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c
index 5c6ae76..9d5bcf8 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -25,26 +25,20 @@
  */
 
 #include "util/list.h"
+#include "util/set.h"
+#include "util/hash_table.h"
 #include "util/u_string.h"
 
 #include "freedreno_batch.h"
 #include "freedreno_context.h"
 #include "freedreno_resource.h"
 
-struct fd_batch *
-fd_batch_create(struct fd_context *ctx)
+static void
+batch_init(struct fd_batch *batch)
 {
-       struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
-       static unsigned seqno = 0;
+       struct fd_context *ctx = batch->ctx;
        unsigned size = 0;
 
-       if (!batch)
-               return NULL;
-
-       pipe_reference_init(&batch->reference, 1);
-       batch->seqno = ++seqno;
-       batch->ctx = ctx;
-
        /* if kernel is too old to support unlimited # of cmd buffers, we
         * have no option but to allocate large worst-case sizes so that
         * we don't need to grow the ringbuffer.  Performance is likely to
@@ -62,7 +56,11 @@ fd_batch_create(struct fd_context *ctx)
        fd_ringbuffer_set_parent(batch->draw, batch->gmem);
        fd_ringbuffer_set_parent(batch->binning, batch->gmem);
 
-       list_inithead(&batch->used_resources);
+       batch->cleared = batch->partial_cleared = 0;
+       batch->restore = batch->resolve = 0;
+       batch->needs_flush = false;
+       batch->gmem_reason = 0;
+       batch->num_draws = 0;
 
        /* reset maximal bounds: */
        batch->max_scissor.minx = batch->max_scissor.miny = ~0;
@@ -73,16 +71,37 @@ fd_batch_create(struct fd_context *ctx)
        if (is_a3xx(ctx->screen))
                util_dynarray_init(&batch->rbrc_patches);
 
-       return batch;
+       assert(LIST_IS_EMPTY(&batch->used_resources));
 }
 
-void
-__fd_batch_destroy(struct fd_batch *batch)
+struct fd_batch *
+fd_batch_create(struct fd_context *ctx)
 {
-       fd_bc_invalidate_batch(batch);
+       struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
+       static unsigned seqno = 0;
 
-       util_copy_framebuffer_state(&batch->framebuffer, NULL);
+       if (!batch)
+               return NULL;
+
+       DBG("%p", batch);
+
+       pipe_reference_init(&batch->reference, 1);
+       batch->seqno = ++seqno;
+       batch->ctx = ctx;
+
+       list_inithead(&batch->used_resources);
+
+       batch_init(batch);
+
+       batch->dependencies = _mesa_set_create(NULL, _mesa_hash_pointer,
+                       _mesa_key_pointer_equal);
 
+       return batch;
+}
+
+static void
+batch_fini(struct fd_batch *batch)
+{
        fd_ringbuffer_del(batch->draw);
        fd_ringbuffer_del(batch->binning);
        fd_ringbuffer_del(batch->gmem);
@@ -91,6 +110,51 @@ __fd_batch_destroy(struct fd_batch *batch)
 
        if (is_a3xx(batch->ctx->screen))
                util_dynarray_fini(&batch->rbrc_patches);
+}
+
+static void
+batch_reset(struct fd_batch *batch)
+{
+       struct set_entry *entry;
+
+       DBG("%p", batch);
+
+       batch_fini(batch);
+       batch_init(batch);
+
+       set_foreach(batch->dependencies, entry) {
+               struct fd_batch *dep = (struct fd_batch *)entry->key;
+               _mesa_set_remove(batch->dependencies, entry);
+               fd_batch_reference(&dep, NULL);
+       }
+}
+
+void
+fd_batch_reset(struct fd_batch *batch)
+{
+       if (batch->needs_flush)
+               batch_reset(batch);
+}
+
+static void
+unref_batch(struct set_entry *entry)
+{
+       struct fd_batch *batch = (struct fd_batch *)entry->key;
+       fd_batch_reference(&batch, NULL);
+}
+
+void
+__fd_batch_destroy(struct fd_batch *batch)
+{
+       fd_bc_invalidate_batch(batch);
+
+       DBG("%p", batch);
+
+       util_copy_framebuffer_state(&batch->framebuffer, NULL);
+
+       batch_fini(batch);
+
+       _mesa_set_destroy(batch->dependencies, unref_batch);
 
        free(batch);
 }
@@ -101,16 +165,26 @@ __fd_batch_describe(char* buf, const struct fd_batch *batch)
        util_sprintf(buf, "fd_batch<%u>", batch->seqno);
 }
 
-void
-fd_batch_flush(struct fd_batch *batch)
+static void
+batch_flush(struct fd_batch *batch)
 {
        struct fd_resource *rsc, *rsc_tmp;
+       struct set_entry *entry;
 
        DBG("%p: needs_flush=%d", batch, batch->needs_flush);
 
        if (!batch->needs_flush)
                return;
 
+       batch->needs_flush = false;
+
+       set_foreach(batch->dependencies, entry) {
+               struct fd_batch *dep = (struct fd_batch *)entry->key;
+               fd_batch_flush(dep);
+               _mesa_set_remove(batch->dependencies, entry);
+               fd_batch_reference(&dep, NULL);
+       }
+
        fd_gmem_render_tiles(batch);
 
        /* go through all the used resources and clear their reading flag */
@@ -119,18 +193,67 @@ fd_batch_flush(struct fd_batch *batch)
                debug_assert(rsc->status != 0);
                rsc->status = 0;
                fd_batch_reference(&rsc->pending_batch, NULL);
+               fd_batch_reference(&rsc->write_batch, NULL);
                list_delinit(&rsc->list);
        }
 
        assert(LIST_IS_EMPTY(&batch->used_resources));
-       batch->needs_flush = false;
-       fd_bc_invalidate_batch(batch);
+
+       if (batch == batch->ctx->batch) {
+               batch_reset(batch);
+       } else {
+               fd_bc_invalidate_batch(batch);
+       }
+}
+
+void
+fd_batch_flush(struct fd_batch *batch)
+{
+       /* NOTE: we need to hold an extra ref across the body of flush,
+        * since the last ref to this batch could be dropped when cleaning
+        * up used_resources
+        */
+       struct fd_batch *tmp = NULL;
+       fd_batch_reference(&tmp, batch);
+       batch_flush(tmp);
+       fd_batch_reference(&tmp, NULL);
+}
+
+static void
+batch_add_dep(struct fd_batch *batch, struct fd_batch *dep)
+{
+       if (!_mesa_set_search(batch->dependencies, dep)) {
+               struct fd_batch *other = NULL;
+               fd_batch_reference(&other, dep);
+               _mesa_set_add(batch->dependencies, other);
+       }
+}
+
+static void
+batch_update_dep(struct fd_batch *batch, struct fd_resource *rsc,
+               enum fd_resource_status status)
+{
+       switch (status) {
+       case FD_PENDING_WRITE:
+               DBG("%p: flush forced! (%p, %d)\n", rsc->pending_batch, rsc, rsc->status);
+               fd_batch_flush(rsc->pending_batch);
+               assert(rsc->pending_batch == NULL);
+               break;
+       case FD_PENDING_READ:
+               if (rsc->write_batch)
+                       batch_add_dep(batch, rsc->write_batch);
+               batch_add_dep(batch, rsc->pending_batch);
+               break;
+       }
 }
 
 void
 fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc,
                enum fd_resource_status status)
 {
+       if (unlikely(rsc->pending_batch && (rsc->pending_batch != batch)))
+               batch_update_dep(batch, rsc, status);
+
        rsc->status |= status;
 
        if (rsc->stencil)
@@ -139,7 +262,6 @@ fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc,
        /* TODO resources can actually be shared across contexts,
         * so I'm not sure a single list-head will do the trick?
         */
-       debug_assert((rsc->pending_batch == batch) || !rsc->pending_batch);
        list_delinit(&rsc->list);
        list_addtail(&rsc->list, &batch->used_resources);
        fd_batch_reference(&rsc->pending_batch, batch);
@@ -154,5 +276,5 @@ fd_batch_check_size(struct fd_batch *batch)
        struct fd_ringbuffer *ring = batch->draw;
        if (((ring->cur - ring->start) > (ring->size/4 - 0x1000)) ||
                        (fd_mesa_debug & FD_DBG_FLUSH))
-               fd_context_render(&batch->ctx->base);
+               fd_batch_flush(batch);
 }
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h
index d500f95..44da3c4 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -129,6 +129,7 @@ struct fd_batch {
 
 struct fd_batch * fd_batch_create(struct fd_context *ctx);
 
+void fd_batch_reset(struct fd_batch *batch);
 void fd_batch_flush(struct fd_batch *batch);
 void fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc,
                enum fd_resource_status status);
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 4359fb2..3a16a51 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -38,39 +38,27 @@
 #include "freedreno_query_hw.h"
 #include "freedreno_util.h"
 
-/* emit accumulated render cmds, needed for example if render target has
- * changed, or for flush()
- */
-void
-fd_context_render(struct pipe_context *pctx)
-{
-       struct fd_context *ctx = fd_context(pctx);
-       struct fd_batch *new_batch;
-
-       fd_batch_flush(ctx->batch);
-
-       new_batch = fd_batch_create(ctx);
-       util_copy_framebuffer_state(&new_batch->framebuffer, &ctx->batch->framebuffer);
-       fd_batch_reference(&ctx->batch, NULL);
-       ctx->batch = new_batch;
-}
-
 static void
 fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
                unsigned flags)
 {
-       struct fd_batch *batch = NULL;
-
-       fd_batch_reference(&batch, fd_context(pctx)->batch);
-
-       fd_context_render(pctx);
+       struct fd_context *ctx = fd_context(pctx);
+       uint32_t timestamp;
+
+       if (!ctx->screen->reorder) {
+               struct fd_batch *batch = NULL;
+               fd_batch_reference(&batch, ctx->batch);
+               fd_batch_flush(batch);
+               timestamp = fd_ringbuffer_timestamp(batch->gmem);
+               fd_batch_reference(&batch, NULL);
+       } else {
+               timestamp = fd_bc_flush(&ctx->batch_cache);
+       }
 
        if (fence) {
                fd_screen_fence_ref(pctx->screen, fence, NULL);
-               *fence = fd_fence_create(pctx, fd_ringbuffer_timestamp(batch->gmem));
+               *fence = fd_fence_create(pctx, timestamp);
        }
-
-       fd_batch_reference(&batch, NULL);
 }
 
 /**
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 6be7437..012f452 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -378,8 +378,6 @@ struct pipe_context * fd_context_init(struct fd_context *ctx,
                struct pipe_screen *pscreen, const uint8_t *primtypes,
                void *priv);
 
-void fd_context_render(struct pipe_context *pctx);
-
 void fd_context_destroy(struct pipe_context *pctx);
 
 #endif /* FREEDRENO_CONTEXT_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c
index ec8bf20..a55aee2 100644
--- a/src/gallium/drivers/freedreno/freedreno_query_hw.c
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c
@@ -210,7 +210,7 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
                if (!ctx->batch->needs_flush)
                        return true;
                DBG("reading query result forces flush!");
-               fd_context_render(&ctx->base);
+               fd_batch_flush(ctx->batch);
        }
 
        util_query_clear_result(result, q->type);
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 4fd8559..d7603b2 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -111,6 +111,7 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
        rsc->timestamp = 0;
        rsc->status = 0;
        fd_batch_reference(&rsc->pending_batch, NULL);
+       fd_batch_reference(&rsc->write_batch, NULL);
        list_delinit(&rsc->list);
        util_range_set_empty(&rsc->valid_buffer_range);
 }
@@ -327,7 +328,7 @@ fd_resource_transfer_map(struct pipe_context *pctx,
                if (((ptrans->usage & PIPE_TRANSFER_WRITE) &&
                                        pending(rsc, FD_PENDING_READ | FD_PENDING_WRITE)) ||
                                pending(rsc, FD_PENDING_WRITE))
-                       fd_context_render(pctx);
+                       fd_batch_flush(rsc->pending_batch);
 
                /* The GPU keeps track of how the various bo's are being used, and
                 * will wait if necessary for the proper operation to have
@@ -456,6 +457,7 @@ fd_resource_destroy(struct pipe_screen *pscreen,
        if (rsc->bo)
                fd_bo_del(rsc->bo);
        fd_batch_reference(&rsc->pending_batch, NULL);
+       fd_batch_reference(&rsc->write_batch, NULL);
        list_delinit(&rsc->list);
        util_range_destroy(&rsc->valid_buffer_range);
        FREE(rsc);
@@ -849,7 +851,7 @@ fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
        struct fd_resource *rsc = fd_resource(prsc);
 
        if (pending(rsc, FD_PENDING_WRITE | FD_PENDING_READ))
-               fd_context_render(pctx);
+               fd_batch_flush(rsc->pending_batch);
 }
 
 void
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index 3b990a9..2615527 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -96,6 +96,7 @@ struct fd_resource {
         */
        struct list_head list;
        struct fd_batch *pending_batch;
+       struct fd_batch *write_batch;
 
        /* set of batches whose batch-cache key references this resource: */
        struct set *batches;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 5255c10..a18df54 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -75,6 +75,7 @@ static const struct debug_named_value debug_options[] = {
                {"flush",     FD_DBG_FLUSH,  "Force flush after every draw"},
                {"deqp",      FD_DBG_DEQP,   "Enable dEQP hacks"},
                {"nir",       FD_DBG_NIR,    "Prefer NIR as native IR"},
+               {"reorder",   FD_DBG_REORDER,"Enable reordering for draws/blits"},
                DEBUG_NAMED_VALUE_END
 };
 
@@ -649,6 +650,14 @@ fd_screen_create(struct fd_device *dev)
                goto fail;
        }
 
+       /* NOTE: don't enable reordering on a2xx, since completely untested.
+        * Also, don't enable if we have too old of a kernel to support
+        * growable cmdstream buffers, since memory requirement for cmdstream
+        * buffers would be too much otherwise.
+        */
+       if ((screen->gpu_id >= 300) && (fd_device_version(dev) >= FD_VERSION_UNLIMITED_CMDS))
+               screen->reorder = !!(fd_mesa_debug & FD_DBG_REORDER);
+
        pscreen->destroy = fd_screen_destroy;
        pscreen->get_param = fd_screen_get_param;
        pscreen->get_paramf = fd_screen_get_paramf;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
index a81c778..67fa689 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -65,6 +65,8 @@ struct fd_screen {
        struct fd_pipe *pipe;
 
        int64_t cpu_gpu_time_delta;
+
+       bool reorder;
 };
 
 static inline struct fd_screen *
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index 98b56c7..63ffa0c 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -117,10 +117,17 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
        struct fd_context *ctx = fd_context(pctx);
        struct pipe_framebuffer_state *cso;
 
-       DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush,
-                       framebuffer->cbufs[0], framebuffer->zsbuf);
-
-       fd_context_render(pctx);
+       if (ctx->screen->reorder) {
+               struct fd_batch *batch =
+                       fd_batch_from_fb(&ctx->batch_cache, ctx, framebuffer);
+               fd_batch_reference(&ctx->batch, NULL);
+               ctx->batch = batch;
+               ctx->dirty = ~0;
+       } else {
+               DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush,
+                               framebuffer->cbufs[0], framebuffer->zsbuf);
+               fd_batch_flush(ctx->batch);
+       }
 
        cso = &ctx->batch->framebuffer;
 
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index 8f125d9..5cb958e 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -75,6 +75,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
 #define FD_DBG_FLUSH    0x1000
 #define FD_DBG_DEQP     0x2000
 #define FD_DBG_NIR      0x4000
+#define FD_DBG_REORDER  0x8000
 
 extern int fd_mesa_debug;
 extern bool fd_binning_enabled;
-- 
2.7.4
