To make batch re-ordering useful, we need to be able to create shadow
resources to avoid a flush/stall in transfer_map(), for example when
uploading new texture contents or updating a UBO mid-batch.  In these
cases, we want to clone the buffer and update the new buffer, leaving
the old buffer (whose reference is held by the cmdstream) as a shadow.

This is done by blitting the other levels (and whatever part of the
current level is not discarded) from the old/shadow buffer to the new
one.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/drivers/freedreno/freedreno_context.h  |   5 +
 src/gallium/drivers/freedreno/freedreno_resource.c | 208 ++++++++++++++++++++-
 src/gallium/drivers/freedreno/freedreno_util.h     |   6 +
 3 files changed, 210 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_context.h 
b/src/gallium/drivers/freedreno/freedreno_context.h
index 012f452..ca3c01b 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -296,6 +296,11 @@ struct fd_context {
        bool cond_cond; /* inverted rendering condition */
        uint cond_mode;
 
+       /* Are we in process of shadowing a resource? Used to detect recursion
+        * in transfer_map, and skip unneeded synchronization.
+        */
+       bool in_shadow;
+
        struct pipe_debug_callback debug;
 
        /* GMEM/tile handling fxns: */
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c 
b/src/gallium/drivers/freedreno/freedreno_resource.c
index d7603b2..8ff0d4a 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -116,6 +116,171 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
        util_range_set_empty(&rsc->valid_buffer_range);
 }
 
+static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond);
+static void fd_blitter_pipe_end(struct fd_context *ctx);
+
+static void
+do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool 
fallback)
+{
+       /* TODO size threshold too?? */
+       if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
+               /* do blit on gpu: */
+               fd_blitter_pipe_begin(ctx, false);
+               util_blitter_blit(ctx->blitter, blit);
+               fd_blitter_pipe_end(ctx);
+       } else {
+               /* do blit on cpu: */
+               util_resource_copy_region(&ctx->base,
+                               blit->dst.resource, blit->dst.level, 
blit->dst.box.x,
+                               blit->dst.box.y, blit->dst.box.z,
+                               blit->src.resource, blit->src.level, 
&blit->src.box);
+       }
+}
+
+static bool
+fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
+               unsigned level, unsigned usage, const struct pipe_box *box)
+{
+       struct pipe_context *pctx = &ctx->base;
+       struct pipe_resource *prsc = &rsc->base.b;
+       bool fallback = false;
+
+       /* TODO: somehow munge dimensions and format to copy unsupported
+        * render target format to something that is supported?
+        */
+       if (!pctx->screen->is_format_supported(pctx->screen,
+                       prsc->format, prsc->target, prsc->nr_samples,
+                       PIPE_BIND_RENDER_TARGET))
+               fallback = true;
+
+       /* these cases should be handled elsewhere.. just for future
+        * reference in case this gets split into a more generic(ish)
+        * helper.
+        */
+       debug_assert(!(usage & PIPE_TRANSFER_READ));
+       debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE));
+
+       /* if we do a gpu blit to clone the whole resource, we'll just
+        * end up stalling on that.. so only allow if we can discard
+        * current range (and blit, possibly cpu or gpu, the rest)
+        */
+       if (!(usage & PIPE_TRANSFER_DISCARD_RANGE))
+               return false;
+
+       bool whole_level = util_texrange_covers_whole_level(prsc, level,
+               box->x, box->y, box->z, box->width, box->height, box->depth);
+
+       /* TODO need to be more clever about current level */
+       if ((prsc->target > PIPE_TEXTURE_2D) && !whole_level)
+               return false;
+
+       struct pipe_resource *pshadow =
+               pctx->screen->resource_create(pctx->screen, prsc);
+
+       if (!pshadow)
+               return false;
+
+       assert(!ctx->in_shadow);
+       ctx->in_shadow = true;
+
+       /* get rid of any references that batch-cache might have to us (which
+        * should empty/destroy rsc->batches hashset)
+        */
+       fd_bc_invalidate_resource(rsc);
+
+       /* Swap the backing bo's, so shadow becomes the old buffer,
+        * blit from shadow to new buffer.  From here on out, we
+        * cannot fail.
+        *
+        * Note that we need to do it in this order, otherwise if
+        * we go down cpu blit path, the recursive transfer_map()
+        * sees the wrong status..
+        */
+       struct fd_resource *shadow = fd_resource(pshadow);
+       DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.b.reference.count,
+                       shadow, shadow->base.b.reference.count);
+
+       struct pipe_blit_info blit = {0};
+       blit.dst.resource = pshadow;
+       blit.dst.format   = pshadow->format;
+       blit.src.resource = prsc;
+       blit.src.format   = prsc->format;
+       blit.mask = util_format_get_mask(prsc->format);
+       blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+#define set_box(field, val) do {     \
+               blit.dst.field = (val);      \
+               blit.src.field = (val);      \
+       } while (0)
+
+       /* blit the other levels in their entirety: */
+       for (unsigned l = 0; l <= prsc->last_level; l++) {
+               if (l == level)
+                       continue;
+
+               /* just blit whole level: */
+               set_box(level, l);
+               // TODO do these get minified?
+               set_box(box.width,  u_minify(prsc->width0, l));
+               set_box(box.height, u_minify(prsc->height0, l));
+               set_box(box.depth,  u_minify(prsc->depth0, l));
+
+               do_blit(ctx, &blit, fallback);
+       }
+
+       /* deal w/ current level specially, since we might need to split
+        * it up into a couple blits:
+        */
+       if (!whole_level) {
+               set_box(level, level);
+
+               switch (prsc->target) {
+               case PIPE_BUFFER:
+               case PIPE_TEXTURE_1D:
+                       set_box(box.y, 0);
+                       set_box(box.z, 0);
+                       set_box(box.height, 1);
+                       set_box(box.depth, 1);
+
+                       if (box->x > 0) {
+                               set_box(box.x, 0);
+                               set_box(box.width, box->x);
+
+                               do_blit(ctx, &blit, fallback);
+                       }
+                       if ((box->x + box->width) < u_minify(prsc->width0, 
level)) {
+                               set_box(box.x, box->x + box->width);
+                               set_box(box.width, u_minify(prsc->width0, 
level) - (box->x + box->width));
+
+                               do_blit(ctx, &blit, fallback);
+                       }
+                       break;
+               case PIPE_TEXTURE_2D:
+                       /* TODO */
+               default:
+                       unreachable("TODO");
+               }
+       }
+
+       /* TODO valid_buffer_range?? */
+       swap(rsc->bo,        shadow->bo);
+       swap(rsc->timestamp, shadow->timestamp);
+       swap(rsc->status,    shadow->status);
+       swap(rsc->pending_batch, shadow->pending_batch);
+       swap(rsc->write_batch,   shadow->write_batch);
+
+       struct list_head tmp;
+       list_replace(&rsc->list, &tmp);
+       list_replace(&shadow->list, &rsc->list);
+       list_replace(&tmp, &shadow->list);
+
+       ctx->in_shadow = false;
+
+       pipe_resource_reference(&pshadow, NULL);
+
+       return true;
+}
+
 static unsigned
 fd_resource_layer_offset(struct fd_resource *rsc,
                                                 struct fd_resource_slice 
*slice,
@@ -303,6 +468,9 @@ fd_resource_transfer_map(struct pipe_context *pctx,
        ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * 
rsc->cpp;
        ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : 
slice->size0;
 
+       if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
+               usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+
        if (usage & PIPE_TRANSFER_READ)
                op |= DRM_FREEDRENO_PREP_READ;
 
@@ -322,21 +490,46 @@ fd_resource_transfer_map(struct pipe_context *pctx,
                 * to wait.
                 */
        } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+               bool needs_flush;
+
                /* If the GPU is writing to the resource, or if it is reading 
from the
                 * resource and we're trying to write to it, flush the renders.
                 */
-               if (((ptrans->usage & PIPE_TRANSFER_WRITE) &&
-                                       pending(rsc, FD_PENDING_READ | 
FD_PENDING_WRITE)) ||
-                               pending(rsc, FD_PENDING_WRITE))
+               if (usage & PIPE_TRANSFER_WRITE)
+                       needs_flush = pending(rsc, FD_PENDING_READ | 
FD_PENDING_WRITE);
+               else
+                       needs_flush = pending(rsc, FD_PENDING_WRITE);
+
+               bool busy = needs_flush | (0 == fd_bo_cpu_prep(rsc->bo,
+                               ctx->screen->pipe, op | 
DRM_FREEDRENO_PREP_NOSYNC));
+
+               /* if we need to flush/stall, see if we can make a shadow buffer
+                * to avoid this:
+                *
+                * TODO we could go down this path !reorder && !busy_for_read
+                * ie. we only *don't* want to go down this path if the blit
+                * will trigger a flush!
+                */
+               if (ctx->screen->reorder && (needs_flush || busy) &&
+                               !(usage & PIPE_TRANSFER_READ)) {
+                       if (fd_try_shadow_resource(ctx, rsc, level, usage, 
box)) {
+                               needs_flush = busy = false;
+                               fd_invalidate_resource(ctx, prsc);
+                       }
+               }
+
+               if (needs_flush)
                        fd_batch_flush(rsc->pending_batch);
 
                /* The GPU keeps track of how the various bo's are being used, 
and
                 * will wait if necessary for the proper operation to have
                 * completed.
                 */
-               ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
-               if (ret)
-                       goto fail;
+               if (busy) {
+                       ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
+                       if (ret)
+                               goto fail;
+               }
        }
 
        buf = fd_bo_map(rsc->bo);
@@ -685,9 +878,6 @@ fail:
        return NULL;
 }
 
-static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond);
-static void fd_blitter_pipe_end(struct fd_context *ctx);
-
 /**
  * _copy_region using pipe (3d engine)
  */
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h 
b/src/gallium/drivers/freedreno/freedreno_util.h
index 5cb958e..61b3c93 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -331,4 +331,10 @@ pack_rgba(enum pipe_format format, const float *rgba)
        return uc.ui[0];
 }
 
+/*
+ * swap - swap value of @a and @b
+ */
+#define swap(a, b) \
+       do { __typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
 #endif /* FREEDRENO_UTIL_H_ */
-- 
2.7.4

_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno

Reply via email to