[Mesa-dev] [PATCH] Revert "st/dri2: Implement DRI2bufferDamageExtension"

2019-10-01 Thread Boris Brezillon
This reverts commit 492ffbed63a2a62759224b1c7d45aa7923d8f542.

BACK_LEFT attachment can be outdated when the user calls
KHR_partial_update(), leading to a damage region update on the
wrong pipe_resource object.
Let's not expose the ->set_damage_region() method until the core is
fixed to handle that properly.

Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_screen.c |  1 -
 src/gallium/include/pipe/p_screen.h   | 17 ---
 src/gallium/state_trackers/dri/dri2.c | 35 ---
 3 files changed, 53 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_screen.c 
b/src/gallium/drivers/panfrost/pan_screen.c
index dae8b941f1ea..a33adaa3206a 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -736,7 +736,6 @@ panfrost_create_screen(int fd, struct renderonly *ro)
 screen->base.get_compiler_options = 
panfrost_screen_get_compiler_options;
 screen->base.fence_reference = panfrost_fence_reference;
 screen->base.fence_finish = panfrost_fence_finish;
-screen->base.set_damage_region = panfrost_resource_set_damage_region;
 
 panfrost_resource_screen_init(screen);
 
diff --git a/src/gallium/include/pipe/p_screen.h 
b/src/gallium/include/pipe/p_screen.h
index 9a1fc37280e7..a275a7c7900b 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -483,23 +483,6 @@ struct pipe_screen {
bool (*is_parallel_shader_compilation_finished)(struct pipe_screen *screen,
void *shader,
unsigned shader_type);
-
-   /**
-* Set the damage region (called when KHR_partial_update() is invoked).
-* This function is passed an array of rectangles encoding the damage area.
-* rects are using the bottom-left origin convention.
-* nrects = 0 means 'reset the damage region'. What 'reset' implies is HW
-* specific. For tile-based renderers, the damage extent is typically set
-* to cover the whole resource with no damage rect (or a 0-size damage
-* rect). This way, the existing resource content is reloaded into the
-* local tile buffer for every tile thus making partial tile update
-* possible. For HW operating in immediate mode, this reset operation is
-* likely to be a NOOP.
-*/
-   void (*set_damage_region)(struct pipe_screen *screen,
- struct pipe_resource *resource,
- unsigned int nrects,
- const struct pipe_box *rects);
 };
 
 
diff --git a/src/gallium/state_trackers/dri/dri2.c 
b/src/gallium/state_trackers/dri/dri2.c
index 574ddaea5c78..d42727c6cd6c 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -1872,36 +1872,6 @@ static const __DRI2interopExtension dri2InteropExtension 
= {
.export_object = dri2_interop_export_object
 };
 
-/**
- * \brief the DRI2bufferDamageExtension set_damage_region method
- */
-static void
-dri2_set_damage_region(__DRIdrawable *dPriv, unsigned int nrects, int *rects)
-{
-   struct dri_drawable *drawable = dri_drawable(dPriv);
-   struct pipe_resource *resource = 
drawable->textures[ST_ATTACHMENT_BACK_LEFT];
-   struct pipe_screen *screen = resource->screen;
-   struct pipe_box *boxes = NULL;
-
-   if (nrects) {
-  boxes = CALLOC(nrects, sizeof(*boxes));
-  assert(boxes);
-
-  for (unsigned int i = 0; i < nrects; i++) {
- int *rect = [i * 4];
-
- u_box_2d(rect[0], rect[1], rect[2], rect[3], [i]);
-  }
-   }
-
-   screen->set_damage_region(screen, resource, nrects, boxes);
-   FREE(boxes);
-}
-
-static __DRI2bufferDamageExtension dri2BufferDamageExtension = {
-   .base = { __DRI2_BUFFER_DAMAGE, 1 },
-};
-
 /**
  * \brief the DRI2ConfigQueryExtension configQueryb method
  */
@@ -2003,7 +1973,6 @@ static const __DRIextension *dri_screen_extensions[] = {
,
,
,
-   ,
,
,
,
@@ -2019,7 +1988,6 @@ static const __DRIextension 
*dri_robust_screen_extensions[] = {
,
,
,
-   ,
,
,
,
@@ -2082,9 +2050,6 @@ dri2_init_screen(__DRIscreen * sPriv)
   }
}
 
-   if (pscreen->set_damage_region)
-  dri2BufferDamageExtension.set_damage_region = dri2_set_damage_region;
-
if (pscreen->get_param(pscreen, PIPE_CAP_DEVICE_RESET_STATUS_QUERY)) {
   sPriv->extensions = dri_robust_screen_extensions;
   screen->has_reset_status_query = true;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH RFC 2/2] dri: Pass a __DRIcontext to ->set_damage_region()

2019-10-01 Thread Boris Brezillon
On Mon, 2 Sep 2019 16:32:01 +0200
Michel Dänzer  wrote:

> On 2019-08-30 7:00 p.m., Boris Brezillon wrote:
> > 
> > So, next question is, do you think it's acceptable to pass a
> > DRIcontext here, and if not, do you have any idea how to solve this
> > problem?  
> 
> Hmm, not sure. Maybe it would be better to explicitly pass in the
> __DRIimage* to which the damage region applies?
> 
> 

Sorry for the late reply. I had a look at this proposal and I don't see
how passing a __DRIimage object would help. There's this comment [1]
that makes me think passing a drawable is the right thing to do, but at
the same time I'm not sure how to rework the logic to make it work
without having access to the pipe_context (sounds like an invasive
change to me).
So, I suggest that we revert [2] and [3] until we find a proper
solution to address the problem.

Daniel, Qiang, are you okay with that?

[1]https://elixir.bootlin.com/mesa/latest/source/src/mesa/state_tracker/st_manager.c#L197
[2]492ffbed63a2 ("st/dri2: Implement DRI2bufferDamageExtension")
[3]65ae86b85422 ("panfrost: Add support for KHR_partial_update()")
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 01/30] pan/midgard: Add missing parens in SWIZZLE definition

2019-10-01 Thread Boris Brezillon
On Sat, 28 Sep 2019 15:02:06 -0400
Alyssa Rosenzweig  wrote:

> TODO: Move me to front of series.

Looks like you've already done that :).

> 
> Signed-off-by: Alyssa Rosenzweig 

Reviewed-by: Boris Brezillon 


> ---
>  src/panfrost/midgard/helpers.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/panfrost/midgard/helpers.h b/src/panfrost/midgard/helpers.h
> index ac58fd50327..343fad0fea8 100644
> --- a/src/panfrost/midgard/helpers.h
> +++ b/src/panfrost/midgard/helpers.h
> @@ -189,7 +189,7 @@ quadword_size(int tag)
>  
>  /* Swizzle support */
>  
> -#define SWIZZLE(A, B, C, D) ((D << 6) | (C << 4) | (B << 2) | (A << 0))
> +#define SWIZZLE(A, B, C, D) (((D) << 6) | ((C) << 4) | ((B) << 2) | ((A) << 
> 0))
>  #define SWIZZLE_FROM_ARRAY(r) SWIZZLE(r[0], r[1], r[2], r[3])
>  #define COMPONENT_X 0x0
>  #define COMPONENT_Y 0x1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] panfrost: Make sure a clear does not re-use a pre-existing batch

2019-09-22 Thread Boris Brezillon
On Sun, 22 Sep 2019 15:24:10 +0200
Boris Brezillon  wrote:

> On Sun, 22 Sep 2019 08:38:30 -0400
> Alyssa Rosenzweig  wrote:
> 
> > > > To be clear, if we have a batch and do the following operations:
> > > > 
> > > > clear red
> > > > draw 1
> > > > clear green
> > > > draw 2
> > > > flush
> > > > 
> > > > All we should see is #2 on a green background, which this patch handles
> > > > by the second clear invalidating all the clears/draws that came before
> > > > it (provided there is no flush in between). 
> > > > 
> > > > I might just be tripped up by the "freeze" name. That really means throw
> > > > away / free here, I guess?
> > > 
> > > Nope. Freeze means "stop queuing new draws to this batch". I guess we
> > > could free the batch as well if the result of the previous draws/clear
> > > are really overwritten by this new clear, but that implies checking the
> > > new clear flags to make sure they target the same depth/stencil/color
> > > combination. On the other hand, I'm wondering if it's really the
> > > driver's job to try to optimize silly things the apps might do. I mean,
> > > the sequence you describe does not look like a wise thing to do since
> > > the "clear red+draw 1" end up being overwritten by "clear green + draw
> > > 2".
> > 
> > I'm quite confused how this patch works, then.
> > 
> > A few thoughts: if the app clears all buffers in the middle, then yes
> > it's silly and yes we may as well optimize it out. (Should that be a thing 
> > GL
> > drivers have to do? I mean, if the other drivers are too...)
> > 
> > If the sequence is more like:
> > 
> > clear all buffers
> > draw 1
> > clear color buffer (preserve depth stencil)
> > draw 2
> > flush
> > 
> > That second clear should really be done by drawing a full screen quad,
> > just like if we were wallpapering, except loading its colour from a
> > uniform instead of a texture.
> > 
> > Similarly, a depth-only clear mid-frame can be emulated by drawing a
> > full-screen quad with the gl_Position.zw components juryrigged to the
> > desired depth components, and disabling colour draws by setting the
> > colour mask to 0x0. That also means you can skip having any shader at
> > all (literally set the shader pointer to 0x0) so that's faster.
> > 
> > Finally, a stencil-only clear can occur similarly playing tricks with
> > the stencil test parameters.
> > 
> > I suspect u_blitter or mesa/st is capable of doing these sorts of tricks
> > on our behalf, but I have not researched it extensively.  
> 
> AFAIU, mesa/st does not transform the clear into a quad-draw for
> depth/stencil only clears, it only does that for the color(s)-masked
> case. That's certainly something we can do in Panfrost if we want to
> avoid creating a new batch in such situations though.

One more thing: optimization of the above scenario is probably
something we'll want to have at some point, but I think the current
patch is worth applying in the meantime. All this patch does is
enforce ordering of clears/draws to make sure the end result matches
users' expectations.

> 
> > 
> > In any case, for a partial clear mid-frame, we would much rather do:
> > 
> > clear all buffers
> > draw 1
> > draw fullscreen quad (disable Z/S writes)
> > draw 2
> > flush
> > 
> > than what it sounds like this patch does
> > 
> > clear all buffers
> > draw 1
> > flush
> > 
> > clear all buffers
> > wallpaper
> > draw 2
> > flush
> > 
> > Please do correct me if I've misunderstood the logic here.  
> 
> 
> There's no flush introduced by this patch, it just adds a dep between
> batch 2 and 1:
> 
> batch1:   clear all buffers
>   draw 1
> 
> batch2: clear all buffers
>   wallpaper
>   draw 2
> 
>   flush (actually flushes batch 1 and 2)
> 
> with batch2 --depends-on--> batch1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 01/17] panfrost: Extend the panfrost_batch_add_bo() API to pass access flags

2019-09-22 Thread Boris Brezillon
On Sun, 22 Sep 2019 09:26:45 -0400
Alyssa Rosenzweig  wrote:

> > +your collabora address  
> 
> Thank you
> 
> > > > > I think this batch_add_bo should probably dropped altogether? This 
> > > > > loop
> > > > > is dealing with constant buffers; the shaders themselves were added   
> > > > >  
> > > > 
> > > > I'll double check. I couldn't find where BOs containing shader programs
> > > > were added last time I looked.
> > > 
> > > Masking a real bug :o
> > > 
> > > It should probably happen in panfrost_patch_shader_state?  
> > 
> > Ok, I'll add it there, I wasn't sure this function was called for all
> > shaders, but looking at the code a second time it seems to be the case.  
> 
> I think so as well, yeah.
> 
> > > As I stated before, I thought we should be adding the BO for
> > > wallpapering when we actually wallpaper, since that's a slow path. Not
> > > wallpapering is the default and ideally what most apps should do.  
> > 
> > Wallpapering happens too late (when we are flushing the batch) to have
> > an impact on the dep graph, but we can probably know that wallpapering
> > will be needed before that. My question remains though, are
> > vertex/tiler supposed to touch the texture BO they're reading from, or
> > should we only flag the BO for FRAGMENT use.  
> 
> Vertex/tiler should not touch the texture BO, unless you're texturing
> from the vertex shader (which we're not for wallpapering).

Okay, then adding the READ flag isn't really needed: a WRITE is more
constraining than a READ, and the FRAGMENT job already writes the FBO
BOs.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] panfrost: Make sure a clear does not re-use a pre-existing batch

2019-09-22 Thread Boris Brezillon
On Sun, 22 Sep 2019 08:38:30 -0400
Alyssa Rosenzweig  wrote:

> > > To be clear, if we have a batch and do the following operations:
> > > 
> > >   clear red
> > >   draw 1
> > >   clear green
> > >   draw 2
> > >   flush
> > > 
> > > All we should see is #2 on a green background, which this patch handles
> > > by the second clear invalidating all the clears/draws that came before
> > > it (provided there is no flush in between). 
> > > 
> > > I might just be tripped up by the "freeze" name. That really means throw
> > > away / free here, I guess?  
> > 
> > Nope. Freeze means "stop queuing new draws to this batch". I guess we
> > could free the batch as well if the result of the previous draws/clear
> > are really overwritten by this new clear, but that implies checking the
> > new clear flags to make sure they target the same depth/stencil/color
> > combination. On the other hand, I'm wondering if it's really the
> > driver's job to try to optimize silly things the apps might do. I mean,
> > the sequence you describe does not look like a wise thing to do since
> > the "clear red+draw 1" end up being overwritten by "clear green + draw
> > 2".  
> 
> I'm quite confused how this patch works, then.
> 
> A few thoughts: if the app clears all buffers in the middle, then yes
> it's silly and yes we may as well optimize it out. (Should that be a thing GL
> drivers have to do? I mean, if the other drivers are too...)
> 
> If the sequence is more like:
> 
>   clear all buffers
>   draw 1
>   clear color buffer (preserve depth stencil)
>   draw 2
>   flush
> 
> That second clear should really be done by drawing a full screen quad,
> just like if we were wallpapering, except loading its colour from a
> uniform instead of a texture.
> 
> Similarly, a depth-only clear mid-frame can be emulated by drawing a
> full-screen quad with the gl_Position.zw components juryrigged to the
> desired depth components, and disabling colour draws by setting the
> colour mask to 0x0. That also means you can skip having any shader at
> all (literally set the shader pointer to 0x0) so that's faster.
> 
> Finally, a stencil-only clear can occur similarly playing tricks with
> the stencil test parameters.
> 
> I suspect u_blitter or mesa/st is capable of doing these sorts of tricks
> on our behalf, but I have not researched it extensively.

AFAIU, mesa/st does not transform the clear into a quad-draw for
depth/stencil only clears, it only does that for the color(s)-masked
case. That's certainly something we can do in Panfrost if we want to
avoid creating a new batch in such situations though.

> 
> In any case, for a partial clear mid-frame, we would much rather do:
> 
>   clear all buffers
>   draw 1
>   draw fullscreen quad (disable Z/S writes)
>   draw 2
>   flush
> 
> than what it sounds like this patch does
> 
>   clear all buffers
>   draw 1
>   flush
> 
>   clear all buffers
>   wallpaper
>   draw 2
>   flush
> 
> Please do correct me if I've misunderstood the logic here.


There's no flush introduced by this patch, it just adds a dep between
batch 2 and 1:

batch1: clear all buffers
draw 1

batch2: clear all buffers
wallpaper
draw 2

flush (actually flushes batch 1 and 2)

with batch2 --depends-on--> batch1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 01/17] panfrost: Extend the panfrost_batch_add_bo() API to pass access flags

2019-09-22 Thread Boris Brezillon
+your collabora address

On Sun, 22 Sep 2019 08:31:40 -0400
Alyssa Rosenzweig  wrote:

> > > Although actually I am not at all sure what this batch_add_bo is doing
> > > at all?
> > > 
> > > I think this batch_add_bo should probably dropped altogether? This loop
> > > is dealing with constant buffers; the shaders themselves were added  
> > 
> > I'll double check. I couldn't find where BOs containing shader programs
> > were added last time I looked.  
> 
> Masking a real bug :o
> 
> It should probably happen in panfrost_patch_shader_state?

Ok, I'll add it there, I wasn't sure this function was called for all
shaders, but looking at the code a second time it seems to be the case.

> 
> > > >  void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
> > > >  {
> > > > +uint32_t flags = PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_WRITE |
> > > > + PAN_BO_ACCESS_VERTEX_TILER |
> > > > + PAN_BO_ACCESS_FRAGMENT;
> > > 
> > > I think we can drop VERTEX_TILER here...? The buffers are written right
> > > at the end of the FRAGMENT job, not touched before that.  
> > 
> > What about the read done when drawing the wallpaper? I guess it's also
> > only read by the fragment job, but I wasn't sure.  
> 
> As I stated before, I thought we should be adding the BO for
> wallpapering when we actually wallpaper, since that's a slow path. Not
> wallpapering is the default and ideally what most apps should do.

Wallpapering happens too late (when we are flushing the batch) to have
an impact on the dep graph, but we can probably know that wallpapering
will be needed before that. My question remains though, are
vertex/tiler supposed to touch the texture BO they're reading from, or
should we only flag the BO for FRAGMENT use.

> 
> > > If nothing else is broken, this should allow a nice perf boost with
> > > pipelining, so the vertex/tiler from frame n+1 can run in parallel with
> > > the fragment of frame n (rather than blocking on frame n finishing with
> > > the FBOs).  
> > 
> > Would require the kernel patches I posted earlier for that to
> > happen ;-). Right now all jobs touching the same BO are serialized
> > because of the implicit BO fences added by the kernel driver.  
> 
> Sure~ Maybe this sort of bug was the reason you weren't seeing
> improvement from those kernel patches?

Maybe.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] panfrost: Make sure a clear does not re-use a pre-existing batch

2019-09-22 Thread Boris Brezillon
On Fri, 20 Sep 2019 15:45:33 -0400
Alyssa Rosenzweig  wrote:

> To be clear, if we have a batch and do the following operations:
> 
>   clear red
>   draw 1
>   clear green
>   draw 2
>   flush
> 
> All we should see is #2 on a green background, which this patch handles
> by the second clear invalidating all the clears/draws that came before
> it (provided there is no flush in between). 
> 
> I might just be tripped up by the "freeze" name. That really means throw
> away / free here, I guess?

Nope. Freeze means "stop queuing new draws to this batch". I guess we
could free the batch as well if the result of the previous draws/clear
are really overwritten by this new clear, but that implies checking the
new clear flags to make sure they target the same depth/stencil/color
combination. On the other hand, I'm wondering if it's really the
driver's job to try to optimize silly things the apps might do. I mean,
the sequence you describe does not look like a wise thing to do since
the "clear red+draw 1" end up being overwritten by "clear green + draw
2".

> 
> Provided that's the idea (and we're not somehow saving the original draw
> 1), it's Reviewed-by A R 
> 
> On Fri, Sep 20, 2019 at 04:53:37PM +0200, Boris Brezillon wrote:
> > glClear()s are expected to be the first thing GL apps do before drawing
> > new things. If there's already an existing batch targeting the same
> > FBO that has draws attached to it, we should make sure the new clear
> > gets a new batch assigned to guarantee that the FB content is actually
> > cleared with the requested color/depth/stencil values.
> > 
> > We create a panfrost_get_fresh_batch_for_fbo() helper for that and
> > call it from panfrost_clear().
> > 
> > Signed-off-by: Boris Brezillon 
> > ---
> >  src/gallium/drivers/panfrost/pan_context.c |  2 +-
> >  src/gallium/drivers/panfrost/pan_job.c | 21 +
> >  src/gallium/drivers/panfrost/pan_job.h |  3 +++
> >  3 files changed, 25 insertions(+), 1 deletion(-)
> > 
> > diff --git a/src/gallium/drivers/panfrost/pan_context.c 
> > b/src/gallium/drivers/panfrost/pan_context.c
> > index ac01461a07fe..b2f2a9da7a51 100644
> > --- a/src/gallium/drivers/panfrost/pan_context.c
> > +++ b/src/gallium/drivers/panfrost/pan_context.c
> > @@ -162,7 +162,7 @@ panfrost_clear(
> >  double depth, unsigned stencil)
> >  {
> >  struct panfrost_context *ctx = pan_context(pipe);
> > -struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
> > +struct panfrost_batch *batch = 
> > panfrost_get_fresh_batch_for_fbo(ctx);
> >  
> >  panfrost_batch_add_fbo_bos(batch);
> >  panfrost_batch_clear(batch, buffers, color, depth, stencil);
> > diff --git a/src/gallium/drivers/panfrost/pan_job.c 
> > b/src/gallium/drivers/panfrost/pan_job.c
> > index d8330bc133a6..4ec2aa0565d7 100644
> > --- a/src/gallium/drivers/panfrost/pan_job.c
> > +++ b/src/gallium/drivers/panfrost/pan_job.c
> > @@ -298,6 +298,27 @@ panfrost_get_batch_for_fbo(struct panfrost_context 
> > *ctx)
> >  return batch;
> >  }
> >  
> > +struct panfrost_batch *
> > +panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx)
> > +{
> > +struct panfrost_batch *batch;
> > +
> > +batch = panfrost_get_batch(ctx, >pipe_framebuffer);
> > +
> > +/* The batch has no draw/clear queued, let's return it directly.
> > + * Note that it's perfectly fine to re-use a batch with an
> > + * existing clear, we'll just update it with the new clear request.
> > + */
> > +if (!batch->last_job.gpu)
> > +return batch;
> > +
> > +/* Otherwise, we need to freeze the existing one and instantiate a 
> > new
> > + * one.
> > + */
> > +panfrost_freeze_batch(batch);
> > +return panfrost_get_batch(ctx, >pipe_framebuffer);
> > +}
> > +
> >  static bool
> >  panfrost_batch_fence_is_signaled(struct panfrost_batch_fence *fence)
> >  {
> > diff --git a/src/gallium/drivers/panfrost/pan_job.h 
> > b/src/gallium/drivers/panfrost/pan_job.h
> > index e1b1f56a2e64..0bd78bba267a 100644
> > --- a/src/gallium/drivers/panfrost/pan_job.h
> > +++ b/src/gallium/drivers/panfrost/pan_job.h
> > @@ -172,6 +172,9 @@ panfrost_batch_fence_reference(struct 
> > panfrost_batch_fence *batch);
> >  struct panfrost_batch *
> >  panfrost_get_batch_for_fbo(struct panfrost_context *ctx);
> >  
> > +struct panfrost_batch *
> > +panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx);
> > +
> >  void
> >  panfrost_batch_init(struct panfrost_context *ctx);
> >  
> > -- 
> > 2.21.0  

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 01/17] panfrost: Extend the panfrost_batch_add_bo() API to pass access flags

2019-09-22 Thread Boris Brezillon
On Fri, 20 Sep 2019 16:53:49 -0400
Alyssa Rosenzweig  wrote:

> > @@ -1121,7 +1134,11 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, 
> > bool with_vertex_data)
> >  
> >  struct panfrost_shader_state *ss = 
> > >variants[all->active_variant];
> >  
> > -panfrost_batch_add_bo(batch, ss->bo);
> > +panfrost_batch_add_bo(batch, ss->bo,
> > +  PAN_BO_ACCESS_PRIVATE |
> > +  PAN_BO_ACCESS_READ |  
> 
> > +  PAN_BO_ACCESS_VERTEX_TILER |
> > +  PAN_BO_ACCESS_FRAGMENT);  
> 
> I believe this should be just the access for the stage `i`
> 
> Although actually I am not at all sure what this batch_add_bo is doing
> at all?
> 
> I think this batch_add_bo should probably dropped altogether? This loop
> is dealing with constant buffers; the shaders themselves were added

I'll double check. I couldn't find where BOs containing shader programs
were added last time I looked.

> 
> >  void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
> >  {
> > +uint32_t flags = PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_WRITE |
> > + PAN_BO_ACCESS_VERTEX_TILER |
> > + PAN_BO_ACCESS_FRAGMENT;  
> 
> I think we can drop VERTEX_TILER here...? The buffers are written right
> at the end of the FRAGMENT job, not touched before that.

What about the read done when drawing the wallpaper? I guess it's also
only read by the fragment job, but I wasn't sure.

> 
> If nothing else is broken, this should allow a nice perf boost with
> pipelining, so the vertex/tiler from frame n+1 can run in parallel with
> the fragment of frame n (rather than blocking on frame n finishing with
> the FBOs).

Would require the kernel patches I posted earlier for that to
happen ;-). Right now all jobs touching the same BO are serialized
because of the implicit BO fences added by the kernel driver.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] panfrost: Draw the wallpaper when only depth/stencil bufs are cleared

2019-09-20 Thread Boris Brezillon
When only the depth/stencil bufs are cleared, we should make sure the
color content is reloaded into the tile buffers if we want to preserve
their content.

Signed-off-by: Boris Brezillon 
---
There might be a more optimal solution to do that (like not passing the
color bufs to the fragment job?), but this solution seems to fix a few
deqp tests.
---
 src/gallium/drivers/panfrost/pan_context.c |  2 +-
 src/gallium/drivers/panfrost/pan_job.c | 16 ++--
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index b2f2a9da7a51..c99bf1b26ce7 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1333,7 +1333,7 @@ panfrost_queue_draw(struct panfrost_context *ctx)
 
 if (rasterizer_discard)
 panfrost_scoreboard_queue_vertex_job(batch, vertex, FALSE);
-else if (ctx->wallpaper_batch)
+else if (ctx->wallpaper_batch && batch->first_tiler.gpu)
 panfrost_scoreboard_queue_fused_job_prepend(batch, vertex, 
tiler);
 else
 panfrost_scoreboard_queue_fused_job(batch, vertex, tiler);
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 4ec2aa0565d7..a2df31f96f00 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -698,10 +698,23 @@ panfrost_batch_get_tiler_dummy(struct panfrost_batch 
*batch)
 static void
 panfrost_batch_draw_wallpaper(struct panfrost_batch *batch)
 {
+/* Color 0 is cleared, no need to draw the wallpaper.
+ * TODO: MRT wallpapers.
+ */
+if (batch->clear & PIPE_CLEAR_COLOR0)
+return;
+
 /* Nothing to reload? TODO: MRT wallpapers */
 if (batch->key.cbufs[0] == NULL)
 return;
 
+/* No draw calls, and no clear on the depth/stencil bufs.
+ * Drawing the wallpaper would be useless.
+ */
+if (!batch->last_tiler.gpu &&
+!(batch->clear & PIPE_CLEAR_DEPTHSTENCIL))
+return;
+
 /* Check if the buffer has any content on it worth preserving */
 
 struct pipe_surface *surf = batch->key.cbufs[0];
@@ -923,8 +936,7 @@ panfrost_batch_submit(struct panfrost_batch *batch)
 goto out;
 }
 
-if (!batch->clear && batch->last_tiler.gpu)
-panfrost_batch_draw_wallpaper(batch);
+panfrost_batch_draw_wallpaper(batch);
 
 panfrost_scoreboard_link_batch(batch);
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] panfrost: More tests are passing

2019-09-20 Thread Boris Brezillon
Remove the tests that are now passing.

Signed-off-by: Boris Brezillon 
---
 .../drivers/panfrost/ci/expected-failures.txt | 153 --
 1 file changed, 153 deletions(-)

diff --git a/src/gallium/drivers/panfrost/ci/expected-failures.txt 
b/src/gallium/drivers/panfrost/ci/expected-failures.txt
index 7e7dbd62307b..91c1f14ce1a2 100644
--- a/src/gallium/drivers/panfrost/ci/expected-failures.txt
+++ b/src/gallium/drivers/panfrost/ci/expected-failures.txt
@@ -1,10 +1,3 @@
-dEQP-GLES2.functional.color_clear.masked_rgba Fail
-dEQP-GLES2.functional.color_clear.masked_rgb Fail
-dEQP-GLES2.functional.color_clear.masked_scissored_rgba Fail
-dEQP-GLES2.functional.color_clear.masked_scissored_rgb Fail
-dEQP-GLES2.functional.color_clear.scissored_rgba Fail
-dEQP-GLES2.functional.color_clear.scissored_rgb Fail
-dEQP-GLES2.functional.color_clear.short_scissored_rgb Fail
 dEQP-GLES2.functional.depth_range.write.0_8_to_third Fail
 dEQP-GLES2.functional.depth_range.write.clamp_both Fail
 dEQP-GLES2.functional.depth_range.write.clamp_far Fail
@@ -672,201 +665,55 @@ 
dEQP-GLES2.functional.fragment_ops.depth_stencil.stencil_ops.zero_zero_zero Fail
 dEQP-GLES2.functional.fragment_ops.depth_stencil.write_mask.both Fail
 dEQP-GLES2.functional.fragment_ops.depth_stencil.write_mask.depth Fail
 dEQP-GLES2.functional.fragment_ops.depth_stencil.write_mask.stencil Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.0 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.10 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.11 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.12 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.13 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.15 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.16 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.17 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.18 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.19 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.1 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.20 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.21 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.22 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.23 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.24 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.25 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.26 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.29 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.30 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.31 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.32 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.33 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.34 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.35 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.36 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.37 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.38 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.39 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.3 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.40 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.41 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.42 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.43 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.44 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.46 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.47 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.48 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.49 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.50 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.51 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.52 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.53 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.54 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.55 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.56 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.57 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.58 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.59 Fail
-dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.5 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.60 Fail
 dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.61 Fail
-dEQP-GLES2

[Mesa-dev] [PATCH 1/3] panfrost: Make sure a clear does not re-use a pre-existing batch

2019-09-20 Thread Boris Brezillon
glClear()s are expected to be the first thing GL apps do before drawing
new things. If there's already an existing batch targeting the same
FBO that has draws attached to it, we should make sure the new clear
gets a new batch assigned to guarantee that the FB content is actually
cleared with the requested color/depth/stencil values.

We create a panfrost_get_fresh_batch_for_fbo() helper for that and
call it from panfrost_clear().

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c |  2 +-
 src/gallium/drivers/panfrost/pan_job.c | 21 +
 src/gallium/drivers/panfrost/pan_job.h |  3 +++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index ac01461a07fe..b2f2a9da7a51 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -162,7 +162,7 @@ panfrost_clear(
 double depth, unsigned stencil)
 {
 struct panfrost_context *ctx = pan_context(pipe);
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+struct panfrost_batch *batch = panfrost_get_fresh_batch_for_fbo(ctx);
 
 panfrost_batch_add_fbo_bos(batch);
 panfrost_batch_clear(batch, buffers, color, depth, stencil);
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index d8330bc133a6..4ec2aa0565d7 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -298,6 +298,27 @@ panfrost_get_batch_for_fbo(struct panfrost_context *ctx)
 return batch;
 }
 
+struct panfrost_batch *
+panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx)
+{
+struct panfrost_batch *batch;
+
+batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
+
+/* The batch has no draw/clear queued, let's return it directly.
+ * Note that it's perfectly fine to re-use a batch with an
+ * existing clear, we'll just update it with the new clear request.
+ */
+if (!batch->last_job.gpu)
+return batch;
+
+/* Otherwise, we need to freeze the existing one and instantiate a new
+ * one.
+ */
+panfrost_freeze_batch(batch);
+return panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
+}
+
 static bool
 panfrost_batch_fence_is_signaled(struct panfrost_batch_fence *fence)
 {
diff --git a/src/gallium/drivers/panfrost/pan_job.h 
b/src/gallium/drivers/panfrost/pan_job.h
index e1b1f56a2e64..0bd78bba267a 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -172,6 +172,9 @@ panfrost_batch_fence_reference(struct panfrost_batch_fence 
*batch);
 struct panfrost_batch *
 panfrost_get_batch_for_fbo(struct panfrost_context *ctx);
 
+struct panfrost_batch *
+panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx);
+
 void
 panfrost_batch_init(struct panfrost_context *ctx);
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] dEQP-GLES2.functional.buffer.write.use.index_array.* are passing now.

2019-09-18 Thread Boris Brezillon
Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/ci/expected-failures.txt | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/gallium/drivers/panfrost/ci/expected-failures.txt 
b/src/gallium/drivers/panfrost/ci/expected-failures.txt
index b0fc872a3009..0cadaa20cdb2 100644
--- a/src/gallium/drivers/panfrost/ci/expected-failures.txt
+++ b/src/gallium/drivers/panfrost/ci/expected-failures.txt
@@ -1,5 +1,3 @@
-dEQP-GLES2.functional.buffer.write.use.index_array.array Fail
-dEQP-GLES2.functional.buffer.write.use.index_array.element_array Fail
 dEQP-GLES2.functional.color_clear.masked_rgba Fail
 dEQP-GLES2.functional.color_clear.masked_rgb Fail
 dEQP-GLES2.functional.color_clear.masked_scissored_rgba Fail
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] panfrost: Fix indexed draws

2019-09-18 Thread Boris Brezillon
->padded_count should be large enough to cover all vertices pointed by
the index array. Use the local vertex_count variable that contains the
updated vertex_count value for the indexed draw case.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 08b799b66bf8..1b8558c1c2c1 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1601,7 +1601,7 @@ panfrost_draw_vbo(
 
 ctx->padded_count = pan_expand_shift_odd(so);
 } else {
-ctx->padded_count = ctx->vertex_count;
+ctx->padded_count = vertex_count;
 
 /* Reset instancing state */
 ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = 0;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 13/17] panfrost: Make sure the BO is 'ready' when picked from the cache

2019-09-18 Thread Boris Brezillon
This is needed if we want to free the panfrost_batch object at submit
time in order to not have to GC the batch on the next job submission.

Signed-off-by: Boris Brezillon 
---
Changes in v3:
* Move the patch later in the series and squash "panfrost: Cache GPU
  accesses to BOs" in it
* Add extra comments to explain what we're doing
---
 src/gallium/drivers/panfrost/pan_bo.c  | 112 -
 src/gallium/drivers/panfrost/pan_bo.h  |   9 ++
 src/gallium/drivers/panfrost/pan_job.c |  11 +++
 3 files changed, 109 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_bo.c 
b/src/gallium/drivers/panfrost/pan_bo.c
index 9daddf9d0cc2..37602688d630 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -23,6 +23,7 @@
  * Authors (Collabora):
  *   Alyssa Rosenzweig 
  */
+#include 
 #include 
 #include 
 #include 
@@ -101,6 +102,63 @@ panfrost_bo_free(struct panfrost_bo *bo)
 ralloc_free(bo);
 }
 
+/* Returns true if the BO is ready, false otherwise.
+ * access_type is encoding the type of access one wants to ensure is done.
+ * Say you want to make sure all writers are done writing, you should pass
+ * PAN_BO_ACCESS_WRITE.
+ * If you want to wait for all users, you should pass PAN_BO_ACCESS_RW.
+ * PAN_BO_ACCESS_READ would work too as waiting for readers implies
+ * waiting for writers as well, but we want to make things explicit and waiting
+ * only for readers is impossible.
+ */
+bool
+panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
+ uint32_t access_type)
+{
+struct drm_panfrost_wait_bo req = {
+.handle = bo->gem_handle,
+   .timeout_ns = timeout_ns,
+};
+int ret;
+
+assert(access_type == PAN_BO_ACCESS_WRITE ||
+   access_type == PAN_BO_ACCESS_RW);
+
+/* If the BO has been exported or imported we can't rely on the cached
+ * state, we need to call the WAIT_BO ioctl.
+ */
+if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) {
+/* If ->gpu_access is 0, the BO is idle, no need to wait. */
+if (!bo->gpu_access)
+return true;
+
+/* If the caller only wants to wait for writers and no
+ * writes are pending, we don't have to wait.
+ */
+if (access_type == PAN_BO_ACCESS_WRITE &&
+!(bo->gpu_access & PAN_BO_ACCESS_WRITE))
+return true;
+}
+
+/* The ioctl returns >= 0 value when the BO we are waiting for is ready
+ * -1 otherwise.
+ */
+ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
+if (ret != -1) {
+/* Set gpu_access to 0 so that the next call to bo_wait()
+ * doesn't have to call the WAIT_BO ioctl.
+ */
+bo->gpu_access = 0;
+return true;
+}
+
+/* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
+ * is invalid, which shouldn't happen here.
+ */
+assert(errno == ETIMEDOUT || errno == EBUSY);
+return false;
+}
+
 /* Helper to calculate the bucket index of a BO */
 
 static unsigned
@@ -137,9 +195,8 @@ pan_bucket(struct panfrost_screen *screen, unsigned size)
  * BO. */
 
 static struct panfrost_bo *
-panfrost_bo_cache_fetch(
-struct panfrost_screen *screen,
-size_t size, uint32_t flags)
+panfrost_bo_cache_fetch(struct panfrost_screen *screen,
+size_t size, uint32_t flags, bool dontwait)
 {
 pthread_mutex_lock(&screen->bo_cache_lock);
 struct list_head *bucket = pan_bucket(screen, size);
@@ -147,27 +204,30 @@ panfrost_bo_cache_fetch(
 
 /* Iterate the bucket looking for something suitable */
 list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
-if (entry->size >= size &&
-entry->flags == flags) {
-int ret;
-struct drm_panfrost_madvise madv;
+if (entry->size < size || entry->flags != flags)
+continue;
 
-/* This one works, splice it out of the cache */
-list_del(&entry->link);
+if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX,
+  PAN_BO_ACCESS_RW))
+continue;
 
-madv.handle = entry->gem_handle;
-madv.madv = PANFROST_MADV_WILLNEED;
-madv.retained = 0;
+struct drm_panfrost_madvise madv = {
+.handle = entry->gem_handle,
+.madv = PANFROST_MADV_WILLNEED,
+  

[Mesa-dev] [PATCH v3 17/17] panfrost/ci: New tests are passing

2019-09-18 Thread Boris Brezillon
All dEQP-GLES2.functional.fbo.render.texsubimage.* tests are now
passing.

Signed-off-by: Boris Brezillon 
Reviewed-by: Alyssa Rosenzweig 
---
Changes in v3:
 * Collect R-b
---
 src/gallium/drivers/panfrost/ci/expected-failures.txt | 4 
 1 file changed, 4 deletions(-)

diff --git a/src/gallium/drivers/panfrost/ci/expected-failures.txt 
b/src/gallium/drivers/panfrost/ci/expected-failures.txt
index b0fc872a3009..3c707230dd23 100644
--- a/src/gallium/drivers/panfrost/ci/expected-failures.txt
+++ b/src/gallium/drivers/panfrost/ci/expected-failures.txt
@@ -53,10 +53,6 @@ 
dEQP-GLES2.functional.fbo.render.shared_colorbuffer.tex2d_rgb_depth_component16
 
dEQP-GLES2.functional.fbo.render.shared_depthbuffer.rbo_rgb565_depth_component16
 Fail
 
dEQP-GLES2.functional.fbo.render.shared_depthbuffer.tex2d_rgba_depth_component16
 Fail
 
dEQP-GLES2.functional.fbo.render.shared_depthbuffer.tex2d_rgb_depth_component16 
Fail
-dEQP-GLES2.functional.fbo.render.texsubimage.after_render_tex2d_rgba Fail
-dEQP-GLES2.functional.fbo.render.texsubimage.after_render_tex2d_rgb Fail
-dEQP-GLES2.functional.fbo.render.texsubimage.between_render_tex2d_rgba Fail
-dEQP-GLES2.functional.fbo.render.texsubimage.between_render_tex2d_rgb Fail
 dEQP-GLES2.functional.fragment_ops.depth_stencil.random.0 Fail
 dEQP-GLES2.functional.fragment_ops.depth_stencil.random.10 Fail
 dEQP-GLES2.functional.fragment_ops.depth_stencil.random.11 Fail
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 12/17] panfrost: Add flags to reflect the BO imported/exported state

2019-09-18 Thread Boris Brezillon
Will be useful to make the ioctl(WAIT_BO) call conditional on BOs that
are not exported/imported (meaning that all GPU accesses are known
by the context).

Signed-off-by: Boris Brezillon 
Reviewed-by: Alyssa Rosenzweig 
---
Changes in v3:
* Collect R-b
---
 src/gallium/drivers/panfrost/pan_bo.c | 4 ++--
 src/gallium/drivers/panfrost/pan_bo.h | 6 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_bo.c 
b/src/gallium/drivers/panfrost/pan_bo.c
index 209d1e0d71e5..9daddf9d0cc2 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -355,7 +355,7 @@ panfrost_bo_import(struct panfrost_screen *screen, int fd)
 bo->gem_handle = gem_handle;
 bo->gpu = (mali_ptr) get_bo_offset.offset;
 bo->size = lseek(fd, 0, SEEK_END);
-bo->flags |= PAN_BO_DONT_REUSE;
+bo->flags |= PAN_BO_DONT_REUSE | PAN_BO_IMPORTED;
 assert(bo->size > 0);
 pipe_reference_init(&bo->reference, 1);
 
@@ -376,7 +376,7 @@ panfrost_bo_export(struct panfrost_bo *bo)
 if (ret == -1)
 return -1;
 
-bo->flags |= PAN_BO_DONT_REUSE;
+bo->flags |= PAN_BO_DONT_REUSE | PAN_BO_EXPORTED;
 return args.fd;
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_bo.h 
b/src/gallium/drivers/panfrost/pan_bo.h
index 73cc74a260d4..e4743f820aeb 100644
--- a/src/gallium/drivers/panfrost/pan_bo.h
+++ b/src/gallium/drivers/panfrost/pan_bo.h
@@ -56,6 +56,12 @@ struct panfrost_screen;
  * let the BO logic know about this constraint. */
 #define PAN_BO_DONT_REUSE (1 << 5)
 
+/* BO has been imported */
+#define PAN_BO_IMPORTED   (1 << 6)
+
+/* BO has been exported */
+#define PAN_BO_EXPORTED   (1 << 7)
+
 /* GPU access flags */
 
 /* BO is either shared (can be accessed by more than one GPU batch) or private
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 16/17] panfrost: Take draw call order into account

2019-09-18 Thread Boris Brezillon
This is not strictly required, but let's try to match the draw call
orders, just in case the app had a reason to do it in this order.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.h |  6 ++
 src/gallium/drivers/panfrost/pan_job.c | 23 +++---
 src/gallium/drivers/panfrost/pan_job.h |  3 +++
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.h 
b/src/gallium/drivers/panfrost/pan_context.h
index f13967f51b46..c6b53685b285 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -114,6 +114,12 @@ struct panfrost_context {
 struct panfrost_batch *batch;
 struct hash_table *fbo_to_batch;
 
+/* A list containing all non-submitted batches since the last flush.
+ * This list is used to keep track of clear/draw order on batches that
+ * don't have explicit dependencies between them.
+ */
+struct list_head batch_queue;
+
 /* panfrost_bo -> panfrost_bo_access */
 struct hash_table *accessed_bos;
 
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 45f9d9d24b41..d8330bc133a6 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -116,6 +116,7 @@ panfrost_create_batch(struct panfrost_context *ctx,
 util_dynarray_init(&batch->headers, batch);
 util_dynarray_init(&batch->gpu_headers, batch);
 util_dynarray_init(&batch->dependencies, batch);
+list_inithead(&batch->queue_node);
 batch->out_sync = panfrost_create_batch_fence(batch);
 util_copy_framebuffer_state(&batch->key, key);
 
@@ -180,6 +181,9 @@ panfrost_free_batch(struct panfrost_batch *batch)
 panfrost_batch_fence_unreference(*dep);
 }
 
+/* Remove the batch from the batch queue. */
+list_del(&batch->queue_node);
+
 /* The out_sync fence lifetime is different from the batch one
  * since other batches might want to wait on a fence of already
  * submitted/signaled batch. All we need to do here is make sure the
@@ -570,6 +574,13 @@ void panfrost_batch_add_fbo_bos(struct panfrost_batch 
*batch)
 struct panfrost_resource *rsrc = 
pan_resource(batch->key.zsbuf->texture);
 panfrost_batch_add_bo(batch, rsrc->bo, flags);
 }
+
+/* If the batch was not already present in the queue, add it now.
+ * Should we move the batch to the end of the queue when a new draw
+ * happens?
+ */
+if (list_empty(&batch->queue_node))
+list_addtail(&batch->queue_node, &batch->ctx->batch_queue);
 }
 
 struct panfrost_bo *
@@ -916,10 +927,15 @@ panfrost_flush_all_batches(struct panfrost_context *ctx, 
bool wait)
 util_dynarray_init(&fences, NULL);
 }
 
-hash_table_foreach(ctx->fbo_to_batch, hentry) {
-struct panfrost_batch *batch = hentry->data;
+/* We can use the for_each_entry_safe() iterator here because the
+ * next element might be removed from the list when flushing the
+ * dependencies in panfrost_batch_submit().
+ */
+while (!list_empty(&ctx->batch_queue)) {
+struct panfrost_batch *batch;
 
-assert(batch);
+batch = list_first_entry(&ctx->batch_queue,
+ struct panfrost_batch, queue_node);
 
 if (wait) {
 panfrost_batch_fence_reference(batch->out_sync);
@@ -1189,4 +1205,5 @@ panfrost_batch_init(struct panfrost_context *ctx)
 panfrost_batch_compare);
 ctx->accessed_bos = _mesa_hash_table_create(ctx, _mesa_hash_pointer,
 _mesa_key_pointer_equal);
+list_inithead(&ctx->batch_queue);
 }
diff --git a/src/gallium/drivers/panfrost/pan_job.h 
b/src/gallium/drivers/panfrost/pan_job.h
index 25905b516739..e1b1f56a2e64 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -71,6 +71,9 @@ struct panfrost_batch {
 struct panfrost_context *ctx;
 struct pipe_framebuffer_state key;
 
+/* Used to insert the batch in the batch queue */
+struct list_head queue_node;
+
 /* Buffers cleared (PIPE_CLEAR_* bitmask) */
 unsigned clear;
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 15/17] panfrost: Rename ctx->batches into ctx->fbo_to_batch

2019-09-18 Thread Boris Brezillon
We are about to add a batch queue to keep track of submission order.
Let's rename the existing batches hash table (which is used to get the
batch attached to an FBO) into fbo_to_batch to avoid confusion.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c  |  2 +-
 src/gallium/drivers/panfrost/pan_context.h  |  2 +-
 src/gallium/drivers/panfrost/pan_job.c  | 21 +++--
 src/gallium/drivers/panfrost/pan_resource.c | 16 
 4 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 07bafad58a00..0330b5852676 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1355,7 +1355,7 @@ panfrost_flush(
  */
 if (fence) {
 util_dynarray_init(, NULL);
-hash_table_foreach(ctx->batches, hentry) {
+hash_table_foreach(ctx->fbo_to_batch, hentry) {
 struct panfrost_batch *batch = hentry->data;
 
 panfrost_batch_fence_reference(batch->out_sync);
diff --git a/src/gallium/drivers/panfrost/pan_context.h 
b/src/gallium/drivers/panfrost/pan_context.h
index d50ed57d5d8a..f13967f51b46 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -112,7 +112,7 @@ struct panfrost_context {
 
 /* Bound job batch and map of panfrost_batch_key to job batches */
 struct panfrost_batch *batch;
-struct hash_table *batches;
+struct hash_table *fbo_to_batch;
 
 /* panfrost_bo -> panfrost_bo_access */
 struct hash_table *accessed_bos;
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index a56f4044fda0..45f9d9d24b41 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -132,9 +132,9 @@ panfrost_freeze_batch(struct panfrost_batch *batch)
  * matches. This way, next draws/clears targeting this FBO will trigger
  * the creation of a new batch.
  */
-entry = _mesa_hash_table_search(ctx->batches, &batch->key);
+entry = _mesa_hash_table_search(ctx->fbo_to_batch, &batch->key);
 if (entry && entry->data == batch)
-_mesa_hash_table_remove(ctx->batches, entry);
+_mesa_hash_table_remove(ctx->fbo_to_batch, entry);
 
 /* If this is the bound batch, the panfrost_context parameters are
  * relevant so submitting it invalidates those parameters, but if it's
@@ -153,7 +153,7 @@ static bool panfrost_batch_is_frozen(struct panfrost_batch 
*batch)
 struct panfrost_context *ctx = batch->ctx;
 struct hash_entry *entry;
 
-entry = _mesa_hash_table_search(ctx->batches, &batch->key);
+entry = _mesa_hash_table_search(ctx->fbo_to_batch, &batch->key);
 if (entry && entry->data == batch)
 return false;
 
@@ -248,7 +248,8 @@ panfrost_get_batch(struct panfrost_context *ctx,
const struct pipe_framebuffer_state *key)
 {
 /* Lookup the job first */
-struct hash_entry *entry = _mesa_hash_table_search(ctx->batches, key);
+struct hash_entry *entry = _mesa_hash_table_search(ctx->fbo_to_batch,
+   key);
 
 if (entry)
 return entry->data;
@@ -258,7 +259,7 @@ panfrost_get_batch(struct panfrost_context *ctx,
 struct panfrost_batch *batch = panfrost_create_batch(ctx, key);
 
 /* Save the created job */
-_mesa_hash_table_insert(ctx->batches, &batch->key, batch);
+_mesa_hash_table_insert(ctx->fbo_to_batch, &batch->key, batch);
 
 return batch;
 }
@@ -915,7 +916,7 @@ panfrost_flush_all_batches(struct panfrost_context *ctx, 
bool wait)
 util_dynarray_init(, NULL);
 }
 
-hash_table_foreach(ctx->batches, hentry) {
+hash_table_foreach(ctx->fbo_to_batch, hentry) {
 struct panfrost_batch *batch = hentry->data;
 
 assert(batch);
@@ -931,7 +932,7 @@ panfrost_flush_all_batches(struct panfrost_context *ctx, 
bool wait)
 panfrost_batch_submit(batch);
 }
 
-assert(!ctx->batches->entries);
+assert(!ctx->fbo_to_batch->entries);
 
 /* Collect batch fences before returning */
 panfrost_gc_fences(ctx);
@@ -1183,9 +1184,9 @@ panfrost_batch_is_scanout(struct panfrost_batch *batch)
 void
 panfrost_batch_init(struct panfrost_context *ctx)
 {
-ctx->batches = _mesa_hash_table_create(ctx,
-   panfrost_batch_hash,
-   panfrost_batch_compare);
+ctx-&g

[Mesa-dev] [PATCH v3 14/17] panfrost: Do fine-grained flushing when preparing BO for CPU accesses

2019-09-18 Thread Boris Brezillon
We don't have to flush all batches when we're only interested in
reading/writing a specific BO. Thanks to the
panfrost_flush_batches_accessing_bo() and panfrost_bo_wait() helpers
we can now flush only the batches touching the BO we want to access
from the CPU.

Signed-off-by: Boris Brezillon 
Reviewed-by: Alyssa Rosenzweig 
---
Changes in v3:
* Collect R-b
---
 src/gallium/drivers/panfrost/pan_resource.c | 27 +
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_resource.c 
b/src/gallium/drivers/panfrost/pan_resource.c
index 1f7605adcd5d..d59529ff15b7 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -578,10 +578,8 @@ panfrost_transfer_map(struct pipe_context *pctx,
 is_bound |= fb->cbufs[c]->texture == resource;
 }
 
-if (is_bound && (usage & PIPE_TRANSFER_READ)) {
-assert(level == 0);
-panfrost_flush_all_batches(ctx, true);
-}
+if (is_bound && (usage & PIPE_TRANSFER_READ))
+ assert(level == 0);
 
 /* TODO: Respect usage flags */
 
@@ -594,11 +592,11 @@ panfrost_transfer_map(struct pipe_context *pctx,
 /* No flush for writes to uninitialized */
 } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
 if (usage & PIPE_TRANSFER_WRITE) {
-/* STUB: flush reading */
-//printf("debug: missed reading flush %d\n", 
resource->target);
+panfrost_flush_batches_accessing_bo(ctx, bo, 
PAN_BO_GPU_ACCESS_RW);
+panfrost_bo_wait(bo, INT64_MAX, PAN_BO_GPU_ACCESS_RW);
 } else if (usage & PIPE_TRANSFER_READ) {
-/* STUB: flush writing */
-//printf("debug: missed writing flush %d (%d-%d)\n", 
resource->target, box->x, box->x + box->width);
+panfrost_flush_batches_accessing_bo(ctx, bo, 
PAN_BO_GPU_ACCESS_WRITE);
+panfrost_bo_wait(bo, INT64_MAX, 
PAN_BO_GPU_ACCESS_WRITE);
 } else {
 /* Why are you even mapping?! */
 }
@@ -748,11 +746,8 @@ panfrost_generate_mipmap(
  * reorder-type optimizations in place. But for now prioritize
  * correctness. */
 
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-bool has_draws = batch->last_job.gpu;
-
-if (has_draws)
-panfrost_flush_all_batches(ctx, true);
+panfrost_flush_batches_accessing_bo(ctx, rsrc->bo, 
PAN_BO_GPU_ACCESS_RW);
+panfrost_bo_wait(rsrc->bo, INT64_MAX, PAN_BO_GPU_ACCESS_RW);
 
 /* We've flushed the original buffer if needed, now trigger a blit */
 
@@ -765,8 +760,10 @@ panfrost_generate_mipmap(
 /* If the blit was successful, flush once more. If it wasn't, well, let
  * the state tracker deal with it. */
 
-if (blit_res)
-panfrost_flush_all_batches(ctx, true);
+if (blit_res) {
+panfrost_flush_batches_accessing_bo(ctx, rsrc->bo, 
PAN_BO_GPU_ACCESS_WRITE);
+panfrost_bo_wait(rsrc->bo, INT64_MAX, PAN_BO_GPU_ACCESS_WRITE);
+}
 
 return blit_res;
 }
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 06/17] panfrost: Start tracking inter-batch dependencies

2019-09-18 Thread Boris Brezillon
The idea is to track which BO are being accessed and the type of access
to determine when a dependency exists. Thanks to that we can build a
dependency graph that will allow us to flush batches in the correct
order.

Signed-off-by: Boris Brezillon 
---
Changes in v3:
* Fix coding style issues
* Do not check for batch presence in the reader array when updating
  a BO access (we already have this information)
* Add more comments to explain what we're doing and why we're doing
  it like that
---
 src/gallium/drivers/panfrost/pan_context.h |   3 +
 src/gallium/drivers/panfrost/pan_job.c | 355 -
 src/gallium/drivers/panfrost/pan_job.h |   3 +
 3 files changed, 356 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.h 
b/src/gallium/drivers/panfrost/pan_context.h
index ce3e0c899a4f..3b09952345cf 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -114,6 +114,9 @@ struct panfrost_context {
 struct panfrost_batch *batch;
 struct hash_table *batches;
 
+/* panfrost_bo -> panfrost_bo_access */
+struct hash_table *accessed_bos;
+
 /* Within a launch_grid call.. */
 const struct pipe_grid_info *compute_grid;
 
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 872c846207bf..b0494af3482f 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -36,6 +36,29 @@
 #include "pan_util.h"
 #include "pandecode/decode.h"
 
+/* panfrost_bo_access is here to help us keep track of batch accesses to BOs
+ * and build a proper dependency graph such that batches can be pipelined for
+ * better GPU utilization.
+ *
+ * Each accessed BO has a corresponding entry in the ->accessed_bos hash table.
+ * A BO is either being written or read at any time, that's what the type field
+ * encodes.
+ * When the last access is a write, the batch writing the BO might have read
+ * dependencies (readers that have not been executed yet and want to read the
+ * previous BO content), and when the last access is a read, all readers might
+ * depend on another batch to push its results to memory. That's what the
+ * readers/writers keep track off.
+ * There can only be one writer at any given time, if a new batch wants to
+ * write to the same BO, a dependency will be added between the new writer and
+ * the old writer (at the batch level), and panfrost_bo_access->writer will be
+ * updated to point to the new writer.
+ */
+struct panfrost_bo_access {
+uint32_t type;
+struct util_dynarray readers;
+struct panfrost_batch_fence *writer;
+};
+
 static struct panfrost_batch_fence *
 panfrost_create_batch_fence(struct panfrost_batch *batch)
 {
@@ -92,6 +115,7 @@ panfrost_create_batch(struct panfrost_context *ctx,
 
 util_dynarray_init(&batch->headers, batch);
 util_dynarray_init(&batch->gpu_headers, batch);
+util_dynarray_init(&batch->dependencies, batch);
 batch->out_sync = panfrost_create_batch_fence(batch);
 util_copy_framebuffer_state(>key, key);
 
@@ -151,6 +175,11 @@ panfrost_free_batch(struct panfrost_batch *batch)
 hash_table_foreach(batch->bos, entry)
 panfrost_bo_unreference((struct panfrost_bo *)entry->key);
 
+util_dynarray_foreach(&batch->dependencies,
+  struct panfrost_batch_fence *, dep) {
+panfrost_batch_fence_unreference(*dep);
+}
+
 /* The out_sync fence lifetime is different from the batch one
  * since other batches might want to wait on a fence of already
  * submitted/signaled batch. All we need to do here is make sure the
@@ -164,6 +193,56 @@ panfrost_free_batch(struct panfrost_batch *batch)
 ralloc_free(batch);
 }
 
+#ifndef NDEBUG
+static bool
+panfrost_dep_graph_contains_batch(struct panfrost_batch *root,
+  struct panfrost_batch *batch)
+{
+if (!root)
+return false;
+
+util_dynarray_foreach(&root->dependencies,
+  struct panfrost_batch_fence *, dep) {
+if ((*dep)->batch == batch ||
+panfrost_dep_graph_contains_batch((*dep)->batch, batch))
+return true;
+}
+
+return false;
+}
+#endif
+
+static void
+panfrost_batch_add_dep(struct panfrost_batch *batch,
+   struct panfrost_batch_fence *newdep)
+{
+if (batch == newdep->batch)
+return;
+
+/* We might want to turn ->dependencies into a set if the number of
+ * deps turns out to be big enough to make this 'is dep already there'
+ * search inefficient.
+ */
+util_dynarray_foreach(&batch->dependencies,
+  struct panfrost_batch_fence *, dep)

[Mesa-dev] [PATCH v3 09/17] panfrost: Add a panfrost_flush_batches_accessing_bo() helper

2019-09-18 Thread Boris Brezillon
This will allow us to only flush batches touching a specific resource,
which is particularly useful when the CPU needs to access a BO.

Signed-off-by: Boris Brezillon 
Reviewed-by: Alyssa Rosenzweig 
---
Changes in v3:
* Collect R-b
---
 src/gallium/drivers/panfrost/pan_job.c | 31 ++
 src/gallium/drivers/panfrost/pan_job.h |  4 
 2 files changed, 35 insertions(+)

diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 3ccf4bb6b3e9..e7eae399830f 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -952,6 +952,37 @@ panfrost_flush_all_batches(struct panfrost_context *ctx, 
bool wait)
 util_dynarray_fini(&fences);
 }
 
+void
+panfrost_flush_batches_accessing_bo(struct panfrost_context *ctx,
+struct panfrost_bo *bo,
+uint32_t access_type)
+{
+struct panfrost_bo_access *access;
+struct hash_entry *hentry;
+
+/* It doesn't make any sense to flush only the readers. */
+assert(access_type == PAN_BO_ACCESS_WRITE ||
+   access_type == PAN_BO_ACCESS_RW);
+
+hentry = _mesa_hash_table_search(ctx->accessed_bos, bo);
+access = hentry ? hentry->data : NULL;
+if (!access)
+return;
+
+if (access_type & PAN_BO_ACCESS_WRITE && access->writer &&
+access->writer->batch)
+panfrost_batch_submit(access->writer->batch);
+
+if (!(access_type & PAN_BO_ACCESS_READ))
+return;
+
+util_dynarray_foreach(&access->readers, struct panfrost_batch_fence *,
+  reader) {
+if (*reader && (*reader)->batch)
+panfrost_batch_submit((*reader)->batch);
+}
+}
+
 void
 panfrost_batch_set_requirements(struct panfrost_batch *batch)
 {
diff --git a/src/gallium/drivers/panfrost/pan_job.h 
b/src/gallium/drivers/panfrost/pan_job.h
index e95e156a40f8..25905b516739 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -185,6 +185,10 @@ panfrost_batch_create_bo(struct panfrost_batch *batch, 
size_t size,
 void
 panfrost_flush_all_batches(struct panfrost_context *ctx, bool wait);
 
+void
+panfrost_flush_batches_accessing_bo(struct panfrost_context *ctx,
+struct panfrost_bo *bo, uint32_t flags);
+
 void
 panfrost_batch_set_requirements(struct panfrost_batch *batch);
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 07/17] panfrost: Prepare panfrost_fence for batch pipelining

2019-09-18 Thread Boris Brezillon
The panfrost_fence logic currently waits on the last submitted batch,
but the batch serialization that was enforced in
panfrost_batch_submit() is about to go away, allowing for several
batches to be pipelined, and the last submitted one is not necessarily
the one that will finish last.

We need to make sure the fence logic waits on all flushed batches, not
only the last one.

Signed-off-by: Boris Brezillon 
Reviewed-by: Alyssa Rosenzweig 
---
Changes in v3:
* Fix a comment
* Adjust things to match the changes done in "panfrost: Add a batch fence"
---
 src/gallium/drivers/panfrost/pan_context.c | 18 +-
 src/gallium/drivers/panfrost/pan_context.h |  5 +-
 src/gallium/drivers/panfrost/pan_job.c | 16 -
 src/gallium/drivers/panfrost/pan_screen.c  | 71 +++---
 src/gallium/drivers/panfrost/pan_screen.h  |  3 +-
 5 files changed, 57 insertions(+), 56 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 312a9e93e455..aad69e3f9991 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1349,14 +1349,30 @@ panfrost_flush(
 {
 struct panfrost_context *ctx = pan_context(pipe);
 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+struct util_dynarray fences;
+
+/* We must collect the fences before the flush is done, otherwise we'll
+ * lose track of them.
+ */
+if (fence) {
+util_dynarray_init(&fences, NULL);
+panfrost_batch_fence_reference(batch->out_sync);
+util_dynarray_append(&fences, struct panfrost_batch_fence *,
+ batch->out_sync);
+}
 
 /* Submit the frame itself */
 panfrost_batch_submit(batch);
 
 if (fence) {
-struct panfrost_fence *f = panfrost_fence_create(ctx);
+struct panfrost_fence *f = panfrost_fence_create(ctx, &fences);
 pipe->screen->fence_reference(pipe->screen, fence, NULL);
 *fence = (struct pipe_fence_handle *)f;
+
+util_dynarray_foreach(&fences, struct panfrost_batch_fence *, 
fence)
+panfrost_batch_fence_unreference(*fence);
+
+util_dynarray_fini(&fences);
 }
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_context.h 
b/src/gallium/drivers/panfrost/pan_context.h
index 3b09952345cf..d50ed57d5d8a 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -94,7 +94,7 @@ struct panfrost_query {
 
 struct panfrost_fence {
 struct pipe_reference reference;
-int fd;
+struct util_dynarray syncfds;
 };
 
 struct panfrost_streamout {
@@ -193,9 +193,6 @@ struct panfrost_context {
 
 /* True for t6XX, false for t8xx. */
 bool is_t6xx;
-
-/* The out sync fence of the last submitted batch. */
-struct panfrost_batch_fence *last_out_sync;
 };
 
 /* Corresponds to the CSO */
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index b0494af3482f..211e48bafd4e 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -819,13 +819,6 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 free(bo_handles);
 free(in_syncs);
 
-/* Release the last batch fence if any, and retain the new one */
-if (ctx->last_out_sync)
-panfrost_batch_fence_unreference(ctx->last_out_sync);
-
-panfrost_batch_fence_reference(batch->out_sync);
-ctx->last_out_sync = batch->out_sync;
-
 if (ret) {
 fprintf(stderr, "Error submitting: %m\n");
 return errno;
@@ -884,15 +877,6 @@ panfrost_batch_submit(struct panfrost_batch *batch)
  * to wait on it.
  */
 batch->out_sync->signaled = true;
-
-/* Release the last batch fence if any, and set ->last_out_sync
- * to NULL
- */
-if (ctx->last_out_sync) {
-panfrost_batch_fence_unreference(ctx->last_out_sync);
-ctx->last_out_sync = NULL;
-}
-
 goto out;
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_screen.c 
b/src/gallium/drivers/panfrost/pan_screen.c
index e2c31f7f8213..55c66e0c9a79 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -575,7 +575,9 @@ panfrost_fence_reference(struct pipe_screen *pscreen,
 struct panfrost_fence *old = *p;
 
 if (pipe_reference(&(*p)->reference, >reference)) {
-close(old->fd);
+util_dynarray_foreach(>syncfds, int, fd)
+close(*fd);
+

[Mesa-dev] [PATCH v3 10/17] panfrost: Kill the explicit serialization in panfrost_batch_submit()

2019-09-18 Thread Boris Brezillon
Now that we have all the pieces in place to support pipelining batches
we can get rid of the drmSyncobjWait() at the end of
panfrost_batch_submit().

Signed-off-by: Boris Brezillon 
Reviewed-by: Alyssa Rosenzweig 
---
Changes in v3:
* Collect R-b
---
 src/gallium/drivers/panfrost/pan_job.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index e7eae399830f..235cb21dc8c8 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -868,7 +868,6 @@ panfrost_batch_submit(struct panfrost_batch *batch)
 panfrost_batch_submit((*dep)->batch);
 }
 
-struct panfrost_context *ctx = batch->ctx;
 int ret;
 
 /* Nothing to do! */
@@ -892,18 +891,7 @@ panfrost_batch_submit(struct panfrost_batch *batch)
 
 out:
 panfrost_freeze_batch(batch);
-
-/* We always stall the pipeline for correct results since pipelined
- * rendering is quite broken right now (to be fixed by the panfrost_job
- * refactor, just take the perf hit for correctness)
- */
-if (!batch->out_sync->signaled)
-drmSyncobjWait(pan_screen(ctx->base.screen)->fd,
-   >out_sync->syncobj, 1, INT64_MAX, 0,
-   NULL);
-
 panfrost_free_batch(batch);
-
 }
 
 void
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 02/17] panfrost: Make panfrost_batch->bos a hash table

2019-09-18 Thread Boris Brezillon
So we can store the flags as data and keep the BO as a key. This way
we keep track of the type of access done on BOs.

Signed-off-by: Boris Brezillon 
---
Changes in v3:
* None
---
 src/gallium/drivers/panfrost/pan_job.c | 33 +-
 src/gallium/drivers/panfrost/pan_job.h |  2 +-
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 8e2703ae168c..785317dbd0b0 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -44,9 +44,8 @@ panfrost_create_batch(struct panfrost_context *ctx,
 
 batch->ctx = ctx;
 
-batch->bos = _mesa_set_create(batch,
-  _mesa_hash_pointer,
-  _mesa_key_pointer_equal);
+batch->bos = _mesa_hash_table_create(batch, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
 
 batch->minx = batch->miny = ~0;
 batch->maxx = batch->maxy = 0;
@@ -67,10 +66,8 @@ panfrost_free_batch(struct panfrost_batch *batch)
 
 struct panfrost_context *ctx = batch->ctx;
 
-set_foreach(batch->bos, entry) {
-struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
-panfrost_bo_unreference(bo);
-}
+hash_table_foreach(batch->bos, entry)
+panfrost_bo_unreference((struct panfrost_bo *)entry->key);
 
 _mesa_hash_table_remove_key(ctx->batches, >key);
 
@@ -138,11 +135,25 @@ panfrost_batch_add_bo(struct panfrost_batch *batch, 
struct panfrost_bo *bo,
 if (!bo)
 return;
 
-if (_mesa_set_search(batch->bos, bo))
+struct hash_entry *entry;
+uint32_t old_flags = 0;
+
+entry = _mesa_hash_table_search(batch->bos, bo);
+if (!entry) {
+entry = _mesa_hash_table_insert(batch->bos, bo,
+(void *)(uintptr_t)flags);
+panfrost_bo_reference(bo);
+   } else {
+old_flags = (uintptr_t)entry->data;
+}
+
+assert(entry);
+
+if (old_flags == flags)
 return;
 
-panfrost_bo_reference(bo);
-_mesa_set_add(batch->bos, bo);
+flags |= old_flags;
+entry->data = (void *)(uintptr_t)flags;
 }
 
 void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
@@ -376,7 +387,7 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 bo_handles = calloc(batch->bos->entries, sizeof(*bo_handles));
 assert(bo_handles);
 
-set_foreach(batch->bos, entry) {
+hash_table_foreach(batch->bos, entry) {
 struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
 assert(bo->gem_handle > 0);
 bo_handles[submit.bo_handle_count++] = bo->gem_handle;
diff --git a/src/gallium/drivers/panfrost/pan_job.h 
b/src/gallium/drivers/panfrost/pan_job.h
index 0b37a3131e86..3f2cf1a999f3 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -98,7 +98,7 @@ struct panfrost_batch {
 unsigned job_index;
 
 /* BOs referenced -- will be used for flushing logic */
-struct set *bos;
+struct hash_table *bos;
 
 /* Current transient BO */
struct panfrost_bo *transient_bo;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 05/17] panfrost: Add a panfrost_freeze_batch() helper

2019-09-18 Thread Boris Brezillon
We'll soon need to freeze a batch not only when it's flushed, but also
when another batch depends on it, so let's add a helper to avoid
duplicating the logic.

Signed-off-by: Boris Brezillon 
Reviewed-by: Alyssa Rosenzweig 
---
Changes in v3:
* Collect R-b
---
 src/gallium/drivers/panfrost/pan_job.c | 62 ++
 1 file changed, 44 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 55780dd3d9d6..872c846207bf 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -98,22 +98,59 @@ panfrost_create_batch(struct panfrost_context *ctx,
 return batch;
 }
 
+static void
+panfrost_freeze_batch(struct panfrost_batch *batch)
+{
+struct panfrost_context *ctx = batch->ctx;
+struct hash_entry *entry;
+
+/* Remove the entry in the FBO -> batch hash table if the batch
+ * matches. This way, next draws/clears targeting this FBO will trigger
+ * the creation of a new batch.
+ */
+entry = _mesa_hash_table_search(ctx->batches, >key);
+if (entry && entry->data == batch)
+_mesa_hash_table_remove(ctx->batches, entry);
+
+/* If this is the bound batch, the panfrost_context parameters are
+ * relevant so submitting it invalidates those parameters, but if it's
+ * not bound, the context parameters are for some other batch so we
+ * can't invalidate them.
+ */
+if (ctx->batch == batch) {
+panfrost_invalidate_frame(ctx);
+ctx->batch = NULL;
+}
+}
+
+#ifndef NDEBUG
+static bool panfrost_batch_is_frozen(struct panfrost_batch *batch)
+{
+struct panfrost_context *ctx = batch->ctx;
+struct hash_entry *entry;
+
+entry = _mesa_hash_table_search(ctx->batches, >key);
+if (entry && entry->data == batch)
+return false;
+
+if (ctx->batch == batch)
+return false;
+
+return true;
+}
+#endif
+
 static void
 panfrost_free_batch(struct panfrost_batch *batch)
 {
 if (!batch)
 return;
 
-struct panfrost_context *ctx = batch->ctx;
+assert(panfrost_batch_is_frozen(batch));
 
 hash_table_foreach(batch->bos, entry)
 panfrost_bo_unreference((struct panfrost_bo *)entry->key);
 
-_mesa_hash_table_remove_key(ctx->batches, >key);
-
-if (ctx->batch == batch)
-ctx->batch = NULL;
-
 /* The out_sync fence lifetime is different from the the batch one
  * since other batches might want to wait on a fence of already
  * submitted/signaled batch. All we need to do here is make sure the
@@ -529,19 +566,8 @@ panfrost_batch_submit(struct panfrost_batch *batch)
 fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);
 
 out:
-/* If this is the bound batch, the panfrost_context parameters are
- * relevant so submitting it invalidates those paramaters, but if it's
- * not bound, the context parameters are for some other batch so we
- * can't invalidate them.
- */
-if (ctx->batch == batch)
-panfrost_invalidate_frame(ctx);
-
-/* The job has been submitted, let's invalidate the current FBO job
- * cache.
-*/
+panfrost_freeze_batch(batch);
 assert(!ctx->batch || batch == ctx->batch);
-ctx->batch = NULL;
 
 /* We always stall the pipeline for correct results since pipelined
  * rendering is quite broken right now (to be fixed by the panfrost_job
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 11/17] panfrost: Get rid of the flush in panfrost_set_framebuffer_state()

2019-09-18 Thread Boris Brezillon
Now that we track inter-batch dependencies, the flush done in
panfrost_set_framebuffer_state() is no longer needed. Let's get rid of
it.

Signed-off-by: Boris Brezillon 
Reviewed-by: Alyssa Rosenzweig 
---
Changes in v3:
* Collect R-b
---
 src/gallium/drivers/panfrost/pan_context.c | 46 ++
 1 file changed, 3 insertions(+), 43 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 861b4b621602..07bafad58a00 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -2299,50 +2299,10 @@ panfrost_set_framebuffer_state(struct pipe_context 
*pctx,
 {
 struct panfrost_context *ctx = pan_context(pctx);
 
-/* Flush when switching framebuffers, but not if the framebuffer
- * state is being restored by u_blitter
- */
-
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-bool is_scanout = panfrost_batch_is_scanout(batch);
-bool has_draws = batch->last_job.gpu;
-
-/* Bail out early when the current and new states are the same. */
-if (util_framebuffer_state_equal(>pipe_framebuffer, fb))
-return;
-
-/* The wallpaper logic sets a new FB state before doing the blit and
- * restore the old one when it's done. Those FB states are reported to
- * be different because the surface they are pointing to are different,
- * but those surfaces actually point to the same cbufs/zbufs. In that
- * case we definitely don't want new FB descs to be emitted/attached
- * since the job is expected to be flushed just after the blit is done,
- * so let's just copy the new state and return here.
- */
-if (ctx->wallpaper_batch) {
-util_copy_framebuffer_state(>pipe_framebuffer, fb);
-return;
-}
-
-if (!is_scanout || has_draws)
-panfrost_flush_all_batches(ctx, true);
-else
-assert(!ctx->payloads[PIPE_SHADER_VERTEX].postfix.framebuffer 
&&
-   
!ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.framebuffer);
-
-/* Invalidate the FBO job cache since we've just been assigned a new
- * FB state.
- */
-ctx->batch = NULL;
-
+panfrost_hint_afbc(pan_screen(pctx->screen), fb);
 util_copy_framebuffer_state(>pipe_framebuffer, fb);
-
-/* Given that we're rendering, we'd love to have compression */
-struct panfrost_screen *screen = pan_screen(ctx->base.screen);
-
-panfrost_hint_afbc(screen, >pipe_framebuffer);
-for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
-ctx->payloads[i].postfix.framebuffer = 0;
+ctx->batch = NULL;
+panfrost_invalidate_frame(ctx);
 }
 
 static void *
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 03/17] panfrost: Add a batch fence

2019-09-18 Thread Boris Brezillon
So we can implement fine-grained dependency tracking between batches.

Signed-off-by: Boris Brezillon 
---
Changes in v3:
* Fix typos
* Do not initialize the syncobj in a signaled state, and set
  fence->signaled to true when submitting a dummy batch (one with no
  draw/clear queued)
---
 src/gallium/drivers/panfrost/pan_job.c | 56 +-
 src/gallium/drivers/panfrost/pan_job.h | 39 ++
 2 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 785317dbd0b0..b6763da66a97 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -36,6 +36,45 @@
 #include "pan_util.h"
 #include "pandecode/decode.h"
 
+static struct panfrost_batch_fence *
+panfrost_create_batch_fence(struct panfrost_batch *batch)
+{
+struct panfrost_batch_fence *fence;
+ASSERTED int ret;
+
+fence = rzalloc(NULL, struct panfrost_batch_fence);
+assert(fence);
+pipe_reference_init(>reference, 1);
+fence->ctx = batch->ctx;
+fence->batch = batch;
+ret = drmSyncobjCreate(pan_screen(batch->ctx->base.screen)->fd, 0,
+   >syncobj);
+assert(!ret);
+
+return fence;
+}
+
+static void
+panfrost_free_batch_fence(struct panfrost_batch_fence *fence)
+{
+drmSyncobjDestroy(pan_screen(fence->ctx->base.screen)->fd,
+  fence->syncobj);
+ralloc_free(fence);
+}
+
+void
+panfrost_batch_fence_unreference(struct panfrost_batch_fence *fence)
+{
+if (pipe_reference(>reference, NULL))
+ panfrost_free_batch_fence(fence);
+}
+
+void
+panfrost_batch_fence_reference(struct panfrost_batch_fence *fence)
+{
+pipe_reference(NULL, >reference);
+}
+
 static struct panfrost_batch *
 panfrost_create_batch(struct panfrost_context *ctx,
   const struct pipe_framebuffer_state *key)
@@ -53,6 +92,7 @@ panfrost_create_batch(struct panfrost_context *ctx,
 
 util_dynarray_init(>headers, batch);
 util_dynarray_init(>gpu_headers, batch);
+batch->out_sync = panfrost_create_batch_fence(batch);
 util_copy_framebuffer_state(>key, key);
 
 return batch;
@@ -74,6 +114,15 @@ panfrost_free_batch(struct panfrost_batch *batch)
 if (ctx->batch == batch)
 ctx->batch = NULL;
 
+/* The out_sync fence lifetime is different from the the batch one
+ * since other batches might want to wait on a fence of already
+ * submitted/signaled batch. All we need to do here is make sure the
+ * fence does not point to an invalid batch, which the core will
+ * interpret as 'batch is already submitted'.
+ */
+batch->out_sync->batch = NULL;
+panfrost_batch_fence_unreference(batch->out_sync);
+
 util_unreference_framebuffer_state(>key);
 ralloc_free(batch);
 }
@@ -441,8 +490,13 @@ panfrost_batch_submit(struct panfrost_batch *batch)
 int ret;
 
 /* Nothing to do! */
-if (!batch->last_job.gpu && !batch->clear)
+if (!batch->last_job.gpu && !batch->clear) {
+/* Mark the fence as signaled so the fence logic does not try
+ * to wait on it.
+ */
+batch->out_sync->signaled = true;
 goto out;
+}
 
 if (!batch->clear && batch->last_tiler.gpu)
 panfrost_batch_draw_wallpaper(batch);
diff --git a/src/gallium/drivers/panfrost/pan_job.h 
b/src/gallium/drivers/panfrost/pan_job.h
index 3f2cf1a999f3..88f1e4620fd0 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -31,6 +31,36 @@
 #include "pan_allocate.h"
 #include "pan_resource.h"
 
+/* panfrost_batch_fence is the out fence of a batch that users or other batches
+ * might want to wait on. The batch fence lifetime is different from the batch
+ * one as want will certainly want to wait upon the fence after the batch has
+ * been submitted (which is when panfrost_batch objects are freed).
+ */
+struct panfrost_batch_fence {
+/* Refcounting object for the fence. */
+struct pipe_reference reference;
+
+/* Batch that created this fence object. Will become NULL at batch
+ * submission time. This field is mainly here to know whether the
+ * batch has been flushed or not.
+ */
+struct panfrost_batch *batch;
+
+/* Context this fence is attached to. We need both ctx and batch, as
+ * the batch will go away after it's been submitted, but the fence
+ * will stay a bit longer.
+ */
+struct panfrost_context *ctx;
+
+/* Sync object backing this fence

[Mesa-dev] [PATCH v3 01/17] panfrost: Extend the panfrost_batch_add_bo() API to pass access flags

2019-09-18 Thread Boris Brezillon
The type of access being done on a BO has impacts on job scheduling
(shared resources being written enforce serialization while those
being read only allow for job parallelization) and BO lifetime (the
fragment job might last longer than the vertex/tiler ones, if we can,
it's good to release BOs earlier so that others can re-use them
through the BO re-use cache).

Let's pass extra access flags to panfrost_batch_add_bo() and
panfrost_batch_create_bo() so the batch submission logic can take the
appropriate when submitting batches. Note that this information is not
used yet, we're just patching callers to pass the correct flags here.

Signed-off-by: Boris Brezillon 
---
Changes in v3:
* s/PAN_BO_GPU_ACCESS/PAN_BO_ACCESS/
* Fix wrong access types for streamout and vertex index buf
* Add a panfrost_bo_access_for_stage() helper
---
 src/gallium/drivers/panfrost/pan_allocate.c   | 14 ++-
 src/gallium/drivers/panfrost/pan_blend_cso.c  |  6 ++-
 src/gallium/drivers/panfrost/pan_bo.h | 29 ++
 src/gallium/drivers/panfrost/pan_context.c| 36 +
 src/gallium/drivers/panfrost/pan_instancing.c |  5 ++-
 src/gallium/drivers/panfrost/pan_job.c| 39 ++-
 src/gallium/drivers/panfrost/pan_job.h|  5 ++-
 src/gallium/drivers/panfrost/pan_varyings.c   | 10 -
 8 files changed, 120 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_allocate.c 
b/src/gallium/drivers/panfrost/pan_allocate.c
index 3076c23ab1cc..b16a1253ac2f 100644
--- a/src/gallium/drivers/panfrost/pan_allocate.c
+++ b/src/gallium/drivers/panfrost/pan_allocate.c
@@ -63,8 +63,18 @@ panfrost_allocate_transient(struct panfrost_batch *batch, 
size_t sz)
 size_t bo_sz = sz < TRANSIENT_SLAB_SIZE ?
TRANSIENT_SLAB_SIZE : ALIGN_POT(sz, 4096);
 
-/* We can't reuse the current BO, but we can create a new one. 
*/
-bo = panfrost_batch_create_bo(batch, bo_sz, 0);
+/* We can't reuse the current BO, but we can create a new one.
+ * We don't know what the BO will be used for, so let's flag it
+ * RW and attach it to both the fragment and vertex/tiler jobs.
+ * TODO: if we want fine grained BO assignment we should pass
+ * flags to this function and keep the read/write,
+ * fragment/vertex+tiler pools separate.
+ */
+bo = panfrost_batch_create_bo(batch, bo_sz, 0,
+  PAN_BO_ACCESS_PRIVATE |
+  PAN_BO_ACCESS_RW |
+  PAN_BO_ACCESS_VERTEX_TILER |
+  PAN_BO_ACCESS_FRAGMENT);
 
 if (sz < TRANSIENT_SLAB_SIZE) {
 batch->transient_bo = bo;
diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c 
b/src/gallium/drivers/panfrost/pan_blend_cso.c
index 6bd6ff71cdc7..48bd513ab6e5 100644
--- a/src/gallium/drivers/panfrost/pan_blend_cso.c
+++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
@@ -273,7 +273,11 @@ panfrost_get_blend_for_context(struct panfrost_context 
*ctx, unsigned rti)
 
 /* Upload the shader */
 final.shader.bo = panfrost_batch_create_bo(batch, shader->size,
-   PAN_BO_EXECUTE);
+   PAN_BO_EXECUTE,
+   PAN_BO_ACCESS_PRIVATE |
+   PAN_BO_ACCESS_READ |
+   PAN_BO_ACCESS_VERTEX_TILER |
+   PAN_BO_ACCESS_FRAGMENT);
 memcpy(final.shader.bo->cpu, shader->buffer, shader->size);
 
 if (shader->patch_index) {
diff --git a/src/gallium/drivers/panfrost/pan_bo.h 
b/src/gallium/drivers/panfrost/pan_bo.h
index 33fbddff3369..73cc74a260d4 100644
--- a/src/gallium/drivers/panfrost/pan_bo.h
+++ b/src/gallium/drivers/panfrost/pan_bo.h
@@ -56,6 +56,24 @@ struct panfrost_screen;
  * let the BO logic know about this contraint. */
 #define PAN_BO_DONT_REUSE (1 << 5)
 
+/* GPU access flags */
+
+/* BO is either shared (can be accessed by more than one GPU batch) or private
+ * (reserved by a specific GPU job). */
+#define PAN_BO_ACCESS_PRIVATE (0 << 0)
+#define PAN_BO_ACCESS_SHARED  (1 << 0)
+
+/* BO is being read/written by the GPU */
+#define PAN_BO_ACCESS_READ(1 << 1)
+#define PAN_BO_ACCESS_WRITE   (1 << 2)
+#define PAN_BO_ACCESS_RW  (PAN_BO_ACCESS_READ | 
PAN_BO_ACCESS_WRITE)
+
+/* BO is accessed by the vertex/tiler job. */
+#define PAN_BO_ACCESS_VERTEX_TILER(1 << 3)
+
+/* BO is accessed by the fragment job. */
+#define PAN_BO_ACCESS_FRAGMENT  

[Mesa-dev] [PATCH v3 00/17] panfrost: Support batch pipelining

2019-09-18 Thread Boris Brezillon
Hello,

This is the third attempt at supporting batch pipelining. This time I
implemented it using a dependency graph (as suggested by Alyssa and
Steven) so that batch submission can be delayed even more: the only
time we flush batches now is when we have an explicit flush or when
the CPU needs to access a BO (we might want to tweak that a bit to
avoid the extra latency incurred by this solution). With that in place
we hope to increase GPU utilization.

Patches 15 and 16 are optional, but I remember reading (I think it was
Steven who mentioned that) that draw order matters when queueing render
operations for different frames (frame N should ideally be ready before
frame N+1). Not sure if enforcing draw call order is enough to guarantee
that rendering of frame N always finishes before frame N+1 though.
If that's something you don't want to merge, I can drop it.

Regards,

Boris

Boris Brezillon (17):
  panfrost: Extend the panfrost_batch_add_bo() API to pass access flags
  panfrost: Make panfrost_batch->bos a hash table
  panfrost: Add a batch fence
  panfrost: Use the per-batch fences to wait on the last submitted batch
  panfrost: Add a panfrost_freeze_batch() helper
  panfrost: Start tracking inter-batch dependencies
  panfrost: Prepare panfrost_fence for batch pipelining
  panfrost: Add a panfrost_flush_all_batches() helper
  panfrost: Add a panfrost_flush_batches_accessing_bo() helper
  panfrost: Kill the explicit serialization in panfrost_batch_submit()
  panfrost: Get rid of the flush in panfrost_set_framebuffer_state()
  panfrost: Add flags to reflect the BO imported/exported state
  panfrost: Make sure the BO is 'ready' when picked from the cache
  panfrost: Do fine-grained flushing when preparing BO for CPU accesses
  panfrost: Rename ctx->batches into ctx->fbo_to_batch
  panfrost: Take draw call order into account
  panfrost/ci: New tests are passing

 .../drivers/panfrost/ci/expected-failures.txt |   4 -
 src/gallium/drivers/panfrost/pan_allocate.c   |  14 +-
 src/gallium/drivers/panfrost/pan_blend_cso.c  |   6 +-
 src/gallium/drivers/panfrost/pan_bo.c | 116 ++-
 src/gallium/drivers/panfrost/pan_bo.h |  44 ++
 src/gallium/drivers/panfrost/pan_compute.c|   2 +-
 src/gallium/drivers/panfrost/pan_context.c| 121 ++--
 src/gallium/drivers/panfrost/pan_context.h|  15 +-
 src/gallium/drivers/panfrost/pan_instancing.c |   5 +-
 src/gallium/drivers/panfrost/pan_job.c| 668 --
 src/gallium/drivers/panfrost/pan_job.h|  58 +-
 src/gallium/drivers/panfrost/pan_resource.c   |  27 +-
 src/gallium/drivers/panfrost/pan_screen.c |  65 +-
 src/gallium/drivers/panfrost/pan_screen.h |   3 +-
 src/gallium/drivers/panfrost/pan_varyings.c   |  10 +-
 15 files changed, 956 insertions(+), 202 deletions(-)

-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 04/17] panfrost: Use the per-batch fences to wait on the last submitted batch

2019-09-18 Thread Boris Brezillon
We just replace the per-context out_sync object with a pointer to the
fence of the last submitted batch. Pipelining of batches will
come later.

Signed-off-by: Boris Brezillon 
---
Alyssa, I dropped your R-b since the other changes you asked me to do
in "panfrost: Add a batch fence" had some impact on this patch.

Changes in v3:
* Make sure we don't try to wait on dummy batches (those with no
  vertex/tiler/fragment jobs)
---
 src/gallium/drivers/panfrost/pan_context.c |  6 
 src/gallium/drivers/panfrost/pan_context.h |  3 +-
 src/gallium/drivers/panfrost/pan_job.c | 35 ++
 src/gallium/drivers/panfrost/pan_screen.c  | 18 +--
 4 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 65a6c7f8c5ae..312a9e93e455 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -2702,12 +2702,6 @@ panfrost_create_context(struct pipe_screen *screen, void 
*priv, unsigned flags)
 panfrost_blend_context_init(gallium);
 panfrost_compute_context_init(gallium);
 
-ASSERTED int ret;
-
-ret = drmSyncobjCreate(pscreen->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
-   >out_sync);
-assert(!ret);
-
 /* XXX: leaks */
 gallium->stream_uploader = u_upload_create_default(gallium);
 gallium->const_uploader = gallium->stream_uploader;
diff --git a/src/gallium/drivers/panfrost/pan_context.h 
b/src/gallium/drivers/panfrost/pan_context.h
index c145d589757e..ce3e0c899a4f 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -191,7 +191,8 @@ struct panfrost_context {
 /* True for t6XX, false for t8xx. */
 bool is_t6xx;
 
-uint32_t out_sync;
+/* The out sync fence of the last submitted batch. */
+struct panfrost_batch_fence *last_out_sync;
 };
 
 /* Corresponds to the CSO */
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index b6763da66a97..55780dd3d9d6 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -425,11 +425,13 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 uint32_t *bo_handles;
 int ret;
 
-submit.in_syncs = (u64) (uintptr_t) >out_sync;
-submit.in_sync_count = 1;
 
-submit.out_sync = ctx->out_sync;
+if (ctx->last_out_sync) {
+submit.in_sync_count = 1;
+submit.in_syncs = (uintptr_t)>last_out_sync->syncobj;
+}
 
+submit.out_sync = batch->out_sync->syncobj;
 submit.jc = first_job_desc;
 submit.requirements = reqs;
 
@@ -445,6 +447,14 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 submit.bo_handles = (u64) (uintptr_t) bo_handles;
 ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_SUBMIT, );
 free(bo_handles);
+
+/* Release the last batch fence if any, and retain the new one */
+if (ctx->last_out_sync)
+panfrost_batch_fence_unreference(ctx->last_out_sync);
+
+panfrost_batch_fence_reference(batch->out_sync);
+ctx->last_out_sync = batch->out_sync;
+
 if (ret) {
 fprintf(stderr, "Error submitting: %m\n");
 return errno;
@@ -453,7 +463,8 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 /* Trace the job if we're doing that */
 if (pan_debug & PAN_DBG_TRACE) {
 /* Wait so we can get errors reported back */
-drmSyncobjWait(screen->fd, >out_sync, 1, INT64_MAX, 0, 
NULL);
+drmSyncobjWait(screen->fd, >out_sync->syncobj, 1,
+   INT64_MAX, 0, NULL);
 pandecode_jc(submit.jc, FALSE);
 }
 
@@ -495,6 +506,15 @@ panfrost_batch_submit(struct panfrost_batch *batch)
  * to wait on it.
  */
 batch->out_sync->signaled = true;
+
+/* Release the last batch fence if any, and set ->last_out_sync
+ * to NULL
+ */
+if (ctx->last_out_sync) {
+panfrost_batch_fence_unreference(ctx->last_out_sync);
+ctx->last_out_sync = NULL;
+}
+
 goto out;
 }
 
@@ -527,8 +547,11 @@ out:
  * rendering is quite broken right now (to be fixed by the panfrost_job
  * refactor, just take the perf hit for correctness)
  */
-drmSyncobjWait(pan_screen(ctx->base.screen)->fd, >out_sync, 1,
-   INT64_MAX, 0, NULL);
+if (!batch->out_sync->signaled)
+  

[Mesa-dev] [PATCH v3 08/17] panfrost: Add a panfrost_flush_all_batches() helper

2019-09-18 Thread Boris Brezillon
And use it in panfrost_flush() to flush all batches, and not only the
one currently bound to the context.

We also replace all internal calls to panfrost_flush() by
panfrost_flush_all_batches() ones.

Signed-off-by: Boris Brezillon 
Reviewed-by: Alyssa Rosenzweig 
---
Changes in v3:
* Add missing blank line
* Collect R-b
---
 src/gallium/drivers/panfrost/pan_compute.c  |  2 +-
 src/gallium/drivers/panfrost/pan_context.c  | 23 +++
 src/gallium/drivers/panfrost/pan_job.c  | 46 -
 src/gallium/drivers/panfrost/pan_job.h  |  2 +-
 src/gallium/drivers/panfrost/pan_resource.c |  6 +--
 5 files changed, 64 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_compute.c 
b/src/gallium/drivers/panfrost/pan_compute.c
index 4639c1b03c38..036dffbb17be 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -133,7 +133,7 @@ panfrost_launch_grid(struct pipe_context *pipe,
 /* Queue the job */
 panfrost_scoreboard_queue_compute_job(batch, transfer);
 
-panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
+panfrost_flush_all_batches(ctx, true);
 }
 
 void
diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index aad69e3f9991..861b4b621602 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1348,7 +1348,6 @@ panfrost_flush(
 unsigned flags)
 {
 struct panfrost_context *ctx = pan_context(pipe);
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 struct util_dynarray fences;
 
 /* We must collect the fences before the flush is done, otherwise we'll
@@ -1356,13 +1355,18 @@ panfrost_flush(
  */
 if (fence) {
 util_dynarray_init(, NULL);
-panfrost_batch_fence_reference(batch->out_sync);
-util_dynarray_append(, struct panfrost_batch_fence *,
- batch->out_sync);
+hash_table_foreach(ctx->batches, hentry) {
+struct panfrost_batch *batch = hentry->data;
+
+panfrost_batch_fence_reference(batch->out_sync);
+util_dynarray_append(,
+ struct panfrost_batch_fence *,
+ batch->out_sync);
+}
 }
 
-/* Submit the frame itself */
-panfrost_batch_submit(batch);
+/* Submit all pending jobs */
+panfrost_flush_all_batches(ctx, false);
 
 if (fence) {
 struct panfrost_fence *f = panfrost_fence_create(ctx, );
@@ -2321,7 +2325,7 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx,
 }
 
 if (!is_scanout || has_draws)
-panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
+panfrost_flush_all_batches(ctx, true);
 else
 assert(!ctx->payloads[PIPE_SHADER_VERTEX].postfix.framebuffer 
&&

!ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.framebuffer);
@@ -2553,6 +2557,7 @@ panfrost_get_query_result(struct pipe_context *pipe,
   union pipe_query_result *vresult)
 {
 struct panfrost_query *query = (struct panfrost_query *) q;
+struct panfrost_context *ctx = pan_context(pipe);
 
 
 switch (query->type) {
@@ -2560,7 +2565,7 @@ panfrost_get_query_result(struct pipe_context *pipe,
 case PIPE_QUERY_OCCLUSION_PREDICATE:
 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
 /* Flush first */
-panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
+panfrost_flush_all_batches(ctx, true);
 
 /* Read back the query results */
 unsigned *result = (unsigned *) query->transfer.cpu;
@@ -2576,7 +2581,7 @@ panfrost_get_query_result(struct pipe_context *pipe,
 
 case PIPE_QUERY_PRIMITIVES_GENERATED:
 case PIPE_QUERY_PRIMITIVES_EMITTED:
-panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
+panfrost_flush_all_batches(ctx, true);
 vresult->u64 = query->end - query->start;
 break;
 
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 211e48bafd4e..3ccf4bb6b3e9 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -856,7 +856,7 @@ panfrost_batch_submit_jobs(struct panfrost_batch *batch)
 return ret;
 }
 
-void
+static void
 panfrost_batch_submit(struct panfrost_batch *batch)
 {
 assert(batch);
@@ -904,8 +904,52 @@ out:
 
 panfrost_free_batch(batch);
 
+}
+
+void
+panfrost_flush_all_batches(struct panfrost_context *ctx, bool wait)
+{
+struct util

Re: [Mesa-dev] [PATCH v2 00/37] panfrost: Support batch pipelining

2019-09-18 Thread Boris Brezillon
+Rob

On Tue, 17 Sep 2019 17:13:04 +0200
Boris Brezillon  wrote:

> On Tue, 17 Sep 2019 08:36:56 -0400
> Alyssa Rosenzweig  wrote:
> 
> > "Can't use pipe_framebuffer_state as a hash key" Is this still relevant?
> > I thought we did this.  
> 
> I did this yes. I thought it was only a problem for the wallpaper
> draw, but it's actually wrong for any kind of blit where src and
> dst point to the same resource, you're right. I guess the solution
> would be to not use util_framebuffer_state_equal() but compare the
> resources pointed by surfaces contained in the fb state, and not hash
> the fb state directly, but hash its resource pointers instead (pretty
> much what was done before my patch :-/).

I tried implementing my own hash/compare funcs for the FBO key (instead
of using util_framebuffer_state_equal() and hashing the whole struct)
based on the freedreno logic, and I added an assert to see if there was
cases where util_framebuffer_state_equal() was returning something
different (see the below diff). There are indeed 2 deqp-gles2 tests that
trigger this assert.

So, now the question is: is it worth fixing, or can we live with the
extra batch addition when, on rare occasions, we fail to recognize that
2 FBOs are identical?

There's still something I don't understand in the freedreno logic: they
say they don't want to retain refs to the surfaces pointed to by the fb
state, because refcounting gets messy then, but I don't get why, especially
since they retain refs to the resources pointed to by those surfaces.

Anyway, I guess all of this can be sorted out after this series has landed.

--->8---
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 04257727ee0a..a90c427f6042 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -100,6 +100,74 @@ panfrost_batch_fence_reference(struct panfrost_batch_fence 
*fence)
 pipe_reference(NULL, >reference);
 }
 
+static bool
+panfrost_batch_surf_compare(const struct pipe_surface *a,
+const struct pipe_surface *b)
+{
+if (!a || !b)
+return a == b;
+
+if (a->texture != b->texture ||
+memcmp(>u, >u, sizeof(a->u)) ||
+a->nr_samples != b->nr_samples ||
+a->format != b->format)
+return false;
+
+return true;
+}
+
+static bool
+panfrost_batch_compare(const void *a, const void *b)
+{
+const struct pipe_framebuffer_state *fba = a, *fbb = b;
+unsigned i;
+
+if (fba->width != fbb->width || fba->height != fbb->height ||
+fba->layers != fbb->layers || fba->samples != fbb->samples ||
+fba->nr_cbufs != fbb->nr_cbufs)
+return false;
+
+for (i = 0; i < fba->nr_cbufs; i++) {
+if (!panfrost_batch_surf_compare(fba->cbufs[i], fbb->cbufs[i]))
+return false;
+}
+
+return panfrost_batch_surf_compare(fba->zsbuf, fbb->zsbuf);
+}
+
+static uint32_t
+panfrost_batch_surf_hash(uint32_t hash, const struct pipe_surface *surf)
+{
+uint32_t fmt_samples = surf ? (surf->format | (surf->nr_samples << 
16)) : 0;
+struct pipe_resource *rsrc = surf ? surf->texture : NULL;
+union pipe_surface_desc dummy_desc = { };
+
+hash = _mesa_fnv32_1a_accumulate_block(hash, , sizeof(rsrc));
+hash = _mesa_fnv32_1a_accumulate_block(hash,
+   surf ? >u : _desc,
+   sizeof(surf->u));
+return _mesa_fnv32_1a_accumulate_block(hash, _samples,
+   sizeof(fmt_samples));
+}
+
+static uint32_t
+panfrost_batch_hash(const void *key)
+{
+const struct pipe_framebuffer_state *fb = key;
+uint64_t header = fb->width | fb->height << 16 |
+  (uint64_t)fb->layers << 32 |
+  (uint64_t)fb->samples << 48 |
+  (uint64_t)fb->nr_cbufs << 56;
+uint32_t hash = _mesa_fnv32_1a_offset_bias;
+unsigned i;
+
+hash = _mesa_fnv32_1a_accumulate_block(hash, , sizeof(header));
+for (i = 0; i < ARRAY_SIZE(fb->cbufs); i++)
+hash = panfrost_batch_surf_hash(hash, fb->cbufs[i]);
+
+return panfrost_batch_surf_hash(hash, fb->zsbuf);
+}
+
 static struct panfrost_batch *
 panfrost_create_batch(struct panfrost_context *ctx,
   const struct pipe_framebuffer_state *key)
@@ -252,8 +320,10 @@ panfrost_get_batch(struct panfrost_context *ctx,
 struct hash_entry *entry = _mesa_hash_table_search(ctx->fbo_to_batch,
ke

Re: [Mesa-dev] [PATCH v2 00/37] panfrost: Support batch pipelining

2019-09-17 Thread Boris Brezillon
On Tue, 17 Sep 2019 08:36:56 -0400
Alyssa Rosenzweig  wrote:

> "Can't use pipe_framebuffer_state as a hash key" Is this still relevant?
> I thought we did this.

I did this yes. I thought it was only a problem for the wallpaper
draw, but it's actually wrong for any kind of blit where src and
dst point to the same resource, you're right. I guess the solution
would be to not use util_framebuffer_state_equal() but compare the
resources pointed by surfaces contained in the fb state, and not hash
the fb state directly, but hash its resource pointers instead (pretty
much what was done before my patch :-/).

> 
> You're right that the 32-batch bitset is something we can integrate in a
> v2.

Ok.

> 
> I guess that's it, never mind. Good stuff regardless :)

Thanks.

I'll apply patches that received R-bs and respin the rest of the series.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 36/37] panfrost: Take draw call order into account

2019-09-17 Thread Boris Brezillon
On Tue, 17 Sep 2019 08:32:35 -0400
Alyssa Rosenzweig  wrote:

> Hmm, could you explain a bit why this is necessary?
> 
> I would think if there's no dependency, there's no dependency, and if
> this fixes bugs, that's a dependency tracking issue that we're just
> papering over.

Indeed, there's no explicit dependency, and the logic works just fine
without this patch. It's just that Steven mentioned in his previous
review that you sometimes have weak (or non-explicit) dependencies. Say
you want to queue draws for frame N and N+1 before you call glFlush()
(is that what happens when we do triple buffering?), sometimes frame N
and N+1 have no inter-dependencies (they access different resources and
frame N+1 doesn't care about the state of frame N). Even if things
would be correct if frame N+1 is rendered before frame N, the user
probably expects the opposite, such that it can output frame N as soon
as possible.

Anyway, I'm fine dropping patch 35 and 36 (see the note in my cover
letter ;-)).

> 
> (Also, I guess r-b on previous patch retracted temporarily since it was a 
> setup for
> this.)
> 
> On Mon, Sep 16, 2019 at 11:37:14AM +0200, Boris Brezillon wrote:
> > This is not strictly required, but let's try to match the draw call
> > orders, just in case the app had a reason to do it in this order.
> > 
> > Signed-off-by: Boris Brezillon 
> > ---
> >  src/gallium/drivers/panfrost/pan_context.h |  6 ++
> >  src/gallium/drivers/panfrost/pan_job.c | 23 +++---
> >  src/gallium/drivers/panfrost/pan_job.h |  3 +++
> >  3 files changed, 29 insertions(+), 3 deletions(-)
> > 
> > diff --git a/src/gallium/drivers/panfrost/pan_context.h 
> > b/src/gallium/drivers/panfrost/pan_context.h
> > index f13967f51b46..c6b53685b285 100644
> > --- a/src/gallium/drivers/panfrost/pan_context.h
> > +++ b/src/gallium/drivers/panfrost/pan_context.h
> > @@ -114,6 +114,12 @@ struct panfrost_context {
> >  struct panfrost_batch *batch;
> >  struct hash_table *fbo_to_batch;
> >  
> > +/* A list containing all non-submitted batches since the last 
> > flush.
> > + * This list is used to keep track of clear/draw order on batches 
> > that
> > + * don't have explicit dependencies between them.
> > + */
> > +struct list_head batch_queue;
> > +
> >  /* panfrost_bo -> panfrost_bo_access */
> >  struct hash_table *accessed_bos;
> >  
> > diff --git a/src/gallium/drivers/panfrost/pan_job.c 
> > b/src/gallium/drivers/panfrost/pan_job.c
> > index 13d7e8086e62..e030f8e98dad 100644
> > --- a/src/gallium/drivers/panfrost/pan_job.c
> > +++ b/src/gallium/drivers/panfrost/pan_job.c
> > @@ -118,6 +118,7 @@ panfrost_create_batch(struct panfrost_context *ctx,
> >  util_dynarray_init(>headers, batch);
> >  util_dynarray_init(>gpu_headers, batch);
> >  util_dynarray_init(>dependencies, batch);
> > +list_inithead(>queue_node);
> >  batch->out_sync = panfrost_create_batch_fence(batch);
> >  util_copy_framebuffer_state(>key, key);
> >  
> > @@ -181,6 +182,9 @@ panfrost_free_batch(struct panfrost_batch *batch)
> >struct panfrost_batch_fence *, dep)
> >  panfrost_batch_fence_unreference(*dep);
> >  
> > +/* Remove the batch from the batch queue. */
> > +list_del(>queue_node);
> > +
> >  /* The out_sync fence lifetime is different from the the batch one
> >   * since other batches might want to wait on an fence of already
> >   * submitted/signaled batch. All we need to do here is make sure 
> > the
> > @@ -543,6 +547,13 @@ void panfrost_batch_add_fbo_bos(struct panfrost_batch 
> > *batch)
> >  struct panfrost_resource *rsrc = 
> > pan_resource(batch->key.zsbuf->texture);
> >  panfrost_batch_add_bo(batch, rsrc->bo, flags);
> >  }
> > +
> > +/* If the batch was not already present in the queue, add it now.
> > + * Should we move the batch to the end of the queue when a new draw
> > + * happens?
> > + */
> > +if (list_empty(>queue_node))
> > +list_addtail(>queue_node, >ctx->batch_queue);
> >  }
> >  
> >  struct panfrost_bo *
> > @@ -878,10 +889,15 @@ panfrost_flush_all_batches(struct panfrost_context 
> > *ctx, bool wait)
> >  util_dynarray_init(, NULL);
> >  }
> >  
> > -hash_table_

Re: [Mesa-dev] [PATCH v2 23/37] panfrost: Make panfrost_batch->bos a hash table

2019-09-16 Thread Boris Brezillon
On Mon, 16 Sep 2019 15:26:12 -0400
Alyssa Rosenzweig  wrote:

> Well, the hash tables strongly assume you're not using NULLs for things.
> 
> See _mesa_hash_table_set_deleted_key for how to change that behaviour.

Maybe I'm missing something, but AFAICT it's the key field that requires
special care, not the data one.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 00/37] panfrost: Support batch pipelining

2019-09-16 Thread Boris Brezillon
On Mon, 16 Sep 2019 16:29:05 -0400
Alyssa Rosenzweig  wrote:

> > As a drive-by comment, in case you didn't know, the "standard"
> > solution for avoiding flushing when BO's are written by the CPU (e.g.
> > uniform buffer updates) as documented in ARM's performance guide is to
> > add a copy-on-write mechanism, so that you have "shadow" BO's when the
> > original BO is modified by the user. I believe this is implemented in
> > freedreno, at least there was a talk about it at XDC a few years ago.  
> 
> Yes, this is implemented in freedreno. BO shadowing will be the next
> step once this pipelining code settles down. For now, this series is
> about eliminating the strict flushes between each and every frame of
> each and every FBO that we currently occur now.
> 
> Boris, references on the freedreno model (which is the mesa gold
> standard):
> 
> https://www.x.org/wiki/Events/XDC2016/Program/clark_ooo_rendering.pdf
> https://bloggingthemonkey.blogspot.com/2016/07/dirty-tricks-for-moar-fps.html
> 
> The former presentation is definitely worth a read; evidently we've
> already painted ourselves into some corners :p

I had a quick look at the presentation, and it looks pretty similar to
what is being implemented in this series. The only difference I could
spot is the limitation to 32 batches to avoid usage of sets in the BO
access tracking logic, and that's still something I can change (I'd
prefer to do that in a follow-up patch though).

What specific aspects do you think we got wrong?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 28/37] panfrost: Start tracking inter-batch dependencies

2019-09-16 Thread Boris Brezillon
On Mon, 16 Sep 2019 16:15:14 -0400
Alyssa Rosenzweig  wrote:

> > + * A BO is either being written or read at any time, that's what the type 
> > field
> > + * encodes.  
> 
> Might this be inferred from (writer != NULL) and (readers->length > 0)?

No, I need to keep the old writer around when the new access is a
read because all subsequent reads will need to have the reader -> writer
dep defined. 

> 
> > +util_dynarray_foreach(>dependencies,
> > +  struct panfrost_batch_fence *, dep)
> > +panfrost_batch_fence_unreference(*dep);
> > +  
> 
> Nit: Wrap in { braces } for 2+ lines for clarity
> 
> > +static void
> > +panfrost_batch_add_dep(struct panfrost_batch *batch,
> > +   struct panfrost_batch_fence *newdep)
> > +{
> > +if (batch == newdep->batch)
> > +return;
> > +
> > +util_dynarray_foreach(>dependencies,
> > +  struct panfrost_batch_fence *, dep) {
> > +if (*dep == newdep)
> > +return;
> > +}
> > +
> > +/* Make sure the dependency graph is acyclic. */
> > +assert(!panfrost_dep_graph_contains_batch(newdep->batch, batch));
> > +
> > +panfrost_batch_fence_reference(newdep);
> > +util_dynarray_append(>dependencies,
> > + struct panfrost_batch_fence *, newdep);
> > +
> > +/* We now have a batch depending on us, let's make sure new 
> > draw/clear
> > + * calls targeting the same FBO use a new batch object.
> > + */
> > +if (newdep->batch)
> > +panfrost_freeze_batch(newdep->batch);
> > +}  
> 
> I'm wondering if batch->dependencies should be expressed as a set,
> rather than a dynarray, such that testing whether a batch has a
> given dependency is ideally O(1), not O(N).
> 
> In practice I don't know if the asymptotic complexity matters, esp. for
> small numbers of batches, and in practice hash table iteration is slow
> enough in mesa* that maybe it would be counterproductive.
> 
> Just something I thought I might throw out there.
> 
> * Set iteration ought to be no slower than array iteration, but constant
>   factors are a thing.

I thought the number of deps would be small enough to not justify the
use of a set, but maybe I'm wrong.

> 
> > +int ret = drmSyncobjWait(pan_screen(fence->ctx->base.screen)->fd,
> > + >syncobj, 1, 0, 0, NULL);
> > +
> > +fence->signaled = ret >= 0;
> > +return fence->signaled;
> > +}  
> 
> Nit: Add a "/* Cache whether the fence was signaled */" comment?

Sure.

> 
> > +static void
> > +panfrost_bo_access_gc_fences(struct panfrost_context *ctx,
> > + struct panfrost_bo_access *access,
> > +const struct panfrost_bo *bo)  
> 
> Could you remind me what gc abbreviates? Sorry.

Garbage collect.

> 
> I'm a little unclear on what the function's purpose is based on the
> name.

Collect signaled fences to keep the kernel-side syncobj-map small. The
idea is to collect those signaled fences at the end of each flush_all
call. This function is likely to collect only fences from previous
batch flushes, not the ones that have just been submitted and
are probably still in flight when we trigger the garbage collection.
Anyway, we need to do this garbage collection at some point if we don't
want the BO access map to keep invalid entries around and retain
syncobjs forever.

> 
> > +{
> > +if (access->writer && 
> > panfrost_batch_fence_is_signaled(access->writer)) {
> > +panfrost_batch_fence_unreference(access->writer);
> > +access->writer = NULL;
> > +   }  
> 
> Spacing.
> 
> > +
> > +unsigned nreaders = 0;
> > +util_dynarray_foreach(>readers, struct 
> > panfrost_batch_fence *,
> > +  reader) {
> > +if (!*reader)
> > +continue;  
> 
> Please add parens (!(*reader)) for clarity.
> 
> > +static void
> > +panfrost_gc_fences(struct panfrost_context *ctx)
> > +{
> > +hash_table_foreach(ctx->accessed_bos, entry) {
> > +struct panfrost_bo_access *access = entry->data;
> > +
> > +assert(access);
> > +panfrost_bo_access_gc_fences(ctx, access, entry->key);
> > +if (!util_dynarray_num_elements(>readers,
> > +struct 
> > panfrost_batch_fence *) &&
> > +!access->writer)
> > +_mesa_hash_table_remove(ctx->accessed_bos, entry);
> > +}
> > +}  
> 
> Question: is it safe to remove entries while iterating the table?
> (Not a review comment, I don't know the details of mesa's
> implementation)

According to the doc, it is.

> 
> Also not clear what panfrost_gc_fences is for.

See above.

> 
> > +static void
> > 

Re: [Mesa-dev] [PATCH v2 25/37] panfrost: Add a batch fence

2019-09-16 Thread Boris Brezillon
On Mon, 16 Sep 2019 15:38:10 -0400
Alyssa Rosenzweig  wrote:

> > +/* Start in a signaled state so that even non-submitted batches
> > + * (those that have no draw/clear) can be waited upon.
> > + */  
> 
> When would this happen? Presumably if a batch does nothing whatsoever,
> it doesn't make sense to wait on it.

It was just simpler to have all batch fences contain a syncobj on
which we can call WAIT_SYNCOBJ even when no real fence is attached to it.

The other option being to set fence->signaled to true when a batch with
no draw/clear is submitted, and then skip those entries when we build
the array passed to the waitsyncobj() func.

> >  #include "pan_resource.h"
> >  
> > +/* Batch that created this fence object. Will become NULL at batch
> > + * submission time. This field is mainly here to know whether the
> > + * batch has been flushed or not.
> > + */
> > +struct panfrost_batch *batch;  
> 
> Could this be replaced by just a `bool flushed`, or do we actually use
> the value in a later patch?

I actually use the value to flush deps when they're not already flushed.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 24/37] panfrost: Cache GPU accesses to BOs

2019-09-16 Thread Boris Brezillon
On Mon, 16 Sep 2019 10:05:52 -0400
Alyssa Rosenzweig  wrote:

> > +/* If ->gpu_access is 0, the BO is idle, and if the WRITE 
> > flag
> > + * is cleared, that means we only have readers.
> > + */
> > +if (!bo->gpu_access)
> > +return true;
> > +else if (!(access_type & PAN_BO_GPU_ACCESS_READ) &&
> > + !(bo->gpu_access & PAN_BO_GPU_ACCESS_WRITE))
> > +return true;  
> 
> The second condition is a little confusing, though I think it's correct.
> Not sure if there's any way to clarify what's meant but just thought I'd
> comment, since inevitably future readers will squint too.

I can do:

/* If ->gpu_access is 0, the BO is idle, no need to wait. */
if (!bo->gpu_access)
return true;

/* If the caller only wants to wait for writers and no
 * writes are pending, we don't have to wait.
 */
if (access_type == PAN_BO_GPU_ACCESS_WRITE &&
!(bo->gpu_access & PAN_BO_GPU_ACCESS_WRITE))
return true;

instead.

> 
> > +/* Update the BO access flags so that panfrost_bo_wait() 
> > knows
> > + * about all pending accesses.
> > + */
> > +bo->gpu_access |= flags & (PAN_BO_GPU_ACCESS_RW);  
> 
> This looks like black magic. Maybe just clarify in the comment why this
> & is reasonable (relying on bit mask magic).

It's just here to clear all non-RW flags (we only care about the read/write
information when it comes to BO idleness). I'll add a comment to explain that
part, and maybe another one to explain why we have a '|=' and not just '='.

> 
> ---
> 
> That aside, as I mentioned it would maybe make more sense to squash this
> into the patch introduce the bo_wait ioctl() in the first place? If
> that's too complicated with merge conflicts and stuff, don't sweat it,
> though :)

I'm fine with that, I'll re-order things to avoid introducing the bo_wait()
infra before we have the access type info.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 23/37] panfrost: Make panfrost_batch->bos a hash table

2019-09-16 Thread Boris Brezillon
On Mon, 16 Sep 2019 10:00:13 -0400
Alyssa Rosenzweig  wrote:

> What if flags = 0?

Not sure what you have in mind. 0 would be a valid value (though not
really useful since that just means the BO is private and we don't give
any information on the type of access done on this BO). If you're
worried about having hentry->data = (uintptr_t)0 (IOW hentry->data =
NULL), I don't see the problem.

> 
> On Mon, Sep 16, 2019 at 11:37:01AM +0200, Boris Brezillon wrote:
> > So we can store the flags as data and keep the BO as a key. This way
> > we keep track of the type of access done on BOs.
> > 
> > Signed-off-by: Boris Brezillon 
> > ---
> >  src/gallium/drivers/panfrost/pan_job.c | 33 +-
> >  src/gallium/drivers/panfrost/pan_job.h |  2 +-
> >  2 files changed, 23 insertions(+), 12 deletions(-)
> > 
> > diff --git a/src/gallium/drivers/panfrost/pan_job.c 
> > b/src/gallium/drivers/panfrost/pan_job.c
> > index 6332529b2f9b..739f36a593f1 100644
> > --- a/src/gallium/drivers/panfrost/pan_job.c
> > +++ b/src/gallium/drivers/panfrost/pan_job.c
> > @@ -44,9 +44,8 @@ panfrost_create_batch(struct panfrost_context *ctx,
> >  
> >  batch->ctx = ctx;
> >  
> > -batch->bos = _mesa_set_create(batch,
> > -  _mesa_hash_pointer,
> > -  _mesa_key_pointer_equal);
> > +batch->bos = _mesa_hash_table_create(batch, _mesa_hash_pointer,
> > + _mesa_key_pointer_equal);
> >  
> >  batch->minx = batch->miny = ~0;
> >  batch->maxx = batch->maxy = 0;
> > @@ -67,10 +66,8 @@ panfrost_free_batch(struct panfrost_batch *batch)
> >  
> >  struct panfrost_context *ctx = batch->ctx;
> >  
> > -set_foreach(batch->bos, entry) {
> > -struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
> > -panfrost_bo_unreference(bo);
> > -}
> > +hash_table_foreach(batch->bos, entry)
> > +panfrost_bo_unreference((struct panfrost_bo *)entry->key);
> >  
> >  _mesa_hash_table_remove_key(ctx->batches, >key);
> >  
> > @@ -138,11 +135,25 @@ panfrost_batch_add_bo(struct panfrost_batch *batch, 
> > struct panfrost_bo *bo,
> >  if (!bo)
> >  return;
> >  
> > -if (_mesa_set_search(batch->bos, bo))
> > +struct hash_entry *entry;
> > +uint32_t old_flags = 0;
> > +
> > +entry = _mesa_hash_table_search(batch->bos, bo);
> > +if (!entry) {
> > +entry = _mesa_hash_table_insert(batch->bos, bo,
> > +(void *)(uintptr_t)flags);
> > +panfrost_bo_reference(bo);
> > +   } else {
> > +old_flags = (uintptr_t)entry->data;
> > +}
> > +
> > +assert(entry);
> > +
> > +if (old_flags == flags)
> >  return;
> >  
> > -panfrost_bo_reference(bo);
> > -_mesa_set_add(batch->bos, bo);
> > +flags |= old_flags;
> > +entry->data = (void *)(uintptr_t)flags;
> >  }
> >  
> >  void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
> > @@ -376,7 +387,7 @@ panfrost_batch_submit_ioctl(struct panfrost_batch 
> > *batch,
> >  bo_handles = calloc(batch->bos->entries, sizeof(*bo_handles));
> >  assert(bo_handles);
> >  
> > -set_foreach(batch->bos, entry) {
> > +hash_table_foreach(batch->bos, entry) {
> >  struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
> >  assert(bo->gem_handle > 0);
> >  bo_handles[submit.bo_handle_count++] = bo->gem_handle;
> > diff --git a/src/gallium/drivers/panfrost/pan_job.h 
> > b/src/gallium/drivers/panfrost/pan_job.h
> > index 0b37a3131e86..3f2cf1a999f3 100644
> > --- a/src/gallium/drivers/panfrost/pan_job.h
> > +++ b/src/gallium/drivers/panfrost/pan_job.h
> > @@ -98,7 +98,7 @@ struct panfrost_batch {
> >  unsigned job_index;
> >  
> >  /* BOs referenced -- will be used for flushing logic */
> > -struct set *bos;
> > +struct hash_table *bos;
> >  
> >  /* Current transient BO */
> > struct panfrost_bo *transient_bo;
> > -- 
> > 2.21.0  

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 22/37] panfrost: Extend the panfrost_batch_add_bo() API to pass access flags

2019-09-16 Thread Boris Brezillon
On Mon, 16 Sep 2019 09:59:15 -0400
Alyssa Rosenzweig  wrote:

> PAN_BO_GPU_ACCESS_* is rather wordy. We're a GPU driver, of course it's
> GPU access :)

Well, the driver can also do CPU accesses to the same BOs :P.

> 
> Could we just do PAN_BO_ACCESS_* instead?

I guess that's fine as long as it's documented.

> 
> >  static mali_ptr
> >  panfrost_upload_tex(
> >  struct panfrost_context *ctx,
> > +enum pipe_shader_type st,
> >  struct panfrost_sampler_view *view)
> >  {
> >  if (!view)
> > @@ -610,7 +611,11 @@ panfrost_upload_tex(
> >  
> >  /* Add the BO to the job so it's retained until the job is done. */
> >  struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
> > -panfrost_batch_add_bo(batch, rsrc->bo);
> > +panfrost_batch_add_bo(batch, rsrc->bo,
> > +  PAN_BO_GPU_ACCESS_SHARED | 
> > PAN_BO_GPU_ACCESS_READ |
> > +  PAN_BO_GPU_ACCESS_VERTEX_TILER |
> > +  (st == PIPE_SHADER_FRAGMENT ?
> > +   PAN_BO_GPU_ACCESS_FRAGMENT : 0));  
> 
> I'm not sure this is quite right... should it maybe be:
> 
> (st == PIPE_SHADER_FRAGMENT ? PAN_BO_ACCESS_FRAGMENT :
> PAN_BO_ACCESS_VERTEX_TILER)

That's a good question. I wasn't sure so I decided to put the
vertex/tiler unconditionally.

> 
> I.e., if it's accessed from the fragment shader, is it necessarily
> needed for the vertex/tiler part?
> 
> > -panfrost_batch_add_bo(batch, bo);
> > +panfrost_batch_add_bo(batch, bo,
> > +  PAN_BO_GPU_ACCESS_SHARED | 
> > PAN_BO_GPU_ACCESS_RW |
> > +  PAN_BO_GPU_ACCESS_VERTEX_TILER |
> > +  (st == PIPE_SHADER_FRAGMENT ?
> > +   PAN_BO_GPU_ACCESS_FRAGMENT : 0));  
> 
> Ditto. We should maybe have a `pan_bo_access_for_stage(enum
> pipe_shader_type)` to abstract this logic.

Good idea.

> 
> > @@ -803,7 +813,12 @@ panfrost_map_constant_buffer_gpu(
> >  struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
> >  
> >  if (rsrc) {
> > -panfrost_batch_add_bo(batch, rsrc->bo);
> > +panfrost_batch_add_bo(batch, rsrc->bo,
> > +  PAN_BO_GPU_ACCESS_SHARED |
> > +  PAN_BO_GPU_ACCESS_READ |
> > +  PAN_BO_GPU_ACCESS_VERTEX_TILER |
> > +  (st == PIPE_SHADER_FRAGMENT ?
> > +   PAN_BO_GPU_ACCESS_FRAGMENT : 0));  
> 
> Ditto.
> 
> >  if (!info->has_user_indices) {
> >  /* Only resources can be directly mapped */
> > -panfrost_batch_add_bo(batch, rsrc->bo);
> > +panfrost_batch_add_bo(batch, rsrc->bo,
> > +  PAN_BO_GPU_ACCESS_FRAGMENT);
> >  return rsrc->bo->gpu + offset;  
> 
> The index buffer is to determine geometry, so it is definitely accessed
> from the vertex/tiler chain.

Oops, that's a mistake. I meant PAN_BO_GPU_ACCESS_VERTEX_TILER here.

> 
> I'm not sure if it's also accessed by the fragment chain. Also, should
> this have ACCESS_SHARED | ACCESS_READ to be consistent with the others?

It should definitely have ACCESS_SHARED | ACCESS_READ.

> 
> > @@ -69,7 +69,10 @@ panfrost_emit_streamout(
> >  /* Grab the BO and bind it to the batch */
> >  struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
> >  struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
> > -panfrost_batch_add_bo(batch, bo);
> > +panfrost_batch_add_bo(batch, bo,
> > +  PAN_BO_GPU_ACCESS_SHARED |
> > +  PAN_BO_GPU_ACCESS_WRITE |
> > +  PAN_BO_GPU_ACCESS_VERTEX_TILER);  
> 
> We operate somewhat like:
> 
> [ Vertices ] -- vertex shader --> [ Varyings ] -- tiler --> [ Geometry ]
> 
> So varyings are WRITE from the perspective of the VERTEX but READ from
> the perspective of the TILER and FRAGMENT.
> 
> Now, this is for streamout. However, streamout does not imply rasterize
> discard. Hence, it is legal to have streamout and also render that
> geometry with a FRAGMENT job. So it's premature to drop the READ and
> FRAGMENT flags (this will presumably regress a bunch of dEQP-GLES3 tests
> for streamout).

Okay, will add PAN_BO_GPU_ACCESS_FRAGMENT and turn
PAN_BO_GPU_ACCESS_WRITE into PAN_BO_GPU_ACCESS_RW.

Thanks for the review.

Boris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 00/37] panfrost: Support batch pipelining

2019-09-16 Thread Boris Brezillon
On Mon, 16 Sep 2019 17:11:01 +0700
Connor Abbott  wrote:

> As a drive-by comment, in case you didn't know, the "standard"
> solution for avoiding flushing when BO's are written by the CPU (e.g.
> uniform buffer updates) as documented in ARM's performance guide is to
> add a copy-on-write mechanism, so that you have "shadow" BO's when the
> original BO is modified by the user. I believe this is implemented in
> freedreno, at least there was a talk about it at XDC a few years ago.

No, I didn't know that. Thanks for the heads-up.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 36/37] panfrost: Take draw call order into account

2019-09-16 Thread Boris Brezillon
This is not strictly required, but let's try to match the draw call
orders, just in case the app had a reason to do it in this order.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.h |  6 ++
 src/gallium/drivers/panfrost/pan_job.c | 23 +++---
 src/gallium/drivers/panfrost/pan_job.h |  3 +++
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.h 
b/src/gallium/drivers/panfrost/pan_context.h
index f13967f51b46..c6b53685b285 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -114,6 +114,12 @@ struct panfrost_context {
 struct panfrost_batch *batch;
 struct hash_table *fbo_to_batch;
 
+/* A list containing all non-submitted batches since the last flush.
+ * This list is used to keep track of clear/draw order on batches that
+ * don't have explicit dependencies between them.
+ */
+struct list_head batch_queue;
+
 /* panfrost_bo -> panfrost_bo_access */
 struct hash_table *accessed_bos;
 
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 13d7e8086e62..e030f8e98dad 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -118,6 +118,7 @@ panfrost_create_batch(struct panfrost_context *ctx,
 util_dynarray_init(>headers, batch);
 util_dynarray_init(>gpu_headers, batch);
 util_dynarray_init(>dependencies, batch);
+list_inithead(>queue_node);
 batch->out_sync = panfrost_create_batch_fence(batch);
 util_copy_framebuffer_state(>key, key);
 
@@ -181,6 +182,9 @@ panfrost_free_batch(struct panfrost_batch *batch)
   struct panfrost_batch_fence *, dep)
 panfrost_batch_fence_unreference(*dep);
 
+/* Remove the batch from the batch queue. */
+list_del(>queue_node);
+
 /* The out_sync fence lifetime is different from the the batch one
  * since other batches might want to wait on an fence of already
  * submitted/signaled batch. All we need to do here is make sure the
@@ -543,6 +547,13 @@ void panfrost_batch_add_fbo_bos(struct panfrost_batch 
*batch)
 struct panfrost_resource *rsrc = 
pan_resource(batch->key.zsbuf->texture);
 panfrost_batch_add_bo(batch, rsrc->bo, flags);
 }
+
+/* If the batch was not already present in the queue, add it now.
+ * Should we move the batch to the end of the queue when a new draw
+ * happens?
+ */
+if (list_empty(>queue_node))
+list_addtail(>queue_node, >ctx->batch_queue);
 }
 
 struct panfrost_bo *
@@ -878,10 +889,15 @@ panfrost_flush_all_batches(struct panfrost_context *ctx, 
bool wait)
 util_dynarray_init(, NULL);
 }
 
-hash_table_foreach(ctx->fbo_to_batch, hentry) {
-struct panfrost_batch *batch = hentry->data;
+/* We can use the for_each_entry_safe() iterator here because the
+ * next element might be removed from the list when flushing the
+ * dependencies in panfrost_batch_submit().
+ */
+while (!list_empty(>batch_queue)) {
+struct panfrost_batch *batch;
 
-assert(batch);
+batch = list_first_entry(>batch_queue,
+ struct panfrost_batch, queue_node);
 
 if (wait) {
 panfrost_batch_fence_reference(batch->out_sync);
@@ -1150,4 +1166,5 @@ panfrost_batch_init(struct panfrost_context *ctx)
 panfrost_batch_compare);
 ctx->accessed_bos = _mesa_hash_table_create(ctx, _mesa_hash_pointer,
 _mesa_key_pointer_equal);
+list_inithead(>batch_queue);
 }
diff --git a/src/gallium/drivers/panfrost/pan_job.h 
b/src/gallium/drivers/panfrost/pan_job.h
index d198864ce4f7..34926e30cdde 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -71,6 +71,9 @@ struct panfrost_batch {
 struct panfrost_context *ctx;
 struct pipe_framebuffer_state key;
 
+/* Used to insert the batch in the batch queue */
+struct list_head queue_node;
+
 /* Buffers cleared (PIPE_CLEAR_* bitmask) */
 unsigned clear;
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 11/37] panfrost: Stop exposing panfrost_bo_cache_{fetch, put}()

2019-09-16 Thread Boris Brezillon
They are not expected to be called directly, users should use
panfrost_bo_{create,release}() instead.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_bo.c | 4 ++--
 src/gallium/drivers/panfrost/pan_bo.h | 6 --
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_bo.c 
b/src/gallium/drivers/panfrost/pan_bo.c
index e6a5c972ead9..23273abc5f22 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -89,7 +89,7 @@ pan_bucket(struct panfrost_screen *screen, unsigned size)
  * cache. If it fails, it returns NULL signaling the caller to allocate a new
  * BO. */
 
-struct panfrost_bo *
+static struct panfrost_bo *
 panfrost_bo_cache_fetch(
 struct panfrost_screen *screen,
 size_t size, uint32_t flags)
@@ -130,7 +130,7 @@ panfrost_bo_cache_fetch(
 /* Tries to add a BO to the cache. Returns if it was
  * successful */
 
-bool
+static bool
 panfrost_bo_cache_put(
 struct panfrost_screen *screen,
 struct panfrost_bo *bo)
diff --git a/src/gallium/drivers/panfrost/pan_bo.h 
b/src/gallium/drivers/panfrost/pan_bo.h
index 6d17ebecf6e6..5afaa0c873d3 100644
--- a/src/gallium/drivers/panfrost/pan_bo.h
+++ b/src/gallium/drivers/panfrost/pan_bo.h
@@ -88,12 +88,6 @@ struct panfrost_bo *
 panfrost_bo_import(struct panfrost_screen *screen, int fd);
 int
 panfrost_bo_export(struct panfrost_screen *screen, const struct panfrost_bo 
*bo);
-struct panfrost_bo *
-panfrost_bo_cache_fetch(struct panfrost_screen *screen,
-   size_t size, uint32_t flags);
-bool
-panfrost_bo_cache_put(struct panfrost_screen *screen,
-  struct panfrost_bo *bo);
 void
 panfrost_bo_cache_evict_all(struct panfrost_screen *screen);
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 07/37] panfrost: Get rid of pan_drm.c

2019-09-16 Thread Boris Brezillon
pan_drm.c was only meaningful when we were supporting 2 kernel drivers
(mali_kbase, and the drm one). Now that there's no kernel-driver
abstraction we're better off moving those functions where they belong:

* BO related functions in pan_bo.c
* fence related functions + query_gpu_version() in pan_screen.c
* submit related functions in pan_job.c

While at it, we rename the functions according to the place they're
being moved to.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/meson.build |   1 -
 src/gallium/drivers/panfrost/pan_allocate.c  |   2 +-
 src/gallium/drivers/panfrost/pan_assemble.c  |   2 +-
 src/gallium/drivers/panfrost/pan_blend_cso.c |   2 +-
 src/gallium/drivers/panfrost/pan_bo.c| 199 +-
 src/gallium/drivers/panfrost/pan_context.c   |  18 +-
 src/gallium/drivers/panfrost/pan_context.h   |   3 -
 src/gallium/drivers/panfrost/pan_drm.c   | 395 ---
 src/gallium/drivers/panfrost/pan_job.c   |  82 +++-
 src/gallium/drivers/panfrost/pan_resource.c  |  14 +-
 src/gallium/drivers/panfrost/pan_screen.c|  78 +++-
 src/gallium/drivers/panfrost/pan_screen.h|  30 +-
 12 files changed, 382 insertions(+), 444 deletions(-)
 delete mode 100644 src/gallium/drivers/panfrost/pan_drm.c

diff --git a/src/gallium/drivers/panfrost/meson.build 
b/src/gallium/drivers/panfrost/meson.build
index 73c3b54923a4..ca9f6b7afb63 100644
--- a/src/gallium/drivers/panfrost/meson.build
+++ b/src/gallium/drivers/panfrost/meson.build
@@ -35,7 +35,6 @@ files_panfrost = files(
   'pan_bo.c',
   'pan_blit.c',
   'pan_job.c',
-  'pan_drm.c',
   'pan_allocate.c',
   'pan_assemble.c',
   'pan_format.c',
diff --git a/src/gallium/drivers/panfrost/pan_allocate.c 
b/src/gallium/drivers/panfrost/pan_allocate.c
index 44af631d355f..e7970c1be2d4 100644
--- a/src/gallium/drivers/panfrost/pan_allocate.c
+++ b/src/gallium/drivers/panfrost/pan_allocate.c
@@ -65,7 +65,7 @@ panfrost_allocate_transient(struct panfrost_batch *batch, 
size_t sz)
TRANSIENT_SLAB_SIZE : ALIGN_POT(sz, 4096);
 
 /* We can't reuse the current BO, but we can create a new one. 
*/
-bo = panfrost_drm_create_bo(screen, bo_sz, 0);
+bo = panfrost_bo_create(screen, bo_sz, 0);
 panfrost_batch_add_bo(batch, bo);
 
 /* Creating a BO adds a reference, and then the job adds a
diff --git a/src/gallium/drivers/panfrost/pan_assemble.c 
b/src/gallium/drivers/panfrost/pan_assemble.c
index b57cd5ef6ad2..de73cf8839a7 100644
--- a/src/gallium/drivers/panfrost/pan_assemble.c
+++ b/src/gallium/drivers/panfrost/pan_assemble.c
@@ -82,7 +82,7 @@ panfrost_shader_compile(
  * I bet someone just thought that would be a cute pun. At least,
  * that's how I'd do it. */
 
-state->bo = panfrost_drm_create_bo(screen, size, PAN_ALLOCATE_EXECUTE);
+state->bo = panfrost_bo_create(screen, size, PAN_ALLOCATE_EXECUTE);
 memcpy(state->bo->cpu, dst, size);
 meta->shader = state->bo->gpu | program.first_tag;
 
diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c 
b/src/gallium/drivers/panfrost/pan_blend_cso.c
index ab49772f3ba3..82527a5602ae 100644
--- a/src/gallium/drivers/panfrost/pan_blend_cso.c
+++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
@@ -272,7 +272,7 @@ panfrost_get_blend_for_context(struct panfrost_context 
*ctx, unsigned rti)
 final.shader.first_tag = shader->first_tag;
 
 /* Upload the shader */
-final.shader.bo = panfrost_drm_create_bo(screen, shader->size, 
PAN_ALLOCATE_EXECUTE);
+final.shader.bo = panfrost_bo_create(screen, shader->size, 
PAN_ALLOCATE_EXECUTE);
 memcpy(final.shader.bo->cpu, shader->buffer, shader->size);
 
 /* Pass BO ownership to job */
diff --git a/src/gallium/drivers/panfrost/pan_bo.c 
b/src/gallium/drivers/panfrost/pan_bo.c
index f2f49437a89f..22476f095660 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -23,11 +23,19 @@
  * Authors (Collabora):
  *   Alyssa Rosenzweig 
  */
+#include 
+#include 
 #include 
 #include 
 #include "drm-uapi/panfrost_drm.h"
 
 #include "pan_screen.h"
+#include "pan_util.h"
+#include "pandecode/decode.h"
+
+#include "os/os_mman.h"
+
+#include "util/u_inlines.h"
 #include "util/u_math.h"
 
 /* This file implements a userspace BO cache. Allocating and freeing
@@ -105,7 +113,7 @@ panfrost_bo_cache_fetch(
 
 ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, 
);
 if (!ret && !madv.retained) {
-panfrost_drm_release_bo(screen, entry, false);
+panfrost_bo_release(screen, entry, false);
 continue;
 }
 /* Let's go! */

[Mesa-dev] [PATCH v2 16/37] panfrost: Don't return imported/exported BOs to the cache

2019-09-16 Thread Boris Brezillon
We don't know who else is using the BO in that case, and thus shouldn't
re-use it for something else.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_bo.c | 5 +
 src/gallium/drivers/panfrost/pan_bo.h | 4 
 2 files changed, 9 insertions(+)

diff --git a/src/gallium/drivers/panfrost/pan_bo.c 
b/src/gallium/drivers/panfrost/pan_bo.c
index 5d0f296cc4fb..209d1e0d71e5 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -182,6 +182,9 @@ panfrost_bo_cache_put(struct panfrost_bo *bo)
 {
 struct panfrost_screen *screen = bo->screen;
 
+if (bo->flags & PAN_BO_DONT_REUSE)
+return false;
+
 pthread_mutex_lock(>bo_cache_lock);
 struct list_head *bucket = pan_bucket(screen, bo->size);
 struct drm_panfrost_madvise madv;
@@ -352,6 +355,7 @@ panfrost_bo_import(struct panfrost_screen *screen, int fd)
 bo->gem_handle = gem_handle;
 bo->gpu = (mali_ptr) get_bo_offset.offset;
 bo->size = lseek(fd, 0, SEEK_END);
+bo->flags |= PAN_BO_DONT_REUSE;
 assert(bo->size > 0);
 pipe_reference_init(>reference, 1);
 
@@ -372,6 +376,7 @@ panfrost_bo_export(struct panfrost_bo *bo)
 if (ret == -1)
 return -1;
 
+bo->flags |= PAN_BO_DONT_REUSE;
 return args.fd;
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_bo.h 
b/src/gallium/drivers/panfrost/pan_bo.h
index 2858d3782eff..33fbddff3369 100644
--- a/src/gallium/drivers/panfrost/pan_bo.h
+++ b/src/gallium/drivers/panfrost/pan_bo.h
@@ -52,6 +52,10 @@ struct panfrost_screen;
 * (semantically distinct from INVISIBLE, which can never be mmaped) */
 #define PAN_BO_DELAY_MMAP (1 << 4)
 
+/* Some BOs shouldn't be returned back to the reuse BO cache, use this flag to
+ * let the BO logic know about this constraint. */
+
 struct panfrost_bo {
 /* Must be first for casting */
 struct list_head link;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 30/37] panfrost: Add a panfrost_flush_all_batches() helper

2019-09-16 Thread Boris Brezillon
And use it in panfrost_flush() to flush all batches, and not only the
one currently bound to the context.

We also replace all internal calls to panfrost_flush() by
panfrost_flush_all_batches() ones.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_compute.c  |  2 +-
 src/gallium/drivers/panfrost/pan_context.c  | 23 ++-
 src/gallium/drivers/panfrost/pan_job.c  | 45 -
 src/gallium/drivers/panfrost/pan_job.h  |  2 +-
 src/gallium/drivers/panfrost/pan_resource.c |  6 +--
 5 files changed, 63 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_compute.c 
b/src/gallium/drivers/panfrost/pan_compute.c
index 4639c1b03c38..036dffbb17be 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -133,7 +133,7 @@ panfrost_launch_grid(struct pipe_context *pipe,
 /* Queue the job */
 panfrost_scoreboard_queue_compute_job(batch, transfer);
 
-panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
+panfrost_flush_all_batches(ctx, true);
 }
 
 void
diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index c7299e737c72..56a76a230141 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1354,7 +1354,6 @@ panfrost_flush(
 unsigned flags)
 {
 struct panfrost_context *ctx = pan_context(pipe);
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 struct util_dynarray fences;
 
 /* We must collect the fences before the flush is done, otherwise we'll
@@ -1362,13 +1361,18 @@ panfrost_flush(
  */
 if (fence) {
 util_dynarray_init(, NULL);
-panfrost_batch_fence_reference(batch->out_sync);
-util_dynarray_append(, struct panfrost_batch_fence *,
- batch->out_sync);
+hash_table_foreach(ctx->batches, hentry) {
+struct panfrost_batch *batch = hentry->data;
+
+panfrost_batch_fence_reference(batch->out_sync);
+util_dynarray_append(,
+ struct panfrost_batch_fence *,
+ batch->out_sync);
+}
 }
 
-/* Submit the frame itself */
-panfrost_batch_submit(batch);
+/* Submit all pending jobs */
+panfrost_flush_all_batches(ctx, false);
 
 if (fence) {
 struct panfrost_fence *f = panfrost_fence_create(ctx, );
@@ -2328,7 +2332,7 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx,
 }
 
 if (!is_scanout || has_draws)
-panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
+panfrost_flush_all_batches(ctx, true);
 else
 assert(!ctx->payloads[PIPE_SHADER_VERTEX].postfix.framebuffer 
&&

!ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.framebuffer);
@@ -2560,6 +2564,7 @@ panfrost_get_query_result(struct pipe_context *pipe,
   union pipe_query_result *vresult)
 {
 struct panfrost_query *query = (struct panfrost_query *) q;
+struct panfrost_context *ctx = pan_context(pipe);
 
 
 switch (query->type) {
@@ -2567,7 +2572,7 @@ panfrost_get_query_result(struct pipe_context *pipe,
 case PIPE_QUERY_OCCLUSION_PREDICATE:
 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
 /* Flush first */
-panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
+panfrost_flush_all_batches(ctx, true);
 
 /* Read back the query results */
 unsigned *result = (unsigned *) query->transfer.cpu;
@@ -2583,7 +2588,7 @@ panfrost_get_query_result(struct pipe_context *pipe,
 
 case PIPE_QUERY_PRIMITIVES_GENERATED:
 case PIPE_QUERY_PRIMITIVES_EMITTED:
-panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
+panfrost_flush_all_batches(ctx, true);
 vresult->u64 = query->end - query->start;
 break;
 
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 74fcfd642b45..e36f252e01fc 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -834,7 +834,7 @@ panfrost_batch_submit_jobs(struct panfrost_batch *batch)
 return ret;
 }
 
-void
+static void
 panfrost_batch_submit(struct panfrost_batch *batch)
 {
 assert(batch);
@@ -874,8 +874,51 @@ out:
>out_sync->syncobj, 1, INT64_MAX, 0, NULL);
 panfrost_free_batch(batch);
 
+}
+
+void
+panfrost_flush_all_batches(struct panfrost_context *ctx, bool wait)
+{
+struc

[Mesa-dev] [PATCH v2 08/37] panfrost: Move panfrost_bo_{reference, unreference}() to pan_bo.c

2019-09-16 Thread Boris Brezillon
This way we have all BO related functions placed in the same source
file.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_bo.c   | 20 
 src/gallium/drivers/panfrost/pan_resource.c | 19 ---
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_bo.c 
b/src/gallium/drivers/panfrost/pan_bo.c
index 22476f095660..9b0e8d943b43 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -29,6 +29,7 @@
 #include 
 #include "drm-uapi/panfrost_drm.h"
 
+#include "pan_resource.h"
 #include "pan_screen.h"
 #include "pan_util.h"
 #include "pandecode/decode.h"
@@ -319,6 +320,25 @@ panfrost_bo_release(struct panfrost_screen *screen, struct 
panfrost_bo *bo,
 ralloc_free(bo);
 }
 
+void
+panfrost_bo_reference(struct panfrost_bo *bo)
+{
+if (bo)
+pipe_reference(NULL, >reference);
+}
+
+void
+panfrost_bo_unreference(struct pipe_screen *screen, struct panfrost_bo *bo)
+{
+if (!bo)
+return;
+
+/* When the reference count goes to zero, we need to cleanup */
+
+if (pipe_reference(>reference, NULL))
+panfrost_bo_release(pan_screen(screen), bo, true);
+}
+
 struct panfrost_bo *
 panfrost_bo_import(struct panfrost_screen *screen, int fd)
 {
diff --git a/src/gallium/drivers/panfrost/pan_resource.c 
b/src/gallium/drivers/panfrost/pan_resource.c
index 47a44bfd81a0..766edee3ca6f 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -521,25 +521,6 @@ panfrost_resource_create(struct pipe_screen *screen,
 return (struct pipe_resource *)so;
 }
 
-void
-panfrost_bo_reference(struct panfrost_bo *bo)
-{
-if (bo)
-pipe_reference(NULL, >reference);
-}
-
-void
-panfrost_bo_unreference(struct pipe_screen *screen, struct panfrost_bo *bo)
-{
-if (!bo)
-return;
-
-/* When the reference count goes to zero, we need to cleanup */
-
-if (pipe_reference(>reference, NULL))
-panfrost_bo_release(pan_screen(screen), bo, true);
-}
-
 static void
 panfrost_resource_destroy(struct pipe_screen *screen,
   struct pipe_resource *pt)
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 13/37] panfrost: Stop passing screen around for BO operations

2019-09-16 Thread Boris Brezillon
Store a screen pointer in panfrost_bo so we don't have to pass a screen
object to all functions manipulating the BO.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_allocate.c  |  2 +-
 src/gallium/drivers/panfrost/pan_blend_cso.c |  2 +-
 src/gallium/drivers/panfrost/pan_bo.c| 40 ++--
 src/gallium/drivers/panfrost/pan_bo.h| 11 +++---
 src/gallium/drivers/panfrost/pan_context.c   |  9 ++---
 src/gallium/drivers/panfrost/pan_job.c   |  2 +-
 src/gallium/drivers/panfrost/pan_resource.c  |  8 ++--
 7 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_allocate.c 
b/src/gallium/drivers/panfrost/pan_allocate.c
index bdf6f26b77b8..7938196e3e4f 100644
--- a/src/gallium/drivers/panfrost/pan_allocate.c
+++ b/src/gallium/drivers/panfrost/pan_allocate.c
@@ -71,7 +71,7 @@ panfrost_allocate_transient(struct panfrost_batch *batch, 
size_t sz)
 
 /* Creating a BO adds a reference, and then the job adds a
  * second one. So we need to pop back one reference */
-panfrost_bo_unreference(>base, bo);
+panfrost_bo_unreference(bo);
 
 if (sz < TRANSIENT_SLAB_SIZE) {
 batch->transient_bo = bo;
diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c 
b/src/gallium/drivers/panfrost/pan_blend_cso.c
index 83492e1ed03d..90a1e2956a53 100644
--- a/src/gallium/drivers/panfrost/pan_blend_cso.c
+++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
@@ -278,7 +278,7 @@ panfrost_get_blend_for_context(struct panfrost_context 
*ctx, unsigned rti)
 
 /* Pass BO ownership to job */
 panfrost_batch_add_bo(batch, final.shader.bo);
-panfrost_bo_unreference(ctx->base.screen, final.shader.bo);
+panfrost_bo_unreference(final.shader.bo);
 
 if (shader->patch_index) {
 /* We have to specialize the blend shader to use constants, so
diff --git a/src/gallium/drivers/panfrost/pan_bo.c 
b/src/gallium/drivers/panfrost/pan_bo.c
index 23273abc5f22..d9c4cb208bc8 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -114,7 +114,7 @@ panfrost_bo_cache_fetch(
 
 ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, 
);
 if (!ret && !madv.retained) {
-panfrost_bo_release(screen, entry, false);
+panfrost_bo_release(entry, false);
 continue;
 }
 /* Let's go! */
@@ -131,10 +131,10 @@ panfrost_bo_cache_fetch(
  * successful */
 
 static bool
-panfrost_bo_cache_put(
-struct panfrost_screen *screen,
-struct panfrost_bo *bo)
+panfrost_bo_cache_put(struct panfrost_bo *bo)
 {
+struct panfrost_screen *screen = bo->screen;
+
 pthread_mutex_lock(>bo_cache_lock);
 struct list_head *bucket = pan_bucket(screen, bo->size);
 struct drm_panfrost_madvise madv;
@@ -168,14 +168,14 @@ panfrost_bo_cache_evict_all(
 
 list_for_each_entry_safe(struct panfrost_bo, entry, bucket, 
link) {
 list_del(>link);
-panfrost_bo_release(screen, entry, false);
+panfrost_bo_release(entry, false);
 }
 }
 pthread_mutex_unlock(>bo_cache_lock);
 }
 
 void
-panfrost_bo_mmap(struct panfrost_screen *screen, struct panfrost_bo *bo)
+panfrost_bo_mmap(struct panfrost_bo *bo)
 {
 struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };
 int ret;
@@ -183,14 +183,14 @@ panfrost_bo_mmap(struct panfrost_screen *screen, struct 
panfrost_bo *bo)
 if (bo->cpu)
 return;
 
-ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MMAP_BO, _bo);
+ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_MMAP_BO, _bo);
 if (ret) {
 fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
 assert(0);
 }
 
 bo->cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
-  screen->fd, mmap_bo.offset);
+  bo->screen->fd, mmap_bo.offset);
 if (bo->cpu == MAP_FAILED) {
 fprintf(stderr, "mmap failed: %p %m\n", bo->cpu);
 assert(0);
@@ -202,7 +202,7 @@ panfrost_bo_mmap(struct panfrost_screen *screen, struct 
panfrost_bo *bo)
 }
 
 static void
-panfrost_bo_munmap(struct panfrost_screen *screen, struct panfrost_bo *bo)
+panfrost_bo_munmap(struct panfrost_bo *bo)
 {
 if (!bo->cpu)
 return;
@@ -277,7 +277,7 @@ panfrost_bo_create(struct panfrost_screen *screen, size_t 
size,
  * for GPU-internal use. But we do trace them anyway. */
 

[Mesa-dev] [PATCH v2 29/37] panfrost: Prepare panfrost_fence for batch pipelining

2019-09-16 Thread Boris Brezillon
The panfrost_fence logic currently waits on the last submitted batch,
but the batch serialization that was enforced in
panfrost_batch_submit() is about to go away, allowing for several
batches to be pipelined, and the last submitted one is not necessarily
the one that will finish last.

We need to make sure the fence logic waits on all flushed batches, not
only the last one.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c | 18 ++-
 src/gallium/drivers/panfrost/pan_context.h |  5 +-
 src/gallium/drivers/panfrost/pan_job.c |  7 ---
 src/gallium/drivers/panfrost/pan_screen.c  | 60 +-
 src/gallium/drivers/panfrost/pan_screen.h  |  3 +-
 5 files changed, 55 insertions(+), 38 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 0197f78b5506..c7299e737c72 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1355,14 +1355,30 @@ panfrost_flush(
 {
 struct panfrost_context *ctx = pan_context(pipe);
 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+struct util_dynarray fences;
+
+/* We must collect the fences before the flush is done, otherwise we'll
+ * lose track of them.
+ */
+if (fence) {
+util_dynarray_init(, NULL);
+panfrost_batch_fence_reference(batch->out_sync);
+util_dynarray_append(, struct panfrost_batch_fence *,
+ batch->out_sync);
+}
 
 /* Submit the frame itself */
 panfrost_batch_submit(batch);
 
 if (fence) {
-struct panfrost_fence *f = panfrost_fence_create(ctx);
+struct panfrost_fence *f = panfrost_fence_create(ctx, );
 pipe->screen->fence_reference(pipe->screen, fence, NULL);
 *fence = (struct pipe_fence_handle *)f;
+
+util_dynarray_foreach(, struct panfrost_batch_fence *, 
fence)
+panfrost_batch_fence_unreference(*fence);
+
+util_dynarray_fini();
 }
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_context.h 
b/src/gallium/drivers/panfrost/pan_context.h
index 3b09952345cf..d50ed57d5d8a 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -94,7 +94,7 @@ struct panfrost_query {
 
 struct panfrost_fence {
 struct pipe_reference reference;
-int fd;
+struct util_dynarray syncfds;
 };
 
 struct panfrost_streamout {
@@ -193,9 +193,6 @@ struct panfrost_context {
 
 /* True for t6XX, false for t8xx. */
 bool is_t6xx;
-
-/* The out sync fence of the last submitted batch. */
-struct panfrost_batch_fence *last_out_sync;
 };
 
 /* Corresponds to the CSO */
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 04f4f22dab74..74fcfd642b45 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -797,13 +797,6 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 free(bo_handles);
 free(in_syncs);
 
-/* Release the last batch fence if any, and retain the new one */
-if (ctx->last_out_sync)
-panfrost_batch_fence_unreference(ctx->last_out_sync);
-
-panfrost_batch_fence_reference(batch->out_sync);
-ctx->last_out_sync = batch->out_sync;
-
 if (ret) {
 fprintf(stderr, "Error submitting: %m\n");
 return errno;
diff --git a/src/gallium/drivers/panfrost/pan_screen.c 
b/src/gallium/drivers/panfrost/pan_screen.c
index 22f7feb890e2..cf6bc38fbe0b 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -575,7 +575,9 @@ panfrost_fence_reference(struct pipe_screen *pscreen,
 struct panfrost_fence *old = *p;
 
 if (pipe_reference(&(*p)->reference, >reference)) {
-close(old->fd);
+util_dynarray_foreach(>syncfds, int, fd)
+close(*fd);
+util_dynarray_fini(>syncfds);
 free(old);
 }
 *p = f;
@@ -589,52 +591,60 @@ panfrost_fence_finish(struct pipe_screen *pscreen,
 {
 struct panfrost_screen *screen = pan_screen(pscreen);
 struct panfrost_fence *f = (struct panfrost_fence *)fence;
+struct util_dynarray syncobjs;
 int ret;
-unsigned syncobj;
 
-ret = drmSyncobjCreate(screen->fd, 0, );
-if (ret) {
-fprintf(stderr, "Failed to create syncobj to wait on: %m\n");
-return false;
-}
+if (!util_dynarray_num_elements(>syncfds, int))
+return true;
 
-ret = drmSyncobjImportSyncFil

[Mesa-dev] [PATCH v2 20/37] panfrost: Add FBO BOs to batch->bos earlier

2019-09-16 Thread Boris Brezillon
If we want the batch dependency tracking to work correctly we must
make sure all BOs are added to the batch->bos set early enough. Adding
FBO BOs when generating the fragment job is clearly to late. Add a
panfrost_batch_add_fbo_bos helper and call it in the clear/draw path.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c  |  2 ++
 src/gallium/drivers/panfrost/pan_fragment.c |  3 ---
 src/gallium/drivers/panfrost/pan_job.c  | 13 +
 src/gallium/drivers/panfrost/pan_job.h  |  2 ++
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index c5139a21f9a3..34bc6e41218d 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -160,6 +160,7 @@ panfrost_clear(
 struct panfrost_context *ctx = pan_context(pipe);
 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 
+panfrost_batch_add_fbo_bos(batch);
 panfrost_batch_clear(batch, buffers, color, depth, stencil);
 }
 
@@ -879,6 +880,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool 
with_vertex_data)
 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 struct panfrost_screen *screen = pan_screen(ctx->base.screen);
 
+panfrost_batch_add_fbo_bos(batch);
 panfrost_attach_vt_framebuffer(ctx);
 
 if (with_vertex_data) {
diff --git a/src/gallium/drivers/panfrost/pan_fragment.c 
b/src/gallium/drivers/panfrost/pan_fragment.c
index 2b6ffd841fe9..00ff363a1bba 100644
--- a/src/gallium/drivers/panfrost/pan_fragment.c
+++ b/src/gallium/drivers/panfrost/pan_fragment.c
@@ -42,9 +42,6 @@ panfrost_initialize_surface(
 struct panfrost_resource *rsrc = pan_resource(surf->texture);
 
 rsrc->slices[level].initialized = true;
-
-assert(rsrc->bo);
-panfrost_batch_add_bo(batch, rsrc->bo);
 }
 
 /* Generate a fragment job. This should be called once per frame. (According to
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index cc0db3e440a1..5b9a51325c3b 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -144,6 +144,19 @@ panfrost_batch_add_bo(struct panfrost_batch *batch, struct 
panfrost_bo *bo)
 _mesa_set_add(batch->bos, bo);
 }
 
+void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
+{
+for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
+struct panfrost_resource *rsrc = 
pan_resource(batch->key.cbufs[i]->texture);
+panfrost_batch_add_bo(batch, rsrc->bo);
+}
+
+if (batch->key.zsbuf) {
+struct panfrost_resource *rsrc = 
pan_resource(batch->key.zsbuf->texture);
+panfrost_batch_add_bo(batch, rsrc->bo);
+}
+}
+
 struct panfrost_bo *
 panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
  uint32_t create_flags)
diff --git a/src/gallium/drivers/panfrost/pan_job.h 
b/src/gallium/drivers/panfrost/pan_job.h
index b1351b902bd2..3474a102f5a4 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -124,6 +124,8 @@ panfrost_batch_init(struct panfrost_context *ctx);
 void
 panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo);
 
+void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch);
+
 struct panfrost_bo *
 panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
  uint32_t create_flags);
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 26/37] panfrost: Use the per-batch fences to wait on the last submitted batch

2019-09-16 Thread Boris Brezillon
We just replace the per-context out_sync object by a pointer to the
the fence of the last last submitted batch. Pipelining of batches will
come later.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c |  6 --
 src/gallium/drivers/panfrost/pan_context.h |  3 ++-
 src/gallium/drivers/panfrost/pan_job.c | 23 --
 src/gallium/drivers/panfrost/pan_screen.c  |  6 --
 4 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index a76caecef0e3..0197f78b5506 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -2712,12 +2712,6 @@ panfrost_create_context(struct pipe_screen *screen, void 
*priv, unsigned flags)
 panfrost_blend_context_init(gallium);
 panfrost_compute_context_init(gallium);
 
-ASSERTED int ret;
-
-ret = drmSyncobjCreate(pscreen->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
-   >out_sync);
-assert(!ret);
-
 /* XXX: leaks */
 gallium->stream_uploader = u_upload_create_default(gallium);
 gallium->const_uploader = gallium->stream_uploader;
diff --git a/src/gallium/drivers/panfrost/pan_context.h 
b/src/gallium/drivers/panfrost/pan_context.h
index c145d589757e..ce3e0c899a4f 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -191,7 +191,8 @@ struct panfrost_context {
 /* True for t6XX, false for t8xx. */
 bool is_t6xx;
 
-uint32_t out_sync;
+/* The out sync fence of the last submitted batch. */
+struct panfrost_batch_fence *last_out_sync;
 };
 
 /* Corresponds to the CSO */
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 8712e2ce598a..78f2b766adb1 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -427,11 +427,13 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 uint32_t *bo_handles;
 int ret;
 
-submit.in_syncs = (u64) (uintptr_t) >out_sync;
-submit.in_sync_count = 1;
 
-submit.out_sync = ctx->out_sync;
+if (ctx->last_out_sync) {
+submit.in_sync_count = 1;
+submit.in_syncs = (uintptr_t)>last_out_sync->syncobj;
+}
 
+submit.out_sync = batch->out_sync->syncobj;
 submit.jc = first_job_desc;
 submit.requirements = reqs;
 
@@ -454,6 +456,14 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 submit.bo_handles = (u64) (uintptr_t) bo_handles;
 ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_SUBMIT, );
 free(bo_handles);
+
+/* Release the last batch fence if any, and retain the new one */
+if (ctx->last_out_sync)
+panfrost_batch_fence_unreference(ctx->last_out_sync);
+
+panfrost_batch_fence_reference(batch->out_sync);
+ctx->last_out_sync = batch->out_sync;
+
 if (ret) {
 fprintf(stderr, "Error submitting: %m\n");
 return errno;
@@ -462,7 +472,8 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 /* Trace the job if we're doing that */
 if (pan_debug & PAN_DBG_TRACE) {
 /* Wait so we can get errors reported back */
-drmSyncobjWait(screen->fd, >out_sync, 1, INT64_MAX, 0, 
NULL);
+drmSyncobjWait(screen->fd, >out_sync->syncobj, 1,
+   INT64_MAX, 0, NULL);
 pandecode_jc(submit.jc, FALSE);
 }
 
@@ -531,8 +542,8 @@ out:
  * rendering is quite broken right now (to be fixed by the panfrost_job
  * refactor, just take the perf hit for correctness)
  */
-drmSyncobjWait(pan_screen(ctx->base.screen)->fd, >out_sync, 1,
-   INT64_MAX, 0, NULL);
+drmSyncobjWait(pan_screen(ctx->base.screen)->fd,
+   >out_sync->syncobj, 1, INT64_MAX, 0, NULL);
 panfrost_free_batch(batch);
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_screen.c 
b/src/gallium/drivers/panfrost/pan_screen.c
index dae8b941f1ea..22f7feb890e2 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -590,8 +590,8 @@ panfrost_fence_finish(struct pipe_screen *pscreen,
 struct panfrost_screen *screen = pan_screen(pscreen);
 struct panfrost_fence *f = (struct panfrost_fence *)fence;
 int ret;
-
 unsigned syncobj;
+
 ret = drmSyncobjCreate(screen->fd, 0, );
 if (ret) {
 fprintf(stderr, "Failed to create syncobj to wait on: %m\n");
@@ -623,12 +623,14 @@ panfrost_fence_create(struct panfrost_context *ctx)
 if

[Mesa-dev] [PATCH v2 17/37] panfrost: Make sure the BO is 'ready' when picked from the cache

2019-09-16 Thread Boris Brezillon
This is needed if we want to free the panfrost_batch object at submit
time in order to not have to GC the batch on the next job submission.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_bo.c | 78 +++
 src/gallium/drivers/panfrost/pan_bo.h |  2 +
 2 files changed, 57 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_bo.c 
b/src/gallium/drivers/panfrost/pan_bo.c
index 209d1e0d71e5..3f05226f96f4 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -23,6 +23,7 @@
  * Authors (Collabora):
  *   Alyssa Rosenzweig 
  */
+#include 
 #include 
 #include 
 #include 
@@ -101,6 +102,30 @@ panfrost_bo_free(struct panfrost_bo *bo)
 ralloc_free(bo);
 }
 
+/* Returns true if the BO is ready, false otherwise. */
+bool
+panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns)
+{
+struct drm_panfrost_wait_bo req = {
+.handle = bo->gem_handle,
+   .timeout_ns = timeout_ns,
+};
+int ret;
+
+/* The ioctl returns >= 0 value when the BO we are waiting for is ready
+ * -1 otherwise.
+ */
+ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_WAIT_BO, );
+if (ret != -1)
+return true;
+
+/* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
+ * is invalid, which shouldn't happen here.
+ */
+assert(errno == ETIMEDOUT || errno == EBUSY);
+return false;
+}
+
 /* Helper to calculate the bucket index of a BO */
 
 static unsigned
@@ -137,9 +162,8 @@ pan_bucket(struct panfrost_screen *screen, unsigned size)
  * BO. */
 
 static struct panfrost_bo *
-panfrost_bo_cache_fetch(
-struct panfrost_screen *screen,
-size_t size, uint32_t flags)
+panfrost_bo_cache_fetch(struct panfrost_screen *screen,
+size_t size, uint32_t flags, bool dontwait)
 {
 pthread_mutex_lock(>bo_cache_lock);
 struct list_head *bucket = pan_bucket(screen, size);
@@ -147,27 +171,29 @@ panfrost_bo_cache_fetch(
 
 /* Iterate the bucket looking for something suitable */
 list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
-if (entry->size >= size &&
-entry->flags == flags) {
-int ret;
-struct drm_panfrost_madvise madv;
+if (entry->size < size || entry->flags != flags)
+continue;
 
-/* This one works, splice it out of the cache */
-list_del(>link);
+if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX))
+continue;
 
-madv.handle = entry->gem_handle;
-madv.madv = PANFROST_MADV_WILLNEED;
-madv.retained = 0;
+struct drm_panfrost_madvise madv = {
+.handle = entry->gem_handle,
+.madv = PANFROST_MADV_WILLNEED,
+};
+int ret;
 
-ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, 
);
-if (!ret && !madv.retained) {
-panfrost_bo_free(entry);
-continue;
-}
-/* Let's go! */
-bo = entry;
-break;
+/* This one works, splice it out of the cache */
+list_del(>link);
+
+ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, );
+if (!ret && !madv.retained) {
+panfrost_bo_free(entry);
+continue;
 }
+/* Let's go! */
+bo = entry;
+break;
 }
 pthread_mutex_unlock(>bo_cache_lock);
 
@@ -281,12 +307,18 @@ panfrost_bo_create(struct panfrost_screen *screen, size_t 
size,
 if (flags & PAN_BO_GROWABLE)
 assert(flags & PAN_BO_INVISIBLE);
 
-/* Before creating a BO, we first want to check the cache, otherwise,
- * the cache misses and we need to allocate a BO fresh from the kernel
+/* Before creating a BO, we first want to check the cache but without
+ * waiting for BO readiness (BOs in the cache can still be referenced
+ * by jobs that are not finished yet).
+ * If the cached allocation fails we fall back on fresh BO allocation,
+ * and if that fails too, we try one more time to allocate from the
+ * cache, but this time we accept to wait.
  */
-bo = panfrost_bo_cache_fetch(screen, size, flags);
+bo = panfrost_bo_cache_fetch(screen, size, flags

[Mesa-dev] [PATCH v2 15/37] panfrost: Add panfrost_bo_{alloc, free}()

2019-09-16 Thread Boris Brezillon
Thanks to that we avoid the recursive call into panfrost_bo_create()
and we can get rid of panfrost_bo_release() by inlining the code in
panfrost_bo_unreference().

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_bo.c | 146 --
 1 file changed, 69 insertions(+), 77 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_bo.c 
b/src/gallium/drivers/panfrost/pan_bo.c
index 396b25230c46..5d0f296cc4fb 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -54,6 +54,53 @@
  * around the linked list.
  */
 
+static struct panfrost_bo *
+panfrost_bo_alloc(struct panfrost_screen *screen, size_t size,
+  uint32_t flags)
+{
+struct drm_panfrost_create_bo create_bo = { .size = size };
+struct panfrost_bo *bo;
+int ret;
+
+if (screen->kernel_version->version_major > 1 ||
+screen->kernel_version->version_minor >= 1) {
+if (flags & PAN_BO_GROWABLE)
+create_bo.flags |= PANFROST_BO_HEAP;
+if (!(flags & PAN_BO_EXECUTE))
+create_bo.flags |= PANFROST_BO_NOEXEC;
+}
+
+ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_CREATE_BO, _bo);
+if (ret) {
+fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
+return NULL;
+}
+
+bo = rzalloc(screen, struct panfrost_bo);
+assert(bo);
+bo->size = create_bo.size;
+bo->gpu = create_bo.offset;
+bo->gem_handle = create_bo.handle;
+bo->flags = flags;
+bo->screen = screen;
+return bo;
+}
+
+static void
+panfrost_bo_free(struct panfrost_bo *bo)
+{
+struct drm_gem_close gem_close = { .handle = bo->gem_handle };
+int ret;
+
+ret = drmIoctl(bo->screen->fd, DRM_IOCTL_GEM_CLOSE, _close);
+if (ret) {
+fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
+assert(0);
+}
+
+ralloc_free(bo);
+}
+
 /* Helper to calculate the bucket index of a BO */
 
 static unsigned
@@ -84,9 +131,6 @@ pan_bucket(struct panfrost_screen *screen, unsigned size)
 return >bo_cache[pan_bucket_index(size)];
 }
 
-static void
-panfrost_bo_release(struct panfrost_bo *bo, bool cacheable);
-
 /* Tries to fetch a BO of sufficient size with the appropriate flags from the
  * BO cache. If it succeeds, it returns that BO and removes the BO from the
  * cache. If it fails, it returns NULL signaling the caller to allocate a new
@@ -117,7 +161,7 @@ panfrost_bo_cache_fetch(
 
 ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, 
);
 if (!ret && !madv.retained) {
-panfrost_bo_release(entry, false);
+panfrost_bo_free(entry);
 continue;
 }
 /* Let's go! */
@@ -171,7 +215,7 @@ panfrost_bo_cache_evict_all(
 
 list_for_each_entry_safe(struct panfrost_bo, entry, bucket, 
link) {
 list_del(>link);
-panfrost_bo_release(entry, false);
+panfrost_bo_free(entry);
 }
 }
 pthread_mutex_unlock(>bo_cache_lock);
@@ -234,46 +278,17 @@ panfrost_bo_create(struct panfrost_screen *screen, size_t 
size,
 if (flags & PAN_BO_GROWABLE)
 assert(flags & PAN_BO_INVISIBLE);
 
-unsigned translated_flags = 0;
-
-if (screen->kernel_version->version_major > 1 ||
-screen->kernel_version->version_minor >= 1) {
-if (flags & PAN_BO_GROWABLE)
-translated_flags |= PANFROST_BO_HEAP;
-if (!(flags & PAN_BO_EXECUTE))
-translated_flags |= PANFROST_BO_NOEXEC;
-}
-
-struct drm_panfrost_create_bo create_bo = {
-.size = size,
-.flags = translated_flags,
-};
-
-/* Before creating a BO, we first want to check the cache */
-
+/* Before creating a BO, we first want to check the cache, otherwise,
+ * the cache misses and we need to allocate a BO fresh from the kernel
+ */
 bo = panfrost_bo_cache_fetch(screen, size, flags);
+if (!bo)
+bo = panfrost_bo_alloc(screen, size, flags);
 
-if (bo == NULL) {
-/* Otherwise, the cache misses and we need to allocate a BO 
fresh from
- * the kernel */
+if (!bo)
+fprintf(stderr, "BO creation failed\n");
 
-int ret;
-
-ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_CREATE_BO, 
_bo);
-

[Mesa-dev] [PATCH v2 19/37] panfrost: Add the panfrost_batch_create_bo() helper

2019-09-16 Thread Boris Brezillon
This helper automates the panfrost_bo_create()+panfrost_batch_add_bo()+
panfrost_bo_unreference() sequence that's done for all per-batch BOs.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_allocate.c  |  9 +-
 src/gallium/drivers/panfrost/pan_blend_cso.c |  8 ++---
 src/gallium/drivers/panfrost/pan_job.c   | 32 +---
 src/gallium/drivers/panfrost/pan_job.h   |  4 +++
 4 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_allocate.c 
b/src/gallium/drivers/panfrost/pan_allocate.c
index 7938196e3e4f..3076c23ab1cc 100644
--- a/src/gallium/drivers/panfrost/pan_allocate.c
+++ b/src/gallium/drivers/panfrost/pan_allocate.c
@@ -42,8 +42,6 @@
 struct panfrost_transfer
 panfrost_allocate_transient(struct panfrost_batch *batch, size_t sz)
 {
-struct panfrost_screen *screen = pan_screen(batch->ctx->base.screen);
-
 /* Pad the size */
 sz = ALIGN_POT(sz, ALIGNMENT);
 
@@ -66,12 +64,7 @@ panfrost_allocate_transient(struct panfrost_batch *batch, 
size_t sz)
TRANSIENT_SLAB_SIZE : ALIGN_POT(sz, 4096);
 
 /* We can't reuse the current BO, but we can create a new one. 
*/
-bo = panfrost_bo_create(screen, bo_sz, 0);
-panfrost_batch_add_bo(batch, bo);
-
-/* Creating a BO adds a reference, and then the job adds a
- * second one. So we need to pop back one reference */
-panfrost_bo_unreference(bo);
+bo = panfrost_batch_create_bo(batch, bo_sz, 0);
 
 if (sz < TRANSIENT_SLAB_SIZE) {
 batch->transient_bo = bo;
diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c 
b/src/gallium/drivers/panfrost/pan_blend_cso.c
index 90a1e2956a53..6bd6ff71cdc7 100644
--- a/src/gallium/drivers/panfrost/pan_blend_cso.c
+++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
@@ -227,7 +227,6 @@ panfrost_blend_constant(float *out, float *in, unsigned 
mask)
 struct panfrost_blend_final
 panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti)
 {
-struct panfrost_screen *screen = pan_screen(ctx->base.screen);
 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 
 /* Grab the format, falling back gracefully if called invalidly (which
@@ -273,13 +272,10 @@ panfrost_get_blend_for_context(struct panfrost_context 
*ctx, unsigned rti)
 final.shader.first_tag = shader->first_tag;
 
 /* Upload the shader */
-final.shader.bo = panfrost_bo_create(screen, shader->size, 
PAN_BO_EXECUTE);
+final.shader.bo = panfrost_batch_create_bo(batch, shader->size,
+   PAN_BO_EXECUTE);
 memcpy(final.shader.bo->cpu, shader->buffer, shader->size);
 
-/* Pass BO ownership to job */
-panfrost_batch_add_bo(batch, final.shader.bo);
-panfrost_bo_unreference(final.shader.bo);
-
 if (shader->patch_index) {
 /* We have to specialize the blend shader to use constants, so
  * patch in the current constants */
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 4ffc990a5334..cc0db3e440a1 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -144,6 +144,25 @@ panfrost_batch_add_bo(struct panfrost_batch *batch, struct 
panfrost_bo *bo)
 _mesa_set_add(batch->bos, bo);
 }
 
+struct panfrost_bo *
+panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
+ uint32_t create_flags)
+{
+struct panfrost_bo *bo;
+
+bo = panfrost_bo_create(pan_screen(batch->ctx->base.screen), size,
+create_flags);
+panfrost_batch_add_bo(batch, bo);
+
+/* panfrost_batch_add_bo() has retained a reference and
+ * panfrost_bo_create() initialize the refcnt to 1, so let's
+ * unreference the BO here so it gets released when the batch is
+ * destroyed (unless it's retained by someone else in the meantime).
+ */
+panfrost_bo_unreference(bo);
+return bo;
+}
+
 /* Returns the polygon list's GPU address if available, or otherwise allocates
  * the polygon list.  It's perfectly fast to use allocate/free BO directly,
  * since we'll hit the BO cache and this is one-per-batch anyway. */
@@ -154,19 +173,10 @@ panfrost_batch_get_polygon_list(struct panfrost_batch 
*batch, unsigned size)
 if (batch->polygon_list) {
 assert(batch->polygon_list->size >= size);
 } else {
-struct panfrost_screen *screen = 
pan_screen(batch->ctx->base.screen);
-
 /* Create the BO as invisible, as there's no reason to map */
 
-  

[Mesa-dev] [PATCH v2 24/37] panfrost: Cache GPU accesses to BOs

2019-09-16 Thread Boris Brezillon
This way we can avoid calling ioctl(WAIT_BO) when we already know the
BO is idle because it hasn't been touched by a GPU job or because the
previous call to panfrost_bo_wait() returned true.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_bo.c  | 40 +++---
 src/gallium/drivers/panfrost/pan_bo.h  |  9 +-
 src/gallium/drivers/panfrost/pan_job.c |  7 +
 3 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_bo.c 
b/src/gallium/drivers/panfrost/pan_bo.c
index 4aabd5fd23ab..c3b62c369f3e 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -102,9 +102,18 @@ panfrost_bo_free(struct panfrost_bo *bo)
 ralloc_free(bo);
 }
 
-/* Returns true if the BO is ready, false otherwise. */
+/* Returns true if the BO is ready, false otherwise.
+ * access_type is encoding the type of access one wants to ensure is done.
+ * Say you want to make sure all writers are done writing, you should pass
+ * PAN_BO_GPU_ACCESS_WRITE.
+ * If you want to wait for all users, you should pass PAN_BO_GPU_ACCESS_RW.
+ * PAN_BO_GPU_ACCESS_READ would work too as waiting for readers implies
+ * waiting for writers as well, but we want to make things explicit and waiting
+ * only for readers is impossible.
+ */
 bool
-panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns)
+panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
+ uint32_t access_type)
 {
 struct drm_panfrost_wait_bo req = {
 .handle = bo->gem_handle,
@@ -112,12 +121,34 @@ panfrost_bo_wait(struct panfrost_bo *bo, int64_t 
timeout_ns)
 };
 int ret;
 
+assert(access_type == PAN_BO_GPU_ACCESS_WRITE ||
+   access_type == PAN_BO_GPU_ACCESS_RW);
+
+/* If the BO has been exported or imported we can't rely on the cached
+ * state, we need to call the WAIT_BO ioctl.
+ */
+if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) {
+/* If ->gpu_access is 0, the BO is idle, and if the WRITE flag
+ * is cleared, that means we only have readers.
+ */
+if (!bo->gpu_access)
+return true;
+else if (!(access_type & PAN_BO_GPU_ACCESS_READ) &&
+ !(bo->gpu_access & PAN_BO_GPU_ACCESS_WRITE))
+return true;
+}
+
 /* The ioctl returns >= 0 value when the BO we are waiting for is ready
  * -1 otherwise.
  */
 ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_WAIT_BO, );
-if (ret != -1)
+if (ret != -1) {
+/* Set gpu_access to 0 so that the next call to bo_wait()
+ * doesn't have to call the WAIT_BO ioctl.
+ */
+bo->gpu_access = 0;
 return true;
+}
 
 /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
  * is invalid, which shouldn't happen here.
@@ -174,7 +205,8 @@ panfrost_bo_cache_fetch(struct panfrost_screen *screen,
 if (entry->size < size || entry->flags != flags)
 continue;
 
-if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX))
+if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX,
+  PAN_BO_GPU_ACCESS_RW))
 continue;
 
 struct drm_panfrost_madvise madv = {
diff --git a/src/gallium/drivers/panfrost/pan_bo.h 
b/src/gallium/drivers/panfrost/pan_bo.h
index a4b4d05b96e9..60ccb0c075a6 100644
--- a/src/gallium/drivers/panfrost/pan_bo.h
+++ b/src/gallium/drivers/panfrost/pan_bo.h
@@ -100,10 +100,17 @@ struct panfrost_bo {
 int gem_handle;
 
 uint32_t flags;
+
+/* Combination of PAN_BO_GPU_ACCESS_{READ,WRITE} flags encoding pending
+ * GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl
+ * when the BO is idle.
+ */
+uint32_t gpu_access;
 };
 
 bool
-panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns);
+panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
+ uint32_t access_type);
 void
 panfrost_bo_reference(struct panfrost_bo *bo);
 void
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 739f36a593f1..30720ab98bb9 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -389,8 +389,15 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 
 hash_table_foreach(batch->bos, entry) {
 struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
+uint32_t flags = (uintptr_t)entry->data;
+
 assert(bo->gem_handle > 0);
 bo_handles[submit.bo_handl

[Mesa-dev] [PATCH v2 22/37] panfrost: Extend the panfrost_batch_add_bo() API to pass access flags

2019-09-16 Thread Boris Brezillon
The type of access being done on a BO has an impact on job scheduling
(shared resources being written enforce serialization while those
being read only allow for job parallelization) and BO lifetime (the
fragment job might last longer than the vertex/tiler ones, if we can,
it's good to release BOs earlier so that others can re-use them
through the BO re-use cache).

Let's pass extra access flags to panfrost_batch_add_bo() and
panfrost_batch_create_bo() so the batch submission logic can take the
appropriate when submitting batches. Note that this information is not
used yet, we're just patching callers to pass the correct flags here.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_allocate.c   | 14 +-
 src/gallium/drivers/panfrost/pan_blend_cso.c  |  6 ++-
 src/gallium/drivers/panfrost/pan_bo.h | 18 
 src/gallium/drivers/panfrost/pan_context.c| 43 +++
 src/gallium/drivers/panfrost/pan_instancing.c |  5 ++-
 src/gallium/drivers/panfrost/pan_job.c| 39 +
 src/gallium/drivers/panfrost/pan_job.h|  5 ++-
 src/gallium/drivers/panfrost/pan_varyings.c   |  5 ++-
 8 files changed, 111 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_allocate.c 
b/src/gallium/drivers/panfrost/pan_allocate.c
index 3076c23ab1cc..1ca812c1fbaa 100644
--- a/src/gallium/drivers/panfrost/pan_allocate.c
+++ b/src/gallium/drivers/panfrost/pan_allocate.c
@@ -63,8 +63,18 @@ panfrost_allocate_transient(struct panfrost_batch *batch, 
size_t sz)
 size_t bo_sz = sz < TRANSIENT_SLAB_SIZE ?
TRANSIENT_SLAB_SIZE : ALIGN_POT(sz, 4096);
 
-/* We can't reuse the current BO, but we can create a new one. 
*/
-bo = panfrost_batch_create_bo(batch, bo_sz, 0);
+/* We can't reuse the current BO, but we can create a new one.
+ * We don't know what the BO will be used for, so let's flag it
+ * RW and attach it to both the fragment and vertex/tiler jobs.
+ * TODO: if we want fine grained BO assignment we should pass
+ * flags to this function and keep the read/write,
+ * fragment/vertex+tiler pools separate.
+ */
+bo = panfrost_batch_create_bo(batch, bo_sz, 0,
+  PAN_BO_GPU_ACCESS_PRIVATE |
+  PAN_BO_GPU_ACCESS_RW |
+  PAN_BO_GPU_ACCESS_VERTEX_TILER |
+  PAN_BO_GPU_ACCESS_FRAGMENT);
 
 if (sz < TRANSIENT_SLAB_SIZE) {
 batch->transient_bo = bo;
diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c 
b/src/gallium/drivers/panfrost/pan_blend_cso.c
index 6bd6ff71cdc7..f592522a4d26 100644
--- a/src/gallium/drivers/panfrost/pan_blend_cso.c
+++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
@@ -273,7 +273,11 @@ panfrost_get_blend_for_context(struct panfrost_context 
*ctx, unsigned rti)
 
 /* Upload the shader */
 final.shader.bo = panfrost_batch_create_bo(batch, shader->size,
-   PAN_BO_EXECUTE);
+   PAN_BO_EXECUTE,
+   PAN_BO_GPU_ACCESS_PRIVATE |
+   PAN_BO_GPU_ACCESS_READ |
+   
PAN_BO_GPU_ACCESS_VERTEX_TILER |
+   PAN_BO_GPU_ACCESS_FRAGMENT);
 memcpy(final.shader.bo->cpu, shader->buffer, shader->size);
 
 if (shader->patch_index) {
diff --git a/src/gallium/drivers/panfrost/pan_bo.h 
b/src/gallium/drivers/panfrost/pan_bo.h
index e141a60fc407..a4b4d05b96e9 100644
--- a/src/gallium/drivers/panfrost/pan_bo.h
+++ b/src/gallium/drivers/panfrost/pan_bo.h
@@ -62,6 +62,24 @@ struct panfrost_screen;
 /* BO has been exported */
 #define PAN_BO_EXPORTED   (1 << 7)
 
+/* GPU access flags */
+
+/* BO is either shared (can be accessed by more than one GPU batch) or private
+ * (reserved by a specific GPU job). */
+#define PAN_BO_GPU_ACCESS_PRIVATE (0 << 0)
+#define PAN_BO_GPU_ACCESS_SHARED  (1 << 0)
+
+/* BO is being read/written by the GPU */
+#define PAN_BO_GPU_ACCESS_READ(1 << 1)
+#define PAN_BO_GPU_ACCESS_WRITE   (1 << 2)
+#define PAN_BO_GPU_ACCESS_RW  (PAN_BO_GPU_ACCESS_READ | 
PAN_BO_GPU_ACCESS_WRITE)
+
+/* BO is accessed by the vertex/tiler job. */
+#define PAN_BO_GPU_ACCESS_VERTEX_TILER(1 << 3)
+
+/* BO is accessed by the fragment job. */
+#define PAN_BO_GPU_ACCESS_FRAGMENT(1 << 4)
+
 struct panfrost_bo {
 /* Must be first for casting */
 struct list_head link;
diff --git a

[Mesa-dev] [PATCH v2 06/37] panfrost: Stop passing has_draws to panfrost_drm_submit_vs_fs_batch()

2019-09-16 Thread Boris Brezillon
has_draws can be inferred directly from the batch->last_job value, no
need to pass it around.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_drm.c| 3 ++-
 src/gallium/drivers/panfrost/pan_job.c| 4 +---
 src/gallium/drivers/panfrost/pan_screen.h | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_drm.c 
b/src/gallium/drivers/panfrost/pan_drm.c
index b77af714d117..c41701e16ff5 100644
--- a/src/gallium/drivers/panfrost/pan_drm.c
+++ b/src/gallium/drivers/panfrost/pan_drm.c
@@ -271,9 +271,10 @@ panfrost_drm_submit_batch(struct panfrost_batch *batch, 
u64 first_job_desc,
 }
 
 int
-panfrost_drm_submit_vs_fs_batch(struct panfrost_batch *batch, bool has_draws)
+panfrost_drm_submit_vs_fs_batch(struct panfrost_batch *batch)
 {
 struct panfrost_context *ctx = batch->ctx;
+bool has_draws = batch->last_job.gpu;
 int ret = 0;
 
 panfrost_batch_add_bo(batch, ctx->scratchpad);
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index e06440010aeb..ecaf081f1c38 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -284,9 +284,7 @@ panfrost_batch_submit(struct panfrost_batch *batch)
 
 panfrost_scoreboard_link_batch(batch);
 
-bool has_draws = batch->last_job.gpu;
-
-ret = panfrost_drm_submit_vs_fs_batch(batch, has_draws);
+ret = panfrost_drm_submit_vs_fs_batch(batch);
 
 if (ret)
 fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);
diff --git a/src/gallium/drivers/panfrost/pan_screen.h 
b/src/gallium/drivers/panfrost/pan_screen.h
index 0124e559a700..2f17e2dae726 100644
--- a/src/gallium/drivers/panfrost/pan_screen.h
+++ b/src/gallium/drivers/panfrost/pan_screen.h
@@ -132,7 +132,7 @@ panfrost_drm_import_bo(struct panfrost_screen *screen, int 
fd);
 int
 panfrost_drm_export_bo(struct panfrost_screen *screen, const struct 
panfrost_bo *bo);
 int
-panfrost_drm_submit_vs_fs_batch(struct panfrost_batch *batch, bool has_draws);
+panfrost_drm_submit_vs_fs_batch(struct panfrost_batch *batch);
 unsigned
 panfrost_drm_query_gpu_version(struct panfrost_screen *screen);
 int
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 12/37] panfrost: Don't check if BO is mmaped before calling panfrost_bo_mmap()

2019-09-16 Thread Boris Brezillon
panfrost_bo_mmap() already takes care of that.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_resource.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_resource.c 
b/src/gallium/drivers/panfrost/pan_resource.c
index 97ab2f9d9a4e..7083ee37bae2 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -561,11 +561,7 @@ panfrost_transfer_map(struct pipe_context *pctx,
 *out_transfer = >base;
 
 /* If we haven't already mmaped, now's the time */
-
-if (!bo->cpu) {
-struct panfrost_screen *screen = pan_screen(pctx->screen);
-panfrost_bo_mmap(screen, bo);
-}
+panfrost_bo_mmap(pan_screen(pctx->screen), bo);
 
 /* Check if we're bound for rendering and this is a read pixels. If so,
  * we need to flush */
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 14/37] panfrost: Stop using panfrost_bo_release() outside of pan_bo.c

2019-09-16 Thread Boris Brezillon
panfrost_bo_unreference() should be used instead.

The only difference caused by this change is that the scratchpad,
tiler_heap and tiler_dummy BOs are now returned to the cache instead
of being freed when a context is destroyed. This is only a problem if
we care about context isolation, which apparently is not the case since
transient BOs are already returned to the per-FD cache (and all contexts
share the same address space anyway, so enforcing context isolation
is almost impossible).

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_bo.c   | 5 -
 src/gallium/drivers/panfrost/pan_bo.h   | 2 --
 src/gallium/drivers/panfrost/pan_context.c  | 6 +++---
 src/gallium/drivers/panfrost/pan_resource.c | 2 +-
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_bo.c 
b/src/gallium/drivers/panfrost/pan_bo.c
index d9c4cb208bc8..396b25230c46 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -84,6 +84,9 @@ pan_bucket(struct panfrost_screen *screen, unsigned size)
 return >bo_cache[pan_bucket_index(size)];
 }
 
+static void
+panfrost_bo_release(struct panfrost_bo *bo, bool cacheable);
+
 /* Tries to fetch a BO of sufficient size with the appropriate flags from the
  * BO cache. If it succeeds, it returns that BO and removes the BO from the
  * cache. If it fails, it returns NULL signaling the caller to allocate a new
@@ -287,7 +290,7 @@ panfrost_bo_create(struct panfrost_screen *screen, size_t 
size,
 return bo;
 }
 
-void
+static void
 panfrost_bo_release(struct panfrost_bo *bo, bool cacheable)
 {
 if (!bo)
diff --git a/src/gallium/drivers/panfrost/pan_bo.h 
b/src/gallium/drivers/panfrost/pan_bo.h
index dfdb202e5d34..2858d3782eff 100644
--- a/src/gallium/drivers/panfrost/pan_bo.h
+++ b/src/gallium/drivers/panfrost/pan_bo.h
@@ -83,8 +83,6 @@ panfrost_bo_create(struct panfrost_screen *screen, size_t 
size,
uint32_t flags);
 void
 panfrost_bo_mmap(struct panfrost_bo *bo);
-void
-panfrost_bo_release(struct panfrost_bo *bo, bool cacheable);
 struct panfrost_bo *
 panfrost_bo_import(struct panfrost_screen *screen, int fd);
 int
diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 55fe9c264548..c5139a21f9a3 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -2425,9 +2425,9 @@ panfrost_destroy(struct pipe_context *pipe)
 if (panfrost->blitter_wallpaper)
 util_blitter_destroy(panfrost->blitter_wallpaper);
 
-panfrost_bo_release(panfrost->scratchpad, false);
-panfrost_bo_release(panfrost->tiler_heap, false);
-panfrost_bo_release(panfrost->tiler_dummy, false);
+panfrost_bo_unreference(panfrost->scratchpad);
+panfrost_bo_unreference(panfrost->tiler_heap);
+panfrost_bo_unreference(panfrost->tiler_dummy);
 
 ralloc_free(pipe);
 }
diff --git a/src/gallium/drivers/panfrost/pan_resource.c 
b/src/gallium/drivers/panfrost/pan_resource.c
index 1e8a1eadb51d..363a330c4fd0 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -839,7 +839,7 @@ panfrost_resource_hint_layout(
 
 /* If we grew in size, reallocate the BO */
 if (new_size > rsrc->bo->size) {
-panfrost_bo_release(rsrc->bo, true);
+panfrost_bo_unreference(rsrc->bo);
 rsrc->bo = panfrost_bo_create(screen, new_size, 
PAN_BO_DELAY_MMAP);
 }
 }
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 35/37] panfrost: Rename ctx->batches into ctx->fbo_to_batch

2019-09-16 Thread Boris Brezillon
We are about to add a batch queue to keep track of submission order.
Let's rename the existing batches hash table (which is used to get the
batch attached to an FBO) into fbo_to_batch to avoid confusion.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c |  2 +-
 src/gallium/drivers/panfrost/pan_context.h |  2 +-
 src/gallium/drivers/panfrost/pan_job.c | 21 +++--
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 90c7512b105f..b8f653bf8e72 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1361,7 +1361,7 @@ panfrost_flush(
  */
 if (fence) {
 util_dynarray_init(, NULL);
-hash_table_foreach(ctx->batches, hentry) {
+hash_table_foreach(ctx->fbo_to_batch, hentry) {
 struct panfrost_batch *batch = hentry->data;
 
 panfrost_batch_fence_reference(batch->out_sync);
diff --git a/src/gallium/drivers/panfrost/pan_context.h 
b/src/gallium/drivers/panfrost/pan_context.h
index d50ed57d5d8a..f13967f51b46 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -112,7 +112,7 @@ struct panfrost_context {
 
 /* Bound job batch and map of panfrost_batch_key to job batches */
 struct panfrost_batch *batch;
-struct hash_table *batches;
+struct hash_table *fbo_to_batch;
 
 /* panfrost_bo -> panfrost_bo_access */
 struct hash_table *accessed_bos;
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index df92b791a1f2..13d7e8086e62 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -134,9 +134,9 @@ panfrost_freeze_batch(struct panfrost_batch *batch)
  * matches. This way, next draws/clears targeting this FBO will trigger
  * the creation of a new batch.
  */
-entry = _mesa_hash_table_search(ctx->batches, >key);
+entry = _mesa_hash_table_search(ctx->fbo_to_batch, >key);
if (entry && entry->data == batch)
-_mesa_hash_table_remove(ctx->batches, entry);
+_mesa_hash_table_remove(ctx->fbo_to_batch, entry);
 
 /* If this is the bound batch, the panfrost_context parameters are
  * relevant so submitting it invalidates those parameters, but if it's
@@ -155,7 +155,7 @@ static bool panfrost_batch_is_frozen(struct panfrost_batch 
*batch)
 struct panfrost_context *ctx = batch->ctx;
 struct hash_entry *entry;
 
-entry = _mesa_hash_table_search(ctx->batches, >key);
+entry = _mesa_hash_table_search(ctx->fbo_to_batch, >key);
 if (entry && entry->data == batch)
 return false;
 
@@ -245,7 +245,8 @@ panfrost_get_batch(struct panfrost_context *ctx,
const struct pipe_framebuffer_state *key)
 {
 /* Lookup the job first */
-struct hash_entry *entry = _mesa_hash_table_search(ctx->batches, key);
+struct hash_entry *entry = _mesa_hash_table_search(ctx->fbo_to_batch,
+   key);
 
 if (entry)
 return entry->data;
@@ -255,7 +256,7 @@ panfrost_get_batch(struct panfrost_context *ctx,
 struct panfrost_batch *batch = panfrost_create_batch(ctx, key);
 
 /* Save the created job */
-_mesa_hash_table_insert(ctx->batches, >key, batch);
+_mesa_hash_table_insert(ctx->fbo_to_batch, >key, batch);
 
 return batch;
 }
@@ -877,7 +878,7 @@ panfrost_flush_all_batches(struct panfrost_context *ctx, 
bool wait)
 util_dynarray_init(, NULL);
 }
 
-hash_table_foreach(ctx->batches, hentry) {
+hash_table_foreach(ctx->fbo_to_batch, hentry) {
 struct panfrost_batch *batch = hentry->data;
 
 assert(batch);
@@ -892,7 +893,7 @@ panfrost_flush_all_batches(struct panfrost_context *ctx, 
bool wait)
 panfrost_batch_submit(batch);
 }
 
-assert(!ctx->batches->entries);
+assert(!ctx->fbo_to_batch->entries);
 
 /* Collect batch fences before returning */
 panfrost_gc_fences(ctx);
@@ -1144,9 +1145,9 @@ panfrost_batch_is_scanout(struct panfrost_batch *batch)
 void
 panfrost_batch_init(struct panfrost_context *ctx)
 {
-ctx->batches = _mesa_hash_table_create(ctx,
-   panfrost_batch_hash,
-   panfrost_batch_compare);
+ctx->fbo_to_batch = _mesa_hash_table_create(ctx,
+panfros

[Mesa-dev] [PATCH v2 34/37] panfrost: Do fine-grained flushing when preparing BO for CPU accesses

2019-09-16 Thread Boris Brezillon
We don't have to flush all batches when we're only interested in
reading/writing a specific BO. Thanks to the
panfrost_flush_batches_accessing_bo() and panfrost_bo_wait() helpers
we can now flush only the batches touching the BO we want to access
from the CPU.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_resource.c | 27 +
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_resource.c 
b/src/gallium/drivers/panfrost/pan_resource.c
index 1f7605adcd5d..d59529ff15b7 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -578,10 +578,8 @@ panfrost_transfer_map(struct pipe_context *pctx,
 is_bound |= fb->cbufs[c]->texture == resource;
 }
 
-if (is_bound && (usage & PIPE_TRANSFER_READ)) {
-assert(level == 0);
-panfrost_flush_all_batches(ctx, true);
-}
+if (is_bound && (usage & PIPE_TRANSFER_READ))
+ assert(level == 0);
 
 /* TODO: Respect usage flags */
 
@@ -594,11 +592,11 @@ panfrost_transfer_map(struct pipe_context *pctx,
 /* No flush for writes to uninitialized */
 } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
 if (usage & PIPE_TRANSFER_WRITE) {
-/* STUB: flush reading */
-//printf("debug: missed reading flush %d\n", 
resource->target);
+panfrost_flush_batches_accessing_bo(ctx, bo, 
PAN_BO_GPU_ACCESS_RW);
+panfrost_bo_wait(bo, INT64_MAX, PAN_BO_GPU_ACCESS_RW);
 } else if (usage & PIPE_TRANSFER_READ) {
-/* STUB: flush writing */
-//printf("debug: missed writing flush %d (%d-%d)\n", 
resource->target, box->x, box->x + box->width);
+panfrost_flush_batches_accessing_bo(ctx, bo, 
PAN_BO_GPU_ACCESS_WRITE);
+panfrost_bo_wait(bo, INT64_MAX, 
PAN_BO_GPU_ACCESS_WRITE);
 } else {
 /* Why are you even mapping?! */
 }
@@ -748,11 +746,8 @@ panfrost_generate_mipmap(
  * reorder-type optimizations in place. But for now prioritize
  * correctness. */
 
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-bool has_draws = batch->last_job.gpu;
-
-if (has_draws)
-panfrost_flush_all_batches(ctx, true);
+panfrost_flush_batches_accessing_bo(ctx, rsrc->bo, 
PAN_BO_GPU_ACCESS_RW);
+panfrost_bo_wait(rsrc->bo, INT64_MAX, PAN_BO_GPU_ACCESS_RW);
 
 /* We've flushed the original buffer if needed, now trigger a blit */
 
@@ -765,8 +760,10 @@ panfrost_generate_mipmap(
 /* If the blit was successful, flush once more. If it wasn't, well, let
  * the state tracker deal with it. */
 
-if (blit_res)
-panfrost_flush_all_batches(ctx, true);
+if (blit_res) {
+panfrost_flush_batches_accessing_bo(ctx, rsrc->bo, 
PAN_BO_GPU_ACCESS_WRITE);
+panfrost_bo_wait(rsrc->bo, INT64_MAX, PAN_BO_GPU_ACCESS_WRITE);
+}
 
 return blit_res;
 }
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 27/37] panfrost: Add a panfrost_freeze_batch() helper

2019-09-16 Thread Boris Brezillon
We'll soon need to freeze a batch not only when it's flushed, but also
when another batch depends on us, so let's add a helper to avoid
duplicating the logic.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_job.c | 62 ++
 1 file changed, 44 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 78f2b766adb1..ec397b855a69 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -100,22 +100,59 @@ panfrost_create_batch(struct panfrost_context *ctx,
 return batch;
 }
 
+static void
+panfrost_freeze_batch(struct panfrost_batch *batch)
+{
+struct panfrost_context *ctx = batch->ctx;
+struct hash_entry *entry;
+
+/* Remove the entry in the FBO -> batch hash table if the batch
+ * matches. This way, next draws/clears targeting this FBO will trigger
+ * the creation of a new batch.
+ */
+entry = _mesa_hash_table_search(ctx->batches, >key);
+   if (entry && entry->data == batch)
+_mesa_hash_table_remove(ctx->batches, entry);
+
+/* If this is the bound batch, the panfrost_context parameters are
+ * relevant so submitting it invalidates those parameters, but if it's
+ * not bound, the context parameters are for some other batch so we
+ * can't invalidate them.
+ */
+if (ctx->batch == batch) {
+panfrost_invalidate_frame(ctx);
+ctx->batch = NULL;
+}
+}
+
+#ifndef NDEBUG
+static bool panfrost_batch_is_frozen(struct panfrost_batch *batch)
+{
+struct panfrost_context *ctx = batch->ctx;
+struct hash_entry *entry;
+
+entry = _mesa_hash_table_search(ctx->batches, >key);
+if (entry && entry->data == batch)
+return false;
+
+if (ctx->batch == batch)
+return false;
+
+return true;
+}
+#endif
+
 static void
 panfrost_free_batch(struct panfrost_batch *batch)
 {
 if (!batch)
 return;
 
-struct panfrost_context *ctx = batch->ctx;
+assert(panfrost_batch_is_frozen(batch));
 
 hash_table_foreach(batch->bos, entry)
 panfrost_bo_unreference((struct panfrost_bo *)entry->key);
 
-_mesa_hash_table_remove_key(ctx->batches, >key);
-
-if (ctx->batch == batch)
-ctx->batch = NULL;
-
 /* The out_sync fence lifetime is different from the the batch one
  * since other batches might want to wait on an fence of already
  * submitted/signaled batch. All we need to do here is make sure the
@@ -524,19 +561,8 @@ panfrost_batch_submit(struct panfrost_batch *batch)
 fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);
 
 out:
-/* If this is the bound batch, the panfrost_context parameters are
- * relevant so submitting it invalidates those paramaters, but if it's
- * not bound, the context parameters are for some other batch so we
- * can't invalidate them.
- */
-if (ctx->batch == batch)
-panfrost_invalidate_frame(ctx);
-
-/* The job has been submitted, let's invalidate the current FBO job
- * cache.
-*/
+panfrost_freeze_batch(batch);
 assert(!ctx->batch || batch == ctx->batch);
-ctx->batch = NULL;
 
 /* We always stall the pipeline for correct results since pipelined
  * rendering is quite broken right now (to be fixed by the panfrost_job
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 37/37] panfrost/ci: New tests are passing

2019-09-16 Thread Boris Brezillon
All dEQP-GLES2.functional.fbo.render.texsubimage.* tests are now
passing.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/ci/expected-failures.txt | 4 
 1 file changed, 4 deletions(-)

diff --git a/src/gallium/drivers/panfrost/ci/expected-failures.txt 
b/src/gallium/drivers/panfrost/ci/expected-failures.txt
index b0fc872a3009..3c707230dd23 100644
--- a/src/gallium/drivers/panfrost/ci/expected-failures.txt
+++ b/src/gallium/drivers/panfrost/ci/expected-failures.txt
@@ -53,10 +53,6 @@ 
dEQP-GLES2.functional.fbo.render.shared_colorbuffer.tex2d_rgb_depth_component16
 
dEQP-GLES2.functional.fbo.render.shared_depthbuffer.rbo_rgb565_depth_component16
 Fail
 
dEQP-GLES2.functional.fbo.render.shared_depthbuffer.tex2d_rgba_depth_component16
 Fail
 
dEQP-GLES2.functional.fbo.render.shared_depthbuffer.tex2d_rgb_depth_component16 
Fail
-dEQP-GLES2.functional.fbo.render.texsubimage.after_render_tex2d_rgba Fail
-dEQP-GLES2.functional.fbo.render.texsubimage.after_render_tex2d_rgb Fail
-dEQP-GLES2.functional.fbo.render.texsubimage.between_render_tex2d_rgba Fail
-dEQP-GLES2.functional.fbo.render.texsubimage.between_render_tex2d_rgb Fail
 dEQP-GLES2.functional.fragment_ops.depth_stencil.random.0 Fail
 dEQP-GLES2.functional.fragment_ops.depth_stencil.random.10 Fail
 dEQP-GLES2.functional.fragment_ops.depth_stencil.random.11 Fail
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 32/37] panfrost: Kill the explicit serialization in panfrost_batch_submit()

2019-09-16 Thread Boris Brezillon
Now that we have all the pieces in place to support pipelining batches
we can get rid of the drmSyncobjWait() at the end of
panfrost_batch_submit().

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_job.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index de2922a8366e..df92b791a1f2 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -846,7 +846,6 @@ panfrost_batch_submit(struct panfrost_batch *batch)
 panfrost_batch_submit((*dep)->batch);
 }
 
-struct panfrost_context *ctx = batch->ctx;
 int ret;
 
 /* Nothing to do! */
@@ -865,15 +864,7 @@ panfrost_batch_submit(struct panfrost_batch *batch)
 
 out:
 panfrost_freeze_batch(batch);
-
-/* We always stall the pipeline for correct results since pipelined
- * rendering is quite broken right now (to be fixed by the panfrost_job
- * refactor, just take the perf hit for correctness)
- */
-drmSyncobjWait(pan_screen(ctx->base.screen)->fd,
-   >out_sync->syncobj, 1, INT64_MAX, 0, NULL);
 panfrost_free_batch(batch);
-
 }
 
 void
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 33/37] panfrost: Get rid of the flush in panfrost_set_framebuffer_state()

2019-09-16 Thread Boris Brezillon
Now that we track inter-batch dependencies, the flush done in
panfrost_set_framebuffer_state() is no longer needed. Let's get rid of
it.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c | 46 ++
 1 file changed, 3 insertions(+), 43 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 56a76a230141..90c7512b105f 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -2306,50 +2306,10 @@ panfrost_set_framebuffer_state(struct pipe_context 
*pctx,
 {
 struct panfrost_context *ctx = pan_context(pctx);
 
-/* Flush when switching framebuffers, but not if the framebuffer
- * state is being restored by u_blitter
- */
-
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-bool is_scanout = panfrost_batch_is_scanout(batch);
-bool has_draws = batch->last_job.gpu;
-
-/* Bail out early when the current and new states are the same. */
-if (util_framebuffer_state_equal(>pipe_framebuffer, fb))
-return;
-
-/* The wallpaper logic sets a new FB state before doing the blit and
- * restore the old one when it's done. Those FB states are reported to
- * be different because the surface they are pointing to are different,
- * but those surfaces actually point to the same cbufs/zbufs. In that
- * case we definitely don't want new FB descs to be emitted/attached
- * since the job is expected to be flushed just after the blit is done,
- * so let's just copy the new state and return here.
- */
-if (ctx->wallpaper_batch) {
-util_copy_framebuffer_state(>pipe_framebuffer, fb);
-return;
-}
-
-if (!is_scanout || has_draws)
-panfrost_flush_all_batches(ctx, true);
-else
-assert(!ctx->payloads[PIPE_SHADER_VERTEX].postfix.framebuffer 
&&
-   
!ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.framebuffer);
-
-/* Invalidate the FBO job cache since we've just been assigned a new
- * FB state.
- */
-ctx->batch = NULL;
-
+panfrost_hint_afbc(pan_screen(pctx->screen), fb);
 util_copy_framebuffer_state(>pipe_framebuffer, fb);
-
-/* Given that we're rendering, we'd love to have compression */
-struct panfrost_screen *screen = pan_screen(ctx->base.screen);
-
-panfrost_hint_afbc(screen, >pipe_framebuffer);
-for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
-ctx->payloads[i].postfix.framebuffer = 0;
+ctx->batch = NULL;
+panfrost_invalidate_frame(ctx);
 }
 
 static void *
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 28/37] panfrost: Start tracking inter-batch dependencies

2019-09-16 Thread Boris Brezillon
The idea is to track which BOs are being accessed and the type of access
to determine when a dependency exists. Thanks to that we can build a
dependency graph that will allow us to flush batches in the correct
order.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.h |   3 +
 src/gallium/drivers/panfrost/pan_job.c | 324 -
 src/gallium/drivers/panfrost/pan_job.h |   3 +
 3 files changed, 325 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.h 
b/src/gallium/drivers/panfrost/pan_context.h
index ce3e0c899a4f..3b09952345cf 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -114,6 +114,9 @@ struct panfrost_context {
 struct panfrost_batch *batch;
 struct hash_table *batches;
 
+/* panfrost_bo -> panfrost_bo_access */
+struct hash_table *accessed_bos;
+
 /* Within a launch_grid call.. */
 const struct pipe_grid_info *compute_grid;
 
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index ec397b855a69..04f4f22dab74 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -36,6 +36,29 @@
 #include "pan_util.h"
 #include "pandecode/decode.h"
 
+/* panfrost_bo_access is here to help us keep track of batch accesses to BOs
+ * and build a proper dependency graph such that batches can be pipelined for
+ * better GPU utilization.
+ *
+ * Each accessed BO has a corresponding entry in the ->accessed_bos hash table.
+ * A BO is either being written or read at any time, that's what the type field
+ * encodes.
+ * When the last access is a write, the batch writing the BO might have read
+ * dependencies (readers that have not been executed yet and want to read the
+ * previous BO content), and when the last access is a read, all readers might
+ * depend on another batch to push its results to memory. That's what the
+ * readers/writers keep track of.
+ * There can only be one writer at any given time, if a new batch wants to
+ * write to the same BO, a dependency will be added between the new writer and
+ * the old writer (at the batch level), and panfrost_bo_access->writer will be
+ * updated to point to the new writer.
+ */
+struct panfrost_bo_access {
+uint32_t type;
+struct util_dynarray readers;
+struct panfrost_batch_fence *writer;
+};
+
 static struct panfrost_batch_fence *
 panfrost_create_batch_fence(struct panfrost_batch *batch)
 {
@@ -94,6 +117,7 @@ panfrost_create_batch(struct panfrost_context *ctx,
 
 util_dynarray_init(>headers, batch);
 util_dynarray_init(>gpu_headers, batch);
+util_dynarray_init(>dependencies, batch);
 batch->out_sync = panfrost_create_batch_fence(batch);
 util_copy_framebuffer_state(>key, key);
 
@@ -153,6 +177,10 @@ panfrost_free_batch(struct panfrost_batch *batch)
 hash_table_foreach(batch->bos, entry)
 panfrost_bo_unreference((struct panfrost_bo *)entry->key);
 
+util_dynarray_foreach(>dependencies,
+  struct panfrost_batch_fence *, dep)
+panfrost_batch_fence_unreference(*dep);
+
 /* The out_sync fence lifetime is different from the the batch one
  * since other batches might want to wait on an fence of already
  * submitted/signaled batch. All we need to do here is make sure the
@@ -166,6 +194,52 @@ panfrost_free_batch(struct panfrost_batch *batch)
 ralloc_free(batch);
 }
 
+#ifndef NDEBUG
+static bool
+panfrost_dep_graph_contains_batch(struct panfrost_batch *root,
+  struct panfrost_batch *batch)
+{
+if (!root)
+return false;
+
+util_dynarray_foreach(>dependencies,
+  struct panfrost_batch_fence *, dep) {
+if ((*dep)->batch == batch ||
+panfrost_dep_graph_contains_batch((*dep)->batch, batch))
+return true;
+}
+
+return false;
+}
+#endif
+
+static void
+panfrost_batch_add_dep(struct panfrost_batch *batch,
+   struct panfrost_batch_fence *newdep)
+{
+if (batch == newdep->batch)
+return;
+
+util_dynarray_foreach(>dependencies,
+  struct panfrost_batch_fence *, dep) {
+if (*dep == newdep)
+return;
+}
+
+/* Make sure the dependency graph is acyclic. */
+assert(!panfrost_dep_graph_contains_batch(newdep->batch, batch));
+
+panfrost_batch_fence_reference(newdep);
+util_dynarray_append(>dependencies,
+ struct panfrost_batch_fence *, newdep);
+
+/* We now have a batch depending on us, let's make sure new 

[Mesa-dev] [PATCH v2 10/37] panfrost: Move the BO API to its own header

2019-09-16 Thread Boris Brezillon
Right now, the BO API is spread over pan_{allocate,resource,screen}.h.
Let's move all BO related definitions to a separate header file.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_allocate.c   |   1 +
 src/gallium/drivers/panfrost/pan_allocate.h   |  20 
 src/gallium/drivers/panfrost/pan_assemble.c   |   1 +
 src/gallium/drivers/panfrost/pan_blend_cso.c  |   1 +
 src/gallium/drivers/panfrost/pan_bo.c |   2 +-
 src/gallium/drivers/panfrost/pan_bo.h | 100 ++
 src/gallium/drivers/panfrost/pan_context.c|   1 +
 src/gallium/drivers/panfrost/pan_instancing.c |   1 +
 src/gallium/drivers/panfrost/pan_job.c|   1 +
 src/gallium/drivers/panfrost/pan_mfbd.c   |   1 +
 src/gallium/drivers/panfrost/pan_resource.c   |   1 +
 src/gallium/drivers/panfrost/pan_resource.h   |   6 --
 src/gallium/drivers/panfrost/pan_screen.c |   1 +
 src/gallium/drivers/panfrost/pan_screen.h |  47 
 src/gallium/drivers/panfrost/pan_sfbd.c   |   1 +
 src/gallium/drivers/panfrost/pan_varyings.c   |   1 +
 16 files changed, 112 insertions(+), 74 deletions(-)
 create mode 100644 src/gallium/drivers/panfrost/pan_bo.h

diff --git a/src/gallium/drivers/panfrost/pan_allocate.c 
b/src/gallium/drivers/panfrost/pan_allocate.c
index e7970c1be2d4..bdf6f26b77b8 100644
--- a/src/gallium/drivers/panfrost/pan_allocate.c
+++ b/src/gallium/drivers/panfrost/pan_allocate.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include "pan_bo.h"
 #include "pan_context.h"
 
 /* TODO: What does this actually have to be? */
diff --git a/src/gallium/drivers/panfrost/pan_allocate.h 
b/src/gallium/drivers/panfrost/pan_allocate.h
index a80eadaffce8..f18218fb32a1 100644
--- a/src/gallium/drivers/panfrost/pan_allocate.h
+++ b/src/gallium/drivers/panfrost/pan_allocate.h
@@ -43,26 +43,6 @@ struct panfrost_transfer {
 mali_ptr gpu;
 };
 
-struct panfrost_bo {
-/* Must be first for casting */
-struct list_head link;
-
-struct pipe_reference reference;
-
-/* Mapping for the entire object (all levels) */
-uint8_t *cpu;
-
-/* GPU address for the object */
-mali_ptr gpu;
-
-/* Size of all entire trees */
-size_t size;
-
-int gem_handle;
-
-uint32_t flags;
-};
-
 struct panfrost_transfer
 panfrost_allocate_transient(struct panfrost_batch *batch, size_t sz);
 
diff --git a/src/gallium/drivers/panfrost/pan_assemble.c 
b/src/gallium/drivers/panfrost/pan_assemble.c
index cc4822a23615..afd16abb2d21 100644
--- a/src/gallium/drivers/panfrost/pan_assemble.c
+++ b/src/gallium/drivers/panfrost/pan_assemble.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include "pan_bo.h"
 #include "pan_context.h"
 
 #include "compiler/nir/nir.h"
diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c 
b/src/gallium/drivers/panfrost/pan_blend_cso.c
index c61ffe203c4c..83492e1ed03d 100644
--- a/src/gallium/drivers/panfrost/pan_blend_cso.c
+++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
@@ -29,6 +29,7 @@
 #include "util/u_memory.h"
 #include "pan_blend_shaders.h"
 #include "pan_blending.h"
+#include "pan_bo.h"
 
 /* A given Gallium blend state can be encoded to the hardware in numerous,
  * dramatically divergent ways due to the interactions of blending with
diff --git a/src/gallium/drivers/panfrost/pan_bo.c 
b/src/gallium/drivers/panfrost/pan_bo.c
index 7f14b3e3638b..e6a5c972ead9 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -29,7 +29,7 @@
 #include 
 #include "drm-uapi/panfrost_drm.h"
 
-#include "pan_resource.h"
+#include "pan_bo.h"
 #include "pan_screen.h"
 #include "pan_util.h"
 #include "pandecode/decode.h"
diff --git a/src/gallium/drivers/panfrost/pan_bo.h 
b/src/gallium/drivers/panfrost/pan_bo.h
new file mode 100644
index ..6d17ebecf6e6
--- /dev/null
+++ b/src/gallium/drivers/panfrost/pan_bo.h
@@ -0,0 +1,100 @@
+/*
+ * © Copyright 2019 Alyssa Rosenzweig
+ * © Copyright 2019 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ME

[Mesa-dev] [PATCH v2 05/37] panfrost: Kill a useless memset(0) in panfrost_create_context()

2019-09-16 Thread Boris Brezillon
ctx is allocated with rzalloc() which takes care of zero-ing the memory
region. No need to call memset(0) on top.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 06a12662dd36..323a48090365 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -2628,7 +2628,6 @@ panfrost_create_context(struct pipe_screen *screen, void 
*priv, unsigned flags)
 {
 struct panfrost_context *ctx = rzalloc(screen, struct 
panfrost_context);
 struct panfrost_screen *pscreen = pan_screen(screen);
-memset(ctx, 0, sizeof(*ctx));
 struct pipe_context *gallium = (struct pipe_context *) ctx;
 
 ctx->is_t6xx = pscreen->gpu_id < 0x0700; /* Literally, "earlier than 
T700" */
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 21/37] panfrost: Allocate tiler and scratchpad BOs per-batch

2019-09-16 Thread Boris Brezillon
If we want to execute several batches in parallel they need to have
their own tiler and scratchpad BOs. Let's move those objects to
panfrost_batch and allocate them on a per-batch basis.
Signed-off-by: Boris Brezillon 
---
Note to Alyssa: I tried removing the dummy_tiler BO replacing it by a
dummy value (tried both 0xdeafbeef and 0x0) and unfortunately it
crashed, so I decided to keep this dummy allocation for now.
---
 src/gallium/drivers/panfrost/pan_context.c | 43 +++---
 src/gallium/drivers/panfrost/pan_context.h |  4 --
 src/gallium/drivers/panfrost/pan_job.c | 41 +++--
 src/gallium/drivers/panfrost/pan_job.h | 18 +
 4 files changed, 68 insertions(+), 38 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 34bc6e41218d..6cb6cdd1a686 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -57,7 +57,6 @@
 static struct midgard_tiler_descriptor
 panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count)
 {
-struct panfrost_context *ctx = batch->ctx;
 struct midgard_tiler_descriptor t = {};
 unsigned height = batch->key.height;
 unsigned width = batch->key.width;
@@ -76,21 +75,28 @@ panfrost_emit_midg_tiler(struct panfrost_batch *batch, 
unsigned vertex_count)
 /* Sanity check */
 
 if (t.hierarchy_mask) {
+struct panfrost_bo *tiler_heap;
+
+tiler_heap = panfrost_batch_get_tiler_heap(batch);
 t.polygon_list = panfrost_batch_get_polygon_list(batch,
  header_size +
  
t.polygon_list_size);
 
 
 /* Allow the entire tiler heap */
-t.heap_start = ctx->tiler_heap->gpu;
-t.heap_end = ctx->tiler_heap->gpu + ctx->tiler_heap->size;
+t.heap_start = tiler_heap->gpu;
+t.heap_end = tiler_heap->gpu + tiler_heap->size;
 } else {
+struct panfrost_bo *tiler_dummy;
+
+tiler_dummy = panfrost_batch_get_tiler_dummy(batch);
+
 /* The tiler is disabled, so don't allow the tiler heap */
-t.heap_start = ctx->tiler_heap->gpu;
+t.heap_start = tiler_dummy->gpu;
 t.heap_end = t.heap_start;
 
 /* Use a dummy polygon list */
-t.polygon_list = ctx->tiler_dummy->gpu;
+t.polygon_list = tiler_dummy->gpu;
 
 /* Disable the tiler */
 t.hierarchy_mask |= MALI_TILER_DISABLED;
@@ -105,7 +111,6 @@ panfrost_emit_midg_tiler(struct panfrost_batch *batch, 
unsigned vertex_count)
 struct mali_single_framebuffer
 panfrost_emit_sfbd(struct panfrost_batch *batch, unsigned vertex_count)
 {
-struct panfrost_context *ctx = batch->ctx;
 unsigned width = batch->key.width;
 unsigned height = batch->key.height;
 
@@ -115,7 +120,7 @@ panfrost_emit_sfbd(struct panfrost_batch *batch, unsigned 
vertex_count)
 .unknown2 = 0x1f,
 .format = 0x3000,
 .clear_flags = 0x1000,
-.unknown_address_0 = ctx->scratchpad->gpu,
+.unknown_address_0 = panfrost_batch_get_scratchpad(batch)->gpu,
 .tiler = panfrost_emit_midg_tiler(batch, vertex_count),
 };
 
@@ -125,7 +130,6 @@ panfrost_emit_sfbd(struct panfrost_batch *batch, unsigned 
vertex_count)
 struct bifrost_framebuffer
 panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
 {
-struct panfrost_context *ctx = batch->ctx;
 unsigned width = batch->key.width;
 unsigned height = batch->key.height;
 
@@ -143,7 +147,7 @@ panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned 
vertex_count)
 
 .unknown2 = 0x1f,
 
-.scratchpad = ctx->scratchpad->gpu,
+.scratchpad = panfrost_batch_get_scratchpad(batch)->gpu,
 .tiler = panfrost_emit_midg_tiler(batch, vertex_count)
 };
 
@@ -2427,10 +2431,6 @@ panfrost_destroy(struct pipe_context *pipe)
 if (panfrost->blitter_wallpaper)
 util_blitter_destroy(panfrost->blitter_wallpaper);
 
-panfrost_bo_unreference(panfrost->scratchpad);
-panfrost_bo_unreference(panfrost->tiler_heap);
-panfrost_bo_unreference(panfrost->tiler_dummy);
-
 ralloc_free(pipe);
 }
 
@@ -2607,21 +2607,6 @@ panfrost_set_stream_output_targets(struct pipe_context 
*pctx,
 so->num_targets = num_targets;
 }
 
-static void
-panfrost_setup_hardware(struct panfrost_context *ctx)
-{
-struct pipe_context *gallium = (struct pipe_context *) ctx;
-s

[Mesa-dev] [PATCH v2 31/37] panfrost: Add a panfrost_flush_batches_accessing_bo() helper

2019-09-16 Thread Boris Brezillon
This will allow us to only flush batches touching a specific resource,
which is particularly useful when the CPU needs to access a BO.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_job.c | 31 ++
 src/gallium/drivers/panfrost/pan_job.h |  4 
 2 files changed, 35 insertions(+)

diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index e36f252e01fc..de2922a8366e 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -921,6 +921,37 @@ panfrost_flush_all_batches(struct panfrost_context *ctx, 
bool wait)
 util_dynarray_fini();
 }
 
+void
+panfrost_flush_batches_accessing_bo(struct panfrost_context *ctx,
+struct panfrost_bo *bo,
+uint32_t access_type)
+{
+struct panfrost_bo_access *access;
+struct hash_entry *hentry;
+
+/* It doesn't make any sense to flush only the readers. */
+assert(access_type == PAN_BO_GPU_ACCESS_WRITE ||
+   access_type == PAN_BO_GPU_ACCESS_RW);
+
+hentry = _mesa_hash_table_search(ctx->accessed_bos, bo);
+access = hentry ? hentry->data : NULL;
+if (!access)
+return;
+
+if (access_type & PAN_BO_GPU_ACCESS_WRITE && access->writer &&
+access->writer->batch)
+panfrost_batch_submit(access->writer->batch);
+
+if (!(access_type & PAN_BO_GPU_ACCESS_READ))
+return;
+
+util_dynarray_foreach(>readers, struct panfrost_batch_fence *,
+  reader) {
+if (*reader && (*reader)->batch)
+panfrost_batch_submit((*reader)->batch);
+}
+}
+
 void
 panfrost_batch_set_requirements(struct panfrost_batch *batch)
 {
diff --git a/src/gallium/drivers/panfrost/pan_job.h 
b/src/gallium/drivers/panfrost/pan_job.h
index 5c9d5e3715d5..d198864ce4f7 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -185,6 +185,10 @@ panfrost_batch_create_bo(struct panfrost_batch *batch, 
size_t size,
 void
 panfrost_flush_all_batches(struct panfrost_context *ctx, bool wait);
 
+void
+panfrost_flush_batches_accessing_bo(struct panfrost_context *ctx,
+struct panfrost_bo *bo, uint32_t flags);
+
 void
 panfrost_batch_set_requirements(struct panfrost_batch *batch);
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 23/37] panfrost: Make panfrost_batch->bos a hash table

2019-09-16 Thread Boris Brezillon
So we can store the flags as data and keep the BO as a key. This way
we keep track of the type of access done on BOs.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_job.c | 33 +-
 src/gallium/drivers/panfrost/pan_job.h |  2 +-
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 6332529b2f9b..739f36a593f1 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -44,9 +44,8 @@ panfrost_create_batch(struct panfrost_context *ctx,
 
 batch->ctx = ctx;
 
-batch->bos = _mesa_set_create(batch,
-  _mesa_hash_pointer,
-  _mesa_key_pointer_equal);
+batch->bos = _mesa_hash_table_create(batch, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
 
 batch->minx = batch->miny = ~0;
 batch->maxx = batch->maxy = 0;
@@ -67,10 +66,8 @@ panfrost_free_batch(struct panfrost_batch *batch)
 
 struct panfrost_context *ctx = batch->ctx;
 
-set_foreach(batch->bos, entry) {
-struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
-panfrost_bo_unreference(bo);
-}
+hash_table_foreach(batch->bos, entry)
+panfrost_bo_unreference((struct panfrost_bo *)entry->key);
 
 _mesa_hash_table_remove_key(ctx->batches, >key);
 
@@ -138,11 +135,25 @@ panfrost_batch_add_bo(struct panfrost_batch *batch, 
struct panfrost_bo *bo,
 if (!bo)
 return;
 
-if (_mesa_set_search(batch->bos, bo))
+struct hash_entry *entry;
+uint32_t old_flags = 0;
+
+entry = _mesa_hash_table_search(batch->bos, bo);
+if (!entry) {
+entry = _mesa_hash_table_insert(batch->bos, bo,
+(void *)(uintptr_t)flags);
+panfrost_bo_reference(bo);
+   } else {
+old_flags = (uintptr_t)entry->data;
+}
+
+assert(entry);
+
+if (old_flags == flags)
 return;
 
-panfrost_bo_reference(bo);
-_mesa_set_add(batch->bos, bo);
+flags |= old_flags;
+entry->data = (void *)(uintptr_t)flags;
 }
 
 void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
@@ -376,7 +387,7 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 bo_handles = calloc(batch->bos->entries, sizeof(*bo_handles));
 assert(bo_handles);
 
-set_foreach(batch->bos, entry) {
+hash_table_foreach(batch->bos, entry) {
 struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
 assert(bo->gem_handle > 0);
 bo_handles[submit.bo_handle_count++] = bo->gem_handle;
diff --git a/src/gallium/drivers/panfrost/pan_job.h 
b/src/gallium/drivers/panfrost/pan_job.h
index 0b37a3131e86..3f2cf1a999f3 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -98,7 +98,7 @@ struct panfrost_batch {
 unsigned job_index;
 
 /* BOs referenced -- will be used for flushing logic */
-struct set *bos;
+struct hash_table *bos;
 
 /* Current transient BO */
struct panfrost_bo *transient_bo;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 25/37] panfrost: Add a batch fence

2019-09-16 Thread Boris Brezillon
So we can implement fine-grained dependency tracking between batches.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_job.c | 51 ++
 src/gallium/drivers/panfrost/pan_job.h | 39 
 2 files changed, 90 insertions(+)

diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 30720ab98bb9..8712e2ce598a 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -36,6 +36,47 @@
 #include "pan_util.h"
 #include "pandecode/decode.h"
 
+static struct panfrost_batch_fence *
+panfrost_create_batch_fence(struct panfrost_batch *batch)
+{
+struct panfrost_batch_fence *fence;
+
+fence = rzalloc(NULL, struct panfrost_batch_fence);
+assert(fence);
+pipe_reference_init(>reference, 1);
+fence->ctx = batch->ctx;
+fence->batch = batch;
+
+/* Start in a signaled state so that even non-submitted batches
+ * (those that have no draw/clear) can be waited upon.
+ */
+drmSyncobjCreate(pan_screen(batch->ctx->base.screen)->fd,
+ DRM_SYNCOBJ_CREATE_SIGNALED, >syncobj);
+
+return fence;
+}
+
+static void
+panfrost_free_batch_fence(struct panfrost_batch_fence *fence)
+{
+drmSyncobjDestroy(pan_screen(fence->ctx->base.screen)->fd,
+  fence->syncobj);
+ralloc_free(fence);
+}
+
+void
+panfrost_batch_fence_unreference(struct panfrost_batch_fence *fence)
+{
+if (pipe_reference(>reference, NULL))
+ panfrost_free_batch_fence(fence);
+}
+
+void
+panfrost_batch_fence_reference(struct panfrost_batch_fence *fence)
+{
+pipe_reference(NULL, >reference);
+}
+
 static struct panfrost_batch *
 panfrost_create_batch(struct panfrost_context *ctx,
   const struct pipe_framebuffer_state *key)
@@ -53,6 +94,7 @@ panfrost_create_batch(struct panfrost_context *ctx,
 
 util_dynarray_init(>headers, batch);
 util_dynarray_init(>gpu_headers, batch);
+batch->out_sync = panfrost_create_batch_fence(batch);
 util_copy_framebuffer_state(>key, key);
 
 return batch;
@@ -74,6 +116,15 @@ panfrost_free_batch(struct panfrost_batch *batch)
 if (ctx->batch == batch)
 ctx->batch = NULL;
 
+/* The out_sync fence lifetime is different from the batch one
+ * since other batches might want to wait on the fence of an already
+ * submitted/signaled batch. All we need to do here is make sure the
+ * fence does not point to an invalid batch, which the core will
+ * interpret as 'batch is already submitted'.
+ */
+batch->out_sync->batch = NULL;
+panfrost_batch_fence_unreference(batch->out_sync);
+
 util_unreference_framebuffer_state(>key);
 ralloc_free(batch);
 }
diff --git a/src/gallium/drivers/panfrost/pan_job.h 
b/src/gallium/drivers/panfrost/pan_job.h
index 3f2cf1a999f3..32bfc1fe3388 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -31,6 +31,36 @@
 #include "pan_allocate.h"
 #include "pan_resource.h"
 
+/* panfrost_batch_fence is the out fence of a batch that users or other batches
+ * might want to wait on. The batch fence lifetime is different from the batch
+ * one as users will certainly want to wait upon the fence after the batch has
+ * been submitted (which is when panfrost_batch objects are freed).
+ */
+struct panfrost_batch_fence {
+/* Refcounting object for the fence. */
+struct pipe_reference reference;
+
+/* Batch that created this fence object. Will become NULL at batch
+ * submission time. This field is mainly here to know whether the
+ * batch has been flushed or not.
+ */
+struct panfrost_batch *batch;
+
+/* Context this fence is attached to. We need both ctx and batch, as
+ * the batch will go away after it's been submitted, but the fence
+ * will stay a bit longer.
+ */
+struct panfrost_context *ctx;
+
+/* Sync object backing this fence. */
+uint32_t syncobj;
+
+/* Cached value of the signaled state to avoid calling WAIT_SYNCOBJ ioctls
+ * when we know the fence has already been signaled.
+ */
+bool signaled;
+};
+
 #define PAN_REQ_MSAA(1 << 0)
 #define PAN_REQ_DEPTH_WRITE (1 << 1)
 
@@ -120,10 +150,19 @@ struct panfrost_batch {
 
 /* Framebuffer descriptor. */
 mali_ptr framebuffer;
+
+/* Output sync object. Only valid when submitted is true. */
+struct panfrost_batch_fence *out_sync;
 };
 
 /* Functions for managing the above */
 
+void
+panfrost_batch_fence_unreference(struct panfrost_batch_fence *fence);
+
+void
+panfr

[Mesa-dev] [PATCH v2 18/37] panfrost: Add flags to reflect the BO imported/exported state

2019-09-16 Thread Boris Brezillon
Will be useful to make the ioctl(WAIT_BO) call conditional on BOs that
are not exported/imported (meaning that all GPU accesses are known
by the context).

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_bo.c | 4 ++--
 src/gallium/drivers/panfrost/pan_bo.h | 6 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_bo.c 
b/src/gallium/drivers/panfrost/pan_bo.c
index 3f05226f96f4..4aabd5fd23ab 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -387,7 +387,7 @@ panfrost_bo_import(struct panfrost_screen *screen, int fd)
 bo->gem_handle = gem_handle;
 bo->gpu = (mali_ptr) get_bo_offset.offset;
 bo->size = lseek(fd, 0, SEEK_END);
-bo->flags |= PAN_BO_DONT_REUSE;
+bo->flags |= PAN_BO_DONT_REUSE | PAN_BO_IMPORTED;
 assert(bo->size > 0);
 pipe_reference_init(>reference, 1);
 
@@ -408,7 +408,7 @@ panfrost_bo_export(struct panfrost_bo *bo)
 if (ret == -1)
 return -1;
 
-bo->flags |= PAN_BO_DONT_REUSE;
+bo->flags |= PAN_BO_DONT_REUSE | PAN_BO_EXPORTED;
 return args.fd;
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_bo.h 
b/src/gallium/drivers/panfrost/pan_bo.h
index 49b392f7bd76..e141a60fc407 100644
--- a/src/gallium/drivers/panfrost/pan_bo.h
+++ b/src/gallium/drivers/panfrost/pan_bo.h
@@ -56,6 +56,12 @@ struct panfrost_screen;
 * let the BO logic know about this constraint. */
 #define PAN_BO_DONT_REUSE (1 << 5)
 
+/* BO has been imported */
+#define PAN_BO_IMPORTED   (1 << 6)
+
+/* BO has been exported */
+#define PAN_BO_EXPORTED   (1 << 7)
+
 struct panfrost_bo {
 /* Must be first for casting */
 struct list_head link;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 09/37] panfrost: s/PAN_ALLOCATE_/PAN_BO_/

2019-09-16 Thread Boris Brezillon
Change the prefix for BO allocation flags to make it consistent with
the rest of the BO API.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_assemble.c  |  2 +-
 src/gallium/drivers/panfrost/pan_blend_cso.c |  2 +-
 src/gallium/drivers/panfrost/pan_bo.c| 12 ++--
 src/gallium/drivers/panfrost/pan_context.c   |  6 +++---
 src/gallium/drivers/panfrost/pan_job.c   |  2 +-
 src/gallium/drivers/panfrost/pan_resource.c  |  4 ++--
 src/gallium/drivers/panfrost/pan_screen.h| 10 +-
 7 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_assemble.c 
b/src/gallium/drivers/panfrost/pan_assemble.c
index de73cf8839a7..cc4822a23615 100644
--- a/src/gallium/drivers/panfrost/pan_assemble.c
+++ b/src/gallium/drivers/panfrost/pan_assemble.c
@@ -82,7 +82,7 @@ panfrost_shader_compile(
  * I bet someone just thought that would be a cute pun. At least,
  * that's how I'd do it. */
 
-state->bo = panfrost_bo_create(screen, size, PAN_ALLOCATE_EXECUTE);
+state->bo = panfrost_bo_create(screen, size, PAN_BO_EXECUTE);
 memcpy(state->bo->cpu, dst, size);
 meta->shader = state->bo->gpu | program.first_tag;
 
diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c 
b/src/gallium/drivers/panfrost/pan_blend_cso.c
index 82527a5602ae..c61ffe203c4c 100644
--- a/src/gallium/drivers/panfrost/pan_blend_cso.c
+++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
@@ -272,7 +272,7 @@ panfrost_get_blend_for_context(struct panfrost_context 
*ctx, unsigned rti)
 final.shader.first_tag = shader->first_tag;
 
 /* Upload the shader */
-final.shader.bo = panfrost_bo_create(screen, shader->size, 
PAN_ALLOCATE_EXECUTE);
+final.shader.bo = panfrost_bo_create(screen, shader->size, 
PAN_BO_EXECUTE);
 memcpy(final.shader.bo->cpu, shader->buffer, shader->size);
 
 /* Pass BO ownership to job */
diff --git a/src/gallium/drivers/panfrost/pan_bo.c 
b/src/gallium/drivers/panfrost/pan_bo.c
index 9b0e8d943b43..7f14b3e3638b 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -228,16 +228,16 @@ panfrost_bo_create(struct panfrost_screen *screen, size_t 
size,
 size = MAX2(size, 4096);
 
 /* GROWABLE BOs cannot be mmapped */
-if (flags & PAN_ALLOCATE_GROWABLE)
-assert(flags & PAN_ALLOCATE_INVISIBLE);
+if (flags & PAN_BO_GROWABLE)
+assert(flags & PAN_BO_INVISIBLE);
 
 unsigned translated_flags = 0;
 
 if (screen->kernel_version->version_major > 1 ||
 screen->kernel_version->version_minor >= 1) {
-if (flags & PAN_ALLOCATE_GROWABLE)
+if (flags & PAN_BO_GROWABLE)
 translated_flags |= PANFROST_BO_HEAP;
-if (!(flags & PAN_ALLOCATE_EXECUTE))
+if (!(flags & PAN_BO_EXECUTE))
 translated_flags |= PANFROST_BO_NOEXEC;
 }
 
@@ -276,9 +276,9 @@ panfrost_bo_create(struct panfrost_screen *screen, size_t 
size,
  * never map since we don't care about their contents; they're purely
  * for GPU-internal use. But we do trace them anyway. */
 
-if (!(flags & (PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_DELAY_MMAP)))
+if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
 panfrost_bo_mmap(screen, bo);
-else if (flags & PAN_ALLOCATE_INVISIBLE) {
+else if (flags & PAN_BO_INVISIBLE) {
 if (pan_debug & PAN_DBG_TRACE)
 pandecode_inject_mmap(bo->gpu, NULL, bo->size, NULL);
 }
diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index cadb462c5b01..f01ddf18b105 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -2613,10 +2613,10 @@ panfrost_setup_hardware(struct panfrost_context *ctx)
 
 ctx->scratchpad = panfrost_bo_create(screen, 64 * 4 * 4096, 0);
 ctx->tiler_heap = panfrost_bo_create(screen, 4096 * 4096,
- PAN_ALLOCATE_INVISIBLE |
- PAN_ALLOCATE_GROWABLE);
+ PAN_BO_INVISIBLE |
+ PAN_BO_GROWABLE);
 ctx->tiler_dummy = panfrost_bo_create(screen, 4096,
-  PAN_ALLOCATE_INVISIBLE);
+  PAN_BO_INVISIBLE);
 assert(ctx->scratchpad && ctx->tiler_heap && ctx->tiler_dummy);
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job

[Mesa-dev] [PATCH v2 04/37] panfrost: Add polygon_list to the batch BO set at allocation time

2019-09-16 Thread Boris Brezillon
That's what we do for other per-batch BOs, and we'll soon add a helper
to automate this create_bo()+add_bo()+bo_unreference() sequence, so
let's prepare the code to ease this transition.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_drm.c |  1 -
 src/gallium/drivers/panfrost/pan_job.c | 10 +++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_drm.c 
b/src/gallium/drivers/panfrost/pan_drm.c
index ada6221850c0..b77af714d117 100644
--- a/src/gallium/drivers/panfrost/pan_drm.c
+++ b/src/gallium/drivers/panfrost/pan_drm.c
@@ -278,7 +278,6 @@ panfrost_drm_submit_vs_fs_batch(struct panfrost_batch 
*batch, bool has_draws)
 
 panfrost_batch_add_bo(batch, ctx->scratchpad);
 panfrost_batch_add_bo(batch, ctx->tiler_heap);
-panfrost_batch_add_bo(batch, batch->polygon_list);
 
 if (batch->first_job.gpu) {
 ret = panfrost_drm_submit_batch(batch, batch->first_job.gpu, 
0);
diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 03119e643846..e06440010aeb 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -67,9 +67,6 @@ panfrost_free_batch(struct panfrost_batch *batch)
 panfrost_bo_unreference(ctx->base.screen, bo);
 }
 
-/* Unreference the polygon list */
-panfrost_bo_unreference(ctx->base.screen, batch->polygon_list);
-
 _mesa_hash_table_remove_key(ctx->batches, >key);
 
 if (ctx->batch == batch)
@@ -158,6 +155,13 @@ panfrost_batch_get_polygon_list(struct panfrost_batch 
*batch, unsigned size)
 
 batch->polygon_list = panfrost_drm_create_bo(screen,
 size, PAN_ALLOCATE_INVISIBLE);
+panfrost_batch_add_bo(batch, batch->polygon_list);
+
+/* A BO reference has been retained by panfrost_batch_add_bo(),
+ * so we need to unreference it here if we want the BO to be
+ * automatically released when the batch is destroyed.
+ */
+panfrost_bo_unreference(>base, batch->polygon_list);
 }
 
 return batch->polygon_list->gpu;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 01/37] panfrost: Stop exposing internal panfrost_*_batch() functions

2019-09-16 Thread Boris Brezillon
panfrost_{create,free,get}_batch() are only called inside pan_job.c.
Let's make them static.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_job.c |  6 +++---
 src/gallium/drivers/panfrost/pan_job.h | 11 ---
 2 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_job.c 
b/src/gallium/drivers/panfrost/pan_job.c
index 00df90063428..03119e643846 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -31,7 +31,7 @@
 #include "util/u_format.h"
 #include "util/u_pack_color.h"
 
-struct panfrost_batch *
+static struct panfrost_batch *
 panfrost_create_batch(struct panfrost_context *ctx,
   const struct pipe_framebuffer_state *key)
 {
@@ -54,7 +54,7 @@ panfrost_create_batch(struct panfrost_context *ctx,
 return batch;
 }
 
-void
+static void
 panfrost_free_batch(struct panfrost_batch *batch)
 {
 if (!batch)
@@ -79,7 +79,7 @@ panfrost_free_batch(struct panfrost_batch *batch)
 ralloc_free(batch);
 }
 
-struct panfrost_batch *
+static struct panfrost_batch *
 panfrost_get_batch(struct panfrost_context *ctx,
const struct pipe_framebuffer_state *key)
 {
diff --git a/src/gallium/drivers/panfrost/pan_job.h 
b/src/gallium/drivers/panfrost/pan_job.h
index fe15e2dddabf..b0580ea2d470 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -115,17 +115,6 @@ struct panfrost_batch {
 
 /* Functions for managing the above */
 
-struct panfrost_batch *
-panfrost_create_batch(struct panfrost_context *ctx,
-  const struct pipe_framebuffer_state *key);
-
-void
-panfrost_free_batch(struct panfrost_batch *batch);
-
-struct panfrost_batch *
-panfrost_get_batch(struct panfrost_context *ctx,
-   const struct pipe_framebuffer_state *key);
-
 struct panfrost_batch *
 panfrost_get_batch_for_fbo(struct panfrost_context *ctx);
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 03/37] panfrost: Add missing panfrost_batch_add_bo() calls

2019-09-16 Thread Boris Brezillon
Some BOs are used by batches but never explicitly added to the BO set.
This is currently not a problem because we wait for the execution of
a batch to be finished before releasing a BO, but we will soon relax
this rule.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index a063c16ab019..06a12662dd36 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -794,11 +794,12 @@ panfrost_map_constant_buffer_gpu(
 {
 struct pipe_constant_buffer *cb = >cb[index];
 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
+struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 
 if (rsrc) {
+panfrost_batch_add_bo(batch, rsrc->bo);
 return rsrc->bo->gpu;
} else if (cb->user_buffer) {
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 return panfrost_upload_transient(batch, cb->user_buffer, 
cb->buffer_size);
} else {
 unreachable("No constant buffer");
@@ -1113,6 +1114,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool 
with_vertex_data)
 
 struct panfrost_shader_state *ss = 
>variants[all->active_variant];
 
+panfrost_batch_add_bo(batch, ss->bo);
+
 /* Uniforms are implicitly UBO #0 */
 bool has_uniforms = buf->enabled_mask & (1 << 0);
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 02/37] panfrost: Use the correct type for the bo_handle array

2019-09-16 Thread Boris Brezillon
The DRM driver expects an array of u32, let's use the correct type, even
if using an int works in practice because it's still a 32-bit integer.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_drm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/panfrost/pan_drm.c 
b/src/gallium/drivers/panfrost/pan_drm.c
index 8b8e38f03809..ada6221850c0 100644
--- a/src/gallium/drivers/panfrost/pan_drm.c
+++ b/src/gallium/drivers/panfrost/pan_drm.c
@@ -232,7 +232,8 @@ panfrost_drm_submit_batch(struct panfrost_batch *batch, u64 
first_job_desc,
 struct pipe_context *gallium = (struct pipe_context *) ctx;
 struct panfrost_screen *screen = pan_screen(gallium->screen);
 struct drm_panfrost_submit submit = {0,};
-int *bo_handles, ret;
+uint32_t *bo_handles;
+int ret;
 
 submit.in_syncs = (u64) (uintptr_t) >out_sync;
 submit.in_sync_count = 1;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 00/37] panfrost: Support batch pipelining

2019-09-16 Thread Boris Brezillon
Hello,

This is the second attempt at supporting batch pipelining. This time I
implemented it using a dependency graph (as suggested by Alyssa and
Steven) so that batch submission can be delayed even more: the only
time we flush batches now is when we have an explicit flush or when
the CPU needs to access a BO (we might want to tweak that a bit to
avoid the extra latency incurred by this solution). With that in place
we hope to increase GPU utilization.

A few words about the patches in this series:

* Like the previous version, this series is a mix of cleanups and
  functional changes. Most of them should be pretty trivial to review
  and I intend to merge them independently once they have received
  proper review (to avoid having to send another patch bomb like this
  one).

* The "rework BO API" batch has been split to ease review

* Patches 35 and 36 are not mandatory, but I remember reading (I think
  it was Steven who mentioned that) that draw order matters when
  queueing render operations for different frames (frame N should
  ideally be ready before frame N+1). Not sure if enforcing draw call
  order is enough to guarantee that rendering of frame N always
  finishes before frame N+1 though.

Regards,

Boris

Boris Brezillon (37):
  panfrost: Stop exposing internal panfrost_*_batch() functions
  panfrost: Use the correct type for the bo_handle array
  panfrost: Add missing panfrost_batch_add_bo() calls
  panfrost: Add polygon_list to the batch BO set at allocation time
  panfrost: Kill a useless memset(0) in panfrost_create_context()
  panfrost: Stop passing has_draws to panfrost_drm_submit_vs_fs_batch()
  panfrost: Get rid of pan_drm.c
  panfrost: Move panfrost_bo_{reference,unreference}() to pan_bo.c
  panfrost: s/PAN_ALLOCATE_/PAN_BO_/
  panfrost: Move the BO API to its own header
  panfrost: Stop exposing panfrost_bo_cache_{fetch,put}()
  panfrost: Don't check if BO is mmaped before calling
panfrost_bo_mmap()
  panfrost: Stop passing screen around for BO operations
  panfrost: Stop using panfrost_bo_release() outside of pan_bo.c
  panfrost: Add panfrost_bo_{alloc,free}()
  panfrost: Don't return imported/exported BOs to the cache
  panfrost: Make sure the BO is 'ready' when picked from the cache
  panfrost: Add flags to reflect the BO imported/exported state
  panfrost: Add the panfrost_batch_create_bo() helper
  panfrost: Add FBO BOs to batch->bos earlier
  panfrost: Allocate tiler and scratchpad BOs per-batch
  panfrost: Extend the panfrost_batch_add_bo() API to pass access flags
  panfrost: Make panfrost_batch->bos a hash table
  panfrost: Cache GPU accesses to BOs
  panfrost: Add a batch fence
  panfrost: Use the per-batch fences to wait on the last submitted batch
  panfrost: Add a panfrost_freeze_batch() helper
  panfrost: Start tracking inter-batch dependencies
  panfrost: Prepare panfrost_fence for batch pipelining
  panfrost: Add a panfrost_flush_all_batches() helper
  panfrost: Add a panfrost_flush_batches_accessing_bo() helper
  panfrost: Kill the explicit serialization in panfrost_batch_submit()
  panfrost: Get rid of the flush in panfrost_set_framebuffer_state()
  panfrost: Do fine-grained flushing when preparing BO for CPU accesses
  panfrost: Rename ctx->batches into ctx->fbo_to_batch
  panfrost: Take draw call order into account
  panfrost/ci: New tests are passing

 .../drivers/panfrost/ci/expected-failures.txt |   4 -
 src/gallium/drivers/panfrost/meson.build  |   1 -
 src/gallium/drivers/panfrost/pan_allocate.c   |  22 +-
 src/gallium/drivers/panfrost/pan_allocate.h   |  20 -
 src/gallium/drivers/panfrost/pan_assemble.c   |   3 +-
 src/gallium/drivers/panfrost/pan_blend_cso.c  |  13 +-
 src/gallium/drivers/panfrost/pan_bo.c | 331 +++-
 src/gallium/drivers/panfrost/pan_bo.h | 130 +++
 src/gallium/drivers/panfrost/pan_compute.c|   2 +-
 src/gallium/drivers/panfrost/pan_context.c| 175 ++--
 src/gallium/drivers/panfrost/pan_context.h|  22 +-
 src/gallium/drivers/panfrost/pan_drm.c| 394 -
 src/gallium/drivers/panfrost/pan_fragment.c   |   3 -
 src/gallium/drivers/panfrost/pan_instancing.c |   6 +-
 src/gallium/drivers/panfrost/pan_job.c| 760 --
 src/gallium/drivers/panfrost/pan_job.h|  85 +-
 src/gallium/drivers/panfrost/pan_mfbd.c   |   1 +
 src/gallium/drivers/panfrost/pan_resource.c   |  65 +-
 src/gallium/drivers/panfrost/pan_resource.h   |   6 -
 src/gallium/drivers/panfrost/pan_screen.c |  91 ++-
 src/gallium/drivers/panfrost/pan_screen.h |  62 +-
 src/gallium/drivers/panfrost/pan_sfbd.c   |   1 +
 src/gallium/drivers/panfrost/pan_varyings.c   |   6 +-
 23 files changed, 1456 insertions(+), 747 deletions(-)
 create mode 100644 src/gallium/drivers/panfrost/pan_bo.h
 delete mode 100644 src/gallium/drivers/panfrost/pan_drm.c

-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.f

Re: [Mesa-dev] [PATCH v3 00/25] panfrost: Rework the batch pipelining logic

2019-09-08 Thread Boris Brezillon
On Thu,  5 Sep 2019 21:41:25 +0200
Boris Brezillon  wrote:

> Hello,
> 
> This is actually a v1 expect for patches 1 to 4, which have already
> been submitted separately.
> 
> The goal here is to rework the panfrost_job logic (renamed
> panfrost_batch at the beginning of the series) to avoid unnecessary
> flushes when we can.
> 
> The new solution is based on the VC4/V3D implementation.
> 
> Regards,
> 
> Boris
> 
> Boris Brezillon (25):
>   panfrost: s/job/batch/
>   panfrost: Pass a batch to panfrost_drm_submit_vs_fs_batch()
>   panfrost: Stop passing a ctx to functions being passed a batch
>   panfrost: Make transient allocation rely on the BO cache
>   panfrost: Convert ctx->{scratchpad,tiler_heap,tiler_dummy} to plain
> BOs
>   panfrost: Get rid of unused panfrost_context fields
>   panfrost: Get rid of the now unused SLAB allocator
>   panfrost: Rename pan_bo_cache.c into pan_bo.c

Queued patches 1 to 8.

>   panfrost: Rework the panfrost_bo API
>   panfrost: Make sure the BO is 'ready' when picked from the cache
>   panfrost: Use a pipe_framebuffer_state as the batch key
>   panfrost: Get rid of the unused 'flush jobs accessing res' infra
>   panfrost: Allow testing if a specific batch is targeting a scanout FB
>   panfrost: Move the fence creation in panfrost_flush()
>   panfrost: Move the batch submission logic to panfrost_batch_submit()
>   panfrost: Pass a batch to panfrost_{allocate,upload}_transient()
>   panfrost: Pass a batch to functions emitting FB descs
>   panfrost: Use ctx->wallpaper_batch in panfrost_blit_wallpaper()
>   panfrost: Pass a batch to panfrost_set_value_job()
>   panfrost: Prepare things to avoid flushes on FB switch
>   panfrost: Add new helpers to describe job depencencies on BOs
>   panfrost: Delay payloads[].offset_start initialization
>   panfrost: Remove uneeded add_bo() in initialize_surface()
>   panfrost: Support batch pipelining
>   panfrost/ci: New tests are passing
> 
>  .../drivers/panfrost/ci/expected-failures.txt |   4 -
>  src/gallium/drivers/panfrost/meson.build  |   2 +-
>  src/gallium/drivers/panfrost/pan_allocate.c   |  95 +---
>  src/gallium/drivers/panfrost/pan_allocate.h   |  40 +-
>  src/gallium/drivers/panfrost/pan_assemble.c   |   3 +-
>  src/gallium/drivers/panfrost/pan_blend_cso.c  |   9 +-
>  src/gallium/drivers/panfrost/pan_blit.c   |   9 +-
>  src/gallium/drivers/panfrost/pan_bo.c | 405 ++
>  src/gallium/drivers/panfrost/pan_bo.h |  80 +++
>  src/gallium/drivers/panfrost/pan_bo_cache.c   | 167 --
>  src/gallium/drivers/panfrost/pan_compute.c|  12 +-
>  src/gallium/drivers/panfrost/pan_context.c| 478 +++--
>  src/gallium/drivers/panfrost/pan_context.h|  51 +-
>  src/gallium/drivers/panfrost/pan_drm.c| 266 +-
>  src/gallium/drivers/panfrost/pan_fragment.c   |  32 +-
>  src/gallium/drivers/panfrost/pan_instancing.c |   9 +-
>  src/gallium/drivers/panfrost/pan_job.c| 493 --
>  src/gallium/drivers/panfrost/pan_job.h|  97 ++--
>  src/gallium/drivers/panfrost/pan_mfbd.c   |  58 +--
>  src/gallium/drivers/panfrost/pan_resource.c   |  64 +--
>  src/gallium/drivers/panfrost/pan_resource.h   |   8 +-
>  src/gallium/drivers/panfrost/pan_scoreboard.c |  29 +-
>  src/gallium/drivers/panfrost/pan_screen.c |   5 +-
>  src/gallium/drivers/panfrost/pan_screen.h |  62 +--
>  src/gallium/drivers/panfrost/pan_sfbd.c   |  50 +-
>  src/gallium/drivers/panfrost/pan_varyings.c   |  13 +-
>  26 files changed, 1277 insertions(+), 1264 deletions(-)
>  create mode 100644 src/gallium/drivers/panfrost/pan_bo.c
>  create mode 100644 src/gallium/drivers/panfrost/pan_bo.h
>  delete mode 100644 src/gallium/drivers/panfrost/pan_bo_cache.c
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 24/25] panfrost: Support batch pipelining

2019-09-06 Thread Boris Brezillon
On Fri, 6 Sep 2019 08:10:55 -0400
Alyssa Rosenzweig  wrote:

> > Now, if we go for the dep graph solution, that's probably a non issue,
> > since deps can be added at any point as long as they are described
> > before the flush happens.
> >
> > [snip]
> >
> > Thanks for the detailed explanation. I'll look into that. This being
> > said, I was wondering if we shouldn't merge this patch (after I
> > addressed your first comment maybe) before getting involved in a more
> > advanced solution (which I agree is what we should aim for).  
> 
> If it's alright, I would prefer to focus on patches 1-23; most of it
> looks wonderful so the few comments I had should be easily addressed for
> the v2.
> 
> Once all of that initial work is merged (and your revision queue and my
> review queue are cleared), we can circle back to this change.
> 
> I would prefer to go straight to a dep graph approach; this patch is a
> good intermediate step for testing the earlier patches in the series but
> given the extra complexity added for the draw flushing (which you
> mention is only needed with the non-graph solution), I don't know if we
> should merge.
> 
> Thoughts?

I'm definitely biased :-), but I do find the changes at hand not that
invasive: most of the logic is placed in helpers that are called in one
or 2 places. I mean, removing those explicit flushes when the time
comes shouldn't be too hard, and I do think it's one step in the right
direction even though it's not the perfect solution yet.

Anyway, I guess having patch 1 to 23 merged would already
significantly reduce my patch queue, and I'm definitely interested in
working on the dep graph solution, so I'm not strongly opposed to the
idea of dropping this patch. 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 24/25] panfrost: Support batch pipelining

2019-09-06 Thread Boris Brezillon
On Fri, 6 Sep 2019 07:40:17 -0400
Alyssa Rosenzweig  wrote:

> I think we can simplify `panfrost_flush_draw_deps`. We need to flush
> any BOs that write where we read/write and any BOs that read where we
> write. Since we collect this information via add_bo, we can
> implement this logic generically, without requiring a special case
> for every kind of BO we might need to flush, which is verbose and easy
> to forget when adding new BOs later. You might need some extra tables in
> panfrost_batch.

With the current design where deps are flushed before issuing draw/clear
job, the existing add_bo() calls happen too late. This being said,
we could add BOs earlier and store the type of access in batch->bos
(turn it into a hash table where the key is the BO and the data
contains the flags). With that in place, we'd be able to automatically
add BOs to the ctx->{write,read}_bos hash tables.

Now, if we go for the dep graph solution, that's probably a non issue,
since deps can be added at any point as long as they are described
before the flush happens.

> 
> 
> 
> On design more generally:
> 
> I don't think we want to trigger any flushes at draw time. Rather, we
> want to trigger at flush time. Conceptually, right before we send a
> batch to the GPU, we ensure all of the other batches it needs have been
> sent first and there is a barrier between them (via wait_bo).

I agree, and actually had this rework on my TODO list.

> 
> The first consequence of delaying is that CPU-side logic can proceed
> without being stalled on results.
> 
> The second is that different batches can be _totally_ independent.
> Consider an app that does the following passes:
> 
> [FBO 1: Render a depth map of an object ]
> [FBO 2: Render a normal map of that object ]
> [Scanout: Render using the depth/normal maps as textures ]
> 
> In this case, the app should generate CPU-side batches for all three
> render targets at once. Then, when flush() is called, fbo #1 and fbo #2
> should be submitted and waited upon so they execute concurrently, then
> scanout is submitted and waited.

Yes, also thought about that. We'd need to move the out_sync object
to the batch to make that possible, but that's definitely an
improvement I had in mind.

> This should be a little faster,
> especially paired with _NEXT changes in the kernel. CC'ing Steven to
> ensure the principle is sound.

Haven't looked at that patch yet.

> 
> We can model this with a dependency graph, where batches are nodes and
> the dependency of a batch X on a batch Y is represented as an edge from
> Y to X. So this is a directed arrow graph. For well-behaved apps, the
> graph must be acyclic (why?).
> 
> This touches on the idea of topological sorting: a topological sort of
> the dependency graph is a valid order to submit the batches in. So
> hypothetically, you could delay everything to flush time, construct the
> dependency graph, do a topological sort, and then submit/wait in the
> order of the sort.
> 
> But more interesting will be to extend to the concurrent FBO case, an
> algorithm for which follows simply from topological sorting:
> 
> ---
> 
> 0. Create the dependency graph. Cull nodes that are not connected to the
> node we're trying to flush (the scanout batch). In other words, reduce
> the graph to its component containing the flushed node. See also
> https://en.wikipedia.org/wiki/Connected_component_(graph_theory)#Algorithms
> 
> 1. For each node with no incoming edges (=batch with no dependencies),
> submit this batch. Remove it from the dependency graph, removing all
> outgoing edges. Add it to a set of submitted batches.
> 
> 2. For each submitted batch, wait on that batch.

> 3. Jump back to step #1 until there are no more nodes with no incoming
> edges.
> 
> ---
> 
> Intuitively, the idea is "submit as much as we can all at once, then
> wait for it. Keep doing that until we submitted everything we need."
> 
> A bit more formally, nodes with no edges have no unsatisfied
> dependencies by definition, so we can submit them in any order. We
> choose to submit these first. We are allowed to submit a wait at any
> time. Once we wait on a batch, it is complete, so any batches that
> depend on it have that dependency satisfied, represented by removing the
> edge from the dependency graph.
> 
> Do note the subtlety of the termination condition: no more nodes
> with no incoming edges. This makes proving that the algorithm halts
> easy, since every iteration either removes a node or halts, and there
> are a finite integral non-negative number of nodes.
> 
> * Whether this is a useful optimization is greatly dependent on the
>   hardware. The Arm guys can chime in here, but I do know the GPU has
>   some parallel execution capabilities so this shouldn't be a total
>   waste.

Thanks for the detailed explanation. I'll look into that. This being
said, I was wondering if we shouldn't merge this patch (after I
addressed your first comment maybe) before getting involved in 

Re: [Mesa-dev] [PATCH v3 21/25] panfrost: Add new helpers to describe job dependencies on BOs

2019-09-06 Thread Boris Brezillon
On Thu, 5 Sep 2019 19:26:45 -0400
Alyssa Rosenzweig  wrote:

> > --- a/src/gallium/drivers/panfrost/pan_fragment.c
> > +++ b/src/gallium/drivers/panfrost/pan_fragment.c
> > @@ -44,7 +44,7 @@ panfrost_initialize_surface(
> >  rsrc->slices[level].initialized = true;
> >  
> >  assert(rsrc->bo);
> > -panfrost_batch_add_bo(batch, rsrc->bo);
> > +panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RW);
> >  }  
> 
> This should be write-only. The corresponding read would be iff we're
> wallpapering, so add an add_bo with RO in the wallpaper drawing
> routine.

Actually we can't do that in the wallpaper draw, it's too late (the
wallpaper draw happens at flush time, and adding the BO when we're
already flushing the batch is pointless). 

> 
> I don't know if it really matters (since we can only have one write
> at a time) but let's be precise.

That's true, marking the BO for read access is useless when it's
already flagged for write since a write will anyway force batches that
want to read or write this BO to flush. If we really want to be precise
(for debug purpose I guess), we should probably have:

   panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_WR);
   if (!batch->clear)
  panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RD);

> 
> ---
> 
> On that note, sometimes we stuff multiple related-but-independent
> buffers within a single BO, particularly multiple miplevels/cubemap
> faces/etc in one BO.  Hypothetically, it is legal to render to
> multiple faces independently at once. In practice, I don't know if
> this case is hit. If it is, we can of course split up the resource into
> per-face BOs.

I guess we'd have to introduce the concept of BO regions and only
force a flush when things overlap, assuming we want to keep those
independent buffers stored in the same BO of course.

> 
> >  _mesa_hash_table_remove_key(ctx->batches, &batch->key);
> > +util_unreference_framebuffer_state(&batch->key);  
> 
> (Remind me where was the corresponding reference..?)

Duh, should be moved to patch 11 ("panfrost: Use a
pipe_framebuffer_state as the batch key").

> 
> > +void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
> > +{
> > +for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
> > +struct panfrost_resource *rsrc =
> > pan_resource(batch->key.cbufs[i]->texture);
> > +panfrost_batch_add_bo(batch, rsrc->bo,
> > PAN_SHARED_BO_RW);
> > +   }
> > +
> > +if (batch->key.zsbuf) {
> > +struct panfrost_resource *rsrc =
> > pan_resource(batch->key.zsbuf->texture);
> > +panfrost_batch_add_bo(batch, rsrc->bo,
> > PAN_SHARED_BO_RW);
> > +}
> > +}  
> 
> As per above, these should be write-only. Also, is this duplicate from
> the panfrost_batch_add_bo in panfrost_initialize_surface? It feels
> like it. Which one is deadcode..?

We only draw the wallpaper on cbufs[0] right now, so I guess we can use
BO_WR here.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 23/25] panfrost: Remove unneeded add_bo() in initialize_surface()

2019-09-06 Thread Boris Brezillon
On Thu, 5 Sep 2019 19:28:04 -0400
Alyssa Rosenzweig  wrote:

> Ah, ignore my previous comment. Could we squash this into the patch that
> added the PAN_SHARED_BO_RW define?

Absolutely (I don't know why I did that separately).

> 
> On Thu, Sep 05, 2019 at 09:41:48PM +0200, Boris Brezillon wrote:
> > Should already be added in panfrost_draw_vbo() and panfrost_clear(),
> > no need to add it here too.
> > 
> > Signed-off-by: Boris Brezillon 
> > ---
> >  src/gallium/drivers/panfrost/pan_fragment.c | 3 ---
> >  1 file changed, 3 deletions(-)
> > 
> > diff --git a/src/gallium/drivers/panfrost/pan_fragment.c 
> > b/src/gallium/drivers/panfrost/pan_fragment.c
> > index cbb95b79f52a..00ff363a1bba 100644
> > --- a/src/gallium/drivers/panfrost/pan_fragment.c
> > +++ b/src/gallium/drivers/panfrost/pan_fragment.c
> > @@ -42,9 +42,6 @@ panfrost_initialize_surface(
> >  struct panfrost_resource *rsrc = pan_resource(surf->texture);
> >  
> >  rsrc->slices[level].initialized = true;
> > -
> > -assert(rsrc->bo);
> > -panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RW);
> >  }
> >  
> >  /* Generate a fragment job. This should be called once per frame. 
> > (According to
> > -- 
> > 2.21.0  

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 10/25] panfrost: Make sure the BO is 'ready' when picked from the cache

2019-09-05 Thread Boris Brezillon
On Thu, 5 Sep 2019 16:43:23 -0400
Alyssa Rosenzweig  wrote:

> > +bool
> > +panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns)
> > +{
> > +struct drm_panfrost_wait_bo req = {
> > +.handle = bo->gem_handle,
> > +   .timeout_ns = timeout_ns,
> > +};
> > +int ret;
> > +
> > +ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
> > +if (ret != -1)
> > +return true;
> > +
> > +assert(errno == ETIMEDOUT || errno == EBUSY);
> > +return false;
> > +}  
> 
> I would appreciate a comment explaining what the return value of this
> ioctl is. `ret != -1` and asserting an errno is... suspicious? Not
> wrong, to my knowledge, but hard to decipher without context.

Will document that.

> 
> > +/* Before creating a BO, we first want to check the cache but 
> > without
> > + * waiting for BO readiness (BOs in the cache can still be 
> > referenced
> > + * by jobs that are not finished yet).
> > + * If the cached allocation fails we fall back on fresh BO 
> > allocation,
> > + * and if that fails too, we try one more time to allocate from the
> > + * cache, but this time we accept to wait.
> >   */  
> 
> Conceptually:
> 
> We first try a ready BO from the cache. OK.
> 
> If that fails, there is no BO in the cache that is currently ready for
> use; by definition of BO readiness, this is because another concurrent
> job is using it. We then try to create a new BO. Suppose a given job
> uses an average of `b` BOs. Then for `j` concurrent jobs, assuming all
> of these allocations succeed, we have `j * b` BOs in the cache. This is
> an unfortunate bump in memory usage but necessary for pipelining.
> 
> If that allocation fails, by definition of memory allocation failures,
> we ran out of memory and cannot proceed with the allocation. Either:
> 
>  - The BO cache is responsible for this. In this case, continuing to use
>the BO cache (even with the waits) will just dig us deeper into the
>hole. Perhaps we should call bo_evict_all from userspace to handle
>the memory pressure? Or does madvise render this irrelevant?

Evict won't help here as memory will only be released after the jobs
are done using it. And madvise doesn't help either, for the same reason.

> 
>  - The BO cache is not responsible for this. In this case, we could
>continue to use the BO cache, but then either:
> 
>   - There is a BO we can wait for. Then waiting is okay.
>   - There is not. Then that cache fetch fails and we kerplutz.
> What now? If we need an allocation, cache or no cache, if the
> kernel says no, no means no. What then?

The behavior hasn't changed regarding allocation failures: it's still
an assert(), so the code is not more or less buggy than it was :p. What
happens when assert()s are disabled? probably a segfault because of a
NULL pointer dereference. So, adding the fprintf() is probably a good
idea as a first step, and then we can see if we can handle the OOM case
gracefully.

> 
> In short, I'm not convinced this algorithm (specifically the last step)
> is ideal.

It really depends on how robust you want to be when the system is under
memory pressure vs how long you accept to wait. Note that, in the worst
case scenario we wouldn't wait more than we currently do, as having each
batch wait on BOs of the previous batch is just like the serialization
we had in panfrost_flush(). I don't see it as a huge problem, but maybe
I'm wrong.

> 
> If there is no memory left for us, is it responsible to continue at all?

It's not exactly no memory, it's no immediately available memory.

> Should we just fail the allocation after step 2, and if the caller has a
> problem with that, it's their issue? Or we abort here after step 2?

I think that one is a separate issue. I mean, that's something we have
to handle even if we go through step 3 and step 3 fails. 

> I
> don't like the robustness implications but low memory behaviour is a
> risky subject as it is; I don't want to add more unknowns into it --
> aborting it with an assert(0) is something we can recognize immediately.
> Strange crashes in random places with no explanation, less so.

And that hasn't changed. We still have an assert after step 3.

> 
> CC'ing Rob to see if he has any advise re Panfrost madvise interactions
> as well as general kernel OOM policy.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 09/25] panfrost: Rework the panfrost_bo API

2019-09-05 Thread Boris Brezillon
On Thu, 5 Sep 2019 16:31:04 -0400
Alyssa Rosenzweig  wrote:

> > +static struct panfrost_bo *
> > +panfrost_bo_alloc(struct panfrost_screen *screen, size_t size,
> > +  uint32_t flags)
> > +{  
> ...
> > +ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_CREATE_BO, 
> > &create_bo);
> > +if (ret)
> > +return NULL;  
> 
> I notice this had a print to stderr before with an assertion out, but
> now it fails silently. Is this change of behaviour intentional? 

It is.

> BO
> creation would previously return a valid BO gauranteed. This is no
> longer so obviously true -- although I see we later assert that the
> return is non-NULL in the caller.
> 
> Could you help me understand the new logic a bit? Thank you!
> 

The rationale behind this change being that panfrost_bo_alloc() will
not be our last option (see patch 9). I can add the fprintf() back in
this patch, and move it to the caller in patch 9 if you prefer.

> > +if (!(flags & (PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_DELAY_MMAP)))
> > +panfrost_bo_mmap(bo);
> > +   else if ((flags & PAN_ALLOCATE_INVISIBLE) && (pan_debug & 
> > PAN_DBG_TRACE))  
> 
> I think the spacing got wacky here (on the beginning of the last line)
>

Will fix that.
 
> > +static void
> > +panfrost_bo_release(struct panfrost_bo *bo)
> > +{
> > +
> > +/* Rather than freeing the BO now, we'll cache the BO for later
> > + * allocations if we're allowed to */
> > +
> > +panfrost_bo_munmap(bo);
> > +
> > +if (panfrost_bo_cache_put(bo))
> > +return;
> > +
> > +panfrost_bo_free(bo);
> > +}  
> 
> I see we now have the distinction between panfrost_bo_release (cached)
> and panfrost_bo_free (uncached). I'm worried the distinction might not
> be obvious to future Panfrost hackers.
> 
> Could you add a comment above each function clarifying the cache
> behaviour?

Looks like the _release() function can be inlined in
panfrost_bo_unreference(). I'm still not happy with the
panfrost_bo_create() name though. Maybe we should rename this one into
panfrost_get_bo().

> 
> -
> 
> Other than these, the cleanup in general seems like a good idea. But in
> general, please try to split up patches like this to aid reviewing. Thank
> you!

Yes, I guess I got tired splitting things up and decided to group
changes that were kind of related in a single patch (also don't like
having 30+ patch series). I'll split that up in v4.

Thanks for the review!

Boris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 14/25] panfrost: Move the fence creation in panfrost_flush()

2019-09-05 Thread Boris Brezillon
panfrost_flush() is about to be reworked to flush all pending batches,
but we want the fence to block on the last one. Let's move the fence
creation logic in panfrost_flush() to prepare for this situation.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c | 13 +
 src/gallium/drivers/panfrost/pan_context.h |  3 +++
 src/gallium/drivers/panfrost/pan_drm.c | 11 ++-
 src/gallium/drivers/panfrost/pan_screen.h  |  3 +--
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index e34f5757b1cf..6552052b8cad 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1308,7 +1308,6 @@ panfrost_queue_draw(struct panfrost_context *ctx)
 
 static void
 panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate,
-  struct pipe_fence_handle **fence,
   struct panfrost_batch *batch)
 {
 panfrost_batch_submit(batch);
@@ -1316,14 +1315,14 @@ panfrost_submit_frame(struct panfrost_context *ctx, 
bool flush_immediate,
 /* If visual, we can stall a frame */
 
 if (!flush_immediate)
-panfrost_drm_force_flush_fragment(ctx, fence);
+panfrost_drm_force_flush_fragment(ctx);
 
 ctx->last_fragment_flushed = false;
 ctx->last_batch = batch;
 
 /* If readback, flush now (hurts the pipelined performance) */
 if (flush_immediate)
-panfrost_drm_force_flush_fragment(ctx, fence);
+panfrost_drm_force_flush_fragment(ctx);
 }
 
 static void
@@ -1452,7 +1451,13 @@ panfrost_flush(
 bool flush_immediate = /*flags & PIPE_FLUSH_END_OF_FRAME*/true;
 
 /* Submit the frame itself */
-panfrost_submit_frame(ctx, flush_immediate, fence, batch);
+panfrost_submit_frame(ctx, flush_immediate, batch);
+
+if (fence) {
+struct panfrost_fence *f = panfrost_fence_create(ctx);
+pipe->screen->fence_reference(pipe->screen, fence, NULL);
+*fence = (struct pipe_fence_handle *)f;
+}
 
 /* Prepare for the next frame */
 panfrost_invalidate_frame(ctx);
diff --git a/src/gallium/drivers/panfrost/pan_context.h 
b/src/gallium/drivers/panfrost/pan_context.h
index 02552ed23de2..6ad2cc81c781 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -297,6 +297,9 @@ pan_context(struct pipe_context *pcontext)
 return (struct panfrost_context *) pcontext;
 }
 
+struct panfrost_fence *
+panfrost_fence_create(struct panfrost_context *ctx);
+
 struct pipe_context *
 panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned 
flags);
 
diff --git a/src/gallium/drivers/panfrost/pan_drm.c 
b/src/gallium/drivers/panfrost/pan_drm.c
index e4b75fad4078..47cec9f39fef 100644
--- a/src/gallium/drivers/panfrost/pan_drm.c
+++ b/src/gallium/drivers/panfrost/pan_drm.c
@@ -109,7 +109,7 @@ panfrost_drm_submit_vs_fs_batch(struct panfrost_batch 
*batch, bool has_draws)
 return ret;
 }
 
-static struct panfrost_fence *
+struct panfrost_fence *
 panfrost_fence_create(struct panfrost_context *ctx)
 {
 struct pipe_context *gallium = (struct pipe_context *) ctx;
@@ -136,8 +136,7 @@ panfrost_fence_create(struct panfrost_context *ctx)
 }
 
 void
-panfrost_drm_force_flush_fragment(struct panfrost_context *ctx,
-  struct pipe_fence_handle **fence)
+panfrost_drm_force_flush_fragment(struct panfrost_context *ctx)
 {
 struct pipe_context *gallium = (struct pipe_context *) ctx;
 struct panfrost_screen *screen = pan_screen(gallium->screen);
@@ -149,12 +148,6 @@ panfrost_drm_force_flush_fragment(struct panfrost_context 
*ctx,
 /* The job finished up, so we're safe to clean it up now */
 panfrost_free_batch(ctx->last_batch);
 }
-
-if (fence) {
-struct panfrost_fence *f = panfrost_fence_create(ctx);
-gallium->screen->fence_reference(gallium->screen, fence, NULL);
-*fence = (struct pipe_fence_handle *)f;
-}
 }
 
 unsigned
diff --git a/src/gallium/drivers/panfrost/pan_screen.h 
b/src/gallium/drivers/panfrost/pan_screen.h
index aab141a563c2..4acdd3572c9f 100644
--- a/src/gallium/drivers/panfrost/pan_screen.h
+++ b/src/gallium/drivers/panfrost/pan_screen.h
@@ -123,8 +123,7 @@ pan_screen(struct pipe_screen *p)
 int
 panfrost_drm_submit_vs_fs_batch(struct panfrost_batch *batch, bool has_draws);
 void
-panfrost_drm_force_flush_fragment(struct panfrost_context *ctx,
-  struct pipe_fence_handle **fence);
+panfrost_drm_force_flush_fragment(struct panfrost_context *ctx);
 unsigned
 panfrost_drm_query_gpu_version(struct panfros

[Mesa-dev] [PATCH v3 23/25] panfrost: Remove unneeded add_bo() in initialize_surface()

2019-09-05 Thread Boris Brezillon
Should already be added in panfrost_draw_vbo() and panfrost_clear(),
no need to add it here too.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_fragment.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_fragment.c 
b/src/gallium/drivers/panfrost/pan_fragment.c
index cbb95b79f52a..00ff363a1bba 100644
--- a/src/gallium/drivers/panfrost/pan_fragment.c
+++ b/src/gallium/drivers/panfrost/pan_fragment.c
@@ -42,9 +42,6 @@ panfrost_initialize_surface(
 struct panfrost_resource *rsrc = pan_resource(surf->texture);
 
 rsrc->slices[level].initialized = true;
-
-assert(rsrc->bo);
-panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RW);
 }
 
 /* Generate a fragment job. This should be called once per frame. (According to
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 21/25] panfrost: Add new helpers to describe job dependencies on BOs

2019-09-05 Thread Boris Brezillon
Batch ordering is most of the time enforced by the resources they are
reading/writing from/to. This patch adds some new helpers to keep track
of that and modifies the existing add_bo() helper to pass flags encoding
the type of access a batch intends to do on this BO.

Since all resources are backed by BOs, and
given we might want to describe dependencies on BOs that are not
exposed as resources, we decided to use BOs as keys on our hash tables.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_allocate.c   |   2 +-
 src/gallium/drivers/panfrost/pan_blend_cso.c  |   2 +-
 src/gallium/drivers/panfrost/pan_context.c|  10 +-
 src/gallium/drivers/panfrost/pan_context.h|   5 +
 src/gallium/drivers/panfrost/pan_drm.c|   6 +-
 src/gallium/drivers/panfrost/pan_fragment.c   |   2 +-
 src/gallium/drivers/panfrost/pan_instancing.c |   2 +-
 src/gallium/drivers/panfrost/pan_job.c| 124 +-
 src/gallium/drivers/panfrost/pan_job.h|  21 ++-
 src/gallium/drivers/panfrost/pan_varyings.c   |   2 +-
 10 files changed, 159 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_allocate.c 
b/src/gallium/drivers/panfrost/pan_allocate.c
index 7938196e3e4f..7b0a7baa32dc 100644
--- a/src/gallium/drivers/panfrost/pan_allocate.c
+++ b/src/gallium/drivers/panfrost/pan_allocate.c
@@ -67,7 +67,7 @@ panfrost_allocate_transient(struct panfrost_batch *batch, 
size_t sz)
 
 /* We can't reuse the current BO, but we can create a new one. 
*/
 bo = panfrost_bo_create(screen, bo_sz, 0);
-panfrost_batch_add_bo(batch, bo);
+panfrost_batch_add_bo(batch, bo, PAN_PRIVATE_BO);
 
 /* Creating a BO adds a reference, and then the job adds a
  * second one. So we need to pop back one reference */
diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c 
b/src/gallium/drivers/panfrost/pan_blend_cso.c
index 69897be4f007..b27e36a7ce28 100644
--- a/src/gallium/drivers/panfrost/pan_blend_cso.c
+++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
@@ -277,7 +277,7 @@ panfrost_get_blend_for_context(struct panfrost_context 
*ctx, unsigned rti)
 memcpy(final.shader.bo->cpu, shader->buffer, shader->size);
 
 /* Pass BO ownership to job */
-panfrost_batch_add_bo(batch, final.shader.bo);
+panfrost_batch_add_bo(batch, final.shader.bo, PAN_PRIVATE_BO);
 panfrost_bo_unreference(final.shader.bo);
 
 if (shader->patch_index) {
diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 3e0a3e9df992..c31dc1580524 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -160,6 +160,7 @@ panfrost_clear(
 struct panfrost_context *ctx = pan_context(pipe);
 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 
+panfrost_batch_add_fbo_bos(batch);
 panfrost_batch_clear(batch, buffers, color, depth, stencil);
 }
 
@@ -605,7 +606,7 @@ panfrost_upload_tex(
 
 /* Add the BO to the job so it's retained until the job is done. */
 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-panfrost_batch_add_bo(batch, rsrc->bo);
+panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RD);
 
 /* Add the usage flags in, since they can change across the CSO
  * lifetime due to layout switches */
@@ -724,7 +725,7 @@ static void panfrost_upload_ssbo_sysval(
 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
 
-panfrost_batch_add_bo(batch, bo);
+panfrost_batch_add_bo(batch, bo, PAN_SHARED_BO_RW);
 
 /* Upload address and size as sysval */
 uniform->du[0] = bo->gpu + sb.buffer_offset;
@@ -878,6 +879,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool 
with_vertex_data)
 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 struct panfrost_screen *screen = pan_screen(ctx->base.screen);
 
+panfrost_batch_add_fbo_bos(batch);
 panfrost_attach_vt_framebuffer(ctx);
 
 if (with_vertex_data) {
@@ -929,7 +931,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool 
with_vertex_data)
 
 panfrost_patch_shader_state(ctx, variant, 
PIPE_SHADER_FRAGMENT, false);
 
-panfrost_batch_add_bo(batch, variant->bo);
+panfrost_batch_add_bo(batch, variant->bo, PAN_PRIVATE_BO);
 
 #define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name
 
@@ -1389,7 +1391,7 @@ panfrost_get_index_buffer_mapped(struct panfrost_context 
*ctx, const struct pipe
 
 if (!info->has_user_indices) {
 /* Only resources can be directly mapped */
-panfrost_batch_add_bo(batch, rsrc->bo);
+  

[Mesa-dev] [PATCH v3 25/25] panfrost/ci: New tests are passing

2019-09-05 Thread Boris Brezillon
All dEQP-GLES2.functional.fbo.render.texsubimage.* tests are now
passing.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/ci/expected-failures.txt | 4 
 1 file changed, 4 deletions(-)

diff --git a/src/gallium/drivers/panfrost/ci/expected-failures.txt 
b/src/gallium/drivers/panfrost/ci/expected-failures.txt
index b0fc872a3009..3c707230dd23 100644
--- a/src/gallium/drivers/panfrost/ci/expected-failures.txt
+++ b/src/gallium/drivers/panfrost/ci/expected-failures.txt
@@ -53,10 +53,6 @@ 
dEQP-GLES2.functional.fbo.render.shared_colorbuffer.tex2d_rgb_depth_component16
 
dEQP-GLES2.functional.fbo.render.shared_depthbuffer.rbo_rgb565_depth_component16
 Fail
 
dEQP-GLES2.functional.fbo.render.shared_depthbuffer.tex2d_rgba_depth_component16
 Fail
 
dEQP-GLES2.functional.fbo.render.shared_depthbuffer.tex2d_rgb_depth_component16 
Fail
-dEQP-GLES2.functional.fbo.render.texsubimage.after_render_tex2d_rgba Fail
-dEQP-GLES2.functional.fbo.render.texsubimage.after_render_tex2d_rgb Fail
-dEQP-GLES2.functional.fbo.render.texsubimage.between_render_tex2d_rgba Fail
-dEQP-GLES2.functional.fbo.render.texsubimage.between_render_tex2d_rgb Fail
 dEQP-GLES2.functional.fragment_ops.depth_stencil.random.0 Fail
 dEQP-GLES2.functional.fragment_ops.depth_stencil.random.10 Fail
 dEQP-GLES2.functional.fragment_ops.depth_stencil.random.11 Fail
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 19/25] panfrost: Pass a batch to panfrost_set_value_job()

2019-09-05 Thread Boris Brezillon
So we can emit SET_VALUE jobs for a batch that's not currently bound
to the context.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_scoreboard.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_scoreboard.c 
b/src/gallium/drivers/panfrost/pan_scoreboard.c
index f0771a2c5b56..f340bb62662e 100644
--- a/src/gallium/drivers/panfrost/pan_scoreboard.c
+++ b/src/gallium/drivers/panfrost/pan_scoreboard.c
@@ -270,7 +270,7 @@ panfrost_scoreboard_queue_fused_job_prepend(
 /* Generates a set value job, used below as part of TILER job scheduling. */
 
 static struct panfrost_transfer
-panfrost_set_value_job(struct panfrost_context *ctx, mali_ptr polygon_list)
+panfrost_set_value_job(struct panfrost_batch *batch, mali_ptr polygon_list)
 {
 struct mali_job_descriptor_header job = {
 .job_type = JOB_TYPE_SET_VALUE,
@@ -282,7 +282,6 @@ panfrost_set_value_job(struct panfrost_context *ctx, 
mali_ptr polygon_list)
 .unknown = 0x3,
 };
 
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 struct panfrost_transfer transfer = panfrost_allocate_transient(batch, 
sizeof(job) + sizeof(payload));
memcpy(transfer.cpu, &job, sizeof(job));
memcpy(transfer.cpu + sizeof(job), &payload, sizeof(payload));
@@ -303,11 +302,10 @@ panfrost_scoreboard_set_value(struct panfrost_batch 
*batch)
 /* Okay, we do. Let's generate it. We'll need the job's polygon list
  * regardless of size. */
 
-struct panfrost_context *ctx = batch->ctx;
 mali_ptr polygon_list = panfrost_batch_get_polygon_list(batch, 0);
 
 struct panfrost_transfer job =
-panfrost_set_value_job(ctx, polygon_list);
+panfrost_set_value_job(batch, polygon_list);
 
 /* Queue it */
 panfrost_scoreboard_queue_compute_job(batch, job);
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 16/25] panfrost: Pass a batch to panfrost_{allocate, upload}_transient()

2019-09-05 Thread Boris Brezillon
We need that if we want to emit CMDs to a job that's not currently
bound to the context, which in turn will be needed if we want to relax
the job serialization we have right now (only flush jobs when we need
to: on a flush request, or when one job depends on results of other
jobs).

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_allocate.c   | 10 ++--
 src/gallium/drivers/panfrost/pan_allocate.h   |  7 +--
 src/gallium/drivers/panfrost/pan_compute.c| 10 ++--
 src/gallium/drivers/panfrost/pan_context.c| 51 +++
 src/gallium/drivers/panfrost/pan_fragment.c   |  2 +-
 src/gallium/drivers/panfrost/pan_instancing.c |  2 +-
 src/gallium/drivers/panfrost/pan_mfbd.c   |  3 +-
 src/gallium/drivers/panfrost/pan_scoreboard.c |  3 +-
 src/gallium/drivers/panfrost/pan_sfbd.c   |  2 +-
 src/gallium/drivers/panfrost/pan_varyings.c   |  8 +--
 10 files changed, 57 insertions(+), 41 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_allocate.c 
b/src/gallium/drivers/panfrost/pan_allocate.c
index beebb0bc6d7e..7938196e3e4f 100644
--- a/src/gallium/drivers/panfrost/pan_allocate.c
+++ b/src/gallium/drivers/panfrost/pan_allocate.c
@@ -40,10 +40,9 @@
  * into the pool and copy there */
 
 struct panfrost_transfer
-panfrost_allocate_transient(struct panfrost_context *ctx, size_t sz)
+panfrost_allocate_transient(struct panfrost_batch *batch, size_t sz)
 {
-struct panfrost_screen *screen = pan_screen(ctx->base.screen);
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+struct panfrost_screen *screen = pan_screen(batch->ctx->base.screen);
 
 /* Pad the size */
 sz = ALIGN_POT(sz, ALIGNMENT);
@@ -90,9 +89,10 @@ panfrost_allocate_transient(struct panfrost_context *ctx, 
size_t sz)
 }
 
 mali_ptr
-panfrost_upload_transient(struct panfrost_context *ctx, const void *data, 
size_t sz)
+panfrost_upload_transient(struct panfrost_batch *batch, const void *data,
+  size_t sz)
 {
-struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, 
sz);
+struct panfrost_transfer transfer = panfrost_allocate_transient(batch, 
sz);
 memcpy(transfer.cpu, data, sz);
 return transfer.gpu;
 }
diff --git a/src/gallium/drivers/panfrost/pan_allocate.h 
b/src/gallium/drivers/panfrost/pan_allocate.h
index 91c2af9c4f17..f18218fb32a1 100644
--- a/src/gallium/drivers/panfrost/pan_allocate.h
+++ b/src/gallium/drivers/panfrost/pan_allocate.h
@@ -33,7 +33,7 @@
 
 #include "util/list.h"
 
-struct panfrost_context;
+struct panfrost_batch;
 
 /* Represents a fat pointer for GPU-mapped memory, returned from the transient
  * allocator and not used for much else */
@@ -44,9 +44,10 @@ struct panfrost_transfer {
 };
 
 struct panfrost_transfer
-panfrost_allocate_transient(struct panfrost_context *ctx, size_t sz);
+panfrost_allocate_transient(struct panfrost_batch *batch, size_t sz);
 
 mali_ptr
-panfrost_upload_transient(struct panfrost_context *ctx, const void *data, 
size_t sz);
+panfrost_upload_transient(struct panfrost_batch *batch, const void *data,
+  size_t sz);
 
 #endif /* __PAN_ALLOCATE_H__ */
diff --git a/src/gallium/drivers/panfrost/pan_compute.c 
b/src/gallium/drivers/panfrost/pan_compute.c
index 51967fe481ef..4639c1b03c38 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -87,6 +87,9 @@ panfrost_launch_grid(struct pipe_context *pipe,
 {
 struct panfrost_context *ctx = pan_context(pipe);
 
+/* TODO: Do we want a special compute-only batch? */
+struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+
 ctx->compute_grid = info;
 
 struct mali_job_descriptor_header job = {
@@ -113,7 +116,7 @@ panfrost_launch_grid(struct pipe_context *pipe,
 };
 
 payload->postfix.framebuffer =
-panfrost_upload_transient(ctx, &compute_fbd, 
sizeof(compute_fbd));
+panfrost_upload_transient(batch, &compute_fbd, 
sizeof(compute_fbd));
 
 /* Invoke according to the grid info */
 
@@ -123,13 +126,10 @@ panfrost_launch_grid(struct pipe_context *pipe,
 
 /* Upload the payload */
 
-struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, 
sizeof(job) + sizeof(*payload));
+struct panfrost_transfer transfer = panfrost_allocate_transient(batch, 
sizeof(job) + sizeof(*payload));
memcpy(transfer.cpu, &job, sizeof(job));
 memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload));
 
-/* TODO: Do we want a special compute-only batch? */
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-
 /* Queue the job */
 panfrost_scoreboard_queue_compute_job(batch, transfer);
 
diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index fdc62d2f957f..a1b112c08919 100644
--- a/src

[Mesa-dev] [PATCH v3 20/25] panfrost: Prepare things to avoid flushes on FB switch

2019-09-05 Thread Boris Brezillon
panfrost_attach_vt_xxx() functions are now passed a batch, and the
generated FB desc is kept in panfrost_batch so we can switch FBs
without forcing a flush. The postfix->framebuffer field is restored
on the next attach_vt_framebuffer() call if the batch already has an
FB desc.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c | 17 +
 src/gallium/drivers/panfrost/pan_job.h |  3 +++
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index c56f404cd9e9..3e0a3e9df992 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -164,18 +164,16 @@ panfrost_clear(
 }
 
 static mali_ptr
-panfrost_attach_vt_mfbd(struct panfrost_context *ctx)
+panfrost_attach_vt_mfbd(struct panfrost_batch *batch)
 {
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 struct bifrost_framebuffer mfbd = panfrost_emit_mfbd(batch, ~0);
 
return panfrost_upload_transient(batch, &mfbd, sizeof(mfbd)) | 
MALI_MFBD;
 }
 
 static mali_ptr
-panfrost_attach_vt_sfbd(struct panfrost_context *ctx)
+panfrost_attach_vt_sfbd(struct panfrost_batch *batch)
 {
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 struct mali_single_framebuffer sfbd = panfrost_emit_sfbd(batch, ~0);
 
return panfrost_upload_transient(batch, &sfbd, sizeof(sfbd)) | 
MALI_SFBD;
@@ -192,12 +190,15 @@ panfrost_attach_vt_framebuffer(struct panfrost_context 
*ctx)
 }
 
 struct panfrost_screen *screen = pan_screen(ctx->base.screen);
-mali_ptr framebuffer = screen->require_sfbd ?
-   panfrost_attach_vt_sfbd(ctx) :
-   panfrost_attach_vt_mfbd(ctx);
+struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+
+if (!batch->framebuffer)
+batch->framebuffer = screen->require_sfbd ?
+ panfrost_attach_vt_sfbd(batch) :
+ panfrost_attach_vt_mfbd(batch);
 
 for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
-ctx->payloads[i].postfix.framebuffer = framebuffer;
+ctx->payloads[i].postfix.framebuffer = batch->framebuffer;
 }
 
 /* Reset per-frame context, called on context initialisation as well as after
diff --git a/src/gallium/drivers/panfrost/pan_job.h 
b/src/gallium/drivers/panfrost/pan_job.h
index ea832f2c3efe..48d483c9a724 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -115,6 +115,9 @@ struct panfrost_batch {
 
 /* Polygon list bound to the batch, or NULL if none bound yet */
 struct panfrost_bo *polygon_list;
+
+/* Framebuffer descriptor. */
+mali_ptr framebuffer;
 };
 
 /* Functions for managing the above */
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 22/25] panfrost: Delay payloads[].offset_start initialization

2019-09-05 Thread Boris Brezillon
panfrost_draw_vbo() might call the prim-convert/without_prim_restart
helpers, which will re-enter ->draw_vbo(). Let's delay the
payloads[].offset_start initialization so we don't initialize them
twice.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index c31dc1580524..02726e7cd349 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1447,9 +1447,6 @@ panfrost_draw_vbo(
 if (panfrost_scissor_culls_everything(ctx))
 return;
 
-ctx->payloads[PIPE_SHADER_VERTEX].offset_start = info->start;
-ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = info->start;
-
 int mode = info->mode;
 
 /* Fallback unsupported restart index */
@@ -1480,6 +1477,9 @@ panfrost_draw_vbo(
 }
 }
 
+ctx->payloads[PIPE_SHADER_VERTEX].offset_start = info->start;
+ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = info->start;
+
 /* Now that we have a guaranteed terminating path, find the job.
  * Assignment commented out to prevent unused warning */
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 15/25] panfrost: Move the batch submission logic to panfrost_batch_submit()

2019-09-05 Thread Boris Brezillon
We are about to patch panfrost_flush() to flush all pending batches,
not only the current one. In order to do that, we need to move the
'flush single batch' code to panfrost_batch_submit().

While at it, we get rid of the existing pipelining logic, which is
currently unused and replace it by an unconditional wait at the end of
panfrost_batch_submit(). A new pipeline logic will be introduced later
on.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c | 145 +
 src/gallium/drivers/panfrost/pan_context.h |   9 +-
 src/gallium/drivers/panfrost/pan_drm.c |  15 ---
 src/gallium/drivers/panfrost/pan_job.c | 125 +-
 src/gallium/drivers/panfrost/pan_screen.h  |   2 -
 5 files changed, 123 insertions(+), 173 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 6552052b8cad..fdc62d2f957f 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -203,7 +203,7 @@ panfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
 /* Reset per-frame context, called on context initialisation as well as after
  * flushing a frame */
 
-static void
+void
 panfrost_invalidate_frame(struct panfrost_context *ctx)
 {
 for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
@@ -1306,130 +1306,6 @@ panfrost_queue_draw(struct panfrost_context *ctx)
 
 /* The entire frame is in memory -- send it off to the kernel! */
 
-static void
-panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate,
-  struct panfrost_batch *batch)
-{
-panfrost_batch_submit(batch);
-
-/* If visual, we can stall a frame */
-
-if (!flush_immediate)
-panfrost_drm_force_flush_fragment(ctx);
-
-ctx->last_fragment_flushed = false;
-ctx->last_batch = batch;
-
-/* If readback, flush now (hurts the pipelined performance) */
-if (flush_immediate)
-panfrost_drm_force_flush_fragment(ctx);
-}
-
-static void
-panfrost_draw_wallpaper(struct pipe_context *pipe)
-{
-struct panfrost_context *ctx = pan_context(pipe);
-
-/* Nothing to reload? TODO: MRT wallpapers */
-if (ctx->pipe_framebuffer.cbufs[0] == NULL)
-return;
-
-/* Check if the buffer has any content on it worth preserving */
-
-struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[0];
-struct panfrost_resource *rsrc = pan_resource(surf->texture);
-unsigned level = surf->u.tex.level;
-
-if (!rsrc->slices[level].initialized)
-return;
-
-/* Save the batch */
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-
-ctx->wallpaper_batch = batch;
-
-/* Clamp the rendering area to the damage extent. The
- * KHR_partial_update() spec states that trying to render outside of
- * the damage region is "undefined behavior", so we should be safe.
- */
-unsigned damage_width = (rsrc->damage.extent.maxx - 
rsrc->damage.extent.minx);
-unsigned damage_height = (rsrc->damage.extent.maxy - 
rsrc->damage.extent.miny);
-
-if (damage_width && damage_height) {
-panfrost_batch_intersection_scissor(batch,
-rsrc->damage.extent.minx,
-rsrc->damage.extent.miny,
-rsrc->damage.extent.maxx,
-rsrc->damage.extent.maxy);
-}
-
-/* FIXME: Looks like aligning on a tile is not enough, but
- * aligning on twice the tile size seems to works. We don't
- * know exactly what happens here but this deserves extra
- * investigation to figure it out.
- */
-batch->minx = batch->minx & ~((MALI_TILE_LENGTH * 2) - 1);
-batch->miny = batch->miny & ~((MALI_TILE_LENGTH * 2) - 1);
-batch->maxx = MIN2(ALIGN_POT(batch->maxx, MALI_TILE_LENGTH * 2),
-   rsrc->base.width0);
-batch->maxy = MIN2(ALIGN_POT(batch->maxy, MALI_TILE_LENGTH * 2),
-   rsrc->base.height0);
-
-struct pipe_scissor_state damage;
-struct pipe_box rects[4];
-
-/* Clamp the damage box to the rendering area. */
-damage.minx = MAX2(batch->minx, rsrc->damage.biggest_rect.x);
-damage.miny = MAX2(batch->miny, rsrc->damage.biggest_rect.y);
-damage.maxx = MIN2(batch->maxx,
-   rsrc->damage.biggest_rect.x +
-   rsrc->damage.biggest_rect.width);
-damage.maxy = MIN2(batch->maxy,
-   rsrc->damage.biggest_rect.y +
-

[Mesa-dev] [PATCH v3 18/25] panfrost: Use ctx->wallpaper_batch in panfrost_blit_wallpaper()

2019-09-05 Thread Boris Brezillon
We'll soon be able to flush a batch that's not currently bound to the
context, which means ctx->pipe_framebuffer will not necessarily be the
FBO targeted by the wallpaper draw. Let's prepare for this case and
use ctx->wallpaper_batch in panfrost_blit_wallpaper().

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_blit.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_blit.c 
b/src/gallium/drivers/panfrost/pan_blit.c
index 4be8c044ee2f..2d44f06227bf 100644
--- a/src/gallium/drivers/panfrost/pan_blit.c
+++ b/src/gallium/drivers/panfrost/pan_blit.c
@@ -105,16 +105,17 @@ panfrost_blit(struct pipe_context *pipe,
 void
 panfrost_blit_wallpaper(struct panfrost_context *ctx, struct pipe_box *box)
 {
+struct panfrost_batch *batch = ctx->wallpaper_batch;
 struct pipe_blit_info binfo = { };
 
 panfrost_blitter_save(ctx, ctx->blitter_wallpaper);
 
-struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[0];
+struct pipe_surface *surf = batch->key.cbufs[0];
 unsigned level = surf->u.tex.level;
 unsigned layer = surf->u.tex.first_layer;
 assert(surf->u.tex.last_layer == layer);
 
-binfo.src.resource = binfo.dst.resource = 
ctx->pipe_framebuffer.cbufs[0]->texture;
+binfo.src.resource = binfo.dst.resource = batch->key.cbufs[0]->texture;
 binfo.src.level = binfo.dst.level = level;
 binfo.src.box.x = binfo.dst.box.x = box->x;
 binfo.src.box.y = binfo.dst.box.y = box->y;
@@ -123,9 +124,9 @@ panfrost_blit_wallpaper(struct panfrost_context *ctx, 
struct pipe_box *box)
 binfo.src.box.height = binfo.dst.box.height = box->height;
 binfo.src.box.depth = binfo.dst.box.depth = 1;
 
-binfo.src.format = binfo.dst.format = 
ctx->pipe_framebuffer.cbufs[0]->format;
+binfo.src.format = binfo.dst.format = batch->key.cbufs[0]->format;
 
-assert(ctx->pipe_framebuffer.nr_cbufs == 1);
+assert(batch->key.nr_cbufs == 1);
 binfo.mask = PIPE_MASK_RGBA;
 binfo.filter = PIPE_TEX_FILTER_LINEAR;
 binfo.scissor_enable = FALSE;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 24/25] panfrost: Support batch pipelining

2019-09-05 Thread Boris Brezillon
We adjust the code to explicitly request flush of batches accessing
BOs they care about. Thanks to that, we can get rid of the implicit
serialization done in panfrost_batch_submit() and
panfrost_set_framebuffer_state(). Finally, panfrost_flush() is
changed to flush all pending batches.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_compute.c  |   2 +-
 src/gallium/drivers/panfrost/pan_context.c  | 145 +---
 src/gallium/drivers/panfrost/pan_job.c  |  15 +-
 src/gallium/drivers/panfrost/pan_resource.c |  26 ++--
 4 files changed, 115 insertions(+), 73 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_compute.c 
b/src/gallium/drivers/panfrost/pan_compute.c
index 4639c1b03c38..036dffbb17be 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -133,7 +133,7 @@ panfrost_launch_grid(struct pipe_context *pipe,
 /* Queue the job */
 panfrost_scoreboard_queue_compute_job(batch, transfer);
 
-panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
+panfrost_flush_all_batches(ctx, true);
 }
 
 void
diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 02726e7cd349..993744a1ffd0 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -150,6 +150,28 @@ panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned 
vertex_count)
 return framebuffer;
 }
 
+static void
+panfrost_flush_fbo_deps(struct panfrost_context *ctx)
+{
+struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
+for (unsigned i = 0; i < fb->nr_cbufs; i++) {
+if (!fb->cbufs[i])
+continue;
+
+struct panfrost_resource *rsrc = 
pan_resource(fb->cbufs[i]->texture);
+
+panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true);
+panfrost_flush_batches_reading_bo(ctx, rsrc->bo, true);
+}
+
+if (fb->zsbuf) {
+struct panfrost_resource *rsrc = 
pan_resource(fb->zsbuf->texture);
+
+panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true);
+panfrost_flush_batches_reading_bo(ctx, rsrc->bo, true);
+}
+}
+
 static void
 panfrost_clear(
 struct pipe_context *pipe,
@@ -160,6 +182,7 @@ panfrost_clear(
 struct panfrost_context *ctx = pan_context(pipe);
 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 
+panfrost_flush_fbo_deps(ctx);
 panfrost_batch_add_fbo_bos(batch);
 panfrost_batch_clear(batch, buffers, color, depth, stencil);
 }
@@ -1324,10 +1347,9 @@ panfrost_flush(
 unsigned flags)
 {
 struct panfrost_context *ctx = pan_context(pipe);
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 
-/* Submit the frame itself */
-panfrost_batch_submit(batch);
+/* Submit all pending jobs */
+panfrost_flush_all_batches(ctx, false);
 
 if (fence) {
 struct panfrost_fence *f = panfrost_fence_create(ctx);
@@ -1433,6 +1455,71 @@ panfrost_statistics_record(
 ctx->tf_prims_generated += prims;
 }
 
+static void
+panfrost_flush_draw_deps(struct panfrost_context *ctx, const struct 
pipe_draw_info *info)
+{
+   struct panfrost_resource *rsrc;
+
+if (ctx->wallpaper_batch)
+return;
+
+panfrost_flush_fbo_deps(ctx);
+
+for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
+for (unsigned i = 0; i < ctx->sampler_view_count[stage]; i++) {
+struct panfrost_sampler_view *view = 
ctx->sampler_views[stage][i];
+
+if (!view)
+continue;
+
+rsrc = pan_resource(view->base.texture);
+panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true);
+}
+
+for (unsigned i = 0; i < 32; i++) {
+if (!(ctx->ssbo_mask[stage] & (1 << i)))
+continue;
+
+rsrc = pan_resource(ctx->ssbo[stage][i].buffer);
+panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true);
+panfrost_flush_batches_reading_bo(ctx, rsrc->bo, true);
+}
+}
+
+if (info->index_size && !info->has_user_indices) {
+struct panfrost_resource *rsrc = 
pan_resource(info->index.resource);
+
+panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true);
+}
+
+for (unsigned i = 0; ctx->vertex && i < ctx->vertex->num_elements; 
i++) {
+struct pipe_vertex_element *velem = &ctx->vertex->pipe[i];
+unsigned vbi = velem-&

[Mesa-dev] [PATCH v3 17/25] panfrost: Pass a batch to functions emitting FB descs

2019-09-05 Thread Boris Brezillon
So we can emit such jobs to a batch that's not currently bound to the
context.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c  | 36 ++---
 src/gallium/drivers/panfrost/pan_context.h  | 10 +++---
 src/gallium/drivers/panfrost/pan_drm.c  |  2 +-
 src/gallium/drivers/panfrost/pan_fragment.c | 11 +++
 src/gallium/drivers/panfrost/pan_mfbd.c | 25 ++
 src/gallium/drivers/panfrost/pan_sfbd.c | 13 
 6 files changed, 44 insertions(+), 53 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index a1b112c08919..c56f404cd9e9 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -55,14 +55,12 @@
 /* Framebuffer descriptor */
 
 static struct midgard_tiler_descriptor
-panfrost_emit_midg_tiler(
-struct panfrost_context *ctx,
-unsigned width,
-unsigned height,
-unsigned vertex_count)
+panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count)
 {
+struct panfrost_context *ctx = batch->ctx;
 struct midgard_tiler_descriptor t = {};
-struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+unsigned height = batch->key.height;
+unsigned width = batch->key.width;
 
 t.hierarchy_mask =
 panfrost_choose_hierarchy_mask(width, height, vertex_count);
@@ -105,10 +103,11 @@ panfrost_emit_midg_tiler(
 }
 
 struct mali_single_framebuffer
-panfrost_emit_sfbd(struct panfrost_context *ctx, unsigned vertex_count)
+panfrost_emit_sfbd(struct panfrost_batch *batch, unsigned vertex_count)
 {
-unsigned width = ctx->pipe_framebuffer.width;
-unsigned height = ctx->pipe_framebuffer.height;
+struct panfrost_context *ctx = batch->ctx;
+unsigned width = batch->key.width;
+unsigned height = batch->key.height;
 
 struct mali_single_framebuffer framebuffer = {
 .width = MALI_POSITIVE(width),
@@ -117,18 +116,18 @@ panfrost_emit_sfbd(struct panfrost_context *ctx, unsigned 
vertex_count)
 .format = 0x3000,
 .clear_flags = 0x1000,
 .unknown_address_0 = ctx->scratchpad->gpu,
-.tiler = panfrost_emit_midg_tiler(ctx,
-  width, height, vertex_count),
+.tiler = panfrost_emit_midg_tiler(batch, vertex_count),
 };
 
 return framebuffer;
 }
 
 struct bifrost_framebuffer
-panfrost_emit_mfbd(struct panfrost_context *ctx, unsigned vertex_count)
+panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
 {
-unsigned width = ctx->pipe_framebuffer.width;
-unsigned height = ctx->pipe_framebuffer.height;
+struct panfrost_context *ctx = batch->ctx;
+unsigned width = batch->key.width;
+unsigned height = batch->key.height;
 
 struct bifrost_framebuffer framebuffer = {
 .unk0 = 0x1e5, /* 1e4 if no spill */
@@ -139,14 +138,13 @@ panfrost_emit_mfbd(struct panfrost_context *ctx, unsigned 
vertex_count)
 
 .unk1 = 0x1080,
 
-.rt_count_1 = MALI_POSITIVE(ctx->pipe_framebuffer.nr_cbufs),
+.rt_count_1 = MALI_POSITIVE(batch->key.nr_cbufs),
 .rt_count_2 = 4,
 
 .unknown2 = 0x1f,
 
 .scratchpad = ctx->scratchpad->gpu,
-.tiler = panfrost_emit_midg_tiler(ctx,
-  width, height, vertex_count)
+.tiler = panfrost_emit_midg_tiler(batch, vertex_count)
 };
 
 return framebuffer;
@@ -169,7 +167,7 @@ static mali_ptr
 panfrost_attach_vt_mfbd(struct panfrost_context *ctx)
 {
 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-struct bifrost_framebuffer mfbd = panfrost_emit_mfbd(ctx, ~0);
+struct bifrost_framebuffer mfbd = panfrost_emit_mfbd(batch, ~0);
 
return panfrost_upload_transient(batch, &mfbd, sizeof(mfbd)) | 
MALI_MFBD;
 }
@@ -178,7 +176,7 @@ static mali_ptr
 panfrost_attach_vt_sfbd(struct panfrost_context *ctx)
 {
 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-struct mali_single_framebuffer sfbd = panfrost_emit_sfbd(ctx, ~0);
+struct mali_single_framebuffer sfbd = panfrost_emit_sfbd(batch, ~0);
 
return panfrost_upload_transient(batch, &sfbd, sizeof(sfbd)) | 
MALI_SFBD;
 }
diff --git a/src/gallium/drivers/panfrost/pan_context.h 
b/src/gallium/drivers/panfrost/pan_context.h
index f5e54f862cca..f0578d6808d2 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -315,17 +315,17 @@ panfrost_flush(
 struct pipe_fence_handle **fence,
 unsigned flags);
 
-mali_ptr panfrost_sfbd_fragment

  1   2   3   4   >