On Fri, 13 Sep 2019 14:44:14 +0100
Steven Price <steven.pr...@arm.com> wrote:

> On 13/09/2019 12:17, Boris Brezillon wrote:
> > The READ/WRITE flags are particularly useful if we want to avoid
> > serialization of jobs that read from the same BO but never write to it.
> > The NO_IMPLICIT_FENCE might be useful when the user knows the BO is
> > shared but jobs are using different portions of the buffer.
> > 
> > Signed-off-by: Boris Brezillon <boris.brezil...@collabora.com>  
> 
> Good feature - we could do with an (easy) way of the user driver
> detecting this - so it might be worth bumping the driver version for this?

I was trying to support this feature without adding a new feature
flag or bumping the minor version, but I guess there's no good
reason to do that.

> 
> Some more comments below.
> 
> > ---
> >  drivers/gpu/drm/panfrost/panfrost_drv.c |  72 +++++++++--
> >  drivers/gpu/drm/panfrost/panfrost_job.c | 164 +++++++++++++++++++-----
> >  drivers/gpu/drm/panfrost/panfrost_job.h |  11 +-
> >  include/uapi/drm/panfrost_drm.h         |  41 ++++++
> >  4 files changed, 247 insertions(+), 41 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c 
> > b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > index d74442d71048..08082fd557c3 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > @@ -119,20 +119,76 @@ panfrost_lookup_bos(struct drm_device *dev,
> >               struct drm_panfrost_submit *args,
> >               struct panfrost_job *job)
> >  {
> > -   job->bo_count = args->bo_handle_count;
> > +   struct drm_panfrost_submit_bo *bo_descs = NULL;
> > +   u32 *handles = NULL;
> > +   u32 i, bo_count;
> > +   int ret = 0;
> >  
> > -   if (!job->bo_count)
> > +   bo_count = args->bo_desc_count ?
> > +              args->bo_desc_count : args->bo_handle_count;
> > +   if (!bo_count)
> >             return 0;
> >  
> > -   job->implicit_fences = kvmalloc_array(job->bo_count,
> > -                             sizeof(struct dma_fence *),
> > +   job->bos = kvmalloc_array(bo_count, sizeof(*job->bos),
> >                               GFP_KERNEL | __GFP_ZERO);
> > -   if (!job->implicit_fences)
> > +   if (!job->bos)
> >             return -ENOMEM;
> >  
> > -   return drm_gem_objects_lookup(file_priv,
> > -                                 (void __user 
> > *)(uintptr_t)args->bo_handles,
> > -                                 job->bo_count, &job->bos);
> > +   job->bo_count = bo_count;
> > +   bo_descs = kvmalloc_array(bo_count, sizeof(*bo_descs),
> > +                             GFP_KERNEL | __GFP_ZERO);
> > +   if (!bo_descs) {
> > +           ret = -ENOMEM;
> > +           goto out;  
> 
> This can be just "return -ENOMEM" - both handles and bo_descs will be NULL.

Will fix that.

> 
> > +   }
> > +
> > +   if (!args->bo_desc_count) {
> > +           handles = kvmalloc_array(bo_count, sizeof(*handles),
> > +                                    GFP_KERNEL);
> > +           if (!handles) {
> > +                   ret =-ENOMEM;
> > +                   goto out;
> > +           }
> > +
> > +           if (copy_from_user(handles,
> > +                              (void __user *)(uintptr_t)args->bo_handles,
> > +                              job->bo_count * sizeof(*handles))) {
> > +                   ret = -EFAULT;
> > +                   goto out;
> > +           }
> > +
> > +           for (i = 0; i < job->bo_count; i++) {
> > +                   bo_descs[i].handle = handles[i];
> > +                   bo_descs[i].flags = PANFROST_SUBMIT_BO_WRITE |
> > +                                       PANFROST_SUBMIT_BO_READ;  
> 
> You can use PANFROST_SUBMIT_BO_RW here.

That as well.

> 
> > +           }
> > +   } else if (copy_from_user(bo_descs,
> > +                             (void __user *)(uintptr_t)args->bo_descs,
> > +                             job->bo_count * sizeof(*bo_descs))) {
> > +           ret = -EFAULT;
> > +           goto out;
> > +   }
> > +
> > +   for (i = 0; i < job->bo_count; i++) {
> > +           if ((bo_descs[i].flags & ~PANFROST_SUBMIT_BO_VALID_FLAGS) ||
> > +                    !(bo_descs[i].flags & PANFROST_SUBMIT_BO_RW)) {
> > +                   ret = -EINVAL;
> > +                   goto out;
> > +           }
> > +
> > +           job->bos[i].flags = bo_descs[i].flags;
> > +           job->bos[i].obj = drm_gem_object_lookup(file_priv,
> > +                                                   bo_descs[i].handle);
> > +           if (!job->bos[i].obj) {
> > +                   ret = -ENOENT;
> > +                   goto out;
> > +           }
> > +   }
> > +
> > +out:
> > +   kvfree(handles);
> > +   kvfree(bo_descs);
> > +   return ret;
> >  }
> >  
> >  /**
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c 
> > b/drivers/gpu/drm/panfrost/panfrost_job.c
> > index 05c85f45a0de..e4b74fde9339 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> > @@ -193,24 +193,116 @@ static void panfrost_job_hw_submit(struct 
> > panfrost_job *job, int js)
> >     pm_runtime_put_autosuspend(pfdev->dev);
> >  }
> >  
> > -static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
> > -                                      int bo_count,
> > -                                      struct dma_fence **implicit_fences)
> > +static int panfrost_acquire_object_fences(struct panfrost_job *job)
> >  {
> > -   int i;
> > +   int i, ret;
> >  
> > -   for (i = 0; i < bo_count; i++)
> > -           implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv);
> > +   for (i = 0; i < job->bo_count; i++) {
> > +           struct panfrost_job_bo_desc *bo = &job->bos[i];
> > +           struct dma_resv *robj = bo->obj->resv;
> > +
> > +           if (!(job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)) {
> > +                   ret = dma_resv_reserve_shared(robj, 1);
> > +                   if (ret)
> > +                           return ret;
> > +           }
> > +
> > +           if (bo->flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
> > +                   continue;
> > +
> > +           if (bo->flags & PANFROST_SUBMIT_BO_WRITE) {
> > +                   ret = dma_resv_get_fences_rcu(robj, &bo->excl,
> > +                                                 &bo->shared_count,
> > +                                                 &bo->shared);
> > +                   if (ret)
> > +                           return ret;
> > +           } else {
> > +                   bo->excl = dma_resv_get_excl_rcu(robj);
> > +           }  
> 
> The implementation of NO_IMPLICIT_FENCE seems a bit strange to me: READ
> | NO_IMPLICIT_FENCE still reserves space for a shared fence. I don't
> understand why.

The NO_IMPLICIT_FENCE flag is telling the core we don't want to wait on
the fence installed by other jobs on this BO, but other might want to
wait on our render-done fence (when CPU accesses are required, or simply
because other jobs didn't pass the NO_IMPLICIT fence flag).

> 
> > +   }
> > +
> > +   return 0;
> >  }
> >  
> > -static void panfrost_attach_object_fences(struct drm_gem_object **bos,
> > -                                     int bo_count,
> > -                                     struct dma_fence *fence)
> > +static void panfrost_attach_object_fences(struct panfrost_job *job)
> >  {
> >     int i;
> >  
> > -   for (i = 0; i < bo_count; i++)
> > -           dma_resv_add_excl_fence(bos[i]->resv, fence);
> > +   for (i = 0; i < job->bo_count; i++) {
> > +           struct drm_gem_object *obj = job->bos[i].obj;
> > +
> > +           if (job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)
> > +                   dma_resv_add_excl_fence(obj->resv,
> > +                                           job->render_done_fence);
> > +           else
> > +                   dma_resv_add_shared_fence(obj->resv,
> > +                                             job->render_done_fence);
> > +   }
> > +}
> > +
> > +static int panfrost_job_lock_bos(struct panfrost_job *job,
> > +                            struct ww_acquire_ctx *acquire_ctx)
> > +{
> > +   int contended = -1;
> > +   int i, ret;
> > +
> > +   ww_acquire_init(acquire_ctx, &reservation_ww_class);
> > +
> > +retry:
> > +   if (contended != -1) {
> > +           struct drm_gem_object *obj = job->bos[contended].obj;
> > +
> > +           ret = ww_mutex_lock_slow_interruptible(&obj->resv->lock,
> > +                                                  acquire_ctx);  
> 
> dma_resv_lock_slot_interruptible()?

Yep (I started this on an older kernel version where dma_resv_ helpers
didn't exit and apparently forgot to convert a few things when
rebasing).

> 
> > +           if (ret) {
> > +                   ww_acquire_done(acquire_ctx);
> > +                   return ret;
> > +           }
> > +   }
> > +
> > +   for (i = 0; i < job->bo_count; i++) {
> > +           if (i == contended)
> > +                   continue;
> > +
> > +           ret = ww_mutex_lock_interruptible(&job->bos[i].obj->resv->lock,
> > +                                             acquire_ctx);  
> 
> dma_resv_lock_interruptible()?

Ditto

> 
> > +           if (ret) {
> > +                   int j;
> > +
> > +                   for (j = 0; j < i; j++)
> > +                           ww_mutex_unlock(&job->bos[j].obj->resv->lock);
> > +
> > +                   if (contended != -1 && contended >= i) {
> > +                           struct drm_gem_object *contended_obj;
> > +
> > +                           contended_obj = job->bos[contended].obj;
> > +                           ww_mutex_unlock(&contended_obj->resv->lock);
> > +                   }
> > +
> > +                   if (ret == -EDEADLK) {
> > +                           contended = i;
> > +                           goto retry;
> > +                   }
> > +
> > +                   ww_acquire_done(acquire_ctx);
> > +                   return ret;
> > +           }
> > +   }
> > +
> > +   ww_acquire_done(acquire_ctx);
> > +
> > +   return 0;
> > +}  
> 
> This looks like a copy of drm_gem_lock_reservations(). The only reason
> for it as far as I can see is because we now have an array of struct
> panfrost_job_bo_desc rather than a direct array of struct
> drm_gem_object. I'm not sure having everything neatly in one structure
> is worth this cost?

I really like the ideas of having all BO related fields put in a
separate structure, but okay.


> >  /**
> >   * struct drm_panfrost_submit - ioctl argument for submitting commands to 
> > the 3D
> >   * engine.
> > @@ -68,6 +90,25 @@ struct drm_panfrost_submit {
> >  
> >     /** A combination of PANFROST_JD_REQ_* */
> >     __u32 requirements;
> > +
> > +   /**
> > +    * Pointer to a u32 array of &drm_panfrost_submit_bo_desc objects. This
> > +    * field is meant to replace &drm_panfrost_submit.bo_handles which did
> > +    * not provide enough information to relax synchronization between
> > +    * jobs that only only read the BO they share. When both
> > +    * &drm_panfrost_submit.bo_handles and &drm_panfrost_submit.bo_descs
> > +    * are provided, drm_panfrost_submit.bo_handles is ignored.
> > +    */
> > +   __u64 bo_descs;
> > +
> > +   /**
> > +    * Number of BO descriptors passed in (size is that times
> > +    * sizeof(drm_panfrost_submit_bo_desc)).
> > +    */
> > +   __u32 bo_desc_count;  
> 
> We don't really need another count field. bo_handle_count could be
> re-used. Indeed this could even be handled with just a flags field with
> a new flag specifying that bo_handles no longer points to handles but to
> bo_desc objects instead.

As said above, I was trying to avoid bumping the minor version or
adding a feature flag, hence the decision to add 2 separate fields for
the BO desc array. I agree, it's probably not the best solution, so
I'll add a new _REQ_ flag and bump the minor version as you suggest.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

Reply via email to