Re: [PATCH v2 4/4] drm/v3d: add multiple syncobjs support

2021-09-30 Thread Melissa Wen
On 09/30, Melissa Wen wrote:
> On 09/30, Iago Toral wrote:
> > On Wed, 2021-09-29 at 10:45 +0100, Melissa Wen wrote:
> > > Using the generic extension from the previous patch, a specific
> > > multisync
> > > extension enables more than one in/out binary syncobj per job
> > > submission.
> > > Arrays of syncobjs are set in struct drm_v3d_multisync, that also
> > > cares
> > > of determining the stage for sync (wait deps) according to the job
> > > queue.
> > > 
> > > v2:
> > > - subclass the generic extension struct (Daniel)
> > > - simplify adding dependency conditions to make understandable (Iago)
> > > 
> > > Signed-off-by: Melissa Wen 
> > > ---
> > >  drivers/gpu/drm/v3d/v3d_drv.c |   6 +-
> > >  drivers/gpu/drm/v3d/v3d_drv.h |  23 +++--
> > >  drivers/gpu/drm/v3d/v3d_gem.c | 176 ++
> > > 
> > >  include/uapi/drm/v3d_drm.h|  49 +-
> > >  4 files changed, 224 insertions(+), 30 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c
> > > b/drivers/gpu/drm/v3d/v3d_drv.c
> > > index 3d6b9bcce2f7..bd46396a1ae0 100644
> > > --- a/drivers/gpu/drm/v3d/v3d_drv.c
> > > +++ b/drivers/gpu/drm/v3d/v3d_drv.c
> > > @@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device
> > > *dev, void *data,
> > >   case DRM_V3D_PARAM_SUPPORTS_PERFMON:
> > >   args->value = (v3d->ver >= 40);
> > >   return 0;
> > > + case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
> > > + args->value = 1;
> > > + return 0;
> > >   default:
> > >   DRM_DEBUG("Unknown parameter %d\n", args->param);
> > >   return -EINVAL;
> > > @@ -135,9 +138,8 @@ v3d_postclose(struct drm_device *dev, struct
> > > drm_file *file)
> > >   struct v3d_file_priv *v3d_priv = file->driver_priv;
> > >   enum v3d_queue q;
> > >  
> > > - for (q = 0; q < V3D_MAX_QUEUES; q++) {
> > > + for (q = 0; q < V3D_MAX_QUEUES; q++)
> > >   drm_sched_entity_destroy(_priv->sched_entity[q]);
> > > - }
> > >  
> > >   v3d_perfmon_close_file(v3d_priv);
> > >   kfree(v3d_priv);
> > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h
> > > b/drivers/gpu/drm/v3d/v3d_drv.h
> > > index b900a050d5e2..b14ff1e96f49 100644
> > > --- a/drivers/gpu/drm/v3d/v3d_drv.h
> > > +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> > > @@ -19,15 +19,6 @@ struct reset_control;
> > >  
> > >  #define GMP_GRANULARITY (128 * 1024)
> > >  
> > > -/* Enum for each of the V3D queues. */
> > > -enum v3d_queue {
> > > - V3D_BIN,
> > > - V3D_RENDER,
> > > - V3D_TFU,
> > > - V3D_CSD,
> > > - V3D_CACHE_CLEAN,
> > > -};
> > > -
> > >  #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
> > >  
> > >  struct v3d_queue_state {
> > > @@ -294,6 +285,20 @@ struct v3d_csd_job {
> > >   struct drm_v3d_submit_csd args;
> > >  };
> > >  
> > > +struct v3d_submit_outsync {
> > > + struct drm_syncobj *syncobj;
> > > +};
> > > +
> > > +struct v3d_submit_ext {
> > > + u32 wait_stage;
> > > +
> > > + u32 in_sync_count;
> > > + u64 in_syncs;
> > > +
> > > + u32 out_sync_count;
> > > + struct v3d_submit_outsync *out_syncs;
> > > +};
> > > +
> > >  /**
> > >   * __wait_for - magic wait macro
> > >   *
> > > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c
> > > b/drivers/gpu/drm/v3d/v3d_gem.c
> > > index b912419027f7..e92998d39eaa 100644
> > > --- a/drivers/gpu/drm/v3d/v3d_gem.c
> > > +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> > > @@ -454,11 +454,12 @@ v3d_job_add_deps(struct drm_file *file_priv,
> > > struct v3d_job *job,
> > >  static int
> > >  v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
> > >void **container, size_t size, void (*free)(struct kref
> > > *ref),
> > > -  u32 in_sync, enum v3d_queue queue)
> > > +  u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue
> > > queue)
> > >  {
> > >   struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
> > >   struct v3d_job *job;
> > > - int ret;
> > > + bool has_multisync = (se && se->in_sync_count);
> > 
> > I think this is not correct... we could be using the multisync
> > interface and pass 0 in_syncs and/or 0 out_syncs... what should
> > determine if we take the multisync path is the presence of the
> > extension alone.
> hmm.. yeah. so, I should drop this has_multisync and change conditions
> to only check if we have a submit_ext (that means we have multisync
> support) and move the in_sync_count to check if we should add any wait
> semaphores, as below: 
> 
> > 
> > > + int ret, i;
> > >  
> > >   *container = kcalloc(1, size, GFP_KERNEL);
> > >   if (!*container) {
> > > @@ -479,9 +480,28 @@ v3d_job_init(struct v3d_dev *v3d, struct
> > > drm_file *file_priv,
> > >   if (ret)
> > >   goto fail;
> > >  
> > > - ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
> > > - if (ret)
> > > - goto fail_job;
>   if (se) {
>   if (se->in_sync_count && se->wait_stage == queue) {
never mind. I realized store extension flag on v3d_submit_ext works
better for handling multiples wait and signal semaphores.
Just sent a v3.

Thanks,

Melissa
> > > +  

Re: [PATCH v2 4/4] drm/v3d: add multiple syncobjs support

2021-09-30 Thread Melissa Wen
On 09/30, Iago Toral wrote:
> On Wed, 2021-09-29 at 10:45 +0100, Melissa Wen wrote:
> > Using the generic extension from the previous patch, a specific
> > multisync
> > extension enables more than one in/out binary syncobj per job
> > submission.
> > Arrays of syncobjs are set in struct drm_v3d_multisync, that also
> > cares
> > of determining the stage for sync (wait deps) according to the job
> > queue.
> > 
> > v2:
> > - subclass the generic extension struct (Daniel)
> > - simplify adding dependency conditions to make understandable (Iago)
> > 
> > Signed-off-by: Melissa Wen 
> > ---
> >  drivers/gpu/drm/v3d/v3d_drv.c |   6 +-
> >  drivers/gpu/drm/v3d/v3d_drv.h |  23 +++--
> >  drivers/gpu/drm/v3d/v3d_gem.c | 176 ++
> > 
> >  include/uapi/drm/v3d_drm.h|  49 +-
> >  4 files changed, 224 insertions(+), 30 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c
> > b/drivers/gpu/drm/v3d/v3d_drv.c
> > index 3d6b9bcce2f7..bd46396a1ae0 100644
> > --- a/drivers/gpu/drm/v3d/v3d_drv.c
> > +++ b/drivers/gpu/drm/v3d/v3d_drv.c
> > @@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device
> > *dev, void *data,
> > case DRM_V3D_PARAM_SUPPORTS_PERFMON:
> > args->value = (v3d->ver >= 40);
> > return 0;
> > +   case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
> > +   args->value = 1;
> > +   return 0;
> > default:
> > DRM_DEBUG("Unknown parameter %d\n", args->param);
> > return -EINVAL;
> > @@ -135,9 +138,8 @@ v3d_postclose(struct drm_device *dev, struct
> > drm_file *file)
> > struct v3d_file_priv *v3d_priv = file->driver_priv;
> > enum v3d_queue q;
> >  
> > -   for (q = 0; q < V3D_MAX_QUEUES; q++) {
> > +   for (q = 0; q < V3D_MAX_QUEUES; q++)
> > drm_sched_entity_destroy(_priv->sched_entity[q]);
> > -   }
> >  
> > v3d_perfmon_close_file(v3d_priv);
> > kfree(v3d_priv);
> > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h
> > b/drivers/gpu/drm/v3d/v3d_drv.h
> > index b900a050d5e2..b14ff1e96f49 100644
> > --- a/drivers/gpu/drm/v3d/v3d_drv.h
> > +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> > @@ -19,15 +19,6 @@ struct reset_control;
> >  
> >  #define GMP_GRANULARITY (128 * 1024)
> >  
> > -/* Enum for each of the V3D queues. */
> > -enum v3d_queue {
> > -   V3D_BIN,
> > -   V3D_RENDER,
> > -   V3D_TFU,
> > -   V3D_CSD,
> > -   V3D_CACHE_CLEAN,
> > -};
> > -
> >  #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
> >  
> >  struct v3d_queue_state {
> > @@ -294,6 +285,20 @@ struct v3d_csd_job {
> > struct drm_v3d_submit_csd args;
> >  };
> >  
> > +struct v3d_submit_outsync {
> > +   struct drm_syncobj *syncobj;
> > +};
> > +
> > +struct v3d_submit_ext {
> > +   u32 wait_stage;
> > +
> > +   u32 in_sync_count;
> > +   u64 in_syncs;
> > +
> > +   u32 out_sync_count;
> > +   struct v3d_submit_outsync *out_syncs;
> > +};
> > +
> >  /**
> >   * __wait_for - magic wait macro
> >   *
> > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c
> > b/drivers/gpu/drm/v3d/v3d_gem.c
> > index b912419027f7..e92998d39eaa 100644
> > --- a/drivers/gpu/drm/v3d/v3d_gem.c
> > +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> > @@ -454,11 +454,12 @@ v3d_job_add_deps(struct drm_file *file_priv,
> > struct v3d_job *job,
> >  static int
> >  v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
> >  void **container, size_t size, void (*free)(struct kref
> > *ref),
> > -u32 in_sync, enum v3d_queue queue)
> > +u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue
> > queue)
> >  {
> > struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
> > struct v3d_job *job;
> > -   int ret;
> > +   bool has_multisync = (se && se->in_sync_count);
> 
> I think this is not correct... we could be using the multisync
> interface and pass 0 in_syncs and/or 0 out_syncs... what should
> determine if we take the multisync path is the presence of the
> extension alone.
hmm.. yeah. so, I should drop this has_multisync and change conditions
to only check if we have a submit_ext (that means we have multisync
support) and move the in_sync_count to check if we should add any wait
semaphores, as below: 

> 
> > +   int ret, i;
> >  
> > *container = kcalloc(1, size, GFP_KERNEL);
> > if (!*container) {
> > @@ -479,9 +480,28 @@ v3d_job_init(struct v3d_dev *v3d, struct
> > drm_file *file_priv,
> > if (ret)
> > goto fail;
> >  
> > -   ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
> > -   if (ret)
> > -   goto fail_job;
if (se) {
if (se->in_sync_count && se->wait_stage == queue) {
> > +   struct drm_v3d_sem __user *handle =
> > u64_to_user_ptr(se->in_syncs);
> > +
> > +   for (i = 0; i < se->in_sync_count; i++) {
> > +   struct drm_v3d_sem in;
> > +
> > +   ret = copy_from_user(, handle++,
> > sizeof(in));
> > +   if (ret) {
> > +

Re: [PATCH v2 4/4] drm/v3d: add multiple syncobjs support

2021-09-30 Thread Iago Toral
On Wed, 2021-09-29 at 10:45 +0100, Melissa Wen wrote:
> Using the generic extension from the previous patch, a specific
> multisync
> extension enables more than one in/out binary syncobj per job
> submission.
> Arrays of syncobjs are set in struct drm_v3d_multisync, that also
> cares
> of determining the stage for sync (wait deps) according to the job
> queue.
> 
> v2:
> - subclass the generic extension struct (Daniel)
> - simplify adding dependency conditions to make understandable (Iago)
> 
> Signed-off-by: Melissa Wen 
> ---
>  drivers/gpu/drm/v3d/v3d_drv.c |   6 +-
>  drivers/gpu/drm/v3d/v3d_drv.h |  23 +++--
>  drivers/gpu/drm/v3d/v3d_gem.c | 176 ++
> 
>  include/uapi/drm/v3d_drm.h|  49 +-
>  4 files changed, 224 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.c
> b/drivers/gpu/drm/v3d/v3d_drv.c
> index 3d6b9bcce2f7..bd46396a1ae0 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.c
> +++ b/drivers/gpu/drm/v3d/v3d_drv.c
> @@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device
> *dev, void *data,
>   case DRM_V3D_PARAM_SUPPORTS_PERFMON:
>   args->value = (v3d->ver >= 40);
>   return 0;
> + case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
> + args->value = 1;
> + return 0;
>   default:
>   DRM_DEBUG("Unknown parameter %d\n", args->param);
>   return -EINVAL;
> @@ -135,9 +138,8 @@ v3d_postclose(struct drm_device *dev, struct
> drm_file *file)
>   struct v3d_file_priv *v3d_priv = file->driver_priv;
>   enum v3d_queue q;
>  
> - for (q = 0; q < V3D_MAX_QUEUES; q++) {
> + for (q = 0; q < V3D_MAX_QUEUES; q++)
>   drm_sched_entity_destroy(_priv->sched_entity[q]);
> - }
>  
>   v3d_perfmon_close_file(v3d_priv);
>   kfree(v3d_priv);
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.h
> b/drivers/gpu/drm/v3d/v3d_drv.h
> index b900a050d5e2..b14ff1e96f49 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.h
> +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> @@ -19,15 +19,6 @@ struct reset_control;
>  
>  #define GMP_GRANULARITY (128 * 1024)
>  
> -/* Enum for each of the V3D queues. */
> -enum v3d_queue {
> - V3D_BIN,
> - V3D_RENDER,
> - V3D_TFU,
> - V3D_CSD,
> - V3D_CACHE_CLEAN,
> -};
> -
>  #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
>  
>  struct v3d_queue_state {
> @@ -294,6 +285,20 @@ struct v3d_csd_job {
>   struct drm_v3d_submit_csd args;
>  };
>  
> +struct v3d_submit_outsync {
> + struct drm_syncobj *syncobj;
> +};
> +
> +struct v3d_submit_ext {
> + u32 wait_stage;
> +
> + u32 in_sync_count;
> + u64 in_syncs;
> +
> + u32 out_sync_count;
> + struct v3d_submit_outsync *out_syncs;
> +};
> +
>  /**
>   * __wait_for - magic wait macro
>   *
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c
> b/drivers/gpu/drm/v3d/v3d_gem.c
> index b912419027f7..e92998d39eaa 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -454,11 +454,12 @@ v3d_job_add_deps(struct drm_file *file_priv,
> struct v3d_job *job,
>  static int
>  v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
>void **container, size_t size, void (*free)(struct kref
> *ref),
> -  u32 in_sync, enum v3d_queue queue)
> +  u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue
> queue)
>  {
>   struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
>   struct v3d_job *job;
> - int ret;
> + bool has_multisync = (se && se->in_sync_count);

I think this is not correct... we could be using the multisync
interface and pass 0 in_syncs and/or 0 out_syncs... what should
determine if we take the multisync path is the presence of the
extension alone.

> + int ret, i;
>  
>   *container = kcalloc(1, size, GFP_KERNEL);
>   if (!*container) {
> @@ -479,9 +480,28 @@ v3d_job_init(struct v3d_dev *v3d, struct
> drm_file *file_priv,
>   if (ret)
>   goto fail;
>  
> - ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
> - if (ret)
> - goto fail_job;
> + if (has_multisync) {
> + if (se->wait_stage == queue) {
> + struct drm_v3d_sem __user *handle =
> u64_to_user_ptr(se->in_syncs);
> +
> + for (i = 0; i < se->in_sync_count; i++) {
> + struct drm_v3d_sem in;
> +
> + ret = copy_from_user(, handle++,
> sizeof(in));
> + if (ret) {
> + DRM_DEBUG("Failed to copy wait
> dep handle.\n");
> + goto fail_job;
> + }
> + ret = v3d_job_add_deps(file_priv, job,
> in.handle, 0);
> + if (ret)
> + goto fail_job;
> + }
> + }
> + } else {
> + ret = v3d_job_add_deps(file_priv, job, 

[PATCH v2 4/4] drm/v3d: add multiple syncobjs support

2021-09-29 Thread Melissa Wen
Using the generic extension from the previous patch, a specific multisync
extension enables more than one in/out binary syncobj per job submission.
Arrays of syncobjs are set in struct drm_v3d_multisync, that also cares
of determining the stage for sync (wait deps) according to the job
queue.

v2:
- subclass the generic extension struct (Daniel)
- simplify adding dependency conditions to make understandable (Iago)

Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/v3d/v3d_drv.c |   6 +-
 drivers/gpu/drm/v3d/v3d_drv.h |  23 +++--
 drivers/gpu/drm/v3d/v3d_gem.c | 176 ++
 include/uapi/drm/v3d_drm.h|  49 +-
 4 files changed, 224 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index 3d6b9bcce2f7..bd46396a1ae0 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void 
*data,
case DRM_V3D_PARAM_SUPPORTS_PERFMON:
args->value = (v3d->ver >= 40);
return 0;
+   case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
+   args->value = 1;
+   return 0;
default:
DRM_DEBUG("Unknown parameter %d\n", args->param);
return -EINVAL;
@@ -135,9 +138,8 @@ v3d_postclose(struct drm_device *dev, struct drm_file *file)
struct v3d_file_priv *v3d_priv = file->driver_priv;
enum v3d_queue q;
 
-   for (q = 0; q < V3D_MAX_QUEUES; q++) {
+   for (q = 0; q < V3D_MAX_QUEUES; q++)
drm_sched_entity_destroy(_priv->sched_entity[q]);
-   }
 
v3d_perfmon_close_file(v3d_priv);
kfree(v3d_priv);
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index b900a050d5e2..b14ff1e96f49 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -19,15 +19,6 @@ struct reset_control;
 
 #define GMP_GRANULARITY (128 * 1024)
 
-/* Enum for each of the V3D queues. */
-enum v3d_queue {
-   V3D_BIN,
-   V3D_RENDER,
-   V3D_TFU,
-   V3D_CSD,
-   V3D_CACHE_CLEAN,
-};
-
 #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
 
 struct v3d_queue_state {
@@ -294,6 +285,20 @@ struct v3d_csd_job {
struct drm_v3d_submit_csd args;
 };
 
+struct v3d_submit_outsync {
+   struct drm_syncobj *syncobj;
+};
+
+struct v3d_submit_ext {
+   u32 wait_stage;
+
+   u32 in_sync_count;
+   u64 in_syncs;
+
+   u32 out_sync_count;
+   struct v3d_submit_outsync *out_syncs;
+};
+
 /**
  * __wait_for - magic wait macro
  *
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index b912419027f7..e92998d39eaa 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -454,11 +454,12 @@ v3d_job_add_deps(struct drm_file *file_priv, struct 
v3d_job *job,
 static int
 v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 void **container, size_t size, void (*free)(struct kref *ref),
-u32 in_sync, enum v3d_queue queue)
+u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
 {
struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
struct v3d_job *job;
-   int ret;
+   bool has_multisync = (se && se->in_sync_count);
+   int ret, i;
 
*container = kcalloc(1, size, GFP_KERNEL);
if (!*container) {
@@ -479,9 +480,28 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file 
*file_priv,
if (ret)
goto fail;
 
-   ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
-   if (ret)
-   goto fail_job;
+   if (has_multisync) {
+   if (se->wait_stage == queue) {
+   struct drm_v3d_sem __user *handle = 
u64_to_user_ptr(se->in_syncs);
+
+   for (i = 0; i < se->in_sync_count; i++) {
+   struct drm_v3d_sem in;
+
+   ret = copy_from_user(, handle++, sizeof(in));
+   if (ret) {
+   DRM_DEBUG("Failed to copy wait dep 
handle.\n");
+   goto fail_job;
+   }
+   ret = v3d_job_add_deps(file_priv, job, 
in.handle, 0);
+   if (ret)
+   goto fail_job;
+   }
+   }
+   } else {
+   ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
+   if (ret)
+   goto fail_job;
+   }
 
kref_init(>refcount);
 
@@ -514,6 +534,7 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file 
*file_priv,
 struct v3d_job *job,
 struct ww_acquire_ctx *acquire_ctx,
 u32 out_sync,
+