Re: [PATCH v2 4/4] drm/v3d: add multiple syncobjs support
On 09/30, Melissa Wen wrote: > On 09/30, Iago Toral wrote: > > On Wed, 2021-09-29 at 10:45 +0100, Melissa Wen wrote: > > > Using the generic extension from the previous patch, a specific > > > multisync > > > extension enables more than one in/out binary syncobj per job > > > submission. > > > Arrays of syncobjs are set in struct drm_v3d_multisync, that also > > > cares > > > of determining the stage for sync (wait deps) according to the job > > > queue. > > > > > > v2: > > > - subclass the generic extension struct (Daniel) > > > - simplify adding dependency conditions to make understandable (Iago) > > > > > > Signed-off-by: Melissa Wen > > > --- > > > drivers/gpu/drm/v3d/v3d_drv.c | 6 +- > > > drivers/gpu/drm/v3d/v3d_drv.h | 23 +++-- > > > drivers/gpu/drm/v3d/v3d_gem.c | 176 ++ > > > > > > include/uapi/drm/v3d_drm.h| 49 +- > > > 4 files changed, 224 insertions(+), 30 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c > > > b/drivers/gpu/drm/v3d/v3d_drv.c > > > index 3d6b9bcce2f7..bd46396a1ae0 100644 > > > --- a/drivers/gpu/drm/v3d/v3d_drv.c > > > +++ b/drivers/gpu/drm/v3d/v3d_drv.c > > > @@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device > > > *dev, void *data, > > > case DRM_V3D_PARAM_SUPPORTS_PERFMON: > > > args->value = (v3d->ver >= 40); > > > return 0; > > > + case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT: > > > + args->value = 1; > > > + return 0; > > > default: > > > DRM_DEBUG("Unknown parameter %d\n", args->param); > > > return -EINVAL; > > > @@ -135,9 +138,8 @@ v3d_postclose(struct drm_device *dev, struct > > > drm_file *file) > > > struct v3d_file_priv *v3d_priv = file->driver_priv; > > > enum v3d_queue q; > > > > > > - for (q = 0; q < V3D_MAX_QUEUES; q++) { > > > + for (q = 0; q < V3D_MAX_QUEUES; q++) > > > drm_sched_entity_destroy(_priv->sched_entity[q]); > > > - } > > > > > > v3d_perfmon_close_file(v3d_priv); > > > kfree(v3d_priv); > > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h > > > b/drivers/gpu/drm/v3d/v3d_drv.h > > > index b900a050d5e2..b14ff1e96f49 100644 > > > --- a/drivers/gpu/drm/v3d/v3d_drv.h > > > +++ b/drivers/gpu/drm/v3d/v3d_drv.h > > > @@ -19,15 +19,6 @@ struct reset_control; > > > > > > #define GMP_GRANULARITY (128 * 1024) > > > > > > -/* Enum for each of the V3D queues. */ > > > -enum v3d_queue { > > > - V3D_BIN, > > > - V3D_RENDER, > > > - V3D_TFU, > > > - V3D_CSD, > > > - V3D_CACHE_CLEAN, > > > -}; > > > - > > > #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1) > > > > > > struct v3d_queue_state { > > > @@ -294,6 +285,20 @@ struct v3d_csd_job { > > > struct drm_v3d_submit_csd args; > > > }; > > > > > > +struct v3d_submit_outsync { > > > + struct drm_syncobj *syncobj; > > > +}; > > > + > > > +struct v3d_submit_ext { > > > + u32 wait_stage; > > > + > > > + u32 in_sync_count; > > > + u64 in_syncs; > > > + > > > + u32 out_sync_count; > > > + struct v3d_submit_outsync *out_syncs; > > > +}; > > > + > > > /** > > > * __wait_for - magic wait macro > > > * > > > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c > > > b/drivers/gpu/drm/v3d/v3d_gem.c > > > index b912419027f7..e92998d39eaa 100644 > > > --- a/drivers/gpu/drm/v3d/v3d_gem.c > > > +++ b/drivers/gpu/drm/v3d/v3d_gem.c > > > @@ -454,11 +454,12 @@ v3d_job_add_deps(struct drm_file *file_priv, > > > struct v3d_job *job, > > > static int > > > v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, > > >void **container, size_t size, void (*free)(struct kref > > > *ref), > > > - u32 in_sync, enum v3d_queue queue) > > > + u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue > > > queue) > > > { > > > struct v3d_file_priv *v3d_priv = file_priv->driver_priv; > > > struct v3d_job *job; > > > - int ret; > > > + bool has_multisync = (se && se->in_sync_count); > > > > I think this is not correct... we could be using the multisync > > interface and pass 0 in_syncs and/or 0 out_syncs... what should > > determine if we take the multisync path is the presence of the > > extension alone. > hmm.. yeah. so, I should drop this has_multisync and change conditions > to only check if we have a submit_ext (that means we have multisync > support) and move the in_sync_count to check if we should add any wait > semaphores, as below: > > > > > > + int ret, i; > > > > > > *container = kcalloc(1, size, GFP_KERNEL); > > > if (!*container) { > > > @@ -479,9 +480,28 @@ v3d_job_init(struct v3d_dev *v3d, struct > > > drm_file *file_priv, > > > if (ret) > > > goto fail; > > > > > > - ret = v3d_job_add_deps(file_priv, job, in_sync, 0); > > > - if (ret) > > > - goto fail_job; > if (se) { > if (se->in_sync_count && se->wait_stage == queue) { never mind. I realized store extension flag on v3d_submit_ext works better for handling multiples wait and signal semaphores. Just sent a v3. Thanks, Melissa > > > +
Re: [PATCH v2 4/4] drm/v3d: add multiple syncobjs support
On 09/30, Iago Toral wrote: > On Wed, 2021-09-29 at 10:45 +0100, Melissa Wen wrote: > > Using the generic extension from the previous patch, a specific > > multisync > > extension enables more than one in/out binary syncobj per job > > submission. > > Arrays of syncobjs are set in struct drm_v3d_multisync, that also > > cares > > of determining the stage for sync (wait deps) according to the job > > queue. > > > > v2: > > - subclass the generic extension struct (Daniel) > > - simplify adding dependency conditions to make understandable (Iago) > > > > Signed-off-by: Melissa Wen > > --- > > drivers/gpu/drm/v3d/v3d_drv.c | 6 +- > > drivers/gpu/drm/v3d/v3d_drv.h | 23 +++-- > > drivers/gpu/drm/v3d/v3d_gem.c | 176 ++ > > > > include/uapi/drm/v3d_drm.h| 49 +- > > 4 files changed, 224 insertions(+), 30 deletions(-) > > > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c > > b/drivers/gpu/drm/v3d/v3d_drv.c > > index 3d6b9bcce2f7..bd46396a1ae0 100644 > > --- a/drivers/gpu/drm/v3d/v3d_drv.c > > +++ b/drivers/gpu/drm/v3d/v3d_drv.c > > @@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device > > *dev, void *data, > > case DRM_V3D_PARAM_SUPPORTS_PERFMON: > > args->value = (v3d->ver >= 40); > > return 0; > > + case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT: > > + args->value = 1; > > + return 0; > > default: > > DRM_DEBUG("Unknown parameter %d\n", args->param); > > return -EINVAL; > > @@ -135,9 +138,8 @@ v3d_postclose(struct drm_device *dev, struct > > drm_file *file) > > struct v3d_file_priv *v3d_priv = file->driver_priv; > > enum v3d_queue q; > > > > - for (q = 0; q < V3D_MAX_QUEUES; q++) { > > + for (q = 0; q < V3D_MAX_QUEUES; q++) > > drm_sched_entity_destroy(_priv->sched_entity[q]); > > - } > > > > v3d_perfmon_close_file(v3d_priv); > > kfree(v3d_priv); > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h > > b/drivers/gpu/drm/v3d/v3d_drv.h > > index b900a050d5e2..b14ff1e96f49 100644 > > --- a/drivers/gpu/drm/v3d/v3d_drv.h > > +++ b/drivers/gpu/drm/v3d/v3d_drv.h > > @@ -19,15 +19,6 @@ struct reset_control; > > > > #define GMP_GRANULARITY (128 * 1024) > > > > -/* Enum for each of the V3D queues. */ > > -enum v3d_queue { > > - V3D_BIN, > > - V3D_RENDER, > > - V3D_TFU, > > - V3D_CSD, > > - V3D_CACHE_CLEAN, > > -}; > > - > > #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1) > > > > struct v3d_queue_state { > > @@ -294,6 +285,20 @@ struct v3d_csd_job { > > struct drm_v3d_submit_csd args; > > }; > > > > +struct v3d_submit_outsync { > > + struct drm_syncobj *syncobj; > > +}; > > + > > +struct v3d_submit_ext { > > + u32 wait_stage; > > + > > + u32 in_sync_count; > > + u64 in_syncs; > > + > > + u32 out_sync_count; > > + struct v3d_submit_outsync *out_syncs; > > +}; > > + > > /** > > * __wait_for - magic wait macro > > * > > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c > > b/drivers/gpu/drm/v3d/v3d_gem.c > > index b912419027f7..e92998d39eaa 100644 > > --- a/drivers/gpu/drm/v3d/v3d_gem.c > > +++ b/drivers/gpu/drm/v3d/v3d_gem.c > > @@ -454,11 +454,12 @@ v3d_job_add_deps(struct drm_file *file_priv, > > struct v3d_job *job, > > static int > > v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, > > void **container, size_t size, void (*free)(struct kref > > *ref), > > -u32 in_sync, enum v3d_queue queue) > > +u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue > > queue) > > { > > struct v3d_file_priv *v3d_priv = file_priv->driver_priv; > > struct v3d_job *job; > > - int ret; > > + bool has_multisync = (se && se->in_sync_count); > > I think this is not correct... we could be using the multisync > interface and pass 0 in_syncs and/or 0 out_syncs... what should > determine if we take the multisync path is the presence of the > extension alone. hmm.. yeah. so, I should drop this has_multisync and change conditions to only check if we have a submit_ext (that means we have multisync support) and move the in_sync_count to check if we should add any wait semaphores, as below: > > > + int ret, i; > > > > *container = kcalloc(1, size, GFP_KERNEL); > > if (!*container) { > > @@ -479,9 +480,28 @@ v3d_job_init(struct v3d_dev *v3d, struct > > drm_file *file_priv, > > if (ret) > > goto fail; > > > > - ret = v3d_job_add_deps(file_priv, job, in_sync, 0); > > - if (ret) > > - goto fail_job; if (se) { if (se->in_sync_count && se->wait_stage == queue) { > > + struct drm_v3d_sem __user *handle = > > u64_to_user_ptr(se->in_syncs); > > + > > + for (i = 0; i < se->in_sync_count; i++) { > > + struct drm_v3d_sem in; > > + > > + ret = copy_from_user(, handle++, > > sizeof(in)); > > + if (ret) { > > +
Re: [PATCH v2 4/4] drm/v3d: add multiple syncobjs support
On Wed, 2021-09-29 at 10:45 +0100, Melissa Wen wrote: > Using the generic extension from the previous patch, a specific > multisync > extension enables more than one in/out binary syncobj per job > submission. > Arrays of syncobjs are set in struct drm_v3d_multisync, that also > cares > of determining the stage for sync (wait deps) according to the job > queue. > > v2: > - subclass the generic extension struct (Daniel) > - simplify adding dependency conditions to make understandable (Iago) > > Signed-off-by: Melissa Wen > --- > drivers/gpu/drm/v3d/v3d_drv.c | 6 +- > drivers/gpu/drm/v3d/v3d_drv.h | 23 +++-- > drivers/gpu/drm/v3d/v3d_gem.c | 176 ++ > > include/uapi/drm/v3d_drm.h| 49 +- > 4 files changed, 224 insertions(+), 30 deletions(-) > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c > b/drivers/gpu/drm/v3d/v3d_drv.c > index 3d6b9bcce2f7..bd46396a1ae0 100644 > --- a/drivers/gpu/drm/v3d/v3d_drv.c > +++ b/drivers/gpu/drm/v3d/v3d_drv.c > @@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device > *dev, void *data, > case DRM_V3D_PARAM_SUPPORTS_PERFMON: > args->value = (v3d->ver >= 40); > return 0; > + case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT: > + args->value = 1; > + return 0; > default: > DRM_DEBUG("Unknown parameter %d\n", args->param); > return -EINVAL; > @@ -135,9 +138,8 @@ v3d_postclose(struct drm_device *dev, struct > drm_file *file) > struct v3d_file_priv *v3d_priv = file->driver_priv; > enum v3d_queue q; > > - for (q = 0; q < V3D_MAX_QUEUES; q++) { > + for (q = 0; q < V3D_MAX_QUEUES; q++) > drm_sched_entity_destroy(_priv->sched_entity[q]); > - } > > v3d_perfmon_close_file(v3d_priv); > kfree(v3d_priv); > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h > b/drivers/gpu/drm/v3d/v3d_drv.h > index b900a050d5e2..b14ff1e96f49 100644 > --- a/drivers/gpu/drm/v3d/v3d_drv.h > +++ b/drivers/gpu/drm/v3d/v3d_drv.h > @@ -19,15 +19,6 @@ struct reset_control; > > #define GMP_GRANULARITY (128 * 1024) > > -/* Enum for each of the V3D queues. */ > -enum v3d_queue { > - V3D_BIN, > - V3D_RENDER, > - V3D_TFU, > - V3D_CSD, > - V3D_CACHE_CLEAN, > -}; > - > #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1) > > struct v3d_queue_state { > @@ -294,6 +285,20 @@ struct v3d_csd_job { > struct drm_v3d_submit_csd args; > }; > > +struct v3d_submit_outsync { > + struct drm_syncobj *syncobj; > +}; > + > +struct v3d_submit_ext { > + u32 wait_stage; > + > + u32 in_sync_count; > + u64 in_syncs; > + > + u32 out_sync_count; > + struct v3d_submit_outsync *out_syncs; > +}; > + > /** > * __wait_for - magic wait macro > * > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c > b/drivers/gpu/drm/v3d/v3d_gem.c > index b912419027f7..e92998d39eaa 100644 > --- a/drivers/gpu/drm/v3d/v3d_gem.c > +++ b/drivers/gpu/drm/v3d/v3d_gem.c > @@ -454,11 +454,12 @@ v3d_job_add_deps(struct drm_file *file_priv, > struct v3d_job *job, > static int > v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, >void **container, size_t size, void (*free)(struct kref > *ref), > - u32 in_sync, enum v3d_queue queue) > + u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue > queue) > { > struct v3d_file_priv *v3d_priv = file_priv->driver_priv; > struct v3d_job *job; > - int ret; > + bool has_multisync = (se && se->in_sync_count); I think this is not correct... we could be using the multisync interface and pass 0 in_syncs and/or 0 out_syncs... what should determine if we take the multisync path is the presence of the extension alone. > + int ret, i; > > *container = kcalloc(1, size, GFP_KERNEL); > if (!*container) { > @@ -479,9 +480,28 @@ v3d_job_init(struct v3d_dev *v3d, struct > drm_file *file_priv, > if (ret) > goto fail; > > - ret = v3d_job_add_deps(file_priv, job, in_sync, 0); > - if (ret) > - goto fail_job; > + if (has_multisync) { > + if (se->wait_stage == queue) { > + struct drm_v3d_sem __user *handle = > u64_to_user_ptr(se->in_syncs); > + > + for (i = 0; i < se->in_sync_count; i++) { > + struct drm_v3d_sem in; > + > + ret = copy_from_user(, handle++, > sizeof(in)); > + if (ret) { > + DRM_DEBUG("Failed to copy wait > dep handle.\n"); > + goto fail_job; > + } > + ret = v3d_job_add_deps(file_priv, job, > in.handle, 0); > + if (ret) > + goto fail_job; > + } > + } > + } else { > + ret = v3d_job_add_deps(file_priv, job,
[PATCH v2 4/4] drm/v3d: add multiple syncobjs support
Using the generic extension from the previous patch, a specific multisync extension enables more than one in/out binary syncobj per job submission. Arrays of syncobjs are set in struct drm_v3d_multisync, that also cares of determining the stage for sync (wait deps) according to the job queue. v2: - subclass the generic extension struct (Daniel) - simplify adding dependency conditions to make understandable (Iago) Signed-off-by: Melissa Wen --- drivers/gpu/drm/v3d/v3d_drv.c | 6 +- drivers/gpu/drm/v3d/v3d_drv.h | 23 +++-- drivers/gpu/drm/v3d/v3d_gem.c | 176 ++ include/uapi/drm/v3d_drm.h| 49 +- 4 files changed, 224 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index 3d6b9bcce2f7..bd46396a1ae0 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, case DRM_V3D_PARAM_SUPPORTS_PERFMON: args->value = (v3d->ver >= 40); return 0; + case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT: + args->value = 1; + return 0; default: DRM_DEBUG("Unknown parameter %d\n", args->param); return -EINVAL; @@ -135,9 +138,8 @@ v3d_postclose(struct drm_device *dev, struct drm_file *file) struct v3d_file_priv *v3d_priv = file->driver_priv; enum v3d_queue q; - for (q = 0; q < V3D_MAX_QUEUES; q++) { + for (q = 0; q < V3D_MAX_QUEUES; q++) drm_sched_entity_destroy(_priv->sched_entity[q]); - } v3d_perfmon_close_file(v3d_priv); kfree(v3d_priv); diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index b900a050d5e2..b14ff1e96f49 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -19,15 +19,6 @@ struct reset_control; #define GMP_GRANULARITY (128 * 1024) -/* Enum for each of the V3D queues. */ -enum v3d_queue { - V3D_BIN, - V3D_RENDER, - V3D_TFU, - V3D_CSD, - V3D_CACHE_CLEAN, -}; - #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1) struct v3d_queue_state { @@ -294,6 +285,20 @@ struct v3d_csd_job { struct drm_v3d_submit_csd args; }; +struct v3d_submit_outsync { + struct drm_syncobj *syncobj; +}; + +struct v3d_submit_ext { + u32 wait_stage; + + u32 in_sync_count; + u64 in_syncs; + + u32 out_sync_count; + struct v3d_submit_outsync *out_syncs; +}; + /** * __wait_for - magic wait macro * diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index b912419027f7..e92998d39eaa 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -454,11 +454,12 @@ v3d_job_add_deps(struct drm_file *file_priv, struct v3d_job *job, static int v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, void **container, size_t size, void (*free)(struct kref *ref), -u32 in_sync, enum v3d_queue queue) +u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue) { struct v3d_file_priv *v3d_priv = file_priv->driver_priv; struct v3d_job *job; - int ret; + bool has_multisync = (se && se->in_sync_count); + int ret, i; *container = kcalloc(1, size, GFP_KERNEL); if (!*container) { @@ -479,9 +480,28 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, if (ret) goto fail; - ret = v3d_job_add_deps(file_priv, job, in_sync, 0); - if (ret) - goto fail_job; + if (has_multisync) { + if (se->wait_stage == queue) { + struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs); + + for (i = 0; i < se->in_sync_count; i++) { + struct drm_v3d_sem in; + + ret = copy_from_user(, handle++, sizeof(in)); + if (ret) { + DRM_DEBUG("Failed to copy wait dep handle.\n"); + goto fail_job; + } + ret = v3d_job_add_deps(file_priv, job, in.handle, 0); + if (ret) + goto fail_job; + } + } + } else { + ret = v3d_job_add_deps(file_priv, job, in_sync, 0); + if (ret) + goto fail_job; + } kref_init(>refcount); @@ -514,6 +534,7 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, struct v3d_job *job, struct ww_acquire_ctx *acquire_ctx, u32 out_sync, +