Re: [PATCH v6 5/7] drm/sched: Split free_job into own work item

2023-10-23 Thread Matthew Brost
On Mon, Oct 23, 2023 at 04:22:16PM +0200, Boris Brezillon wrote:
> On Mon, 23 Oct 2023 13:54:13 +
> Matthew Brost wrote:
> 
> > On Mon, Oct 23, 2023 at 02:39:37PM +0200, Boris Brezillon wrote:
> > > On Mon, 23 Oct 2023 14:16:06 +0200
> > > Boris Brezillon wrote:
> > >   
> > > > Hi,
> > > > 
> > > > On Tue, 17 Oct 2023 08:09:56 -0700
> > > > Matthew Brost wrote:
> > > > 
> > > > > +static void drm_sched_run_job_work(struct work_struct *w)
> > > > > +{
> > > > > +	struct drm_gpu_scheduler *sched =
> > > > > +		container_of(w, struct drm_gpu_scheduler, work_run_job);
> > > > > +	struct drm_sched_entity *entity;
> > > > > +	struct dma_fence *fence;
> > > > > +	struct drm_sched_fence *s_fence;
> > > > > +	struct drm_sched_job *sched_job;
> > > > > +	int r;
> > > > >  
> > > > > -	atomic_inc(&sched->hw_rq_count);
> > > > > -	drm_sched_job_begin(sched_job);
> > > > > +	if (READ_ONCE(sched->pause_submit))
> > > > > +		return;
> > > > > +
> > > > > +	entity = drm_sched_select_entity(sched, true);
> > > > > +	if (!entity)
> > > > > +		return;
> > > > >  
> > > > > -	trace_drm_run_job(sched_job, entity);
> > > > > -	fence = sched->ops->run_job(sched_job);
> > > > > +	sched_job = drm_sched_entity_pop_job(entity);
> > > > > +	if (!sched_job) {
> > > > >  		complete_all(&entity->entity_idle);
> > > > > -		drm_sched_fence_scheduled(s_fence, fence);
> > > > > +		return;	/* No more work */
> > > > > +	}
> > > > >  
> > > > > -	if (!IS_ERR_OR_NULL(fence)) {
> > > > > -		/* Drop for original kref_init of the fence */
> > > > > -		dma_fence_put(fence);
> > > > > +	s_fence = sched_job->s_fence;
> > > > >  
> > > > > -		r = dma_fence_add_callback(fence, &sched_job->cb,
> > > > > -					   drm_sched_job_done_cb);
> > > > > -		if (r == -ENOENT)
> > > > > -			drm_sched_job_done(sched_job, fence->error);
> > > > > -		else if (r)
> > > > > -			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
> > > > > -				      r);
> > > > > -	} else {
> > > > > -		drm_sched_job_done(sched_job, IS_ERR(fence) ?
> > > > > -				   PTR_ERR(fence) : 0);
> > > > > -	}
> > > > > +	atomic_inc(&sched->hw_rq_count);
> > > > > +	drm_sched_job_begin(sched_job);
> > > > >  
> > > > > -	wake_up(&sched->job_scheduled);
> > > > > +	trace_drm_run_job(sched_job, entity);
> > > > > +	fence = sched->ops->run_job(sched_job);
> > > > > +	complete_all(&entity->entity_idle);
> > > > > +	drm_sched_fence_scheduled(s_fence, fence);
> > > > > +
> > > > > +	if (!IS_ERR_OR_NULL(fence)) {
> > > > > +		/* Drop for original kref_init of the fence */
> > > > > +		dma_fence_put(fence);
> > > > > +
> > > > > +		r = dma_fence_add_callback(fence, &sched_job->cb,
> > > > > +					   drm_sched_job_done_cb);
> > > > > +		if (r == -ENOENT)
> > > > > +			drm_sched_job_done(sched_job, fence->error);
> > > > > +		else if (r)
> > > > > +			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r);
> > > > > +	} else {
> > > > > +		drm_sched_job_done(sched_job, IS_ERR(fence) ?
> > > > > +				   PTR_ERR(fence) : 0);
> > > > >  	}
> > > > 
> > > > Just ran into a race condition when using a non-ordered workqueue
> > > > for drm_sched:
> > > > 
> > > > thread A                             thread B
> > > > 
> > > > drm_sched_run_job_work()
> > > >   drm_sched_job_begin()
> > > >     // inserts jobA in pending_list
> > > > 
> > > >                                      drm_sched_free_job_work()
> > > >                                        drm_sched_get_cleanup_job()
> > > >                                          // check first job in pending list
> > > >                                          // if s_fence->parent == NULL,
> > > >                                          //   consider for cleanup
> > > >                                        ->free_job(jobA)
> > > >                                          drm_sched_job_cleanup()
> > > >                                            // set sched_job->s_fence = NULL
> > > > 
> > > >   ->run_job()

Re: [PATCH v6 5/7] drm/sched: Split free_job into own work item

2023-10-23 Thread Boris Brezillon
On Mon, 23 Oct 2023 13:54:13 +
Matthew Brost wrote:

> On Mon, Oct 23, 2023 at 02:39:37PM +0200, Boris Brezillon wrote:
> > On Mon, 23 Oct 2023 14:16:06 +0200
> Boris Brezillon wrote:
> >   
> > > Hi,
> > > 
> > > On Tue, 17 Oct 2023 08:09:56 -0700
> > > Matthew Brost wrote:
> > > 
> > > > +static void drm_sched_run_job_work(struct work_struct *w)
> > > > +{
> > > > +	struct drm_gpu_scheduler *sched =
> > > > +		container_of(w, struct drm_gpu_scheduler, work_run_job);
> > > > +	struct drm_sched_entity *entity;
> > > > +	struct dma_fence *fence;
> > > > +	struct drm_sched_fence *s_fence;
> > > > +	struct drm_sched_job *sched_job;
> > > > +	int r;
> > > >  
> > > > -	atomic_inc(&sched->hw_rq_count);
> > > > -	drm_sched_job_begin(sched_job);
> > > > +	if (READ_ONCE(sched->pause_submit))
> > > > +		return;
> > > > +
> > > > +	entity = drm_sched_select_entity(sched, true);
> > > > +	if (!entity)
> > > > +		return;
> > > >  
> > > > -	trace_drm_run_job(sched_job, entity);
> > > > -	fence = sched->ops->run_job(sched_job);
> > > > +	sched_job = drm_sched_entity_pop_job(entity);
> > > > +	if (!sched_job) {
> > > >  		complete_all(&entity->entity_idle);
> > > > -		drm_sched_fence_scheduled(s_fence, fence);
> > > > +		return;	/* No more work */
> > > > +	}
> > > >  
> > > > -	if (!IS_ERR_OR_NULL(fence)) {
> > > > -		/* Drop for original kref_init of the fence */
> > > > -		dma_fence_put(fence);
> > > > +	s_fence = sched_job->s_fence;
> > > >  
> > > > -		r = dma_fence_add_callback(fence, &sched_job->cb,
> > > > -					   drm_sched_job_done_cb);
> > > > -		if (r == -ENOENT)
> > > > -			drm_sched_job_done(sched_job, fence->error);
> > > > -		else if (r)
> > > > -			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
> > > > -				      r);
> > > > -	} else {
> > > > -		drm_sched_job_done(sched_job, IS_ERR(fence) ?
> > > > -				   PTR_ERR(fence) : 0);
> > > > -	}
> > > > +	atomic_inc(&sched->hw_rq_count);
> > > > +	drm_sched_job_begin(sched_job);
> > > >  
> > > > -	wake_up(&sched->job_scheduled);
> > > > +	trace_drm_run_job(sched_job, entity);
> > > > +	fence = sched->ops->run_job(sched_job);
> > > > +	complete_all(&entity->entity_idle);
> > > > +	drm_sched_fence_scheduled(s_fence, fence);
> > > > +
> > > > +	if (!IS_ERR_OR_NULL(fence)) {
> > > > +		/* Drop for original kref_init of the fence */
> > > > +		dma_fence_put(fence);
> > > > +
> > > > +		r = dma_fence_add_callback(fence, &sched_job->cb,
> > > > +					   drm_sched_job_done_cb);
> > > > +		if (r == -ENOENT)
> > > > +			drm_sched_job_done(sched_job, fence->error);
> > > > +		else if (r)
> > > > +			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r);
> > > > +	} else {
> > > > +		drm_sched_job_done(sched_job, IS_ERR(fence) ?
> > > > +				   PTR_ERR(fence) : 0);
> > > >  	}
> > > 
> > > Just ran into a race condition when using a non-ordered workqueue
> > > for drm_sched:
> > > 
> > > thread A                             thread B
> > > 
> > > drm_sched_run_job_work()
> > >   drm_sched_job_begin()
> > >     // inserts jobA in pending_list
> > > 
> > >                                      drm_sched_free_job_work()
> > >                                        drm_sched_get_cleanup_job()
> > >                                          // check first job in pending list
> > >                                          // if s_fence->parent == NULL,
> > >                                          //   consider for cleanup
> > >                                        ->free_job(jobA)
> > >                                          drm_sched_job_cleanup()
> > >                                            // set sched_job->s_fence = NULL
> > > 
> > >   ->run_job()
> > >   drm_sched_fence_scheduled()
> > 
> > Correction: the NULL pointer deref happens in drm_sched_job_done()
> > (when the driver returns an error directly) not in
> > drm_sched_fence_scheduled(), but the problem remains the same.
> 

Re: [PATCH v6 5/7] drm/sched: Split free_job into own work item

2023-10-23 Thread Matthew Brost
On Mon, Oct 23, 2023 at 02:39:37PM +0200, Boris Brezillon wrote:
> On Mon, 23 Oct 2023 14:16:06 +0200
> Boris Brezillon wrote:
> 
> > Hi,
> > 
> > On Tue, 17 Oct 2023 08:09:56 -0700
> > Matthew Brost wrote:
> > 
> > > +static void drm_sched_run_job_work(struct work_struct *w)
> > > +{
> > > +	struct drm_gpu_scheduler *sched =
> > > +		container_of(w, struct drm_gpu_scheduler, work_run_job);
> > > +	struct drm_sched_entity *entity;
> > > +	struct dma_fence *fence;
> > > +	struct drm_sched_fence *s_fence;
> > > +	struct drm_sched_job *sched_job;
> > > +	int r;
> > >  
> > > -	atomic_inc(&sched->hw_rq_count);
> > > -	drm_sched_job_begin(sched_job);
> > > +	if (READ_ONCE(sched->pause_submit))
> > > +		return;
> > > +
> > > +	entity = drm_sched_select_entity(sched, true);
> > > +	if (!entity)
> > > +		return;
> > >  
> > > -	trace_drm_run_job(sched_job, entity);
> > > -	fence = sched->ops->run_job(sched_job);
> > > +	sched_job = drm_sched_entity_pop_job(entity);
> > > +	if (!sched_job) {
> > >  		complete_all(&entity->entity_idle);
> > > -		drm_sched_fence_scheduled(s_fence, fence);
> > > +		return;	/* No more work */
> > > +	}
> > >  
> > > -	if (!IS_ERR_OR_NULL(fence)) {
> > > -		/* Drop for original kref_init of the fence */
> > > -		dma_fence_put(fence);
> > > +	s_fence = sched_job->s_fence;
> > >  
> > > -		r = dma_fence_add_callback(fence, &sched_job->cb,
> > > -					   drm_sched_job_done_cb);
> > > -		if (r == -ENOENT)
> > > -			drm_sched_job_done(sched_job, fence->error);
> > > -		else if (r)
> > > -			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
> > > -				      r);
> > > -	} else {
> > > -		drm_sched_job_done(sched_job, IS_ERR(fence) ?
> > > -				   PTR_ERR(fence) : 0);
> > > -	}
> > > +	atomic_inc(&sched->hw_rq_count);
> > > +	drm_sched_job_begin(sched_job);
> > >  
> > > -	wake_up(&sched->job_scheduled);
> > > +	trace_drm_run_job(sched_job, entity);
> > > +	fence = sched->ops->run_job(sched_job);
> > > +	complete_all(&entity->entity_idle);
> > > +	drm_sched_fence_scheduled(s_fence, fence);
> > > +
> > > +	if (!IS_ERR_OR_NULL(fence)) {
> > > +		/* Drop for original kref_init of the fence */
> > > +		dma_fence_put(fence);
> > > +
> > > +		r = dma_fence_add_callback(fence, &sched_job->cb,
> > > +					   drm_sched_job_done_cb);
> > > +		if (r == -ENOENT)
> > > +			drm_sched_job_done(sched_job, fence->error);
> > > +		else if (r)
> > > +			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r);
> > > +	} else {
> > > +		drm_sched_job_done(sched_job, IS_ERR(fence) ?
> > > +				   PTR_ERR(fence) : 0);
> > >  	}
> > 
> > Just ran into a race condition when using a non-ordered workqueue
> > for drm_sched:
> > 
> > thread A                             thread B
> > 
> > drm_sched_run_job_work()
> >   drm_sched_job_begin()
> >     // inserts jobA in pending_list
> > 
> >                                      drm_sched_free_job_work()
> >                                        drm_sched_get_cleanup_job()
> >                                          // check first job in pending list
> >                                          // if s_fence->parent == NULL,
> >                                          //   consider for cleanup
> >                                        ->free_job(jobA)
> >                                          drm_sched_job_cleanup()
> >                                            // set sched_job->s_fence = NULL
> > 
> >   ->run_job()
> >   drm_sched_fence_scheduled()
> 
> Correction: the NULL pointer deref happens in drm_sched_job_done()
> (when the driver returns an error directly) not in
> drm_sched_fence_scheduled(), but the problem remains the same.
> 
> 

Trying to understand this. I don't see how drm_sched_get_cleanup_job can
return a job until dma_fence_is_signaled(&job->s_fence->finished) is
true. That fence is not signaled until drm_sched_fence_finished(s_fence,
result) is called in drm_sched_job_done().

What am I missing here?

Matt

> >   // sched_job->s_fence->parent = parent_fence
> >   // BOOM => NULL pointer deref
> > 
> > For now, I'll just use a dedicated ordered wq, but if we claim
> > multi-threaded workqueues are supported, this is probably worth fixing.
> > I know there's been some discussions about when the timeout should be
> > started, and the job insertion in 
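
For reference, the guard Matt is pointing at looks roughly like this -- a
simplified sketch approximating the drm_sched_get_cleanup_job() of this era
(timedout-job handling and other details elided); it is illustrative, not
verbatim upstream code:

static struct drm_sched_job *
sketch_get_cleanup_job(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *job;

	spin_lock(&sched->job_list_lock);
	job = list_first_entry_or_null(&sched->pending_list,
				       struct drm_sched_job, list);
	/*
	 * Only a job whose finished fence has signaled is reaped, and
	 * that fence only signals via drm_sched_job_done().
	 */
	if (job && dma_fence_is_signaled(&job->s_fence->finished))
		list_del_init(&job->list);
	else
		job = NULL;
	spin_unlock(&sched->job_list_lock);

	return job;
}

Under this reading, a job freshly inserted by drm_sched_job_begin() would
fail the dma_fence_is_signaled() check and be left alone.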

Re: [PATCH v6 5/7] drm/sched: Split free_job into own work item

2023-10-23 Thread Boris Brezillon
On Mon, 23 Oct 2023 14:16:06 +0200
Boris Brezillon wrote:

> Hi,
> 
> On Tue, 17 Oct 2023 08:09:56 -0700
> Matthew Brost wrote:
> 
> > +static void drm_sched_run_job_work(struct work_struct *w)
> > +{
> > +	struct drm_gpu_scheduler *sched =
> > +		container_of(w, struct drm_gpu_scheduler, work_run_job);
> > +	struct drm_sched_entity *entity;
> > +	struct dma_fence *fence;
> > +	struct drm_sched_fence *s_fence;
> > +	struct drm_sched_job *sched_job;
> > +	int r;
> >  
> > -	atomic_inc(&sched->hw_rq_count);
> > -	drm_sched_job_begin(sched_job);
> > +	if (READ_ONCE(sched->pause_submit))
> > +		return;
> > +
> > +	entity = drm_sched_select_entity(sched, true);
> > +	if (!entity)
> > +		return;
> >  
> > -	trace_drm_run_job(sched_job, entity);
> > -	fence = sched->ops->run_job(sched_job);
> > +	sched_job = drm_sched_entity_pop_job(entity);
> > +	if (!sched_job) {
> >  		complete_all(&entity->entity_idle);
> > -		drm_sched_fence_scheduled(s_fence, fence);
> > +		return;	/* No more work */
> > +	}
> >  
> > -	if (!IS_ERR_OR_NULL(fence)) {
> > -		/* Drop for original kref_init of the fence */
> > -		dma_fence_put(fence);
> > +	s_fence = sched_job->s_fence;
> >  
> > -		r = dma_fence_add_callback(fence, &sched_job->cb,
> > -					   drm_sched_job_done_cb);
> > -		if (r == -ENOENT)
> > -			drm_sched_job_done(sched_job, fence->error);
> > -		else if (r)
> > -			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
> > -				      r);
> > -	} else {
> > -		drm_sched_job_done(sched_job, IS_ERR(fence) ?
> > -				   PTR_ERR(fence) : 0);
> > -	}
> > +	atomic_inc(&sched->hw_rq_count);
> > +	drm_sched_job_begin(sched_job);
> >  
> > -	wake_up(&sched->job_scheduled);
> > +	trace_drm_run_job(sched_job, entity);
> > +	fence = sched->ops->run_job(sched_job);
> > +	complete_all(&entity->entity_idle);
> > +	drm_sched_fence_scheduled(s_fence, fence);
> > +
> > +	if (!IS_ERR_OR_NULL(fence)) {
> > +		/* Drop for original kref_init of the fence */
> > +		dma_fence_put(fence);
> > +
> > +		r = dma_fence_add_callback(fence, &sched_job->cb,
> > +					   drm_sched_job_done_cb);
> > +		if (r == -ENOENT)
> > +			drm_sched_job_done(sched_job, fence->error);
> > +		else if (r)
> > +			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r);
> > +	} else {
> > +		drm_sched_job_done(sched_job, IS_ERR(fence) ?
> > +				   PTR_ERR(fence) : 0);
> >  	}
> 
> Just ran into a race condition when using a non-ordered workqueue
> for drm_sched:
> 
> thread A                             thread B
> 
> drm_sched_run_job_work()
>   drm_sched_job_begin()
>     // inserts jobA in pending_list
> 
>                                      drm_sched_free_job_work()
>                                        drm_sched_get_cleanup_job()
>                                          // check first job in pending list
>                                          // if s_fence->parent == NULL,
>                                          //   consider for cleanup
>                                        ->free_job(jobA)
>                                          drm_sched_job_cleanup()
>                                            // set sched_job->s_fence = NULL
> 
>   ->run_job()
>   drm_sched_fence_scheduled()

Correction: the NULL pointer deref happens in drm_sched_job_done()
(when the driver returns an error directly) not in
drm_sched_fence_scheduled(), but the problem remains the same.


>   // sched_job->s_fence->parent = parent_fence
>   // BOOM => NULL pointer deref
> 
> For now, I'll just use a dedicated ordered wq, but if we claim
> multi-threaded workqueues are supported, this is probably worth fixing.
> I know there's been some discussions about when the timeout should be
> started, and the job insertion in the pending_list is kinda related.
> If we want this insertion to happen before ->run_job() is called, we
> need a way to flag when a job is inserted, but not fully submitted yet.
> 
> Regards,
> 
> Boris
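
For context, the crash site Boris describes -- an approximate, abridged
shape of drm_sched_job_done() (the real function does more bookkeeping);
the field names match the scheduler of this era, but treat this as a
sketch rather than verbatim code:

static void sketch_job_done(struct drm_sched_job *s_job, int result)
{
	/* NULL if drm_sched_job_cleanup() already ran on the free side */
	struct drm_sched_fence *s_fence = s_job->s_fence;
	struct drm_gpu_scheduler *sched = s_fence->sched; /* => NULL deref */

	atomic_dec(&sched->hw_rq_count);
	drm_sched_fence_finished(s_fence, result);
}

When ->run_job() returns an ERR_PTR, drm_sched_run_job_work() takes this
path directly, possibly with a job the free worker has already cleaned up.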



Re: [PATCH v6 5/7] drm/sched: Split free_job into own work item

2023-10-23 Thread Boris Brezillon
Hi,

On Tue, 17 Oct 2023 08:09:56 -0700
Matthew Brost wrote:

> +static void drm_sched_run_job_work(struct work_struct *w)
> +{
> +	struct drm_gpu_scheduler *sched =
> +		container_of(w, struct drm_gpu_scheduler, work_run_job);
> +	struct drm_sched_entity *entity;
> +	struct dma_fence *fence;
> +	struct drm_sched_fence *s_fence;
> +	struct drm_sched_job *sched_job;
> +	int r;
>  
> -	atomic_inc(&sched->hw_rq_count);
> -	drm_sched_job_begin(sched_job);
> +	if (READ_ONCE(sched->pause_submit))
> +		return;
> +
> +	entity = drm_sched_select_entity(sched, true);
> +	if (!entity)
> +		return;
>  
> -	trace_drm_run_job(sched_job, entity);
> -	fence = sched->ops->run_job(sched_job);
> +	sched_job = drm_sched_entity_pop_job(entity);
> +	if (!sched_job) {
>  		complete_all(&entity->entity_idle);
> -		drm_sched_fence_scheduled(s_fence, fence);
> +		return;	/* No more work */
> +	}
>  
> -	if (!IS_ERR_OR_NULL(fence)) {
> -		/* Drop for original kref_init of the fence */
> -		dma_fence_put(fence);
> +	s_fence = sched_job->s_fence;
>  
> -		r = dma_fence_add_callback(fence, &sched_job->cb,
> -					   drm_sched_job_done_cb);
> -		if (r == -ENOENT)
> -			drm_sched_job_done(sched_job, fence->error);
> -		else if (r)
> -			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
> -				      r);
> -	} else {
> -		drm_sched_job_done(sched_job, IS_ERR(fence) ?
> -				   PTR_ERR(fence) : 0);
> -	}
> +	atomic_inc(&sched->hw_rq_count);
> +	drm_sched_job_begin(sched_job);
>  
> -	wake_up(&sched->job_scheduled);
> +	trace_drm_run_job(sched_job, entity);
> +	fence = sched->ops->run_job(sched_job);
> +	complete_all(&entity->entity_idle);
> +	drm_sched_fence_scheduled(s_fence, fence);
> +
> +	if (!IS_ERR_OR_NULL(fence)) {
> +		/* Drop for original kref_init of the fence */
> +		dma_fence_put(fence);
> +
> +		r = dma_fence_add_callback(fence, &sched_job->cb,
> +					   drm_sched_job_done_cb);
> +		if (r == -ENOENT)
> +			drm_sched_job_done(sched_job, fence->error);
> +		else if (r)
> +			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r);
> +	} else {
> +		drm_sched_job_done(sched_job, IS_ERR(fence) ?
> +				   PTR_ERR(fence) : 0);
>  	}

Just ran into a race condition when using a non-ordered workqueue
for drm_sched:

thread A                             thread B

drm_sched_run_job_work()
  drm_sched_job_begin()
    // inserts jobA in pending_list

                                     drm_sched_free_job_work()
                                       drm_sched_get_cleanup_job()
                                         // check first job in pending list
                                         // if s_fence->parent == NULL,
                                         //   consider for cleanup
                                       ->free_job(jobA)
                                         drm_sched_job_cleanup()
                                           // set sched_job->s_fence = NULL

  ->run_job()
  drm_sched_fence_scheduled()
    // sched_job->s_fence->parent = parent_fence
    // BOOM => NULL pointer deref

For now, I'll just use a dedicated ordered wq, but if we claim
multi-threaded workqueues are supported, this is probably worth fixing.
I know there's been some discussions about when the timeout should be
started, and the job insertion in the pending_list is kinda related.
If we want this insertion to happen before ->run_job() is called, we
need a way to flag when a job is inserted, but not fully submitted yet.

Regards,

Boris
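
To make the flag idea concrete, a hypothetical, untested sketch -- none of
this is in the series or in upstream, and a real fix would also need
ordering guarantees (e.g. updating the flag under the pending_list lock):

--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ struct drm_sched_job {
 	struct list_head		list;
+	/* True once ->run_job() has returned for this job. */
+	bool				fully_submitted;

--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ drm_sched_run_job_work()
 	fence = sched->ops->run_job(sched_job);
+	WRITE_ONCE(sched_job->fully_submitted, true);
@@ drm_sched_get_cleanup_job()
 	job = list_first_entry_or_null(&sched->pending_list,
 				       struct drm_sched_job, list);
-	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
+	if (job && READ_ONCE(job->fully_submitted) &&
+	    dma_fence_is_signaled(&job->s_fence->finished)) {

With something like that, the free worker would skip jobs that sit on
pending_list but have not yet been handed to the hardware.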


Re: [PATCH v6 5/7] drm/sched: Split free_job into own work item

2023-10-21 Thread Luben Tuikov
Hi,

On 2023-10-19 12:55, Matthew Brost wrote:
> On Wed, Oct 18, 2023 at 09:25:36PM -0400, Luben Tuikov wrote:
>> Hi,
>>
>> On 2023-10-17 11:09, Matthew Brost wrote:
>>> Rather than call free_job and run_job in same work item have a dedicated
>>> work item for each. This aligns with the design and intended use of work
>>> queues.
>>>
>>> v2:
>>>- Test for DMA_FENCE_FLAG_TIMESTAMP_BIT before setting
>>>  timestamp in free_job() work item (Danilo)
>>> v3:
>>>   - Drop forward dec of drm_sched_select_entity (Boris)
>>>   - Return in drm_sched_run_job_work if entity NULL (Boris)
>>> v4:
>>>   - Replace dequeue with peek and invert logic (Luben)
>>>   - Wrap to 100 lines (Luben)
>>>   - Update comments for *_queue / *_queue_if_ready functions (Luben)
>>>
>>> Signed-off-by: Matthew Brost 
>>> ---
>>>  drivers/gpu/drm/scheduler/sched_main.c | 287 +++--
>>>  include/drm/gpu_scheduler.h            |   8 +-
>>>  2 files changed, 178 insertions(+), 117 deletions(-)
>>> 
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>>> index 273e0fbc4eab..b1b8d9f96da5 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -213,11 +213,12 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>>>   * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
>>>   *
>>>   * @rq: scheduler run queue to check.
>>> + * @peek: Just find, don't set to current.
>>>   *
>>>   * Try to find a ready entity, returns NULL if none found.
>>>   */
>>>  static struct drm_sched_entity *
>>> -drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>>> +drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq, bool peek)
>>>  {
>>>  	struct drm_sched_entity *entity;
>>>  
>>> @@ -227,8 +228,10 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>>>  	if (entity) {
>>>  		list_for_each_entry_continue(entity, &rq->entities, list) {
>>>  			if (drm_sched_entity_is_ready(entity)) {
>>> -				rq->current_entity = entity;
>>> -				reinit_completion(&entity->entity_idle);
>>> +				if (!peek) {
>>> +					rq->current_entity = entity;
>>> +					reinit_completion(&entity->entity_idle);
>>> +				}
>>>  				spin_unlock(&rq->lock);
>>>  				return entity;
>>>  			}
>>> @@ -238,8 +241,10 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>>>  	list_for_each_entry(entity, &rq->entities, list) {
>>>  
>>>  		if (drm_sched_entity_is_ready(entity)) {
>>> -			rq->current_entity = entity;
>>> -			reinit_completion(&entity->entity_idle);
>>> +			if (!peek) {
>>> +				rq->current_entity = entity;
>>> +				reinit_completion(&entity->entity_idle);
>>> +			}
>>>  			spin_unlock(&rq->lock);
>>>  			return entity;
>>>  		}
>>> @@ -257,11 +262,12 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>>>   * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
>>>   *
>>>   * @rq: scheduler run queue to check.
>>> + * @peek: Just find, don't set to current.
>>>   *
>>>   * Find oldest waiting ready entity, returns NULL if none found.
>>>   */
>>>  static struct drm_sched_entity *
>>> -drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>>> +drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq, bool peek)
>>>  {
>>>  	struct rb_node *rb;
>>>  
>>> @@ -271,8 +277,10 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>>>  
>>>  		entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node);
>>>  		if (drm_sched_entity_is_ready(entity)) {
>>> -			rq->current_entity = entity;
>>> -			reinit_completion(&entity->entity_idle);
>>> +			if (!peek) {
>>> +				rq->current_entity = entity;
>>> +				reinit_completion(&entity->entity_idle);
>>> +			}
>>>  			break;
>>>  		}
>>>  	}
>>> @@ -282,13 +290,98 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>>>  }
>>>  
>>>  /**
>>> - * drm_sched_wqueue_enqueue - enqueue scheduler submission
>>> + * drm_sched_run_job_queue - enqueue run-job work
>>
>> Hmm... so this removes the change from patch 1 in this series, and as such
>> obviates patch 1.
>>
>> Do you want to go with "run_job_queue" and the names introduced here?
>>
> 
> Yes, I like the run_job_queue name here.
> 
>> In this case, we can have that in patch 1 instead, and this patch
>> would only split the "free job" into its own work item.
>>
> 
> Sure, so s/drm_sched_wqueue_enqueue/drm_sched_run_job_queue 

Re: [PATCH v6 5/7] drm/sched: Split free_job into own work item

2023-10-19 Thread Matthew Brost
On Wed, Oct 18, 2023 at 09:25:36PM -0400, Luben Tuikov wrote:
> Hi,
> 
> On 2023-10-17 11:09, Matthew Brost wrote:
> > Rather than call free_job and run_job in same work item have a dedicated
> > work item for each. This aligns with the design and intended use of work
> > queues.
> > 
> > v2:
> >- Test for DMA_FENCE_FLAG_TIMESTAMP_BIT before setting
> >  timestamp in free_job() work item (Danilo)
> > v3:
> >   - Drop forward dec of drm_sched_select_entity (Boris)
> >   - Return in drm_sched_run_job_work if entity NULL (Boris)
> > v4:
> >   - Replace dequeue with peek and invert logic (Luben)
> >   - Wrap to 100 lines (Luben)
> >   - Update comments for *_queue / *_queue_if_ready functions (Luben)
> > 
> > Signed-off-by: Matthew Brost 
> > ---
> >  drivers/gpu/drm/scheduler/sched_main.c | 287 +++--
> >  include/drm/gpu_scheduler.h            |   8 +-
> >  2 files changed, 178 insertions(+), 117 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> > index 273e0fbc4eab..b1b8d9f96da5 100644
> > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > @@ -213,11 +213,12 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
> >   * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
> >   *
> >   * @rq: scheduler run queue to check.
> > + * @peek: Just find, don't set to current.
> >   *
> >   * Try to find a ready entity, returns NULL if none found.
> >   */
> >  static struct drm_sched_entity *
> > -drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
> > +drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq, bool peek)
> >  {
> >  	struct drm_sched_entity *entity;
> >  
> > @@ -227,8 +228,10 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
> >  	if (entity) {
> >  		list_for_each_entry_continue(entity, &rq->entities, list) {
> >  			if (drm_sched_entity_is_ready(entity)) {
> > -				rq->current_entity = entity;
> > -				reinit_completion(&entity->entity_idle);
> > +				if (!peek) {
> > +					rq->current_entity = entity;
> > +					reinit_completion(&entity->entity_idle);
> > +				}
> >  				spin_unlock(&rq->lock);
> >  				return entity;
> >  			}
> > @@ -238,8 +241,10 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
> >  	list_for_each_entry(entity, &rq->entities, list) {
> >  
> >  		if (drm_sched_entity_is_ready(entity)) {
> > -			rq->current_entity = entity;
> > -			reinit_completion(&entity->entity_idle);
> > +			if (!peek) {
> > +				rq->current_entity = entity;
> > +				reinit_completion(&entity->entity_idle);
> > +			}
> >  			spin_unlock(&rq->lock);
> >  			return entity;
> >  		}
> > @@ -257,11 +262,12 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
> >   * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
> >   *
> >   * @rq: scheduler run queue to check.
> > + * @peek: Just find, don't set to current.
> >   *
> >   * Find oldest waiting ready entity, returns NULL if none found.
> >   */
> >  static struct drm_sched_entity *
> > -drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
> > +drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq, bool peek)
> >  {
> >  	struct rb_node *rb;
> >  
> > @@ -271,8 +277,10 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
> >  
> >  		entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node);
> >  		if (drm_sched_entity_is_ready(entity)) {
> > -			rq->current_entity = entity;
> > -			reinit_completion(&entity->entity_idle);
> > +			if (!peek) {
> > +				rq->current_entity = entity;
> > +				reinit_completion(&entity->entity_idle);
> > +			}
> >  			break;
> >  		}
> >  	}
> > @@ -282,13 +290,98 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
> >  }
> >  
> >  /**
> > - * drm_sched_wqueue_enqueue - enqueue scheduler submission
> > + * drm_sched_run_job_queue - enqueue run-job work
> 
> Hmm... so this removes the change from patch 1 in this series, and as such
> obviates patch 1.
> 
> Do you want to go with "run_job_queue" and the names introduced here?
> 

Yes, I like the run_job_queue name here.

> In this case, we can have that in patch 1 instead, and this patch
> would only split the "free job" into its own work item.
>

Sure, so s/drm_sched_wqueue_enqueue/drm_sched_run_job_queue in patch #1?
 
> > + * @sched: scheduler instance
> > + */
> > 

Re: [PATCH v6 5/7] drm/sched: Split free_job into own work item

2023-10-18 Thread Luben Tuikov
Hi,

On 2023-10-17 11:09, Matthew Brost wrote:
> Rather than call free_job and run_job in same work item have a dedicated
> work item for each. This aligns with the design and intended use of work
> queues.
> 
> v2:
>- Test for DMA_FENCE_FLAG_TIMESTAMP_BIT before setting
>  timestamp in free_job() work item (Danilo)
> v3:
>   - Drop forward dec of drm_sched_select_entity (Boris)
>   - Return in drm_sched_run_job_work if entity NULL (Boris)
> v4:
>   - Replace dequeue with peek and invert logic (Luben)
>   - Wrap to 100 lines (Luben)
>   - Update comments for *_queue / *_queue_if_ready functions (Luben)
> 
> Signed-off-by: Matthew Brost 
> ---
>  drivers/gpu/drm/scheduler/sched_main.c | 287 +++--
>  include/drm/gpu_scheduler.h            |   8 +-
>  2 files changed, 178 insertions(+), 117 deletions(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 273e0fbc4eab..b1b8d9f96da5 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -213,11 +213,12 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>   * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
>   *
>   * @rq: scheduler run queue to check.
> + * @peek: Just find, don't set to current.
>   *
>   * Try to find a ready entity, returns NULL if none found.
>   */
>  static struct drm_sched_entity *
> -drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
> +drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq, bool peek)
>  {
>  	struct drm_sched_entity *entity;
>  
> @@ -227,8 +228,10 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>  	if (entity) {
>  		list_for_each_entry_continue(entity, &rq->entities, list) {
>  			if (drm_sched_entity_is_ready(entity)) {
> -				rq->current_entity = entity;
> -				reinit_completion(&entity->entity_idle);
> +				if (!peek) {
> +					rq->current_entity = entity;
> +					reinit_completion(&entity->entity_idle);
> +				}
>  				spin_unlock(&rq->lock);
>  				return entity;
>  			}
> @@ -238,8 +241,10 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>  	list_for_each_entry(entity, &rq->entities, list) {
>  
>  		if (drm_sched_entity_is_ready(entity)) {
> -			rq->current_entity = entity;
> -			reinit_completion(&entity->entity_idle);
> +			if (!peek) {
> +				rq->current_entity = entity;
> +				reinit_completion(&entity->entity_idle);
> +			}
>  			spin_unlock(&rq->lock);
>  			return entity;
>  		}
> @@ -257,11 +262,12 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>   * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
>   *
>   * @rq: scheduler run queue to check.
> + * @peek: Just find, don't set to current.
>   *
>   * Find oldest waiting ready entity, returns NULL if none found.
>   */
>  static struct drm_sched_entity *
> -drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
> +drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq, bool peek)
>  {
>  	struct rb_node *rb;
>  
> @@ -271,8 +277,10 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>  
>  		entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node);
>  		if (drm_sched_entity_is_ready(entity)) {
> -			rq->current_entity = entity;
> -			reinit_completion(&entity->entity_idle);
> +			if (!peek) {
> +				rq->current_entity = entity;
> +				reinit_completion(&entity->entity_idle);
> +			}
>  			break;
>  		}
>  	}
> @@ -282,13 +290,98 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>  }
>  
>  /**
> - * drm_sched_wqueue_enqueue - enqueue scheduler submission
> + * drm_sched_run_job_queue - enqueue run-job work

Hmm... so this removes the change from patch 1 in this series, and as such
obviates patch 1.

Do you want to go with "run_job_queue" and the names introduced here?

In this case, we can have that in patch 1 instead, and this patch
would only split the "free job" into its own work item.

> + * @sched: scheduler instance
> + */
> +static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
> +{
> +	if (!READ_ONCE(sched->pause_submit))
> +		queue_work(sched->submit_wq, &sched->work_run_job);
> +}
> +
> +/**
> + * drm_sched_can_queue -- Can we queue more to the hardware?
> + * @sched: scheduler instance
> + *
> + * Return true if we can push more jobs to the hw, otherwise 

[PATCH v6 5/7] drm/sched: Split free_job into own work item

2023-10-17 Thread Matthew Brost
Rather than call free_job and run_job in same work item have a dedicated
work item for each. This aligns with the design and intended use of work
queues.
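
In toy form, the pattern being adopted looks like this (illustrative only,
not drm_sched code; only the plain kernel workqueue API is used):

#include <linux/workqueue.h>

struct toy_sched {
	struct workqueue_struct *submit_wq;
	struct work_struct work_run_job;
	struct work_struct work_free_job;
};

static void toy_run_job(struct work_struct *w)
{
	/* push at most one ready job to the hardware, requeue if more */
}

static void toy_free_job(struct work_struct *w)
{
	/* reap at most one finished job, requeue if more */
}

static int toy_sched_init(struct toy_sched *s)
{
	/*
	 * An ordered wq serializes the two items, mimicking the old
	 * single-item behaviour; a plain wq lets them run concurrently.
	 */
	s->submit_wq = alloc_ordered_workqueue("toy-sched", 0);
	if (!s->submit_wq)
		return -ENOMEM;
	INIT_WORK(&s->work_run_job, toy_run_job);
	INIT_WORK(&s->work_free_job, toy_free_job);
	return 0;
}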

v2:
   - Test for DMA_FENCE_FLAG_TIMESTAMP_BIT before setting
 timestamp in free_job() work item (Danilo)
v3:
  - Drop forward dec of drm_sched_select_entity (Boris)
  - Return in drm_sched_run_job_work if entity NULL (Boris)
v4:
  - Replace dequeue with peek and invert logic (Luben)
  - Wrap to 100 lines (Luben)
  - Update comments for *_queue / *_queue_if_ready functions (Luben)

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/scheduler/sched_main.c | 287 +++--
 include/drm/gpu_scheduler.h            |   8 +-
 2 files changed, 178 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 273e0fbc4eab..b1b8d9f96da5 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -213,11 +213,12 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
  * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
  *
  * @rq: scheduler run queue to check.
+ * @peek: Just find, don't set to current.
  *
  * Try to find a ready entity, returns NULL if none found.
  */
 static struct drm_sched_entity *
-drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
+drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq, bool peek)
 {
 	struct drm_sched_entity *entity;
 
@@ -227,8 +228,10 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
 	if (entity) {
 		list_for_each_entry_continue(entity, &rq->entities, list) {
 			if (drm_sched_entity_is_ready(entity)) {
-				rq->current_entity = entity;
-				reinit_completion(&entity->entity_idle);
+				if (!peek) {
+					rq->current_entity = entity;
+					reinit_completion(&entity->entity_idle);
+				}
 				spin_unlock(&rq->lock);
 				return entity;
 			}
@@ -238,8 +241,10 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
 	list_for_each_entry(entity, &rq->entities, list) {
 
 		if (drm_sched_entity_is_ready(entity)) {
-			rq->current_entity = entity;
-			reinit_completion(&entity->entity_idle);
+			if (!peek) {
+				rq->current_entity = entity;
+				reinit_completion(&entity->entity_idle);
+			}
 			spin_unlock(&rq->lock);
 			return entity;
 		}
@@ -257,11 +262,12 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
  * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
  *
  * @rq: scheduler run queue to check.
+ * @peek: Just find, don't set to current.
  *
  * Find oldest waiting ready entity, returns NULL if none found.
  */
 static struct drm_sched_entity *
-drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
+drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq, bool peek)
 {
 	struct rb_node *rb;
 
@@ -271,8 +277,10 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
 
 		entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node);
 		if (drm_sched_entity_is_ready(entity)) {
-			rq->current_entity = entity;
-			reinit_completion(&entity->entity_idle);
+			if (!peek) {
+				rq->current_entity = entity;
+				reinit_completion(&entity->entity_idle);
+			}
 			break;
 		}
 	}
@@ -282,13 +290,98 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
 }
 
 /**
- * drm_sched_wqueue_enqueue - enqueue scheduler submission
+ * drm_sched_run_job_queue - enqueue run-job work
+ * @sched: scheduler instance
+ */
+static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
+{
+	if (!READ_ONCE(sched->pause_submit))
+		queue_work(sched->submit_wq, &sched->work_run_job);
+}
+
+/**
+ * drm_sched_can_queue -- Can we queue more to the hardware?
+ * @sched: scheduler instance
+ *
+ * Return true if we can push more jobs to the hw, otherwise false.
+ */
+static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
+{
+	return atomic_read(&sched->hw_rq_count) <
+		sched->hw_submission_limit;
+}
+
+/**
+ * drm_sched_select_entity - Select next entity to process
+ *
+ * @sched: scheduler instance
+ * @peek: Just find, don't set to current.
+ *
+ * Returns the entity to process or NULL if none are found.
+ */
+static struct drm_sched_entity *
+drm_sched_select_entity(struct drm_gpu_scheduler *sched, bool peek)
+{
+   struct drm_sched_entity