On 2020-02-26 3:37 p.m., Nirmoy Das wrote:
> We were changing compute ring priority while rings were being used,
> before every job submission, which is not recommended. This patch
> recreates the entity with a high/normal priority sched list when the
> user changes the ctx's priority.
> 
> High/normal priority sched lists are generated from the set of high/normal
> priority compute queues. When there are no high priority hw queues, then
> it falls back to software priority.
> 
> Signed-off-by: Nirmoy Das <nirmoy....@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 58 ++++++++++++++++++++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  6 ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  4 ++
>  5 files changed, 59 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index f397ff97b4e4..8304d0c87899 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1205,7 +1205,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>       struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>       struct drm_sched_entity *entity = p->entity;
>       enum drm_sched_priority priority;
> -     struct amdgpu_ring *ring;
>       struct amdgpu_bo_list_entry *e;
>       struct amdgpu_job *job;
>       uint64_t seq;
> @@ -1258,9 +1257,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>       priority = job->base.s_priority;
>       drm_sched_entity_push_job(&job->base, entity);
>  
> -     ring = to_amdgpu_ring(entity->rq->sched);
> -     amdgpu_ring_priority_get(ring, priority);
> -
>       amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>  
>       ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 94a6c42f29ea..ea4dc57d2237 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -85,8 +85,13 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, 
> const u32 hw_ip, const
>                       num_scheds = 1;
>                       break;
>               case AMDGPU_HW_IP_COMPUTE:
> -                     scheds = adev->gfx.compute_sched;
> -                     num_scheds = adev->gfx.num_compute_sched;
> +                     if (priority <= DRM_SCHED_PRIORITY_NORMAL) {
> +                             scheds = adev->gfx.compute_sched;
> +                             num_scheds = adev->gfx.num_compute_sched;
> +                     } else {
> +                             scheds = adev->gfx.compute_sched_high;
> +                             num_scheds = adev->gfx.num_compute_sched_high;
> +                     }

Why more if-conditionals?

If you initialize a map of DRM_SCHED_PRIORITY_MAX entries, indexed by
priority, then you can simply do:

scheds = adev->gfx.map[priority];

So, part of your array would contain normal and the rest high.

Also, I don't think you should introduce yet another
compute_sched[] array. All this should be folded into
a single array containing both normal and high.

Also consider changing to this:

enum drm_sched_priority {
        DRM_SCHED_PRIORITY_UNSET,
--------DRM_SCHED_PRIORITY_INVALID,--------<--- remove
        DRM_SCHED_PRIORITY_MIN,
        DRM_SCHED_PRIORITY_LOW = DRM_SCHED_PRIORITY_MIN,
        DRM_SCHED_PRIORITY_NORMAL,
        DRM_SCHED_PRIORITY_HIGH_SW,
        DRM_SCHED_PRIORITY_HIGH_HW,
        DRM_SCHED_PRIORITY_KERNEL,
        DRM_SCHED_PRIORITY_MAX,
};

We should never have an "invalid priority", just ignored priority. :-)
Second, having 0 as UNSET gives you an easy default: set
map[0] = normal_priority, since memory usually comes memset to 0.
IOW, you'd not need to check against UNSET at all, and could simply use
the default entry [0], which you'd set to normal priority.

>                       break;
>               case AMDGPU_HW_IP_DMA:
>                       scheds = adev->sdma.sdma_sched;
> @@ -502,6 +507,24 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx 
> *ctx,
>       return fence;
>  }
>  
> +static void amdgpu_ctx_hw_priority_override(struct amdgpu_ctx *ctx,
> +                                         const u32 hw_ip,
> +                                         enum drm_sched_priority priority)
> +{
> +     int i;
> +
> +     for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
> +             if (!ctx->entities[hw_ip][i])
> +                     continue;
> +
> +             /* TODO what happens with prev scheduled jobs */
> +             drm_sched_entity_destroy(&ctx->entities[hw_ip][i]->entity);
> +             amdgpu_ctx_fini_entity(ctx->entities[hw_ip][i]);
> +
> +             amdgpu_ctx_init_entity(ctx, AMDGPU_HW_IP_COMPUTE, i);
> +
> +     }
> +}
>  void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>                                 enum drm_sched_priority priority)
>  {
> @@ -515,12 +538,18 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx 
> *ctx,
>       for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>               for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>                       struct drm_sched_entity *entity;
> +                     struct amdgpu_ring *ring;
>  
>                       if (!ctx->entities[i][j])
>                               continue;
>  
>                       entity = &ctx->entities[i][j]->entity;
> -                     drm_sched_entity_set_priority(entity, ctx_prio);
> +                     ring = to_amdgpu_ring(entity->rq->sched);
> +
> +                     if (ring->funcs->init_priority)
> +                             amdgpu_ctx_hw_priority_override(ctx, i, 
> priority);
> +                     else
> +                             drm_sched_entity_set_priority(entity, ctx_prio);

Why more if-conditionals?
Could we not have a map?

>               }
>       }
>  }
> @@ -630,6 +659,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
>  
>  void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
>  {
> +     enum drm_sched_priority priority;
>       int i, j;
>  
>       for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
> @@ -638,8 +668,26 @@ void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
>       }
>  
>       for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> -             adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched;
> -             adev->gfx.num_compute_sched++;
> +             priority = adev->gfx.compute_ring[i].priority;
> +             if (priority <= DRM_SCHED_PRIORITY_NORMAL) {
> +                     adev->gfx.compute_sched[i] =
> +                             &adev->gfx.compute_ring[i].sched;
> +                     adev->gfx.num_compute_sched++;
> +             } else {
> +                     adev->gfx.compute_sched_high[i] =
> +                             &adev->gfx.compute_ring[i].sched;
> +                     adev->gfx.num_compute_sched_high++;
> +             }
> +     }

I think it would be better to use a map for this
as opposed to if-conditional.

> +
> +     /* if there are no high prio compute queue then mirror with normal
> +      * priority so amdgpu_ctx_init_entity() works as expected */
> +     if (!adev->gfx.num_compute_sched_high) {
> +             for (i = 0; i < adev->gfx.num_compute_sched; i++) {
> +                     adev->gfx.compute_sched_high[i] =
> +                            adev->gfx.compute_sched[i];
> +             }
> +             adev->gfx.num_compute_sched_high = adev->gfx.num_compute_sched;
>       }
>  
>       for (i = 0; i < adev->sdma.num_instances; i++) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index ca17ffb01301..d58d748e3a56 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -279,7 +279,9 @@ struct amdgpu_gfx {
>       unsigned                        num_gfx_rings;
>       struct amdgpu_ring              compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
>       struct drm_gpu_scheduler        
> *compute_sched[AMDGPU_MAX_COMPUTE_RINGS];
> +     struct drm_gpu_scheduler        
> *compute_sched_high[AMDGPU_MAX_COMPUTE_RINGS];
>       uint32_t                        num_compute_sched;
> +     uint32_t                        num_compute_sched_high;
>       unsigned                        num_compute_rings;
>       struct amdgpu_irq_src           eop_irq;
>       struct amdgpu_irq_src           priv_reg_irq;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index d42be880a236..4981e443a884 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -117,12 +117,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
>  
>  static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
>  {
> -     struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>       struct amdgpu_job *job = to_amdgpu_job(s_job);
>  
>       drm_sched_job_cleanup(s_job);
>  
> -     amdgpu_ring_priority_put(ring, s_job->s_priority);
>       dma_fence_put(job->fence);
>       amdgpu_sync_free(&job->sync);
>       amdgpu_sync_free(&job->sched_sync);
> @@ -143,7 +141,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct 
> drm_sched_entity *entity,
>                     void *owner, struct dma_fence **f)
>  {
>       enum drm_sched_priority priority;
> -     struct amdgpu_ring *ring;
>       int r;
>  
>       if (!f)
> @@ -158,9 +155,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct 
> drm_sched_entity *entity,
>       priority = job->base.s_priority;
>       drm_sched_entity_push_job(&job->base, entity);
>  
> -     ring = to_amdgpu_ring(entity->rq->sched);
> -     amdgpu_ring_priority_get(ring, priority);
> -
>       return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 18e11b0fdc3e..4501ae7afb2e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -326,6 +326,10 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct 
> amdgpu_ring *ring,
>  
>       ring->max_dw = max_dw;
>       ring->priority = DRM_SCHED_PRIORITY_NORMAL;
> +     if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
> +         ring->funcs->init_priority)
> +             ring->funcs->init_priority(ring);
> +

Why not add "init_priority" to all and just call it here unconditionally?

Regards,
Luben

>       mutex_init(&ring->priority_mutex);
>  
>       for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i)
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to