Currently we pre-allocate entities and fences for all the HW IPs on
context creation, but some of them might never be used.

This patch avoids that waste by allocating an entity (and its fence
array) only when it is first needed.

Signed-off-by: Nirmoy Das <nirmoy....@amd.com>
---
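Note for reviewers (not part of the commit message): the context now
keeps a two-level table, ctx->entities[hw_ip][ring], and both levels are
allocated on first use from amdgpu_ctx_get_entity(). Below is a minimal
sketch of the get-or-create flow and of the guarded teardown walk this
patch uses. It is illustrative only — locking, fence refcounting and the
per-IP scheduler selection are omitted, and the example_* helpers are
hypothetical names, not functions in the driver:

    /* Lazy lookup: allocate an entity the first time it is requested. */
    static int example_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 ring,
                                  struct drm_sched_entity **entity)
    {
            if (!ctx->entities[hw_ip] || !ctx->entities[hw_ip][ring]) {
                    int r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);

                    if (r)
                            return r;
            }

            *entity = &ctx->entities[hw_ip][ring]->entity;
            return 0;
    }

    /* Teardown: any slot may still be NULL, so every walk over the table
     * must check both levels before touching an entity.
     * (dma_fence_put() on each stored fence is elided for brevity.)
     */
    static void example_fini_entities(struct amdgpu_ctx *ctx)
    {
            unsigned int i, j;

            for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
                    for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
                            if (!ctx->entities[i] || !ctx->entities[i][j])
                                    continue;

                            kfree(ctx->entities[i][j]->fences);
                            kfree(ctx->entities[i][j]);
                    }
                    kfree(ctx->entities[i]);
            }
    }

The same two-level NULL check is repeated in amdgpu_ctx_fini(),
amdgpu_ctx_do_release(), amdgpu_ctx_priority_override() and the two
context-manager walks, since any given entity may not have been created
yet.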
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 244 +++++++++++++-----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |   2 +-
 2 files changed, 135 insertions(+), 111 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 05c2af61e7de..73f7615df8c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
        [AMDGPU_HW_IP_VCN_JPEG] =       1,
 };
 
-static int amdgpu_ctx_total_num_entities(void)
-{
-       unsigned i, num_entities = 0;
-
-       for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-               num_entities += amdgpu_ctx_num_entities[i];
-
-       return num_entities;
-}
-
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
                                      enum drm_sched_priority priority)
 {
+       if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+               return -EINVAL;
+
        /* NORMAL and below are accessible by everyone */
        if (priority <= DRM_SCHED_PRIORITY_NORMAL)
                return 0;
@@ -68,64 +61,44 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
        return -EACCES;
 }
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
-                          enum drm_sched_priority priority,
-                          struct drm_file *filp,
-                          struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
 {
-       unsigned num_entities = amdgpu_ctx_total_num_entities();
-       unsigned i, j;
+       struct amdgpu_device *adev = ctx->adev;
+       struct amdgpu_ctx_entity *entity;
+       struct drm_gpu_scheduler **scheds;
+       struct drm_gpu_scheduler *sched;
+       unsigned num_scheds = 0;
+       enum drm_sched_priority priority;
        int r;
 
-       if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
-               return -EINVAL;
-
-       r = amdgpu_ctx_priority_permit(filp, priority);
-       if (r)
-               return r;
-
-       memset(ctx, 0, sizeof(*ctx));
-       ctx->adev = adev;
-
-
-       ctx->entities[0] = kcalloc(num_entities,
-                                  sizeof(struct amdgpu_ctx_entity),
-                                  GFP_KERNEL);
-       if (!ctx->entities[0])
-               return -ENOMEM;
-
-
-       for (i = 0; i < num_entities; ++i) {
-               struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-               entity->sequence = 1;
-               entity->fences = kcalloc(amdgpu_sched_jobs,
-                                        sizeof(struct dma_fence*), GFP_KERNEL);
-               if (!entity->fences) {
-                       r = -ENOMEM;
-                       goto error_cleanup_memory;
-               }
+       if (!ctx->entities[hw_ip]) {
+               ctx->entities[hw_ip] = kcalloc(amdgpu_ctx_num_entities[hw_ip],
+                                              sizeof(struct amdgpu_ctx_entity *),
+                                              GFP_KERNEL);
+               if (!ctx->entities[hw_ip])
+                       return -ENOMEM;
        }
-       for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
-               ctx->entities[i] = ctx->entities[i - 1] +
-                       amdgpu_ctx_num_entities[i - 1];
 
-       kref_init(&ctx->refcount);
-       spin_lock_init(&ctx->ring_lock);
-       mutex_init(&ctx->lock);
+       ctx->entities[hw_ip][ring] = kcalloc(1, sizeof(struct amdgpu_ctx_entity),
+                                            GFP_KERNEL);
+       if (!ctx->entities[hw_ip][ring])
+               return -ENOMEM;
 
-       ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
-       ctx->reset_counter_query = ctx->reset_counter;
-       ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
-       ctx->init_priority = priority;
-       ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+       entity = ctx->entities[hw_ip][ring];
 
-       for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-               struct drm_gpu_scheduler **scheds;
-               struct drm_gpu_scheduler *sched;
-               unsigned num_scheds = 0;
+       entity->sequence = 1;
+       entity->fences = kcalloc(amdgpu_sched_jobs,
+                                sizeof(struct dma_fence *), GFP_KERNEL);
+       if (!entity->fences) {
+               r = -ENOMEM;
+               goto error_free_entity;
+       }
 
-               switch (i) {
+       priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+                               ctx->init_priority : ctx->override_priority;
+       switch (hw_ip) {
                case AMDGPU_HW_IP_GFX:
                        sched = &adev->gfx.gfx_ring[0].sched;
                        scheds = &sched;
@@ -166,57 +139,85 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
                        scheds = adev->jpeg.jpeg_sched;
                        num_scheds =  adev->jpeg.num_jpeg_sched;
                        break;
-               }
-
-               for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-                       r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-                                                 priority, scheds,
-                                                 num_scheds, &ctx->guilty);
-               if (r)
-                       goto error_cleanup_entities;
        }
 
+       r = drm_sched_entity_init(&entity->entity, priority, scheds,
+                                 num_scheds, &ctx->guilty);
+       if (r)
+               goto error_free_entity;
+
        return 0;
 
-error_cleanup_entities:
-       for (i = 0; i < num_entities; ++i)
-               drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+error_free_entity:
+       kfree(entity->fences);
+       kfree(entity);
+       ctx->entities[hw_ip][ring] = NULL;
+       return r;
+}
 
-error_cleanup_memory:
-       for (i = 0; i < num_entities; ++i) {
-               struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+                          enum drm_sched_priority priority,
+                          struct drm_file *filp,
+                          struct amdgpu_ctx *ctx)
+{
+       int r;
 
-               kfree(entity->fences);
-               entity->fences = NULL;
-       }
+       r = amdgpu_ctx_priority_permit(filp, priority);
+       if (r)
+               return r;
+
+       memset(ctx, 0, sizeof(*ctx));
+
+       ctx->adev = adev;
+
+       kref_init(&ctx->refcount);
+       spin_lock_init(&ctx->ring_lock);
+       mutex_init(&ctx->lock);
+
+       ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+       ctx->reset_counter_query = ctx->reset_counter;
+       ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+       ctx->init_priority = priority;
+       ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+       return 0;
-
-       kfree(ctx->entities[0]);
-       ctx->entities[0] = NULL;
-       return r;
 }
 
 static void amdgpu_ctx_fini(struct kref *ref)
 {
        struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-       unsigned num_entities = amdgpu_ctx_total_num_entities();
        struct amdgpu_device *adev = ctx->adev;
-       unsigned i, j;
+       unsigned i, j, k;
 
        if (!adev)
                return;
 
-       for (i = 0; i < num_entities; ++i) {
-               struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+       for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+               for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                       struct amdgpu_ctx_entity *entity;
+
+                       if (!ctx->entities[i] || !ctx->entities[i][j])
+                               continue;
+
+                       entity = ctx->entities[i][j];

-               for (j = 0; j < amdgpu_sched_jobs; ++j)
-                       dma_fence_put(entity->fences[j]);
+                       for (k = 0; k < amdgpu_sched_jobs; ++k)
+                               dma_fence_put(entity->fences[k]);

-               kfree(entity->fences);
+                       kfree(entity->fences);
+                       kfree(entity);
+                       ctx->entities[i][j] = NULL;
+               }
+
+               kfree(ctx->entities[i]);
+               ctx->entities[i] = NULL;
        }
 
-       kfree(ctx->entities[0]);
        mutex_destroy(&ctx->lock);
-
        kfree(ctx);
 }
 
@@ -239,7 +240,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
                return -EINVAL;
        }
 
-       *entity = &ctx->entities[hw_ip][ring].entity;
+       if (!ctx->entities[hw_ip] || !ctx->entities[hw_ip][ring]) {
+               int r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+
+               if (r)
+                       return r;
+       }
+
+       *entity = &ctx->entities[hw_ip][ring]->entity;
        return 0;
 }
 
@@ -279,14 +284,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
        struct amdgpu_ctx *ctx;
-       unsigned num_entities;
-       u32 i;
+       u32 i, j;
 
        ctx = container_of(ref, struct amdgpu_ctx, refcount);
+       for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+               for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                       if (!ctx->entities[i] || !ctx->entities[i][j])
+                               continue;
 
-       num_entities = amdgpu_ctx_total_num_entities();
-       for (i = 0; i < num_entities; i++)
-               drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+                       drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+               }
+       }
 
        amdgpu_ctx_fini(ref);
 }
@@ -516,19 +524,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
                                  enum drm_sched_priority priority)
 {
-       unsigned num_entities = amdgpu_ctx_total_num_entities();
        enum drm_sched_priority ctx_prio;
-       unsigned i;
+       unsigned i, j;
 
        ctx->override_priority = priority;
 
        ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
                        ctx->init_priority : ctx->override_priority;
+       for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+               for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                       struct drm_sched_entity *entity;
 
-       for (i = 0; i < num_entities; i++) {
-               struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+                       if (!ctx->entities[i] || !ctx->entities[i][j])
+                               continue;
 
-               drm_sched_entity_set_priority(entity, ctx_prio);
+                       entity = &ctx->entities[i][j]->entity;
+                       drm_sched_entity_set_priority(entity, ctx_prio);
+               }
        }
 }
 
@@ -564,20 +576,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 {
-       unsigned num_entities = amdgpu_ctx_total_num_entities();
        struct amdgpu_ctx *ctx;
        struct idr *idp;
-       uint32_t id, i;
+       uint32_t id, i, j;
 
        idp = &mgr->ctx_handles;
 
        mutex_lock(&mgr->lock);
        idr_for_each_entry(idp, ctx, id) {
-               for (i = 0; i < num_entities; i++) {
-                       struct drm_sched_entity *entity;
+               for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+                       for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                               struct drm_sched_entity *entity;
+
+                               if (!ctx->entities[i] || !ctx->entities[i][j])
+                                       continue;
 
-                       entity = &ctx->entities[0][i].entity;
-                       timeout = drm_sched_entity_flush(entity, timeout);
+                               entity = &ctx->entities[i][j]->entity;
+                               timeout = drm_sched_entity_flush(entity, timeout);
+                       }
                }
        }
        mutex_unlock(&mgr->lock);
@@ -586,10 +602,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
-       unsigned num_entities = amdgpu_ctx_total_num_entities();
        struct amdgpu_ctx *ctx;
        struct idr *idp;
-       uint32_t id, i;
+       uint32_t id, i, j;
 
        idp = &mgr->ctx_handles;
 
@@ -599,8 +614,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
                        continue;
                }
 
-               for (i = 0; i < num_entities; i++)
-                       drm_sched_entity_fini(&ctx->entities[0][i].entity);
+               for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+                       for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                               struct drm_sched_entity *entity;
+
+                               if (!ctx->entities[i] || !ctx->entities[i][j])
+                                       continue;
+
+                               entity = &ctx->entities[i][j]->entity;
+                               drm_sched_entity_fini(entity);
+                       }
+               }
        }
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index a6cd9d4b078c..ca9363e71df5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -42,7 +42,7 @@ struct amdgpu_ctx {
        unsigned                        reset_counter_query;
        uint32_t                        vram_lost_counter;
        spinlock_t                      ring_lock;
-       struct amdgpu_ctx_entity        *entities[AMDGPU_HW_IP_NUM];
+       struct amdgpu_ctx_entity        **entities[AMDGPU_HW_IP_NUM];
        bool                            preamble_presented;
        enum drm_sched_priority         init_priority;
        enum drm_sched_priority         override_priority;
-- 
2.24.1
