Currently we pre-allocate entities for all the HW IPs on
context creation, even though some of them might never be used.

This patch avoids that waste by creating the entities for a HW IP
only when they are first required.

Signed-off-by: Nirmoy Das <nirmoy....@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 176 +++++++++++++-----------
 1 file changed, 97 insertions(+), 79 deletions(-)
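
For context, the core idea is the init-on-first-use pattern sketched below.
This is a stand-alone user-space C sketch with made-up names (hw_ip_entity,
get_entity, init_entities_for_ip), not the driver code: entities start out
marked as uninitialized, and the lookup path sets up the whole HW IP block
the first time it is requested, mirroring the sequence == -1 check added to
amdgpu_ctx_get_entity() in the diff.

/*
 * Minimal, self-contained sketch of the init-on-first-use pattern this
 * patch adopts.  All names here are illustrative only.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define NUM_HW_IP 3

struct hw_ip_entity {
        bool initialized;       /* stands in for the "sequence == -1" sentinel */
        int id;
};

static struct hw_ip_entity entities[NUM_HW_IP];

/* The expensive setup, done once per HW IP and only on demand. */
static int init_entities_for_ip(unsigned int hw_ip)
{
        printf("initializing entities for HW IP %u\n", hw_ip);
        entities[hw_ip].id = (int)hw_ip;
        entities[hw_ip].initialized = true;
        return 0;
}

/* Lookup path: initialize lazily on first use, then hand out the entity. */
static int get_entity(unsigned int hw_ip, struct hw_ip_entity **entity)
{
        int r;

        if (hw_ip >= NUM_HW_IP)
                return -EINVAL;

        if (!entities[hw_ip].initialized) {
                r = init_entities_for_ip(hw_ip);
                if (r)
                        return r;
        }

        *entity = &entities[hw_ip];
        return 0;
}

int main(void)
{
        struct hw_ip_entity *e;

        /* Only HW IP 1 is ever requested, so only its entities get set up. */
        if (!get_entity(1, &e))
                printf("got entity %d\n", e->id);
        if (!get_entity(1, &e))         /* second call reuses the entity */
                printf("got entity %d again\n", e->id);
        return 0;
}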

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 1d6850af9908..c7643af8827f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -68,13 +68,99 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
        return -EACCES;
 }
 
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)
+{
+       struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+       struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS];
+       struct amdgpu_device *adev = ctx->adev;
+       unsigned num_rings = 0;
+       unsigned num_scheds = 0;
+       unsigned i, j;
+       int r = 0;
+
+       switch (hw_ip) {
+       case AMDGPU_HW_IP_GFX:
+               rings[0] = &adev->gfx.gfx_ring[0];
+               num_rings = 1;
+               break;
+       case AMDGPU_HW_IP_COMPUTE:
+               for (i = 0; i < adev->gfx.num_compute_rings; ++i)
+                       rings[i] = &adev->gfx.compute_ring[i];
+               num_rings = adev->gfx.num_compute_rings;
+               break;
+       case AMDGPU_HW_IP_DMA:
+               for (i = 0; i < adev->sdma.num_instances; ++i)
+                       rings[i] = &adev->sdma.instance[i].ring;
+               num_rings = adev->sdma.num_instances;
+               break;
+       case AMDGPU_HW_IP_UVD:
+               rings[0] = &adev->uvd.inst[0].ring;
+               num_rings = 1;
+               break;
+       case AMDGPU_HW_IP_VCE:
+               rings[0] = &adev->vce.ring[0];
+               num_rings = 1;
+               break;
+       case AMDGPU_HW_IP_UVD_ENC:
+               rings[0] = &adev->uvd.inst[0].ring_enc[0];
+               num_rings = 1;
+               break;
+       case AMDGPU_HW_IP_VCN_DEC:
+               for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+                       if (adev->vcn.harvest_config & (1 << i))
+                               continue;
+                       rings[num_rings++] = &adev->vcn.inst[i].ring_dec;
+               }
+               break;
+       case AMDGPU_HW_IP_VCN_ENC:
+               for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+                       if (adev->vcn.harvest_config & (1 << i))
+                               continue;
+                       for (j = 0; j < adev->vcn.num_enc_rings; ++j)
+                               rings[num_rings++] = &adev->vcn.inst[i].ring_enc[j];
+               }
+               break;
+       case AMDGPU_HW_IP_VCN_JPEG:
+               for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+                       if (adev->vcn.harvest_config & (1 << i))
+                               continue;
+                       rings[num_rings++] = &adev->jpeg.inst[i].ring_dec;
+               }
+               break;
+       }
+
+       for (i = 0; i < num_rings; ++i) {
+               if (!rings[i]->adev)
+                       continue;
+
+               sched_list[num_scheds++] = &rings[i]->sched;
+       }
+
+       for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+               r = drm_sched_entity_init(&ctx->entities[hw_ip][i].entity,
+                               ctx->init_priority, sched_list, num_scheds, &ctx->guilty);
+       if (r)
+               goto error_cleanup_entities;
+
+       for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+               ctx->entities[hw_ip][i].sequence = 1;
+
+       return 0;
+
+error_cleanup_entities:
+       for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+               drm_sched_entity_destroy(&ctx->entities[hw_ip][i].entity);
+
+       return r;
+}
+
 static int amdgpu_ctx_init(struct amdgpu_device *adev,
                           enum drm_sched_priority priority,
                           struct drm_file *filp,
                           struct amdgpu_ctx *ctx)
 {
        unsigned num_entities = amdgpu_ctx_total_num_entities();
-       unsigned i, j, k;
+       unsigned i;
        int r;
 
        if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
@@ -103,7 +189,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
        for (i = 0; i < num_entities; ++i) {
                struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
 
-               entity->sequence = 1;
+               entity->sequence = -1;
                entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
        }
        for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
@@ -120,85 +206,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
        ctx->init_priority = priority;
        ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
 
-       for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-               struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
-               struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS];
-               unsigned num_rings = 0;
-               unsigned num_rqs = 0;
-
-               switch (i) {
-               case AMDGPU_HW_IP_GFX:
-                       rings[0] = &adev->gfx.gfx_ring[0];
-                       num_rings = 1;
-                       break;
-               case AMDGPU_HW_IP_COMPUTE:
-                       for (j = 0; j < adev->gfx.num_compute_rings; ++j)
-                               rings[j] = &adev->gfx.compute_ring[j];
-                       num_rings = adev->gfx.num_compute_rings;
-                       break;
-               case AMDGPU_HW_IP_DMA:
-                       for (j = 0; j < adev->sdma.num_instances; ++j)
-                               rings[j] = &adev->sdma.instance[j].ring;
-                       num_rings = adev->sdma.num_instances;
-                       break;
-               case AMDGPU_HW_IP_UVD:
-                       rings[0] = &adev->uvd.inst[0].ring;
-                       num_rings = 1;
-                       break;
-               case AMDGPU_HW_IP_VCE:
-                       rings[0] = &adev->vce.ring[0];
-                       num_rings = 1;
-                       break;
-               case AMDGPU_HW_IP_UVD_ENC:
-                       rings[0] = &adev->uvd.inst[0].ring_enc[0];
-                       num_rings = 1;
-                       break;
-               case AMDGPU_HW_IP_VCN_DEC:
-                       for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-                               if (adev->vcn.harvest_config & (1 << j))
-                                       continue;
-                               rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
-                       }
-                       break;
-               case AMDGPU_HW_IP_VCN_ENC:
-                       for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-                               if (adev->vcn.harvest_config & (1 << j))
-                                       continue;
-                               for (k = 0; k < adev->vcn.num_enc_rings; ++k)
-                                       rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
-                       }
-                       break;
-               case AMDGPU_HW_IP_VCN_JPEG:
-                       for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
-                               if (adev->vcn.harvest_config & (1 << j))
-                                       continue;
-                               rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
-                       }
-                       break;
-               }
-
-               for (j = 0; j < num_rings; ++j) {
-                       if (!rings[j]->adev)
-                               continue;
-
-                       sched_list[num_rqs++] = &rings[j]->sched;
-               }
-
-               for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-                       r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-                                                 priority, sched_list,
-                                                 num_rqs, &ctx->guilty);
-               if (r)
-                       goto error_cleanup_entities;
-       }
-
        return 0;
 
-error_cleanup_entities:
-       for (i = 0; i < num_entities; ++i)
-               drm_sched_entity_destroy(&ctx->entities[0][i].entity);
-       kfree(ctx->entities[0]);
-
 error_free_fences:
        kfree(ctx->fences);
        ctx->fences = NULL;
@@ -229,6 +238,8 @@ static void amdgpu_ctx_fini(struct kref *ref)
 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
                          u32 ring, struct drm_sched_entity **entity)
 {
+       int r;
+
        if (hw_ip >= AMDGPU_HW_IP_NUM) {
                DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
                return -EINVAL;
@@ -245,6 +256,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
                return -EINVAL;
        }
 
+       if (ctx->entities[hw_ip][ring].sequence == -1) {
+               r = amdgpu_ctx_init_entity(ctx, hw_ip);
+
+               if (r)
+                       return r;
+       }
+
        *entity = &ctx->entities[hw_ip][ring].entity;
        return 0;
 }
-- 
2.24.0
