amdgpu: allocate entities on demand

Nirmoy Wed, 22 Jan 2020 04:01:56 -0800


On 1/22/20 12:39 PM, Christian König wrote:

Am 22.01.20 um 10:33 schrieb Nirmoy Das:
Currently we pre-allocate entities and fences for all the HW IPs on
context creation and some of which are might never be used.
This patch tries to resolve entity/fences wastage by creating entities
for a HW IP only when it is required.

v2: consolidated priority checking at amdgpu_ctx_priority_permit()
Well that is not really what I had in mind. See almost allapplications use only the first instance of a hw_ip.
So what we should probably do allocate entities on demand instead ofwhole hw_ips.
Do you know what I mean?


Okay I understand what you mean. I will rework this and get back to you.


Regards,

Nirmoy


Regards,
Christian.


Signed-off-by: Nirmoy Das <nirmoy....@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 160 +++++++++++++-----------
  1 file changed, 87 insertions(+), 73 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.cb/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index eecbd68db986..d704e1bebb1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

@@ -45,6 +45,9 @@ const unsigned intamdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {

  static int amdgpu_ctx_priority_permit(struct drm_file *filp,
                        enum drm_sched_priority priority)
  {
+    if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+        return -EINVAL;
+
      /* NORMAL and below are accessible by everyone */
      if (priority <= DRM_SCHED_PRIORITY_NORMAL)
          return 0;

@@ -58,65 +61,37 @@ static int amdgpu_ctx_priority_permit(structdrm_file *filp,

      return -EACCES;
  }
  -static int amdgpu_ctx_init(struct amdgpu_device *adev,
-               enum drm_sched_priority priority,
-               struct drm_file *filp,
-               struct amdgpu_ctx *ctx)

+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32hw_ip)

  {
-    unsigned i, j;
-    int r;
+    struct amdgpu_device *adev = ctx->adev;
+    struct drm_gpu_scheduler **scheds;
+    struct drm_gpu_scheduler *sched;
+    unsigned num_scheds = 0;
+    enum drm_sched_priority priority;
+    int j, r;
  -    if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
-        return -EINVAL;
+    ctx->entities[hw_ip] = kcalloc(amdgpu_ctx_num_entities[hw_ip],
+                       sizeof(struct amdgpu_ctx_entity), GFP_KERNEL);
  -    r = amdgpu_ctx_priority_permit(filp, priority);
-    if (r)
-        return r;
+    if (!ctx->entities[hw_ip])
+        return  -ENOMEM;
  -    memset(ctx, 0, sizeof(*ctx));
-    ctx->adev = adev;
+    for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
  -    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-        ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
-                       sizeof(struct amdgpu_ctx_entity),
-                       GFP_KERNEL);
+        struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
  -        if (!ctx->entities[0]) {
-            r =  -ENOMEM;
-            goto error_cleanup_entity_memory;
+        entity->sequence = 1;
+        entity->fences = kcalloc(amdgpu_sched_jobs,
+                     sizeof(struct dma_fence*), GFP_KERNEL);
+        if (!entity->fences) {
+            r = -ENOMEM;
+            goto error_cleanup_memory;
          }
      }
  -    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
-            struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
-
-            entity->sequence = 1;
-            entity->fences = kcalloc(amdgpu_sched_jobs,
-                         sizeof(struct dma_fence*), GFP_KERNEL);
-            if (!entity->fences) {
-                r = -ENOMEM;
-                goto error_cleanup_memory;
-            }
-        }
-    }
-
-    kref_init(&ctx->refcount);
-    spin_lock_init(&ctx->ring_lock);
-    mutex_init(&ctx->lock);
-
-    ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
-    ctx->reset_counter_query = ctx->reset_counter;
-    ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
-    ctx->init_priority = priority;
-    ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
-
-    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-        struct drm_gpu_scheduler **scheds;
-        struct drm_gpu_scheduler *sched;
-        unsigned num_scheds = 0;
-
-        switch (i) {
+    priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+                ctx->init_priority : ctx->override_priority;
+    switch (hw_ip) {
          case AMDGPU_HW_IP_GFX:
              sched = &adev->gfx.gfx_ring[0].sched;
              scheds = &sched;

@@ -157,12 +132,12 @@ static int amdgpu_ctx_init(struct amdgpu_device*adev,

              scheds = adev->jpeg.jpeg_sched;
              num_scheds =  adev->jpeg.num_jpeg_sched;
              break;
-        }
+    }
  -        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-            r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-                          priority, scheds,
-                          num_scheds, &ctx->guilty);
+    for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
+        r = drm_sched_entity_init(&ctx->entities[hw_ip][j].entity,
+                      priority, scheds,
+                      num_scheds, &ctx->guilty);
          if (r)
              goto error_cleanup_entities;
      }

@@ -170,30 +145,51 @@ static int amdgpu_ctx_init(struct amdgpu_device*adev,

      return 0;
    error_cleanup_entities:
-    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
- drm_sched_entity_destroy(&ctx->entities[i][j].entity);
-    }
+    for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j)
+ drm_sched_entity_destroy(&ctx->entities[hw_ip][j].entity);
    error_cleanup_memory:
-    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
-            struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
+    for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
+        struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
  -            kfree(entity->fences);
-            entity->fences = NULL;
-        }
+        kfree(entity->fences);
+        entity->fences = NULL;
      }
  -error_cleanup_entity_memory:
-    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-        kfree(ctx->entities[i]);
-        ctx->entities[i] = NULL;
-    }
+    kfree(ctx->entities[hw_ip]);
+    ctx->entities[hw_ip] = NULL;
        return r;
  }
  +static int amdgpu_ctx_init(struct amdgpu_device *adev,
+               enum drm_sched_priority priority,
+               struct drm_file *filp,
+               struct amdgpu_ctx *ctx)
+{
+    int r;
+
+    r = amdgpu_ctx_priority_permit(filp, priority);
+    if (r)
+        return r;
+
+    memset(ctx, 0, sizeof(*ctx));
+    ctx->adev = adev;
+
+    kref_init(&ctx->refcount);
+    spin_lock_init(&ctx->ring_lock);
+    mutex_init(&ctx->lock);
+
+    ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+    ctx->reset_counter_query = ctx->reset_counter;
+    ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+    ctx->init_priority = priority;
+    ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+    return 0;
+
+}
+
  static void amdgpu_ctx_fini(struct kref *ref)
  {

struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx,refcount);

@@ -204,7 +200,14 @@ static void amdgpu_ctx_fini(struct kref *ref)
          return;
      for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
          for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
-            struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
+            struct amdgpu_ctx_entity *entity;
+
+            if (!ctx->entities[i])
+                continue;
+
+            entity = &ctx->entities[i][j];
+            if (!entity->fences)
+                continue;
                for (k = 0; k < amdgpu_sched_jobs; ++k)
                  dma_fence_put(entity->fences[k]);

@@ -241,6 +244,9 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx,u32 hw_ip, u32 instance,

          return -EINVAL;
      }
  +    if (ctx->entities[hw_ip] == NULL)
+        amdgpu_ctx_init_entity(ctx, hw_ip);
+
      *entity = &ctx->entities[hw_ip][ring].entity;
      return 0;
  }
@@ -285,8 +291,11 @@ static void amdgpu_ctx_do_release(struct kref *ref)
        ctx = container_of(ref, struct amdgpu_ctx, refcount);
      for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
+        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+            if (!ctx->entities[i])
+                continue;
drm_sched_entity_destroy(&ctx->entities[i][j].entity);
+        }
      }
        amdgpu_ctx_fini(ref);

@@ -579,6 +588,9 @@ long amdgpu_ctx_mgr_entity_flush(structamdgpu_ctx_mgr *mgr, long timeout)

              for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
                  struct drm_sched_entity *entity;
  +                if (!ctx->entities[i])
+                    continue;
+
                  entity = &ctx->entities[i][j].entity;
                  timeout = drm_sched_entity_flush(entity, timeout);
              }

@@ -601,11 +613,13 @@ void amdgpu_ctx_mgr_entity_fini(structamdgpu_ctx_mgr *mgr)

              DRM_ERROR("ctx %p is still alive\n", ctx);
              continue;
          }
-
          for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
              for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
                  struct drm_sched_entity *entity;
  +                if (!ctx->entities[i])
+                    continue;
+
                  entity = &ctx->entities[i][j].entity;
                  drm_sched_entity_fini(entity);
              }

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 2/2] drm/amdgpu: allocate entities on demand

Reply via email to