On 07/11/2018 11:05 AM, Christian König wrote:
Am 11.07.2018 um 16:51 schrieb Andrey Grodzovsky:
This change is to support MESA performace optimization.
Modify CS IOCTL to allow its input as command buffer and an array of
buffer handles to create a temporay bo list and then destroy it
when IOCTL completes.
This saves on calling for BO_LIST create and destry IOCTLs in MESA
and by this improves performance.

Signed-off-by: Andrey Grodzovsky <andrey.grodzov...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 12 ++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 62 ++++++++++++++++++-----------   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 52 +++++++++++++++++++++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  3 +-
  include/uapi/drm/amdgpu_drm.h               |  1 +
  5 files changed, 101 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8eaba0f..e082eba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -732,6 +732,16 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
                   struct list_head *validated);
  void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
  void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+                      struct drm_amdgpu_bo_list_entry **info_param);
+
+int amdgpu_bo_list_create(struct amdgpu_device *adev,
+                 struct drm_file *filp,
+                 struct drm_amdgpu_bo_list_entry *info,
+                 unsigned num_entries,
+                 int *id);
+
+void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id);
    /*
   * GFX stuff
@@ -1048,6 +1058,8 @@ struct amdgpu_cs_parser {
        unsigned num_post_dep_syncobjs;
      struct drm_syncobj **post_dep_syncobjs;
+
+    bool destroy_bo_list;

I think you don't need this. IIRC the bo_list structure was reference counted.

So all you need to do is to make sure that the temporary bo_list you create has a reference count of 1 and so is destroyed when the CS IOCTL calls amdgpu_bo_list_put() on it.

That would simplify the patch quite a bit.

amdgpu_bo_list_destroy is essential because it removes the list from idr struct bo_list_handles, amdgpu_bo_list_put doesn't do it. Regarding the refcount, it's 2 because it's 1 on list create in amdgpu_cs_bo_handles_chunk->amdgpu_bo_list_create and then it's incremented to 2 in amdgpu_cs_parser_bos->amdgpu_bo_list_get. So  by calling amdgpu_bo_list_put and then amdgpu_bo_list_destroy the count properly
drops down to 0.

Andrey


Apart from that looks really good to me, especially that you only need a new chunk ID for the UAPI is quite nice.

Thanks,
Christian.

  };
    #define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command submit involves a preamble IB */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 92be7f6..9acdacc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -55,7 +55,7 @@ static void amdgpu_bo_list_release_rcu(struct kref *ref)
      kfree_rcu(list, rhead);
  }
  -static int amdgpu_bo_list_create(struct amdgpu_device *adev,
+int amdgpu_bo_list_create(struct amdgpu_device *adev,
                   struct drm_file *filp,
                   struct drm_amdgpu_bo_list_entry *info,
                   unsigned num_entries,
@@ -91,7 +91,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
      return 0;
  }
  -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
+void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
  {
      struct amdgpu_bo_list *list;
  @@ -263,49 +263,64 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
      kfree(list);
  }
  -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
-                struct drm_file *filp)
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+                      struct drm_amdgpu_bo_list_entry **info_param)
  {
-    const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
-
-    struct amdgpu_device *adev = dev->dev_private;
-    struct amdgpu_fpriv *fpriv = filp->driver_priv;
-    union drm_amdgpu_bo_list *args = data;
-    uint32_t handle = args->in.list_handle;
-    const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
-
      struct drm_amdgpu_bo_list_entry *info;
-    struct amdgpu_bo_list *list;
-
      int r;
+    const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
+    const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
  -    info = kvmalloc_array(args->in.bo_number,
+    info = kvmalloc_array(in->bo_number,
                   sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
      if (!info)
          return -ENOMEM;
        /* copy the handle array from userspace to a kernel buffer */
      r = -EFAULT;
-    if (likely(info_size == args->in.bo_info_size)) {
-        unsigned long bytes = args->in.bo_number *
-            args->in.bo_info_size;
+    if (likely(info_size == in->bo_info_size)) {
+        unsigned long bytes = in->bo_number *
+            in->bo_info_size;
            if (copy_from_user(info, uptr, bytes))
              goto error_free;
        } else {
-        unsigned long bytes = min(args->in.bo_info_size, info_size);
+        unsigned long bytes = min(in->bo_info_size, info_size);
          unsigned i;
  -        memset(info, 0, args->in.bo_number * info_size);
-        for (i = 0; i < args->in.bo_number; ++i) {
+        memset(info, 0, in->bo_number * info_size);
+        for (i = 0; i < in->bo_number; ++i) {
              if (copy_from_user(&info[i], uptr, bytes))
                  goto error_free;
  -            uptr += args->in.bo_info_size;
+            uptr += in->bo_info_size;
          }
      }
  +    *info_param = info;
+    return 0;
+
+error_free:
+    kvfree(info);
+    return r;
+}
+
+int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
+                struct drm_file *filp)
+{
+    struct amdgpu_device *adev = dev->dev_private;
+    struct amdgpu_fpriv *fpriv = filp->driver_priv;
+    union drm_amdgpu_bo_list *args = data;
+    uint32_t handle = args->in.list_handle;
+    struct drm_amdgpu_bo_list_entry *info = NULL;
+    struct amdgpu_bo_list *list;
+    int r;
+
+    r = amdgpu_bo_create_list_entry_array(&args->in, &info);
+    if (r)
+        goto error_free;
+
      switch (args->in.operation) {
      case AMDGPU_BO_LIST_OP_CREATE:
          r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number, @@ -345,6 +360,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
      return 0;
    error_free:
-    kvfree(info);
+    if (info)
+        kvfree(info);
      return r;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9881a1e..1b17f6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -66,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
      return 0;
  }
  -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
+                      struct drm_amdgpu_bo_list_in *data,
+                      int *id)
+{
+    int r;
+    struct drm_amdgpu_bo_list_entry *info = NULL;
+
+    r = amdgpu_bo_create_list_entry_array(data, &info);
+    if (r)
+        return r;
+
+    r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, id);
+    if (r)
+        goto error_free;
+
+    kvfree(info);
+    return 0;
+
+error_free:
+    if (info)
+        kvfree(info);
+
+    return r;
+}
+
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
  {
      struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
      struct amdgpu_vm *vm = &fpriv->vm;
-    union drm_amdgpu_cs *cs = data;
      uint64_t *chunk_array_user;
      uint64_t *chunk_array;
      unsigned size, num_ibs = 0;
@@ -164,6 +188,20 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                break;
  +        case AMDGPU_CHUNK_ID_BO_HANDLES:
+            size = sizeof(struct drm_amdgpu_bo_list_in);
+            if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+                ret = -EINVAL;
+                goto free_partial_kdata;
+            }
+
+            ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata, &cs->in.bo_list_handle);
+            p->destroy_bo_list = true;
+            if (ret)
+                goto free_partial_kdata;
+
+            break;
+
          case AMDGPU_CHUNK_ID_DEPENDENCIES:
          case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
          case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -747,7 +785,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
   * used by parsing context.
   **/
  static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
-                  bool backoff)
+                  bool backoff, int id)
  {
      unsigned i;
  @@ -765,9 +803,13 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
          mutex_unlock(&parser->ctx->lock);
          amdgpu_ctx_put(parser->ctx);
      }
-    if (parser->bo_list)
+    if (parser->bo_list) {
          amdgpu_bo_list_put(parser->bo_list);
  +        if (parser->destroy_bo_list)
+ amdgpu_bo_list_destroy(parser->filp->driver_priv, id);
+    }
+
      for (i = 0; i < parser->nchunks; i++)
          kvfree(parser->chunks[i].kdata);
      kfree(parser->chunks);
@@ -1277,7 +1319,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
      r = amdgpu_cs_submit(&parser, cs);
    out:
-    amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+    amdgpu_cs_parser_fini(&parser, r, reserved_buffers, cs->in.bo_list_handle);
      return r;
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 06aede1..529500c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -69,9 +69,10 @@
   * - 3.24.0 - Add high priority compute support for gfx9
   * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
   * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
+ * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
   */
  #define KMS_DRIVER_MAJOR    3
-#define KMS_DRIVER_MINOR    26
+#define KMS_DRIVER_MINOR    27
  #define KMS_DRIVER_PATCHLEVEL    0
    int amdgpu_vram_limit = 0;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 09741ba..94444ee 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va {
  #define AMDGPU_CHUNK_ID_DEPENDENCIES    0x03
  #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
  #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
+#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
    struct drm_amdgpu_cs_chunk {
      __u32        chunk_id;


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to