Update the way drm_coredump_printer is used based on its documentation
and Xe's code: the main idea is to generate the final version in one go
and then use memcpy to return the chunks requested by the caller of
amdgpu_devcoredump_read.

The generation is moved to a separate worker thread.

This cuts the time to copy the dump from 40s to ~0s on my machine.

---
v3:
- removed adev->coredump_in_progress and instead use work as
  the synchronisation mechanism
- use kvfree instead of kfree
---

Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Acked-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  6 ++
 .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c  | 83 +++++++++++++++++--
 .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h  |  7 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  2 +
 4 files changed, 91 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 057c8bd2ad89..e31dac2421b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -328,6 +328,7 @@ struct kfd_vm_fault_info;
 struct amdgpu_hive_info;
 struct amdgpu_reset_context;
 struct amdgpu_reset_control;
+struct amdgpu_coredump_info;
 
 enum amdgpu_cp_irq {
        AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
@@ -1200,6 +1201,11 @@ struct amdgpu_device {
 
        struct amdgpu_reset_domain      *reset_domain;
 
+#ifdef CONFIG_DEV_COREDUMP
+       struct amdgpu_coredump_info     *coredump;
+       struct work_struct              coredump_work;
+#endif
+
        struct mutex                    benchmark_mutex;
 
        bool                            scpm_enabled;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
index 42a969512dcc..0c7fc3800f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -32,8 +32,13 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool 
skip_vram_check,
                     bool vram_lost, struct amdgpu_job *job)
 {
 }
+void amdgpu_coredump_init(struct amdgpu_device *adev)
+{
+}
 #else
 
+#define AMDGPU_CORE_DUMP_SIZE_MAX (256 * 1024 * 1024)
+
 const char *hw_ip_names[MAX_HWIP] = {
        [GC_HWIP]               = "GC",
        [HDP_HWIP]              = "HDP",
@@ -196,11 +201,9 @@ static void amdgpu_devcoredump_fw_info(struct 
amdgpu_device *adev,
 }
 
 static ssize_t
-amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
-                       void *data, size_t datalen)
+amdgpu_devcoredump_format(char *buffer, size_t count, struct 
amdgpu_coredump_info *coredump)
 {
        struct drm_printer p;
-       struct amdgpu_coredump_info *coredump = data;
        struct drm_print_iterator iter;
        struct amdgpu_vm_fault_info *fault_info;
        struct amdgpu_ip_block *ip_block;
@@ -208,7 +211,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t 
count,
 
        iter.data = buffer;
        iter.offset = 0;
-       iter.start = offset;
        iter.remain = count;
 
        p = drm_coredump_printer(&iter);
@@ -323,9 +325,63 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, 
size_t count,
        return count - iter.remain;
 }
 
+static ssize_t
+amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
+                       void *data, size_t datalen)
+{
+       struct amdgpu_coredump_info *coredump = data;
+       ssize_t byte_copied;
+
+       if (!coredump)
+               return -ENODEV;
+
+       if (!coredump->formatted)
+               return -ENODEV;
+
+       if (offset >= coredump->formatted_size)
+               return 0;
+
+       byte_copied = count < coredump->formatted_size - offset ? count :
+               coredump->formatted_size - offset;
+       memcpy(buffer, coredump->formatted + offset, byte_copied);
+
+       return byte_copied;
+}
+
 static void amdgpu_devcoredump_free(void *data)
 {
-       kfree(data);
+       struct amdgpu_coredump_info *coredump = data;
+
+       kvfree(coredump->formatted);
+       kvfree(data);
+}
+
+static void amdgpu_devcoredump_deferred_work(struct work_struct *work)
+{
+       struct amdgpu_device *adev = container_of(work, typeof(*adev), 
coredump_work);
+       struct amdgpu_coredump_info *coredump = adev->coredump;
+
+       /* Do a one-time preparation of the coredump output because
+        * repeatingly calling drm_coredump_printer is very slow.
+        */
+       coredump->formatted_size = amdgpu_devcoredump_format(
+               NULL, AMDGPU_CORE_DUMP_SIZE_MAX, coredump);
+       coredump->formatted = kvzalloc(coredump->formatted_size, GFP_KERNEL);
+       if (!coredump->formatted) {
+               amdgpu_devcoredump_free(coredump);
+               goto end;
+       }
+
+       amdgpu_devcoredump_format(coredump->formatted, 
coredump->formatted_size, coredump);
+
+       /* If there's an existing coredump for this device, the free function 
will be
+        * called immediately so coredump might be invalid after the call to 
dev_coredumpm.
+        */
+       dev_coredumpm(coredump->adev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
+                     amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+
+end:
+       adev->coredump = NULL;
 }
 
 void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
@@ -335,6 +391,10 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool 
skip_vram_check,
        struct amdgpu_coredump_info *coredump;
        struct drm_sched_job *s_job;
 
+       /* No need to generate a new coredump if there's one in progress 
already. */
+       if (work_pending(&adev->coredump_work))
+               return;
+
        coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
        if (!coredump)
                return;
@@ -361,11 +421,20 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool 
skip_vram_check,
 
        ktime_get_ts64(&coredump->reset_time);
 
-       dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
-                     amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+       /* Update the current coredump pointer (no lock needed, this function 
can only be called
+        * from a single thread)
+        */
+       adev->coredump = coredump;
+       /* Kick off coredump formatting to a worker thread. */
+       queue_work(system_unbound_wq, &adev->coredump_work);
 
        drm_info(dev, "AMDGPU device coredump file has been created\n");
        drm_info(dev, "Check your 
/sys/class/drm/card%d/device/devcoredump/data\n",
                 dev->primary->index);
 }
+
+void amdgpu_coredump_init(struct amdgpu_device *adev)
+{
+       INIT_WORK(&adev->coredump_work, amdgpu_devcoredump_deferred_work);
+}
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
index ef9772c6bcc9..b3582d0b4ca4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
@@ -35,12 +35,19 @@ struct amdgpu_coredump_info {
        struct amdgpu_device            *adev;
        struct amdgpu_task_info         reset_task_info;
        struct timespec64               reset_time;
+
        bool                            skip_vram_check;
        bool                            reset_vram_lost;
        struct amdgpu_ring              *ring;
+       /* Readable form of coredevdump, generate once to speed up
+        * reading it (see drm_coredump_printer's documentation).
+        */
+       ssize_t                         formatted_size;
+       char                            *formatted;
 };
 #endif
 
 void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
                     bool vram_lost, struct amdgpu_job *job);
+void amdgpu_coredump_init(struct amdgpu_device *adev);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 48540300b10a..1cb88955f651 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4503,6 +4503,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
        INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work);
 
+       amdgpu_coredump_init(adev);
+
        adev->gfx.gfx_off_req_count = 1;
        adev->gfx.gfx_off_residency = 0;
        adev->gfx.gfx_off_entrycount = 0;
-- 
2.43.0

Reply via email to