On 3/3/26 17:18, Pierre-Eric Pelloux-Prayer wrote: > Update the way drm_coredump_printer is used based on its documentation > and Xe's code: the main idea is to generate the final version in one go > and then use memcpy to return the chunks requested by the caller of > amdgpu_devcoredump_read. > > The generation is moved to a separate worker thread. > > This cuts the time to copy the dump from 40s to ~0s on my machine. > > --- > v3: > - removed adev->coredump_in_progress and instead use work as > the synchronisation mechanism > - use kvfree instead of kfree > --- > > Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]> > Acked-by: Alex Deucher <[email protected]>
Acked-by: Christian König <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 ++ > .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 83 +++++++++++++++++-- > .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h | 7 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 + > 4 files changed, 91 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 057c8bd2ad89..e31dac2421b4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -328,6 +328,7 @@ struct kfd_vm_fault_info; > struct amdgpu_hive_info; > struct amdgpu_reset_context; > struct amdgpu_reset_control; > +struct amdgpu_coredump_info; > > enum amdgpu_cp_irq { > AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0, > @@ -1200,6 +1201,11 @@ struct amdgpu_device { > > struct amdgpu_reset_domain *reset_domain; > > +#ifdef CONFIG_DEV_COREDUMP > + struct amdgpu_coredump_info *coredump; > + struct work_struct coredump_work; > +#endif > + > struct mutex benchmark_mutex; > > bool scpm_enabled; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c > index 42a969512dcc..0c7fc3800f17 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c > @@ -32,8 +32,13 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool > skip_vram_check, > bool vram_lost, struct amdgpu_job *job) > { > } > +void amdgpu_coredump_init(struct amdgpu_device *adev) > +{ > +} > #else > > +#define AMDGPU_CORE_DUMP_SIZE_MAX (256 * 1024 * 1024) > + > const char *hw_ip_names[MAX_HWIP] = { > [GC_HWIP] = "GC", > [HDP_HWIP] = "HDP", > @@ -196,11 +201,9 @@ static void amdgpu_devcoredump_fw_info(struct > amdgpu_device *adev, > } > > static ssize_t > -amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, > - void *data, size_t datalen) > +amdgpu_devcoredump_format(char *buffer, size_t count, struct > amdgpu_coredump_info 
*coredump) > { > struct drm_printer p; > - struct amdgpu_coredump_info *coredump = data; > struct drm_print_iterator iter; > struct amdgpu_vm_fault_info *fault_info; > struct amdgpu_ip_block *ip_block; > @@ -208,7 +211,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, > size_t count, > > iter.data = buffer; > iter.offset = 0; > - iter.start = offset; > iter.remain = count; > > p = drm_coredump_printer(&iter); > @@ -323,9 +325,63 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, > size_t count, > return count - iter.remain; > } > > +static ssize_t > +amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, > + void *data, size_t datalen) > +{ > + struct amdgpu_coredump_info *coredump = data; > + ssize_t byte_copied; > + > + if (!coredump) > + return -ENODEV; > + > + if (!coredump->formatted) > + return -ENODEV; > + > + if (offset >= coredump->formatted_size) > + return 0; > + > + byte_copied = count < coredump->formatted_size - offset ? count : > + coredump->formatted_size - offset; > + memcpy(buffer, coredump->formatted + offset, byte_copied); > + > + return byte_copied; > +} > + > static void amdgpu_devcoredump_free(void *data) > { > - kfree(data); > + struct amdgpu_coredump_info *coredump = data; > + > + kvfree(coredump->formatted); > + kvfree(data); > +} > + > +static void amdgpu_devcoredump_deferred_work(struct work_struct *work) > +{ > + struct amdgpu_device *adev = container_of(work, typeof(*adev), > coredump_work); > + struct amdgpu_coredump_info *coredump = adev->coredump; > + > + /* Do a one-time preparation of the coredump output because > + * repeatedly calling drm_coredump_printer is very slow. 
> + */ > + coredump->formatted_size = amdgpu_devcoredump_format( > + NULL, AMDGPU_CORE_DUMP_SIZE_MAX, coredump); > + coredump->formatted = kvzalloc(coredump->formatted_size, GFP_KERNEL); > + if (!coredump->formatted) { > + amdgpu_devcoredump_free(coredump); > + goto end; > + } > + > + amdgpu_devcoredump_format(coredump->formatted, > coredump->formatted_size, coredump); > + > + /* If there's an existing coredump for this device, the free function > will be > + * called immediately so coredump might be invalid after the call to > dev_coredumpm. > + */ > + dev_coredumpm(coredump->adev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, > + amdgpu_devcoredump_read, amdgpu_devcoredump_free); > + > +end: > + adev->coredump = NULL; > } > > void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, > @@ -335,6 +391,10 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool > skip_vram_check, > struct amdgpu_coredump_info *coredump; > struct drm_sched_job *s_job; > > + /* No need to generate a new coredump if there's one in progress > already. */ > + if (work_pending(&adev->coredump_work)) > + return; > + > coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); > if (!coredump) > return; > @@ -361,11 +421,20 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool > skip_vram_check, > > ktime_get_ts64(&coredump->reset_time); > > - dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, > - amdgpu_devcoredump_read, amdgpu_devcoredump_free); > + /* Update the current coredump pointer (no lock needed, this function > can only be called > + * from a single thread) > + */ > + adev->coredump = coredump; > + /* Kick off coredump formatting to a worker thread. 
*/ > + queue_work(system_unbound_wq, &adev->coredump_work); > > drm_info(dev, "AMDGPU device coredump file has been created\n"); > drm_info(dev, "Check your > /sys/class/drm/card%d/device/devcoredump/data\n", > dev->primary->index); > } > + > +void amdgpu_coredump_init(struct amdgpu_device *adev) > +{ > + INIT_WORK(&adev->coredump_work, amdgpu_devcoredump_deferred_work); > +} > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h > index ef9772c6bcc9..b3582d0b4ca4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h > @@ -35,12 +35,19 @@ struct amdgpu_coredump_info { > struct amdgpu_device *adev; > struct amdgpu_task_info reset_task_info; > struct timespec64 reset_time; > + > bool skip_vram_check; > bool reset_vram_lost; > struct amdgpu_ring *ring; > + /* Readable form of devcoredump, generate once to speed up > + * reading it (see drm_coredump_printer's documentation). > + */ > + ssize_t formatted_size; > + char *formatted; > }; > #endif > > void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, > bool vram_lost, struct amdgpu_job *job); > +void amdgpu_coredump_init(struct amdgpu_device *adev); > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 48540300b10a..1cb88955f651 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -4503,6 +4503,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, > INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); > INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work); > > + amdgpu_coredump_init(adev); > + > adev->gfx.gfx_off_req_count = 1; > adev->gfx.gfx_off_residency = 0; > adev->gfx.gfx_off_entrycount = 0;
