Re: [PATCH] drm/amd: fix debugfs access for discovery blob
On 8/7/2023 10:20, Alex Deucher wrote: On Tue, Aug 1, 2023 at 4:15 PM Mario Limonciello wrote: Accessing the blob for amdgpu discovery from debugfs triggers: [ 1924.487667] kernel BUG at mm/usercopy.c:102! usercopy_abort() explains that it needs to be solved by creating a cache to store the data. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2748#note_2023519 Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Although this avoids the issue, I've concluded it's an inappropriate fix and will abandon it. It turns out to spit out 64k discovery blobs that should have been 8k. 6.5-rc and ASDN already picked up a better solution. db3b5cb64a9c ("drm/amdgpu: Use apt name for FW reserved region") --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 17 ++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a3b86b86dc477..66a2251bdeba4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -791,8 +791,11 @@ struct amdgpu_device { bool accel_working; struct notifier_block acpi_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; - struct debugfs_blob_wrapper debugfs_vbios_blob; - struct debugfs_blob_wrapper debugfs_discovery_blob; +#if defined(CONFIG_DEBUG_FS) + struct debugfs_blob_wrapper debugfs_vbios_blob; + struct debugfs_blob_wrapper debugfs_discovery_blob; + struct kmem_cache *discovery_blob_cache; +#endif struct mutex srbm_mutex; /* GRBM index mutex. 
Protects concurrent access to GRBM index */ struct mutex grbm_idx_mutex; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 56e89e76ff179..55ea5be14b188 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2180,7 +2180,15 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) debugfs_create_blob("amdgpu_vbios", 0444, root, &adev->debugfs_vbios_blob); - adev->debugfs_discovery_blob.data = adev->mman.discovery_bin; + + adev->discovery_blob_cache = kmem_cache_create_usercopy("amdgpu_discovery", + adev->mman.discovery_tmr_size, + 0, 0, 0, + adev->mman.discovery_tmr_size, + NULL); + adev->debugfs_discovery_blob.data = kmem_cache_alloc(adev->discovery_blob_cache, GFP_KERNEL); + memcpy(adev->debugfs_discovery_blob.data, adev->mman.discovery_bin, + adev->mman.discovery_tmr_size); adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size; debugfs_create_blob("amdgpu_discovery", 0444, root, &adev->debugfs_discovery_blob); @@ -2188,6 +2196,12 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) return 0; } +void amdgpu_debugfs_fini(struct amdgpu_device *adev) +{ + kmem_cache_free(adev->discovery_blob_cache, adev->debugfs_discovery_blob.data); + kmem_cache_destroy(adev->discovery_blob_cache); +} + #else int amdgpu_debugfs_init(struct amdgpu_device *adev) { @@ -2197,4 +2211,5 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { return 0; } +inline void amdgpu_debugfs_fini(struct amdgpu_device *adev) {} #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0593ef8fe0a63..1a3b30dff5171 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2276,6 +2276,8 @@ amdgpu_pci_remove(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); + amdgpu_debugfs_fini(adev); + amdgpu_xcp_dev_unplug(adev); 
drm_dev_unplug(dev); -- 2.34.1
Re: [PATCH] drm/amd: fix debugfs access for discovery blob
On Tue, Aug 1, 2023 at 4:15 PM Mario Limonciello wrote: > > Accessing the blob for amdgpu discovery from debugfs triggers: > > [ 1924.487667] kernel BUG at mm/usercopy.c:102! > > usercopy_abort() explains that it needs to be solved by creating > a cache to store the data. > > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2748#note_2023519 > Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 17 ++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ > 3 files changed, 23 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index a3b86b86dc477..66a2251bdeba4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -791,8 +791,11 @@ struct amdgpu_device { > bool accel_working; > struct notifier_block acpi_nb; > struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; > - struct debugfs_blob_wrapper debugfs_vbios_blob; > - struct debugfs_blob_wrapper debugfs_discovery_blob; > +#if defined(CONFIG_DEBUG_FS) > + struct debugfs_blob_wrapper debugfs_vbios_blob; > + struct debugfs_blob_wrapper debugfs_discovery_blob; > + struct kmem_cache *discovery_blob_cache; > +#endif > struct mutex srbm_mutex; > /* GRBM index mutex. 
Protects concurrent access to GRBM index */ > struct mutex grbm_idx_mutex; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > index 56e89e76ff179..55ea5be14b188 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > @@ -2180,7 +2180,15 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) > debugfs_create_blob("amdgpu_vbios", 0444, root, > &adev->debugfs_vbios_blob); > > - adev->debugfs_discovery_blob.data = adev->mman.discovery_bin; > + > + adev->discovery_blob_cache = > kmem_cache_create_usercopy("amdgpu_discovery", > + > adev->mman.discovery_tmr_size, > + 0, 0, 0, > + > adev->mman.discovery_tmr_size, > + NULL); > + adev->debugfs_discovery_blob.data = > kmem_cache_alloc(adev->discovery_blob_cache, GFP_KERNEL); > + memcpy(adev->debugfs_discovery_blob.data, adev->mman.discovery_bin, > + adev->mman.discovery_tmr_size); > adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size; > debugfs_create_blob("amdgpu_discovery", 0444, root, > &adev->debugfs_discovery_blob); > @@ -2188,6 +2196,12 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) > return 0; > } > > +void amdgpu_debugfs_fini(struct amdgpu_device *adev) > +{ > + kmem_cache_free(adev->discovery_blob_cache, > adev->debugfs_discovery_blob.data); > + kmem_cache_destroy(adev->discovery_blob_cache); > +} > + > #else > int amdgpu_debugfs_init(struct amdgpu_device *adev) > { > @@ -2197,4 +2211,5 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) > { > return 0; > } > +inline void amdgpu_debugfs_fini(struct amdgpu_device *adev) {} > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index 0593ef8fe0a63..1a3b30dff5171 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -2276,6 +2276,8 @@ amdgpu_pci_remove(struct pci_dev *pdev) > struct drm_device *dev = pci_get_drvdata(pdev); > struct 
amdgpu_device *adev = drm_to_adev(dev); > > + amdgpu_debugfs_fini(adev); > + > amdgpu_xcp_dev_unplug(adev); > drm_dev_unplug(dev); > > -- > 2.34.1 >
[PATCH] drm/amd: fix debugfs access for discovery blob
Accessing the blob for amdgpu discovery from debugfs triggers: [ 1924.487667] kernel BUG at mm/usercopy.c:102! usercopy_abort() explains that it needs to be solved by creating a cache to store the data. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2748#note_2023519 Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 17 ++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a3b86b86dc477..66a2251bdeba4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -791,8 +791,11 @@ struct amdgpu_device { bool accel_working; struct notifier_block acpi_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; - struct debugfs_blob_wrapper debugfs_vbios_blob; - struct debugfs_blob_wrapper debugfs_discovery_blob; +#if defined(CONFIG_DEBUG_FS) + struct debugfs_blob_wrapper debugfs_vbios_blob; + struct debugfs_blob_wrapper debugfs_discovery_blob; + struct kmem_cache *discovery_blob_cache; +#endif struct mutex srbm_mutex; /* GRBM index mutex. 
Protects concurrent access to GRBM index */ struct mutex grbm_idx_mutex; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 56e89e76ff179..55ea5be14b188 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2180,7 +2180,15 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) debugfs_create_blob("amdgpu_vbios", 0444, root, &adev->debugfs_vbios_blob); - adev->debugfs_discovery_blob.data = adev->mman.discovery_bin; + + adev->discovery_blob_cache = kmem_cache_create_usercopy("amdgpu_discovery", + adev->mman.discovery_tmr_size, + 0, 0, 0, + adev->mman.discovery_tmr_size, + NULL); + adev->debugfs_discovery_blob.data = kmem_cache_alloc(adev->discovery_blob_cache, GFP_KERNEL); + memcpy(adev->debugfs_discovery_blob.data, adev->mman.discovery_bin, + adev->mman.discovery_tmr_size); adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size; debugfs_create_blob("amdgpu_discovery", 0444, root, &adev->debugfs_discovery_blob); @@ -2188,6 +2196,12 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) return 0; } +void amdgpu_debugfs_fini(struct amdgpu_device *adev) +{ + kmem_cache_free(adev->discovery_blob_cache, adev->debugfs_discovery_blob.data); + kmem_cache_destroy(adev->discovery_blob_cache); +} + #else int amdgpu_debugfs_init(struct amdgpu_device *adev) { @@ -2197,4 +2211,5 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { return 0; } +inline void amdgpu_debugfs_fini(struct amdgpu_device *adev) {} #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0593ef8fe0a63..1a3b30dff5171 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2276,6 +2276,8 @@ amdgpu_pci_remove(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); + amdgpu_debugfs_fini(adev); + amdgpu_xcp_dev_unplug(adev); 
drm_dev_unplug(dev); -- 2.34.1