Re: [PATCH] drm/amd: fix debugfs access for discovery blob

2023-08-08 Thread Mario Limonciello

On 8/7/2023 10:20, Alex Deucher wrote:

On Tue, Aug 1, 2023 at 4:15 PM Mario Limonciello
 wrote:


Accessing the blob for amdgpu discovery from debugfs triggers:

[ 1924.487667] kernel BUG at mm/usercopy.c:102!

usercopy_abort() explains that it needs to be solved by creating
a cache to store the data.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2748#note_2023519
Signed-off-by: Mario Limonciello 


Reviewed-by: Alex Deucher 

Although this avoids the issue, I've concluded it's an inappropriate fix 
and will abandon it.  It turns out to spit out 64k discovery blobs that 
should have been 8k.


6.5-rc and ASDN already picked up a better solution.

db3b5cb64a9c ("drm/amdgpu: Use apt name for FW reserved region")


---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  7 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 17 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  2 ++
  3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a3b86b86dc477..66a2251bdeba4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -791,8 +791,11 @@ struct amdgpu_device {
 boolaccel_working;
 struct notifier_block   acpi_nb;
 struct amdgpu_i2c_chan  *i2c_bus[AMDGPU_MAX_I2C_BUS];
-   struct debugfs_blob_wrapper debugfs_vbios_blob;
-   struct debugfs_blob_wrapper debugfs_discovery_blob;
+#if defined(CONFIG_DEBUG_FS)
+   struct debugfs_blob_wrapper debugfs_vbios_blob;
+   struct debugfs_blob_wrapper debugfs_discovery_blob;
+   struct kmem_cache   *discovery_blob_cache;
+#endif
 struct mutexsrbm_mutex;
 /* GRBM index mutex. Protects concurrent access to GRBM index */
 struct mutexgrbm_idx_mutex;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 56e89e76ff179..55ea5be14b188 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -2180,7 +2180,15 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
 debugfs_create_blob("amdgpu_vbios", 0444, root,
 &adev->debugfs_vbios_blob);

-   adev->debugfs_discovery_blob.data = adev->mman.discovery_bin;
+
+   adev->discovery_blob_cache = 
kmem_cache_create_usercopy("amdgpu_discovery",
+   
adev->mman.discovery_tmr_size,
+   0, 0, 0,
+   
adev->mman.discovery_tmr_size,
+   NULL);
+   adev->debugfs_discovery_blob.data = 
kmem_cache_alloc(adev->discovery_blob_cache, GFP_KERNEL);
+   memcpy(adev->debugfs_discovery_blob.data, adev->mman.discovery_bin,
+  adev->mman.discovery_tmr_size);
 adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size;
 debugfs_create_blob("amdgpu_discovery", 0444, root,
 &adev->debugfs_discovery_blob);
@@ -2188,6 +2196,12 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
 return 0;
  }

+void amdgpu_debugfs_fini(struct amdgpu_device *adev)
+{
+   kmem_cache_free(adev->discovery_blob_cache, 
adev->debugfs_discovery_blob.data);
+   kmem_cache_destroy(adev->discovery_blob_cache);
+}
+
  #else
  int amdgpu_debugfs_init(struct amdgpu_device *adev)
  {
@@ -2197,4 +2211,5 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
  {
 return 0;
  }
+inline void amdgpu_debugfs_fini(struct amdgpu_device *adev) {}
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0593ef8fe0a63..1a3b30dff5171 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2276,6 +2276,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
 struct drm_device *dev = pci_get_drvdata(pdev);
 struct amdgpu_device *adev = drm_to_adev(dev);

+   amdgpu_debugfs_fini(adev);
+
 amdgpu_xcp_dev_unplug(adev);
 drm_dev_unplug(dev);

--
2.34.1





Re: [PATCH] drm/amd: fix debugfs access for discovery blob

2023-08-07 Thread Alex Deucher
On Tue, Aug 1, 2023 at 4:15 PM Mario Limonciello
 wrote:
>
> Accessing the blob for amdgpu discovery from debugfs triggers:
>
> [ 1924.487667] kernel BUG at mm/usercopy.c:102!
>
> usercopy_abort() explains that it needs to be solved by creating
> a cache to store the data.
>
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2748#note_2023519
> Signed-off-by: Mario Limonciello 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  7 +--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 17 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  2 ++
>  3 files changed, 23 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index a3b86b86dc477..66a2251bdeba4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -791,8 +791,11 @@ struct amdgpu_device {
> boolaccel_working;
> struct notifier_block   acpi_nb;
> struct amdgpu_i2c_chan  *i2c_bus[AMDGPU_MAX_I2C_BUS];
> -   struct debugfs_blob_wrapper debugfs_vbios_blob;
> -   struct debugfs_blob_wrapper debugfs_discovery_blob;
> +#if defined(CONFIG_DEBUG_FS)
> +   struct debugfs_blob_wrapper debugfs_vbios_blob;
> +   struct debugfs_blob_wrapper debugfs_discovery_blob;
> +   struct kmem_cache   *discovery_blob_cache;
> +#endif
> struct mutexsrbm_mutex;
> /* GRBM index mutex. Protects concurrent access to GRBM index */
> struct mutexgrbm_idx_mutex;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index 56e89e76ff179..55ea5be14b188 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -2180,7 +2180,15 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
> debugfs_create_blob("amdgpu_vbios", 0444, root,
> &adev->debugfs_vbios_blob);
>
> -   adev->debugfs_discovery_blob.data = adev->mman.discovery_bin;
> +
> +   adev->discovery_blob_cache = 
> kmem_cache_create_usercopy("amdgpu_discovery",
> +   
> adev->mman.discovery_tmr_size,
> +   0, 0, 0,
> +   
> adev->mman.discovery_tmr_size,
> +   NULL);
> +   adev->debugfs_discovery_blob.data = 
> kmem_cache_alloc(adev->discovery_blob_cache, GFP_KERNEL);
> +   memcpy(adev->debugfs_discovery_blob.data, adev->mman.discovery_bin,
> +  adev->mman.discovery_tmr_size);
> adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size;
> debugfs_create_blob("amdgpu_discovery", 0444, root,
> &adev->debugfs_discovery_blob);
> @@ -2188,6 +2196,12 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
> return 0;
>  }
>
> +void amdgpu_debugfs_fini(struct amdgpu_device *adev)
> +{
> +   kmem_cache_free(adev->discovery_blob_cache, 
> adev->debugfs_discovery_blob.data);
> +   kmem_cache_destroy(adev->discovery_blob_cache);
> +}
> +
>  #else
>  int amdgpu_debugfs_init(struct amdgpu_device *adev)
>  {
> @@ -2197,4 +2211,5 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
>  {
> return 0;
>  }
> +inline void amdgpu_debugfs_fini(struct amdgpu_device *adev) {}
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 0593ef8fe0a63..1a3b30dff5171 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2276,6 +2276,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
> struct drm_device *dev = pci_get_drvdata(pdev);
> struct amdgpu_device *adev = drm_to_adev(dev);
>
> +   amdgpu_debugfs_fini(adev);
> +
> amdgpu_xcp_dev_unplug(adev);
> drm_dev_unplug(dev);
>
> --
> 2.34.1
>


[PATCH] drm/amd: fix debugfs access for discovery blob

2023-08-01 Thread Mario Limonciello
Accessing the blob for amdgpu discovery from debugfs triggers:

[ 1924.487667] kernel BUG at mm/usercopy.c:102!

usercopy_abort() explains that it needs to be solved by creating
a cache to store the data.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2748#note_2023519
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |  7 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 17 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  2 ++
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a3b86b86dc477..66a2251bdeba4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -791,8 +791,11 @@ struct amdgpu_device {
boolaccel_working;
struct notifier_block   acpi_nb;
struct amdgpu_i2c_chan  *i2c_bus[AMDGPU_MAX_I2C_BUS];
-   struct debugfs_blob_wrapper debugfs_vbios_blob;
-   struct debugfs_blob_wrapper debugfs_discovery_blob;
+#if defined(CONFIG_DEBUG_FS)
+   struct debugfs_blob_wrapper debugfs_vbios_blob;
+   struct debugfs_blob_wrapper debugfs_discovery_blob;
+   struct kmem_cache   *discovery_blob_cache;
+#endif
struct mutexsrbm_mutex;
/* GRBM index mutex. Protects concurrent access to GRBM index */
struct mutexgrbm_idx_mutex;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 56e89e76ff179..55ea5be14b188 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -2180,7 +2180,15 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
debugfs_create_blob("amdgpu_vbios", 0444, root,
&adev->debugfs_vbios_blob);
 
-   adev->debugfs_discovery_blob.data = adev->mman.discovery_bin;
+
+   adev->discovery_blob_cache = 
kmem_cache_create_usercopy("amdgpu_discovery",
+   
adev->mman.discovery_tmr_size,
+   0, 0, 0,
+   
adev->mman.discovery_tmr_size,
+   NULL);
+   adev->debugfs_discovery_blob.data = 
kmem_cache_alloc(adev->discovery_blob_cache, GFP_KERNEL);
+   memcpy(adev->debugfs_discovery_blob.data, adev->mman.discovery_bin,
+  adev->mman.discovery_tmr_size);
adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size;
debugfs_create_blob("amdgpu_discovery", 0444, root,
&adev->debugfs_discovery_blob);
@@ -2188,6 +2196,12 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
return 0;
 }
 
+void amdgpu_debugfs_fini(struct amdgpu_device *adev)
+{
+   kmem_cache_free(adev->discovery_blob_cache, 
adev->debugfs_discovery_blob.data);
+   kmem_cache_destroy(adev->discovery_blob_cache);
+}
+
 #else
 int amdgpu_debugfs_init(struct amdgpu_device *adev)
 {
@@ -2197,4 +2211,5 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
 {
return 0;
 }
+inline void amdgpu_debugfs_fini(struct amdgpu_device *adev) {}
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0593ef8fe0a63..1a3b30dff5171 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2276,6 +2276,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
 
+   amdgpu_debugfs_fini(adev);
+
amdgpu_xcp_dev_unplug(adev);
drm_dev_unplug(dev);
 
-- 
2.34.1