On 8/7/2023 10:20, Alex Deucher wrote:
On Tue, Aug 1, 2023 at 4:15 PM Mario Limonciello
<mario.limoncie...@amd.com> wrote:

Accessing the blob for amdgpu discovery from debugfs triggers:

[ 1924.487667] kernel BUG at mm/usercopy.c:102!

usercopy_abort() explains that it needs to be solved by creating
a cache to store the data.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2748#note_2023519
Signed-off-by: Mario Limonciello <mario.limoncie...@amd.com>

Reviewed-by: Alex Deucher <alexander.deuc...@amd.com>

Although this avoids the issue, I've concluded it's an inappropriate fix and will abandon it. It turns out to spit out 64k discovery blobs that should have been 8k.

6.5-rc and ASDN already picked up a better solution.

db3b5cb64a9c ("drm/amdgpu: Use apt name for FW reserved region")

---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  7 +++++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 17 ++++++++++++++++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  2 ++
  3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a3b86b86dc477..66a2251bdeba4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -791,8 +791,11 @@ struct amdgpu_device {
         bool                            accel_working;
         struct notifier_block           acpi_nb;
         struct amdgpu_i2c_chan          *i2c_bus[AMDGPU_MAX_I2C_BUS];
-       struct debugfs_blob_wrapper     debugfs_vbios_blob;
-       struct debugfs_blob_wrapper     debugfs_discovery_blob;
+#if defined(CONFIG_DEBUG_FS)
+       struct debugfs_blob_wrapper     debugfs_vbios_blob;
+       struct debugfs_blob_wrapper     debugfs_discovery_blob;
+       struct kmem_cache               *discovery_blob_cache;
+#endif
         struct mutex                    srbm_mutex;
         /* GRBM index mutex. Protects concurrent access to GRBM index */
         struct mutex                    grbm_idx_mutex;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 56e89e76ff179..55ea5be14b188 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -2180,7 +2180,15 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
         debugfs_create_blob("amdgpu_vbios", 0444, root,
                             &adev->debugfs_vbios_blob);

-       adev->debugfs_discovery_blob.data = adev->mman.discovery_bin;
+
+       adev->discovery_blob_cache = 
kmem_cache_create_usercopy("amdgpu_discovery",
+                                                               
adev->mman.discovery_tmr_size,
+                                                               0, 0, 0,
+                                                               
adev->mman.discovery_tmr_size,
+                                                               NULL);
+       adev->debugfs_discovery_blob.data = 
kmem_cache_alloc(adev->discovery_blob_cache, GFP_KERNEL);
+       memcpy(adev->debugfs_discovery_blob.data, adev->mman.discovery_bin,
+              adev->mman.discovery_tmr_size);
         adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size;
         debugfs_create_blob("amdgpu_discovery", 0444, root,
                             &adev->debugfs_discovery_blob);
@@ -2188,6 +2196,12 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
         return 0;
  }

+void amdgpu_debugfs_fini(struct amdgpu_device *adev)
+{
+       kmem_cache_free(adev->discovery_blob_cache, 
adev->debugfs_discovery_blob.data);
+       kmem_cache_destroy(adev->discovery_blob_cache);
+}
+
  #else
  int amdgpu_debugfs_init(struct amdgpu_device *adev)
  {
@@ -2197,4 +2211,5 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
  {
         return 0;
  }
+inline void amdgpu_debugfs_fini(struct amdgpu_device *adev) {}
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0593ef8fe0a63..1a3b30dff5171 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2276,6 +2276,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
         struct drm_device *dev = pci_get_drvdata(pdev);
         struct amdgpu_device *adev = drm_to_adev(dev);

+       amdgpu_debugfs_fini(adev);
+
         amdgpu_xcp_dev_unplug(adev);
         drm_dev_unplug(dev);

--
2.34.1


Reply via email to