Some platforms report an invalid, excessively large IP discovery TMR size.
This causes amdgpu_discovery_init() to attempt a very large kmalloc()
allocation, which triggers page allocator warnings and makes the probe fail
with -ENOMEM.

Observed log excerpt:
  WARNING: mm/page_alloc.c:5216 at __alloc_frozen_pages_noprof+0x29e/0x340
  ...
  ___kmalloc_large_node+0xf2/0x130
  __kmalloc_noprof+0x442/0x6b0
  amdgpu_discovery_init+0x161/0xa00 [amdgpu]
 Fatal error during GPU init
 probe with driver amdgpu failed with error -12

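Fix by:
- validating the discovery size and falling back to DISCOVERY_TMR_SIZE when
  the size is zero or larger than DISCOVERY_TMR_SIZE; when the TMR is not in
  system memory and the VRAM size is known, the discovery offset is also
  reset to the legacy location in VRAM;
- using kvzalloc() for the discovery buffer allocation to avoid high-order
  contiguous-page allocation failures;
- using kvfree() on all release paths.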

Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 5a4e63e1ad93..a6b49378c495 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -329,7 +329,20 @@ static int amdgpu_discovery_get_tmr_info(struct 
amdgpu_device *adev,
                }
        }
 out:
-       adev->discovery.bin = kzalloc(adev->discovery.size, GFP_KERNEL);
+       if (!adev->discovery.size || adev->discovery.size > DISCOVERY_TMR_SIZE) 
{
+               dev_warn(adev->dev,
+                        "invalid discovery size 0x%x, fallback to default 
0x%x\n",
+                        adev->discovery.size, DISCOVERY_TMR_SIZE);
+               /*
+                * Some platforms may expose garbage TMR size through 
scratch/ACPI.
+                * Fall back to legacy layout in VRAM when available.
+                */
+               if (!*is_tmr_in_sysmem && vram_size)
+                       adev->discovery.offset = (vram_size << 20) - 
DISCOVERY_TMR_OFFSET;
+               adev->discovery.size = DISCOVERY_TMR_SIZE;
+       }
+
+       adev->discovery.bin = kvzalloc(adev->discovery.size, GFP_KERNEL);
        if (!adev->discovery.bin)
                return -ENOMEM;
        adev->discovery.debugfs_blob.data = adev->discovery.bin;
@@ -694,7 +707,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
        return 0;
 
 out:
-       kfree(adev->discovery.bin);
+       kvfree(adev->discovery.bin);
        adev->discovery.bin = NULL;
        if ((amdgpu_discovery != 2) &&
            (RREG32(mmIP_DISCOVERY_VERSION) == 4))
@@ -707,7 +720,7 @@ static void amdgpu_discovery_sysfs_fini(struct 
amdgpu_device *adev);
 void amdgpu_discovery_fini(struct amdgpu_device *adev)
 {
        amdgpu_discovery_sysfs_fini(adev);
-       kfree(adev->discovery.bin);
+       kvfree(adev->discovery.bin);
        adev->discovery.bin = NULL;
 }
 
-- 
2.49.0

Reply via email to