Fix a kernel oops in amdgpu_bo_validate_size() when an allocation is requested
with both the GTT and VRAM domains set (GTT | VRAM). The problem is that we
dereference the TTM resource manager of the requested type, GTT, which does not
exist (the pointer is NULL). The call originates in amdgpu_ttm_reserve_tmr() at
line 1716.

Dec 10 01:12:41 localhost.localdomain kernel: RIP: 0010:amdgpu_bo_create+0x8c/0x4b0 [amdgpu]
Dec 10 01:12:41 localhost.localdomain kernel: Code: c7 44 24 34 00 00 00 00 a8 30 0f 84 e6 01 00 00 49 63 f5 49 c1 e4 0c 48 89 34 24 a8 02 0f 84 ad 01 00 00 48 8b 85 d0 55 00 00 <4c> 3b 60 10 0f 83 b5 01 00 00 81 7b 0c 87 02 00 00 0f 86 61 03 00
Dec 10 01:12:41 localhost.localdomain kernel: RSP: 0018:ffffc3b580ba7980 EFLAGS: 00010202
Dec 10 01:12:41 localhost.localdomain kernel: RAX: 0000000000000000 RBX: ffffc3b580ba7a00 RCX: 0000000000000001
Dec 10 01:12:41 localhost.localdomain kernel: RDX: ffff9fa481586200 RSI: ffffc3b580ba7a00 RDI: ffff9fa481580000
Dec 10 01:12:41 localhost.localdomain kernel: RBP: ffff9fa481580000 R08: ffff9fa481586210 R09: 0000000000000000
Dec 10 01:12:41 localhost.localdomain kernel: R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000010000
Dec 10 01:12:41 localhost.localdomain kernel: R13: 0000000000000001 R14: ffff9fa481586210 R15: ffff9fa481586210
Dec 10 01:12:41 localhost.localdomain kernel: FS:  00007fc2505fbb40(0000) GS:ffff9fab4ed00000(0000) knlGS:0000000000000000
Dec 10 01:12:41 localhost.localdomain kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
Dec 10 01:12:41 localhost.localdomain kernel: CR2: 0000000000000010 CR3: 0000000128934000 CR4: 00000000003506e0
Dec 10 01:12:41 localhost.localdomain kernel: Call Trace:
Dec 10 01:12:41 localhost.localdomain kernel:  <TASK>
Dec 10 01:12:41 localhost.localdomain kernel:  amdgpu_bo_create_reserved+0x15d/0x1b0 [amdgpu]
Dec 10 01:12:41 localhost.localdomain kernel:  amdgpu_bo_create_kernel_at+0x54/0x1c0 [amdgpu]
Dec 10 01:12:41 localhost.localdomain kernel:  amdgpu_ttm_init+0x1ad/0x470 [amdgpu]
...

Cc: Alex Deucher <[email protected]>
Cc: Christian König <[email protected]>
Signed-off-by: Luben Tuikov <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index fd3ab4b5e5bb1f..e0f103f0ec2178 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -448,31 +448,26 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
 
        /*
         * If GTT is part of requested domains the check must succeed to
-        * allow fall back to GTT
+        * allow fall back to GTT.
+        *
+        * Note that allocations can request from either domain. For
+        * this reason, check either in non-exclusive way, and if
+        * neither satisfies, fail the validation.
         */
        if (domain & AMDGPU_GEM_DOMAIN_GTT) {
                man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
-
-               if (size < man->size)
+               if (man && size < man->size)
                        return true;
-               else
-                       goto fail;
        }
 
        if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
                man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
-
-               if (size < man->size)
+               if (man && size < man->size)
                        return true;
-               else
-                       goto fail;
        }
 
-
        /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */
-       return true;
 
-fail:
        DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
                  man->size);
        return false;

base-commit: 3c4ee2dc869cba283b0c667708090aefbc09aacf
prerequisite-patch-id: 3d9ce4e1252cf76ced92d755740a8df4f073d440
prerequisite-patch-id: c37f8050f6b285983085f62cd65f99fce034a0fb
prerequisite-patch-id: eff248bd978d8510bab4c51b960b71dd6a542138
prerequisite-patch-id: 539ef7082989c2fe194803c5b8041b931009397c
-- 
2.39.0.rc2

Reply via email to