The vram_region_gpu_offset() helper chases pointers through TTM resource manager structures on every call. An XXX comment above the helper notes that it sits in the VM bind data path and suggests calculating the value once and storing it, with a recalculation when the BO is moved.
Add a vram_gpu_offset field to struct xe_bo that is updated in xe_bo_move() at the 'out' label, which is the convergence point for all move paths. Provide xe_bo_vram_gpu_offset() as a static inline accessor. Convert the five callers that always operate on the BO's current resource: xe_pt_stage_bind() in xe_pt.c (the VM bind hot path), __xe_bo_addr() in xe_bo.c, xe_ggtt_map_bo() in xe_ggtt.c, lmtt_insert_bo() in xe_lmtt.c, and xe_migrate_access_memory() in xe_migrate.c. Two callers in xe_migrate.c (pte_update_size and emit_pte) are intentionally not converted because they receive a ttm_resource that may refer to a move destination rather than the BO's current placement. Replace the XXX comment on vram_region_gpu_offset() with proper kernel doc that directs callers to prefer the cached accessor. Add a KUnit live test (xe_bo_vram_gpu_offset_kunit) that validates cache consistency across BO creation in VRAM, eviction to system memory, and restoration back to VRAM. Tested on ASUS Zenbook S14 (Intel Core Ultra 7 258V) with xe_live_test module on Fedora. Note that the new test skips non-discrete devices, so on an integrated-graphics platform such as this one it is expected to be skipped; the VRAM paths still need a run on a discrete GPU. Signed-off-by: Yuri Martins <[email protected]> --- drivers/gpu/drm/xe/tests/xe_bo.c | 117 +++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_bo.c | 17 +++-- drivers/gpu/drm/xe/xe_bo.h | 15 ++++ drivers/gpu/drm/xe/xe_bo_types.h | 6 ++ drivers/gpu/drm/xe/xe_ggtt.c | 2 +- drivers/gpu/drm/xe/xe_lmtt.c | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_pt.c | 2 +- 8 files changed, 155 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 49c95ed67d7e..ed65e25c1b11 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -603,9 +603,126 @@ static void xe_bo_shrink_kunit(struct kunit *test) shrink_test_run_device(xe); } +/* + * Test that bo->vram_gpu_offset is kept in sync with the value computed + * by vram_region_gpu_offset() across BO creation, eviction to system + * memory, and restoration back to VRAM.
+ */ +static void vram_gpu_offset_test_run_tile(struct xe_device *xe, + struct xe_tile *tile, + struct kunit *test) +{ + struct xe_bo *bo; + unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; + u64 expected; + long timeout; + int ret; + + kunit_info(test, "Testing vram_gpu_offset cache on tile %u\n", tile->id); + + bo = xe_bo_create_user(xe, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, + bo_flags, exec); + if (IS_ERR(bo)) { + KUNIT_FAIL(test, "Failed to create bo: %pe\n", bo); + return; + } + + xe_bo_lock(bo, false); + + /* After creation the BO should be in VRAM on DGFX. */ + ret = xe_bo_validate(bo, NULL, false, exec); + if (ret) { + KUNIT_FAIL(test, "Failed to validate bo: %d\n", ret); + goto out_unlock; + } + + /* Wait for any async clears. */ + timeout = dma_resv_wait_timeout(bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL, false, 5 * HZ); + if (timeout <= 0) { + KUNIT_FAIL(test, "Timeout waiting for bo after validate.\n"); + goto out_unlock; + } + + expected = vram_region_gpu_offset(bo->ttm.resource); + KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), expected); + + if (xe_bo_is_vram(bo)) + KUNIT_EXPECT_NE(test, xe_bo_vram_gpu_offset(bo), 0); + + /* Evict to system memory — cache must become 0. */ + ret = xe_bo_evict(bo, exec); + if (ret) { + KUNIT_FAIL(test, "Failed to evict bo: %d\n", ret); + goto out_unlock; + } + + timeout = dma_resv_wait_timeout(bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL, false, 5 * HZ); + if (timeout <= 0) { + KUNIT_FAIL(test, "Timeout waiting for bo after evict.\n"); + goto out_unlock; + } + + expected = vram_region_gpu_offset(bo->ttm.resource); + KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), expected); + KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), (u64)0); + + /* Restore back to VRAM — cache must be updated again. 
*/ + ret = xe_bo_validate(bo, NULL, false, exec); + if (ret) { + KUNIT_FAIL(test, "Failed to validate bo back to vram: %d\n", ret); + goto out_unlock; + } + + timeout = dma_resv_wait_timeout(bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL, false, 5 * HZ); + if (timeout <= 0) { + KUNIT_FAIL(test, "Timeout waiting for bo after restore.\n"); + goto out_unlock; + } + + expected = vram_region_gpu_offset(bo->ttm.resource); + KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), expected); + + if (xe_bo_is_vram(bo)) + KUNIT_EXPECT_NE(test, xe_bo_vram_gpu_offset(bo), 0); + +out_unlock: + xe_bo_unlock(bo); + xe_bo_put(bo); +} + +static int vram_gpu_offset_test_run_device(struct xe_device *xe) +{ + struct kunit *test = kunit_get_current_test(); + struct xe_tile *tile; + int id; + + if (!IS_DGFX(xe)) { + kunit_skip(test, "non-discrete device\n"); + return 0; + } + + guard(xe_pm_runtime)(xe); + for_each_tile(tile, xe, id) + vram_gpu_offset_test_run_tile(xe, tile, test); + + return 0; +} + +static void xe_bo_vram_gpu_offset_kunit(struct kunit *test) +{ + struct xe_device *xe = test->priv; + + vram_gpu_offset_test_run_device(xe); +} + static struct kunit_case xe_bo_tests[] = { KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param), KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param), + KUNIT_CASE_PARAM(xe_bo_vram_gpu_offset_kunit, xe_pci_live_device_gen_param), {} }; diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index a7c2dc7f224c..30c4fe326827 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1164,6 +1164,9 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, ret = xe_sriov_vf_ccs_attach_bo(bo); out: + bo->vram_gpu_offset = ttm_bo->resource ? 
+ vram_region_gpu_offset(ttm_bo->resource) : 0; + if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) && ttm_bo->ttm) { long timeout = dma_resv_wait_timeout(ttm_bo->base.resv, @@ -2908,9 +2911,15 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str return 0; } -/* - * XXX: This is in the VM bind data path, likely should calculate this once and - * store, with a recalculation if the BO is moved. +/** + * vram_region_gpu_offset - Compute GPU offset for a TTM resource's memory region + * @res: The TTM resource. + * + * Computes the GPU-visible offset for @res based on its current memory type. + * Callers that always operate on a BO's current resource should prefer + * xe_bo_vram_gpu_offset() which returns a cached value. + * + * Return: The GPU-visible offset, or 0 for system/TT memory. */ uint64_t vram_region_gpu_offset(struct ttm_resource *res) { @@ -3173,7 +3182,7 @@ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size) xe_res_first(bo->ttm.resource, page << PAGE_SHIFT, page_size, &cur); - return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource); + return cur.start + offset + xe_bo_vram_gpu_offset(bo); } } diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 68dea7d25a6b..d911f7327e8b 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -342,6 +342,21 @@ bool xe_bo_is_vm_bound(struct xe_bo *bo); bool xe_bo_has_single_placement(struct xe_bo *bo); uint64_t vram_region_gpu_offset(struct ttm_resource *res); +/** + * xe_bo_vram_gpu_offset - Return cached GPU offset for BO's memory region + * @bo: The buffer object. + * + * Returns the GPU offset for the BO's current memory region. This value + * is cached on the BO and updated whenever the BO is moved, avoiding + * repeated pointer chasing through TTM resource manager structures. + * + * Return: The GPU-visible offset, or 0 for system/TT memory. 
+ */ +static inline u64 xe_bo_vram_gpu_offset(struct xe_bo *bo) +{ + return bo->vram_gpu_offset; +} + bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *ctc, diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index ff8317bfc1ae..622a1ec8231e 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -109,6 +109,12 @@ struct xe_bo { */ u64 min_align; + /** + * @vram_gpu_offset: Cached GPU offset for BO's current memory + * region, updated on move. Protected by the BO's dma-resv lock. + */ + u64 vram_gpu_offset; + /** * @madv_purgeable: user space advise on BO purgeability, protected * by BO's dma-resv lock. diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index a848d1a41b9b..a24ae3e0e553 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -705,7 +705,7 @@ static void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, pte | xe_res_dma(&cur)); } else { /* Prepend GPU offset */ - pte |= vram_region_gpu_offset(bo->ttm.resource); + pte |= xe_bo_vram_gpu_offset(bo); for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 0c726eda9390..f9257ba1728b 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -492,7 +492,7 @@ static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo lmtt_assert(lmtt, IS_ALIGNED(xe_bo_size(bo), page_size)); lmtt_assert(lmtt, xe_bo_is_vram(bo)); - vram_offset = vram_region_gpu_offset(bo->ttm.resource); + vram_offset = xe_bo_vram_gpu_offset(bo); xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); while (cur.remaining) { addr = xe_res_dma(&cur); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index fc918b4fba54..bdaea4979e89 100644 --- 
a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -2499,7 +2499,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, do { struct dma_fence *__fence; - u64 vram_addr = vram_region_gpu_offset(bo->ttm.resource) + + u64 vram_addr = xe_bo_vram_gpu_offset(bo) + cursor.start; int current_bytes; u32 pitch; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 8e5f4f0dea3f..7cfce1a92a1a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -782,7 +782,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, } xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = (bo && !is_purged) ? vram_region_gpu_offset(bo->ttm.resource) : 0; + xe_walk.dma_offset = (bo && !is_purged) ? xe_bo_vram_gpu_offset(bo) : 0; if (!range) xe_bo_assert_held(bo); -- 2.43.0
