[PATCH] drm/xe: Fix a build error

2024-01-27 Thread Oak Zeng
This fixes a build failure on drm-tip. The issue was introduced during
the merge of "drm/ttm: replace busy placement with flags v6": for some
reason, the xe_bo.c part of that change was not merged. Manually merge
the missing part to drm-tip.

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/xe/xe_bo.c | 33 +++--
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 686d716c5581..d6a193060cc0 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -38,22 +38,26 @@ static const struct ttm_place sys_placement_flags = {
 static struct ttm_placement sys_placement = {
.num_placement = 1,
.placement = &sys_placement_flags,
-   .num_busy_placement = 1,
-   .busy_placement = &sys_placement_flags,
 };
 
-static const struct ttm_place tt_placement_flags = {
-   .fpfn = 0,
-   .lpfn = 0,
-   .mem_type = XE_PL_TT,
-   .flags = 0,
+static const struct ttm_place tt_placement_flags[] = {
+   {
+   .fpfn = 0,
+   .lpfn = 0,
+   .mem_type = XE_PL_TT,
+   .flags = TTM_PL_FLAG_DESIRED,
+   },
+   {
+   .fpfn = 0,
+   .lpfn = 0,
+   .mem_type = XE_PL_SYSTEM,
+   .flags = TTM_PL_FLAG_FALLBACK,
+   }
 };
 
 static struct ttm_placement tt_placement = {
-   .num_placement = 1,
-   .placement = &tt_placement_flags,
-   .num_busy_placement = 1,
-   .busy_placement = &sys_placement_flags,
+   .num_placement = 2,
+   .placement = tt_placement_flags,
 };
 
 bool mem_type_is_vram(u32 mem_type)
@@ -230,8 +234,6 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
bo->placement = (struct ttm_placement) {
.num_placement = c,
.placement = bo->placements,
-   .num_busy_placement = c,
-   .busy_placement = bo->placements,
};
 
return 0;
@@ -251,7 +253,6 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
/* Don't handle scatter gather BOs */
if (tbo->type == ttm_bo_type_sg) {
placement->num_placement = 0;
-   placement->num_busy_placement = 0;
return;
}
 
@@ -1391,8 +1392,6 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
bo->placement = (struct ttm_placement) {
.num_placement = 1,
.placement = place,
-   .num_busy_placement = 1,
-   .busy_placement = place,
};
 
return 0;
@@ -2150,9 +2149,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
 
xe_place_from_ttm_type(mem_type, &requested);
placement.num_placement = 1;
-   placement.num_busy_placement = 1;
placement.placement = &requested;
-   placement.busy_placement = &requested;
 
/*
 * Stolen needs to be handled like below VRAM handling if we ever need
-- 
2.26.3



[PATCH 21/23] drm/xe/svm: GPU page fault support

2024-01-17 Thread Oak Zeng
On a gpu page fault of a virtual address, try to fault the virtual
address range into the gpu page table and let the HW retry the faulting
address.

Right now, we always migrate the whole vma which contains the fault
address to GPU. This is subject to change once a more sophisticated
migration policy is in place: deciding whether to migrate memory to GPU
or map it in place with CPU memory, and at which migration granularity.

There is a rather complicated locking strategy in this patch. See more
details in xe_svm_doc.h, lock design section.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c |   7 ++
 drivers/gpu/drm/xe/xe_svm.c  | 116 +++
 drivers/gpu/drm/xe/xe_svm.h  |   6 ++
 drivers/gpu/drm/xe/xe_svm_range.c|  43 ++
 4 files changed, 172 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c 
b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 467d68f8332e..462603abab8a 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -22,6 +22,7 @@
 #include "xe_pt.h"
 #include "xe_trace.h"
 #include "xe_vm.h"
+#include "xe_svm.h"
 
 enum fault_type {
NOT_PRESENT = 0,
@@ -131,6 +132,11 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
if (!vm || !xe_vm_in_fault_mode(vm))
return -EINVAL;
 
+   if (vm->svm) {
+   ret = xe_svm_handle_gpu_fault(vm, gt, pf);
+   goto put_vm;
+   }
+
 retry_userptr:
/*
 * TODO: Avoid exclusive lock if VM doesn't have userptrs, or
@@ -219,6 +225,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
if (ret >= 0)
ret = 0;
}
+put_vm:
xe_vm_put(vm);
 
return ret;
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 0c13690a19f5..1ade8d7f0ab2 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -12,6 +12,7 @@
 #include "xe_svm.h"
 #include 
 #include 
+#include 
 #include "xe_pt.h"
 #include "xe_assert.h"
 #include "xe_vm_types.h"
@@ -206,3 +207,118 @@ static int svm_populate_range(struct xe_svm_range *svm_range,
kvfree(pfns);
return ret;
 }
+
+/**
+ * svm_access_allowed() - Determine whether read and/or write access to a vma is allowed
+ *
+ * @write: true means read and write access; false means read-only access
+ */
+static bool svm_access_allowed(struct vm_area_struct *vma, bool write)
+{
+   unsigned long access = VM_READ;
+
+   if (write)
+   access |= VM_WRITE;
+
+   return (vma->vm_flags & access) == access;
+}
+
+/**
+ * svm_should_migrate() - Determine whether we should migrate a range to
+ * a destination memory region
+ *
+ * @range: The svm memory range to consider
+ * @dst_region: target destination memory region
+ * @is_atomic_fault: Is the intended migration triggered by an atomic access?
+ * On some platforms, we have to migrate memory to guarantee atomic correctness.
+ */
+static bool svm_should_migrate(struct xe_svm_range *range,
+   struct xe_mem_region *dst_region, bool is_atomic_fault)
+{
+   return true;
+}
+
+/**
+ * xe_svm_handle_gpu_fault() - gpu page fault handler for svm subsystem
+ *
+ * @vm: The vm of the fault.
+ * @gt: The gt hardware on which the fault happens.
+ * @pf: page fault descriptor
+ *
+ * Work out backing memory for the fault address, migrate memory from
+ * system memory to gpu vram if necessary, and map the fault address to
+ * GPU so the GPU HW can retry the last operation which caused the GPU
+ * page fault.
+ */
+int xe_svm_handle_gpu_fault(struct xe_vm *vm,
+   struct xe_gt *gt,
+   struct pagefault *pf)
+{
+   u8 access_type = pf->access_type;
+   u64 page_addr = pf->page_addr;
+   struct hmm_range hmm_range;
+   struct vm_area_struct *vma;
+   struct xe_svm_range *range;
+   struct mm_struct *mm;
+   struct xe_svm *svm;
+   int ret = 0;
+
+   svm = vm->svm;
+   if (!svm)
+   return -EINVAL;
+
+   mm = svm->mm;
+   mmap_read_lock(mm);
+   vma = find_vma_intersection(mm, page_addr, page_addr + 4);
+   if (!vma) {
+   mmap_read_unlock(mm);
+   return -ENOENT;
+   }
+
+   if (!svm_access_allowed (vma, access_type != ACCESS_TYPE_READ)) {
+   mmap_read_unlock(mm);
+   return -EPERM;
+   }
+
+   range = xe_svm_range_from_addr(svm, page_addr);
+   if (!range) {
+   range = xe_svm_range_create(svm, vma);
+   if (!range) {
+   mmap_read_unlock(mm);
+   return -ENOMEM;
+   }
+   }
+
+   if (svm_should_migrat
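
The rest of the handler follows the flow the commit message describes:
optionally migrate the range to vram, populate its pfns, and bind them to
the GPU page table before letting HW retry. A minimal sketch of that flow,
using helpers introduced elsewhere in this series and assuming
xe_bind_svm_range() returns an errno-style int (not the actual patch body):

	ret = xe_svm_range_register_mmu_notifier(range);
	if (ret) {
		mmap_read_unlock(mm);
		return ret;
	}

	if (svm_should_migrate(range, &gt->tile->mem.vram,
			       access_type == ACCESS_TYPE_ATOMIC))
		/* Migration is best effort; on failure we map system memory. */
		svm_migrate_range_to_vram(range, vma, gt->tile);

	ret = svm_populate_range(range, &hmm_range,
				 access_type != ACCESS_TYPE_READ);
	mmap_read_unlock(mm);
	if (ret)
		return ret;

	mutex_lock(&svm->mutex);
	if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) {
		/* CPU page table changed since read_begin; let HW fault again. */
		ret = -EAGAIN;
	} else {
		/* Assumed signature/return: bind the populated pfns to the GPU. */
		ret = xe_bind_svm_range(vm, gt->tile, &hmm_range, 0);
	}
	mutex_unlock(&svm->mutex);
	kvfree(hmm_range.hmm_pfns);

	return ret;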

[PATCH 17/23] drm/xe/svm: clean up svm range during process exit

2024-01-17 Thread Oak Zeng
Clean up svm ranges during process exit: zap the GPU page table of
the svm process on process exit; unregister all the mmu interval
notifiers that were registered earlier; free the svm ranges and the
svm data structure.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.c   | 24 
 drivers/gpu/drm/xe/xe_svm.h   |  1 +
 drivers/gpu/drm/xe/xe_svm_range.c | 17 +
 3 files changed, 42 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 6393251c0051..5772bfcf7da4 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -9,6 +9,8 @@
 #include 
 #include 
 #include "xe_pt.h"
+#include "xe_assert.h"
+#include "xe_vm_types.h"
 
 DEFINE_HASHTABLE(xe_svm_table, XE_MAX_SVM_PROCESS);
 
@@ -19,9 +21,31 @@ DEFINE_HASHTABLE(xe_svm_table, XE_MAX_SVM_PROCESS);
  */
 void xe_destroy_svm(struct xe_svm *svm)
 {
+#define MAX_SVM_RANGE (1024*1024)
+   struct xe_svm_range **range_array;
+   struct interval_tree_node *node;
+   struct xe_svm_range *range;
+   int i = 0;
+
+   range_array = kzalloc(sizeof(struct xe_svm_range *) * MAX_SVM_RANGE,
+   GFP_KERNEL);
+   node = interval_tree_iter_first(&svm->range_tree, 0, ~0ULL);
+   while (node) {
+   range = container_of(node, struct xe_svm_range, inode);
+   xe_svm_range_prepare_destroy(range);
+   node = interval_tree_iter_next(node, 0, ~0ULL);
+   xe_assert(svm->vm->xe, i < MAX_SVM_RANGE);
+   range_array[i++] = range;
+   }
+
+   /** Freeing a range (and thus range->inode) while traversing above is not safe */
+   for(; i >= 0; i--)
+   kfree(range_array[i]);
+
hash_del_rcu(&svm->hnode);
mutex_destroy(&svm->mutex);
kfree(svm);
+   kfree(range_array);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 0038f98c0cc7..5b3bd2c064f5 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -90,6 +90,7 @@ bool xe_svm_range_belongs_to_vma(struct mm_struct *mm,
struct 
vm_area_struct *vma);
 void xe_svm_range_unregister_mmu_notifier(struct xe_svm_range *range);
 int xe_svm_range_register_mmu_notifier(struct xe_svm_range *range);
+void xe_svm_range_prepare_destroy(struct xe_svm_range *range);
 
 int xe_svm_build_sg(struct hmm_range *range, struct sg_table *st);
 int xe_svm_devm_add(struct xe_tile *tile, struct xe_mem_region *mem);
diff --git a/drivers/gpu/drm/xe/xe_svm_range.c 
b/drivers/gpu/drm/xe/xe_svm_range.c
index 53dd3be7ab9f..dfb4660dc26f 100644
--- a/drivers/gpu/drm/xe/xe_svm_range.c
+++ b/drivers/gpu/drm/xe/xe_svm_range.c
@@ -165,3 +165,20 @@ int xe_svm_range_register_mmu_notifier(struct xe_svm_range *range)
range->mmu_notifier_registered = true;
return ret;
 }
+
+/**
+ * xe_svm_range_prepare_destroy() - prepare work to destroy a svm range
+ *
+ * @range: the svm range to destroy
+ *
+ * prepare for a svm range destroy: Zap this range from GPU, unregister mmu
+ * notifier.
+ */
+void xe_svm_range_prepare_destroy(struct xe_svm_range *range)
+{
+   struct xe_vm *vm = range->svm->vm;
+   unsigned long length = range->end - range->start;
+
+   xe_invalidate_svm_range(vm, range->start, length);
+   xe_svm_range_unregister_mmu_notifier(range);
+}
-- 
2.26.3



[PATCH 19/23] drm/xe/svm: migrate svm range to vram

2024-01-17 Thread Oak Zeng
Since the source pages of the svm range can be physically
non-contiguous, and the destination vram pages can also be
non-contiguous, there is no easy way to migrate multiple pages per
blitter command. We do page-by-page migration for now.

Migration is best effort. Even if we fail to migrate some pages,
we will try to migrate the remaining pages.

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.c |   7 ++
 drivers/gpu/drm/xe/xe_svm.h |   3 +
 drivers/gpu/drm/xe/xe_svm_migrate.c | 114 
 3 files changed, 124 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 5772bfcf7da4..44d4f4216a93 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -5,12 +5,19 @@
 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
 #include "xe_svm.h"
 #include 
 #include 
 #include "xe_pt.h"
 #include "xe_assert.h"
 #include "xe_vm_types.h"
+#include "xe_gt.h"
+#include "xe_migrate.h"
+#include "xe_trace.h"
 
 DEFINE_HASHTABLE(xe_svm_table, XE_MAX_SVM_PROCESS);
 
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 5b3bd2c064f5..659bcb7927d6 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -80,6 +80,9 @@ struct xe_svm_range {
 };
 
 vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf);
+int svm_migrate_range_to_vram(struct xe_svm_range *range,
+   struct vm_area_struct *vma,
+   struct xe_tile *tile);
 void xe_destroy_svm(struct xe_svm *svm);
 struct xe_svm *xe_create_svm(struct xe_vm *vm);
 struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm);
diff --git a/drivers/gpu/drm/xe/xe_svm_migrate.c 
b/drivers/gpu/drm/xe/xe_svm_migrate.c
index b4df411e04f3..3724ad6c7aea 100644
--- a/drivers/gpu/drm/xe/xe_svm_migrate.c
+++ b/drivers/gpu/drm/xe/xe_svm_migrate.c
@@ -229,3 +229,117 @@ vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf)
kvfree(buf);
return 0;
 }
+
+
+/**
+ * svm_migrate_range_to_vram() - migrate backing store of a va range to vram
+ * Must be called with mmap_read_lock(mm) held.
+ * @range: the va range to migrate. Range should only belong to one vma.
+ * @vma: the vma that this range belongs to. @range can cover whole @vma
+ * or a sub-range of @vma.
+ * @tile: the destination tile which holds the new backing store of the range
+ *
+ * Returns: negative errno on failure, 0 on success
+ */
+int svm_migrate_range_to_vram(struct xe_svm_range *range,
+   struct vm_area_struct *vma,
+   struct xe_tile *tile)
+{
+   struct mm_struct *mm = range->svm->mm;
+   unsigned long start = range->start;
+   unsigned long end = range->end;
+   unsigned long npages = (end - start) >> PAGE_SHIFT;
+   struct xe_mem_region *mr = &tile->mem.vram;
+   struct migrate_vma migrate = {
+   .vma= vma,
+   .start  = start,
+   .end= end,
+   .pgmap_owner= tile->xe->drm.dev,
+   .flags  = MIGRATE_VMA_SELECT_SYSTEM,
+   };
+   struct device *dev = tile->xe->drm.dev;
+   dma_addr_t *src_dma_addr;
+   struct dma_fence *fence;
+   struct page *src_page;
+   LIST_HEAD(blocks);
+   int ret = 0, i;
+   u64 dst_dpa;
+   void *buf;
+
+   mmap_assert_locked(mm);
+   xe_assert(tile->xe, xe_svm_range_belongs_to_vma(mm, range, vma));
+
+   buf = kvcalloc(npages, 2* sizeof(*migrate.src) + sizeof(*src_dma_addr),
+   GFP_KERNEL);
+   if(!buf)
+   return -ENOMEM;
+   migrate.src = buf;
+   migrate.dst = migrate.src + npages;
+   src_dma_addr = (dma_addr_t *) (migrate.dst + npages);
+   ret = xe_devm_alloc_pages(tile, npages, &blocks, migrate.dst);
+   if (ret)
+   goto kfree_buf;
+
+   ret = migrate_vma_setup(&migrate);
+   if (ret) {
+   drm_err(&tile->xe->drm, "vma setup returned %d for range [%lx - %lx]\n",
+   ret, start, end);
+   goto free_dst_pages;
+   }
+
+   trace_xe_svm_migrate_sram_to_vram(range);
+   /**FIXME: partial migration of a range
+* print a warning for now. If this message
+* is printed, we need to fall back to page by page
+* migration: only migrate pages with MIGRATE_PFN_MIGRATE
+*/
+   if (migrate.cpages != npages)
+   drm_warn(&tile->xe->drm, "Partial migration for ran
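
The per-page copy loop that the commit message describes is a straightforward
dma-map / blitter-copy / finalize sequence. A hypothetical sketch of it,
assuming xe_migrate_svm() (patch 10) returns a dma_fence like xe_migrate_copy()
and that the tile's migration context is reachable as tile->migrate (cleanup of
the buf/blocks on the error paths is elided):

	for (i = 0; i < npages; i++) {
		src_page = migrate_pfn_to_page(migrate.src[i]);
		if (unlikely(!src_page || !(migrate.src[i] & MIGRATE_PFN_MIGRATE)))
			continue;

		src_dma_addr[i] = dma_map_page(dev, src_page, 0, PAGE_SIZE,
					       DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(dev, src_dma_addr[i])))
			continue;

		/* migrate.dst[] was filled with raw vram pfns by xe_devm_alloc_pages() */
		dst_dpa = vram_pfn_to_dpa(mr, migrate.dst[i]);
		fence = xe_migrate_svm(tile->migrate, src_dma_addr[i], false,
				       dst_dpa, true, PAGE_SIZE);
		if (!IS_ERR(fence)) {
			dma_fence_wait(fence, false);
			dma_fence_put(fence);
		}
		dma_unmap_page(dev, src_dma_addr[i], PAGE_SIZE, DMA_TO_DEVICE);
		migrate.dst[i] = migrate_pfn(migrate.dst[i]);
	}

	migrate_vma_pages(&migrate);
	migrate_vma_finalize(&migrate);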

[PATCH 11/23] drm/xe/svm: implement functions to allocate and free device memory

2024-01-17 Thread Oak Zeng
Function xe_devm_alloc_pages allocates pages from drm buddy and performs
housekeeping work for all the pages allocated, such as taking a page
refcount, keeping a bitmap of all pages to denote whether a page is in
use, and putting pages on a drm lru list for eviction purposes.

Function xe_devm_free_blocks returns all memory blocks to the drm buddy
allocator.

Function xe_devm_page_free is a callback function from the hmm layer. It
is called whenever a page's refcount reaches 1. This function clears
the bit of this page in the bitmap. If all the bits in the bitmap are
cleared, it means all the pages have been freed, and we return all the pages
in this memory block back to drm buddy.

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.h|   9 ++
 drivers/gpu/drm/xe/xe_svm_devmem.c | 146 -
 2 files changed, 154 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index b54f7714a1fc..8551df2b9780 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -74,4 +74,13 @@ struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm *svm,
 int xe_svm_build_sg(struct hmm_range *range, struct sg_table *st);
 int xe_svm_devm_add(struct xe_tile *tile, struct xe_mem_region *mem);
 void xe_svm_devm_remove(struct xe_device *xe, struct xe_mem_region *mem);
+
+
+int xe_devm_alloc_pages(struct xe_tile *tile,
+   unsigned long npages,
+   struct list_head *blocks,
+   unsigned long *pfn);
+
+void xe_devm_free_blocks(struct list_head *blocks);
+void xe_devm_page_free(struct page *page);
 #endif
diff --git a/drivers/gpu/drm/xe/xe_svm_devmem.c 
b/drivers/gpu/drm/xe/xe_svm_devmem.c
index cf7882830247..445e0e1bc3b4 100644
--- a/drivers/gpu/drm/xe/xe_svm_devmem.c
+++ b/drivers/gpu/drm/xe/xe_svm_devmem.c
@@ -5,18 +5,162 @@
 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
 
 #include "xe_device_types.h"
 #include "xe_trace.h"
+#include "xe_migrate.h"
+#include "xe_ttm_vram_mgr_types.h"
+#include "xe_assert.h"
 
+/**
+ * struct xe_svm_block_meta - svm uses this data structure to manage each
+ * block allocated from drm buddy. This will be set to the drm_buddy_block's
+ * private field.
+ *
+ * @lru: used to link this block to drm's lru lists. This will be replaced
+ * with struct drm_lru_entity later.
+ * @tile: tile from which we allocated this block
+ * @bitmap: A bitmap of each page in this block. 1 means this page is used,
+ * 0 means this page is idle. When all bits of this block are 0, it is time
+ * to return this block to drm buddy subsystem.
+ */
+struct xe_svm_block_meta {
+   struct list_head lru;
+   struct xe_tile *tile;
+   unsigned long bitmap[];
+};
+
+static u64 block_offset_to_pfn(struct xe_mem_region *mr, u64 offset)
+{
+   /** DRM buddy's block offset is 0-based*/
+   offset += mr->hpa_base;
+
+   return PHYS_PFN(offset);
+}
+
+/**
+ * xe_devm_alloc_pages() - allocate device pages from buddy allocator
+ *
+ * @tile: which tile to allocate device memory from
+ * @npages: how many pages to allocate
+ * @blocks: used to return the allocated blocks
+ * @pfn: used to return the pfn of all allocated pages. Must be big enough
+ * to hold @npages entries.
+ *
+ * This function allocates blocks of memory from the drm buddy allocator, and
+ * performs initialization work: set struct page::zone_device_data to point
+ * to the memory block; set/initialize the drm_buddy_block::private field;
+ * lock_page for each page allocated; add the memory block to the lru manager's
+ * lru list - this is TBD.
+ *
+ * Return: 0 on success,
+ * error code otherwise
+ */
+int xe_devm_alloc_pages(struct xe_tile *tile,
+   unsigned long npages,
+   struct list_head *blocks,
+   unsigned long *pfn)
+{
+   struct drm_buddy *mm = &tile->mem.vram_mgr->mm;
+   struct drm_buddy_block *block, *tmp;
+   u64 size = npages << PAGE_SHIFT;
+   int ret = 0, i, j = 0;
+
+   ret = drm_buddy_alloc_blocks(mm, 0, mm->size, size, PAGE_SIZE,
+   blocks, DRM_BUDDY_TOPDOWN_ALLOCATION);
+
+   if (unlikely(ret))
+   return ret;
+
+   list_for_each_entry_safe(block, tmp, blocks, link) {
+   struct xe_mem_region *mr = &tile->mem.vram;
+   u64 block_pfn_first, pages_per_block;
+   struct xe_svm_block_meta *meta;
+   u32 meta_size;
+
+   size = drm_buddy_block_size(mm, block);
+   
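
A hypothetical sketch of the per-block bookkeeping the commit message
describes (allocate the bitmap metadata, hook it to the buddy block, and
initialize/record every page); this is not the actual patch body, and error
unwinding of already-allocated blocks is elided:

		pages_per_block = size >> PAGE_SHIFT;
		meta_size = sizeof(*meta) +
			    BITS_TO_LONGS(pages_per_block) * sizeof(long);
		meta = kzalloc(meta_size, GFP_KERNEL);
		if (!meta)
			return -ENOMEM;
		/* All pages of this block are handed out, so mark them used. */
		bitmap_fill(meta->bitmap, pages_per_block);
		meta->tile = tile;
		block->private = meta;

		block_pfn_first = block_offset_to_pfn(mr,
						drm_buddy_block_offset(block));
		for (i = 0; i < pages_per_block; i++) {
			struct page *page = pfn_to_page(block_pfn_first + i);

			/* Tie each struct page back to its buddy block. */
			page->zone_device_data = block;
			/* Takes the initial refcount and locks the page. */
			zone_device_page_init(page);
			pfn[j++] = block_pfn_first + i;
		}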

[PATCH 20/23] drm/xe/svm: Populate svm range

2024-01-17 Thread Oak Zeng
Add a helper function svm_populate_range to populate
a svm range. This function calls hmm_range_fault
to read CPU page tables and populate all pfns of this
virtual address range into an array, saved in
hmm_range::hmm_pfns. This is preparatory work to bind a svm
range to GPU. The hmm_pfns array will be used for the GPU binding.

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.c | 61 +
 1 file changed, 61 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 44d4f4216a93..0c13690a19f5 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -145,3 +145,64 @@ int xe_svm_build_sg(struct hmm_range *range,
sg_mark_end(sg);
return 0;
 }
+
+/** Populate physical pages of a virtual address range
+ * This function also reads the mmu notifier sequence # (
+ * mmu_interval_read_begin), for the purpose of later
+ * comparison (through mmu_interval_read_retry).
+ * This must be called with the mmap read or write lock held.
+ *
+ * This function allocates hmm_range->hmm_pfns; it is the caller's
+ * responsibility to free it.
+ *
+ * @svm_range: The svm range to populate
+ * @hmm_range: pointer to hmm_range struct. hmm_range->hmm_pfns
+ * will hold the populated pfns.
+ * @write: populate pages with write permission
+ *
+ * Returns: 0 for success; negative errno on failure
+ */
+static int svm_populate_range(struct xe_svm_range *svm_range,
+   struct hmm_range *hmm_range, bool write)
+{
+   unsigned long timeout =
+   jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+   unsigned long *pfns, flags = HMM_PFN_REQ_FAULT;
+   u64 npages;
+   int ret;
+
+   mmap_assert_locked(svm_range->svm->mm);
+
+   npages = ((svm_range->end - 1) >> PAGE_SHIFT) -
+   (svm_range->start >> PAGE_SHIFT) + 1;
+   pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+   if (unlikely(!pfns))
+   return -ENOMEM;
+
+   if (write)
+   flags |= HMM_PFN_REQ_WRITE;
+
+   memset64((u64 *)pfns, (u64)flags, npages);
+   hmm_range->hmm_pfns = pfns;
+   hmm_range->notifier_seq = mmu_interval_read_begin(&svm_range->notifier);
+   hmm_range->notifier = &svm_range->notifier;
+   hmm_range->start = svm_range->start;
+   hmm_range->end = svm_range->end;
+   hmm_range->pfn_flags_mask = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE;
+   hmm_range->dev_private_owner = svm_range->svm->vm->xe->drm.dev;
+
+   while (true) {
+   ret = hmm_range_fault(hmm_range);
+   if (time_after(jiffies, timeout))
+   goto free_pfns;
+
+   if (ret == -EBUSY)
+   continue;
+   break;
+   }
+
+free_pfns:
+   if (ret)
+   kvfree(pfns);
+   return ret;
+}
-- 
2.26.3
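
Once svm_populate_range() succeeds, each entry of hmm_range->hmm_pfns encodes
a pfn plus HMM_PFN_* flag bits, and a caller recovers the backing struct page
with hmm_pfn_to_page(), e.g. when building the sg_table used for the GPU
binding (xe_svm_build_sg() in this series). A minimal sketch of walking the
result, assuming a page-aligned range:

	u64 npages = (hmm_range->end - hmm_range->start) >> PAGE_SHIFT;
	u64 i;

	for (i = 0; i < npages; i++) {
		struct page *page;

		if (!(hmm_range->hmm_pfns[i] & HMM_PFN_VALID))
			continue;	/* this page was not faulted in */

		page = hmm_pfn_to_page(hmm_range->hmm_pfns[i]);
		/* page can now feed e.g. sg_alloc_table_from_pages() */
	}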



[PATCH 23/23] drm/xe/svm: Add svm memory hints interface

2024-01-17 Thread Oak Zeng
Signed-off-by: Oak Zeng 
---
 include/uapi/drm/xe_drm.h | 40 +++
 1 file changed, 40 insertions(+)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 50bbea0992d9..551ed8706097 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -80,6 +80,7 @@ extern "C" {
  *  - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
  *  - &DRM_IOCTL_XE_EXEC
  *  - &DRM_IOCTL_XE_WAIT_USER_FENCE
+ *  - &DRM_IOCTL_XE_SVM
  */
 
 /*
@@ -100,6 +101,7 @@ extern "C" {
 #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08
 #define DRM_XE_EXEC0x09
 #define DRM_XE_WAIT_USER_FENCE 0x0a
+#define DRM_XE_SVM 0x0b
 /* Must be kept compact -- no holes */
 
#define DRM_IOCTL_XE_DEVICE_QUERY  DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
@@ -113,6 +115,7 @@ extern "C" {
#define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY   DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
#define DRM_IOCTL_XE_EXEC  DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
#define DRM_IOCTL_XE_WAIT_USER_FENCE   DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
+#define DRM_IOCTL_XE_SVM   DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_SVM, struct drm_xe_svm_args)
 
 /**
  * DOC: Xe IOCTL Extensions
@@ -1339,6 +1342,43 @@ struct drm_xe_wait_user_fence {
__u64 reserved[2];
 };
 
+enum drm_xe_svm_attr_type {
+   DRM_XE_SVM_ATTR_PREFERRED_LOC,
+   DRM_XE_SVM_ATTR_MIGRATION_GRANULARITY,
+   DRM_XE_SVM_ATTR_ATOMIC,
+   DRM_XE_SVM_ATTR_CACHE,
+   DRM_XE_SVM_ATTR_PREFETCH_LOC,
+   DRM_XE_SVM_ATTR_ACCESS_PATTERN,
+};
+
+struct drm_xe_svm_attr {
+   __u32 type;
+   __u32 value;
+};
+
+enum drm_xe_svm_op {
+   DRM_XE_SVM_OP_SET_ATTR,
+   DRM_XE_SVM_OP_GET_ATTR,
+};
+
+/**
+ * struct drm_xe_svm_args - Input of &DRM_IOCTL_XE_SVM
+ *
+ * Set or get memory attributes to a virtual address range
+ */
+struct drm_xe_svm_args {
+   /** @start: start of the virtual address range */
+   __u64 start;
+   /** @size: size of the virtual address range */
+   __u64 size;
+   /** @op: operation, either set or get */
+   __u32 op;
+   /** @nattr: number of attributes */
+   __u32 nattr;
+   /** @attrs: An array of attributes */
+   struct drm_xe_svm_attr attrs[];
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.26.3
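
Hypothetical userspace usage of the proposed ioctl, assuming the uapi lands
exactly as in this patch (illustrative only, not an existing libdrm helper):

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

static int xe_svm_set_preferred_loc(int fd, void *ptr, size_t size,
				    uint32_t preferred_loc)
{
	struct drm_xe_svm_args *args;
	int ret;

	/* One attribute follows the fixed-size header (flexible array). */
	args = calloc(1, sizeof(*args) + sizeof(struct drm_xe_svm_attr));
	if (!args)
		return -1;

	args->start = (uintptr_t)ptr;
	args->size = size;
	args->op = DRM_XE_SVM_OP_SET_ATTR;
	args->nattr = 1;
	args->attrs[0].type = DRM_XE_SVM_ATTR_PREFERRED_LOC;
	args->attrs[0].value = preferred_loc;

	ret = ioctl(fd, DRM_IOCTL_XE_SVM, args);
	free(args);
	return ret;
}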



[PATCH 13/23] drm/xe/svm: Handle CPU page fault

2024-01-17 Thread Oak Zeng
In the svm picture, the CPU and GPU programs share one and the same
virtual address space. The backing store of this virtual address
space can be either system memory or device memory. Since GPU
device memory is remapped as DEVICE_PRIVATE, the CPU can't access it.
Any CPU access to device memory causes a page fault. Implement
a page fault handler to migrate memory back to system memory and
map it to the CPU page table so the CPU program can proceed.

Also unbind this page from the GPU side, and free the original GPU
device page.

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_device_types.h |  12 ++
 drivers/gpu/drm/xe/xe_svm.h  |   8 +-
 drivers/gpu/drm/xe/xe_svm_devmem.c   |  10 +-
 drivers/gpu/drm/xe/xe_svm_migrate.c  | 230 +++
 drivers/gpu/drm/xe/xe_svm_range.c|  27 
 5 files changed, 280 insertions(+), 7 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_svm_migrate.c

diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
b/drivers/gpu/drm/xe/xe_device_types.h
index 6dba5b0ab481..c08e41cb3229 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -573,4 +573,16 @@ struct xe_file {
struct xe_drm_client *client;
 };
 
+static inline struct xe_tile *mem_region_to_tile(struct xe_mem_region *mr)
+{
+   return container_of(mr, struct xe_tile, mem.vram);
+}
+
+static inline u64 vram_pfn_to_dpa(struct xe_mem_region *mr, u64 pfn)
+{
+   u64 dpa;
+   u64 offset = (pfn << PAGE_SHIFT) - mr->hpa_base;
+   dpa = mr->dpa_base + offset;
+   return dpa;
+}
 #endif
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 8551df2b9780..6b93055934f8 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -12,8 +12,10 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
+#include 
 #include "xe_device_types.h"
 
 struct xe_vm;
@@ -66,16 +68,20 @@ struct xe_svm_range {
struct interval_tree_node inode;
 };
 
+vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf);
 void xe_destroy_svm(struct xe_svm *svm);
 struct xe_svm *xe_create_svm(struct xe_vm *vm);
 struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm);
 struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm *svm,
unsigned long addr);
+bool xe_svm_range_belongs_to_vma(struct mm_struct *mm,
+   struct xe_svm_range *range,
+   struct vm_area_struct *vma);
+
 int xe_svm_build_sg(struct hmm_range *range, struct sg_table *st);
 int xe_svm_devm_add(struct xe_tile *tile, struct xe_mem_region *mem);
 void xe_svm_devm_remove(struct xe_device *xe, struct xe_mem_region *mem);
 
-
 int xe_devm_alloc_pages(struct xe_tile *tile,
unsigned long npages,
struct list_head *blocks,
diff --git a/drivers/gpu/drm/xe/xe_svm_devmem.c 
b/drivers/gpu/drm/xe/xe_svm_devmem.c
index 5cd54dde4a9d..01f8385ebb5b 100644
--- a/drivers/gpu/drm/xe/xe_svm_devmem.c
+++ b/drivers/gpu/drm/xe/xe_svm_devmem.c
@@ -11,13 +11,16 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
-
 #include "xe_device_types.h"
 #include "xe_trace.h"
 #include "xe_migrate.h"
 #include "xe_ttm_vram_mgr_types.h"
 #include "xe_assert.h"
+#include "xe_pt.h"
+#include "xe_svm.h"
 
 /**
  * struct xe_svm_block_meta - svm uses this data structure to manage each
@@ -137,11 +140,6 @@ void xe_devm_free_blocks(struct list_head *blocks)
free_block(block);
 }
 
-static vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf)
-{
-   return 0;
-}
-
 void xe_devm_page_free(struct page *page)
 {
struct drm_buddy_block *block =
diff --git a/drivers/gpu/drm/xe/xe_svm_migrate.c 
b/drivers/gpu/drm/xe/xe_svm_migrate.c
new file mode 100644
index ..3be26da33aa3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_svm_migrate.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "xe_device_types.h"
+#include "xe_trace.h"
+#include "xe_migrate.h"
+#include "xe_ttm_vram_mgr_types.h"
+#include "xe_assert.h"
+#include "xe_pt.h"
+#include "xe_svm.h"
+
+
+/**
+ * alloc_host_page() - allocate one host page for the fault vma
+ *
+ * @dev: (GPU) device that will access the allocated page
+ * @vma: the fault vma that we need allocate page for
+ * @addr: the fault address. The allocated page is for this address
+ *
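
For reference, the overall CPU-fault flow the commit message describes follows
the standard MEMORY_DEVICE_PRIVATE pattern: collect the device-private page
with migrate_vma_setup(), allocate a system page, copy the data back through
the GPU, then finalize. A minimal single-page sketch, where
copy_vram_page_to_sram() is a hypothetical stand-in for the blitter copy
(xe_migrate_svm() in this series); the actual patch migrates the whole range:

static vm_fault_t migrate_to_ram_sketch(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	unsigned long src = 0, dst = 0;
	struct migrate_vma migrate = {
		.vma		= vma,
		.start		= vmf->address & PAGE_MASK,
		.end		= (vmf->address & PAGE_MASK) + PAGE_SIZE,
		.src		= &src,
		.dst		= &dst,
		.fault_page	= vmf->page,
		.pgmap_owner	= vmf->page->pgmap->owner,
		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
	};
	struct page *spage, *dpage;
	vm_fault_t ret = 0;

	if (migrate_vma_setup(&migrate))
		return VM_FAULT_SIGBUS;
	if (!migrate.cpages)
		return 0;

	spage = migrate_pfn_to_page(src);
	if (!spage || !(src & MIGRATE_PFN_MIGRATE))
		goto out;

	dpage = alloc_page(GFP_HIGHUSER);	/* real code would be vma/node aware */
	if (!dpage) {
		ret = VM_FAULT_OOM;
		goto out;
	}
	lock_page(dpage);

	/*
	 * Device-private vram has no CPU mapping, so the copy must go
	 * through the GPU (hypothetical helper).
	 */
	copy_vram_page_to_sram(spage, dpage);

	dst = migrate_pfn(page_to_pfn(dpage));
out:
	migrate_vma_pages(&migrate);
	migrate_vma_finalize(&migrate);
	return ret;
}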

[PATCH 07/23] drm/xe/svm: Add helper for binding hmm range to gpu

2024-01-17 Thread Oak Zeng
Add a helper function xe_bind_svm_range to bind a svm range
to the gpu. A temporary xe_vma is created locally to re-use the
existing page table update functions, which are vma-based.

The svm page table update lock design is different from the
userptr and bo page table updates. A xe_pt_svm_pre_commit
function is introduced for svm range pre-commitment.

A hmm_range pointer is added to the xe_vma struct.

v1: Make the userptr member the last member of the xe_vma struct

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_pt.c   | 114 +--
 drivers/gpu/drm/xe/xe_pt.h   |   4 ++
 drivers/gpu/drm/xe/xe_vm_types.h |  13 +++-
 3 files changed, 126 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index de1030a47588..f1e479fa3001 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -17,6 +17,7 @@
 #include "xe_trace.h"
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_vm.h"
+#include "xe_svm.h"
 
 struct xe_pt_dir {
struct xe_pt pt;
@@ -582,8 +583,15 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 {
struct xe_device *xe = tile_to_xe(tile);
struct xe_bo *bo = xe_vma_bo(vma);
-   bool is_devmem = !xe_vma_is_userptr(vma) && bo &&
-   (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo));
+   /*
+* FIXME: Right now assume all svm ranges bound to GPU are backed
+* by device memory. This assumption will change once migration
+* policy is implemented. A svm range's backing store can be a
+* mixture of device memory and system memory, page by page based.
+* We probably need a separate stage_bind function for svm.
+*/
+   bool is_devmem = vma->svm_sg || (!xe_vma_is_userptr(vma) && bo &&
+   (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)));
struct xe_res_cursor curs;
struct xe_pt_stage_bind_walk xe_walk = {
.base = {
@@ -617,7 +625,10 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
xe_bo_assert_held(bo);
 
if (!xe_vma_is_null(vma)) {
-   if (xe_vma_is_userptr(vma))
+   if (vma->svm_sg)
+   xe_res_first_sg(vma->svm_sg, 0, xe_vma_size(vma),
+   &curs);
+   else if (xe_vma_is_userptr(vma))
xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma),
&curs);
else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
@@ -1046,6 +1057,28 @@ static int xe_pt_userptr_pre_commit(struct 
xe_migrate_pt_update *pt_update)
return 0;
 }
 
+static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+   struct xe_vma *vma = pt_update->vma;
+   struct hmm_range *range = vma->hmm_range;
+
+   if (mmu_interval_read_retry(range->notifier,
+   range->notifier_seq)) {
+   /*
+* FIXME: is this really necessary? We didn't update GPU
+* page table yet...
+*/
+   xe_vm_invalidate_vma(vma);
+   return -EAGAIN;
+   }
+   return 0;
+}
+
+static const struct xe_migrate_pt_update_ops svm_bind_ops = {
+   .populate = xe_vm_populate_pgtable,
+   .pre_commit = xe_pt_svm_pre_commit,
+};
+
 static const struct xe_migrate_pt_update_ops bind_ops = {
.populate = xe_vm_populate_pgtable,
.pre_commit = xe_pt_pre_commit,
@@ -1197,7 +1230,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
struct xe_pt_migrate_pt_update bind_pt_update = {
.base = {
-   .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops,
+   .ops = vma->svm_sg ? &svm_bind_ops :
+   (xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops),
.vma = vma,
.tile_id = tile->id,
},
@@ -1651,3 +1685,75 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
 
return fence;
 }
+
+/**
+ * xe_bind_svm_range() - bind an address range to vm
+ *
+ * @vm: the vm to bind this address range
+ * @tile: the tile to bind this address range to
+ * @range: a hmm_range which includes all the information
+ * needed for binding: virtual address range and physical
+ * pfns to back up this virtual address range.
+ * @flags: the binding flags to set in pte
+ *
+ * This is a helper function used by svm sub-system
+ * to bind a svm range to gpu vm. svm sub-system
+ * doesn't have xe_vma, thus helpers such as
+

[PATCH 02/23] drm/xe/svm: Add svm key data structures

2024-01-17 Thread Oak Zeng
Add the xe_svm and xe_svm_range data structures. Each xe_svm
represents a svm address space and it maps 1:1 to the
process's mm_struct. It also maps 1:1 to the gpu xe_vm
struct.

Each xe_svm_range represents a virtual address range inside
a svm address space. It is similar to the CPU's vm_area_struct,
or to the GPU xe_vma struct. It contains data to synchronize
this address range with the CPU's virtual address range, using the mmu
notifier mechanism. It can also hold this range's memory
attributes set by user, such as preferred memory location etc -
this is TBD.

Each svm address space is made of many svm virtual address ranges.
All address ranges are maintained in xe_svm's interval tree.

Also add a xe_svm pointer to the xe_vm data structure, so we have
a 1:1 mapping between xe_svm and xe_vm.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.h  | 59 
 drivers/gpu/drm/xe/xe_vm_types.h |  2 ++
 2 files changed, 61 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_svm.h

diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
new file mode 100644
index ..ba301a331f59
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __XE_SVM_H
+#define __XE_SVM_H
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct xe_vm;
+struct mm_struct;
+
+/**
+ * struct xe_svm - data structure to represent a shared
+ * virtual address space from device side. xe_svm, xe_vm
+ * and mm_struct have a 1:1:1 relationship.
+ */
+struct xe_svm {
+   /** @vm: The xe_vm address space corresponding to this xe_svm */
+   struct xe_vm *vm;
+   /** @mm: The mm_struct corresponding to this xe_svm */
+   struct mm_struct *mm;
+   /**
+* @mutex: A lock used by svm subsystem. It protects:
+* 1. below range_tree
+* 2. GPU page table update. Serialize all SVM GPU page table updates
+*/
+   struct mutex mutex;
+   /**
+* @range_tree: Interval tree of all svm ranges in this svm
+*/
+   struct rb_root_cached range_tree;
+};
+
+/**
+ * struct xe_svm_range - Represents a shared virtual address range.
+ */
+struct xe_svm_range {
+   /** @notifier: The mmu interval notifier used to keep track of CPU
+* side address range change. Driver will get a callback with this
+* notifier if anything changed from CPU side, such as range is
+* unmapped from CPU
+*/
+   struct mmu_interval_notifier notifier;
+   /** @start: start address of this range, inclusive */
+   u64 start;
+   /** @end: end address of this range, exclusive */
+   u64 end;
+   /** @unregister_notifier_work: A worker used to unregister this notifier */
+   struct work_struct unregister_notifier_work;
+   /** @inode: used to link this range to svm's range_tree */
+   struct interval_tree_node inode;
+};
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 63e8a50b88e9..037fb7168c63 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -17,6 +17,7 @@
 #include "xe_pt_types.h"
 #include "xe_range_fence.h"
 
+struct xe_svm;
 struct xe_bo;
 struct xe_sync_entry;
 struct xe_vm;
@@ -279,6 +280,7 @@ struct xe_vm {
bool batch_invalidate_tlb;
/** @xef: XE file handle for tracking this VM's drm client */
struct xe_file *xef;
+   struct xe_svm *svm;
 };
 
 /** struct xe_vma_op_map - VMA map operation */
-- 
2.26.3
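
A minimal sketch of how a range would be inserted into and looked up in
svm->range_tree via the embedded interval_tree_node; the helper names here
are hypothetical (the series adds its own create/lookup helpers later), and
note the interval tree uses an inclusive "last" while the range's end is
exclusive:

#include <linux/interval_tree.h>
#include <linux/slab.h>

static struct xe_svm_range *svm_range_insert_sketch(struct xe_svm *svm,
						    u64 start, u64 end)
{
	struct xe_svm_range *range = kzalloc(sizeof(*range), GFP_KERNEL);

	if (!range)
		return NULL;

	range->start = start;		/* inclusive */
	range->end = end;		/* exclusive */
	range->inode.start = start;
	range->inode.last = end - 1;	/* interval tree "last" is inclusive */

	mutex_lock(&svm->mutex);
	interval_tree_insert(&range->inode, &svm->range_tree);
	mutex_unlock(&svm->mutex);
	return range;
}

static struct xe_svm_range *svm_range_lookup_sketch(struct xe_svm *svm, u64 addr)
{
	struct interval_tree_node *node =
		interval_tree_iter_first(&svm->range_tree, addr, addr);

	return node ? container_of(node, struct xe_svm_range, inode) : NULL;
}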



[PATCH 22/23] drm/xe/svm: Add DRM_XE_SVM kernel config entry

2024-01-17 Thread Oak Zeng
A DRM_XE_SVM kernel config entry is added so
the xe svm feature can be enabled or disabled at
kernel configuration time.

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/Kconfig   | 22 ++
 drivers/gpu/drm/xe/Makefile  |  5 +
 drivers/gpu/drm/xe/xe_mmio.c |  5 +
 drivers/gpu/drm/xe/xe_vm.c   |  2 ++
 4 files changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 1b57ae38210d..6f498095a915 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -83,6 +83,28 @@ config DRM_XE_FORCE_PROBE
 
  Use "!*" to block the probe of the driver for all known devices.
 
+config DRM_XE_SVM
+bool "Enable Shared Virtual Memory support in xe"
+depends on DRM_XE
+depends on ARCH_ENABLE_MEMORY_HOTPLUG
+depends on ARCH_ENABLE_MEMORY_HOTREMOVE
+depends on MEMORY_HOTPLUG
+depends on MEMORY_HOTREMOVE
+depends on ARCH_HAS_PTE_DEVMAP
+depends on SPARSEMEM_VMEMMAP
+depends on ZONE_DEVICE
+depends on DEVICE_PRIVATE
+depends on MMU
+select HMM_MIRROR
+select MMU_NOTIFIER
+default y
+help
+  Choose this option if you want Shared Virtual Memory (SVM)
+  support in xe. With SVM, the virtual address space is shared
+  between the CPU and the GPU. This means any virtual address,
+  such as one returned by malloc or mmap, a variable on the stack,
+  or a global memory pointer, can be used by the GPU transparently.
+
 menu "drm/Xe Debugging"
 depends on DRM_XE
 depends on EXPERT
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index e16b84f79ddf..ae503f7c1f94 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -283,6 +283,11 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/skl_universal_plane.o \
i915-display/skl_watermark.o
 
+xe-$(CONFIG_DRM_XE_SVM) += xe_svm.o \
+  xe_svm_devmem.o \
+  xe_svm_range.o \
+  xe_svm_migrate.o
+
 ifeq ($(CONFIG_ACPI),y)
xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_acpi.o \
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 3d34dcfa3b3a..99810794bd94 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -286,7 +286,9 @@ int xe_mmio_probe_vram(struct xe_device *xe)
}
 
io_size -= min_t(u64, tile_size, io_size);
+#if IS_ENABLED(CONFIG_DRM_XE_SVM)
xe_svm_devm_add(tile, &tile->mem.vram);
+#endif
}
 
xe->mem.vram.actual_physical_size = total_size;
@@ -361,8 +363,11 @@ static void mmio_fini(struct drm_device *drm, void *arg)
pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
if (xe->mem.vram.mapping)
iounmap(xe->mem.vram.mapping);
+
+#if IS_ENABLED(CONFIG_DRM_XE_SVM)
for_each_tile(tile, xe, id) {
xe_svm_devm_remove(xe, &tile->mem.vram);
+#endif
}
 }
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 712fe49d8fb2..3bf19c92e01f 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1377,7 +1377,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
xe->usm.num_vm_in_non_fault_mode++;
mutex_unlock(&xe->usm.lock);
 
+#if IS_ENABLED(CONFIG_DRM_XE_SVM)
vm->svm = xe_create_svm(vm);
+#endif
trace_xe_vm_create(vm);
 
return vm;
-- 
2.26.3



[PATCH 12/23] drm/xe/svm: Trace buddy block allocation and free

2024-01-17 Thread Oak Zeng
Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm_devmem.c |  5 -
 drivers/gpu/drm/xe/xe_trace.h  | 35 ++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_svm_devmem.c 
b/drivers/gpu/drm/xe/xe_svm_devmem.c
index 445e0e1bc3b4..5cd54dde4a9d 100644
--- a/drivers/gpu/drm/xe/xe_svm_devmem.c
+++ b/drivers/gpu/drm/xe/xe_svm_devmem.c
@@ -95,6 +95,7 @@ int xe_devm_alloc_pages(struct xe_tile *tile,
block->private = meta;
block_pfn_first =
block_offset_to_pfn(mr, 
drm_buddy_block_offset(block));
+   trace_xe_buddy_block_alloc(block, size, block_pfn_first);
for(i = 0; i < pages_per_block; i++) {
struct page *page;
 
@@ -159,8 +160,10 @@ void xe_devm_page_free(struct page *page)
 
xe_assert(tile->xe, i < pages_per_block);
clear_bit(i, meta->bitmap);
-   if (bitmap_empty(meta->bitmap, pages_per_block))
+   if (bitmap_empty(meta->bitmap, pages_per_block)) {
free_block(block);
+   trace_xe_buddy_block_free(block, size, block_pfn_first);
+   }
 }
 
 static const struct dev_pagemap_ops xe_devm_pagemap_ops = {
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 63867c0fa848..50380f5173ca 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -11,6 +11,7 @@
 
 #include 
 #include 
+#include 
 
 #include "xe_bo_types.h"
 #include "xe_exec_queue_types.h"
@@ -600,6 +601,40 @@ DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h,
 
 );
 
+DECLARE_EVENT_CLASS(xe_buddy_block,
+   TP_PROTO(struct drm_buddy_block *block, u64 size, u64 pfn),
+   TP_ARGS(block, size, pfn),
+
+   TP_STRUCT__entry(
+   __field(u64, block)
+   __field(u64, header)
+   __field(u64, size)
+   __field(u64, pfn)
+   ),
+
+   TP_fast_assign(
+   __entry->block = (u64)block;
+   __entry->header = block->header;
+   __entry->size = size;
+   __entry->pfn = pfn;
+   ),
+
+   TP_printk("xe svm: allocated block %llx, block header %llx, 
size %llx, pfn %llx\n",
+   __entry->block, __entry->header, __entry->size, 
__entry->pfn)
+);
+
+
+DEFINE_EVENT(xe_buddy_block, xe_buddy_block_alloc,
+   TP_PROTO(struct drm_buddy_block *block, u64 size, u64 pfn),
+   TP_ARGS(block, size, pfn)
+);
+
+
+DEFINE_EVENT(xe_buddy_block, xe_buddy_block_free,
+   TP_PROTO(struct drm_buddy_block *block, u64 size, u64 pfn),
+   TP_ARGS(block, size, pfn)
+);
+
 #endif
 
 /* This part must be outside protection */
-- 
2.26.3



[PATCH 16/23] drm/xe/svm: Implement the mmu notifier range invalidate callback

2024-01-17 Thread Oak Zeng
To mirror the CPU page table on the GPU side, we register a mmu interval
notifier (in the coming patch of this series). Core mm calls back into the
GPU driver whenever there is a change to a certain virtual address range,
i.e., the range is released or unmapped by the user etc.

This patch implements the GPU driver callback function for such a mmu
interval notifier. In the callback function we unbind the address
range from the GPU if it is unmapped from the CPU side, thus mirroring the
CPU page table change.

We also unregister the mmu interval notifier from core mm in the case
of a munmap event. But we can't unregister the mmu notifier directly from the
mmu notifier range invalidation callback function. The reason is, during
a munmap (see kernel function vm_munmap), a mmap_write_lock is held, but
unregistering the mmu notifier (calling mmu_interval_notifier_remove) also
requires the mmap_write_lock of the current process.

Thus, we start a kernel worker to unregister the mmu interval notifier on a
MMU_NOTIFY_UNMAP event.

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.c   |  1 +
 drivers/gpu/drm/xe/xe_svm.h   |  1 -
 drivers/gpu/drm/xe/xe_svm_range.c | 37 ++-
 3 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index ab3cc2121869..6393251c0051 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -8,6 +8,7 @@
 #include "xe_svm.h"
 #include 
 #include 
+#include "xe_pt.h"
 
 DEFINE_HASHTABLE(xe_svm_table, XE_MAX_SVM_PROCESS);
 
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 90e665f2bfc6..0038f98c0cc7 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -54,7 +54,6 @@ struct xe_svm {
 struct xe_svm_range {
/** @svm: pointer of the xe_svm that this range belongs to */
struct xe_svm *svm;
-
/** @notifier: The mmu interval notifer used to keep track of CPU
 * side address range change. Driver will get a callback with this
 * notifier if anything changed from CPU side, such as range is
diff --git a/drivers/gpu/drm/xe/xe_svm_range.c 
b/drivers/gpu/drm/xe/xe_svm_range.c
index 286d5f7d6ecd..53dd3be7ab9f 100644
--- a/drivers/gpu/drm/xe/xe_svm_range.c
+++ b/drivers/gpu/drm/xe/xe_svm_range.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include "xe_svm.h"
+#include "xe_pt.h"
 
 /**
  * xe_svm_range_from_addr() - retrieve svm_range contains a virtual address
@@ -59,8 +60,42 @@ bool xe_svm_range_belongs_to_vma(struct mm_struct *mm,
return (vma1 == vma) && (vma2 == vma);
 }
 
+static bool xe_svm_range_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+   struct xe_svm_range *svm_range =
+   container_of(mni, struct xe_svm_range, notifier);
+   struct xe_svm *svm = svm_range->svm;
+   unsigned long length = range->end - range->start;
+
+   /*
+* MMU_NOTIFY_RELEASE is called upon process exit to notify the driver
+* to release any process resources, such as zapping the GPU page table
+* mapping or unregistering the mmu notifier. We already clear the GPU
+* page table and unregister the mmu notifier in xe_destroy_svm,
+* upon process exit. So simply return here.
+*/
+   if (range->event == MMU_NOTIFY_RELEASE)
+   return true;
+
+   if (mmu_notifier_range_blockable(range))
+   mutex_lock(&svm->mutex);
+   else if (!mutex_trylock(&svm->mutex))
+   return false;
+
+   mmu_interval_set_seq(mni, cur_seq);
+   xe_invalidate_svm_range(svm->vm, range->start, length);
+   mutex_unlock(&svm->mutex);
+
+   if (range->event == MMU_NOTIFY_UNMAP)
+   queue_work(system_unbound_wq, &svm_range->unregister_notifier_work);
+
+   return true;
+}
+
 static const struct mmu_interval_notifier_ops xe_svm_mni_ops = {
-   .invalidate = NULL,
+   .invalidate = xe_svm_range_invalidate,
 };
 
 /**
-- 
2.26.3



[PATCH 15/23] drm/xe/svm: Implement functions to register and unregister mmu notifier

2024-01-17 Thread Oak Zeng
The xe driver registers a mmu interval notifier with core mm to monitor vma
changes. We register a mmu interval notifier for each svm range. The mmu
interval notifier should be unregistered in a worker (see the next patch
in this series), so also initialize a kernel worker to unregister the mmu
interval notifier.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.h   | 14 ++
 drivers/gpu/drm/xe/xe_svm_range.c | 73 +++
 2 files changed, 87 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 6b93055934f8..90e665f2bfc6 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -52,16 +52,28 @@ struct xe_svm {
  * struct xe_svm_range - Represents a shared virtual address range.
  */
 struct xe_svm_range {
+   /** @svm: pointer of the xe_svm that this range belongs to */
+   struct xe_svm *svm;
+
/** @notifier: The mmu interval notifer used to keep track of CPU
 * side address range change. Driver will get a callback with this
 * notifier if anything changed from CPU side, such as range is
 * unmapped from CPU
 */
struct mmu_interval_notifier notifier;
+   bool mmu_notifier_registered;
/** @start: start address of this range, inclusive */
u64 start;
/** @end: end address of this range, exclusive */
u64 end;
+   /** @vma: the corresponding vma of this svm range
+*  The relationship between vma and svm range is 1:N,
+*  which means one vma can be split into multiple
+*  @xe_svm_range while one @xe_svm_range can have
+*  only one vma. An N:N mapping would mean some complication
+*  in the code. Let's assume 1:N for now.
+*/
+   struct vm_area_struct *vma;
/** @unregister_notifier_work: A worker used to unregister this 
notifier */
struct work_struct unregister_notifier_work;
/** @inode: used to link this range to svm's range_tree */
@@ -77,6 +89,8 @@ struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm *svm,
 bool xe_svm_range_belongs_to_vma(struct mm_struct *mm,
   struct xe_svm_range *range,
   struct vm_area_struct *vma);
+void xe_svm_range_unregister_mmu_notifier(struct xe_svm_range *range);
+int xe_svm_range_register_mmu_notifier(struct xe_svm_range *range);
 
 int xe_svm_build_sg(struct hmm_range *range, struct sg_table *st);
 int xe_svm_devm_add(struct xe_tile *tile, struct xe_mem_region *mem);
diff --git a/drivers/gpu/drm/xe/xe_svm_range.c 
b/drivers/gpu/drm/xe/xe_svm_range.c
index b32c32f60315..286d5f7d6ecd 100644
--- a/drivers/gpu/drm/xe/xe_svm_range.c
+++ b/drivers/gpu/drm/xe/xe_svm_range.c
@@ -4,6 +4,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -57,3 +58,75 @@ bool xe_svm_range_belongs_to_vma(struct mm_struct *mm,
 
return (vma1 == vma) && (vma2 == vma);
 }
+
+static const struct mmu_interval_notifier_ops xe_svm_mni_ops = {
+   .invalidate = NULL,
+};
+
+/**
+ * xe_svm_range_unregister_mmu_notifier() - unregister a mmu interval notifier for a svm range
+ *
+ * @range: svm range
+ *
+ */
+void xe_svm_range_unregister_mmu_notifier(struct xe_svm_range *range)
+{
+   if (!range->mmu_notifier_registered)
+   return;
+
+   mmu_interval_notifier_remove(&range->notifier);
+   range->mmu_notifier_registered = false;
+}
+
+static void xe_svm_unregister_notifier_work(struct work_struct *work)
+{
+   struct xe_svm_range *range;
+
+   range = container_of(work, struct xe_svm_range, 
unregister_notifier_work);
+
+   xe_svm_range_unregister_mmu_notifier(range);
+
+   /**
+* This is called from mmu notifier MUNMAP event. When munmap is called,
+* this range is not valid any more. Remove it.
+*/
+   mutex_lock(&range->svm->mutex);
+   interval_tree_remove(&range->inode, &range->svm->range_tree);
+   mutex_unlock(&range->svm->mutex);
+   kfree(range);
+}
+
+/**
+ * xe_svm_range_register_mmu_notifier() - register a mmu interval notifier to monitor vma changes
+ *
+ * @range: svm range to monitor
+ *
+ * This has to be called inside a mmap_read_lock
+ */
+int xe_svm_range_register_mmu_notifier(struct xe_svm_range *range)
+{
+   struct vm_area_struct *vma = range->vma;
+   struct mm_struct *mm = range->svm->mm;
+   u64 start, length;
+   int ret = 0;
+
+   if (range->mmu_notifier_registered)
+   return 0;
+
+   start =  range->start;
+   length = range->end - start;
+   /** We are inside a mmap_read_lock, but it requires a mmap_write_lock
+*  to register mmu notifier.
+*/
+   mmap_read_unlock(mm);
+   mmap_write_lock(mm);
+   ret = mmu_interval_notifier_insert_locked(&range
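
A hypothetical sketch of how this registration path plausibly completes (not
the actual patch body): insert the notifier while the write lock is held, then
drop back to the read lock the caller entered with.

	ret = mmu_interval_notifier_insert_locked(&range->notifier, mm,
						  start, length,
						  &xe_svm_mni_ops);
	/* Return to the mmap_read_lock state the caller expects. */
	mmap_write_downgrade(mm);
	if (ret)
		return ret;

	INIT_WORK(&range->unregister_notifier_work,
		  xe_svm_unregister_notifier_work);
	range->mmu_notifier_registered = true;
	return 0;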

[PATCH 18/23] drm/xe/svm: Move a few structures to xe_gt.h

2024-01-17 Thread Oak Zeng
Move the access_type enum and pagefault struct to a header file so they
can be shared with the svm sub-system. This is preparation work
for enabling page faults for svm.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_gt.h   | 20 
 drivers/gpu/drm/xe/xe_gt_pagefault.c | 21 -
 2 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 4486e083f5ef..51dd288cf1cf 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -17,6 +17,26 @@
  xe_hw_engine_is_valid((hwe__)))
 
 #define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0)
+enum access_type {
+   ACCESS_TYPE_READ = 0,
+   ACCESS_TYPE_WRITE = 1,
+   ACCESS_TYPE_ATOMIC = 2,
+   ACCESS_TYPE_RESERVED = 3,
+};
+
+struct pagefault {
+   u64 page_addr;
+   u32 asid;
+   u16 pdata;
+   u8 vfid;
+   u8 access_type;
+   u8 fault_type;
+   u8 fault_level;
+   u8 engine_class;
+   u8 engine_instance;
+   u8 fault_unsuccessful;
+   bool trva_fault;
+};
 
 #ifdef CONFIG_FAULT_INJECTION
 #include  /* XXX: fault-inject.h is broken */
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c 
b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 5c2603075af9..467d68f8332e 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -23,27 +23,6 @@
 #include "xe_trace.h"
 #include "xe_vm.h"
 
-struct pagefault {
-   u64 page_addr;
-   u32 asid;
-   u16 pdata;
-   u8 vfid;
-   u8 access_type;
-   u8 fault_type;
-   u8 fault_level;
-   u8 engine_class;
-   u8 engine_instance;
-   u8 fault_unsuccessful;
-   bool trva_fault;
-};
-
-enum access_type {
-   ACCESS_TYPE_READ = 0,
-   ACCESS_TYPE_WRITE = 1,
-   ACCESS_TYPE_ATOMIC = 2,
-   ACCESS_TYPE_RESERVED = 3,
-};
-
 enum fault_type {
NOT_PRESENT = 0,
WRITE_ACCESS_VIOLATION = 1,
-- 
2.26.3



[PATCH 14/23] drm/xe/svm: trace svm range migration

2024-01-17 Thread Oak Zeng
Add functions to trace svm range migration, either
from vram to sram, or from sram to vram.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm_migrate.c |  1 +
 drivers/gpu/drm/xe/xe_trace.h   | 30 +
 2 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_svm_migrate.c 
b/drivers/gpu/drm/xe/xe_svm_migrate.c
index 3be26da33aa3..b4df411e04f3 100644
--- a/drivers/gpu/drm/xe/xe_svm_migrate.c
+++ b/drivers/gpu/drm/xe/xe_svm_migrate.c
@@ -201,6 +201,7 @@ vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf)
if (!migrate_vma.cpages)
goto free_buf;
 
+   trace_xe_svm_migrate_vram_to_sram(range);
for (i = 0; i < npages; i++) {
ret = migrate_page_vram_to_ram(vma, addr, migrate_vma.src[i],
migrate_vma.dst + i);
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 50380f5173ca..960eec38aee5 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -21,6 +21,7 @@
 #include "xe_guc_exec_queue_types.h"
 #include "xe_sched_job.h"
 #include "xe_vm.h"
+#include "xe_svm.h"
 
 DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
@@ -601,6 +602,35 @@ DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h,
 
 );
 
+DECLARE_EVENT_CLASS(xe_svm_migrate,
+   TP_PROTO(struct xe_svm_range *range),
+   TP_ARGS(range),
+
+   TP_STRUCT__entry(
+__field(u64, start)
+__field(u64, end)
+),
+
+   TP_fast_assign(
+  __entry->start = range->start;
+  __entry->end = range->end;
+  ),
+
+   TP_printk("Migrate svm range [0x%016llx,0x%016llx)",  
__entry->start,
+ __entry->end)
+);
+
+DEFINE_EVENT(xe_svm_migrate, xe_svm_migrate_vram_to_sram,
+   TP_PROTO(struct xe_svm_range *range),
+   TP_ARGS(range)
+);
+
+
+DEFINE_EVENT(xe_svm_migrate, xe_svm_migrate_sram_to_vram,
+   TP_PROTO(struct xe_svm_range *range),
+   TP_ARGS(range)
+);
+
 DECLARE_EVENT_CLASS(xe_buddy_block,
TP_PROTO(struct drm_buddy_block *block, u64 size, u64 pfn),
TP_ARGS(block, size, pfn),
-- 
2.26.3



[PATCH 10/23] drm/xe/svm: Introduce svm migration function

2024-01-17 Thread Oak Zeng
Introduce a xe_migrate_svm function for data migration.
This function is similar to the xe_migrate_copy function
but has different parameters. Instead of BO and ttm
resource parameters, it takes the source and destination
buffers' dpa addresses as parameters. This function is
intended to be used by the svm sub-system, which doesn't
have the BO and TTM concepts.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_migrate.c | 213 
 drivers/gpu/drm/xe/xe_migrate.h |   7 ++
 2 files changed, 220 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 44725f978f3e..5bd9fd40f93f 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -429,6 +429,37 @@ static bool xe_migrate_allow_identity(u64 size, const struct xe_res_cursor *cur)
return cur->size >= size;
 }
 
+/**
+ * pte_update_cmd_size() - calculate the batch buffer command size
+ * to update a flat page table.
+ *
+ * @size: The virtual address range size of the page table to update
+ *
+ * The page table to update is supposed to be a flat 1 level page
+ * table with all entries pointing to 4k pages.
+ *
+ * Return the number of dwords of the update command
+ */
+static u32 pte_update_cmd_size(u64 size)
+{
+   u32 dword;
+   u64 entries = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+
+   XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER);
+   /*
+* MI_STORE_DATA_IMM command is used to update the page table. Each
+* instruction can update at most 0x1ff pte entries. To update
+* n (n <= 0x1ff) pte entries, we need:
+* 1 dword for the MI_STORE_DATA_IMM command header (opcode etc)
+* 2 dword for the page table's physical location
+* 2*n dword for value of pte to fill (each pte entry is 2 dwords)
+*/
+   dword = (1 + 2) * DIV_ROUND_UP(entries, 0x1ff);
+   dword += entries * 2;
+
+   return dword;
+}
+
 static u32 pte_update_size(struct xe_migrate *m,
   bool is_vram,
   struct ttm_resource *res,
@@ -529,6 +560,48 @@ static void emit_pte(struct xe_migrate *m,
}
 }
 
+/**
+ * build_pt_update_batch_sram() - build batch buffer commands to update
+ * migration vm page table for system memory
+ *
+ * @m: The migration context
+ * @bb: The batch buffer which hold the page table update commands
+ * @pt_offset: The offset of page table to update, in byte
+ * @dpa: device physical address you want the page table to point to
+ * @size: size of the virtual address space you want the page table to cover
+ */
+static void build_pt_update_batch_sram(struct xe_migrate *m,
+struct xe_bb *bb, u32 pt_offset,
+u64 dpa, u32 size)
+{
+   u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB];
+   u32 ptes;
+
+   ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+   while (ptes) {
+   u32 chunk = min(0x1ffU, ptes);
+
+   bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
+   bb->cs[bb->len++] = pt_offset;
+   bb->cs[bb->len++] = 0;
+
+   pt_offset += chunk * 8;
+   ptes -= chunk;
+
+   while (chunk--) {
+   u64 addr;
+
+   addr = dpa & PAGE_MASK;
+   addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
+addr, 
pat_index,
+0, false, 0);
+   bb->cs[bb->len++] = lower_32_bits(addr);
+   bb->cs[bb->len++] = upper_32_bits(addr);
+   dpa += XE_PAGE_SIZE;
+   }
+   }
+}
+
 #define EMIT_COPY_CCS_DW 5
 static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
  u64 dst_ofs, bool dst_is_indirect,
@@ -846,6 +919,146 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
return fence;
 }
 
+/**
+ * xe_migrate_svm() - A migrate function used by SVM subsystem
+ *
+ * @m: The migration context
+ * @src_dpa: device physical start address of source, from GPU's point of view
+ * @src_is_vram: True if source buffer is in vram.
+ * @dst_dpa: device physical start address of destination, from GPU's point of 
view
+ * @dst_is_vram: True if destination buffer is in vram.
+ * @size: The size of data to copy.
+ *
+ * Copy @size bytes of data from @src_dpa to @dst_dpa. The functionality
+ * and behavior of this function is similar to xe_migrate_copy function, but
+ * the interface is different. This function is a helper function supposed to
+ * be used by the SVM subsystem. Since in the SVM subsystem there is no buffer object
+ * and ttm, there is no src/dst bo as function input. In

[PATCH 09/23] drm/xe/svm: Remap and provide memmap backing for GPU vram

2024-01-17 Thread Oak Zeng
Memory remap GPU vram using devm_memremap_pages, so each GPU vram
page is backed by a struct page.

Those struct pages are created to allow hmm to migrate buffers b/t
GPU vram and CPU system memory using the existing Linux migration
mechanism (i.e., migrating b/t CPU system memory and hard disk).

This is preparatory work to enable svm (shared virtual memory) through
the Linux kernel hmm framework. The memory remap's page map type is set
to MEMORY_DEVICE_PRIVATE for now. This means that even though each GPU
vram page gets a struct page and can be mapped in the CPU page table,
such pages are treated as the GPU's private resource, so the CPU can't
access them. If the CPU accesses such a page, a page fault is triggered
and the page will be migrated to system memory.

For GPU devices which support a coherent memory protocol b/t CPU and
GPU (such as the CXL and CAPI protocols), we can remap device memory as
MEMORY_DEVICE_COHERENT. This is TBD.
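
For reference, a minimal sketch of what the remap boils down to, using
the standard dev_pagemap pattern (the device pointer and size argument
are stand-ins; the real xe_svm_devm_add() below takes a tile and a
xe_mem_region):

static int xe_svm_devm_add_sketch(struct device *dev,
				  struct xe_mem_region *mr,
				  resource_size_t size)
{
	struct resource *res;
	void *addr;

	/* Reserve host physical address space to hang the vram pages off */
	res = devm_request_free_mem_region(dev, &iomem_resource, size);
	if (IS_ERR(res))
		return PTR_ERR(res);

	mr->pagemap.type = MEMORY_DEVICE_PRIVATE;
	mr->pagemap.range.start = res->start;
	mr->pagemap.range.end = res->end;
	mr->pagemap.nr_range = 1;
	mr->pagemap.ops = &xe_devm_pagemap_ops;
	/* Owner token, later used to filter device private pages in hmm */
	mr->pagemap.owner = dev;

	/* Creates a struct page for every vram page in the region */
	addr = devm_memremap_pages(dev, &mr->pagemap);
	if (IS_ERR(addr))
		return PTR_ERR(addr);

	mr->hpa_base = res->start;
	return 0;
}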

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_device_types.h |  8 +++
 drivers/gpu/drm/xe/xe_mmio.c |  7 +++
 drivers/gpu/drm/xe/xe_svm.h  |  2 +
 drivers/gpu/drm/xe/xe_svm_devmem.c   | 87 
 4 files changed, 104 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_svm_devmem.c

diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
b/drivers/gpu/drm/xe/xe_device_types.h
index 7eda86bd4c2a..6dba5b0ab481 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -99,6 +99,14 @@ struct xe_mem_region {
resource_size_t actual_physical_size;
/** @mapping: pointer to VRAM mappable space */
void __iomem *mapping;
+   /** @pagemap: Used to remap device memory as ZONE_DEVICE */
+   struct dev_pagemap pagemap;
+   /**
+* @hpa_base: base host physical address
+*
+* This is generated when remap device memory as ZONE_DEVICE
+*/
+   resource_size_t hpa_base;
 };
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index c8c5d74b6e90..3d34dcfa3b3a 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -21,6 +21,7 @@
 #include "xe_macros.h"
 #include "xe_module.h"
 #include "xe_tile.h"
+#include "xe_svm.h"
 
 #define XEHP_MTCFG_ADDRXE_REG(0x101800)
 #define TILE_COUNT REG_GENMASK(15, 8)
@@ -285,6 +286,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
}
 
io_size -= min_t(u64, tile_size, io_size);
+   xe_svm_devm_add(tile, &tile->mem.vram);
}
 
xe->mem.vram.actual_physical_size = total_size;
@@ -353,10 +355,15 @@ void xe_mmio_probe_tiles(struct xe_device *xe)
 static void mmio_fini(struct drm_device *drm, void *arg)
 {
struct xe_device *xe = arg;
+   struct xe_tile *tile;
+   u8 id;
 
pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
if (xe->mem.vram.mapping)
iounmap(xe->mem.vram.mapping);
+   for_each_tile(tile, xe, id) {
+   xe_svm_devm_remove(xe, &tile->mem.vram);
+   }
 }
 
 static int xe_verify_lmem_ready(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 191bce6425db..b54f7714a1fc 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -72,4 +72,6 @@ struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm);
 struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm *svm,
unsigned long 
addr);
 int xe_svm_build_sg(struct hmm_range *range, struct sg_table *st);
+int xe_svm_devm_add(struct xe_tile *tile, struct xe_mem_region *mem);
+void xe_svm_devm_remove(struct xe_device *xe, struct xe_mem_region *mem);
 #endif
diff --git a/drivers/gpu/drm/xe/xe_svm_devmem.c 
b/drivers/gpu/drm/xe/xe_svm_devmem.c
new file mode 100644
index ..cf7882830247
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_svm_devmem.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include 
+#include 
+
+#include "xe_device_types.h"
+#include "xe_trace.h"
+
+
+static vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf)
+{
+   return 0;
+}
+
+static void xe_devm_page_free(struct page *page)
+{
+}
+
+static const struct dev_pagemap_ops xe_devm_pagemap_ops = {
+   .page_free = xe_devm_page_free,
+   .migrate_to_ram = xe_devm_migrate_to_ram,
+};
+
+/**
+ * xe_svm_devm_add: Remap and provide memmap backing for device memory
+ * @tile: tile that the memory region belongs to
+ * @mr: memory region to remap
+ *
+ * This remap device memory to host physical address space and create
+ * struct page to back device memory
+ *
+ * Return: 0 on success standard error co

[PATCH 08/23] drm/xe/svm: Add helper to invalidate svm range from GPU

2024-01-17 Thread Oak Zeng
An svm-subsystem-friendly function is added for svm range invalidation
purposes. The svm subsystem doesn't maintain xe_vma, so a temporary xe_vma
is used to call the function xe_vm_invalidate_vma().

Not sure whether this works or not. Will have to test. If a temporary
vma doesn't work, we will have to call the zap_pte/tlb_inv functions
directly.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_pt.c | 33 +
 drivers/gpu/drm/xe/xe_pt.h |  1 +
 2 files changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index f1e479fa3001..7ae8954be041 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1757,3 +1757,36 @@ int xe_bind_svm_range(struct xe_vm *vm, struct xe_tile 
*tile,
dma_fence_put(fence);
return ret;
 }
+
+/**
+ * xe_invalidate_svm_range() - a helper to invalidate a svm address range
+ *
+ * @vm: The vm that the address range belongs to
+ * @start: start of the virtual address range
+ * @size: size of the virtual address range
+ *
+ * This is a helper function supposed to be used by svm subsystem.
+ * svm subsystem doesn't maintain xe_vma, so we create a temporary
+ * xe_vma structure so we can reuse xe_vm_invalidate_vma().
+ */
+void xe_invalidate_svm_range(struct xe_vm *vm, u64 start, u64 size)
+{
+   struct xe_vma vma = {
+   .gpuva = {
+   .va = {
+   .addr = start,
+   .range = size,
+   },
+   .vm = &vm->gpuvm,
+   },
+   /** invalidate from all tiles
+*  FIXME: We used temporary vma in xe_bind_svm_range, so
+*  we lost track of which tile we are bound to. Does
+*  setting tile_present to all tiles cause a problem
+*  in xe_vm_invalidate_vma()?
+*/
+   .tile_present = BIT(vm->xe->info.tile_count) - 1,
+   };
+
+   xe_vm_invalidate_vma(&vma);
+}
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index 775d08707466..42d495997635 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -49,4 +49,5 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
 
 int xe_bind_svm_range(struct xe_vm *vm, struct xe_tile *tile,
struct hmm_range *range, u64 flags);
+void xe_invalidate_svm_range(struct xe_vm *vm, u64 start, u64 size);
 #endif
-- 
2.26.3



[PATCH 03/23] drm/xe/svm: create xe svm during vm creation

2024-01-17 Thread Oak Zeng
Create the xe_svm struct during xe_vm creation.
Add xe_svm to a global hash table so later on
we can retrieve xe_svm using mm_struct (the key).

Destroy the svm process during xe_vm close.

Also add a helper function to retrieve the svm struct
from the mm struct.
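
A minimal usage sketch (the calling context is hypothetical, e.g. a path
where only the faulting mm is known):

	struct xe_svm *svm;

	/* xe_lookup_svm_by_mm() walks the RCU hash table keyed by mm */
	rcu_read_lock();
	svm = xe_lookup_svm_by_mm(current->mm);
	rcu_read_unlock();
	if (!svm)
		return -EINVAL;	/* not an SVM process */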

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.c | 63 +
 drivers/gpu/drm/xe/xe_svm.h | 11 +++
 drivers/gpu/drm/xe/xe_vm.c  |  5 +++
 3 files changed, 79 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_svm.c

diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
new file mode 100644
index ..559188471949
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include 
+#include 
+#include "xe_svm.h"
+
+DEFINE_HASHTABLE(xe_svm_table, XE_MAX_SVM_PROCESS);
+
+/**
+ * xe_destroy_svm() - destroy a svm process
+ *
+ * @svm: the xe_svm to destroy
+ */
+void xe_destroy_svm(struct xe_svm *svm)
+{
+   hash_del_rcu(&svm->hnode);
+   mutex_destroy(&svm->mutex);
+   kfree(svm);
+}
+
+/**
+ * xe_create_svm() - create a svm process
+ *
+ * @vm: the xe_vm that we create svm process for
+ *
+ * Return the created xe svm struct
+ */
+struct xe_svm *xe_create_svm(struct xe_vm *vm)
+{
+   struct mm_struct *mm = current->mm;
+   struct xe_svm *svm;
+
+   svm = kzalloc(sizeof(struct xe_svm), GFP_KERNEL);
+   svm->mm = mm;
+   svm->vm = vm;
+   mutex_init(&svm->mutex);
+   /** Add svm to global xe_svm_table hash table
+*  use mm as key so later we can retrieve svm using mm
+*/
+   hash_add_rcu(xe_svm_table, &svm->hnode, (uintptr_t)mm);
+   return svm;
+}
+
+/**
+ * xe_lookup_svm_by_mm() - retrieve xe_svm from mm struct
+ *
+ * @mm: the mm struct of the svm to retrieve
+ *
+ * Return the xe_svm struct pointer, or NULL if fail
+ */
+struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm)
+{
+   struct xe_svm *svm;
+
+   hash_for_each_possible_rcu(xe_svm_table, svm, hnode, (uintptr_t)mm)
+   if (svm->mm == mm)
+   return svm;
+
+   return NULL;
+}
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index ba301a331f59..cd3cf92f3784 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -11,10 +11,15 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 struct xe_vm;
 struct mm_struct;
 
+#define XE_MAX_SVM_PROCESS 5 /* 2^5 = 32 hash buckets for SVM processes */
+extern DECLARE_HASHTABLE(xe_svm_table, XE_MAX_SVM_PROCESS);
+
 /**
  * struct xe_svm - data structure to represent a shared
  * virtual address space from device side. xe_svm, xe_vm
@@ -35,6 +40,8 @@ struct xe_svm {
 * @range_tree: Interval tree of all svm ranges in this svm
 */
struct rb_root_cached range_tree;
+   /** @hnode: used to add this svm to a global xe_svm_hash table*/
+   struct hlist_node hnode;
 };
 
 /**
@@ -56,4 +63,8 @@ struct xe_svm_range {
/** @inode: used to link this range to svm's range_tree */
struct interval_tree_node inode;
 };
+
+void xe_destroy_svm(struct xe_svm *svm);
+struct xe_svm *xe_create_svm(struct xe_vm *vm);
+struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm);
 #endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index a7e7a0b24099..712fe49d8fb2 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -36,6 +36,7 @@
 #include "xe_trace.h"
 #include "generated/xe_wa_oob.h"
 #include "xe_wa.h"
+#include "xe_svm.h"
 
 #define TEST_VM_ASYNC_OPS_ERROR
 
@@ -1376,6 +1377,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 
flags)
xe->usm.num_vm_in_non_fault_mode++;
mutex_unlock(&xe->usm.lock);
 
+   vm->svm = xe_create_svm(vm);
trace_xe_vm_create(vm);
 
return vm;
@@ -1496,6 +1498,9 @@ void xe_vm_close_and_put(struct xe_vm *vm)
for_each_tile(tile, xe, id)
xe_range_fence_tree_fini(&vm->rftree[id]);
 
+   if (vm->svm)
+   xe_destroy_svm(vm->svm);
+
xe_vm_put(vm);
 }
 
-- 
2.26.3



[PATCH 05/23] drm/xe/svm: add helper to retrieve svm range from address

2024-01-17 Thread Oak Zeng
All valid virtual address ranges are maintained in svm's
range_tree. This function iterates svm's range tree and
returns the svm range that contains a specific address.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.h   |  2 ++
 drivers/gpu/drm/xe/xe_svm_range.c | 32 +++
 2 files changed, 34 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_svm_range.c

diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index cd3cf92f3784..3ed106ecc02b 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -67,4 +67,6 @@ struct xe_svm_range {
 void xe_destroy_svm(struct xe_svm *svm);
 struct xe_svm *xe_create_svm(struct xe_vm *vm);
 struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm);
+struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm *svm,
+   unsigned long 
addr);
 #endif
diff --git a/drivers/gpu/drm/xe/xe_svm_range.c 
b/drivers/gpu/drm/xe/xe_svm_range.c
new file mode 100644
index ..d8251d38f65e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_svm_range.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+#include "xe_svm.h"
+
+/**
+ * xe_svm_range_from_addr() - retrieve svm_range contains a virtual address
+ *
+ * @svm: svm that the virtual address belongs to
+ * @addr: the virtual address to retrieve svm_range for
+ *
+ * return the svm range found,
+ * or NULL if no range found
+ */
+struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm *svm,
+   
unsigned long addr)
+{
+   struct interval_tree_node *node;
+
+   mutex_lock(&svm->mutex);
+   node = interval_tree_iter_first(&svm->range_tree, addr, addr);
+   mutex_unlock(&svm->mutex);
+   if (!node)
+   return NULL;
+
+   return container_of(node, struct xe_svm_range, inode);
+}
-- 
2.26.3



[PATCH 04/23] drm/xe/svm: Trace svm creation

2024-01-17 Thread Oak Zeng
xe_vm tracepoint is extended to also print svm.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_trace.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 95163c303f3e..63867c0fa848 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -467,15 +467,17 @@ DECLARE_EVENT_CLASS(xe_vm,
TP_STRUCT__entry(
 __field(u64, vm)
 __field(u32, asid)
+__field(u64, svm)
 ),
 
TP_fast_assign(
   __entry->vm = (unsigned long)vm;
   __entry->asid = vm->usm.asid;
+  __entry->svm = (unsigned long)vm->svm;
   ),
 
-   TP_printk("vm=0x%016llx, asid=0x%05x",  __entry->vm,
- __entry->asid)
+   TP_printk("vm=0x%016llx, asid=0x%05x, svm=0x%016llx",  
__entry->vm,
+ __entry->asid, __entry->svm)
 );
 
 DEFINE_EVENT(xe_vm, xe_vm_kill,
-- 
2.26.3



[PATCH 06/23] drm/xe/svm: Introduce a helper to build sg table from hmm range

2024-01-17 Thread Oak Zeng
Introduce a xe_svm_build_sg helper function to build a scatter
gather table from a hmm_range struct. This is preparatory work
for binding a hmm range to the GPU.

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.c | 52 +
 drivers/gpu/drm/xe/xe_svm.h |  3 +++
 2 files changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 559188471949..ab3cc2121869 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -6,6 +6,8 @@
 #include 
 #include 
 #include "xe_svm.h"
+#include 
+#include 
 
 DEFINE_HASHTABLE(xe_svm_table, XE_MAX_SVM_PROCESS);
 
@@ -61,3 +63,53 @@ struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm)
 
return NULL;
 }
+
+/**
+ * xe_svm_build_sg() - build a scatter gather table for all the physical 
pages/pfn
+ * in a hmm_range.
+ *
+ * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
+ * has the pfn numbers of pages that back up this hmm address range.
+ * @st: pointer to the sg table.
+ *
+ * All the contiguous pfns will be collapsed into one entry in
+ * the scatter gather table. This is for the convenience of
+ * later operations that bind the address range to the GPU page table.
+ *
+ * This function allocates the storage of the sg table. It is
+ * the caller's responsibility to free it by calling sg_free_table.
+ *
+ * Returns 0 if successful; -ENOMEM if fails to allocate memory
+ */
+int xe_svm_build_sg(struct hmm_range *range,
+struct sg_table *st)
+{
+   struct scatterlist *sg;
+   u64 i, npages;
+
+   sg = NULL;
+   st->nents = 0;
+   npages = ((range->end - 1) >> PAGE_SHIFT) - (range->start >> 
PAGE_SHIFT) + 1;
+
+   if (unlikely(sg_alloc_table(st, npages, GFP_KERNEL)))
+   return -ENOMEM;
+
+   for (i = 0; i < npages; i++) {
+   unsigned long addr = range->hmm_pfns[i];
+
+   if (sg && (addr == (sg_dma_address(sg) + sg->length))) {
+   sg->length += PAGE_SIZE;
+   sg_dma_len(sg) += PAGE_SIZE;
+   continue;
+   }
+
+   sg =  sg ? sg_next(sg) : st->sgl;
+   sg_dma_address(sg) = addr;
+   sg_dma_len(sg) = PAGE_SIZE;
+   sg->length = PAGE_SIZE;
+   st->nents++;
+   }
+
+   sg_mark_end(sg);
+   return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 3ed106ecc02b..191bce6425db 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -13,6 +13,8 @@
 #include 
 #include 
 #include 
+#include 
+#include "xe_device_types.h"
 
 struct xe_vm;
 struct mm_struct;
@@ -69,4 +71,5 @@ struct xe_svm *xe_create_svm(struct xe_vm *vm);
 struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm);
 struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm *svm,
unsigned long 
addr);
+int xe_svm_build_sg(struct hmm_range *range, struct sg_table *st);
 #endif
-- 
2.26.3



[PATCH 01/23] drm/xe/svm: Add SVM document

2024-01-17 Thread Oak Zeng
Add shared virtual memory document.

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 Documentation/gpu/xe/index.rst  |   1 +
 Documentation/gpu/xe/xe_svm.rst |   8 +++
 drivers/gpu/drm/xe/xe_svm_doc.h | 121 
 3 files changed, 130 insertions(+)
 create mode 100644 Documentation/gpu/xe/xe_svm.rst
 create mode 100644 drivers/gpu/drm/xe/xe_svm_doc.h

diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst
index c224ecaee81e..106b60aba1f0 100644
--- a/Documentation/gpu/xe/index.rst
+++ b/Documentation/gpu/xe/index.rst
@@ -23,3 +23,4 @@ DG2, etc is provided to prototype the driver.
xe_firmware
xe_tile
xe_debugging
+   xe_svm
diff --git a/Documentation/gpu/xe/xe_svm.rst b/Documentation/gpu/xe/xe_svm.rst
new file mode 100644
index ..62954ba1c6f8
--- /dev/null
+++ b/Documentation/gpu/xe/xe_svm.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=
+Shared virtual memory
+=
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_svm_doc.h
+   :doc: Shared virtual memory
diff --git a/drivers/gpu/drm/xe/xe_svm_doc.h b/drivers/gpu/drm/xe/xe_svm_doc.h
new file mode 100644
index ..de38ee3585e4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_svm_doc.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_SVM_DOC_H_
+#define _XE_SVM_DOC_H_
+
+/**
+ * DOC: Shared virtual memory
+ *
+ * Shared Virtual Memory (SVM) allows the programmer to use a single virtual
+ * address space shared between threads executing on CPUs and GPUs. It 
abstracts
+ * away from the user the location of the backing memory, and hence simplifies
+ * the user programming model. In a non-SVM memory model, user need to 
explicitly
+ * decide memory placement such as device or system memory, also user need to
+ * explicitly migrate memory b/t device and system memory.
+ *
+ * Interface
+ * =
+ *
+ * SVM makes use of default OS memory allocation and mapping interface such as
+ * malloc() and mmap(). The pointer returned from malloc() and mmap() can be
+ * directly used on both CPU and GPU program.
+ *
+ * SVM also provides API to set virtual address range based memory attributes
+ * such as preferred memory location, memory migration granularity, and memory
+ * atomic attributes etc. This is similar to Linux madvise API.
+ *
+ * Basic implementation
+ * ==
+ *
+ * XeKMD implementation is based on Linux kernel Heterogeneous Memory 
Management
+ * (HMM) framework. HMM’s address space mirroring support allows sharing of the
+ * address space by duplicating sections of CPU page tables in the device page
+ * tables. This enables both CPU and GPU to access a physical memory location
+ * using the same virtual address.
+ *
+ * Linux kernel also provides the ability to plugin device memory to the system
+ * (as a special ZONE_DEVICE type) and allocates struct page for each device 
memory
+ * page.
+ *
+ * HMM also provides a mechanism to migrate pages from host to device memory 
and
+ * vice versa.
+ *
+ * More information on HMM can be found here.
+ * https://www.kernel.org/doc/Documentation/vm/hmm.rst
+ *
+ * Unlike the non-SVM memory allocator (such as gem_create, vm_bind etc), there
+ * is no buffer object (BO, such as struct ttm_buffer_object, struct 
drm_gem_object),
+ * in our SVM implementation. We deliberately choose this implementation option
+ * to achieve page granularity memory placement, validation, eviction and 
migration.
+ *
+ * The SVM layer directly allocates device memory from the drm buddy subsystem.
+ * The memory is organized as many blocks, each of which has 2^n pages. The SVM
+ * subsystem then marks the usage of each page using a simple bitmap. When all
+ * pages in a block are not used anymore, SVM returns the block to the drm buddy
+ * subsystem.
+ *
+ * There are 3 events which can trigger SVM subsystem in actions:
+ *
+ * 1. A mmu notifier callback
+ *
+ * Since SVM needs to mirror the program's CPU virtual address space from the
+ * GPU side, when the program's CPU address space changes, SVM needs to make an
+ * identical change from the GPU side. SVM/hmm use a mmu interval notifier to
+ * achieve this. SVM registers a mmu interval notifier callback function with
+ * core mm, and whenever a CPU side virtual address space is changed (i.e., when
+ * a virtual address range is unmapped from the CPU by calling munmap), the
+ * registered callback function will be called from core mm. SVM then mirrors
+ * the CPU address space change from the GPU side, i.e., unmaps or invalidates
+ * the virtual address range from the GPU page table.
+ *
+ * 2. A GPU page fault
+ *
+ * At the very beginning of a process's life, no virtual address of the process
+ * is mapped in the GPU page table. So when the GPU accesses any virtual address
+ * of the process, a GPU page fault is

[PATCH 00/23] XeKmd basic SVM support

2024-01-17 Thread Oak Zeng
This is the very basic SVM (shared virtual memory) support in the XeKmd
driver. SVM allows the programmer to use a shared virtual address space
between the CPU program and the GPU program. It abstracts away from the
user the location of the backing memory in a mixed CPU and GPU
programming environment.

This work is based on previous I915 SVM implementation mainly from
Niranjana Vishwanathapura and Oak Zeng, which has never been upstreamed
before. This is our first attempt to upstream this work.

This implementation depends on Linux kernel HMM support. See some key
designs in patch #1.

We are aware there is currently an effort to implement SVM using
GMEM (generalized memory management, see
https://lore.kernel.org/dri-devel/20231128125025.4449-1-weixi@huawei.com/).
We are open to this new method if it can be merged into the upstream
kernel. Before that, we think it is still safer to support SVM through
HMM.

This series only has basic SVM support. We think it is better to post
this series earlier so we can get more eyes on it. Below is the work
that is planned or ongoing:

*Testing: We are working on the igt test right now. Some parts of this
series, especially the gpu page table update (patches #7, #8) and the
migration function (patch #10), need some debugging to make them work.

*Virtual address range based memory attributes and hints: We plan to
expose uAPI for user to set memory attributes such as preferred location
or migration granularity etc to a virtual address range. This is
important to tune SVM performance.

*GPU vram eviction: One key design choice of this series is that the
SVM layer allocates GPU memory directly from the drm buddy allocator,
instead of from the xe vram manager. There is no BO (buffer object)
concept in this implementation. The key benefit of this approach is that
we can easily migrate memory at page granularity. This also means SVM
bypasses TTM's memory eviction logic. But we want SVM memory and BO
driver memory to be able to mutually evict each other. We have some
proof of concept work to rework the TTM resource manager for this
purpose, see
https://lore.kernel.org/dri-devel/20231102043306.2931989-1-oak.z...@intel.com/
We will continue work on that series and then implement SVM's eviction
function based on the concept of a shared drm LRU list b/t SVM and the
TTM/BO driver.

Oak Zeng (23):
  drm/xe/svm: Add SVM document
  drm/xe/svm: Add svm key data structures
  drm/xe/svm: create xe svm during vm creation
  drm/xe/svm: Trace svm creation
  drm/xe/svm: add helper to retrieve svm range from address
  drm/xe/svm: Introduce a helper to build sg table from hmm range
  drm/xe/svm: Add helper for binding hmm range to gpu
  drm/xe/svm: Add helper to invalidate svm range from GPU
  drm/xe/svm: Remap and provide memmap backing for GPU vram
  drm/xe/svm: Introduce svm migration function
  drm/xe/svm: implement functions to allocate and free device memory
  drm/xe/svm: Trace buddy block allocation and free
  drm/xe/svm: Handle CPU page fault
  drm/xe/svm: trace svm range migration
  drm/xe/svm: Implement functions to register and unregister mmu
notifier
  drm/xe/svm: Implement the mmu notifier range invalidate callback
  drm/xe/svm: clean up svm range during process exit
  drm/xe/svm: Move a few structures to xe_gt.h
  drm/xe/svm: migrate svm range to vram
  drm/xe/svm: Populate svm range
  drm/xe/svm: GPU page fault support
  drm/xe/svm: Add DRM_XE_SVM kernel config entry
  drm/xe/svm: Add svm memory hints interface

 Documentation/gpu/xe/index.rst   |   1 +
 Documentation/gpu/xe/xe_svm.rst  |   8 +
 drivers/gpu/drm/xe/Kconfig   |  22 ++
 drivers/gpu/drm/xe/Makefile  |   5 +
 drivers/gpu/drm/xe/xe_device_types.h |  20 ++
 drivers/gpu/drm/xe/xe_gt.h   |  20 ++
 drivers/gpu/drm/xe/xe_gt_pagefault.c |  28 +--
 drivers/gpu/drm/xe/xe_migrate.c  | 213 +
 drivers/gpu/drm/xe/xe_migrate.h  |   7 +
 drivers/gpu/drm/xe/xe_mmio.c |  12 +
 drivers/gpu/drm/xe/xe_pt.c   | 147 +++-
 drivers/gpu/drm/xe/xe_pt.h   |   5 +
 drivers/gpu/drm/xe/xe_svm.c  | 324 +
 drivers/gpu/drm/xe/xe_svm.h  | 115 +
 drivers/gpu/drm/xe/xe_svm_devmem.c   | 232 ++
 drivers/gpu/drm/xe/xe_svm_doc.h  | 121 ++
 drivers/gpu/drm/xe/xe_svm_migrate.c  | 345 +++
 drivers/gpu/drm/xe/xe_svm_range.c| 227 ++
 drivers/gpu/drm/xe/xe_trace.h|  71 +-
 drivers/gpu/drm/xe/xe_vm.c   |   7 +
 drivers/gpu/drm/xe/xe_vm_types.h |  15 +-
 include/uapi/drm/xe_drm.h|  40 
 22 files changed, 1957 insertions(+), 28 deletions(-)
 create mode 100644 Documentation/gpu/xe/xe_svm.rst
 create mode 100644 drivers/gpu/drm/xe/xe_svm.c
 create mode 100644 drivers/gpu/drm/xe/xe_svm.h
 create mode 100644 drivers/gpu/drm/xe/xe_svm_devmem.c
 create mode 100644 drivers/gpu/drm/xe/xe_svm_doc.h
 create mode 100644 drivers/gpu/drm/xe/xe_svm_migrate.c
 create mode 100644 drive

[PATCH 16/22] drm/xe/svm: Implement the mmu notifier range invalidate callback

2023-12-20 Thread Oak Zeng
To mirror the CPU page table from the GPU side, we register a mmu interval
notifier (in the coming patch of this series). Core mm calls back into the
GPU driver whenever there is a change to a certain virtual address range,
i.e., when a range is released or unmapped by the user etc.

This patch implements the GPU driver callback function for such a mmu
interval notifier. In the callback function we unbind the address
range from the GPU if it is unmapped from the CPU side, thus mirroring
the CPU page table change.

We also unregister the mmu interval notifier from core mm in the case
of a munmap event. But we can't unregister the mmu notifier directly from
the mmu notifier range invalidation callback function. The reason is that
during a munmap (see kernel function vm_munmap), a mmap_write_lock is held,
but unregistering the mmu notifier (calling mmu_interval_notifier_remove)
also requires a mmap_write_lock of the current process.

Thus, we start a kernel worker to unregister the mmu interval notifier on a
MMU_NOTIFY_UNMAP event.
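
A sketch of the deferred unregister the worker is assumed to perform
(the work item field name matches the one queued in the diff below; the
worker body itself is an assumption, it is not part of this patch):

static void xe_svm_unregister_notifier_work(struct work_struct *w)
{
	struct xe_svm_range *range =
		container_of(w, struct xe_svm_range,
			     unregister_notifier_work);

	/*
	 * Safe here: we are no longer inside the invalidate callback, so
	 * mmu_interval_notifier_remove() may wait for it to finish.
	 */
	mmu_interval_notifier_remove(&range->notifier);
}

with the work item initialized at range creation time, e.g.
INIT_WORK(&range->unregister_notifier_work, xe_svm_unregister_notifier_work).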

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.c   |  1 +
 drivers/gpu/drm/xe/xe_svm.h   |  1 -
 drivers/gpu/drm/xe/xe_svm_range.c | 37 ++-
 3 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index ab3cc2121869..6393251c0051 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -8,6 +8,7 @@
 #include "xe_svm.h"
 #include 
 #include 
+#include "xe_pt.h"
 
 DEFINE_HASHTABLE(xe_svm_table, XE_MAX_SVM_PROCESS);
 
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 90e665f2bfc6..0038f98c0cc7 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -54,7 +54,6 @@ struct xe_svm {
 struct xe_svm_range {
/** @svm: pointer of the xe_svm that this range belongs to */
struct xe_svm *svm;
-
/** @notifier: The mmu interval notifer used to keep track of CPU
 * side address range change. Driver will get a callback with this
 * notifier if anything changed from CPU side, such as range is
diff --git a/drivers/gpu/drm/xe/xe_svm_range.c 
b/drivers/gpu/drm/xe/xe_svm_range.c
index 286d5f7d6ecd..53dd3be7ab9f 100644
--- a/drivers/gpu/drm/xe/xe_svm_range.c
+++ b/drivers/gpu/drm/xe/xe_svm_range.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include "xe_svm.h"
+#include "xe_pt.h"
 
 /**
  * xe_svm_range_from_addr() - retrieve svm_range contains a virtual address
@@ -59,8 +60,42 @@ bool xe_svm_range_belongs_to_vma(struct mm_struct *mm,
return (vma1 == vma) && (vma2 == vma);
 }
 
+static bool xe_svm_range_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+   struct xe_svm_range *svm_range =
+   container_of(mni, struct xe_svm_range, notifier);
+   struct xe_svm *svm = svm_range->svm;
+   unsigned long length = range->end - range->start;
+
+   /*
+* MMU_NOTIFY_RELEASE is called upon process exit to notify driver
+* to release any process resources, such as zap GPU page table
+* mapping or unregister mmu notifier etc. We already clear GPU
+* page table and unregister mmu notifier in xe_destroy_svm,
+* upon process exit. So just simply return here.
+*/
+   if (range->event == MMU_NOTIFY_RELEASE)
+   return true;
+
+   if (mmu_notifier_range_blockable(range))
+   mutex_lock(&svm->mutex);
+   else if (!mutex_trylock(&svm->mutex))
+   return false;
+
+   mmu_interval_set_seq(mni, cur_seq);
+   xe_invalidate_svm_range(svm->vm, range->start, length);
+   mutex_unlock(&svm->mutex);
+
+   if (range->event == MMU_NOTIFY_UNMAP)
+   queue_work(system_unbound_wq, 
&svm_range->unregister_notifier_work);
+
+   return true;
+}
+
 static const struct mmu_interval_notifier_ops xe_svm_mni_ops = {
-   .invalidate = NULL,
+   .invalidate = xe_svm_range_invalidate,
 };
 
 /**
-- 
2.26.3



[PATCH 21/22] drm/xe/svm: GPU page fault support

2023-12-20 Thread Oak Zeng
On a gpu page fault of a virtual address, try to fault the virtual
address range into the gpu page table and let the HW retry the faulty
address.

Right now, we always migrate the whole vma which contains the fault
address to the GPU. This is subject to change with a more sophisticated
migration policy: decide whether to migrate memory to the GPU or map it
in place with CPU memory; migration granularity.

There is a rather complicated locking strategy in this patch. See more
details in xe_svm_doc.h, lock design section.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c |   7 ++
 drivers/gpu/drm/xe/xe_svm.c  | 116 +++
 drivers/gpu/drm/xe/xe_svm.h  |   6 ++
 drivers/gpu/drm/xe/xe_svm_range.c|  43 ++
 4 files changed, 172 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c 
b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 6de1ff195aaa..0afd312ff154 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -22,6 +22,7 @@
 #include "xe_pt.h"
 #include "xe_trace.h"
 #include "xe_vm.h"
+#include "xe_svm.h"
 
 enum fault_type {
NOT_PRESENT = 0,
@@ -131,6 +132,11 @@ static int handle_pagefault(struct xe_gt *gt, struct 
pagefault *pf)
if (!vm || !xe_vm_in_fault_mode(vm))
return -EINVAL;
 
+   if (vm->svm) {
+   ret = xe_svm_handle_gpu_fault(vm, gt, pf);
+   goto put_vm;
+   }
+
 retry_userptr:
/*
 * TODO: Avoid exclusive lock if VM doesn't have userptrs, or
@@ -219,6 +225,7 @@ static int handle_pagefault(struct xe_gt *gt, struct 
pagefault *pf)
if (ret >= 0)
ret = 0;
}
+put_vm:
xe_vm_put(vm);
 
return ret;
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 0c13690a19f5..1ade8d7f0ab2 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -12,6 +12,7 @@
 #include "xe_svm.h"
 #include 
 #include 
+#include 
 #include "xe_pt.h"
 #include "xe_assert.h"
 #include "xe_vm_types.h"
@@ -206,3 +207,118 @@ static int svm_populate_range(struct xe_svm_range 
*svm_range,
kvfree(pfns);
return ret;
 }
+
+/**
+ * svm_access_allowed() - Determine whether read and/or write to the vma is
allowed
+ *
+ * @write: true means a read and write access; false: read only access
+ */
+static bool svm_access_allowed(struct vm_area_struct *vma, bool write)
+{
+   unsigned long access = VM_READ;
+
+   if (write)
+   access |= VM_WRITE;
+
+   return (vma->vm_flags & access) == access;
+}
+
+/**
+ * svm_should_migrate() - Determine whether we should migrate a range to
+ * a destination memory region
+ *
+ * @range: The svm memory range to consider
+ * @dst_region: target destination memory region
+ * @is_atomic_fault: Is the intended migration triggered by an atomic access?
+ * On some platforms, we have to migrate memory to guarantee atomic correctness.
+ */
+static bool svm_should_migrate(struct xe_svm_range *range,
+   struct xe_mem_region *dst_region, bool 
is_atomic_fault)
+{
+   return true;
+}
+
+/**
+ * xe_svm_handle_gpu_fault() - gpu page fault handler for svm subsystem
+ *
+ * @vm: The vm of the fault.
+ * @gt: The gt hardware on which the fault happens.
+ * @pf: page fault descriptor
+ *
+ * Work out backing memory for the fault address, migrate memory from
+ * system memory to gpu vram if necessary, and map the fault address to
+ * GPU so GPU HW can retry the last operation which has caused the GPU
+ * page fault.
+ */
+int xe_svm_handle_gpu_fault(struct xe_vm *vm,
+   struct xe_gt *gt,
+   struct pagefault *pf)
+{
+   u8 access_type = pf->access_type;
+   u64 page_addr = pf->page_addr;
+   struct hmm_range hmm_range;
+   struct vm_area_struct *vma;
+   struct xe_svm_range *range;
+   struct mm_struct *mm;
+   struct xe_svm *svm;
+   int ret = 0;
+
+   svm = vm->svm;
+   if (!svm)
+   return -EINVAL;
+
+   mm = svm->mm;
+   mmap_read_lock(mm);
+   vma = find_vma_intersection(mm, page_addr, page_addr + 4);
+   if (!vma) {
+   mmap_read_unlock(mm);
+   return -ENOENT;
+   }
+
+   if (!svm_access_allowed (vma, access_type != ACCESS_TYPE_READ)) {
+   mmap_read_unlock(mm);
+   return -EPERM;
+   }
+
+   range = xe_svm_range_from_addr(svm, page_addr);
+   if (!range) {
+   range = xe_svm_range_create(svm, vma);
+   if (!range) {
+   mmap_read_unlock(mm);
+   return -ENOMEM;
+   }
+   }
+
+   if (svm_should_migrat

[PATCH 06/22] drm/xe/svm: Introduce a helper to build sg table from hmm range

2023-12-20 Thread Oak Zeng
Introduce a xe_svm_build_sg helper function to build a scatter
gather table from a hmm_range struct. This is preparatory work
for binding a hmm range to the GPU.

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.c | 52 +
 drivers/gpu/drm/xe/xe_svm.h |  3 +++
 2 files changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 559188471949..ab3cc2121869 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -6,6 +6,8 @@
 #include 
 #include 
 #include "xe_svm.h"
+#include 
+#include 
 
 DEFINE_HASHTABLE(xe_svm_table, XE_MAX_SVM_PROCESS);
 
@@ -61,3 +63,53 @@ struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm)
 
return NULL;
 }
+
+/**
+ * xe_svm_build_sg() - build a scatter gather table for all the physical 
pages/pfn
+ * in a hmm_range.
+ *
+ * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
+ * has the pfn numbers of pages that back up this hmm address range.
+ * @st: pointer to the sg table.
+ *
+ * All the contiguous pfns will be collapsed into one entry in
+ * the scatter gather table. This is for the convenience of
+ * later operations that bind the address range to the GPU page table.
+ *
+ * This function allocates the storage of the sg table. It is
+ * the caller's responsibility to free it by calling sg_free_table.
+ *
+ * Returns 0 if successful; -ENOMEM if fails to allocate memory
+ */
+int xe_svm_build_sg(struct hmm_range *range,
+struct sg_table *st)
+{
+   struct scatterlist *sg;
+   u64 i, npages;
+
+   sg = NULL;
+   st->nents = 0;
+   npages = ((range->end - 1) >> PAGE_SHIFT) - (range->start >> 
PAGE_SHIFT) + 1;
+
+   if (unlikely(sg_alloc_table(st, npages, GFP_KERNEL)))
+   return -ENOMEM;
+
+   for (i = 0; i < npages; i++) {
+   unsigned long addr = range->hmm_pfns[i];
+
+   if (sg && (addr == (sg_dma_address(sg) + sg->length))) {
+   sg->length += PAGE_SIZE;
+   sg_dma_len(sg) += PAGE_SIZE;
+   continue;
+   }
+
+   sg =  sg ? sg_next(sg) : st->sgl;
+   sg_dma_address(sg) = addr;
+   sg_dma_len(sg) = PAGE_SIZE;
+   sg->length = PAGE_SIZE;
+   st->nents++;
+   }
+
+   sg_mark_end(sg);
+   return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 3ed106ecc02b..191bce6425db 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -13,6 +13,8 @@
 #include 
 #include 
 #include 
+#include 
+#include "xe_device_types.h"
 
 struct xe_vm;
 struct mm_struct;
@@ -69,4 +71,5 @@ struct xe_svm *xe_create_svm(struct xe_vm *vm);
 struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm);
 struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm *svm,
unsigned long 
addr);
+int xe_svm_build_sg(struct hmm_range *range, struct sg_table *st);
 #endif
-- 
2.26.3



[PATCH 13/22] drm/xe/svm: Handle CPU page fault

2023-12-20 Thread Oak Zeng
With SVM, the CPU program and the GPU program share one and the same
virtual address space. The backing store of this virtual address
space can be either in system memory or in device memory. Since GPU
device memory is remapped as DEVICE_PRIVATE, the CPU can't access it.
Any CPU access to device memory causes a page fault. Implement
a page fault handler to migrate memory back to system memory and
map it into the CPU page table so the CPU program can proceed.

Also unbind this page from the GPU side, and free the original GPU
device page.
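
For orientation, a single-page skeleton of such a handler following the
generic migrate_vma pattern (compare lib/test_hmm.c); the real handler
below migrates the whole vma, does the copy with the GPU, and uses its
own helpers, so the function name and the bare alloc_page() here are
illustrative only:

static vm_fault_t xe_devm_migrate_to_ram_sketch(struct vm_fault *vmf)
{
	unsigned long src = 0, dst = 0;
	struct page *dpage;
	struct migrate_vma migrate = {
		.vma		= vmf->vma,
		.start		= vmf->address & PAGE_MASK,
		.end		= (vmf->address & PAGE_MASK) + PAGE_SIZE,
		.src		= &src,
		.dst		= &dst,
		.pgmap_owner	= vmf->page->pgmap->owner,
		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
		.fault_page	= vmf->page,
	};

	if (migrate_vma_setup(&migrate))
		return VM_FAULT_SIGBUS;
	if (!(src & MIGRATE_PFN_MIGRATE))
		goto out;

	dpage = alloc_page(GFP_HIGHUSER_MOVABLE);
	if (!dpage)
		goto out;
	lock_page(dpage);

	/*
	 * Device private vram is not CPU addressable, so the actual data
	 * copy has to go through the GPU (e.g. xe_migrate_svm()) and the
	 * copy fence must be waited on before the page goes back to the CPU.
	 */

	dst = migrate_pfn(page_to_pfn(dpage));
out:
	migrate_vma_pages(&migrate);
	migrate_vma_finalize(&migrate);
	return 0;
}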

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_device_types.h |  12 ++
 drivers/gpu/drm/xe/xe_svm.h  |   8 +-
 drivers/gpu/drm/xe/xe_svm_devmem.c   |  10 +-
 drivers/gpu/drm/xe/xe_svm_migrate.c  | 230 +++
 drivers/gpu/drm/xe/xe_svm_range.c|  27 
 5 files changed, 280 insertions(+), 7 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_svm_migrate.c

diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
b/drivers/gpu/drm/xe/xe_device_types.h
index c67c28f04d2f..ac77996bebe6 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -555,4 +555,16 @@ struct xe_file {
struct xe_drm_client *client;
 };
 
+static inline struct xe_tile *mem_region_to_tile(struct xe_mem_region *mr)
+{
+   return container_of(mr, struct xe_tile, mem.vram);
+}
+
+static inline u64 vram_pfn_to_dpa(struct xe_mem_region *mr, u64 pfn)
+{
+   u64 dpa;
+   u64 offset = (pfn << PAGE_SHIFT) - mr->hpa_base;
+   dpa = mr->dpa_base + offset;
+   return dpa;
+}
 #endif
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 8551df2b9780..6b93055934f8 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -12,8 +12,10 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
+#include 
 #include "xe_device_types.h"
 
 struct xe_vm;
@@ -66,16 +68,20 @@ struct xe_svm_range {
struct interval_tree_node inode;
 };
 
+vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf);
 void xe_destroy_svm(struct xe_svm *svm);
 struct xe_svm *xe_create_svm(struct xe_vm *vm);
 struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm);
 struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm *svm,
unsigned long 
addr);
+bool xe_svm_range_belongs_to_vma(struct mm_struct *mm,
+   struct 
xe_svm_range *range,
+   struct 
vm_area_struct *vma);
+
 int xe_svm_build_sg(struct hmm_range *range, struct sg_table *st);
 int xe_svm_devm_add(struct xe_tile *tile, struct xe_mem_region *mem);
 void xe_svm_devm_remove(struct xe_device *xe, struct xe_mem_region *mem);
 
-
 int xe_devm_alloc_pages(struct xe_tile *tile,
unsigned long npages,
struct list_head *blocks,
diff --git a/drivers/gpu/drm/xe/xe_svm_devmem.c 
b/drivers/gpu/drm/xe/xe_svm_devmem.c
index 5cd54dde4a9d..01f8385ebb5b 100644
--- a/drivers/gpu/drm/xe/xe_svm_devmem.c
+++ b/drivers/gpu/drm/xe/xe_svm_devmem.c
@@ -11,13 +11,16 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
-
 #include "xe_device_types.h"
 #include "xe_trace.h"
 #include "xe_migrate.h"
 #include "xe_ttm_vram_mgr_types.h"
 #include "xe_assert.h"
+#include "xe_pt.h"
+#include "xe_svm.h"
 
 /**
  * struct xe_svm_block_meta - svm uses this data structure to manage each
@@ -137,11 +140,6 @@ void xe_devm_free_blocks(struct list_head *blocks)
free_block(block);
 }
 
-static vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf)
-{
-   return 0;
-}
-
 void xe_devm_page_free(struct page *page)
 {
struct drm_buddy_block *block =
diff --git a/drivers/gpu/drm/xe/xe_svm_migrate.c 
b/drivers/gpu/drm/xe/xe_svm_migrate.c
new file mode 100644
index ..3be26da33aa3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_svm_migrate.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "xe_device_types.h"
+#include "xe_trace.h"
+#include "xe_migrate.h"
+#include "xe_ttm_vram_mgr_types.h"
+#include "xe_assert.h"
+#include "xe_pt.h"
+#include "xe_svm.h"
+
+
+/**
+ * alloc_host_page() - allocate one host page for the fault vma
+ *
+ * @dev: (GPU) device that will access the allocated page
+ * @vma: the fault vma that we need allocate page for
+ * @addr: the fault address. The allocated page is for this address
+ *

[PATCH 17/22] drm/xe/svm: clean up svm range during process exit

2023-12-20 Thread Oak Zeng
Clean up svm ranges during process exit: zap the GPU page table of
the svm process on process exit; unregister all the mmu interval
notifiers which were registered before; free the svm ranges and the
svm data structure.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.c   | 24 
 drivers/gpu/drm/xe/xe_svm.h   |  1 +
 drivers/gpu/drm/xe/xe_svm_range.c | 17 +
 3 files changed, 42 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 6393251c0051..5772bfcf7da4 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -9,6 +9,8 @@
 #include 
 #include 
 #include "xe_pt.h"
+#include "xe_assert.h"
+#include "xe_vm_types.h"
 
 DEFINE_HASHTABLE(xe_svm_table, XE_MAX_SVM_PROCESS);
 
@@ -19,9 +21,31 @@ DEFINE_HASHTABLE(xe_svm_table, XE_MAX_SVM_PROCESS);
  */
 void xe_destroy_svm(struct xe_svm *svm)
 {
+#define MAX_SVM_RANGE (1024*1024)
+   struct xe_svm_range **range_array;
+   struct interval_tree_node *node;
+   struct xe_svm_range *range;
+   int i = 0;
+
+   range_array = kzalloc(sizeof(struct xe_svm_range *) * MAX_SVM_RANGE,
+   GFP_KERNEL);
+   node = interval_tree_iter_first(&svm->range_tree, 0, ~0ULL);
+   while (node) {
+   range = container_of(node, struct xe_svm_range, inode);
+   xe_svm_range_prepare_destroy(range);
+   node = interval_tree_iter_next(node, 0, ~0ULL);
+   xe_assert(svm->vm->xe, i < MAX_SVM_RANGE);
+   range_array[i++] = range;
+   }
+
+   /* Freeing a range (and thus range->inode) while traversing above is not safe */
+   while (i--)
+   kfree(range_array[i]);
+
hash_del_rcu(&svm->hnode);
mutex_destroy(&svm->mutex);
kfree(svm);
+   kfree(range_array);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 0038f98c0cc7..5b3bd2c064f5 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -90,6 +90,7 @@ bool xe_svm_range_belongs_to_vma(struct mm_struct *mm,
struct 
vm_area_struct *vma);
 void xe_svm_range_unregister_mmu_notifier(struct xe_svm_range *range);
 int xe_svm_range_register_mmu_notifier(struct xe_svm_range *range);
+void xe_svm_range_prepare_destroy(struct xe_svm_range *range);
 
 int xe_svm_build_sg(struct hmm_range *range, struct sg_table *st);
 int xe_svm_devm_add(struct xe_tile *tile, struct xe_mem_region *mem);
diff --git a/drivers/gpu/drm/xe/xe_svm_range.c 
b/drivers/gpu/drm/xe/xe_svm_range.c
index 53dd3be7ab9f..dfb4660dc26f 100644
--- a/drivers/gpu/drm/xe/xe_svm_range.c
+++ b/drivers/gpu/drm/xe/xe_svm_range.c
@@ -165,3 +165,20 @@ int xe_svm_range_register_mmu_notifier(struct xe_svm_range 
*range)
range->mmu_notifier_registered = true;
return ret;
 }
+
+/**
+ * xe_svm_range_prepare_destroy() - prepare work to destroy a svm range
+ *
+ * @range: the svm range to destroy
+ *
+ * prepare for a svm range destroy: Zap this range from GPU, unregister mmu
+ * notifier.
+ */
+void xe_svm_range_prepare_destroy(struct xe_svm_range *range)
+{
+   struct xe_vm *vm = range->svm->vm;
+   unsigned long length = range->end - range->start;
+
+   xe_invalidate_svm_range(vm, range->start, length);
+   xe_svm_range_unregister_mmu_notifier(range);
+}
-- 
2.26.3



[PATCH 22/22] drm/xe/svm: Add DRM_XE_SVM kernel config entry

2023-12-20 Thread Oak Zeng
A DRM_XE_SVM kernel config entry is added so
the xe svm feature can be configured before kernel
compilation.
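
As a side note, a common alternative to the #if IS_ENABLED(CONFIG_DRM_XE_SVM)
blocks at the call sites (sketch only, not part of this patch) is to provide
no-op stubs in xe_svm.h when the feature is compiled out:

#if IS_ENABLED(CONFIG_DRM_XE_SVM)
struct xe_svm *xe_create_svm(struct xe_vm *vm);
int xe_svm_devm_add(struct xe_tile *tile, struct xe_mem_region *mem);
#else
static inline struct xe_svm *xe_create_svm(struct xe_vm *vm)
{
	return NULL;
}
static inline int xe_svm_devm_add(struct xe_tile *tile,
				  struct xe_mem_region *mem)
{
	return 0;
}
#endif

so that callers such as xe_vm_create() and xe_mmio_probe_vram() compile
unchanged whether or not the option is set.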

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/Kconfig   | 22 ++
 drivers/gpu/drm/xe/Makefile  |  5 +
 drivers/gpu/drm/xe/xe_mmio.c |  5 +
 drivers/gpu/drm/xe/xe_vm.c   |  2 ++
 4 files changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 5b3da06e7ba3..a57f0972e9ae 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -83,6 +83,28 @@ config DRM_XE_FORCE_PROBE
 
  Use "!*" to block the probe of the driver for all known devices.
 
+config DRM_XE_SVM
+bool "Enable Shared Virtual Memory support in xe"
+depends on DRM_XE
+depends on ARCH_ENABLE_MEMORY_HOTPLUG
+depends on ARCH_ENABLE_MEMORY_HOTREMOVE
+depends on MEMORY_HOTPLUG
+depends on MEMORY_HOTREMOVE
+depends on ARCH_HAS_PTE_DEVMAP
+depends on SPARSEMEM_VMEMMAP
+depends on ZONE_DEVICE
+depends on DEVICE_PRIVATE
+depends on MMU
+select HMM_MIRROR
+select MMU_NOTIFIER
+default y
+help
+  Choose this option if you want Shared Virtual Memory (SVM)
+  support in xe. With SVM, the virtual address space is shared
+  between CPU and GPU. This means any virtual address such
+  as malloc or mmap returns, variables on the stack, or global
+  memory pointers, can be used by the GPU transparently.
+
 menu "drm/Xe Debugging"
 depends on DRM_XE
 depends on EXPERT
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index df8601d6a59f..b75bdbc5e42c 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -282,6 +282,11 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/skl_universal_plane.o \
i915-display/skl_watermark.o
 
+xe-$(CONFIG_DRM_XE_SVM) += xe_svm.o \
+  xe_svm_devmem.o \
+  xe_svm_range.o \
+  xe_svm_migrate.o
+
 ifeq ($(CONFIG_ACPI),y)
xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_acpi.o \
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index cfe25a3c7059..7c95f675ed92 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -286,7 +286,9 @@ int xe_mmio_probe_vram(struct xe_device *xe)
}
 
io_size -= min_t(u64, tile_size, io_size);
+#if IS_ENABLED(CONFIG_DRM_XE_SVM)
xe_svm_devm_add(tile, &tile->mem.vram);
+#endif
}
 
xe->mem.vram.actual_physical_size = total_size;
@@ -361,8 +363,11 @@ static void mmio_fini(struct drm_device *drm, void *arg)
pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
if (xe->mem.vram.mapping)
iounmap(xe->mem.vram.mapping);
+
+#if IS_ENABLED(CONFIG_DRM_XE_SVM)
for_each_tile(tile, xe, id) {
xe_svm_devm_remove(xe, &tile->mem.vram);
	}
+#endif
 }
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 3c301a5c7325..12d82f2fc195 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1376,7 +1376,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 
flags)
xe->usm.num_vm_in_non_fault_mode++;
mutex_unlock(&xe->usm.lock);
 
+#if IS_ENABLED(CONFIG_DRM_XE_SVM)
vm->svm = xe_create_svm(vm);
+#endif
trace_xe_vm_create(vm);
 
return vm;
-- 
2.26.3



[PATCH 18/22] drm/xe/svm: Move a few structures to xe_gt.h

2023-12-20 Thread Oak Zeng
Move the access_type enum and the pagefault struct to a header file so
they can be shared with the svm sub-system. This is preparation work
for enabling page faults for svm.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_gt.h   | 20 
 drivers/gpu/drm/xe/xe_gt_pagefault.c | 21 -
 2 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 4486e083f5ef..51dd288cf1cf 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -17,6 +17,26 @@
  xe_hw_engine_is_valid((hwe__)))
 
 #define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> 
XE_HW_ENGINE_CCS0)
+enum access_type {
+   ACCESS_TYPE_READ = 0,
+   ACCESS_TYPE_WRITE = 1,
+   ACCESS_TYPE_ATOMIC = 2,
+   ACCESS_TYPE_RESERVED = 3,
+};
+
+struct pagefault {
+   u64 page_addr;
+   u32 asid;
+   u16 pdata;
+   u8 vfid;
+   u8 access_type;
+   u8 fault_type;
+   u8 fault_level;
+   u8 engine_class;
+   u8 engine_instance;
+   u8 fault_unsuccessful;
+   bool trva_fault;
+};
 
 #ifdef CONFIG_FAULT_INJECTION
 #include  /* XXX: fault-inject.h is broken */
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c 
b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 4489aadc7a52..6de1ff195aaa 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -23,27 +23,6 @@
 #include "xe_trace.h"
 #include "xe_vm.h"
 
-struct pagefault {
-   u64 page_addr;
-   u32 asid;
-   u16 pdata;
-   u8 vfid;
-   u8 access_type;
-   u8 fault_type;
-   u8 fault_level;
-   u8 engine_class;
-   u8 engine_instance;
-   u8 fault_unsuccessful;
-   bool trva_fault;
-};
-
-enum access_type {
-   ACCESS_TYPE_READ = 0,
-   ACCESS_TYPE_WRITE = 1,
-   ACCESS_TYPE_ATOMIC = 2,
-   ACCESS_TYPE_RESERVED = 3,
-};
-
 enum fault_type {
NOT_PRESENT = 0,
WRITE_ACCESS_VIOLATION = 1,
-- 
2.26.3



[PATCH 10/22] drm/xe/svm: Introduce svm migration function

2023-12-20 Thread Oak Zeng
Introduce a xe_migrate_svm function for data migration.
This function is similar to the xe_migrate_copy function
but has different parameters. Instead of BO and TTM
resource parameters, it takes the source and destination
buffers' device physical addresses (DPA) as parameters.
This function is intended to be used by the SVM subsystem,
which doesn't have the BO and TTM concepts.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_migrate.c | 213 
 drivers/gpu/drm/xe/xe_migrate.h |   7 ++
 2 files changed, 220 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index adf1dab5eba2..425de8e44deb 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -387,6 +387,37 @@ static u64 xe_migrate_res_sizes(struct xe_device *xe, 
struct xe_res_cursor *cur)
 cur->remaining);
 }
 
+/**
+ * pte_update_cmd_size() - calculate the batch buffer command size
+ * to update a flat page table.
+ *
+ * @size: The virtual address range size of the page table to update
+ *
+ * The page table to update is supposed to be a flat 1 level page
+ * table with all entries pointing to 4k pages.
+ *
+ * Return the number of dwords of the update command
+ */
+static u32 pte_update_cmd_size(u64 size)
+{
+   u32 dword;
+   u64 entries = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+
+   XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER);
+   /*
+* MI_STORE_DATA_IMM command is used to update page table. Each
+* instruction can update at most 0x1ff pte entries. To update
+* n (n <= 0x1ff) pte entries, we need:
+* 1 dword for the MI_STORE_DATA_IMM command header (opcode etc)
+* 2 dwords for the page table's physical location
+* 2*n dwords for the values of the ptes to fill (each pte entry is 2 dwords)
+*/
+   dword = (1 + 2) * DIV_ROUND_UP(entries, 0x1ff);
+   dword += entries * 2;
+
+   return dword;
+}
+
 static u32 pte_update_size(struct xe_migrate *m,
   bool is_vram,
   struct ttm_resource *res,
@@ -492,6 +523,48 @@ static void emit_pte(struct xe_migrate *m,
}
 }
 
+/**
+ * build_pt_update_batch_sram() - build batch buffer commands to update
+ * migration vm page table for system memory
+ *
+ * @m: The migration context
+ * @bb: The batch buffer which hold the page table update commands
+ * @pt_offset: The offset of page table to update, in byte
+ * @dpa: device physical address you want the page table to point to
+ * @size: size of the virtual address space you want the page table to cover
+ */
+static void build_pt_update_batch_sram(struct xe_migrate *m,
+struct xe_bb *bb, u32 pt_offset,
+u64 dpa, u32 size)
+{
+   u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB];
+   u32 ptes;
+
+   ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+   while (ptes) {
+   u32 chunk = min(0x1ffU, ptes);
+
+   bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
+   bb->cs[bb->len++] = pt_offset;
+   bb->cs[bb->len++] = 0;
+
+   pt_offset += chunk * 8;
+   ptes -= chunk;
+
+   while (chunk--) {
+   u64 addr;
+
+   addr = dpa & PAGE_MASK;
+   addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
+addr, pat_index,
+0, false, 0);
+   bb->cs[bb->len++] = lower_32_bits(addr);
+   bb->cs[bb->len++] = upper_32_bits(addr);
+   dpa += XE_PAGE_SIZE;
+   }
+   }
+}
+
 #define EMIT_COPY_CCS_DW 5
 static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
  u64 dst_ofs, bool dst_is_indirect,
@@ -808,6 +881,146 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
return fence;
 }
 
+/**
+ * xe_migrate_svm() - A migrate function used by SVM subsystem
+ *
+ * @m: The migration context
+ * @src_dpa: device physical start address of source, from GPU's point of view
+ * @src_is_vram: True if source buffer is in vram.
+ * @dst_dpa: device physical start address of destination, from GPU's point of view
+ * @dst_is_vram: True if destination buffer is in vram.
+ * @size: The size of data to copy.
+ *
+ * Copy @size bytes of data from @src_dpa to @dst_dpa. The functionality
+ * and behavior of this function are similar to xe_migrate_copy, but
+ * the interface is different. This function is a helper supposed to
+ * be used by the SVM subsystem. Since the SVM subsystem has no buffer
+ * object and no ttm, there is no src/dst bo as function input.

[PATCH 20/22] drm/xe/svm: Populate svm range

2023-12-20 Thread Oak Zeng
Add a helper function svm_populate_range to populate
a svm range. This function calls hmm_range_fault
to read the CPU page tables and populate all pfns of this
virtual address range into an array, saved in
hmm_range::hmm_pfns. This is preparatory work to bind a svm
range to the GPU. The hmm_pfns array will be used for the GPU binding.
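
As a rough sketch of the intended caller pattern (hedged: the wrapper name
and the exact lock/free responsibilities are assumptions; only
svm_populate_range() and xe_svm_build_sg() come from this series):

/* Hypothetical wrapper, for illustration only. */
static int svm_prepare_range_for_bind(struct xe_svm_range *svm_range,
				      struct hmm_range *hmm_range,
				      struct sg_table *st, bool write)
{
	int ret;

	mmap_read_lock(svm_range->svm->mm);
	ret = svm_populate_range(svm_range, hmm_range, write);
	mmap_read_unlock(svm_range->svm->mm);
	if (ret)
		return ret;

	/*
	 * hmm_range->hmm_pfns now describes [start, end); keep hmm_range
	 * around so the bind path can do its mmu_interval_read_retry()
	 * check, and kvfree(hmm_range->hmm_pfns) once binding is done.
	 */
	return xe_svm_build_sg(hmm_range, st);
}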

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.c | 61 +
 1 file changed, 61 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 44d4f4216a93..0c13690a19f5 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -145,3 +145,64 @@ int xe_svm_build_sg(struct hmm_range *range,
sg_mark_end(sg);
return 0;
 }
+
+/** Populate physical pages of a virtual address range
+ * This function also reads the mmu notifier sequence # (via
+ * mmu_interval_read_begin), for the purpose of later
+ * comparison (through mmu_interval_read_retry).
+ * This must be called with mmap read or write lock held.
+ *
+ * This function allocates hmm_range->hmm_pfns; it is the caller's
+ * responsibility to free it.
+ *
+ * @svm_range: The svm range to populate
+ * @hmm_range: pointer to hmm_range struct. hmm_range->hmm_pfns
+ * will hold the populated pfns.
+ * @write: populate pages with write permission
+ *
+ * returns: 0 for success; negative errno on failure
+ */
+static int svm_populate_range(struct xe_svm_range *svm_range,
+   struct hmm_range *hmm_range, bool write)
+{
+   unsigned long timeout =
+   jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+   unsigned long *pfns, flags = HMM_PFN_REQ_FAULT;
+   u64 npages;
+   int ret;
+
+   mmap_assert_locked(svm_range->svm->mm);
+
+   npages = ((svm_range->end - 1) >> PAGE_SHIFT) -
+   (svm_range->start >> PAGE_SHIFT) + 1;
+   pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+   if (unlikely(!pfns))
+   return -ENOMEM;
+
+   if (write)
+   flags |= HMM_PFN_REQ_WRITE;
+
+   memset64((u64 *)pfns, (u64)flags, npages);
+   hmm_range->hmm_pfns = pfns;
+   hmm_range->notifier_seq = mmu_interval_read_begin(&svm_range->notifier);
+   hmm_range->notifier = &svm_range->notifier;
+   hmm_range->start = svm_range->start;
+   hmm_range->end = svm_range->end;
+   hmm_range->pfn_flags_mask = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE;
+   hmm_range->dev_private_owner = svm_range->svm->vm->xe->drm.dev;
+
+   while (true) {
+   ret = hmm_range_fault(hmm_range);
+   if (time_after(jiffies, timeout))
+   goto free_pfns;
+
+   if (ret == -EBUSY)
+   continue;
+   break;
+   }
+
+free_pfns:
+   if (ret)
+   kvfree(pfns);
+   return ret;
+}
-- 
2.26.3



[PATCH 07/22] drm/xe/svm: Add helper for binding hmm range to gpu

2023-12-20 Thread Oak Zeng
Add a helper function xe_bind_svm_range to bind a svm range
to the gpu. A temporary xe_vma is created locally to re-use
existing page table update functions, which are vma-based.

The svm page table update lock design is different from the
userptr and bo page table updates. An xe_pt_svm_pre_commit
function is introduced for svm range pre-commitment.

A hmm_range pointer is added to the xe_vma struct.
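
A hedged sketch of how a caller might drive this helper; the retry loop
and the svm_refault_range() placeholder (re-running hmm_range_fault) are
assumptions, only xe_bind_svm_range() comes from this patch:

/* Hypothetical caller, for illustration only. */
static int svm_bind_range_with_retry(struct xe_vm *vm, struct xe_tile *tile,
				     struct hmm_range *range, u64 pte_flags)
{
	int ret;

	for (;;) {
		ret = xe_bind_svm_range(vm, tile, range, pte_flags);
		if (ret != -EAGAIN)
			return ret;

		/*
		 * -EAGAIN: the CPU page tables changed while binding;
		 * per the kernel-doc below, redo hmm_range_fault() first.
		 * svm_refault_range() is a placeholder for that step.
		 */
		ret = svm_refault_range(range);
		if (ret)
			return ret;
	}
}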

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_pt.c   | 101 ++-
 drivers/gpu/drm/xe/xe_pt.h   |   4 ++
 drivers/gpu/drm/xe/xe_vm_types.h |  10 +++
 3 files changed, 113 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index de1030a47588..65cfac88ab2f 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -17,6 +17,7 @@
 #include "xe_trace.h"
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_vm.h"
+#include "xe_svm.h"
 
 struct xe_pt_dir {
struct xe_pt pt;
@@ -617,7 +618,10 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
xe_bo_assert_held(bo);
 
if (!xe_vma_is_null(vma)) {
-   if (xe_vma_is_userptr(vma))
+   if (vma->svm_sg)
+   xe_res_first_sg(vma->svm_sg, 0, xe_vma_size(vma),
+   &curs);
+   else if (xe_vma_is_userptr(vma))
xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma),
&curs);
else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
@@ -1046,6 +1050,28 @@ static int xe_pt_userptr_pre_commit(struct 
xe_migrate_pt_update *pt_update)
return 0;
 }
 
+static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+   struct xe_vma *vma = pt_update->vma;
+   struct hmm_range *range = vma->hmm_range;
+
+   if (mmu_interval_read_retry(range->notifier,
+   range->notifier_seq)) {
+   /*
+* FIXME: is this really necessary? We didn't update GPU
+* page table yet...
+*/
+   xe_vm_invalidate_vma(vma);
+   return -EAGAIN;
+   }
+   return 0;
+}
+
+static const struct xe_migrate_pt_update_ops svm_bind_ops = {
+   .populate = xe_vm_populate_pgtable,
+   .pre_commit = xe_pt_svm_pre_commit,
+};
+
 static const struct xe_migrate_pt_update_ops bind_ops = {
.populate = xe_vm_populate_pgtable,
.pre_commit = xe_pt_pre_commit,
@@ -1197,7 +1223,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma 
*vma, struct xe_exec_queue
struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
struct xe_pt_migrate_pt_update bind_pt_update = {
.base = {
-   .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops,
+   .ops = vma->svm_sg ? &svm_bind_ops :
+   (xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops),
.vma = vma,
.tile_id = tile->id,
},
@@ -1651,3 +1678,73 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma 
*vma, struct xe_exec_queu
 
return fence;
 }
+
+/**
+ * xe_bind_svm_range() - bind an address range to vm
+ *
+ * @vm: the vm to bind this address range
+ * @tile: the tile to bind this address range to
+ * @range: a hmm_range which includes all the information
+ * needed for binding: virtual address range and physical
+ * pfns to back up this virtual address range.
+ * @flags: the binding flags to set in pte
+ *
+ * This is a helper function used by svm sub-system
+ * to bind a svm range to gpu vm. svm sub-system
+ * doesn't have xe_vma, thus helpers such as
+ * __xe_pt_bind_vma can't be used directly. So this
+ * helper is written for svm sub-system to use.
+ *
+ * This is a synchronous function. When this function
+ * returns, either the svm range is bound to GPU, or
+ * error happened.
+ *
+ * Return: 0 for success or error code for failure
+ * If -EAGAIN is returned, it means the mmu notifier was called
+ * (i.e., there was a concurrent cpu page table update) during
+ * this function; the caller has to retry hmm_range_fault
+ */
+int xe_bind_svm_range(struct xe_vm *vm, struct xe_tile *tile,
+   struct hmm_range *range, u64 flags)
+{
+   struct dma_fence *fence = NULL;
+   struct xe_svm *svm = vm->svm;
+   int ret = 0;
+   /*
+* Create a temp vma to reuse page table helpers such as
+* __xe_pt_bind_vma
+*/
+   struct xe_vma vma = {
+   .gpuva = {
+   .va = {
+   .addr = range->start,
+   .range = range->end - range-

[PATCH 15/22] drm/xe/svm: Implement functions to register and unregister mmu notifier

2023-12-20 Thread Oak Zeng
The xe driver registers a mmu interval notifier with core mm to monitor vma
changes. We register a mmu interval notifier for each svm range. The mmu
interval notifier should be unregistered in a worker (see the next patch
in this series), so also initialize a kernel worker to unregister the mmu
interval notifier.
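
For context, a hedged sketch of the kind of invalidate callback that would
pair with this worker; the real callback is added in the next patch and may
differ, so everything below is an assumption rather than that patch's code:

/* Hypothetical callback shape, for illustration only. */
static bool xe_svm_range_invalidate_sketch(struct mmu_interval_notifier *mni,
					   const struct mmu_notifier_range *mnr,
					   unsigned long cur_seq)
{
	struct xe_svm_range *range =
		container_of(mni, struct xe_svm_range, notifier);

	mmu_interval_set_seq(mni, cur_seq);

	/*
	 * mmu_interval_notifier_remove() must not be called from inside
	 * the callback itself, hence the deferral to the worker that this
	 * patch initializes.
	 */
	if (mnr->event == MMU_NOTIFY_UNMAP)
		queue_work(system_unbound_wq,
			   &range->unregister_notifier_work);

	return true;
}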

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.h   | 14 ++
 drivers/gpu/drm/xe/xe_svm_range.c | 73 +++
 2 files changed, 87 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 6b93055934f8..90e665f2bfc6 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -52,16 +52,28 @@ struct xe_svm {
  * struct xe_svm_range - Represents a shared virtual address range.
  */
 struct xe_svm_range {
+   /** @svm: pointer of the xe_svm that this range belongs to */
+   struct xe_svm *svm;
+
/** @notifier: The mmu interval notifer used to keep track of CPU
 * side address range change. Driver will get a callback with this
 * notifier if anything changed from CPU side, such as range is
 * unmapped from CPU
 */
struct mmu_interval_notifier notifier;
+   bool mmu_notifier_registered;
/** @start: start address of this range, inclusive */
u64 start;
/** @end: end address of this range, exclusive */
u64 end;
+   /** @vma: the corresponding vma of this svm range
+*  The relationship b/t vma and svm range is 1:N,
+*  which means one vma can be split into multiple
+*  @xe_svm_range while one @xe_svm_range can have
+*  only one vma. An N:N mapping would add complication
+*  to the code, so let's assume 1:N for now.
+*/
+   struct vm_area_struct *vma;
/** @unregister_notifier_work: A worker used to unregister this 
notifier */
struct work_struct unregister_notifier_work;
/** @inode: used to link this range to svm's range_tree */
@@ -77,6 +89,8 @@ struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm 
*svm,
 bool xe_svm_range_belongs_to_vma(struct mm_struct *mm,
struct xe_svm_range *range,
struct vm_area_struct *vma);
+void xe_svm_range_unregister_mmu_notifier(struct xe_svm_range *range);
+int xe_svm_range_register_mmu_notifier(struct xe_svm_range *range);
 
 int xe_svm_build_sg(struct hmm_range *range, struct sg_table *st);
 int xe_svm_devm_add(struct xe_tile *tile, struct xe_mem_region *mem);
diff --git a/drivers/gpu/drm/xe/xe_svm_range.c 
b/drivers/gpu/drm/xe/xe_svm_range.c
index b32c32f60315..286d5f7d6ecd 100644
--- a/drivers/gpu/drm/xe/xe_svm_range.c
+++ b/drivers/gpu/drm/xe/xe_svm_range.c
@@ -4,6 +4,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -57,3 +58,75 @@ bool xe_svm_range_belongs_to_vma(struct mm_struct *mm,
 
return (vma1 == vma) && (vma2 == vma);
 }
+
+static const struct mmu_interval_notifier_ops xe_svm_mni_ops = {
+   .invalidate = NULL,
+};
+
+/**
+ * unregister a mmu interval notifier for a svm range
+ *
+ * @range: svm range
+ *
+ */
+void xe_svm_range_unregister_mmu_notifier(struct xe_svm_range *range)
+{
+   if (!range->mmu_notifier_registered)
+   return;
+
+   mmu_interval_notifier_remove(&range->notifier);
+   range->mmu_notifier_registered = false;
+}
+
+static void xe_svm_unregister_notifier_work(struct work_struct *work)
+{
+   struct xe_svm_range *range;
+
+   range = container_of(work, struct xe_svm_range, 
unregister_notifier_work);
+
+   xe_svm_range_unregister_mmu_notifier(range);
+
+   /**
+* This is called from mmu notifier MUNMAP event. When munmap is called,
+* this range is not valid any more. Remove it.
+*/
+   mutex_lock(&range->svm->mutex);
+   interval_tree_remove(&range->inode, &range->svm->range_tree);
+   mutex_unlock(&range->svm->mutex);
+   kfree(range);
+}
+
+/**
+ * register a mmu interval notifier to monitor vma change
+ *
+ * @range: svm range to monitor
+ *
+ * This has to be called inside a mmap_read_lock
+ */
+int xe_svm_range_register_mmu_notifier(struct xe_svm_range *range)
+{
+   struct vm_area_struct *vma = range->vma;
+   struct mm_struct *mm = range->svm->mm;
+   u64 start, length;
+   int ret = 0;
+
+   if (range->mmu_notifier_registered)
+   return 0;
+
+   start =  range->start;
+   length = range->end - start;
+   /** We are inside a mmap_read_lock, but it requires a mmap_write_lock
+*  to register mmu notifier.
+*/
+   mmap_read_unlock(mm);
+   mmap_write_lock(mm);
+   ret = mmu_interval_notifier_insert_locked(&range

[PATCH 14/22] drm/xe/svm: trace svm range migration

2023-12-20 Thread Oak Zeng
Add a function to trace svm range migration, either
from vram to sram or from sram to vram.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm_migrate.c |  1 +
 drivers/gpu/drm/xe/xe_trace.h   | 30 +
 2 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_svm_migrate.c 
b/drivers/gpu/drm/xe/xe_svm_migrate.c
index 3be26da33aa3..b4df411e04f3 100644
--- a/drivers/gpu/drm/xe/xe_svm_migrate.c
+++ b/drivers/gpu/drm/xe/xe_svm_migrate.c
@@ -201,6 +201,7 @@ vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf)
if (!migrate_vma.cpages)
goto free_buf;
 
+   trace_xe_svm_migrate_vram_to_sram(range);
for (i = 0; i < npages; i++) {
ret = migrate_page_vram_to_ram(vma, addr, migrate_vma.src[i],
migrate_vma.dst + i);
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 50380f5173ca..960eec38aee5 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -21,6 +21,7 @@
 #include "xe_guc_exec_queue_types.h"
 #include "xe_sched_job.h"
 #include "xe_vm.h"
+#include "xe_svm.h"
 
 DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
@@ -601,6 +602,35 @@ DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h,
 
 );
 
+DECLARE_EVENT_CLASS(xe_svm_migrate,
+   TP_PROTO(struct xe_svm_range *range),
+   TP_ARGS(range),
+
+   TP_STRUCT__entry(
+__field(u64, start)
+__field(u64, end)
+),
+
+   TP_fast_assign(
+  __entry->start = range->start;
+  __entry->end = range->end;
+  ),
+
+   TP_printk("Migrate svm range [0x%016llx,0x%016llx)",  
__entry->start,
+ __entry->end)
+);
+
+DEFINE_EVENT(xe_svm_migrate, xe_svm_migrate_vram_to_sram,
+   TP_PROTO(struct xe_svm_range *range),
+   TP_ARGS(range)
+);
+
+
+DEFINE_EVENT(xe_svm_migrate, xe_svm_migrate_sram_to_vram,
+   TP_PROTO(struct xe_svm_range *range),
+   TP_ARGS(range)
+);
+
 DECLARE_EVENT_CLASS(xe_buddy_block,
TP_PROTO(struct drm_buddy_block *block, u64 size, u64 pfn),
TP_ARGS(block, size, pfn),
-- 
2.26.3



[PATCH 02/22] drm/xe/svm: Add svm key data structures

2023-12-20 Thread Oak Zeng
Add xe_svm and xe_svm_range data structure. Each xe_svm
represents a svm address space and it maps 1:1 to the
process's mm_struct. It also maps 1:1 to the gpu xe_vm
struct.

Each xe_svm_range represents a virtual address range inside
a svm address space. It is similar to the CPU's vm_area_struct,
or to the GPU xe_vma struct. It contains data to synchronize
this address range to the CPU's virtual address range, using the
mmu notifier mechanism. It can also hold this range's memory
attributes set by the user, such as preferred memory location etc -
this is TBD.

Each svm address space is made up of many svm virtual address ranges.
All address ranges are maintained in xe_svm's interval tree.

Also add an xe_svm pointer to the xe_vm data structure, so we have
a 1:1 mapping b/t xe_svm and xe_vm.
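
As a hedged illustration of how these structures fit together (the helper
name and the locking shown here are assumptions; the series adds its own
insertion paths in later patches):

/* Hypothetical helper, for illustration only. */
static void xe_svm_range_insert_sketch(struct xe_svm *svm,
				       struct xe_svm_range *range)
{
	/* interval_tree_node uses an inclusive last address */
	range->inode.start = range->start;
	range->inode.last = range->end - 1;

	mutex_lock(&svm->mutex);
	interval_tree_insert(&range->inode, &svm->range_tree);
	mutex_unlock(&svm->mutex);
}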

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.h  | 59 
 drivers/gpu/drm/xe/xe_vm_types.h |  2 ++
 2 files changed, 61 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_svm.h

diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
new file mode 100644
index ..ba301a331f59
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __XE_SVM_H
+#define __XE_SVM_H
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct xe_vm;
+struct mm_struct;
+
+/**
+ * struct xe_svm - data structure to represent a shared
+ * virtual address space from device side. xe_svm, xe_vm
+ * and mm_struct have a 1:1:1 relationship.
+ */
+struct xe_svm {
+   /** @vm: The xe_vm address space corresponding to this xe_svm */
+   struct xe_vm *vm;
+   /** @mm: The mm_struct corresponding to this xe_svm */
+   struct mm_struct *mm;
+   /**
+* @mutex: A lock used by svm subsystem. It protects:
+* 1. below range_tree
+* 2. GPU page table update. Serialize all SVM GPU page table updates
+*/
+   struct mutex mutex;
+   /**
+* @range_tree: Interval tree of all svm ranges in this svm
+*/
+   struct rb_root_cached range_tree;
+};
+
+/**
+ * struct xe_svm_range - Represents a shared virtual address range.
+ */
+struct xe_svm_range {
+   /** @notifier: The mmu interval notifier used to keep track of CPU
+* side address range change. Driver will get a callback with this
+* notifier if anything changed from CPU side, such as range is
+* unmapped from CPU
+*/
+   struct mmu_interval_notifier notifier;
+   /** @start: start address of this range, inclusive */
+   u64 start;
+   /** @end: end address of this range, exclusive */
+   u64 end;
+   /** @unregister_notifier_work: A worker used to unregister this 
notifier */
+   struct work_struct unregister_notifier_work;
+   /** @inode: used to link this range to svm's range_tree */
+   struct interval_tree_node inode;
+};
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 63e8a50b88e9..037fb7168c63 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -17,6 +17,7 @@
 #include "xe_pt_types.h"
 #include "xe_range_fence.h"
 
+struct xe_svm;
 struct xe_bo;
 struct xe_sync_entry;
 struct xe_vm;
@@ -279,6 +280,7 @@ struct xe_vm {
bool batch_invalidate_tlb;
/** @xef: XE file handle for tracking this VM's drm client */
struct xe_file *xef;
+   struct xe_svm *svm;
 };
 
 /** struct xe_vma_op_map - VMA map operation */
-- 
2.26.3



[PATCH 09/22] drm/xe/svm: Remap and provide memmap backing for GPU vram

2023-12-20 Thread Oak Zeng
Memory remap GPU vram using devm_memremap_pages, so each GPU vram
page is backed by a struct page.

Those struct pages are created to allow hmm to migrate buffers b/t
GPU vram and CPU system memory using the existing Linux migration
mechanism (i.e., migrating b/t CPU system memory and hard disk).

This is preparatory work to enable svm (shared virtual memory) through
the Linux kernel hmm framework. The memory remap's page map type is set
to MEMORY_DEVICE_PRIVATE for now. This means that even though each GPU
vram page gets a struct page and can be mapped in the CPU page table,
such pages are treated as the GPU's private resource, so the CPU can't
access them. If the CPU accesses such a page, a page fault is triggered
and the page will be migrated to system memory.

For GPU devices which support a coherent memory protocol b/t CPU and
GPU (such as the CXL and CAPI protocols), we can remap device memory as
MEMORY_DEVICE_COHERENT. This is TBD.
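
A hedged sketch of the remap step described above; the body of
xe_svm_devm_add() in the diff below is cut off by the archive, and the
resource request, the usable_size field and the exact pagemap setup here
are assumptions rather than the patch's actual implementation:

/* Illustrative only; not this patch's implementation. */
static int xe_svm_devm_add_sketch(struct xe_tile *tile,
				  struct xe_mem_region *mr)
{
	struct device *dev = tile->xe->drm.dev;
	struct resource *res;
	void *addr;

	/* reserve a host physical range to stand in for the vram */
	res = devm_request_free_mem_region(dev, &iomem_resource,
					   mr->usable_size);
	if (IS_ERR(res))
		return PTR_ERR(res);

	mr->pagemap.type = MEMORY_DEVICE_PRIVATE;
	mr->pagemap.range.start = res->start;
	mr->pagemap.range.end = res->end;
	mr->pagemap.nr_range = 1;
	mr->pagemap.ops = &xe_devm_pagemap_ops;
	mr->pagemap.owner = tile->xe->drm.dev;

	/* creates a struct page for every vram page */
	addr = devm_memremap_pages(dev, &mr->pagemap);
	if (IS_ERR(addr))
		return PTR_ERR(addr);

	mr->hpa_base = res->start;
	return 0;
}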

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_device_types.h |  8 +++
 drivers/gpu/drm/xe/xe_mmio.c |  7 +++
 drivers/gpu/drm/xe/xe_svm.h  |  2 +
 drivers/gpu/drm/xe/xe_svm_devmem.c   | 87 
 4 files changed, 104 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_svm_devmem.c

diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
b/drivers/gpu/drm/xe/xe_device_types.h
index 71f23ac365e6..c67c28f04d2f 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -99,6 +99,14 @@ struct xe_mem_region {
resource_size_t actual_physical_size;
/** @mapping: pointer to VRAM mappable space */
void *__iomem mapping;
+   /** @pagemap: Used to remap device memory as ZONE_DEVICE */
+   struct dev_pagemap pagemap;
+   /**
+* @hpa_base: base host physical address
+*
+* This is generated when remap device memory as ZONE_DEVICE
+*/
+   resource_size_t hpa_base;
 };
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index f660cfb79f50..cfe25a3c7059 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -21,6 +21,7 @@
 #include "xe_macros.h"
 #include "xe_module.h"
 #include "xe_tile.h"
+#include "xe_svm.h"
 
 #define XEHP_MTCFG_ADDRXE_REG(0x101800)
 #define TILE_COUNT REG_GENMASK(15, 8)
@@ -285,6 +286,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
}
 
io_size -= min_t(u64, tile_size, io_size);
+   xe_svm_devm_add(tile, &tile->mem.vram);
}
 
xe->mem.vram.actual_physical_size = total_size;
@@ -353,10 +355,15 @@ void xe_mmio_probe_tiles(struct xe_device *xe)
 static void mmio_fini(struct drm_device *drm, void *arg)
 {
struct xe_device *xe = arg;
+   struct xe_tile *tile;
+   u8 id;
 
pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
if (xe->mem.vram.mapping)
iounmap(xe->mem.vram.mapping);
+   for_each_tile(tile, xe, id) {
+   xe_svm_devm_remove(xe, &tile->mem.vram);
+   }
 }
 
 static int xe_verify_lmem_ready(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 191bce6425db..b54f7714a1fc 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -72,4 +72,6 @@ struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm);
 struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm *svm,
unsigned long 
addr);
 int xe_svm_build_sg(struct hmm_range *range, struct sg_table *st);
+int xe_svm_devm_add(struct xe_tile *tile, struct xe_mem_region *mem);
+void xe_svm_devm_remove(struct xe_device *xe, struct xe_mem_region *mem);
 #endif
diff --git a/drivers/gpu/drm/xe/xe_svm_devmem.c 
b/drivers/gpu/drm/xe/xe_svm_devmem.c
new file mode 100644
index ..cf7882830247
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_svm_devmem.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include 
+#include 
+
+#include "xe_device_types.h"
+#include "xe_trace.h"
+
+
+static vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf)
+{
+   return 0;
+}
+
+static void xe_devm_page_free(struct page *page)
+{
+}
+
+static const struct dev_pagemap_ops xe_devm_pagemap_ops = {
+   .page_free = xe_devm_page_free,
+   .migrate_to_ram = xe_devm_migrate_to_ram,
+};
+
+/**
+ * xe_svm_devm_add: Remap and provide memmap backing for device memory
+ * @tile: tile that the memory region belongs to
+ * @mr: memory region to remap
+ *
+ * This remaps device memory into the host physical address space and creates
+ * struct pages to back the device memory
+ *
+ * Return: 0 on success standard error co

[PATCH 19/22] drm/xe/svm: migrate svm range to vram

2023-12-20 Thread Oak Zeng
Since the source pages of the svm range can be physically non-contiguous,
and the destination vram pages can also be non-contiguous, there is no
easy way to migrate multiple pages per blitter command. We do page-by-page
migration for now.

Migration is best effort. Even if we fail to migrate some pages,
we will try to migrate the remaining pages.
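
For illustration, a hedged sketch of a call site; the real caller is the
GPU page fault handler added later in the series, so the wrapper and the
drm_dbg message below are assumptions:

/* Hypothetical call site, for illustration only. */
static void svm_try_migrate_to_vram(struct xe_svm_range *range,
				    struct vm_area_struct *vma,
				    struct xe_tile *tile)
{
	struct mm_struct *mm = range->svm->mm;

	mmap_read_lock(mm);
	/* best effort: on failure the range simply stays in system memory */
	if (svm_migrate_range_to_vram(range, vma, tile))
		drm_dbg(&tile->xe->drm, "svm migration to vram failed\n");
	mmap_read_unlock(mm);
}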

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.c |   7 ++
 drivers/gpu/drm/xe/xe_svm.h |   3 +
 drivers/gpu/drm/xe/xe_svm_migrate.c | 114 
 3 files changed, 124 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 5772bfcf7da4..44d4f4216a93 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -5,12 +5,19 @@
 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
 #include "xe_svm.h"
 #include 
 #include 
 #include "xe_pt.h"
 #include "xe_assert.h"
 #include "xe_vm_types.h"
+#include "xe_gt.h"
+#include "xe_migrate.h"
+#include "xe_trace.h"
 
 DEFINE_HASHTABLE(xe_svm_table, XE_MAX_SVM_PROCESS);
 
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 5b3bd2c064f5..659bcb7927d6 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -80,6 +80,9 @@ struct xe_svm_range {
 };
 
 vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf);
+int svm_migrate_range_to_vram(struct xe_svm_range *range,
+   struct vm_area_struct 
*vma,
+   struct xe_tile *tile);
 void xe_destroy_svm(struct xe_svm *svm);
 struct xe_svm *xe_create_svm(struct xe_vm *vm);
 struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm);
diff --git a/drivers/gpu/drm/xe/xe_svm_migrate.c 
b/drivers/gpu/drm/xe/xe_svm_migrate.c
index b4df411e04f3..3724ad6c7aea 100644
--- a/drivers/gpu/drm/xe/xe_svm_migrate.c
+++ b/drivers/gpu/drm/xe/xe_svm_migrate.c
@@ -229,3 +229,117 @@ vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf)
kvfree(buf);
return 0;
 }
+
+
+/**
+ * svm_migrate_range_to_vram() - migrate backing store of a va range to vram
+ * Must be called with mmap_read_lock(mm) held.
+ * @range: the va range to migrate. Range should only belong to one vma.
+ * @vma: the vma that this range belongs to. @range can cover whole @vma
+ * or a sub-range of @vma.
+ * @tile: the destination tile which holds the new backing store of the range
+ *
+ * Returns: negative errno on failure, 0 on success
+ */
+int svm_migrate_range_to_vram(struct xe_svm_range *range,
+   struct vm_area_struct 
*vma,
+   struct xe_tile *tile)
+{
+   struct mm_struct *mm = range->svm->mm;
+   unsigned long start = range->start;
+   unsigned long end = range->end;
+   unsigned long npages = (end - start) >> PAGE_SHIFT;
+   struct xe_mem_region *mr = &tile->mem.vram;
+   struct migrate_vma migrate = {
+   .vma= vma,
+   .start  = start,
+   .end= end,
+   .pgmap_owner= tile->xe->drm.dev,
+   .flags  = MIGRATE_VMA_SELECT_SYSTEM,
+   };
+   struct device *dev = tile->xe->drm.dev;
+   dma_addr_t *src_dma_addr;
+   struct dma_fence *fence;
+   struct page *src_page;
+   LIST_HEAD(blocks);
+   int ret = 0, i;
+   u64 dst_dpa;
+   void *buf;
+
+   mmap_assert_locked(mm);
+   xe_assert(tile->xe, xe_svm_range_belongs_to_vma(mm, range, vma));
+
+   buf = kvcalloc(npages, 2* sizeof(*migrate.src) + sizeof(*src_dma_addr),
+   GFP_KERNEL);
+   if(!buf)
+   return -ENOMEM;
+   migrate.src = buf;
+   migrate.dst = migrate.src + npages;
+   src_dma_addr = (dma_addr_t *) (migrate.dst + npages);
+   ret = xe_devm_alloc_pages(tile, npages, &blocks, migrate.dst);
+   if (ret)
+   goto kfree_buf;
+
+   ret = migrate_vma_setup(&migrate);
+   if (ret) {
+   drm_err(&tile->xe->drm, "vma setup returned %d for range [%lx - 
%lx]\n",
+   ret, start, end);
+   goto free_dst_pages;
+   }
+
+   trace_xe_svm_migrate_sram_to_vram(range);
+   /**FIXME: partial migration of a range
+* print a warning for now. If this message
+* is printed, we need to fall back to page by page
+* migration: only migrate pages with MIGRATE_PFN_MIGRATE
+*/
+   if (migrate.cpages != npages)
+   drm_warn(&tile->xe->drm, "Partial migration for ran

[PATCH 03/22] drm/xe/svm: create xe svm during vm creation

2023-12-20 Thread Oak Zeng
Create the xe_svm struct during xe_vm creation.
Add xe_svm to a global hash table so later on
we can retrieve xe_svm using mm_struct (the key).

Destroy the svm process during xe_vm close.

Also add a helper function to retrieve the svm struct
from the mm struct.
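
For illustration, a hedged sketch of how the lookup helper might be used
from a CPU-side path (the wrapper itself is an assumption, not part of
this patch). A CPU page fault handler, for example, only has the faulting
mm to work with, so this is the way back into the driver's view of the
address space:

/* Hypothetical helper, for illustration only. */
static struct xe_vm *xe_vm_from_mm_sketch(struct mm_struct *mm)
{
	struct xe_svm *svm = xe_lookup_svm_by_mm(mm);

	return svm ? svm->vm : NULL;
}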

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.c | 63 +
 drivers/gpu/drm/xe/xe_svm.h | 11 +++
 drivers/gpu/drm/xe/xe_vm.c  |  5 +++
 3 files changed, 79 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_svm.c

diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
new file mode 100644
index ..559188471949
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include 
+#include 
+#include "xe_svm.h"
+
+DEFINE_HASHTABLE(xe_svm_table, XE_MAX_SVM_PROCESS);
+
+/**
+ * xe_destroy_svm() - destroy a svm process
+ *
+ * @svm: the xe_svm to destroy
+ */
+void xe_destroy_svm(struct xe_svm *svm)
+{
+   hash_del_rcu(&svm->hnode);
+   mutex_destroy(&svm->mutex);
+   kfree(svm);
+}
+
+/**
+ * xe_create_svm() - create a svm process
+ *
+ * @vm: the xe_vm that we create svm process for
+ *
+ * Return the created xe svm struct
+ */
+struct xe_svm *xe_create_svm(struct xe_vm *vm)
+{
+   struct mm_struct *mm = current->mm;
+   struct xe_svm *svm;
+
+   svm = kzalloc(sizeof(struct xe_svm), GFP_KERNEL);
+   if (!svm)
+   return NULL;
+   svm->mm = mm;
+   svm->vm = vm;
+   mutex_init(&svm->mutex);
+   /** Add svm to global xe_svm_table hash table
+*  use mm as key so later we can retrieve svm using mm
+*/
+   hash_add_rcu(xe_svm_table, &svm->hnode, (uintptr_t)mm);
+   return svm;
+}
+
+/**
+ * xe_lookup_svm_by_mm() - retrieve xe_svm from mm struct
+ *
+ * @mm: the mm struct of the svm to retrieve
+ *
+ * Return the xe_svm struct pointer, or NULL if fail
+ */
+struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm)
+{
+   struct xe_svm *svm;
+
+   hash_for_each_possible_rcu(xe_svm_table, svm, hnode, (uintptr_t)mm)
+   if (svm->mm == mm)
+   return svm;
+
+   return NULL;
+}
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index ba301a331f59..cd3cf92f3784 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -11,10 +11,15 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 struct xe_vm;
 struct mm_struct;
 
+#define XE_MAX_SVM_PROCESS 5 /* hash table with 2^5 = 32 buckets for SVM processes */
+extern DECLARE_HASHTABLE(xe_svm_table, XE_MAX_SVM_PROCESS);
+
 /**
  * struct xe_svm - data structure to represent a shared
  * virtual address space from device side. xe_svm, xe_vm
@@ -35,6 +40,8 @@ struct xe_svm {
 * @range_tree: Interval tree of all svm ranges in this svm
 */
struct rb_root_cached range_tree;
+   /** @hnode: used to add this svm to a global xe_svm_hash table*/
+   struct hlist_node hnode;
 };
 
 /**
@@ -56,4 +63,8 @@ struct xe_svm_range {
/** @inode: used to link this range to svm's range_tree */
struct interval_tree_node inode;
 };
+
+void xe_destroy_svm(struct xe_svm *svm);
+struct xe_svm *xe_create_svm(struct xe_vm *vm);
+struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm);
 #endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 1ca917b8315c..3c301a5c7325 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -36,6 +36,7 @@
 #include "xe_trace.h"
 #include "generated/xe_wa_oob.h"
 #include "xe_wa.h"
+#include "xe_svm.h"
 
 #define TEST_VM_ASYNC_OPS_ERROR
 
@@ -1375,6 +1376,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 
flags)
xe->usm.num_vm_in_non_fault_mode++;
mutex_unlock(&xe->usm.lock);
 
+   vm->svm = xe_create_svm(vm);
trace_xe_vm_create(vm);
 
return vm;
@@ -1495,6 +1497,9 @@ void xe_vm_close_and_put(struct xe_vm *vm)
for_each_tile(tile, xe, id)
xe_range_fence_tree_fini(&vm->rftree[id]);
 
+   if (vm->svm)
+   xe_destroy_svm(vm->svm);
+
xe_vm_put(vm);
 }
 
-- 
2.26.3



[PATCH 08/22] drm/xe/svm: Add helper to invalidate svm range from GPU

2023-12-20 Thread Oak Zeng
An svm-subsystem-friendly function is added for svm range invalidation.
The svm subsystem doesn't maintain xe_vma, so a temporary xe_vma
is used to call the function xe_vm_invalidate_vma.

Not sure whether this works or not; will have to test. If a temporary
vma doesn't work, we will have to call the zap_pte/tlb_inv functions
directly.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_pt.c | 33 +
 drivers/gpu/drm/xe/xe_pt.h |  1 +
 2 files changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 65cfac88ab2f..9805b402ebca 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1748,3 +1748,36 @@ int xe_bind_svm_range(struct xe_vm *vm, struct xe_tile 
*tile,
dma_fence_put(fence);
return ret;
 }
+
+/**
+ * xe_invalidate_svm_range() - a helper to invalidate a svm address range
+ *
+ * @vm: The vm that the address range belongs to
+ * @start: start of the virtual address range
+ * @size: size of the virtual address range
+ *
+ * This is a helper function supposed to be used by svm subsystem.
+ * svm subsystem doesn't maintain xe_vma, so we create a temporary
+ * xe_vma structure so we can reuse xe_vm_invalidate_vma().
+ */
+void xe_invalidate_svm_range(struct xe_vm *vm, u64 start, u64 size)
+{
+   struct xe_vma vma = {
+   .gpuva = {
+   .va = {
+   .addr = start,
+   .range = size,
+   },
+   .vm = &vm->gpuvm,
+   },
+   /** invalidate from all tiles
+*  FIXME: We used temporary vma in xe_bind_svm_range, so
+*  we lost track of which tile we are bound to. Does
+*  setting tile_present to all tiles cause a problem
+*  in xe_vm_invalidate_vma()?
+*/
+   .tile_present = BIT(vm->xe->info.tile_count) - 1,
+   };
+
+   xe_vm_invalidate_vma(&vma);
+}
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index 775d08707466..42d495997635 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -49,4 +49,5 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
 
 int xe_bind_svm_range(struct xe_vm *vm, struct xe_tile *tile,
struct hmm_range *range, u64 flags);
+void xe_invalidate_svm_range(struct xe_vm *vm, u64 start, u64 size);
 #endif
-- 
2.26.3



[PATCH 11/22] drm/xe/svm: implement functions to allocate and free device memory

2023-12-20 Thread Oak Zeng
Function xe_devm_alloc_pages allocates pages from drm buddy and performs
housekeeping work for all the pages allocated, such as getting a page
refcount, keeping a bitmap of all pages to denote whether a page is in
use, and putting pages on a drm lru list for eviction purposes.

Function xe_devm_free_blocks returns all memory blocks to the drm buddy
allocator.

Function xe_devm_free_page is a callback function from the hmm layer. It
is called whenever a page's refcount reaches 1. This function clears
the bit of this page in the bitmap. If all the bits in the bitmap are
cleared, it means all the pages have been freed, and we return all the pages
in this memory block back to drm buddy.
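
A hedged usage sketch; the wrapper and the copy_data_into_pages()
placeholder are assumptions, while the series' real caller is the
migration path elsewhere in this series:

/* Hypothetical usage, for illustration only. */
static int svm_alloc_vram_sketch(struct xe_tile *tile, unsigned long npages,
				 unsigned long *pfns)
{
	LIST_HEAD(blocks);
	int ret;

	ret = xe_devm_alloc_pages(tile, npages, &blocks, pfns);
	if (ret)
		return ret;

	/* placeholder for filling the freshly allocated vram pages */
	ret = copy_data_into_pages(pfns, npages);
	if (ret)
		/* nothing handed to core mm yet: give blocks back to buddy */
		xe_devm_free_blocks(&blocks);

	return ret;
}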

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.h|   9 ++
 drivers/gpu/drm/xe/xe_svm_devmem.c | 146 -
 2 files changed, 154 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index b54f7714a1fc..8551df2b9780 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -74,4 +74,13 @@ struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm 
*svm,
 int xe_svm_build_sg(struct hmm_range *range, struct sg_table *st);
 int xe_svm_devm_add(struct xe_tile *tile, struct xe_mem_region *mem);
 void xe_svm_devm_remove(struct xe_device *xe, struct xe_mem_region *mem);
+
+
+int xe_devm_alloc_pages(struct xe_tile *tile,
+   unsigned long npages,
+   struct list_head *blocks,
+   unsigned long *pfn);
+
+void xe_devm_free_blocks(struct list_head *blocks);
+void xe_devm_page_free(struct page *page);
 #endif
diff --git a/drivers/gpu/drm/xe/xe_svm_devmem.c 
b/drivers/gpu/drm/xe/xe_svm_devmem.c
index cf7882830247..445e0e1bc3b4 100644
--- a/drivers/gpu/drm/xe/xe_svm_devmem.c
+++ b/drivers/gpu/drm/xe/xe_svm_devmem.c
@@ -5,18 +5,162 @@
 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
 
 #include "xe_device_types.h"
 #include "xe_trace.h"
+#include "xe_migrate.h"
+#include "xe_ttm_vram_mgr_types.h"
+#include "xe_assert.h"
 
+/**
+ * struct xe_svm_block_meta - svm uses this data structure to manage each
+ * block allocated from drm buddy. This will be set to the drm_buddy_block's
+ * private field.
+ *
+ * @lru: used to link this block to drm's lru lists. This will be replaced
+ * with struct drm_lru_entity later.
+ * @tile: tile from which we allocated this block
+ * @bitmap: A bitmap of each page in this block. 1 means this page is used,
+ * 0 means this page is idle. When all bits of this block are 0, it is time
+ * to return this block to drm buddy subsystem.
+ */
+struct xe_svm_block_meta {
+   struct list_head lru;
+   struct xe_tile *tile;
+   unsigned long bitmap[];
+};
+
+static u64 block_offset_to_pfn(struct xe_mem_region *mr, u64 offset)
+{
+   /** DRM buddy's block offset is 0-based*/
+   offset += mr->hpa_base;
+
+   return PHYS_PFN(offset);
+}
+
+/**
+ * xe_devm_alloc_pages() - allocate device pages from buddy allocator
+ *
+ * @tile: which tile to allocate device memory from
+ * @npages: how many pages to allocate
+ * @blocks: used to return the allocated blocks
+ * @pfn: used to return the pfn of all allocated pages. Must be big enough
+ * to hold @npages entries.
+ *
+ * This function allocates blocks of memory from the drm buddy allocator, and
+ * performs initialization work: set struct page::zone_device_data to point
+ * to the memory block; set/initialize drm_buddy_block::private field;
+ * lock_page for each page allocated; add memory block to the lru manager's
+ * lru list - this is TBD.
+ *
+ * Return: 0 on success
+ * error code otherwise
+ */
+int xe_devm_alloc_pages(struct xe_tile *tile,
+   unsigned long npages,
+   struct list_head *blocks,
+   unsigned long *pfn)
+{
+   struct drm_buddy *mm = &tile->mem.vram_mgr->mm;
+   struct drm_buddy_block *block, *tmp;
+   u64 size = npages << PAGE_SHIFT;
+   int ret = 0, i, j = 0;
+
+   ret = drm_buddy_alloc_blocks(mm, 0, mm->size, size, PAGE_SIZE,
+   blocks, 
DRM_BUDDY_TOPDOWN_ALLOCATION);
+
+   if (unlikely(ret))
+   return ret;
+
+   list_for_each_entry_safe(block, tmp, blocks, link) {
+   struct xe_mem_region *mr = &tile->mem.vram;
+   u64 block_pfn_first, pages_per_block;
+   struct xe_svm_block_meta *meta;
+   u32 meta_size;
+
+   size = drm_buddy_block_size(mm, block);
+   

[PATCH 12/22] drm/xe/svm: Trace buddy block allocation and free

2023-12-20 Thread Oak Zeng
Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm_devmem.c |  5 -
 drivers/gpu/drm/xe/xe_trace.h  | 35 ++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_svm_devmem.c 
b/drivers/gpu/drm/xe/xe_svm_devmem.c
index 445e0e1bc3b4..5cd54dde4a9d 100644
--- a/drivers/gpu/drm/xe/xe_svm_devmem.c
+++ b/drivers/gpu/drm/xe/xe_svm_devmem.c
@@ -95,6 +95,7 @@ int xe_devm_alloc_pages(struct xe_tile *tile,
block->private = meta;
block_pfn_first =
block_offset_to_pfn(mr, 
drm_buddy_block_offset(block));
+   trace_xe_buddy_block_alloc(block, size, block_pfn_first);
for(i = 0; i < pages_per_block; i++) {
struct page *page;
 
@@ -159,8 +160,10 @@ void xe_devm_page_free(struct page *page)
 
xe_assert(tile->xe, i < pages_per_block);
clear_bit(i, meta->bitmap);
-   if (bitmap_empty(meta->bitmap, pages_per_block))
+   if (bitmap_empty(meta->bitmap, pages_per_block)) {
free_block(block);
+   trace_xe_buddy_block_free(block, size, block_pfn_first);
+   }
 }
 
 static const struct dev_pagemap_ops xe_devm_pagemap_ops = {
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 63867c0fa848..50380f5173ca 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -11,6 +11,7 @@
 
 #include 
 #include 
+#include 
 
 #include "xe_bo_types.h"
 #include "xe_exec_queue_types.h"
@@ -600,6 +601,40 @@ DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h,
 
 );
 
+DECLARE_EVENT_CLASS(xe_buddy_block,
+   TP_PROTO(struct drm_buddy_block *block, u64 size, u64 pfn),
+   TP_ARGS(block, size, pfn),
+
+   TP_STRUCT__entry(
+   __field(u64, block)
+   __field(u64, header)
+   __field(u64, size)
+   __field(u64, pfn)
+   ),
+
+   TP_fast_assign(
+   __entry->block = (u64)block;
+   __entry->header = block->header;
+   __entry->size = size;
+   __entry->pfn = pfn;
+   ),
+
+   TP_printk("xe svm: allocated block %llx, block header %llx, 
size %llx, pfn %llx\n",
+   __entry->block, __entry->header, __entry->size, 
__entry->pfn)
+);
+
+
+DEFINE_EVENT(xe_buddy_block, xe_buddy_block_alloc,
+   TP_PROTO(struct drm_buddy_block *block, u64 size, u64 pfn),
+   TP_ARGS(block, size, pfn)
+);
+
+
+DEFINE_EVENT(xe_buddy_block, xe_buddy_block_free,
+   TP_PROTO(struct drm_buddy_block *block, u64 size, u64 pfn),
+   TP_ARGS(block, size, pfn)
+);
+
 #endif
 
 /* This part must be outside protection */
-- 
2.26.3



[PATCH 00/22] XeKmd basic SVM support

2023-12-20 Thread Oak Zeng
This is the very basic SVM (shared virtual memory) support in the XeKmd
driver. SVM allows the programmer to use a shared virtual address space
between the CPU program and the GPU program. It abstracts away from the user
the location of the backing memory in a mixed CPU and GPU programming
environment.

This work is based on previous I915 SVM implementation mainly from
Niranjana Vishwanathapura and Oak Zeng, which has never been upstreamed
before. This is our first attempt to upstream this work.

This implementation depends on Linux kernel HMM support. See some key
designs in patch #1.

We are aware there are currently some effort to implement SVM using
GMEM(generalized memory management,
see 
https://lore.kernel.org/dri-devel/20231128125025.4449-1-weixi@huawei.com/)
We are open to this new method if it can be merged to upstream kernel.
Before that, we think it is still safer to support SVM through HMM.

This series only has basic SVM support. We think it is better to post
this series earlier so we can get more eyes on it. Below is the work
that is planned or ongoing:

*Testing: We are working on the igt test right now. Some parts of this
series, especially the gpu page table update (patch #7, #8) and migration
function (patch #10), need some debugging to make them work.

*Virtual address range based memory attributes and hints: We plan to
expose uAPI for user to set memory attributes such as preferred location
or migration granularity etc to a virtual address range. This is
important to tune SVM performance.

*GPU vram eviction: One key design choice of this series is that the SVM
layer allocates GPU memory directly from the drm buddy allocator, instead
of from the xe vram manager. There is no BO (buffer object) concept
in this implementation. The key benefit of this approach is that we can
easily migrate memory at page granularity. This also means SVM bypasses
TTM's memory eviction logic. But we want SVM memory and BO driver
memory to be able to mutually evict each other. We have some proof-of-concept
work to rework the TTM resource manager for this purpose, see
https://lore.kernel.org/dri-devel/20231102043306.2931989-1-oak.z...@intel.com/
We will continue work on that series and then implement SVM's eviction
function based on the concept of a shared drm LRU list b/t SVM and the TTM/BO
driver.

Oak Zeng (22):
  drm/xe/svm: Add SVM document
  drm/xe/svm: Add svm key data structures
  drm/xe/svm: create xe svm during vm creation
  drm/xe/svm: Trace svm creation
  drm/xe/svm: add helper to retrieve svm range from address
  drm/xe/svm: Introduce a helper to build sg table from hmm range
  drm/xe/svm: Add helper for binding hmm range to gpu
  drm/xe/svm: Add helper to invalidate svm range from GPU
  drm/xe/svm: Remap and provide memmap backing for GPU vram
  drm/xe/svm: Introduce svm migration function
  drm/xe/svm: implement functions to allocate and free device memory
  drm/xe/svm: Trace buddy block allocation and free
  drm/xe/svm: Handle CPU page fault
  drm/xe/svm: trace svm range migration
  drm/xe/svm: Implement functions to register and unregister mmu
notifier
  drm/xe/svm: Implement the mmu notifier range invalidate callback
  drm/xe/svm: clean up svm range during process exit
  drm/xe/svm: Move a few structures to xe_gt.h
  drm/xe/svm: migrate svm range to vram
  drm/xe/svm: Populate svm range
  drm/xe/svm: GPU page fault support
  drm/xe/svm: Add DRM_XE_SVM kernel config entry

 Documentation/gpu/xe/index.rst   |   1 +
 Documentation/gpu/xe/xe_svm.rst  |   8 +
 drivers/gpu/drm/xe/Kconfig   |  22 ++
 drivers/gpu/drm/xe/Makefile  |   5 +
 drivers/gpu/drm/xe/xe_device_types.h |  20 ++
 drivers/gpu/drm/xe/xe_gt.h   |  20 ++
 drivers/gpu/drm/xe/xe_gt_pagefault.c |  28 +--
 drivers/gpu/drm/xe/xe_migrate.c  | 213 +
 drivers/gpu/drm/xe/xe_migrate.h  |   7 +
 drivers/gpu/drm/xe/xe_mmio.c |  12 +
 drivers/gpu/drm/xe/xe_pt.c   | 134 ++-
 drivers/gpu/drm/xe/xe_pt.h   |   5 +
 drivers/gpu/drm/xe/xe_svm.c  | 324 +
 drivers/gpu/drm/xe/xe_svm.h  | 115 +
 drivers/gpu/drm/xe/xe_svm_devmem.c   | 232 ++
 drivers/gpu/drm/xe/xe_svm_doc.h  | 121 ++
 drivers/gpu/drm/xe/xe_svm_migrate.c  | 345 +++
 drivers/gpu/drm/xe/xe_svm_range.c| 227 ++
 drivers/gpu/drm/xe/xe_trace.h|  71 +-
 drivers/gpu/drm/xe/xe_vm.c   |   7 +
 drivers/gpu/drm/xe/xe_vm_types.h |  12 +
 21 files changed, 1904 insertions(+), 25 deletions(-)
 create mode 100644 Documentation/gpu/xe/xe_svm.rst
 create mode 100644 drivers/gpu/drm/xe/xe_svm.c
 create mode 100644 drivers/gpu/drm/xe/xe_svm.h
 create mode 100644 drivers/gpu/drm/xe/xe_svm_devmem.c
 create mode 100644 drivers/gpu/drm/xe/xe_svm_doc.h
 create mode 100644 drivers/gpu/drm/xe/xe_svm_migrate.c
 create mode 100644 drivers/gpu/drm/xe/xe_svm_range.c

-- 
2.26.3



[PATCH 05/22] drm/xe/svm: add helper to retrieve svm range from address

2023-12-20 Thread Oak Zeng
All valid virtual address ranges are maintained in svm's
range_tree. This function iterates svm's range tree and
returns the svm range that contains a specific address.
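
For illustration, a hedged sketch combining this helper with the mm-based
lookup from patch 3; the wrapper itself is an assumption:

/* Hypothetical helper, for illustration only. */
static struct xe_svm_range *svm_range_from_cpu_fault(struct mm_struct *mm,
						     unsigned long addr)
{
	struct xe_svm *svm = xe_lookup_svm_by_mm(mm);

	if (!svm)
		return NULL;

	/* NULL here means the address has no svm range yet */
	return xe_svm_range_from_addr(svm, addr);
}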

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_svm.h   |  2 ++
 drivers/gpu/drm/xe/xe_svm_range.c | 32 +++
 2 files changed, 34 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_svm_range.c

diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index cd3cf92f3784..3ed106ecc02b 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -67,4 +67,6 @@ struct xe_svm_range {
 void xe_destroy_svm(struct xe_svm *svm);
 struct xe_svm *xe_create_svm(struct xe_vm *vm);
 struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm);
+struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm *svm,
+   unsigned long 
addr);
 #endif
diff --git a/drivers/gpu/drm/xe/xe_svm_range.c 
b/drivers/gpu/drm/xe/xe_svm_range.c
new file mode 100644
index ..d8251d38f65e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_svm_range.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+#include "xe_svm.h"
+
+/**
+ * xe_svm_range_from_addr() - retrieve the svm_range that contains a virtual address
+ *
+ * @svm: svm that the virtual address belongs to
+ * @addr: the virtual address to retrieve svm_range for
+ *
+ * return the svm range found,
+ * or NULL if no range found
+ */
+struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm *svm,
+   
unsigned long addr)
+{
+   struct interval_tree_node *node;
+
+   mutex_lock(&svm->mutex);
+   node = interval_tree_iter_first(&svm->range_tree, addr, addr);
+   mutex_unlock(&svm->mutex);
+   if (!node)
+   return NULL;
+
+   return container_of(node, struct xe_svm_range, inode);
+}
-- 
2.26.3



[PATCH 04/22] drm/xe/svm: Trace svm creation

2023-12-20 Thread Oak Zeng
xe_vm tracepoint is extended to also print svm.

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_trace.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 95163c303f3e..63867c0fa848 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -467,15 +467,17 @@ DECLARE_EVENT_CLASS(xe_vm,
TP_STRUCT__entry(
 __field(u64, vm)
 __field(u32, asid)
+__field(u64, svm)
 ),
 
TP_fast_assign(
   __entry->vm = (unsigned long)vm;
   __entry->asid = vm->usm.asid;
+  __entry->svm = (unsigned long)vm->svm;
   ),
 
-   TP_printk("vm=0x%016llx, asid=0x%05x",  __entry->vm,
- __entry->asid)
+   TP_printk("vm=0x%016llx, asid=0x%05x, svm=0x%016llx",  
__entry->vm,
+ __entry->asid, __entry->svm)
 );
 
 DEFINE_EVENT(xe_vm, xe_vm_kill,
-- 
2.26.3



[PATCH 01/22] drm/xe/svm: Add SVM document

2023-12-20 Thread Oak Zeng
Add shared virtual memory document.

Signed-off-by: Oak Zeng 
Co-developed-by: Niranjana Vishwanathapura 
Signed-off-by: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 Documentation/gpu/xe/index.rst  |   1 +
 Documentation/gpu/xe/xe_svm.rst |   8 +++
 drivers/gpu/drm/xe/xe_svm_doc.h | 121 
 3 files changed, 130 insertions(+)
 create mode 100644 Documentation/gpu/xe/xe_svm.rst
 create mode 100644 drivers/gpu/drm/xe/xe_svm_doc.h

diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst
index c224ecaee81e..106b60aba1f0 100644
--- a/Documentation/gpu/xe/index.rst
+++ b/Documentation/gpu/xe/index.rst
@@ -23,3 +23,4 @@ DG2, etc is provided to prototype the driver.
xe_firmware
xe_tile
xe_debugging
+   xe_svm
diff --git a/Documentation/gpu/xe/xe_svm.rst b/Documentation/gpu/xe/xe_svm.rst
new file mode 100644
index ..62954ba1c6f8
--- /dev/null
+++ b/Documentation/gpu/xe/xe_svm.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=====================
+Shared virtual memory
+=====================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_svm_doc.h
+   :doc: Shared virtual memory
diff --git a/drivers/gpu/drm/xe/xe_svm_doc.h b/drivers/gpu/drm/xe/xe_svm_doc.h
new file mode 100644
index ..de38ee3585e4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_svm_doc.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_SVM_DOC_H_
+#define _XE_SVM_DOC_H_
+
+/**
+ * DOC: Shared virtual memory
+ *
+ * Shared Virtual Memory (SVM) allows the programmer to use a single virtual
+ * address space shared between threads executing on CPUs and GPUs. It abstracts
+ * away from the user the location of the backing memory, and hence simplifies
+ * the user programming model. In a non-SVM memory model, the user needs to
+ * explicitly decide memory placement such as device or system memory, and also
+ * needs to explicitly migrate memory b/t device and system memory.
+ *
+ * Interface
+ * =
+ *
+ * SVM makes use of default OS memory allocation and mapping interface such as
+ * malloc() and mmap(). The pointer returned from malloc() and mmap() can be
+ * directly used on both CPU and GPU program.
+ *
+ * SVM also provides API to set virtual address range based memory attributes
+ * such as preferred memory location, memory migration granularity, and memory
+ * atomic attributes etc. This is similar to Linux madvise API.
+ *
+ * Basic implementation
+ * ====================
+ *
+ * The XeKMD implementation is based on the Linux kernel Heterogeneous Memory
+ * Management (HMM) framework. HMM's address space mirroring support allows
+ * sharing of the address space by duplicating sections of CPU page tables in
+ * the device page tables. This enables both CPU and GPU to access a physical
+ * memory location using the same virtual address.
+ *
+ * The Linux kernel also provides the ability to plug device memory into the
+ * system (as a special ZONE_DEVICE type) and allocates a struct page for each
+ * device memory page.
+ *
+ * HMM also provides a mechanism to migrate pages from host to device memory
+ * and vice versa.
+ *
+ * More information on HMM can be found here.
+ * https://www.kernel.org/doc/Documentation/vm/hmm.rst
+ *
+ * Unlike the non-SVM memory allocator (such as gem_create, vm_bind etc), there
+ * is no buffer object (BO, such as struct ttm_buffer_object, struct drm_gem_object)
+ * in our SVM implementation. We deliberately choose this implementation option
+ * to achieve page granularity memory placement, validation, eviction and migration.
+ *
+ * The SVM layer directly allocates device memory from the drm buddy subsystem. The
+ * memory is organized as many blocks, each of which has 2^n pages. The SVM subsystem
+ * then marks the usage of each page using a simple bitmap. When all pages in a
+ * block are no longer used, SVM returns this block back to the drm buddy subsystem.
+ *
+ * There are 3 events which can trigger the SVM subsystem into action:
+ *
+ * 1. A mmu notifier callback
+ *
+ * Since SVM needs to mirror the program's CPU virtual address space from the GPU
+ * side, when the program's CPU address space changes, SVM needs to make an
+ * identical change from the GPU side. SVM/hmm use a mmu interval notifier to
+ * achieve this. SVM registers a mmu interval notifier callback function with
+ * core mm, and whenever a CPU side virtual address space is changed (i.e., when
+ * a virtual address range is unmapped from the CPU by calling munmap), the
+ * registered callback function will be called from core mm. SVM then mirrors
+ * the CPU address space change from the GPU side, i.e., unmaps or invalidates
+ * the virtual address range from the GPU page table.
+ *
+ * 2. A GPU page fault
+ *
+ * At the very beginning of a process's life, no virtual address of the process
+ * is mapped in the GPU page table. So when the GPU accesses any virtual address
+ * of the process, a GPU page fault is

[RFC 04/11] drm: Add evict function pointer to drm lru entity

2023-11-01 Thread Oak Zeng
The drm lru manager provides generic functions to manage the lru list,
and a function to evict an lru entity. But how to evict an entity
is implemented in the entity's sub-class. This patch introduces
a few function pointers to the drm lru entity for this purpose. Those
functions are abstracted from the current ttm resource eviction
process. They need to be tuned in the future when the svm code comes
into the picture.

Also implement a drm_lru_evict_first function to evict the first
lru entity from the lru manager. Both the ttm and svm code are supposed
to call this function to evict the first resource from the lru list.
This way ttm and svm can mutually evict resources from each
other.
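
A hedged sketch of the intended consumer pattern; the space-check helper
is a placeholder and not part of this series, while drm_lru_evict_first()
and drm_lru_evict_ctx come from this patch:

/* Hypothetical consumer, for illustration only. */
static int evict_until_it_fits(struct drm_lru_mgr *mgr, u64 size,
			       const struct drm_lru_evict_ctx *ctx)
{
	int ret;

	/* mgr_has_space() is a placeholder for the caller's own check */
	while (!mgr_has_space(mgr, size)) {
		ret = drm_lru_evict_first(mgr, ctx);
		if (ret)
			return ret;
	}

	return 0;
}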

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/drm_evictable_lru.c | 36 +-
 include/drm/drm_evictable_lru.h | 74 -
 2 files changed, 108 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_evictable_lru.c 
b/drivers/gpu/drm/drm_evictable_lru.c
index 2ba9105cca03..7b62cae2dfea 100644
--- a/drivers/gpu/drm/drm_evictable_lru.c
+++ b/drivers/gpu/drm/drm_evictable_lru.c
@@ -19,13 +19,15 @@ static inline struct drm_lru_mgr *entity_to_mgr(struct 
drm_lru_entity *entity)
 }
 
 void drm_lru_entity_init(struct drm_lru_entity *entity, struct drm_device *drm,
-   uint32_t mem_type, uint64_t size, uint32_t priority)
+   uint32_t mem_type, uint64_t size, uint32_t priority,
+   struct drm_lru_evict_func *evict_func)
 {
entity->drm = drm;
entity->mem_type = mem_type;
entity->size = size;
entity->priority = priority;
INIT_LIST_HEAD(&entity->lru);
+   entity->evict_func = evict_func;
 }
 
 /**
@@ -230,3 +232,35 @@ void drm_lru_del_bulk_move(struct drm_lru_entity *entity,
}
 }
 EXPORT_SYMBOL(drm_lru_del_bulk_move);
+
+int drm_lru_evict_first(struct drm_lru_mgr *mgr,
+   const struct drm_lru_evict_ctx *evict_ctx)
+{
+   struct drm_lru_entity *entity, *busy_entity = NULL;
+   struct drm_lru_cursor cursor;
+   bool locked = false, busy = false, found = false;
+
+   spin_lock(mgr->lru_lock);
+
+   /* First find a victim to evict*/
+   drm_lru_for_each_entity(mgr, &cursor, entity) {
+   if (!entity->evict_func->evict_allowable(entity,
+   evict_ctx, &busy, &locked)) {
+   if (!busy_entity && busy)
+   busy_entity = entity;
+   continue;
+   }
+   found = true;
+   break;
+   }
+
+   /* We didn't find a victim, but we found a busy entity, i.e.,
+* other clients hold a reservation lock of this entity. Try
+* to wait and evict this busy entity.
+*/
+   if (!found && busy_entity)
+   return busy_entity->evict_func->evict_busy_entity(busy_entity, 
evict_ctx);
+
+   /* If here, we found a victim to evict*/
+   return entity->evict_func->evict_entity(entity, evict_ctx, locked);
+}
diff --git a/include/drm/drm_evictable_lru.h b/include/drm/drm_evictable_lru.h
index 3fd6bd2475d9..7f49964f2f9b 100644
--- a/include/drm/drm_evictable_lru.h
+++ b/include/drm/drm_evictable_lru.h
@@ -15,6 +15,12 @@ struct drm_device;
 #define DRM_MAX_LRU_PRIORITY 4
 #define DRM_NUM_MEM_TYPES 8
 
+struct drm_lru_evict_ctx {
+   void *data1;
+   void *data2;
+   void *data3;
+};
+
 /**
  * struct drm_lru_entity
  *
@@ -23,6 +29,7 @@ struct drm_device;
  * @size: resource size of this entity
  * @priority: The priority of this entity
  * @lru: least recent used list node, see &drm_lru_mgr.lru
+ * @evict_func: functions to evict this entity
  *
  * This structure represents an entity in drm_lru_mgr's
  * list. This structure is supposed to be embedded in
@@ -34,6 +41,7 @@ struct drm_lru_entity {
uint64_t size;
uint32_t priority;
struct list_head lru;
+   struct drm_lru_evict_func *evict_func;
 };
 
 /**
@@ -97,7 +105,67 @@ struct drm_lru_bulk_move {
struct drm_lru_bulk_move_range 
range[DRM_NUM_MEM_TYPES][DRM_MAX_LRU_PRIORITY];
 };
 
+struct drm_lru_evict_func {
+   /**
+* evict_allowable
+*
+* @lru_entity: The struct ttm_resource::lru_entity when this resource 
is
+* added to drm lru list.
+* @evict_ctx: eviction context. This is opaque data to drm lru layer. 
It is
+* passed to drm lru layer through drm_lru_evict_first function and drm 
lru
+* layer just pass it back to ttm or svm code by calling some ttm or svm
+* callback functions.
+* @busy: used to return whether the current resource is busy (i.e., 
locked
+* by other clients)
+* @locked: used to return whether this resource is locked during this 
check,
+* i.e., successfully trylocked bo's dma reservation object

[RFC 11/11] drm/ttm: Write ttm functions using drm lru manager functions

2023-11-01 Thread Oak Zeng
Replace struct ttm_resource::lru with drm lru entity. Replace
struct ttm_resource_manager::lru[] with drm lru manager. Remove
ttm_lru_bulk_move functions and definitions as those are moved
to drm lru manager.

Some of ttm resource, ttm bo and ttm device functions are re-written
using drm lru manager functions.

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
 drivers/gpu/drm/ttm/tests/ttm_device_test.c |   2 +-
 drivers/gpu/drm/ttm/ttm_bo.c|  20 +--
 drivers/gpu/drm/ttm/ttm_bo_util.c   |  20 +--
 drivers/gpu/drm/ttm/ttm_bo_vm.c |   2 +-
 drivers/gpu/drm/ttm/ttm_device.c|  10 +-
 drivers/gpu/drm/ttm/ttm_range_manager.c |   2 +-
 drivers/gpu/drm/ttm/ttm_resource.c  | 155 
 drivers/gpu/drm/xe/xe_bo.c  |  44 +++---
 drivers/gpu/drm/xe/xe_bo.h  |   3 +-
 drivers/gpu/drm/xe/xe_dma_buf.c |   4 +-
 drivers/gpu/drm/xe/xe_exec.c|   2 +-
 drivers/gpu/drm/xe/xe_migrate.c |   6 +-
 drivers/gpu/drm/xe/xe_res_cursor.h  |  10 +-
 drivers/gpu/drm/xe/xe_ttm_sys_mgr.c |   2 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c|  12 +-
 drivers/gpu/drm/xe/xe_vm.c  |   2 +-
 drivers/gpu/drm/xe/xe_vm_types.h|   2 +-
 include/drm/ttm/ttm_bo.h|   4 +-
 include/drm/ttm/ttm_resource.h  |  59 ++--
 21 files changed, 112 insertions(+), 253 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 747bcad86d5d..c977c00e986a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -369,7 +369,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
 {
spin_lock(adev->mman.bdev.lru_lock);
-   ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
+   drm_lru_bulk_move_tail(&vm->lru_bulk_move);
spin_unlock(adev->mman.bdev.lru_lock);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 204ab13184ed..fec545b5d154 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -337,7 +337,7 @@ struct amdgpu_vm {
struct amdgpu_task_info task_info;
 
/* Store positions of group of BOs */
-   struct ttm_lru_bulk_move lru_bulk_move;
+   struct drm_lru_bulk_move lru_bulk_move;
/* Flag to indicate if VM is used for compute */
boolis_compute_context;
 
diff --git a/drivers/gpu/drm/ttm/tests/ttm_device_test.c 
b/drivers/gpu/drm/ttm/tests/ttm_device_test.c
index b1b423b68cdf..a62ca31b55df 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_device_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_device_test.c
@@ -90,7 +90,7 @@ static void ttm_device_fini_basic(struct kunit *test)
ttm_device_fini(ttm_dev);
 
KUNIT_ASSERT_FALSE(test, man->use_type);
-   KUNIT_ASSERT_TRUE(test, list_empty(&man->lru[0]));
+   KUNIT_ASSERT_TRUE(test, list_empty(&man->lru_mgr->lru[0]));
KUNIT_ASSERT_NULL(test, ttm_dev->man_drv[TTM_PL_SYSTEM]);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9ec7a246e2ad..d44ca5e51dff 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -92,11 +92,11 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
  * resulting in much less overhead of maintaining the LRU.
  * The only requirement is that the resources stay together on the LRU and are
  * never separated. This is enforces by setting the bulk_move structure on a 
BO.
- * ttm_lru_bulk_move_tail() should be used to move all resources to the tail of
+ * drm_lru_bulk_move_tail() should be used to move all resources to the tail of
  * their LRU list.
  */
 void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo,
- struct ttm_lru_bulk_move *bulk)
+ struct drm_lru_bulk_move *bulk)
 {
dma_resv_assert_held(bo->base.resv);
 
@@ -122,8 +122,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object 
*bo,
bool old_use_tt, new_use_tt;
int ret;
 
-   old_use_tt = !bo->resource || ttm_manager_type(bdev, 
bo->resource->mem_type)->use_tt;
-   new_use_tt = ttm_manager_type(bdev, mem->mem_type)->use_tt;
+   old_use_tt = !bo->resource || ttm_manager_type(bdev, 
bo->resource->lru_entity.mem_type)->use_tt;
+   new_use_tt = ttm_manager_type(bdev, mem->lru_entity.mem_type)->use_tt;
 
ttm_bo_unmap_virtual(bo);
 
@@ -139,7 +139,7 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object 
*bo,
if (ret)
goto out_err;
 
-   if (mem->mem_type != TTM_PL_SYSTEM) {
+  

[RFC 10/11] drm/ttm: Implement ttm memory evict functions

2023-11-01 Thread Oak Zeng
Implement ttm_mem_evict_allowable, ttm_mem_evict_entity and
ttm_mem_evict_busy_entity. Those are callback functions from the
drm lru manager. Register those functions during drm lru entity
initialization. Those 3 functions are split out of the original
ttm_mem_evict_first function.

Reimplement the ttm_mem_evict_first function using the drm_lru_evict_first
function. For now, drm_lru_evict_first just calls back into the above 3
functions which were split out of ttm_mem_evict_first, so there is no
functional change. In the future, when SVM code is added,
drm_lru_evict_first can also call into SVM resource eviction
functions, thus TTM and SVM can mutually evict resources from each
other.
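
The reimplemented wrapper then boils down to roughly the following (a
simplified sketch of the reimplementation described above; the real function
also has to handle the return paths of the callbacks):

int ttm_mem_evict_first(struct ttm_device *bdev,
			struct ttm_resource_manager *man,
			const struct ttm_place *place,
			struct ttm_operation_ctx *ctx,
			struct ww_acquire_ctx *ticket)
{
	struct ttm_mem_evict_ctx evict_ctx = {
		.place = place, .ctx = ctx, .ticket = ticket,
	};

	/* drm_lru_evict_first() hands the opaque context back to the callbacks. */
	return drm_lru_evict_first(man->lru_mgr,
				   (struct drm_lru_evict_ctx *)&evict_ctx);
}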

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 192 ---
 include/drm/ttm/ttm_bo.h |   2 +
 2 files changed, 158 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 4a5ffa920665..9ec7a246e2ad 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -587,50 +587,148 @@ static int ttm_mem_evict_wait_busy(struct 
ttm_buffer_object *busy_bo,
return r == -EDEADLK ? -EBUSY : r;
 }
 
-int ttm_mem_evict_first(struct ttm_device *bdev,
-   struct ttm_resource_manager *man,
-   const struct ttm_place *place,
-   struct ttm_operation_ctx *ctx,
-   struct ww_acquire_ctx *ticket)
+struct ttm_mem_evict_ctx {
+   const struct ttm_place *place;
+   struct ttm_operation_ctx *ctx;
+   struct ww_acquire_ctx *ticket;
+};
+
+/**
+ * ttm_mem_evict_allowable
+ *
+ * @lru_entity: The struct ttm_resource::lru_entity when this resource is
+ * added to drm lru list.
+ * @place: The preferred ttm placement where we want to evict memory for
+ * more memory space. If the current ttm_resource doesn't match the preferred
+ * placement, then there is no need to evict the current resource.
+ * @ctx: ttm operation context
+ * @ticket: dma reservation's context used to lock resource
+ * @busy: used to return whether the current resource is busy (i.e., locked
+ * by other clients)
+ * @locked: used to return whether this resource is locked during this check,
+ * i.e., successfully trylocked bo's dma reservation object
+ *
+ * Check whether we are allowed to evict a memory resource. Return true if we
+ * are allowed to evict resource; otherwise false.
+ *
+ * When this function returns true, a resource reference counter (bo's 
reference)
+ * is held. This reference counter needs to be released after the evict operation 
later
+ * on.
+ *
+ * This function should be called with lru_lock held.
+ */
+bool ttm_mem_evict_allowable(struct drm_lru_entity *lru_entity,
+   const struct drm_lru_evict_ctx *lru_evict_ctx,
+   bool *busy, bool *locked)
 {
-   struct ttm_buffer_object *bo = NULL, *busy_bo = NULL;
-   struct drm_lru_cursor cursor;
struct ttm_resource *res;
-   struct drm_lru_entity *entity;
-   bool locked = false;
-   int ret;
+   struct ttm_buffer_object *bo = NULL;
+   struct ttm_device *bdev;
+   const struct ttm_place *place;
+   struct ttm_operation_ctx *ctx;
+   struct ww_acquire_ctx *ticket;
+   struct ttm_mem_evict_ctx *evict_ctx;
 
-   spin_lock(bdev->lru_lock);
-   drm_lru_for_each_entity(man->lru_mgr, &cursor, entity) {
-   bool busy;
+   evict_ctx = (struct ttm_mem_evict_ctx *)lru_evict_ctx;
+   place = evict_ctx->place;
+   ctx = evict_ctx->ctx;
+   ticket = evict_ctx->ticket;
 
-   res = container_of(entity, struct ttm_resource, lru_entity);
-   if (!ttm_bo_evict_swapout_allowable(res->bo, ctx, place,
-   &locked, &busy)) {
-   if (busy && !busy_bo && ticket !=
-   dma_resv_locking_ctx(res->bo->base.resv))
-   busy_bo = res->bo;
-   continue;
-   }
+   res = container_of(lru_entity, struct ttm_resource, lru_entity);
+   bo = res->bo;
+   bdev = bo->bdev;
 
-   if (ttm_bo_get_unless_zero(res->bo)) {
-   bo = res->bo;
-   break;
-   }
-   if (locked)
-   dma_resv_unlock(res->bo->base.resv);
-   }
+   if (!ttm_bo_evict_swapout_allowable(bo, ctx, place, locked, busy)) {
+   if (busy && ticket != dma_resv_locking_ctx(bo->base.resv))
+   *busy = true;
 
-   if (!bo) {
-   if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
-   busy_bo = NULL;
-   spin_unlock(bdev->lru_lock);
-   ret = ttm_mem_evict_wait_busy(busy_bo,

[RFC 06/11] drm/ttm: Set lru manager to ttm resource manager

2023-11-01 Thread Oak Zeng
Add a weak reference of lru manager to ttm resource manager,
and add a function to set lru manager for ttm resource manager.
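
A driver is then expected to wire the two together at initialization time,
roughly as follows (a sketch mirroring what the later "Initialize drm lru
manager" patch does for the existing managers; example_vram_mgr_init is a
made-up helper):

static void example_vram_mgr_init(struct drm_device *drm,
				  struct ttm_resource_manager *man,
				  uint64_t vram_size)
{
	/* One drm lru manager per memory type, sharing the drm-level lru_lock. */
	drm_lru_mgr_init(&drm->lru_mgr[TTM_PL_VRAM], vram_size, &drm->lru_lock);

	/* Give the ttm resource manager a weak reference to its lru manager. */
	ttm_resource_manager_set_lru_mgr(man, &drm->lru_mgr[TTM_PL_VRAM]);
}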

Signed-off-by: Oak Zeng 
---
 include/drm/ttm/ttm_resource.h | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 92241c2374fa..e4fc1ada5236 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -46,6 +46,7 @@ struct iosys_map;
 struct io_mapping;
 struct sg_table;
 struct scatterlist;
+struct drm_lru_mgr;
 
 struct ttm_resource_manager_func {
/**
@@ -172,6 +173,12 @@ struct ttm_resource_manager {
 * bdev->lru_lock.
 */
uint64_t usage;
+
+   /**
+* @lru_mgr: weak reference of the lru manager that manages lru
+* list for this ttm resource manager.
+*/
+   struct drm_lru_mgr *lru_mgr;
 };
 
 /**
@@ -326,6 +333,18 @@ static inline bool ttm_resource_manager_used(struct 
ttm_resource_manager *man)
return man->use_type;
 }
 
+/**
+ * ttm_resource_manager_set_lru_mgr
+ *
+ * @man: ttm resource manager
+ * @mgr: pointing to lru manager
+ */
+static inline void
+ttm_resource_manager_set_lru_mgr(struct ttm_resource_manager *man, struct 
drm_lru_mgr *mgr)
+{
+   man->lru_mgr = mgr;
+}
+
 /**
  * ttm_resource_manager_cleanup
  *
-- 
2.26.3



[RFC 09/11] drm/ttm: Use drm LRU manager iterator

2023-11-01 Thread Oak Zeng
Since the TTM resource LRU list is moved to the drm LRU manager layer,
use the drm lru manager iterator instead of the TTM resource manager
iterator. The TTM resource manager iterator is deleted. No functional
change.
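
The resulting iteration pattern, as used by the hunks below, looks roughly
like this (example_walk_lru is a made-up helper for illustration):

static void example_walk_lru(struct ttm_device *bdev,
			     struct ttm_resource_manager *man)
{
	struct drm_lru_cursor cursor;
	struct drm_lru_entity *entity;

	spin_lock(bdev->lru_lock);
	drm_lru_for_each_entity(man->lru_mgr, &cursor, entity) {
		struct ttm_resource *res =
			container_of(entity, struct ttm_resource, lru_entity);

		if (!res->bo)
			continue;
		/* ... inspect res->bo while holding the lru_lock ... */
	}
	spin_unlock(bdev->lru_lock);
}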

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/ttm/ttm_bo.c   |  7 ++--
 drivers/gpu/drm/ttm/ttm_device.c   | 10 --
 drivers/gpu/drm/ttm/ttm_resource.c | 51 --
 include/drm/ttm/ttm_resource.h | 33 ++-
 4 files changed, 14 insertions(+), 87 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 26e0555bad0c..4a5ffa920665 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -43,6 +43,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "ttm_module.h"
 
@@ -593,15 +594,17 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
struct ww_acquire_ctx *ticket)
 {
struct ttm_buffer_object *bo = NULL, *busy_bo = NULL;
-   struct ttm_resource_cursor cursor;
+   struct drm_lru_cursor cursor;
struct ttm_resource *res;
+   struct drm_lru_entity *entity;
bool locked = false;
int ret;
 
spin_lock(bdev->lru_lock);
-   ttm_resource_manager_for_each_res(man, &cursor, res) {
+   drm_lru_for_each_entity(man->lru_mgr, &cursor, entity) {
bool busy;
 
+   res = container_of(entity, struct ttm_resource, lru_entity);
if (!ttm_bo_evict_swapout_allowable(res->bo, ctx, place,
&locked, &busy)) {
if (busy && !busy_bo && ticket !=
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 393c3e27016e..881662d69aba 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "ttm_module.h"
 
@@ -141,7 +142,8 @@ int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t 
gfp_flags)
 int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
   gfp_t gfp_flags)
 {
-   struct ttm_resource_cursor cursor;
+   struct drm_lru_cursor cursor;
+   struct drm_lru_entity *entity;
struct ttm_resource_manager *man;
struct ttm_resource *res;
unsigned i;
@@ -153,10 +155,12 @@ int ttm_device_swapout(struct ttm_device *bdev, struct 
ttm_operation_ctx *ctx,
if (!man || !man->use_tt)
continue;
 
-   ttm_resource_manager_for_each_res(man, &cursor, res) {
-   struct ttm_buffer_object *bo = res->bo;
+   drm_lru_for_each_entity(man->lru_mgr, &cursor, entity) {
+   struct ttm_buffer_object *bo;
uint32_t num_pages;
 
+   res = container_of(entity, struct ttm_resource, 
lru_entity);
+   bo = res->bo;
if (!bo || bo->resource != res)
continue;
 
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 05eef866065e..0c6e0dbeff07 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -488,57 +488,6 @@ void ttm_resource_manager_debug(struct 
ttm_resource_manager *man,
 }
 EXPORT_SYMBOL(ttm_resource_manager_debug);
 
-/**
- * ttm_resource_manager_first
- *
- * @man: resource manager to iterate over
- * @cursor: cursor to record the position
- *
- * Returns the first resource from the resource manager.
- */
-struct ttm_resource *
-ttm_resource_manager_first(struct ttm_resource_manager *man,
-  struct ttm_resource_cursor *cursor)
-{
-   struct ttm_resource *res;
-
-   lockdep_assert_held(man->bdev->lru_lock);
-
-   for (cursor->priority = 0; cursor->priority < DRM_MAX_LRU_PRIORITY;
-++cursor->priority)
-   list_for_each_entry(res, &man->lru[cursor->priority], lru)
-   return res;
-
-   return NULL;
-}
-
-/**
- * ttm_resource_manager_next
- *
- * @man: resource manager to iterate over
- * @cursor: cursor to record the position
- * @res: the current resource pointer
- *
- * Returns the next resource from the resource manager.
- */
-struct ttm_resource *
-ttm_resource_manager_next(struct ttm_resource_manager *man,
- struct ttm_resource_cursor *cursor,
- struct ttm_resource *res)
-{
-   lockdep_assert_held(man->bdev->lru_lock);
-
-   list_for_each_entry_continue(res, &man->lru[cursor->priority], lru)
-   return res;
-
-   for (++cursor->priority; cursor->priority < DRM_MAX_LRU_PRIORITY;
-++cursor->priority)
-   list_for_each_entry(res, &ma

[RFC 05/11] drm: Replace ttm macros with drm macros

2023-11-01 Thread Oak Zeng
TTM_MAX_BO_PRIORITY and TTM_NUM_MEM_TYPES are moved from ttm to
drm, so:
s/TTM_MAX_BO_PRIORITY/DRM_MAX_LRU_PRIORITY
s/TTM_NUM_MEM_TYPES/DRM_NUM_MEM_TYPES

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  |  2 +-
 drivers/gpu/drm/i915/intel_region_ttm.c  |  2 +-
 drivers/gpu/drm/i915/selftests/mock_region.c |  2 +-
 drivers/gpu/drm/ttm/ttm_device.c |  8 
 drivers/gpu/drm/ttm/ttm_resource.c   | 12 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c   |  2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.h   |  2 +-
 drivers/gpu/drm/xe/xe_bo.h   |  2 +-
 include/drm/ttm/ttm_device.h |  6 +++---
 include/drm/ttm/ttm_range_manager.h  |  4 ++--
 include/drm/ttm/ttm_resource.h   | 10 --
 11 files changed, 25 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index c46f54f83f54..228dbea60949 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -987,7 +987,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
spin_lock(bo->bdev->lru_lock);
if (shrinkable) {
/* Try to keep shmem_tt from being considered for shrinking. */
-   bo->priority = TTM_MAX_BO_PRIORITY - 1;
+   bo->priority = DRM_MAX_LRU_PRIORITY - 1;
} else if (obj->mm.madv != I915_MADV_WILLNEED) {
bo->priority = I915_TTM_PRIO_PURGE;
} else if (!i915_gem_object_has_pages(obj)) {
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index b845782c9859..f75520c2ba59 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -63,7 +63,7 @@ int intel_region_to_ttm_type(const struct intel_memory_region 
*mem)
return TTM_PL_SYSTEM;
 
type = mem->instance + TTM_PL_PRIV;
-   GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES);
+   GEM_BUG_ON(type >= DRM_NUM_MEM_TYPES);
 
return type;
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c 
b/drivers/gpu/drm/i915/selftests/mock_region.c
index 6324eb32d4dd..6ea0e6bec812 100644
--- a/drivers/gpu/drm/i915/selftests/mock_region.c
+++ b/drivers/gpu/drm/i915/selftests/mock_region.c
@@ -111,7 +111,7 @@ mock_region_create(struct drm_i915_private *i915,
   resource_size_t io_size)
 {
int instance = ida_alloc_max(&i915->selftest.mock_region_instances,
-TTM_NUM_MEM_TYPES - TTM_PL_PRIV - 1,
+DRM_NUM_MEM_TYPES - TTM_PL_PRIV - 1,
 GFP_KERNEL);
 
if (instance < 0)
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index d18eca86ebd6..e8c8006ba748 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -148,7 +148,7 @@ int ttm_device_swapout(struct ttm_device *bdev, struct 
ttm_operation_ctx *ctx,
int ret;
 
spin_lock(bdev->lru_lock);
-   for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
+   for (i = TTM_PL_SYSTEM; i < DRM_NUM_MEM_TYPES; ++i) {
man = ttm_manager_type(bdev, i);
if (!man || !man->use_tt)
continue;
@@ -245,7 +245,7 @@ void ttm_device_fini(struct ttm_device *bdev)
destroy_workqueue(bdev->wq);
 
spin_lock(bdev->lru_lock);
-   for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
+   for (i = 0; i < DRM_MAX_LRU_PRIORITY; ++i)
if (list_empty(&man->lru[0]))
pr_debug("Swap list %d was clean\n", i);
spin_unlock(bdev->lru_lock);
@@ -287,12 +287,12 @@ void ttm_device_clear_dma_mappings(struct ttm_device 
*bdev)
 
ttm_device_clear_lru_dma_mappings(bdev, &bdev->pinned);
 
-   for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
+   for (i = TTM_PL_SYSTEM; i < DRM_NUM_MEM_TYPES; ++i) {
man = ttm_manager_type(bdev, i);
if (!man || !man->use_tt)
continue;
 
-   for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j)
+   for (j = 0; j < DRM_MAX_LRU_PRIORITY; ++j)
ttm_device_clear_lru_dma_mappings(bdev, &man->lru[j]);
}
 }
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 6ada77f51fba..05eef866065e 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -54,8 +54,8 @@ void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk)
 {
unsigned i, j;
 
-   for (i = 0; i < TTM_NUM_MEM_TYPES; ++i) {
-   for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
+   for (i = 0; i < DRM_NUM_MEM_TYPES; ++i) {
+   f

[RFC 08/11] drm: Initialize drm lru manager

2023-11-01 Thread Oak Zeng
Initialize lru_mgr for each memory type or memory region. Also set
ttm_resource_manager's weak reference to drm lru manager.

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c |  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c |  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c|  6 ++
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c   | 10 ++
 drivers/gpu/drm/nouveau/nouveau_ttm.c   | 12 
 drivers/gpu/drm/ttm/ttm_range_manager.c |  6 ++
 drivers/gpu/drm/ttm/ttm_sys_manager.c   |  2 ++
 drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c  |  6 ++
 drivers/gpu/drm/xe/xe_ttm_sys_mgr.c |  6 ++
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c|  6 ++
 10 files changed, 66 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 44367f03316f..57e8b1688977 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -278,6 +278,7 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, 
uint64_t gtt_size)
 {
struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
struct ttm_resource_manager *man = &mgr->manager;
+   struct drm_device *drm = adev_to_drm(adev);
uint64_t start, size;
 
man->use_tt = true;
@@ -292,6 +293,9 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, 
uint64_t gtt_size)
 
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
ttm_resource_manager_set_used(man, true);
+
+   drm_lru_mgr_init(&drm->lru_mgr[TTM_PL_TT], gtt_size, &drm->lru_lock);
+   ttm_resource_manager_set_lru_mgr(man, &drm->lru_mgr[TTM_PL_TT]);
return 0;
 }
 
@@ -307,6 +311,7 @@ void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
 {
struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
struct ttm_resource_manager *man = &mgr->manager;
+   struct drm_device *drm = adev_to_drm(adev);
int ret;
 
ttm_resource_manager_set_used(man, false);
@@ -321,4 +326,5 @@ void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
 
ttm_resource_manager_cleanup(man);
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
+   drm_lru_mgr_fini(&drm->lru_mgr[TTM_PL_TT]);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
index e8adfd0a570a..f989aca2bfc4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
@@ -100,6 +100,7 @@ static const struct ttm_resource_manager_func 
amdgpu_preempt_mgr_func = {
 int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
 {
struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
+   struct drm_device *drm = adev_to_drm(adev);
int ret;
 
man->use_tt = true;
@@ -115,6 +116,9 @@ int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
 
ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, man);
ttm_resource_manager_set_used(man, true);
+
+   drm_lru_mgr_init(&drm->lru_mgr[AMDGPU_PL_PREEMPT], (1 << 30), 
&drm->lru_lock);
+   ttm_resource_manager_set_lru_mgr(man, &drm->lru_mgr[AMDGPU_PL_PREEMPT]);
return 0;
 }
 
@@ -129,6 +133,7 @@ int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
 void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
 {
struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
+   struct drm_device *drm = adev_to_drm(adev);
int ret;
 
ttm_resource_manager_set_used(man, false);
@@ -141,4 +146,5 @@ void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
 
ttm_resource_manager_cleanup(man);
ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
+   drm_lru_mgr_fini(&drm->lru_mgr[AMDGPU_PL_PREEMPT]);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index b83e1741905e..0792d22508c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -884,6 +884,7 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
 {
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
struct ttm_resource_manager *man = &mgr->manager;
+   struct drm_device *drm = adev_to_drm(adev);
int err;
 
ttm_resource_manager_init(man, &adev->mman.bdev,
@@ -907,6 +908,9 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
 
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
ttm_resource_manager_set_used(man, true);
+   drm_lru_mgr_init(&drm->lru_mgr[TTM_PL_VRAM], adev->gmc.real_vram_size,
+   &drm-&g

[RFC 07/11] drm/ttm: re-parameterize a few ttm functions

2023-11-01 Thread Oak Zeng
Add a struct drm_device *drm parameter to function
ttm_range_man_init, ttm_range_man_fini, ttm_sys_man_init,
and ttm_sys_man_free. This drm parameter will be used
in the coming patches to retrieve and initialize drm
lru manager.

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  8 
 drivers/gpu/drm/drm_gem_vram_helper.c   |  8 
 drivers/gpu/drm/loongson/lsdc_ttm.c |  8 
 drivers/gpu/drm/nouveau/nouveau_ttm.c   |  8 
 drivers/gpu/drm/qxl/qxl_ttm.c   |  6 +++---
 drivers/gpu/drm/radeon/radeon_ttm.c |  8 
 drivers/gpu/drm/ttm/ttm_device.c|  2 +-
 drivers/gpu/drm/ttm/ttm_module.h|  3 ++-
 drivers/gpu/drm/ttm/ttm_range_manager.c |  6 --
 drivers/gpu/drm/ttm/ttm_sys_manager.c   |  6 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  4 ++--
 include/drm/ttm/ttm_range_manager.h | 13 +++--
 12 files changed, 44 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 5cdbc901cbe2..cc0736f82a80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -75,7 +75,7 @@ static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
unsigned int type,
uint64_t size_in_page)
 {
-   return ttm_range_man_init(&adev->mman.bdev, type,
+   return ttm_range_man_init(adev_to_drm(adev), &adev->mman.bdev, type,
  false, size_in_page);
 }
 
@@ -2026,9 +2026,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
amdgpu_vram_mgr_fini(adev);
amdgpu_gtt_mgr_fini(adev);
amdgpu_preempt_mgr_fini(adev);
-   ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
-   ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
-   ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
+   ttm_range_man_fini(adev_to_drm(adev), &adev->mman.bdev, AMDGPU_PL_GDS);
+   ttm_range_man_fini(adev_to_drm(adev), &adev->mman.bdev, AMDGPU_PL_GWS);
+   ttm_range_man_fini(adev_to_drm(adev), &adev->mman.bdev, AMDGPU_PL_OA);
ttm_device_fini(&adev->mman.bdev);
adev->mman.initialized = false;
DRM_INFO("amdgpu: ttm finalized\n");
diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c 
b/drivers/gpu/drm/drm_gem_vram_helper.c
index 56749e40459f..5b18db72cc96 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -1009,7 +1009,7 @@ static int drm_vram_mm_init(struct drm_vram_mm *vmm, 
struct drm_device *dev,
if (ret)
return ret;
 
-   ret = ttm_range_man_init(&vmm->bdev, TTM_PL_VRAM,
+   ret = ttm_range_man_init(dev, &vmm->bdev, TTM_PL_VRAM,
 false, vram_size >> PAGE_SHIFT);
if (ret)
return ret;
@@ -1017,9 +1017,9 @@ static int drm_vram_mm_init(struct drm_vram_mm *vmm, 
struct drm_device *dev,
return 0;
 }
 
-static void drm_vram_mm_cleanup(struct drm_vram_mm *vmm)
+static void drm_vram_mm_cleanup(struct drm_device *drm, struct drm_vram_mm 
*vmm)
 {
-   ttm_range_man_fini(&vmm->bdev, TTM_PL_VRAM);
+   ttm_range_man_fini(drm, &vmm->bdev, TTM_PL_VRAM);
ttm_device_fini(&vmm->bdev);
 }
 
@@ -1056,7 +1056,7 @@ static void drm_vram_helper_release_mm(struct drm_device 
*dev)
if (!dev->vram_mm)
return;
 
-   drm_vram_mm_cleanup(dev->vram_mm);
+   drm_vram_mm_cleanup(dev, dev->vram_mm);
kfree(dev->vram_mm);
dev->vram_mm = NULL;
 }
diff --git a/drivers/gpu/drm/loongson/lsdc_ttm.c 
b/drivers/gpu/drm/loongson/lsdc_ttm.c
index bd68cb9366b5..f7f226314a09 100644
--- a/drivers/gpu/drm/loongson/lsdc_ttm.c
+++ b/drivers/gpu/drm/loongson/lsdc_ttm.c
@@ -533,8 +533,8 @@ static void lsdc_ttm_fini(struct drm_device *ddev, void 
*data)
 {
struct lsdc_device *ldev = (struct lsdc_device *)data;
 
-   ttm_range_man_fini(&ldev->bdev, TTM_PL_VRAM);
-   ttm_range_man_fini(&ldev->bdev, TTM_PL_TT);
+   ttm_range_man_fini(ddev, &ldev->bdev, TTM_PL_VRAM);
+   ttm_range_man_fini(ddev, &ldev->bdev, TTM_PL_TT);
 
ttm_device_fini(&ldev->bdev);
 
@@ -556,7 +556,7 @@ int lsdc_ttm_init(struct lsdc_device *ldev)
 
num_vram_pages = ldev->vram_size >> PAGE_SHIFT;
 
-   ret = ttm_range_man_init(&ldev->bdev, TTM_PL_VRAM, false, 
num_vram_pages);
+   ret = ttm_range_man_init(&ldev->base, &ldev->bdev, TTM_PL_VRAM, false, 
num_vram_pages);
if (unlikely(ret))
return ret;
 
@@ -567,7 +567,7 @@ int lsdc_ttm_init(struct lsdc_device *ldev)
 
num_gtt_pages = ldev->gtt_size >> PAGE_SHIFT;
 
-   ret = ttm_range_man_init(&ldev->bdev, TTM_PL

[PATCH 00/11] Introduce drm evictable lru

2023-11-01 Thread Oak Zeng
We plan to implement xe driver's shared virtual memory
manager (aka SVM) without buffer object concept. This
means we won't build our shared virtual memory manager
upon TTM infrastructure like amdgpu does.

Even though this approach is more efficient, it does
create a problem for memory eviction when there is
memory pressure: memory allocated by SVM and memory
allocated by TTM should be able to mutually evict
from each other. TTM's resource manager maintains
a LRU list for each memory type and this list is used
to pick up the memory eviction victim. Since we don't
use TTM for SVM implementation, SVM allocated memory
can't be added to TTM resource manager's LRU list. Thus
SVM allocated memory and TTM allocated memory are not
mutually evictable.

See more discussion on this topic here:
https://www.spinics.net/lists/dri-devel/msg410740.html

This series solves this problem by creating a shared
LRU list b/t SVM and TTM, or any other resource manager.

The basic idea is to abstract a drm_lru_entity structure
which is supposed to be embedded in the ttm_resource structure,
or any other resource manager. The resource LRU list is a
list of drm_lru_entity. drm_lru_entity has eviction function
pointers which can be used to call back into a driver's specific
eviction function to evict a memory resource.

Introduce a global drm_lru_manager to struct drm_device
to manage LRU lists. Each memory type or memory region
can have an LRU list. TTM resource manager's LRU list functions
including bulk move functions are moved to the drm lru manager.
The drm lru manager provides an evict_first function to evict
the first memory resource from the LRU list. This function can
be called from TTM, SVM or any other resource manager, so
all the memory allocated in the drm sub-system can be mutually
evicted.

The lru_lock is also moved from struct ttm_device to struct 
drm_device.

Opens:
1) memory accounting: currently the ttm resource manager's
memory accounting functions are kept in the ttm resource manager.
Since memory accounting should work across TTM and SVM, it should
ideally live in the drm lru manager layer. This will be polished
in the future.

2) eviction callback function interface: The current eviction
function interface is designed to meet TTM memory eviction
requirements. When SVM is in the picture, this interface
needs to be further tuned to meet SVM requirements as well.

This series is not tested and it is only compiled for the xe
driver. Some minor changes are needed for other drivers
such as amdgpu, nouveau etc. I intended to send this out
as a request-for-comments series to get some early feedback,
to see whether this is the right direction to go. I will
further polish this series after a direction is agreed on.
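
To make the intended layering concrete, the relationship between the new
entity and its users is roughly the following (the drm_lru_entity fields match
patches 03/04 of this series; the SVM-side structure is purely hypothetical,
since only TTM is converted here):

/* drm layer: a lean entity, embedded by every backend. */
struct drm_lru_entity {
	struct drm_device *drm;
	uint32_t mem_type;
	uint64_t size;
	uint32_t priority;
	struct list_head lru;
	struct drm_lru_evict_func *evict_func;
};

/* TTM backend: the entity replaces ttm_resource's own lru node. */
struct ttm_resource {
	/* ... existing fields ... */
	struct drm_lru_entity lru_entity;
};

/* Hypothetical SVM backend: no buffer object, but it shares the same LRU. */
struct example_svm_allocation {
	/* ... svm bookkeeping ... */
	struct drm_lru_entity lru_entity;
};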

Oak Zeng (11):
  drm/ttm: re-parameter ttm_device_init
  drm: move lru_lock from ttm_device to drm_device
  drm: introduce drm evictable LRU
  drm: Add evict function pointer to drm lru entity
  drm: Replace ttm macros with drm macros
  drm/ttm: Set lru manager to ttm resource manager
  drm/ttm: re-parameterize a few ttm functions
  drm: Initialize drm lru manager
  drm/ttm: Use drm LRU manager iterator
  drm/ttm: Implement ttm memory evict functions
  drm/ttm: Write ttm functions using drm lru manager functions

 drivers/gpu/drm/Makefile  |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c   |   6 +
 .../gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c   |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  10 +-
 drivers/gpu/drm/drm_drv.c |   1 +
 drivers/gpu/drm/drm_evictable_lru.c   | 266 ++
 drivers/gpu/drm/drm_gem_vram_helper.c |  10 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |   6 +-
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  10 +
 drivers/gpu/drm/i915/intel_region_ttm.c   |   4 +-
 drivers/gpu/drm/i915/selftests/mock_region.c  |   2 +-
 drivers/gpu/drm/loongson/lsdc_ttm.c   |  10 +-
 drivers/gpu/drm/nouveau/nouveau_ttm.c |  22 +-
 drivers/gpu/drm/qxl/qxl_ttm.c |   6 +-
 drivers/gpu/drm/radeon/radeon_ttm.c   |  10 +-
 drivers/gpu/drm/ttm/tests/ttm_device_test.c   |   2 +-
 drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c |   2 +-
 drivers/gpu/drm/ttm/ttm_bo.c  | 247 
 drivers/gpu/drm/ttm/ttm_bo_util.c |  20 +-
 drivers/gpu/drm/ttm/ttm_bo_vm.c   |   2 +-
 drivers/gpu/drm/ttm/ttm_device.c  |  55 ++--
 drivers/gpu/drm/ttm/ttm_module.h  |   3 +-
 drivers/gpu/drm/ttm/ttm_range_manager.c   |  14 +-
 drivers/gpu/drm/ttm/ttm_resource.c| 242 +++-
 drivers/gpu/drm/ttm/ttm_sys_manager.c |   8 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c|   2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.h|   2 +-
 driver

[RFC 02/11] drm: move lru_lock from ttm_device to drm_device

2023-11-01 Thread Oak Zeng
In the coming patches, we will share the lru list b/t
ttm bo based memory allocator and hmm/svm based memory
allocator. Thus lru_lock (which is used mainly to protect
the lru list) is moved from struct ttm_device to struct
drm_device, so this lock can be shared b/t those two
memory allocators.

To minimize code change, struct ttm_device still holds
a weak reference to lru_lock, so the ttm layer can still
reference this lock easily.
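
In other words, after this patch the ownership looks roughly as follows (the
wiring assignment in ttm_device_init is implied by the hunks below and is
shown here only as an illustration):

struct drm_device {
	/* ... */
	spinlock_t lru_lock;		/* the one real lock, owned by drm_device */
};

struct ttm_device {
	/* ... */
	spinlock_t *lru_lock;		/* weak reference to drm_device::lru_lock */
};

/* Assumed wiring inside ttm_device_init(): */
static void example_wire_lru_lock(struct ttm_device *bdev,
				  struct drm_device *drm)
{
	bdev->lru_lock = &drm->lru_lock;
}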

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c |  4 +-
 drivers/gpu/drm/drm_drv.c|  1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  |  4 +-
 drivers/gpu/drm/ttm/ttm_bo.c | 40 +--
 drivers/gpu/drm/ttm/ttm_device.c | 18 -
 drivers/gpu/drm/ttm/ttm_resource.c   | 42 ++--
 drivers/gpu/drm/xe/xe_bo.c   |  4 +-
 drivers/gpu/drm/xe/xe_exec.c |  4 +-
 drivers/gpu/drm/xe/xe_vm.c   |  4 +-
 include/drm/drm_device.h |  5 +++
 include/drm/ttm/ttm_bo.h |  4 +-
 include/drm/ttm/ttm_device.h |  4 +-
 13 files changed, 72 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f5daadcec865..747bcad86d5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -368,9 +368,9 @@ int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec 
*exec,
 void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
 {
-   spin_lock(&adev->mman.bdev.lru_lock);
+   spin_lock(adev->mman.bdev.lru_lock);
ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
-   spin_unlock(&adev->mman.bdev.lru_lock);
+   spin_unlock(adev->mman.bdev.lru_lock);
 }
 
 /* Create scheduler entities for page table updates */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index c7085a747b03..b83e1741905e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -290,9 +290,9 @@ static void amdgpu_vram_mgr_do_reserve(struct 
ttm_resource_manager *man)
 
vis_usage = amdgpu_vram_mgr_vis_size(adev, block);
atomic64_add(vis_usage, &mgr->vis_usage);
-   spin_lock(&man->bdev->lru_lock);
+   spin_lock(man->bdev->lru_lock);
man->usage += rsv->size;
-   spin_unlock(&man->bdev->lru_lock);
+   spin_unlock(man->bdev->lru_lock);
list_move(&rsv->blocks, &mgr->reserved_pages);
}
 }
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 3eda026ffac6..1943c38815aa 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -623,6 +623,7 @@ static int drm_dev_init(struct drm_device *dev,
 
INIT_LIST_HEAD(&dev->managed.resources);
spin_lock_init(&dev->managed.lock);
+   spin_lock_init(&dev->lru_lock);
 
/* no per-device feature limits by default */
dev->driver_features = ~0u;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 9227f8146a58..c46f54f83f54 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -984,7 +984,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
/*
 * Put on the correct LRU list depending on the MADV status
 */
-   spin_lock(&bo->bdev->lru_lock);
+   spin_lock(bo->bdev->lru_lock);
if (shrinkable) {
/* Try to keep shmem_tt from being considered for shrinking. */
bo->priority = TTM_MAX_BO_PRIORITY - 1;
@@ -1013,7 +1013,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
}
 
ttm_bo_move_to_lru_tail(bo);
-   spin_unlock(&bo->bdev->lru_lock);
+   spin_unlock(bo->bdev->lru_lock);
 }
 
 /*
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e58b7e249816..26e0555bad0c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -68,7 +68,7 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object 
*bo,
  * @bo: The buffer object.
  *
  * Move this BO to the tail of all lru lists used to lookup and reserve an
- * object. This function must be called with struct ttm_global::lru_lock
+ * object. This function must be called with struct drm_device::lru_lock
  * held, and is used to make a BO less likely to be considered for eviction.
  */
 void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo)
@@ -102,13 +102,13 @@ void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo,
i

[RFC 03/11] drm: introduce drm evictable LRU

2023-11-01 Thread Oak Zeng
The drm LRU manager is introduced for resource eviction purposes. It maintains
an LRU list per resource type. It provides functions to add a resource to or
remove a resource from the list. It also provides a function to retrieve the
first entity on the LRU list.

drm LRU manager also provides functions for bulk moving resources
on the LRU lists.

The drm LRU manager also does very basic memory accounting, i.e., the
LRU manager keeps a size of this resource type and a usage member
for how much of the resource has been added to this LRU manager's LRU
list. TTM resource manager memory accounting functions such as
struct ttm_resource_manager::size and struct ttm_resource_manager::usage
are still kept. In the future, when SVM code is in the picture,
those memory accounting functions need some rework to consider
the memory used by both TTM and SVM.

For one device, a global drm LRU manager per resource type should be
created/initialized at device initialization time. Drm LRU manager
instances are embedded in struct drm_device.

It is pretty much moving some of the ttm resource manager functions
to the drm layer. The reason for this code refactoring is, we want to
create a single LRU list for memory allocated from a BO (buffer object)
based driver and an hmm/svm (shared virtual memory) based driver, thus the BO
driver and the svm driver can evict memory from each other.

Previously the LRU list in the TTM resource manager (the lru field in struct
ttm_resource_manager) is coupled with the ttm_buffer_object concept, i.e.,
each ttm resource is backed by a ttm_buffer_object and the LRU list
is essentially a list of ttm_buffer_object. Due to this behavior, the
TTM resource manager can't be used by an hmm/svm driver as we don't plan
to have the BO concept for the hmm/svm implementation. So we decouple
the evictable LRU list from the BO concept in this series.

The design goal of the drm lru manager is to make it as lean as possible.
So each lru entity only has a list node member used to link this entity
to the evictable LRU list, and the basic resource size/type/priority
of this entity. It doesn't have any driver specific information. An lru
entity also has a function pointer to an evict function. This is used to
implement a ttm or svm specific eviction function. An lru entity is supposed
to be embedded in a driver specific structure such as struct
ttm_resource, see the usage in the next patch of this series.

The ttm resource manager, and some of the ttm_bo functions such as
ttm_mem_evict_first, will be rewritten using the new drm lru manager
library, see the next patch in this series.

The future hmm/svm implementation will call lru manager functions to add
hmm/svm allocations to the shared evictable lru list.

Lock design: previously the ttm_resource LRU list is protected by a device
global ttm_device::lru_lock (bdev->lru_lock in the code). This lock also
protects ttm_buffer_object::pin_count, ttm_resource_manager::usage,
ttm_resource::bo, the ttm_device::pinned list etc. With this refactoring,
lru_lock is moved out of ttm_device and is added to struct drm_device, so
it can be shared b/t ttm code and svm code.
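
A minimal sketch of the intended use, based on the interfaces added in this
patch (example_init is a made-up helper; the 5-argument drm_lru_entity_init
matches this patch, a later patch in the series adds an evict_func argument):

static void example_init(struct drm_device *drm, struct drm_lru_entity *entity,
			 uint32_t mem_type, uint64_t size)
{
	/*
	 * One lru manager per memory type, embedded in struct drm_device,
	 * all sharing the drm-level lru_lock.
	 */
	drm_lru_mgr_init(&drm->lru_mgr[mem_type], size, &drm->lru_lock);

	/*
	 * The entity itself is meant to be embedded in a backend structure
	 * such as struct ttm_resource.
	 */
	drm_lru_entity_init(entity, drm, mem_type, size, 0 /* priority */);
}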

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/Makefile|   1 +
 drivers/gpu/drm/drm_evictable_lru.c | 232 
 include/drm/drm_device.h|   7 +
 include/drm/drm_evictable_lru.h | 188 ++
 4 files changed, 428 insertions(+)
 create mode 100644 drivers/gpu/drm/drm_evictable_lru.c
 create mode 100644 include/drm/drm_evictable_lru.h

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 1ad88efb1752..13953b0d271b 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -46,6 +46,7 @@ drm-y := \
drm_vblank_work.o \
drm_vma_manager.o \
drm_gpuva_mgr.o \
+   drm_evictable_lru.o \
drm_writeback.o
 drm-$(CONFIG_DRM_LEGACY) += \
drm_agpsupport.o \
diff --git a/drivers/gpu/drm/drm_evictable_lru.c 
b/drivers/gpu/drm/drm_evictable_lru.c
new file mode 100644
index ..2ba9105cca03
--- /dev/null
+++ b/drivers/gpu/drm/drm_evictable_lru.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+static inline struct drm_lru_mgr *entity_to_mgr(struct drm_lru_entity *entity)
+{
+   struct drm_lru_mgr *mgr;
+
+   mgr = &entity->drm->lru_mgr[entity->mem_type];
+   BUG_ON(!mgr->used);
+
+   return mgr;
+}
+
+void drm_lru_entity_init(struct drm_lru_entity *entity, struct drm_device *drm,
+   uint32_t mem_type, uint64_t size, uint32_t priority)
+{
+   entity->drm = drm;
+   entity->mem_type = mem_type;
+   entity->size = size;
+   entity->priority = priority;
+   INIT_LIST_HEAD(&entity->lru);
+}
+
+/**
+ * drm_lru_mgr_init
+ *
+ * @mgr: drm lru manager to init
+ * @size: size of the resource managed by this manager
+ * @lock: pointer of the global lru_lock
+ *
+ 

[RFC 01/11] drm/ttm: re-parameter ttm_device_init

2023-11-01 Thread Oak Zeng
Change the 3rd parameter of ttm_device_init from
struct device * to struct drm_device *. This is
preparatory work for moving lru_lock from ttm_device
to drm_device.
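
For reference, the prototype change amounts to the following (a sketch of the
intent; the surrounding parameters are unchanged):

/* Before: */
int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
		    struct device *dev, struct address_space *mapping,
		    struct drm_vma_offset_manager *vma_manager,
		    bool use_dma_alloc, bool use_dma32);

/* After this patch: */
int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
		    struct drm_device *drm, struct address_space *mapping,
		    struct drm_vma_offset_manager *vma_manager,
		    bool use_dma_alloc, bool use_dma32);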

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 2 +-
 drivers/gpu/drm/drm_gem_vram_helper.c | 2 +-
 drivers/gpu/drm/i915/intel_region_ttm.c   | 2 +-
 drivers/gpu/drm/loongson/lsdc_ttm.c   | 2 +-
 drivers/gpu/drm/nouveau/nouveau_ttm.c | 2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c   | 2 +-
 drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c | 2 +-
 drivers/gpu/drm/ttm/ttm_device.c  | 7 ---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c   | 2 +-
 drivers/gpu/drm/xe/xe_device.c| 2 +-
 include/drm/ttm/ttm_device.h  | 3 ++-
 11 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4e51dce3aab5..5cdbc901cbe2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1817,7 +1817,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
mutex_init(&adev->mman.gtt_window_lock);
 
/* No others user of address space so set it to 0 */
-   r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
+   r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, 
adev_to_drm(adev),
   adev_to_drm(adev)->anon_inode->i_mapping,
   adev_to_drm(adev)->vma_offset_manager,
   adev->need_swiotlb,
diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c 
b/drivers/gpu/drm/drm_gem_vram_helper.c
index b67eafa55715..56749e40459f 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -1002,7 +1002,7 @@ static int drm_vram_mm_init(struct drm_vram_mm *vmm, 
struct drm_device *dev,
vmm->vram_base = vram_base;
vmm->vram_size = vram_size;
 
-   ret = ttm_device_init(&vmm->bdev, &bo_driver, dev->dev,
+   ret = ttm_device_init(&vmm->bdev, &bo_driver, dev,
 dev->anon_inode->i_mapping,
 dev->vma_offset_manager,
 false, true);
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index bf6097e7433d..b845782c9859 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -33,7 +33,7 @@ int intel_region_ttm_device_init(struct drm_i915_private 
*dev_priv)
struct drm_device *drm = &dev_priv->drm;
 
return ttm_device_init(&dev_priv->bdev, i915_ttm_driver(),
-  drm->dev, drm->anon_inode->i_mapping,
+  drm, drm->anon_inode->i_mapping,
   drm->vma_offset_manager, false, false);
 }
 
diff --git a/drivers/gpu/drm/loongson/lsdc_ttm.c 
b/drivers/gpu/drm/loongson/lsdc_ttm.c
index bf79dc55afa4..bd68cb9366b5 100644
--- a/drivers/gpu/drm/loongson/lsdc_ttm.c
+++ b/drivers/gpu/drm/loongson/lsdc_ttm.c
@@ -548,7 +548,7 @@ int lsdc_ttm_init(struct lsdc_device *ldev)
unsigned long num_gtt_pages;
int ret;
 
-   ret = ttm_device_init(&ldev->bdev, &lsdc_bo_driver, ddev->dev,
+   ret = ttm_device_init(&ldev->bdev, &lsdc_bo_driver, ddev,
  ddev->anon_inode->i_mapping,
  ddev->vma_offset_manager, false, true);
if (ret)
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c 
b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 486f39f31a38..831918437850 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -299,7 +299,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
drm->agp.cma = pci->agp.cma;
}
 
-   ret = ttm_device_init(&drm->ttm.bdev, &nouveau_bo_driver, drm->dev->dev,
+   ret = ttm_device_init(&drm->ttm.bdev, &nouveau_bo_driver, dev,
  dev->anon_inode->i_mapping,
  dev->vma_offset_manager,
  drm_need_swiotlb(drm->client.mmu.dmabits),
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 4eb83ccc4906..77ca50187162 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -688,7 +688,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
int r;
 
/* No others user of address space so set it to 0 */
-   r = ttm_device_init(&rdev->mman.bdev, &radeon_bo_driver, rdev->dev,
+   r = ttm_device_init(&rdev->mman.bdev, &radeon_bo_driver, rdev->ddev,
 

[PATCH 1/2] drm/amdgpu: fix compile error on architecture s390

2021-03-15 Thread Oak Zeng
ioremap_cache is not supported on some architectures
such as s390. Put the code inside an #ifdef to fix
a compile error reported by the kernel test robot.

Signed-off-by: Oak Zeng 
Reported-by: Kernel test robot 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 37751e7..1091585 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1817,7 +1817,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 
/* Change the size here instead of the init above so only lpfn is 
affected */
amdgpu_ttm_set_buffer_funcs_status(adev, false);
-#ifdef CONFIG_64BIT
+#ifdef CONFIG_X86
if (adev->gmc.xgmi.connected_to_cpu)
adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
adev->gmc.visible_vram_size);
-- 
2.7.4



[PATCH 1/2] drm/amdgpu: fix compile error on architecture s390

2021-03-10 Thread Oak Zeng
ioremap_cache is not supported on some architectures
such as s390. Put the code inside an #ifdef to fix
a compile error reported by the kernel test robot.

Signed-off-by: Oak Zeng 
Reported-by: Kernel test robot 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 37751e7..1091585 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1817,7 +1817,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 
/* Change the size here instead of the init above so only lpfn is 
affected */
amdgpu_ttm_set_buffer_funcs_status(adev, false);
-#ifdef CONFIG_64BIT
+#ifdef CONFIG_X86
if (adev->gmc.xgmi.connected_to_cpu)
adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
adev->gmc.visible_vram_size);
-- 
2.7.4



[PATCH 2/2] drm/amdgpu: fix a few compiler warnings

2021-03-10 Thread Oak Zeng
1. make function mmhub_v1_7_setup_vm_pt_regs static
2. indent an if statement

Signed-off-by: Oak Zeng 
Reported-by: kernel test robot 
Reported-by: Dan Carpenter 
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
index 3b4193c..8fca72e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
@@ -88,14 +88,14 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)
adev->gmc.xgmi.num_physical_nodes = max_region + 1;
 
if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
-   return -EINVAL;
+   return -EINVAL;
 
adev->gmc.xgmi.physical_node_id =
REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL,
  PF_LFB_REGION);
 
if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
-   return -EINVAL;
+   return -EINVAL;
 
adev->gmc.xgmi.node_segment_size = seg_size;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index ac74d66..29d7f50 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -53,7 +53,7 @@ static u64 mmhub_v1_7_get_fb_location(struct amdgpu_device 
*adev)
return base;
 }
 
-void mmhub_v1_7_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+static void mmhub_v1_7_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t 
vmid,
uint64_t page_table_base)
 {
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
-- 
2.7.4



[PATCH] drm/ttm: ioremap buffer according to TTM mem caching setting

2021-03-04 Thread Oak Zeng
If tbo.mem.bus.caching is cached, buffer is intended to be mapped
as cached from CPU. Map it with ioremap_cache.

This wasn't necessary before as device memory was never mapped
as cached from CPU side. It becomes necessary for aldebaran as
device memory is mapped cached from CPU.

Signed-off-by: Oak Zeng 
Reviewed-by: Christian Konig 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 031e581..296bb20 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -91,6 +91,10 @@ static int ttm_resource_ioremap(struct ttm_device *bdev,
 
if (mem->bus.caching == ttm_write_combined)
addr = ioremap_wc(mem->bus.offset, bus_size);
+#ifdef CONFIG_X86
+   else if (mem->bus.caching == ttm_cached)
+   addr = ioremap_cache(mem->bus.offset, bus_size);
+#endif
else
addr = ioremap(mem->bus.offset, bus_size);
if (!addr) {
@@ -372,6 +376,11 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo,
if (mem->bus.caching == ttm_write_combined)
map->virtual = ioremap_wc(bo->mem.bus.offset + offset,
  size);
+#ifdef CONFIG_X86
+   else if (mem->bus.caching == ttm_cached)
+   map->virtual = ioremap_cache(bo->mem.bus.offset + 
offset,
+ size);
+#endif
else
map->virtual = ioremap(bo->mem.bus.offset + offset,
   size);
@@ -490,6 +499,11 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct 
dma_buf_map *map)
else if (mem->bus.caching == ttm_write_combined)
vaddr_iomem = ioremap_wc(mem->bus.offset,
 bo->base.size);
+#ifdef CONFIG_X86
+   else if (mem->bus.caching == ttm_cached)
+   vaddr_iomem = ioremap_cache(mem->bus.offset,
+ bo->base.size);
+#endif
else
vaddr_iomem = ioremap(mem->bus.offset, bo->base.size);
 
-- 
2.7.4



[PATCH] drm/ttm: ioremap buffer according to TTM mem caching setting

2021-03-04 Thread Oak Zeng
If tbo.mem.bus.caching is cached, buffer is intended to be mapped
as cached from CPU. Map it with ioremap_cache.

This wasn't necessary before as device memory was never mapped
as cached from CPU side. It becomes necessary for aldebaran as
device memory is mapped cached from CPU.

Signed-off-by: Oak Zeng 
Reviewed-by: Christian Konig 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 031e581..7429464 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -91,6 +91,10 @@ static int ttm_resource_ioremap(struct ttm_device *bdev,
 
if (mem->bus.caching == ttm_write_combined)
addr = ioremap_wc(mem->bus.offset, bus_size);
+#ifdef CONFIG_X86
+   else if (mem->bus.caching == ttm_cached)
+   addr = ioremap_cache(mem->bus.offset, bus_size);
+#endif
else
addr = ioremap(mem->bus.offset, bus_size);
if (!addr) {
@@ -372,6 +376,11 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo,
if (mem->bus.caching == ttm_write_combined)
map->virtual = ioremap_wc(bo->mem.bus.offset + offset,
  size);
+#ifdef CONFIG_X86
+   else if (mem->bus.caching == ttm_cached)
+   map->virtual = ioremap_cache(bo->mem.bus.offset + 
offset,
+ size);
+#endif
else
map->virtual = ioremap(bo->mem.bus.offset + offset,
   size);
@@ -490,6 +499,11 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct 
dma_buf_map *map)
else if (mem->bus.caching == ttm_write_combined)
vaddr_iomem = ioremap_wc(mem->bus.offset,
 bo->base.size);
+   else if (mem->bus.caching == ttm_cached)
+#ifdef CONFIG_X86
+   vaddr_iomem = ioremap_cache(mem->bus.offset,
+ bo->base.size);
+#endif
else
vaddr_iomem = ioremap(mem->bus.offset, bo->base.size);
 
-- 
2.7.4



[PATCH] drm/ttm: ioremap buffer according to TTM mem caching setting

2021-03-03 Thread Oak Zeng
If tbo.mem.bus.caching is cached, buffer is intended to be mapped
as cached from CPU. Map it with ioremap_cache.

This wasn't necessary before as device memory was never mapped
as cached from CPU side. It becomes necessary for aldebaran as
device memory is mapped cached from CPU.

Signed-off-by: Oak Zeng 
Reviewed-by: Christian Konig 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 031e581..7c848e2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -91,6 +91,10 @@ static int ttm_resource_ioremap(struct ttm_device *bdev,
 
if (mem->bus.caching == ttm_write_combined)
addr = ioremap_wc(mem->bus.offset, bus_size);
+#ifdef __x86_64__
+   else if (mem->bus.caching == ttm_cached)
+   addr = ioremap_cache(mem->bus.offset, bus_size);
+#endif
else
addr = ioremap(mem->bus.offset, bus_size);
if (!addr) {
@@ -372,6 +376,11 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo,
if (mem->bus.caching == ttm_write_combined)
map->virtual = ioremap_wc(bo->mem.bus.offset + offset,
  size);
+#ifdef __x86_64__
+   else if (mem->bus.caching == ttm_cached)
+   map->virtual = ioremap_cache(bo->mem.bus.offset + 
offset,
+ size);
+#endif
else
map->virtual = ioremap(bo->mem.bus.offset + offset,
   size);
@@ -490,6 +499,11 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct 
dma_buf_map *map)
else if (mem->bus.caching == ttm_write_combined)
vaddr_iomem = ioremap_wc(mem->bus.offset,
 bo->base.size);
+   else if (mem->bus.caching == ttm_cached)
+#ifdef __x86_64__
+   vaddr_iomem = ioremap_cache(mem->bus.offset,
+ bo->base.size);
+#endif
else
vaddr_iomem = ioremap(mem->bus.offset, bo->base.size);
 
-- 
2.7.4



[PATCH] drm/ttm: PLEASE ignore. Just test Intel build robot.

2021-03-02 Thread Oak Zeng
ioremap buffer according to TTM mem caching setting

If tbo.mem.bus.caching is ttm_cached, the buffer is intended to be
mapped as cached from the CPU side. Map it with ioremap_cache().

This wasn't necessary before, as device memory was never mapped as
cached from the CPU side. It becomes necessary for Aldebaran, where
device memory is mapped cached from the CPU.

Signed-off-by: Oak Zeng 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 031e581..8c65a13 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -91,6 +91,8 @@ static int ttm_resource_ioremap(struct ttm_device *bdev,
 
if (mem->bus.caching == ttm_write_combined)
addr = ioremap_wc(mem->bus.offset, bus_size);
+   else if (mem->bus.caching == ttm_cached)
+   addr = ioremap_wc(mem->bus.offset, bus_size);
else
addr = ioremap(mem->bus.offset, bus_size);
if (!addr) {
@@ -372,6 +374,9 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo,
if (mem->bus.caching == ttm_write_combined)
map->virtual = ioremap_wc(bo->mem.bus.offset + offset,
  size);
+   else if (mem->bus.caching == ttm_cached)
+   map->virtual = ioremap_wc(bo->mem.bus.offset + offset,
+ size);
else
map->virtual = ioremap(bo->mem.bus.offset + offset,
   size);
@@ -490,6 +495,9 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct dma_buf_map *map)
else if (mem->bus.caching == ttm_write_combined)
vaddr_iomem = ioremap_wc(mem->bus.offset,
 bo->base.size);
+   else if (mem->bus.caching == ttm_cached)
+   vaddr_iomem = ioremap_cache(mem->bus.offset,
+ bo->base.size);
else
vaddr_iomem = ioremap(mem->bus.offset, bo->base.size);
 
-- 
2.7.4



[PATCH] drm/ttm: PLEASE ignore. Just test Intel build robot.

2021-03-02 Thread Oak Zeng
ioremap buffer according to TTM mem caching setting

If tbo.mem.bus.caching is ttm_cached, the buffer is intended to be
mapped as cached from the CPU side. Map it with ioremap_cache().

This wasn't necessary before, as device memory was never mapped as
cached from the CPU side. It becomes necessary for Aldebaran, where
device memory is mapped cached from the CPU.

Signed-off-by: Oak Zeng 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 031e581..8c65a13 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -91,6 +91,8 @@ static int ttm_resource_ioremap(struct ttm_device *bdev,
 
if (mem->bus.caching == ttm_write_combined)
addr = ioremap_wc(mem->bus.offset, bus_size);
+   else if (mem->bus.caching == ttm_cached)
+   addr = ioremap_wc(mem->bus.offset, bus_size);
else
addr = ioremap(mem->bus.offset, bus_size);
if (!addr) {
@@ -372,6 +374,9 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo,
if (mem->bus.caching == ttm_write_combined)
map->virtual = ioremap_wc(bo->mem.bus.offset + offset,
  size);
+   else if (mem->bus.caching == ttm_cached)
+   map->virtual = ioremap_wc(bo->mem.bus.offset + offset,
+ size);
else
map->virtual = ioremap(bo->mem.bus.offset + offset,
   size);
@@ -490,6 +495,9 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct dma_buf_map *map)
else if (mem->bus.caching == ttm_write_combined)
vaddr_iomem = ioremap_wc(mem->bus.offset,
 bo->base.size);
+   else if (mem->bus.caching == ttm_cached)
+   vaddr_iomem = ioremap_wc(mem->bus.offset,
+ bo->base.size);
else
vaddr_iomem = ioremap(mem->bus.offset, bo->base.size);
 
-- 
2.7.4



[PATCH] drm/ttm: ioremap buffer according to TTM mem caching setting

2021-03-01 Thread Oak Zeng
If tbo.mem.bus.caching is ttm_cached, the buffer is intended to be
mapped as cached from the CPU side. Map it with ioremap_cache().

This wasn't necessary before, as device memory was never mapped as
cached from the CPU side. It becomes necessary for Aldebaran, where
device memory is mapped cached from the CPU.

Signed-off-by: Oak Zeng 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 031e581..8c65a13 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -91,6 +91,8 @@ static int ttm_resource_ioremap(struct ttm_device *bdev,
 
if (mem->bus.caching == ttm_write_combined)
addr = ioremap_wc(mem->bus.offset, bus_size);
+   else if (mem->bus.caching == ttm_cached)
+   addr = ioremap_cache(mem->bus.offset, bus_size);
else
addr = ioremap(mem->bus.offset, bus_size);
if (!addr) {
@@ -372,6 +374,9 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo,
if (mem->bus.caching == ttm_write_combined)
map->virtual = ioremap_wc(bo->mem.bus.offset + offset,
  size);
+   else if (mem->bus.caching == ttm_cached)
+   map->virtual = ioremap_cache(bo->mem.bus.offset + offset,
+ size);
else
map->virtual = ioremap(bo->mem.bus.offset + offset,
   size);
@@ -490,6 +495,9 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct dma_buf_map *map)
else if (mem->bus.caching == ttm_write_combined)
vaddr_iomem = ioremap_wc(mem->bus.offset,
 bo->base.size);
+   else if (mem->bus.caching == ttm_cached)
+   vaddr_iomem = ioremap_cache(mem->bus.offset,
+ bo->base.size);
else
vaddr_iomem = ioremap(mem->bus.offset, bo->base.size);
 
-- 
2.7.4
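
Similarly, a hypothetical sketch of the vmap path that the last hunk touches
(not taken from this series): ttm_bo_vmap() fills a struct dma_buf_map, and
the caller branches on is_iomem rather than on the caching mode. The helper
name zero_bo() is made up, and the BO is assumed to be reserved.

#include <linux/dma-buf-map.h>
#include <linux/io.h>
#include <linux/string.h>
#include <drm/ttm/ttm_bo_api.h>

/* Illustrative only: zero a whole BO through its vmap. */
static int zero_bo(struct ttm_buffer_object *bo)
{
	struct dma_buf_map map;
	int ret;

	ret = ttm_bo_vmap(bo, &map);
	if (ret)
		return ret;

	if (map.is_iomem)
		memset_io(map.vaddr_iomem, 0, bo->base.size);	/* device memory */
	else
		memset(map.vaddr, 0, bo->base.size);		/* system memory */

	ttm_bo_vunmap(bo, &map);
	return 0;
}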
