From: Matthew Auld <matthew.a...@intel.com>

For local-memory objects we need to align the GTT addresses
to 64K, both for the ppgtt and ggtt.

We need to support vm->min_alignment > 4K, depending
on the vm itself and the type of object we are inserting.
With this in mind update the GTT selftests to take this
into account.

For compact-pt we further align and pad lmem object GTT addresses
to 2MB to ensure PDEs contain consistent page sizes as
required by the HW.

v3:
        * use needs_compact_pt flag to discriminate between
          64K and 64K with compact-pt
        * add i915_vm_obj_min_alignment
        * use i915_vm_obj_min_alignment to round up vma reservation
          if compact-pt instead of hard coding
v5:
        * fix i915_vm_obj_min_alignment for internal objects which
          have no memory region
v6:
        * tiled_blits_create: correctly pick the largest required alignment
v8:
        * i915_vm_min_alignment: protect against array overflow for the mock region

Signed-off-by: Matthew Auld <matthew.a...@intel.com>
Signed-off-by: Ramalingam C <ramalinga...@intel.com>
Signed-off-by: Robert Beckett <bob.beck...@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellst...@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.v...@intel.com>
---
 .../i915/gem/selftests/i915_gem_client_blt.c  | 21 ++--
 drivers/gpu/drm/i915/gt/intel_gtt.c           | 12 +++
 drivers/gpu/drm/i915/gt/intel_gtt.h           | 22 +++++
 drivers/gpu/drm/i915/i915_vma.c               |  9 ++
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 ++++++++++++-------
 5 files changed, 119 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index 8f28e46e8ee5..ddd0772fd828 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -40,6 +40,7 @@ struct tiled_blits {
        struct blit_buffer scratch;
        struct i915_vma *batch;
        u64 hole;
+       u64 align;
        u32 width;
        u32 height;
 };
@@ -411,14 +412,19 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
                goto err_free;
        }
 
-       hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);
+       t->align = i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL);
+       t->align = max(t->align,
+                      i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM));
+
+       hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align);
        hole_size *= 2; /* room to maneuver */
-       hole_size += 2 * I915_GTT_MIN_ALIGNMENT;
+       hole_size += 2 * t->align; /* padding on either side */
 
        mutex_lock(&t->ce->vm->mutex);
        memset(&hole, 0, sizeof(hole));
        err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole,
-                                         hole_size, 0, I915_COLOR_UNEVICTABLE,
+                                         hole_size, t->align,
+                                         I915_COLOR_UNEVICTABLE,
                                          0, U64_MAX,
                                          DRM_MM_INSERT_BEST);
        if (!err)
@@ -429,7 +435,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
                goto err_put;
        }
 
-       t->hole = hole.start + I915_GTT_MIN_ALIGNMENT;
+       t->hole = hole.start + t->align;
        pr_info("Using hole at %llx\n", t->hole);
 
        err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);
@@ -456,7 +462,7 @@ static void tiled_blits_destroy(struct tiled_blits *t)
 static int tiled_blits_prepare(struct tiled_blits *t,
                               struct rnd_state *prng)
 {
-       u64 offset = PAGE_ALIGN(t->width * t->height * 4);
+       u64 offset = round_up(t->width * t->height * 4, t->align);
        u32 *map;
        int err;
        int i;
@@ -487,8 +493,7 @@ static int tiled_blits_prepare(struct tiled_blits *t,
 
 static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
 {
-       u64 offset =
-               round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT);
+       u64 offset = round_up(t->width * t->height * 4, 2 * t->align);
        int err;
 
        /* We want to check position invariant tiling across GTT eviction */
@@ -501,7 +506,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
 
        /* Reposition so that we overlap the old addresses, and slightly off */
        err = tiled_blit(t,
-                        &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT,
+                        &t->buffers[2], t->hole + t->align,
                         &t->buffers[1], t->hole + 3 * offset / 2);
        if (err)
                return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 49a8fb63e6e5..c548c193cd35 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -225,6 +225,18 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
 
        GEM_BUG_ON(!vm->total);
        drm_mm_init(&vm->mm, 0, vm->total);
+
+       memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
+                ARRAY_SIZE(vm->min_alignment));
+
+       if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915)) {
+               vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M;
+               vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = 
I915_GTT_PAGE_SIZE_2M;
+       } else if (HAS_64K_PAGES(vm->i915)) {
+               vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
+               vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = 
I915_GTT_PAGE_SIZE_64K;
+       }
+
        vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
 
        INIT_LIST_HEAD(&vm->bound_list);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 8073438b67c8..6cd518a3277c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -29,6 +29,8 @@
 #include "i915_selftest.h"
 #include "i915_vma_resource.h"
 #include "i915_vma_types.h"
+#include "i915_params.h"
+#include "intel_memory_region.h"
 
 #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
 
@@ -223,6 +225,7 @@ struct i915_address_space {
        struct device *dma;
        u64 total;              /* size addr space maps (ex. 2GB for ggtt) */
        u64 reserved;           /* size addr space reserved */
+       u64 min_alignment[INTEL_MEMORY_STOLEN_LOCAL + 1];
 
        unsigned int bind_async_flags;
 
@@ -384,6 +387,25 @@ i915_vm_has_scratch_64K(struct i915_address_space *vm)
        return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K);
 }
 
+static inline u64 i915_vm_min_alignment(struct i915_address_space *vm,
+                                       enum intel_memory_type type)
+{
+       /* avoid INTEL_MEMORY_MOCK overflow */
+       if ((int)type >= ARRAY_SIZE(vm->min_alignment))
+               type = INTEL_MEMORY_SYSTEM;
+
+       return vm->min_alignment[type];
+}
+
+static inline u64 i915_vm_obj_min_alignment(struct i915_address_space *vm,
+                                           struct drm_i915_gem_object  *obj)
+{
+       struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
+       enum intel_memory_type type = mr ? mr->type : INTEL_MEMORY_SYSTEM;
+
+       return i915_vm_min_alignment(vm, type);
+}
+
 static inline bool
 i915_vm_has_cache_coloring(struct i915_address_space *vm)
 {
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 845cd88f8313..3558b16a929c 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -757,6 +757,14 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
                end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
        GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
 
+       alignment = max(alignment, i915_vm_obj_min_alignment(vma->vm, 
vma->obj));
+       /*
+        * for compact-pt we round up the reservation to prevent
+        * any smaller pages being used within the same PDE
+        */
+       if (NEEDS_COMPACT_PT(vma->vm->i915))
+               size = round_up(size, alignment);
+
        /* If binding the object/GGTT view requires more space than the entire
         * aperture has, reject it early before evicting everything in a vain
         * attempt to find space.
@@ -769,6 +777,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
        }
 
        color = 0;
+
        if (i915_vm_has_cache_coloring(vma->vm))
                color = vma->obj->cache_level;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index e7e6c4b2c81d..0d80509ef3c4 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -239,6 +239,8 @@ static int lowlevel_hole(struct i915_address_space *vm,
                         u64 hole_start, u64 hole_end,
                         unsigned long end_time)
 {
+       const unsigned int min_alignment =
+               i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
        I915_RND_STATE(seed_prng);
        struct i915_vma_resource *mock_vma_res;
        unsigned int size;
@@ -252,9 +254,10 @@ static int lowlevel_hole(struct i915_address_space *vm,
                I915_RND_SUBSTATE(prng, seed_prng);
                struct drm_i915_gem_object *obj;
                unsigned int *order, count, n;
-               u64 hole_size;
+               u64 hole_size, aligned_size;
 
-               hole_size = (hole_end - hole_start) >> size;
+               aligned_size = max_t(u32, ilog2(min_alignment), size);
+               hole_size = (hole_end - hole_start) >> aligned_size;
                if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32))
                        hole_size = KMALLOC_MAX_SIZE / sizeof(u32);
                count = hole_size >> 1;
@@ -275,8 +278,8 @@ static int lowlevel_hole(struct i915_address_space *vm,
                }
                GEM_BUG_ON(!order);
 
-               GEM_BUG_ON(count * BIT_ULL(size) > vm->total);
-               GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end);
+               GEM_BUG_ON(count * BIT_ULL(aligned_size) > vm->total);
+               GEM_BUG_ON(hole_start + count * BIT_ULL(aligned_size) > 
hole_end);
 
                /* Ignore allocation failures (i.e. don't report them as
                 * a test failure) as we are purposefully allocating very
@@ -299,10 +302,10 @@ static int lowlevel_hole(struct i915_address_space *vm,
                }
 
                for (n = 0; n < count; n++) {
-                       u64 addr = hole_start + order[n] * BIT_ULL(size);
+                       u64 addr = hole_start + order[n] * 
BIT_ULL(aligned_size);
                        intel_wakeref_t wakeref;
 
-                       GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
+                       GEM_BUG_ON(addr + BIT_ULL(aligned_size) > vm->total);
 
                        if (igt_timeout(end_time,
                                        "%s timed out before %d/%d\n",
@@ -345,7 +348,7 @@ static int lowlevel_hole(struct i915_address_space *vm,
                        }
 
                        mock_vma_res->bi.pages = obj->mm.pages;
-                       mock_vma_res->node_size = BIT_ULL(size);
+                       mock_vma_res->node_size = BIT_ULL(aligned_size);
                        mock_vma_res->start = addr;
 
                        with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref)
@@ -356,7 +359,7 @@ static int lowlevel_hole(struct i915_address_space *vm,
 
                i915_random_reorder(order, count, &prng);
                for (n = 0; n < count; n++) {
-                       u64 addr = hole_start + order[n] * BIT_ULL(size);
+                       u64 addr = hole_start + order[n] * 
BIT_ULL(aligned_size);
                        intel_wakeref_t wakeref;
 
                        GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
@@ -400,8 +403,10 @@ static int fill_hole(struct i915_address_space *vm,
 {
        const u64 hole_size = hole_end - hole_start;
        struct drm_i915_gem_object *obj;
+       const unsigned int min_alignment =
+               i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
        const unsigned long max_pages =
-               min_t(u64, ULONG_MAX - 1, hole_size/2 >> PAGE_SHIFT);
+               min_t(u64, ULONG_MAX - 1, (hole_size / 2) >> 
ilog2(min_alignment));
        const unsigned long max_step = max(int_sqrt(max_pages), 2UL);
        unsigned long npages, prime, flags;
        struct i915_vma *vma;
@@ -442,14 +447,17 @@ static int fill_hole(struct i915_address_space *vm,
 
                                offset = p->offset;
                                list_for_each_entry(obj, &objects, st_link) {
+                                       u64 aligned_size = 
round_up(obj->base.size,
+                                                                   
min_alignment);
+
                                        vma = i915_vma_instance(obj, vm, NULL);
                                        if (IS_ERR(vma))
                                                continue;
 
                                        if (p->step < 0) {
-                                               if (offset < hole_start + 
obj->base.size)
+                                               if (offset < hole_start + 
aligned_size)
                                                        break;
-                                               offset -= obj->base.size;
+                                               offset -= aligned_size;
                                        }
 
                                        err = i915_vma_pin(vma, 0, 0, offset | 
flags);
@@ -471,22 +479,25 @@ static int fill_hole(struct i915_address_space *vm,
                                        i915_vma_unpin(vma);
 
                                        if (p->step > 0) {
-                                               if (offset + obj->base.size > 
hole_end)
+                                               if (offset + aligned_size > 
hole_end)
                                                        break;
-                                               offset += obj->base.size;
+                                               offset += aligned_size;
                                        }
                                }
 
                                offset = p->offset;
                                list_for_each_entry(obj, &objects, st_link) {
+                                       u64 aligned_size = 
round_up(obj->base.size,
+                                                                   
min_alignment);
+
                                        vma = i915_vma_instance(obj, vm, NULL);
                                        if (IS_ERR(vma))
                                                continue;
 
                                        if (p->step < 0) {
-                                               if (offset < hole_start + 
obj->base.size)
+                                               if (offset < hole_start + 
aligned_size)
                                                        break;
-                                               offset -= obj->base.size;
+                                               offset -= aligned_size;
                                        }
 
                                        if (!drm_mm_node_allocated(&vma->node) 
||
@@ -507,22 +518,25 @@ static int fill_hole(struct i915_address_space *vm,
                                        }
 
                                        if (p->step > 0) {
-                                               if (offset + obj->base.size > 
hole_end)
+                                               if (offset + aligned_size > 
hole_end)
                                                        break;
-                                               offset += obj->base.size;
+                                               offset += aligned_size;
                                        }
                                }
 
                                offset = p->offset;
                                list_for_each_entry_reverse(obj, &objects, 
st_link) {
+                                       u64 aligned_size = 
round_up(obj->base.size,
+                                                                   
min_alignment);
+
                                        vma = i915_vma_instance(obj, vm, NULL);
                                        if (IS_ERR(vma))
                                                continue;
 
                                        if (p->step < 0) {
-                                               if (offset < hole_start + 
obj->base.size)
+                                               if (offset < hole_start + 
aligned_size)
                                                        break;
-                                               offset -= obj->base.size;
+                                               offset -= aligned_size;
                                        }
 
                                        err = i915_vma_pin(vma, 0, 0, offset | 
flags);
@@ -544,22 +558,25 @@ static int fill_hole(struct i915_address_space *vm,
                                        i915_vma_unpin(vma);
 
                                        if (p->step > 0) {
-                                               if (offset + obj->base.size > 
hole_end)
+                                               if (offset + aligned_size > 
hole_end)
                                                        break;
-                                               offset += obj->base.size;
+                                               offset += aligned_size;
                                        }
                                }
 
                                offset = p->offset;
                                list_for_each_entry_reverse(obj, &objects, 
st_link) {
+                                       u64 aligned_size = 
round_up(obj->base.size,
+                                                                   
min_alignment);
+
                                        vma = i915_vma_instance(obj, vm, NULL);
                                        if (IS_ERR(vma))
                                                continue;
 
                                        if (p->step < 0) {
-                                               if (offset < hole_start + 
obj->base.size)
+                                               if (offset < hole_start + 
aligned_size)
                                                        break;
-                                               offset -= obj->base.size;
+                                               offset -= aligned_size;
                                        }
 
                                        if (!drm_mm_node_allocated(&vma->node) 
||
@@ -580,9 +597,9 @@ static int fill_hole(struct i915_address_space *vm,
                                        }
 
                                        if (p->step > 0) {
-                                               if (offset + obj->base.size > 
hole_end)
+                                               if (offset + aligned_size > 
hole_end)
                                                        break;
-                                               offset += obj->base.size;
+                                               offset += aligned_size;
                                        }
                                }
                        }
@@ -612,6 +629,7 @@ static int walk_hole(struct i915_address_space *vm,
        const u64 hole_size = hole_end - hole_start;
        const unsigned long max_pages =
                min_t(u64, ULONG_MAX - 1, hole_size >> PAGE_SHIFT);
+       unsigned long min_alignment;
        unsigned long flags;
        u64 size;
 
@@ -621,6 +639,8 @@ static int walk_hole(struct i915_address_space *vm,
        if (i915_is_ggtt(vm))
                flags |= PIN_GLOBAL;
 
+       min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
        for_each_prime_number_from(size, 1, max_pages) {
                struct drm_i915_gem_object *obj;
                struct i915_vma *vma;
@@ -639,7 +659,7 @@ static int walk_hole(struct i915_address_space *vm,
 
                for (addr = hole_start;
                     addr + obj->base.size < hole_end;
-                    addr += obj->base.size) {
+                    addr += round_up(obj->base.size, min_alignment)) {
                        err = i915_vma_pin(vma, 0, 0, addr | flags);
                        if (err) {
                                pr_err("%s bind failed at %llx + %llx [hole 
%llx- %llx] with err=%d\n",
@@ -691,6 +711,7 @@ static int pot_hole(struct i915_address_space *vm,
 {
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
+       unsigned int min_alignment;
        unsigned long flags;
        unsigned int pot;
        int err = 0;
@@ -699,6 +720,8 @@ static int pot_hole(struct i915_address_space *vm,
        if (i915_is_ggtt(vm))
                flags |= PIN_GLOBAL;
 
+       min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
        obj = i915_gem_object_create_internal(vm->i915, 2 * I915_GTT_PAGE_SIZE);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
@@ -711,13 +734,13 @@ static int pot_hole(struct i915_address_space *vm,
 
        /* Insert a pair of pages across every pot boundary within the hole */
        for (pot = fls64(hole_end - 1) - 1;
-            pot > ilog2(2 * I915_GTT_PAGE_SIZE);
+            pot > ilog2(2 * min_alignment);
             pot--) {
                u64 step = BIT_ULL(pot);
                u64 addr;
 
-               for (addr = round_up(hole_start + I915_GTT_PAGE_SIZE, step) - 
I915_GTT_PAGE_SIZE;
-                    addr <= round_down(hole_end - 2*I915_GTT_PAGE_SIZE, step) 
- I915_GTT_PAGE_SIZE;
+               for (addr = round_up(hole_start + min_alignment, step) - 
min_alignment;
+                    addr <= round_down(hole_end - (2 * min_alignment), step) - 
min_alignment;
                     addr += step) {
                        err = i915_vma_pin(vma, 0, 0, addr | flags);
                        if (err) {
@@ -762,6 +785,7 @@ static int drunk_hole(struct i915_address_space *vm,
                      unsigned long end_time)
 {
        I915_RND_STATE(prng);
+       unsigned int min_alignment;
        unsigned int size;
        unsigned long flags;
 
@@ -769,15 +793,18 @@ static int drunk_hole(struct i915_address_space *vm,
        if (i915_is_ggtt(vm))
                flags |= PIN_GLOBAL;
 
+       min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
        /* Keep creating larger objects until one cannot fit into the hole */
        for (size = 12; (hole_end - hole_start) >> size; size++) {
                struct drm_i915_gem_object *obj;
                unsigned int *order, count, n;
                struct i915_vma *vma;
-               u64 hole_size;
+               u64 hole_size, aligned_size;
                int err = -ENODEV;
 
-               hole_size = (hole_end - hole_start) >> size;
+               aligned_size = max_t(u32, ilog2(min_alignment), size);
+               hole_size = (hole_end - hole_start) >> aligned_size;
                if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32))
                        hole_size = KMALLOC_MAX_SIZE / sizeof(u32);
                count = hole_size >> 1;
@@ -817,7 +844,7 @@ static int drunk_hole(struct i915_address_space *vm,
                GEM_BUG_ON(vma->size != BIT_ULL(size));
 
                for (n = 0; n < count; n++) {
-                       u64 addr = hole_start + order[n] * BIT_ULL(size);
+                       u64 addr = hole_start + order[n] * 
BIT_ULL(aligned_size);
 
                        err = i915_vma_pin(vma, 0, 0, addr | flags);
                        if (err) {
@@ -869,11 +896,14 @@ static int __shrink_hole(struct i915_address_space *vm,
 {
        struct drm_i915_gem_object *obj;
        unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
+       unsigned int min_alignment;
        unsigned int order = 12;
        LIST_HEAD(objects);
        int err = 0;
        u64 addr;
 
+       min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
        /* Keep creating larger objects until one cannot fit into the hole */
        for (addr = hole_start; addr < hole_end; ) {
                struct i915_vma *vma;
@@ -914,7 +944,7 @@ static int __shrink_hole(struct i915_address_space *vm,
                }
 
                i915_vma_unpin(vma);
-               addr += size;
+               addr += round_up(size, min_alignment);
 
                /*
                 * Since we are injecting allocation faults at random intervals,
-- 
2.20.1

Reply via email to