Re: [PATCH v2] drm/buddy: Fix the warn on's during force merge

2024-05-17 Thread Matthew Auld

On 17/05/2024 14:50, Arunpravin Paneer Selvam wrote:

Move the fallback and block incompatible checks
above, so that we don't unnecessarily split the blocks
and leave them unmerged. This resolves the unnecessary
WARN_ONs thrown during the force_merge call.

v2:(Matthew)
   - Move the fallback and block incompatible checks above
 the contains check.

Signed-off-by: Arunpravin Paneer Selvam 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")

Reviewed-by: Matthew Auld 

A follow up unit test to catch this edge case would be lovely.


---
  drivers/gpu/drm/drm_buddy.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 1daf778cf6fa..94f8c34fc293 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -524,11 +524,11 @@ __alloc_range_bias(struct drm_buddy *mm,
continue;
}
  
+		if (!fallback && block_incompatible(block, flags))

+   continue;
+
if (contains(start, end, block_start, block_end) &&
order == drm_buddy_block_order(block)) {
-   if (!fallback && block_incompatible(block, flags))
-   continue;
-
/*
 * Find the free block within the range.
 */


Re: [PATCH] drm/buddy: Merge back blocks in bias range function

2024-05-17 Thread Matthew Auld

On 17/05/2024 13:38, Arunpravin Paneer Selvam wrote:

In bias range allocation, when we don't find the required
blocks (i.e. on returning -ENOSPC), we should merge back the
split blocks. Otherwise, during force_merge we are flooded with
WARN_ONs because a block and its buddy are in the same clear state
(dirty or clear).

Hence, rename force_merge to merge_blocks and pass
force_merge as a bool function parameter. Based on the requirement,
say, in any normal situation we can call merge_blocks passing
the force_merge variable as false. And, in any memory crunch situation,
we can call merge_blocks passing force_merge as true. This
resolves the unnecessary WARN_ONs thrown during the force_merge call.

Signed-off-by: Arunpravin Paneer Selvam 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")
---
  drivers/gpu/drm/drm_buddy.c | 32 ++--
  1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 1daf778cf6fa..111f602f1359 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -161,10 +161,11 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
return order;
  }
  
-static int __force_merge(struct drm_buddy *mm,

-u64 start,
-u64 end,
-unsigned int min_order)
+static int __merge_blocks(struct drm_buddy *mm,
+ u64 start,
+ u64 end,
+ unsigned int min_order,
+ bool force_merge)
  {
unsigned int order;
int i;
@@ -195,8 +196,9 @@ static int __force_merge(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
continue;
  
-			WARN_ON(drm_buddy_block_is_clear(block) ==

-   drm_buddy_block_is_clear(buddy));
+   if (force_merge)
+   WARN_ON(drm_buddy_block_is_clear(block) ==
+   drm_buddy_block_is_clear(buddy));
  
  			/*

 * If the prev block is same as buddy, don't access the
@@ -210,7 +212,7 @@ static int __force_merge(struct drm_buddy *mm,
if (drm_buddy_block_is_clear(block))
mm->clear_avail -= drm_buddy_block_size(mm, 
block);
  
-			order = __drm_buddy_free(mm, block, true);

+   order = __drm_buddy_free(mm, block, force_merge);
if (order >= min_order)
return 0;
}
@@ -332,7 +334,7 @@ void drm_buddy_fini(struct drm_buddy *mm)
  
  	for (i = 0; i < mm->n_roots; ++i) {

order = ilog2(size) - ilog2(mm->chunk_size);
-   __force_merge(mm, 0, size, order);
+   __merge_blocks(mm, 0, size, order, true);
  
  		WARN_ON(!drm_buddy_block_is_free(mm->roots[i]));

drm_block_free(mm, mm->roots[i]);
@@ -479,7 +481,7 @@ __alloc_range_bias(struct drm_buddy *mm,
   unsigned long flags,
   bool fallback)
  {
-   u64 req_size = mm->chunk_size << order;
+   u64 size, root_size, req_size = mm->chunk_size << order;
struct drm_buddy_block *block;
struct drm_buddy_block *buddy;
LIST_HEAD(dfs);
@@ -487,6 +489,7 @@ __alloc_range_bias(struct drm_buddy *mm,
int i;
  
  	end = end - 1;

+   size = mm->size;
  
  	for (i = 0; i < mm->n_roots; ++i)

list_add_tail(>roots[i]->tmp_link, );
@@ -548,6 +551,15 @@ __alloc_range_bias(struct drm_buddy *mm,
list_add(>left->tmp_link, );
} while (1);
  
+	/* Merge back the split blocks */

+   for (i = 0; i < mm->n_roots; ++i) {
+   order = ilog2(size) - ilog2(mm->chunk_size);
+   __merge_blocks(mm, start, end, order, false);
+
+   root_size = mm->chunk_size << order;
+   size -= root_size;
+   }


Hmm, can't we just not split a given block if it is incompatible? Like 
say we are looking for cleared, there is not much point in splitting 
blocks that are dirty on this pass, right?


What about moving the incompatible check earlier like:

if (!fallback && block_incompatible(block, flags))
   continue;

Would that not fix the issue?


+
return ERR_PTR(-ENOSPC);
  
  err_undo:

@@ -1026,7 +1038,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
if (order-- == min_order) {
/* Try allocation through force merge method */
if (mm->clear_avail &&
-   !__force_merge(mm, start, end, min_order)) {
+   !__merge_blocks(mm, start, end, min_order, 
true)) {
block = __drm_buddy_alloc_blocks(mm, 
start,

Re: [PATCH 1/2] drm/buddy: stop using PAGE_SIZE

2024-05-17 Thread Matthew Auld

On 17/05/2024 12:00, Christian König wrote:

Am 17.05.24 um 10:53 schrieb Matthew Auld:

On 17/05/2024 02:11, Dave Airlie wrote:

On Thu, 29 Feb 2024 at 23:48, Arnd Bergmann  wrote:


On Thu, Feb 29, 2024, at 11:51, Matthew Auld wrote:
The drm_buddy minimum page-size requirements should be distinct 
from the

CPU PAGE_SIZE. Only restriction is that the minimum page-size is at
least 4K.

Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Cc: Arnd Bergmann 


Acked-by: Arnd Bergmann 


Has this landed anywhere yet?


Looks like it fell through the cracks. I think it still applies, so 
just needs someone with commit rights to push it.


Pushed to drm-misc-fixes.


Thanks.



Regards,
Christian.





I've been testing 6.9 on 64K pages and the buddy tests are exploding, so
I wanted to pull this in.

Dave.




Re: [PATCH 1/2] drm/buddy: stop using PAGE_SIZE

2024-05-17 Thread Matthew Auld

On 17/05/2024 02:11, Dave Airlie wrote:

On Thu, 29 Feb 2024 at 23:48, Arnd Bergmann  wrote:


On Thu, Feb 29, 2024, at 11:51, Matthew Auld wrote:

The drm_buddy minimum page-size requirements should be distinct from the
CPU PAGE_SIZE. Only restriction is that the minimum page-size is at
least 4K.

Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Cc: Arnd Bergmann 


Acked-by: Arnd Bergmann 


Has this landed anywhere yet?


Looks like it fell through the cracks. I think it still applies, so just 
needs someone with commit rights to push it.




I've been testing 6.9 on 64K pages and the buddy tests are exploding, so
I wanted to pull this in.

Dave.


Re: [PATCH v2 2/2] drm/tests: Add a unit test for range bias allocation

2024-05-13 Thread Matthew Auld

On 13/05/2024 16:11, Paneer Selvam, Arunpravin wrote:

Hi Matthew,

On 5/13/2024 1:49 PM, Matthew Auld wrote:

On 12/05/2024 08:59, Arunpravin Paneer Selvam wrote:

Allocate cleared blocks in the bias range when the DRM
buddy's clear avail is zero. This will validate the bias
range allocation in scenarios like system boot when no
cleared blocks are available and exercise the fallback
path too. The resulting blocks should always be dirty.

Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/tests/drm_buddy_test.c | 35 ++
  1 file changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c

index e3b50e240d36..a194f271bc55 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -26,6 +26,8 @@ static void drm_test_buddy_alloc_range_bias(struct 
kunit *test)

  u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem;
  DRM_RND_STATE(prng, random_seed);
  unsigned int i, count, *order;
+    struct drm_buddy_block *block;
+    unsigned long flags;
  struct drm_buddy mm;
  LIST_HEAD(allocated);
  @@ -222,6 +224,39 @@ static void 
drm_test_buddy_alloc_range_bias(struct kunit *test)

    drm_buddy_free_list(, , 0);
  drm_buddy_fini();
+
+    /*
+ * Allocate cleared blocks in the bias range when the DRM 
buddy's clear avail is
+ * zero. This will validate the bias range allocation in 
scenarios like system boot
+ * when no cleared blocks are available and exercise the 
fallback path too. The resulting

+ * blocks should always be dirty.
+ */
+
+    KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(, mm_size, ps),
+   "buddy_init failed\n");
+    mm.clear_avail = 0;


Should already be zero, right? Maybe make this an assert instead?
No, since mm is declared as a local variable in the test case,
mm.clear_avail is not zero.


That sounds like a bug IMO. The init() should initialize it, like it 
does for mm.avail and everything else.





+
+    bias_start = round_up(prandom_u32_state() % (mm_size - ps), 
ps);
+    bias_end = round_up(bias_start + prandom_u32_state() % 
(mm_size - bias_start), ps);

+    bias_end = max(bias_end, bias_start + ps);
+    bias_rem = bias_end - bias_start;
+
+    flags = DRM_BUDDY_CLEAR_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION;
+    u32 size = max(round_up(prandom_u32_state() % bias_rem, 
ps), ps);


Should the u32 declaration be moved up with the other declarations?

Sure.

Thanks,
Arun.


Otherwise,
Reviewed-by: Matthew Auld 


+
+    KUNIT_ASSERT_FALSE_MSG(test,
+   drm_buddy_alloc_blocks(, bias_start,
+  bias_end, size, ps,
+  ,
+  flags),
+   "buddy_alloc failed with bias(%x-%x), size=%u, 
ps=%u\n",

+   bias_start, bias_end, size, ps);
+
+    list_for_each_entry(block, , link)
+    KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+
+    drm_buddy_free_list(, , 0);
+    drm_buddy_fini();
  }
    static void drm_test_buddy_alloc_clear(struct kunit *test)




Re: [PATCH v2 2/2] drm/tests: Add a unit test for range bias allocation

2024-05-13 Thread Matthew Auld

On 12/05/2024 08:59, Arunpravin Paneer Selvam wrote:

Allocate cleared blocks in the bias range when the DRM
buddy's clear avail is zero. This will validate the bias
range allocation in scenarios like system boot when no
cleared blocks are available and exercise the fallback
path too. The resulting blocks should always be dirty.

Signed-off-by: Arunpravin Paneer Selvam 
---
  drivers/gpu/drm/tests/drm_buddy_test.c | 35 ++
  1 file changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index e3b50e240d36..a194f271bc55 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -26,6 +26,8 @@ static void drm_test_buddy_alloc_range_bias(struct kunit 
*test)
u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem;
DRM_RND_STATE(prng, random_seed);
unsigned int i, count, *order;
+   struct drm_buddy_block *block;
+   unsigned long flags;
struct drm_buddy mm;
LIST_HEAD(allocated);
  
@@ -222,6 +224,39 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
  
  	drm_buddy_free_list(, , 0);

drm_buddy_fini();
+
+   /*
+* Allocate cleared blocks in the bias range when the DRM buddy's clear 
avail is
+* zero. This will validate the bias range allocation in scenarios like 
system boot
+* when no cleared blocks are available and exercise the fallback path 
too. The resulting
+* blocks should always be dirty.
+*/
+
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(, mm_size, ps),
+  "buddy_init failed\n");
+   mm.clear_avail = 0;


Should already be zero, right? Maybe make this an assert instead?


+
+   bias_start = round_up(prandom_u32_state() % (mm_size - ps), ps);
+   bias_end = round_up(bias_start + prandom_u32_state() % (mm_size - 
bias_start), ps);
+   bias_end = max(bias_end, bias_start + ps);
+   bias_rem = bias_end - bias_start;
+
+   flags = DRM_BUDDY_CLEAR_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION;
+   u32 size = max(round_up(prandom_u32_state() % bias_rem, ps), ps);


Should the u32 declaration be moved up with the other declarations?

Otherwise,
Reviewed-by: Matthew Auld 


+
+   KUNIT_ASSERT_FALSE_MSG(test,
+  drm_buddy_alloc_blocks(, bias_start,
+ bias_end, size, ps,
+ ,
+ flags),
+  "buddy_alloc failed with bias(%x-%x), size=%u, 
ps=%u\n",
+  bias_start, bias_end, size, ps);
+
+   list_for_each_entry(block, , link)
+   KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+
+   drm_buddy_free_list(, , 0);
+   drm_buddy_fini();
  }
  
  static void drm_test_buddy_alloc_clear(struct kunit *test)


[PATCH 01/20] drm/drm_managed: try to improve the drmm DOC

2024-05-10 Thread Matthew Auld
Hopefully make it clearer when to use devm vs drmm.

Signed-off-by: Matthew Auld 
Cc: Daniel Vetter 
Cc: dri-devel@lists.freedesktop.org
---
 drivers/gpu/drm/drm_managed.c | 42 +++
 1 file changed, 42 insertions(+)

diff --git a/drivers/gpu/drm/drm_managed.c b/drivers/gpu/drm/drm_managed.c
index 7646f67bda4e..20d705bbc0a3 100644
--- a/drivers/gpu/drm/drm_managed.c
+++ b/drivers/gpu/drm/drm_managed.c
@@ -34,6 +34,48 @@
  * during the lifetime of the driver, all the functions are fully concurrent
  * safe. But it is recommended to use managed resources only for resources that
  * change rarely, if ever, during the lifetime of the _device instance.
+ *
+ * Note that the distinction between devm and drmm is important to get right.
+ * Consider some hotunplug scenarios, where it is valid for there to be 
multiple
+ * unplugged struct &drm_device instances each being kept alive by an open
+ * driver fd. The driver needs a clean separation between what needs to happen
+ * when the struct &device is removed and what needs to happen when a given
+ * struct &drm_device instance is released, as well as in some cases a
+ * finer grained marking of critical sections that require hardware 
interaction.
+ * See below.
+ *
+ * devm
+ * 
+ * In general use devm for cleaning up anything hardware related. So removing
+ * pci mmaps, releasing interrupt handlers, basically anything hw related.  The
+ * devm release actions are called when the struct  is removed, shortly
+ * after calling into the drivers struct _driver.remove() callback, if this
+ * is a pci device.
+ *
+ * devm can be thought of as an alternative to putting all the hw related
+ * cleanup directly in the struct _driver.remove() callback, where the
+ * correct ordering of the unwind steps needs to be manually done in the error
+ * path of the struct _driver.probe() and again on the remove side.  With
+ * devm this is all done automatically.
+ *
+ * drmm
+ * 
+ * In general use this for cleaning up anything software related. So data
+ * structures and the like which are tied to the lifetime of a particular 
struct
+ * _device instance.
+ *
+ * drmm can be thought of as an alternative to putting all the software related
+ * cleanup directly in the struct _driver.release() callback, where again
+ * the correct ordering of the unwind steps needs to be done manually. As with
+ * devm this is instead done automatically.
+ *
+ * Sometimes there is no clean separation between software and hardware, which
+ * is where drm_dev_enter() comes in. For example, a driver might have some
+ * state tied to a struct _device instance, for which the same cleanup path
+ * is called for both a plugged and unplugged device, and the cleanup itself
+ * might require talking to the device if it's still attached to this 
particular
+ * struct _device. For that we instead mark the device sections.  See
+ * drm_dev_enter(), drm_dev_exit() and drm_dev_unplug().
  */
 
 struct drmres_node {
-- 
2.45.0



Re: [PATCH] drm/buddy: Fix the range bias clear memory allocation issue

2024-05-07 Thread Matthew Auld

On 06/05/2024 14:38, Arunpravin Paneer Selvam wrote:

Problem statement: During system boot, when an application requests
a bulk volume of cleared range bias memory while clear_avail
is zero, we don't fall back to the normal allocation method, because an
unnecessary clear_avail check prevents the fallback. This leads to
fb allocation failure, after which the system becomes unresponsive.

Solution: Remove the unnecessary clear_avail check in the range bias
allocation function.

Signed-off-by: Arunpravin Paneer Selvam 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")

Reviewed-by: Matthew Auld 




Re: [PATCH v10 4/9] drm/ttm/tests: Add tests with mock resource managers

2024-04-10 Thread Matthew Auld

On 22/03/2024 14:29, Karolina Stolarek wrote:

Add mock resource manager to test ttm_bo_validate() with non-system
placements. Update KConfig entry to enable DRM Buddy allocator, used
by the mock manager. Update move function to do more than just assign
a resource.

Signed-off-by: Karolina Stolarek 
---
  drivers/gpu/drm/Kconfig   |   1 +
  drivers/gpu/drm/ttm/tests/.kunitconfig|   1 +
  drivers/gpu/drm/ttm/tests/Makefile|   1 +
  .../gpu/drm/ttm/tests/ttm_bo_validate_test.c  | 276 ++
  drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c |  39 ++-
  drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h |   2 +
  drivers/gpu/drm/ttm/tests/ttm_mock_manager.c  | 207 +
  drivers/gpu/drm/ttm/tests/ttm_mock_manager.h  |  31 ++
  8 files changed, 556 insertions(+), 2 deletions(-)
  create mode 100644 drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
  create mode 100644 drivers/gpu/drm/ttm/tests/ttm_mock_manager.h

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 91776996ada4..9fb6eb785bf9 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -200,6 +200,7 @@ config DRM_TTM_KUNIT_TEST
  default n
  depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST)
  select DRM_TTM
+select DRM_BUDDY
  select DRM_EXPORT_FOR_TESTS if m
  select DRM_KUNIT_TEST_HELPERS
  default KUNIT_ALL_TESTS
diff --git a/drivers/gpu/drm/ttm/tests/.kunitconfig 
b/drivers/gpu/drm/ttm/tests/.kunitconfig
index 75fdce0cd98e..9228ce9b913c 100644
--- a/drivers/gpu/drm/ttm/tests/.kunitconfig
+++ b/drivers/gpu/drm/ttm/tests/.kunitconfig
@@ -2,3 +2,4 @@ CONFIG_KUNIT=y
  CONFIG_DRM=y
  CONFIG_DRM_KUNIT_TEST_HELPERS=y
  CONFIG_DRM_TTM_KUNIT_TEST=y
+CONFIG_DRM_BUDDY=y
diff --git a/drivers/gpu/drm/ttm/tests/Makefile 
b/drivers/gpu/drm/ttm/tests/Makefile
index 2e5ed63fb414..f3149de77541 100644
--- a/drivers/gpu/drm/ttm/tests/Makefile
+++ b/drivers/gpu/drm/ttm/tests/Makefile
@@ -7,4 +7,5 @@ obj-$(CONFIG_DRM_TTM_KUNIT_TEST) += \
  ttm_tt_test.o \
  ttm_bo_test.o \
  ttm_bo_validate_test.o \
+ttm_mock_manager.o \
  ttm_kunit_helpers.o
diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c 
b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
index 8229bb31d747..7070b5d16c10 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
@@ -8,12 +8,15 @@
  #include 
  
  #include "ttm_kunit_helpers.h"

+#include "ttm_mock_manager.h"
  
  #define BO_SIZE		SZ_4K

+#define MANAGER_SIZE   SZ_1M
  
  struct ttm_bo_validate_test_case {

const char *description;
enum ttm_bo_type bo_type;
+   uint32_t mem_type;
bool with_ttm;
  };
  
@@ -102,6 +105,49 @@ static void ttm_bo_init_reserved_sys_man(struct kunit *test)

ttm_bo_put(bo);
  }
  
+static void ttm_bo_init_reserved_mock_man(struct kunit *test)

+{
+   const struct ttm_bo_validate_test_case *params = test->param_value;
+   enum ttm_bo_type bo_type = params->bo_type;
+   struct ttm_test_devices *priv = test->priv;
+   uint32_t size = ALIGN(BO_SIZE, PAGE_SIZE);
+   struct ttm_operation_ctx ctx = { };
+   struct ttm_placement *placement;
+   uint32_t mem_type = TTM_PL_VRAM;
+   struct ttm_buffer_object *bo;
+   struct ttm_place *place;
+   int err;
+
+   ttm_mock_manager_init(priv->ttm_dev, mem_type, MANAGER_SIZE);
+
+   bo = kunit_kzalloc(test, sizeof(*bo), GFP_KERNEL);
+   KUNIT_ASSERT_NOT_NULL(test, bo);
+
+   place = ttm_place_kunit_init(test, mem_type, 0);
+   placement = ttm_placement_kunit_init(test, place, 1);
+
+   drm_gem_private_object_init(priv->drm, >base, size);
+
+   err = ttm_bo_init_reserved(priv->ttm_dev, bo, bo_type, placement,
+  PAGE_SIZE, , NULL, NULL,
+  _ttm_bo_destroy);
+   dma_resv_unlock(bo->base.resv);
+
+   KUNIT_EXPECT_EQ(test, err, 0);
+   KUNIT_EXPECT_EQ(test, kref_read(>kref), 1);
+   KUNIT_EXPECT_PTR_EQ(test, bo->bdev, priv->ttm_dev);
+   KUNIT_EXPECT_EQ(test, bo->type, bo_type);
+   KUNIT_EXPECT_EQ(test, ctx.bytes_moved, size);
+
+   if (bo_type != ttm_bo_type_kernel)
+   KUNIT_EXPECT_TRUE(test,
+ 
drm_mm_node_allocated(>base.vma_node.vm_node));
+
+   ttm_resource_free(bo, >resource);
+   ttm_bo_put(bo);
+   ttm_mock_manager_fini(priv->ttm_dev, mem_type);
+}
+
  static void ttm_bo_init_reserved_resv(struct kunit *test)
  {
enum ttm_bo_type bo_type = ttm_bo_type_device;
@@ -136,6 +182,51 @@ static void ttm_bo_init_reserved_resv(struct kunit *test)
ttm_bo_put(bo);
  }
  
+static void ttm_bo_validate_basic(struct kunit *test)

+{
+   const struct ttm_bo_validate_test_case *params = test->param_value;
+   uint32_t fst_mem = TTM_PL_SYSTEM, snd_mem = TTM_PL_VRAM;
+   struct 

Re: [PATCH v10 3/9] drm/ttm/tests: Test simple BO creation and validation

2024-04-10 Thread Matthew Auld
_init_reserved_resv(struct kunit *test)
+{
+   enum ttm_bo_type bo_type = ttm_bo_type_device;
+   struct ttm_test_devices *priv = test->priv;
+   uint32_t size = ALIGN(BO_SIZE, PAGE_SIZE);
+   struct ttm_operation_ctx ctx = { };
+   struct ttm_placement *placement;
+   struct ttm_buffer_object *bo;
+   struct ttm_place *place;
+   struct dma_resv resv;
+   int err;
+
+   bo = kunit_kzalloc(test, sizeof(*bo), GFP_KERNEL);
+   KUNIT_ASSERT_NOT_NULL(test, bo);
+
+   place = ttm_place_kunit_init(test, TTM_PL_SYSTEM, 0);
+   placement = ttm_placement_kunit_init(test, place, 1);
+
+   drm_gem_private_object_init(priv->drm, >base, size);
+   dma_resv_init();
+   dma_resv_lock(, NULL);
+
+   err = ttm_bo_init_reserved(priv->ttm_dev, bo, bo_type, placement,
+  PAGE_SIZE, , NULL, ,
+  _ttm_bo_destroy);
+   dma_resv_unlock(bo->base.resv);
+
+   KUNIT_EXPECT_EQ(test, err, 0);
+   KUNIT_EXPECT_PTR_EQ(test, bo->base.resv, );
+
+   ttm_resource_free(bo, >resource);
+   ttm_bo_put(bo);
+}
+
+static void ttm_bo_validate_invalid_placement(struct kunit *test)
+{
+   enum ttm_bo_type bo_type = ttm_bo_type_device;
+   uint32_t unknown_mem_type = TTM_PL_PRIV + 1;
+   uint32_t size = ALIGN(BO_SIZE, PAGE_SIZE);
+   struct ttm_operation_ctx ctx = { };
+   struct ttm_placement *placement;
+   struct ttm_buffer_object *bo;
+   struct ttm_place *place;
+   int err;
+
+   place = ttm_place_kunit_init(test, unknown_mem_type, 0);
+   placement = ttm_placement_kunit_init(test, place, 1);
+
+   bo = ttm_bo_kunit_init(test, test->priv, size);
+   bo->type = bo_type;
+
+   ttm_bo_reserve(bo, false, false, NULL);
+   err = ttm_bo_validate(bo, placement, );
+   dma_resv_unlock(bo->base.resv);
+
+   KUNIT_EXPECT_EQ(test, err, -ENOMEM);
+
+   ttm_bo_put(bo);
+}
+
+static void ttm_bo_validate_pinned(struct kunit *test)
+{
+   enum ttm_bo_type bo_type = ttm_bo_type_device;
+   uint32_t size = ALIGN(BO_SIZE, PAGE_SIZE);
+   struct ttm_operation_ctx ctx = { };
+   uint32_t mem_type = TTM_PL_SYSTEM;
+   struct ttm_placement *placement;
+   struct ttm_buffer_object *bo;
+   struct ttm_place *place;
+   int err;
+
+   place = ttm_place_kunit_init(test, mem_type, 0);
+   placement = ttm_placement_kunit_init(test, place, 1);
+
+   bo = ttm_bo_kunit_init(test, test->priv, size);
+   bo->type = bo_type;
+
+   ttm_bo_reserve(bo, false, false, NULL);
+   ttm_bo_pin(bo);
+   err = ttm_bo_validate(bo, placement, );
+   dma_resv_unlock(bo->base.resv);
+
+   KUNIT_EXPECT_EQ(test, err, -EINVAL);


ttm_bo_put(bo) ?

Reviewed-by: Matthew Auld 


Re: [PATCH v10 2/9] drm/ttm/tests: Use an init function from the helpers lib

2024-04-10 Thread Matthew Auld

On 22/03/2024 14:29, Karolina Stolarek wrote:

Add a new helper function that also initializes the device. Use it in
ttm_tt test suite and delete the local definition.

Signed-off-by: Karolina Stolarek 

Reviewed-by: Matthew Auld 


Re: [PATCH v10 1/3] drm/buddy: Implement tracking clear page feature

2024-04-10 Thread Matthew Auld

On 08/04/2024 16:16, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list(Christian)
   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in arguments.
   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

v3:
   - fix Gitlab user reported lockup issue.
   - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
   - modify to pass the root order instead max_order in fini()
 function(Matthew)
   - change bool 1 to true(Matthew)
   - add check if min_block_size is power of 2(Matthew)
   - modify the min_block_size datatype to u64(Matthew)

v4:
   - rename the function drm_buddy_defrag with __force_merge.
   - Include __force_merge directly in drm buddy file and remove
 the defrag use in amdgpu driver.
   - Remove list_empty() check(Matthew)
   - Remove unnecessary space, headers and placement of new variables(Matthew)
   - Add a unit test case(Matthew)

v5:
   - remove force merge support to actual range allocation and not to bail
 out when contains && split(Matthew)
   - add range support to force merge function.

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 430 ++
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c|  28 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  16 +-
  6 files changed, 368 insertions(+), 122 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
  
  error_free_blocks:

-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
  error_fini:
ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
  
  	amdgpu_vram_mgr_do_reserve(man);
  
-	drm_buddy_free_list(mm, >blocks);

+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
  
  	atomic64_sub(vis_usage, >vis_usage);

@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
  
  	list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {

-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 5ebdd6f8f36e..83dbe252f727 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,8 +38,8 @@ static void drm_block_free(struct drm_buddy *mm,
kmem_cache_free(slab_blocks, block);
  }
  
-static void list_insert_sorted(struct drm_buddy *mm,

-  struct drm_buddy_block *block)
+static void list_insert(struct drm_buddy *mm,
+   struct drm_buddy_block *block)


Why the change here?


  {
struct drm_buddy_block *node;
struct list_head *head;
@@ -57,6 +57,16 @@ s

Re: [PATCH v10 3/3] drm/tests: Add a test case for drm buddy clear allocation

2024-04-08 Thread Matthew Auld

On 08/04/2024 16:16, Arunpravin Paneer Selvam wrote:

Add a new test case for the drm buddy clear and dirty
allocation.

v2:(Matthew)
   - make size as u32
   - rename PAGE_SIZE with SZ_4K
   - dont fragment the address space for all the order allocation
 iterations. we can do it once and just increment and allocate
 the size.
   - create new mm with non power-of-two size to ensure the multi-root
 force_merge during fini.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/tests/drm_buddy_test.c | 141 +
  1 file changed, 141 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index 4621a860cb05..b07f132f2835 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -224,6 +224,146 @@ static void drm_test_buddy_alloc_range_bias(struct kunit 
*test)
drm_buddy_fini();
  }
  
+static void drm_test_buddy_alloc_clear(struct kunit *test)

+{
+   unsigned long n_pages, total, i = 0;
+   const unsigned long ps = SZ_4K;
+   struct drm_buddy_block *block;
+   const int max_order = 12;
+   LIST_HEAD(allocated);
+   struct drm_buddy mm;
+   unsigned int order;
+   u32 mm_size, size;
+   LIST_HEAD(dirty);
+   LIST_HEAD(clean);
+
+   mm_size = SZ_4K << max_order;
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+
+   KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
+
+   /*
+* Idea is to allocate and free some random portion of the address 
space,
+* returning those pages as non-dirty and randomly alternate between
+* requesting dirty and non-dirty pages (not going over the limit
+* we freed as non-dirty), putting that into two separate lists.
+* Loop over both lists at the end checking that the dirty list
+* is indeed all dirty pages and vice versa. Free it all again,
+* keeping the dirty/clear status.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+   5 * ps, ps, &allocated,
+   DRM_BUDDY_TOPDOWN_ALLOCATION),
+   "buddy_alloc hit an error size=%lu\n", 5 * ps);
+   drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
+
+   n_pages = 10;
+   do {
+   unsigned long flags;
+   struct list_head *list;
+   int slot = i % 2;
+
+   if (slot == 0) {
+   list = &dirty;
+   flags = 0;
+   } else {
+   list = &clean;
+   flags = DRM_BUDDY_CLEAR_ALLOCATION;
+   }
+
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+   ps, ps, list,
+   flags),
+   "buddy_alloc hit an error size=%lu\n", ps);
+   } while (++i < n_pages);
+
+   list_for_each_entry(block, &clean, link)
+   KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), true);
+
+   list_for_each_entry(block, &dirty, link)
+   KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+
+   drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
+
+   /*
+* Trying to go over the clear limit for some allocation.
+* The allocation should never fail with reasonable page-size.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+   10 * ps, ps, &clean,
+   DRM_BUDDY_CLEAR_ALLOCATION),
+   "buddy_alloc hit an error size=%lu\n", 10 * ps);
+
+   drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
+   drm_buddy_free_list(&mm, &dirty, 0);
+   drm_buddy_fini(&mm);
+
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+
+   /*
+* Create a new mm. Intentionally fragment the address space by creating
+* two alternating lists. Free both lists, one as dirty the other as 
clean.
+* Try to allocate double the previous size with matching 
min_page_size. The
+* allocation should never fail as it calls the force_merge. Also check 
that
+* the page is always dirty after force_merge. Free the page as dirty, 
then
+* repeat the whole thing, increment the order until we hit the 
max_order.
+*/
+
+   i = 0;
+   n_pages = mm_size / ps;
+   do {
+   struct list_head *list;
+   int slot = i % 2;
+
+   if (slot == 0)
+   list = 
+   else
+   list = 
+
+   KUNIT_ASSERT_FALSE_MSG(tes

Re: [PATCH v9 1/3] drm/buddy: Implement tracking clear page feature

2024-04-05 Thread Matthew Auld

On 01/04/2024 12:07, Paneer Selvam, Arunpravin wrote:

Hi Matthew,

On 3/28/2024 10:18 PM, Matthew Auld wrote:

On 28/03/2024 16:07, Paneer Selvam, Arunpravin wrote:

Hi Matthew,

On 3/26/2024 11:39 PM, Matthew Auld wrote:

On 18/03/2024 21:40, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the 
list(Christian)

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of 
blocks

 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required 
blocks.
   - Update the xe driver for the drm_buddy_free_list change in 
arguments.

   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

v3:
   - fix Gitlab user reported lockup issue.
   - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
   - modify to pass the root order instead max_order in fini()
 function(Matthew)
   - change bool 1 to true(Matthew)
   - add check if min_block_size is power of 2(Matthew)
   - modify the min_block_size datatype to u64(Matthew)

v4:
   - rename the function drm_buddy_defrag with __force_merge.
   - Include __force_merge directly in drm buddy file and remove
 the defrag use in amdgpu driver.
   - Remove list_empty() check(Matthew)
   - Remove unnecessary space, headers and placement of new 
variables(Matthew)

   - Add a unit test case(Matthew)

Signed-off-by: Arunpravin Paneer Selvam 


Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 427 
++

  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  18 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  16 +-
  6 files changed, 360 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
  error_fini:
  ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
    atomic64_sub(vis_usage, >vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
*adev)

  kfree(rsv);
    list_for_each_entry_safe(rsv, temp, >reserved_pages, 
blocks) {

-    drm_buddy_free_list(>mm, >allocated);
+    drm_buddy_free_list(>mm, >allocated, 0);
  kfree(rsv);
  }
  if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c4222b886db7..625a30a6b855 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,8 +38,8 @@ static void drm_block_free(struct drm_buddy *mm,
  kmem_cache_free(slab_blocks, block);
  }
  -static void list_insert_sorted(struct drm_buddy *mm,
-   struct drm_buddy_block *block)
+static void list_insert(struct drm_buddy *mm,
+    struct drm_buddy_block *block)
  {
  struct drm_buddy_block *node;
  struct list_head *head;
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm

Re: [PATCH v9 1/3] drm/buddy: Implement tracking clear page feature

2024-03-28 Thread Matthew Auld

On 28/03/2024 16:07, Paneer Selvam, Arunpravin wrote:

Hi Matthew,

On 3/26/2024 11:39 PM, Matthew Auld wrote:

On 18/03/2024 21:40, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the 
list(Christian)

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of 
blocks

 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in 
arguments.

   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

v3:
   - fix Gitlab user reported lockup issue.
   - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
   - modify to pass the root order instead max_order in fini()
 function(Matthew)
   - change bool 1 to true(Matthew)
   - add check if min_block_size is power of 2(Matthew)
   - modify the min_block_size datatype to u64(Matthew)

v4:
   - rename the function drm_buddy_defrag with __force_merge.
   - Include __force_merge directly in drm buddy file and remove
 the defrag use in amdgpu driver.
   - Remove list_empty() check(Matthew)
   - Remove unnecessary space, headers and placement of new 
variables(Matthew)

   - Add a unit test case(Matthew)

Signed-off-by: Arunpravin Paneer Selvam 


Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 427 ++
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  18 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  16 +-
  6 files changed, 360 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
  error_fini:
  ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
    atomic64_sub(vis_usage, >vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
*adev)

  kfree(rsv);
    list_for_each_entry_safe(rsv, temp, >reserved_pages, 
blocks) {

-    drm_buddy_free_list(>mm, >allocated);
+    drm_buddy_free_list(>mm, >allocated, 0);
  kfree(rsv);
  }
  if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c4222b886db7..625a30a6b855 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,8 +38,8 @@ static void drm_block_free(struct drm_buddy *mm,
  kmem_cache_free(slab_blocks, block);
  }
  -static void list_insert_sorted(struct drm_buddy *mm,
-   struct drm_buddy_block *block)
+static void list_insert(struct drm_buddy *mm,
+    struct drm_buddy_block *block)
  {
  struct drm_buddy_block *node;
  struct list_head *head;
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
  __list_add(>link, node->link.prev, >link);
  }
  +static void clear_reset(s

Re: [PATCH v9 1/3] drm/buddy: Implement tracking clear page feature

2024-03-26 Thread Matthew Auld

On 18/03/2024 21:40, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list(Christian)
   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in arguments.
   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

v3:
   - fix Gitlab user reported lockup issue.
   - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
   - modify to pass the root order instead max_order in fini()
 function(Matthew)
   - change bool 1 to true(Matthew)
   - add check if min_block_size is power of 2(Matthew)
   - modify the min_block_size datatype to u64(Matthew)

v4:
   - rename the function drm_buddy_defrag with __force_merge.
   - Include __force_merge directly in drm buddy file and remove
 the defrag use in amdgpu driver.
   - Remove list_empty() check(Matthew)
   - Remove unnecessary space, headers and placement of new variables(Matthew)
   - Add a unit test case(Matthew)

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 427 ++
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c|  18 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  16 +-
  6 files changed, 360 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
  
  error_free_blocks:

-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
  error_fini:
ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
  
  	amdgpu_vram_mgr_do_reserve(man);
  
-	drm_buddy_free_list(mm, >blocks);

+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
  
  	atomic64_sub(vis_usage, >vis_usage);

@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
  
  	list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {

-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c4222b886db7..625a30a6b855 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,8 +38,8 @@ static void drm_block_free(struct drm_buddy *mm,
kmem_cache_free(slab_blocks, block);
  }
  
-static void list_insert_sorted(struct drm_buddy *mm,

-  struct drm_buddy_block *block)
+static void list_insert(struct drm_buddy *mm,
+   struct drm_buddy_block *block)
  {
struct drm_buddy_block *node;
struct list_head *head;
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
  }
  
+static void clear_reset(struct drm_buddy_block *block)

+{
+   block->header &

Re: [PATCH v9 3/3] drm/tests: Add a test case for drm buddy clear allocation

2024-03-26 Thread Matthew Auld

On 18/03/2024 21:40, Arunpravin Paneer Selvam wrote:

Add a new test case for the drm buddy clear and dirty
allocation.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/tests/drm_buddy_test.c | 127 +
  1 file changed, 127 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index 454ad9952f56..d355a6e61893 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -19,6 +19,132 @@ static inline u64 get_size(int order, u64 chunk_size)
return (1 << order) * chunk_size;
  }
  
+static void drm_test_buddy_alloc_clear(struct kunit *test)

+{
+   unsigned long n_pages, total, i = 0;
+   const unsigned long ps = SZ_4K;
+   struct drm_buddy_block *block;
+   const int max_order = 12;
+   LIST_HEAD(allocated);
+   struct drm_buddy mm;
+   unsigned int order;
+   u64 mm_size, size;


Maybe just make these two u32 or unsigned long. That should be big 
enough, plus it avoids any kind of 32-bit compilation bugs below.



+   LIST_HEAD(dirty);
+   LIST_HEAD(clean);
+
+   mm_size = PAGE_SIZE << max_order;


s/PAGE_SIZE/SZ_4K/ below also.


+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+
+   KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
+
+   /**


Drop the extra *, since this is not actual kernel-doc. Same below.


+* Idea is to allocate and free some random portion of the address 
space,
+* returning those pages as non-dirty and randomly alternate between
+* requesting dirty and non-dirty pages (not going over the limit
+* we freed as non-dirty), putting that into two separate lists.
+* Loop over both lists at the end checking that the dirty list
+* is indeed all dirty pages and vice versa. Free it all again,
+* keeping the dirty/clear status.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+   5 * ps, ps, &allocated,
+   DRM_BUDDY_TOPDOWN_ALLOCATION),
+   "buddy_alloc hit an error size=%u\n", 5 * ps);
+   drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
+
+   n_pages = 10;
+   do {
+   unsigned long flags;
+   struct list_head *list;
+   int slot = i % 2;
+
+   if (slot == 0) {
+   list = &dirty;
+   flags = 0;
+   } else if (slot == 1) {


Could just be else {


+   list = &clean;
+   flags = DRM_BUDDY_CLEAR_ALLOCATION;
+   }
+
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+   ps, ps, list,
+   flags),
+   "buddy_alloc hit an error size=%u\n", ps);
+   } while (++i < n_pages);
+
+   list_for_each_entry(block, &clean, link)
+   KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), true);
+
+   list_for_each_entry(block, &dirty, link)
+   KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+
+   drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
+
+   /**
+* Trying to go over the clear limit for some allocation.
+* The allocation should never fail with reasonable page-size.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+   10 * ps, ps, &clean,
+   DRM_BUDDY_CLEAR_ALLOCATION),
+   "buddy_alloc hit an error size=%u\n", 10 * ps);
+
+   drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
+   drm_buddy_free_list(&mm, &dirty, 0);
+   drm_buddy_fini(&mm);
+
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+
+   /**
+* Create a new mm. Intentionally fragment the address space by creating
+* two alternating lists. Free both lists, one as dirty the other as 
clean.
+* Try to allocate double the previous size with matching 
min_page_size. The
+* allocation should never fail as it calls the force_merge. Also check 
that
+* the page is always dirty after force_merge. Free the page as dirty, 
then
+* repeat the whole thing, increment the order until we hit the 
max_order.
+*/
+
+   order = 1;
+   do {
+   size = PAGE_SIZE << order;
+   i = 0;
+   n_pages = mm_size / ps;
+   do {
+   struct list_head *list;
+   int slot = i % 2;
+
+   if (slot == 0)
+  

Re: [PATCH v8 1/3] drm/buddy: Implement tracking clear page feature

2024-03-07 Thread Matthew Auld

On 07/03/2024 12:25, Paneer Selvam, Arunpravin wrote:

Hi Matthew,

On 3/6/2024 11:19 PM, Matthew Auld wrote:

On 04/03/2024 16:32, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the 
list(Christian)

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of 
blocks

 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in 
arguments.

   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

v3:
   - fix Gitlab user reported lockup issue.
   - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
   - modify to pass the root order instead max_order in fini()
 function(Matthew)
   - change bool 1 to true(Matthew)
   - add check if min_block_size is power of 2(Matthew)
   - modify the min_block_size datatype to u64(Matthew)

Signed-off-by: Arunpravin Paneer Selvam 


Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 


Is there a unit test for this? What about maybe something roughly like:

- Pick small random mm_size which is not always power-of-two.
- Allocate and free some random portion of the address space, 
returning those pages as non-dirty. Then do another cycle and randomly 
alternate between requesting dirty and non-dirty pages (not going over 
the limit you freed as non-dirty), putting that into two separate 
lists. Loop over both lists at the end checking that the dirty list is 
indeed all dirty pages and vice versa. Free it all again, keeping the 
dirty/clear status.
- Also try to go over the clear limit for some allocation. The 
allocation should never fail with reasonable page-size.
- Test the defrag/force_merge interface. Clean the mm or create new 
one. Intentionally fragment the address space, by creating two 
alternating lists. Free both lists, one as dirty the other as clean. 
Try to allocate double the previous size with matching min_page_size. 
Should fail. Call force_merge. Should now succeed. Also check that the 
page is always dirty after force_merge. Free the page as dirty, then 
repeat the whole thing, doubling the page-size until you hit max_order.
- Make sure we also call fini() with some part of the address space 
left as non-dirty. Should not trigger any warnings.


I think this would be good to consider, but it is not a blocker or anything. Some 
comments below, otherwise I think looks good.
Yes. It is good to have a unit test for this feature. I will send the 
patch.



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 294 +++---
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  18 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  22 +-
  6 files changed, 290 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
  error_fini:
  ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list

Re: [PATCH] drm/tests/buddy: fix print format

2024-03-07 Thread Matthew Auld

On 07/03/2024 08:30, Maxime Ripard wrote:

On Thu, 29 Feb 2024 09:52:26 +, Matthew Auld wrote:

This will report a build warning once we have: 806cb2270237 ("kunit:
Annotate _MSG assertion variants with gnu printf specifiers").




Applied to drm/drm-misc (drm-misc-fixes).


Thanks.



Thanks!
Maxime



Re: [PATCH v8 1/3] drm/buddy: Implement tracking clear page feature

2024-03-06 Thread Matthew Auld

On 04/03/2024 16:32, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list(Christian)
   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in arguments.
   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

v3:
   - fix Gitlab user reported lockup issue.
   - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
   - modify to pass the root order instead max_order in fini()
 function(Matthew)
   - change bool 1 to true(Matthew)
   - add check if min_block_size is power of 2(Matthew)
   - modify the min_block_size datatype to u64(Matthew)

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 


Is there a unit test for this? What about maybe something roughly like:

- Pick small random mm_size which is not always power-of-two.
- Allocate and free some random portion of the address space, returning 
those pages as non-dirty. Then do another cycle and randomly alternate 
between requesting dirty and non-dirty pages (not going over the limit 
you freed as non-dirty), putting that into two separate lists. Loop over 
both lists at the end checking that the dirty list is indeed all dirty 
pages and vice versa. Free it all again, keeping the dirty/clear status.
- Also try to go over the clear limit for some allocation. The 
allocation should never fail with reasonable page-size.
- Test the defrag/force_merge interface. Clean the mm or create new one. 
Intentionally fragment the address space, by creating two alternating 
lists. Free both lists, one as dirty the other as clean. Try to allocate 
double the previous size with matching min_page_size. Should fail. Call 
force_merge. Should now succeed. Also check that the page is always 
dirty after force_merge. Free the page as dirty, then repeat the whole 
thing, doubling the page-size until you hit max_order.
- Make sure we also call fini() with some part of the address space left 
as non-dirty. Should not trigger any warnings.


I think this would be good to consider, but it is not a blocker or anything. Some 
comments below, otherwise I think looks good.



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 294 +++---
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c|  18 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  22 +-
  6 files changed, 290 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
  
  error_free_blocks:

-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
  error_fini:
ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
  
  	amdgpu_vram_mgr_do_reserve(man);
  
-	drm_buddy_free_list(mm, >blocks);

+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
  
  	atomic64_sub(vis_usage, >vis_usage);

@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
 

Re: [PATCH v7 3/3] drm/buddy: Add defragmentation support

2024-03-04 Thread Matthew Auld

On 04/03/2024 12:22, Paneer Selvam, Arunpravin wrote:

Hi Matthew,

On 2/22/2024 12:12 AM, Matthew Auld wrote:

On 21/02/2024 12:18, Arunpravin Paneer Selvam wrote:

Add a function to support defragmentation.

v1:
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2(Matthew):
   - add amdgpu user for defragmentation
   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 17 +++-
  drivers/gpu/drm/drm_buddy.c  | 93 +---
  include/drm/drm_buddy.h  |  3 +
  3 files changed, 97 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index e494f5bf136a..cff8a526c622 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -533,8 +533,21 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

 min_block_size,
 >blocks,
 vres->flags);
-    if (unlikely(r))
-    goto error_free_blocks;
+    if (unlikely(r)) {
+    if (r == -ENOSPC) {
+    drm_buddy_defrag(mm, min_block_size);
+    r = drm_buddy_alloc_blocks(mm, fpfn,
+   lpfn,
+   size,
+   min_block_size,
+   >blocks,
+   vres->flags);
+    if (unlikely(r))
+    goto error_free_blocks;
+    } else {
+    goto error_free_blocks;
+    }
+    }
    if (size > remaining_size)
  remaining_size = 0;
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 18e004fa39d3..56bd1560fbcd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -203,6 +203,8 @@ void drm_buddy_fini(struct drm_buddy *mm)
  drm_block_free(mm, mm->roots[i]);
  }
  +    drm_buddy_defrag(mm, mm->chunk_size << mm->max_order);


I think this needs to be called higher up, otherwise we blow up with 
the WARN, plus we just freed the root(s). There is also the case with 
non-power-of-two VRAM size, in which case you get multiple roots and 
max_order is just the largest root and not entire address space. I 
guess do this in the loop above and use the root order instead?


Also this should be done as part of the first patch and then in this 
patch it is just a case of exporting it. Every commit should ideally 
be functional by itself.
You mean we move the above change in the drm_buddy_fini function and the 
drm_buddy_defrag function into the first patch, and in this patch we just 
add the export and the amdgpu user? Is my 
understanding correct?


Yeah, I think that makes sense.



Thanks,
Arun.



+
  WARN_ON(mm->avail != mm->size);
    kfree(mm->roots);
@@ -276,25 +278,39 @@ drm_get_buddy(struct drm_buddy_block *block)
  }
  EXPORT_SYMBOL(drm_get_buddy);
  -static void __drm_buddy_free(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+static unsigned int __drm_buddy_free(struct drm_buddy *mm,
+ struct drm_buddy_block *block,
+ bool defrag)
  {
+    unsigned int order, block_order;
  struct drm_buddy_block *parent;
  +    block_order = drm_buddy_block_order(block);
+
  while ((parent = block->parent)) {
-    struct drm_buddy_block *buddy;
+    struct drm_buddy_block *buddy = NULL;
    buddy = __get_buddy(block);
    if (!drm_buddy_block_is_free(buddy))
  break;
  -    if (drm_buddy_block_is_clear(block) !=
-    drm_buddy_block_is_clear(buddy))
-    break;
+    if (!defrag) {
+    /*
+ * Check the block and its buddy clear state and exit
+ * the loop if they both have the dissimilar state.
+ */
+    if (drm_buddy_block_is_clear(block) !=
+    drm_buddy_block_is_clear(buddy))
+    break;
  -    if (drm_buddy_block_is_clear(block))
-    mark_cleared(parent);
+    if (drm_buddy_block_is_clear(block))
+    mark_cleared(parent);
+    }
+
+    WARN_ON(defrag &&
+    (drm_buddy_block_is_clear(block) ==
+ drm_buddy_block_is_clear(buddy)));
    list_del(>link);
  @@ -304,8 +320,57 @@ static void __drm_buddy_free(struct drm_buddy 
*mm,

  block = parent;
  }
  -    mark_free

Re: [PULL] drm-misc-fixes

2024-02-29 Thread Matthew Auld

On 29/02/2024 13:37, Maxime Ripard wrote:

Hi,

Here's this week's drm-misc fixes PR.

There are two commits for files under drivers/soc/qcom that don't have a
maintainer Acked-by. Bjorn's Acked-by was provided on IRC, and Konrad
provided it by mail after the fact, so we're covered.

Maxime

drm-misc-fixes-2024-02-29:
A reset fix for host1x, a resource leak fix and a probe fix for aux-hpd,
a use-after-free fix and a boot fix for a pmic_glink qcom driver in
drivers/soc, a fix for the simpledrm/tegra transition, a kunit fix for
the TTM tests, a font handling fix for fbcon, two allocation fixes and a
kunit test to cover them for drm/buddy
The following changes since commit 72fa02fdf83306c52bc1eede28359e3fa32a151a:

   nouveau: add an ioctl to report vram usage (2024-02-23 10:20:07 +1000)

are available in the Git repository at:

   https://anongit.freedesktop.org/git/drm/drm-misc 
tags/drm-misc-fixes-2024-02-29

for you to fetch changes up to c70703320e557ff30847915e6a7631a9abdda16b:

   drm/tests/drm_buddy: add alloc_range_bias test (2024-02-28 08:03:29 +0100)


A reset fix for host1x, a resource leak fix and a probe fix for aux-hpd,
a use-after-free fix and a boot fix for a pmic_glink qcom driver in
drivers/soc, a fix for the simpledrm/tegra transition, a kunit fix for
the TTM tests, a font handling fix for fbcon, two allocation fixes and a
kunit test to cover them for drm/buddy


Christian König (1):
   drm/ttm/tests: depend on UML || COMPILE_TEST

Jiri Slaby (SUSE) (1):
   fbcon: always restore the old font data in fbcon_do_set_font()

Johan Hovold (3):
   drm/bridge: aux-hpd: fix OF node leaks
   drm/bridge: aux-hpd: separate allocation and registration
   soc: qcom: pmic_glink_altmode: fix drm bridge use-after-free

Matthew Auld (3):
   drm/buddy: fix range bias
   drm/buddy: check range allocation matches alignment
   drm/tests/drm_buddy: add alloc_range_bias test


Note that there is a build fix needed for this one:
https://patchwork.freedesktop.org/patch/580568/?series=130552&rev=1



Maxime Ripard (1):
   Merge drm/drm-fixes into drm-misc-fixes

Mikko Perttunen (1):
   gpu: host1x: Skip reset assert on Tegra186

Rob Clark (1):
   soc: qcom: pmic_glink: Fix boot when QRTR=m

Thierry Reding (1):
   drm/tegra: Remove existing framebuffer only if we support display

  drivers/gpu/drm/Kconfig |   5 +-
  drivers/gpu/drm/bridge/aux-hpd-bridge.c |  70 +++---
  drivers/gpu/drm/drm_buddy.c |  16 ++-
  drivers/gpu/drm/tegra/drm.c |  23 +++-
  drivers/gpu/drm/tests/drm_buddy_test.c  | 218 
  drivers/gpu/host1x/dev.c|  15 ++-
  drivers/gpu/host1x/dev.h|   6 +
  drivers/soc/qcom/pmic_glink.c   |  21 +--
  drivers/soc/qcom/pmic_glink_altmode.c   |  16 ++-
  drivers/video/fbdev/core/fbcon.c|   8 +-
  include/drm/bridge/aux-bridge.h |  15 +++
  11 files changed, 368 insertions(+), 45 deletions(-)


[PATCH 2/2] drm/tests/buddy: stop using PAGE_SIZE

2024-02-29 Thread Matthew Auld
Using PAGE_SIZE gives the wrong impression that the min page-size has to be
tied to the CPU PAGE_SIZE.

Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Cc: Arnd Bergmann 
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 42 +-
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index be2d9d7764be..8528f39a84e6 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -329,8 +329,8 @@ static void drm_test_buddy_alloc_pathological(struct kunit 
*test)
 * Eventually we will have a fully 50% fragmented mm.
 */
 
-   mm_size = PAGE_SIZE << max_order;
-   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(, mm_size, PAGE_SIZE),
+   mm_size = SZ_4K << max_order;
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(, mm_size, SZ_4K),
   "buddy_init failed\n");
 
KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
@@ -344,7 +344,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit 
*test)
}
 
for (order = top; order--;) {
-   size = get_size(order, PAGE_SIZE);
+   size = get_size(order, mm.chunk_size);
KUNIT_ASSERT_FALSE_MSG(test, 
drm_buddy_alloc_blocks(, start,

mm_size, size, size,

, flags),
@@ -358,7 +358,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit 
*test)
}
 
/* There should be one final page for this sub-allocation */
-   size = get_size(0, PAGE_SIZE);
+   size = get_size(0, mm.chunk_size);
KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, start, 
mm_size,
size, size, 
, flags),
   "buddy_alloc hit 
-ENOMEM for hole\n");
@@ -368,7 +368,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit 
*test)
 
list_move_tail(>link, );
 
-   size = get_size(top, PAGE_SIZE);
+   size = get_size(top, mm.chunk_size);
KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, start, 
mm_size,
   size, size, 
, flags),
  "buddy_alloc 
unexpectedly succeeded at top-order %d/%d, it should be full!",
@@ -379,7 +379,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit 
*test)
 
/* Nothing larger than blocks of chunk_size now available */
for (order = 1; order <= max_order; order++) {
-   size = get_size(order, PAGE_SIZE);
+   size = get_size(order, mm.chunk_size);
KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, start, 
mm_size,
   size, size, 
, flags),
  "buddy_alloc 
unexpectedly succeeded at order %d, it should be full!",
@@ -408,14 +408,14 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit 
*test)
 * page left.
 */
 
-   mm_size = PAGE_SIZE << max_order;
-   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(, mm_size, PAGE_SIZE),
+   mm_size = SZ_4K << max_order;
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(, mm_size, SZ_4K),
   "buddy_init failed\n");
 
KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
 
for (order = 0; order < max_order; order++) {
-   size = get_size(order, PAGE_SIZE);
+   size = get_size(order, mm.chunk_size);
KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, start, 
mm_size,
size, size, 
, flags),
   "buddy_alloc hit 
-ENOMEM with order=%d\n",
@@ -428,7 +428,7 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit 
*test)
}
 
/* And now the last remaining block available */
-   size = get_size(0, PAGE_SIZE);
+   size = get_size(0, mm.chunk_size);
KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, start, mm_size,
size, size, , 
flags),
   "buddy_alloc hit -ENOMEM on 
final alloc\n");
@@ -440,7 +440,7 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit 
*test)
 
/* Should be completely full! */
for (order = max_order; order--;) {

[PATCH 1/2] drm/buddy: stop using PAGE_SIZE

2024-02-29 Thread Matthew Auld
The drm_buddy minimum page-size requirements should be distinct from the
CPU PAGE_SIZE. The only restriction is that the minimum page-size is at
least 4K.

Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Cc: Arnd Bergmann 
---
 drivers/gpu/drm/drm_buddy.c | 2 +-
 include/drm/drm_buddy.h | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 5ebdd6f8f36e..f999568d69c1 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -102,7 +102,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 
chunk_size)
if (size < chunk_size)
return -EINVAL;
 
-   if (chunk_size < PAGE_SIZE)
+   if (chunk_size < SZ_4K)
return -EINVAL;
 
if (!is_power_of_2(chunk_size))
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index a5b39fc01003..19ed661a32f3 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -53,8 +53,8 @@ struct drm_buddy_block {
struct list_head tmp_link;
 };
 
-/* Order-zero must be at least PAGE_SIZE */
-#define DRM_BUDDY_MAX_ORDER (63 - PAGE_SHIFT)
+/* Order-zero must be at least SZ_4K */
+#define DRM_BUDDY_MAX_ORDER (63 - 12)
 
 /*
  * Binary Buddy System.
@@ -82,7 +82,7 @@ struct drm_buddy {
unsigned int n_roots;
unsigned int max_order;
 
-   /* Must be at least PAGE_SIZE */
+   /* Must be at least SZ_4K */
u64 chunk_size;
u64 size;
u64 avail;
-- 
2.43.2



[PATCH] drm/tests/buddy: fix print format

2024-02-29 Thread Matthew Auld
This will report a build warning once we have: 806cb2270237 ("kunit:
Annotate _MSG assertion variants with gnu printf specifiers").

Reported-by: Stephen Rothwell 
Fixes: c70703320e55 ("drm/tests/drm_buddy: add alloc_range_bias test")
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index be2d9d7764be..484360c7e1f6 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -189,7 +189,7 @@ static void drm_test_buddy_alloc_range_bias(struct kunit 
*test)
  ,
  
DRM_BUDDY_RANGE_ALLOCATION),
   "buddy_alloc failed with bias(%x-%x), 
size=%u, ps=%u\n",
-  bias_start, bias_end, size);
+  bias_start, bias_end, size, ps);
bias_rem -= size;
 
/*
-- 
2.43.2



Re: [PATCH 1/6] drm/tests/drm_buddy: fix 32b build

2024-02-28 Thread Matthew Auld

On 28/02/2024 07:20, Christian König wrote:

Am 26.02.24 um 10:58 schrieb Matthew Auld:

On 19/02/2024 12:24, Matthew Auld wrote:

On 19/02/2024 10:48, Matthew Auld wrote:

On 19/02/2024 10:30, Christian König wrote:

Am 19.02.24 um 11:28 schrieb Matthew Auld:

On 19/02/2024 09:53, Christian König wrote:

Am 19.02.24 um 10:42 schrieb Matthew Auld:

On 15/02/2024 17:44, Matthew Auld wrote:
Doesn't seem to compile on 32b, presumably due to u64 
mod/division.
Simplest is to just switch over to u32 here. Also make print 
modifiers

consistent with that.

Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous 
test")

Reported-by: Geert Uytterhoeven 
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Cc: Maxime Ripard 


Any chance someone can push just this single patch here, since 
it fixes 32b build? It already has an r-b from Arun.


Already working on this. Just give me a few more minutes.


Thanks.


No problem. I would have pushed this earlier, but my build server 
doesn't want to work any more. Looks like the SSD has passed its 
warranty :(


Should I push the other three patches to drm-misc-fixes as well? I 
currently can't even build test them.


Need to send a v2 for that. One minor change in the test just to be 
consistent with using u32. Thanks.


Sent v2. If you could push that when you get a chance. Thanks.

https://patchwork.freedesktop.org/series/130075/


Gentle ping on merging v2.


Pushed all three to drm-misc-fixes.


Thanks.



Regards,
Christian.









Thanks,
Christian.





Thanks,
Christian.




---
  drivers/gpu/drm/tests/drm_buddy_test.c | 16 
  1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c

index fee6bec757d1..edacc1adb28f 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -21,7 +21,7 @@ static inline u64 get_size(int order, u64 
chunk_size)
    static void drm_test_buddy_alloc_contiguous(struct kunit 
*test)

  {
-    u64 mm_size, ps = SZ_4K, i, n_pages, total;
+    u32 mm_size, ps = SZ_4K, i, n_pages, total;
  struct drm_buddy_block *block;
  struct drm_buddy mm;
  LIST_HEAD(left);
@@ -56,30 +56,30 @@ static void 
drm_test_buddy_alloc_contiguous(struct kunit *test)

  KUNIT_ASSERT_FALSE_MSG(test,
drm_buddy_alloc_blocks(, 0, mm_size,
    ps, ps, list, 0),
-   "buddy_alloc hit an error size=%d\n",
+   "buddy_alloc hit an error size=%u\n",
 ps);
  } while (++i < n_pages);
    KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 
0, mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%d\n", 3 * ps);
+   "buddy_alloc didn't error size=%u\n", 3 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 
0, mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 3 * 
ps);

+   "buddy_alloc didn't error size=%u\n", 3 * ps);
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 
0, mm_size,

 2 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 2 * 
ps);

+   "buddy_alloc didn't error size=%u\n", 2 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 
0, mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 3 * 
ps);

+   "buddy_alloc didn't error size=%u\n", 3 * ps);
  /*
   * At this point we should have enough contiguous space 
for 2 blocks,
   * however they are never buddies (since we freed middle 
and right) so
@@ -88,13 +88,13 @@ static void 
drm_test_buddy_alloc_contiguous(struct kunit *test)
  KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 
0, mm_size,

  2 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc hit an error size=%d\n", 2 * ps);
+   "buddy_alloc hit an error size=%u\n", 2 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 
0, mm_size,

  3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc hit an error size=%d\n", 3 * ps);
+   "buddy_alloc hit an error size=%u\n", 3 * ps);
    total = 0;
  list_for_each_entry(block, , link)








Re: [PATCH 1/2] drm/ttm: improve idle/busy handling v4

2024-02-27 Thread Matthew Auld

On 26/02/2024 20:21, Thomas Hellström wrote:

Hi, Christian

On Fri, 2024-02-23 at 15:30 +0100, Christian König wrote:

Am 06.02.24 um 13:56 schrieb Christian König:

Am 06.02.24 um 13:53 schrieb Thomas Hellström:

Hi, Christian,

On Fri, 2024-01-26 at 15:09 +0100, Christian König wrote:

Previously we would never try to move a BO into the preferred
placements
when it ever landed in a busy placement since those were
considered
compatible.

Rework the whole handling and finally unify the idle and busy
handling.
ttm_bo_validate() is now responsible to try idle placement
first and
then
use the busy placement if that didn't work.

Drawback is that we now always try the idle placement first for
each
validation which might cause some additional CPU overhead on
overcommit.

v2: fix kerneldoc warning and coding style
v3: take care of XE as well
v4: keep the ttm_bo_mem_space functionality as it is for now,
only
add
  new handling for ttm_bo_validate as suggested by Thomas

Signed-off-by: Christian König 
Reviewed-by: Zack Rusin  v3

Sending this through xe CI, will try to review asap.


Take your time. At the moment people are bombarding me with work
and I
have only two hands and one head as well :(


So I've dug myself out of that hole and would rather like to get
this
new feature into 6.9.

Any time to review it? I can also plan some time to review your LRU
changes next week.

Thanks,
Christian.


Sorry for the late response. Was planning to review but saw that there
was still an xe CI failure.

https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-129579v1/bat-atsm-2/igt@xe_evict_...@evict-overcommit-parallel-nofree-samefd.html

I haven't really had time to look into what might be causing this,
though.

Maybe in ttm_bo_alloc_resource():

@@ -772,7 +772,7 @@ static int ttm_bo_alloc_resource(struct 
ttm_buffer_object *bo,


do {
ret = ttm_resource_alloc(bo, place, res);
-   if (unlikely(ret != -ENOSPC))
+   if (unlikely(ret && ret != -ENOSPC))
return ret;
if (likely(!ret) || !force_space)
break;

Otherwise we allocate VRAM but never correctly synchronise against the 
move fence, since we missed adding it to the BO. When we trigger async 
evictions that would explain the above test failure where we detect VRAM 
corruption, since someone else is still using the VRAM we allocated. 
What do you think?




/Thomas





Christian.



/Thomas



---
   drivers/gpu/drm/ttm/ttm_bo.c   | 231 +---
---
--
   drivers/gpu/drm/ttm/ttm_resource.c |  16 +-
   include/drm/ttm/ttm_resource.h |   3 +-
   3 files changed, 121 insertions(+), 129 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
b/drivers/gpu/drm/ttm/ttm_bo.c
index ba3f09e2d7e6..b12f435542a9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -724,64 +724,36 @@ static int ttm_bo_add_move_fence(struct
ttm_buffer_object *bo,
   return ret;
   }
   -/*
- * Repeatedly evict memory from the LRU for @mem_type until we
create enough
- * space, or we've evicted everything and there isn't enough
space.
- */
-static int ttm_bo_mem_force_space(struct ttm_buffer_object
*bo,
-      const struct ttm_place *place,
-      struct ttm_resource **mem,
-      struct ttm_operation_ctx *ctx)
-{
-    struct ttm_device *bdev = bo->bdev;
-    struct ttm_resource_manager *man;
-    struct ww_acquire_ctx *ticket;
-    int ret;
-
-    man = ttm_manager_type(bdev, place->mem_type);
-    ticket = dma_resv_locking_ctx(bo->base.resv);
-    do {
-    ret = ttm_resource_alloc(bo, place, mem);
-    if (likely(!ret))
-    break;
-    if (unlikely(ret != -ENOSPC))
-    return ret;
-    ret = ttm_mem_evict_first(bdev, man, place, ctx,
-      ticket);
-    if (unlikely(ret != 0))
-    return ret;
-    } while (1);
-
-    return ttm_bo_add_move_fence(bo, man, *mem, ctx-

no_wait_gpu);

-}
-
   /**
- * ttm_bo_mem_space
+ * ttm_bo_alloc_resource - Allocate backing store for a BO
    *
- * @bo: Pointer to a struct ttm_buffer_object. the data of
which
- * we want to allocate space for.
- * @placement: Proposed new placement for the buffer object.
- * @mem: A struct ttm_resource.
+ * @bo: Pointer to a struct ttm_buffer_object of which we want
a
resource for
+ * @placement: Proposed new placement for the buffer object
    * @ctx: if and how to sleep, lock buffers and alloc memory
+ * @force_space: If we should evict buffers to force space
+ * @res: The resulting struct ttm_resource.
    *
- * Allocate memory space for the buffer object pointed to by
@bo,
using
- * the placement flags in @placement, potentially evicting
other
idle buffer objects.
- * This function may sleep while waiting for space to become
available.
+ * Allocates a resource for the buffer object pointed to by
@bo,

Re: [PATCH 1/6] drm/tests/drm_buddy: fix 32b build

2024-02-26 Thread Matthew Auld

Hi,

On 26/02/2024 10:38, Geert Uytterhoeven wrote:

Hi Matthew,

On Mon, Feb 26, 2024 at 10:58 AM Matthew Auld  wrote:

On 19/02/2024 12:24, Matthew Auld wrote:

On 19/02/2024 10:48, Matthew Auld wrote:

On 19/02/2024 10:30, Christian König wrote:

Am 19.02.24 um 11:28 schrieb Matthew Auld:

On 19/02/2024 09:53, Christian König wrote:

Am 19.02.24 um 10:42 schrieb Matthew Auld:

On 15/02/2024 17:44, Matthew Auld wrote:

Doesn't seem to compile on 32b, presumably due to u64 mod/division.
Simplest is to just switch over to u32 here. Also make print
modifiers
consistent with that.

Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous
test")
Reported-by: Geert Uytterhoeven 
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Cc: Maxime Ripard 


Any chance someone can push just this single patch here, since it
fixes 32b build? It already has an r-b from Arun.


Already working on this. Just give me a few more minutes.


Thanks.


No problem. I would have pushed this earlier, but my build server
doesn't want to work any more. Looks like the SSD has passed its
warranty :(

Should I push the other three patches to drm-misc-fixes as well? I
currently can't even build test them.


Need to send a v2 for that. One minor change in the test just to be
consistent with using u32. Thanks.


Sent v2. If you could push that when you get a chance. Thanks.

https://patchwork.freedesktop.org/series/130075/


Gentle ping on merging v2.


Your v1 and a fix from Linus already made it upstream:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/log/drivers/gpu/drm/tests?h=v6.8-rc6


Ah, right. I meant v2 for the remaining drm_buddy patches in this series:
https://patchwork.freedesktop.org/series/130075/



Gr{oetje,eeting}s,

 Geert



Re: [PATCH 1/6] drm/tests/drm_buddy: fix 32b build

2024-02-26 Thread Matthew Auld

On 19/02/2024 12:24, Matthew Auld wrote:

On 19/02/2024 10:48, Matthew Auld wrote:

On 19/02/2024 10:30, Christian König wrote:

Am 19.02.24 um 11:28 schrieb Matthew Auld:

On 19/02/2024 09:53, Christian König wrote:

Am 19.02.24 um 10:42 schrieb Matthew Auld:

On 15/02/2024 17:44, Matthew Auld wrote:

Doesn't seem to compile on 32b, presumably due to u64 mod/division.
Simplest is to just switch over to u32 here. Also make print 
modifiers

consistent with that.

Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous 
test")

Reported-by: Geert Uytterhoeven 
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Cc: Maxime Ripard 


Any chance someone can push just this single patch here, since it 
fixes 32b build? It already has an r-b from Arun.


Already working on this. Just give me a few more minutes.


Thanks.


No problem. I would have pushed this earlier, but my build server 
doesn't want to work any more. Looks like the SSD has passed its 
warranty :(


Should I push the other three patches to drm-misc-fixes as well? I 
currently can't even build test them.


Need to send a v2 for that. One minor change in the test just to be 
consistent with using u32. Thanks.


Sent v2. If you could push that when you get a chance. Thanks.

https://patchwork.freedesktop.org/series/130075/


Gentle ping on merging v2.







Thanks,
Christian.





Thanks,
Christian.




---
  drivers/gpu/drm/tests/drm_buddy_test.c | 16 
  1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c

index fee6bec757d1..edacc1adb28f 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -21,7 +21,7 @@ static inline u64 get_size(int order, u64 
chunk_size)

    static void drm_test_buddy_alloc_contiguous(struct kunit *test)
  {
-    u64 mm_size, ps = SZ_4K, i, n_pages, total;
+    u32 mm_size, ps = SZ_4K, i, n_pages, total;
  struct drm_buddy_block *block;
  struct drm_buddy mm;
  LIST_HEAD(left);
@@ -56,30 +56,30 @@ static void 
drm_test_buddy_alloc_contiguous(struct kunit *test)

  KUNIT_ASSERT_FALSE_MSG(test,
 drm_buddy_alloc_blocks(, 0, mm_size,
    ps, ps, list, 0),
-   "buddy_alloc hit an error size=%d\n",
+   "buddy_alloc hit an error size=%u\n",
 ps);
  } while (++i < n_pages);
    KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 
0, mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%d\n", 3 * ps);
+   "buddy_alloc didn't error size=%u\n", 3 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   "buddy_alloc didn't error size=%u\n", 3 * ps);
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 2 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 2 * ps);
+   "buddy_alloc didn't error size=%u\n", 2 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   "buddy_alloc didn't error size=%u\n", 3 * ps);
  /*
   * At this point we should have enough contiguous space for 
2 blocks,
   * however they are never buddies (since we freed middle 
and right) so
@@ -88,13 +88,13 @@ static void 
drm_test_buddy_alloc_contiguous(struct kunit *test)
  KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

  2 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc hit an error size=%d\n", 2 * ps);
+   "buddy_alloc hit an error size=%u\n", 2 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

  3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc hit an error size=%d\n", 3 * ps);
+   "buddy_alloc hit an error size=%u\n", 3 * ps);
    total = 0;
  list_for_each_entry(block, , link)






Re: [PATCH 1/2] devm-helpers: Add resource managed version of mutex init

2024-02-22 Thread Matthew Auld

On 22/02/2024 14:58, Marek Behún wrote:

A few drivers are doing resource-managed mutex initialization by
implementing ad-hoc one-liner mutex dropping functions and using them
with devm_add_action_or_reset(). Help drivers avoid these repeated
one-liners by adding managed version of mutex initialization.

Use the new function devm_mutex_init() in the following drivers:
   drivers/gpio/gpio-pisosr.c
   drivers/gpio/gpio-sim.c
   drivers/gpu/drm/xe/xe_hwmon.c
   drivers/hwmon/nzxt-smart2.c
   drivers/leds/leds-is31fl319x.c
   drivers/power/supply/mt6370-charger.c
   drivers/power/supply/rt9467-charger.c

Signed-off-by: Marek Behún 
---
  drivers/gpio/gpio-pisosr.c|  9 ++-
  drivers/gpio/gpio-sim.c   | 12 ++
  drivers/gpu/drm/xe/xe_hwmon.c | 11 ++---
  drivers/hwmon/nzxt-smart2.c   |  9 ++-
  drivers/leds/leds-is31fl319x.c|  9 ++-
  drivers/power/supply/mt6370-charger.c | 11 +
  drivers/power/supply/rt9467-charger.c | 34 ---
  include/linux/devm-helpers.h  | 32 +
  8 files changed, 47 insertions(+), 80 deletions(-)

diff --git a/drivers/gpio/gpio-pisosr.c b/drivers/gpio/gpio-pisosr.c
index e3013e778e15..dddbf37e855f 100644
--- a/drivers/gpio/gpio-pisosr.c
+++ b/drivers/gpio/gpio-pisosr.c
@@ -7,6 +7,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -116,11 +117,6 @@ static const struct gpio_chip template_chip = {
.can_sleep  = true,
  };
  
-static void pisosr_mutex_destroy(void *lock)

-{
-   mutex_destroy(lock);
-}
-
  static int pisosr_gpio_probe(struct spi_device *spi)
  {
struct device *dev = >dev;
@@ -147,8 +143,7 @@ static int pisosr_gpio_probe(struct spi_device *spi)
return dev_err_probe(dev, PTR_ERR(gpio->load_gpio),
 "Unable to allocate load GPIO\n");
  
-	mutex_init(>lock);

-   ret = devm_add_action_or_reset(dev, pisosr_mutex_destroy, >lock);
+   ret = devm_mutex_init(dev, >lock);
if (ret)
return ret;
  
diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c

index c4106e37e6db..fcfcaa4efe70 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -12,6 +12,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -307,13 +308,6 @@ static ssize_t gpio_sim_sysfs_pull_store(struct device 
*dev,
return len;
  }
  
-static void gpio_sim_mutex_destroy(void *data)

-{
-   struct mutex *lock = data;
-
-   mutex_destroy(lock);
-}
-
  static void gpio_sim_put_device(void *data)
  {
struct device *dev = data;
@@ -457,9 +451,7 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, 
struct device *dev)
if (ret)
return ret;
  
-	mutex_init(>lock);

-   ret = devm_add_action_or_reset(dev, gpio_sim_mutex_destroy,
-  >lock);
+   ret = devm_mutex_init(dev, >lock);
if (ret)
return ret;
  
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c

index 174ed2185481..bb88ae1c196c 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -3,6 +3,7 @@
   * Copyright © 2023 Intel Corporation
   */
  
+#include 

  #include 
  #include 
  #include 
@@ -729,13 +730,6 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe)
xe_hwmon_energy_get(hwmon, );
  }
  
-static void xe_hwmon_mutex_destroy(void *arg)

-{
-   struct xe_hwmon *hwmon = arg;
-
-   mutex_destroy(>hwmon_lock);
-}
-
  void xe_hwmon_register(struct xe_device *xe)
  {
struct device *dev = xe->drm.dev;
@@ -751,8 +745,7 @@ void xe_hwmon_register(struct xe_device *xe)
  
  	xe->hwmon = hwmon;
  
-	mutex_init(>hwmon_lock);

-   if (devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon))
+   if (devm_mutex_init(dev, >hwmon_lock))
return;
  
  	/* primary GT to access device level properties */

diff --git a/drivers/hwmon/nzxt-smart2.c b/drivers/hwmon/nzxt-smart2.c
index 7aa586eb74be..00bc89607673 100644
--- a/drivers/hwmon/nzxt-smart2.c
+++ b/drivers/hwmon/nzxt-smart2.c
@@ -5,6 +5,7 @@
   * Copyright (c) 2021 Aleksandr Mezin
   */
  
+#include 

  #include 
  #include 
  #include 
@@ -721,11 +722,6 @@ static int __maybe_unused 
nzxt_smart2_hid_reset_resume(struct hid_device *hdev)
return init_device(drvdata, drvdata->update_interval);
  }
  
-static void mutex_fini(void *lock)

-{
-   mutex_destroy(lock);
-}
-
  static int nzxt_smart2_hid_probe(struct hid_device *hdev,
 const struct hid_device_id *id)
  {
@@ -741,8 +737,7 @@ static int nzxt_smart2_hid_probe(struct hid_device *hdev,
  
  	init_waitqueue_head(>wq);
  
-	mutex_init(>mutex);

-   ret = devm_add_action_or_reset(>dev, mutex_fini, >mutex);
+   ret = devm_mutex_init(>dev, >mutex);

Re: [PATCH v7 3/3] drm/buddy: Add defragmentation support

2024-02-21 Thread Matthew Auld

On 21/02/2024 12:18, Arunpravin Paneer Selvam wrote:

Add a function to support defragmentation.

v1:
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2(Matthew):
   - add amdgpu user for defragmentation
   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 17 +++-
  drivers/gpu/drm/drm_buddy.c  | 93 +---
  include/drm/drm_buddy.h  |  3 +
  3 files changed, 97 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index e494f5bf136a..cff8a526c622 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -533,8 +533,21 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
   min_block_size,
   >blocks,
   vres->flags);
-   if (unlikely(r))
-   goto error_free_blocks;
+   if (unlikely(r)) {
+   if (r == -ENOSPC) {
+   drm_buddy_defrag(mm, min_block_size);
+   r = drm_buddy_alloc_blocks(mm, fpfn,
+  lpfn,
+  size,
+  min_block_size,
+  >blocks,
+  vres->flags);
+   if (unlikely(r))
+   goto error_free_blocks;
+   } else {
+   goto error_free_blocks;
+   }
+   }
  
  		if (size > remaining_size)

remaining_size = 0;
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 18e004fa39d3..56bd1560fbcd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -203,6 +203,8 @@ void drm_buddy_fini(struct drm_buddy *mm)
drm_block_free(mm, mm->roots[i]);
}
  
+	drm_buddy_defrag(mm, mm->chunk_size << mm->max_order);


I think this needs to be called higher up, otherwise we blow up with the 
WARN, plus we just freed the root(s). There is also the case with 
non-power-of-two VRAM size, in which case you get multiple roots and 
max_order is just the largest root and not entire address space. I guess 
do this in the loop above and use the root order instead?


Also this should be done as part of the first patch and then in this 
patch it is just a case of exporting it. Every commit should ideally be 
functional by itself.



+
WARN_ON(mm->avail != mm->size);
  
  	kfree(mm->roots);

@@ -276,25 +278,39 @@ drm_get_buddy(struct drm_buddy_block *block)
  }
  EXPORT_SYMBOL(drm_get_buddy);
  
-static void __drm_buddy_free(struct drm_buddy *mm,

-struct drm_buddy_block *block)
+static unsigned int __drm_buddy_free(struct drm_buddy *mm,
+struct drm_buddy_block *block,
+bool defrag)
  {
+   unsigned int order, block_order;
struct drm_buddy_block *parent;
  
+	block_order = drm_buddy_block_order(block);

+
while ((parent = block->parent)) {
-   struct drm_buddy_block *buddy;
+   struct drm_buddy_block *buddy = NULL;
  
  		buddy = __get_buddy(block);
  
  		if (!drm_buddy_block_is_free(buddy))

break;
  
-		if (drm_buddy_block_is_clear(block) !=

-   drm_buddy_block_is_clear(buddy))
-   break;
+   if (!defrag) {
+   /*
+* Check the block and its buddy clear state and exit
+* the loop if they both have the dissimilar state.
+*/
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
  
-		if (drm_buddy_block_is_clear(block))

-   mark_cleared(parent);
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+   }
+
+   WARN_ON(defrag &&
+   (drm_buddy_block_is_clear(block) ==
+

Re: [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature

2024-02-21 Thread Matthew Auld

On 21/02/2024 12:40, Paneer Selvam, Arunpravin wrote:


On 2/16/2024 5:33 PM, Matthew Auld wrote:

On 08/02/2024 15:49, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared;
   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks separated.

v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of 
blocks

 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in 
arguments.


Signed-off-by: Arunpravin Paneer Selvam 


Signed-off-by: Matthew Auld 
Suggested-by: Christian König 


Probably needs a new unit test.

I think we are missing something to forcefully re-merge everything at 
fini()? In theory we can just call the defrag routine. Otherwise we 
might trigger various warnings since the root(s) might still be split.


Also one nit below. Otherwise I think looks good.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 192 ++
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  10 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  18 +-
  6 files changed, 187 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
  error_fini:
  ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
    atomic64_sub(vis_usage, >vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
*adev)

  kfree(rsv);
    list_for_each_entry_safe(rsv, temp, >reserved_pages, 
blocks) {

-    drm_buddy_free_list(>mm, >allocated);
+    drm_buddy_free_list(>mm, >allocated, 0);
  kfree(rsv);
  }
  if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..33ad0cfbd54c 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
  __list_add(>link, node->link.prev, >link);
  }
  +static void clear_reset(struct drm_buddy_block *block)
+{
+    block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+    block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
  block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
  mark_free(mm, block->left);
  mark_free(mm, block->right);
  +    if (drm_buddy_block_is_clear(block)) {
+    mark_cleared(block->left);
+    mark_cleared(block->right);
+    clear_reset(block);
+    }
+
  mark_split(block);
    return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
  if (!drm_buddy_block_is_free(buddy))
  break;
  +    if (drm_buddy_block_is_clear(block) !=
+    drm_buddy_block_is_clear(buddy))
+    break;
+
+    if (drm_buddy_block_is_clear(block))
+    mark_cleared(parent);
+
  list_del(>link);
    drm_block_free(mm, block);
@@ -295,26 +318,61 @@ void drm_buddy_free_block(struct drm_buddy *mm,
  {
  BUG_ON(!drm_buddy_block_is_allocated(block));
  mm->avail += drm_buddy_blo

Re: [PATCH 7/9] drm: tests: Fix invalid printf format specifiers in KUnit tests

2024-02-21 Thread Matthew Auld

On 21/02/2024 09:27, David Gow wrote:

The drm_buddy_test's alloc_contiguous test used a u64 for the page size,
which was then updated to be an 'unsigned long' to avoid 64-bit
multiplication division helpers.

However, the variable is logged by some KUNIT_ASSERT_EQ_MSG() using the
'%d' or '%llu' format specifiers, the former of which is always wrong,
and the latter is no longer correct now that ps is no longer a u64. Fix
these to all use '%lu'.

Also, drm_mm_test calls KUNIT_FAIL() with an empty string as the
message. gcc warns if a printf format string is empty (apparently), so
give these some more detailed error messages, which should be more
useful anyway.

Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous test")
Fixes: fca7526b7d89 ("drm/tests/drm_buddy: fix build failure on 32-bit targets")
Fixes: fc8d29e298cf ("drm: selftest: convert drm_mm selftest to KUnit")
Signed-off-by: David Gow 
---
  drivers/gpu/drm/tests/drm_buddy_test.c | 14 +++---
  drivers/gpu/drm/tests/drm_mm_test.c|  6 +++---
  2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index 8a464f7f4c61..3dbfa3078449 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -55,30 +55,30 @@ static void drm_test_buddy_alloc_contiguous(struct kunit 
*test)
KUNIT_ASSERT_FALSE_MSG(test,
   drm_buddy_alloc_blocks(, 0, mm_size,
  ps, ps, list, 0),
-  "buddy_alloc hit an error size=%d\n",
+  "buddy_alloc hit an error size=%lu\n",
   ps);
} while (++i < n_pages);
  
  	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,

   3 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%d\n", 3 * ps);
+  "buddy_alloc didn't error size=%lu\n", 3 * ps);
  
  	drm_buddy_free_list(, );

KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
   3 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+  "buddy_alloc didn't error size=%lu\n", 3 * ps);
KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
   2 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%llu\n", 2 * ps);
+  "buddy_alloc didn't error size=%lu\n", 2 * ps);
  
  	drm_buddy_free_list(, );

KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
   3 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+  "buddy_alloc didn't error size=%lu\n", 3 * ps);
/*
 * At this point we should have enough contiguous space for 2 blocks,
 * however they are never buddies (since we freed middle and right) so
@@ -87,13 +87,13 @@ static void drm_test_buddy_alloc_contiguous(struct kunit 
*test)
KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
2 * ps, ps, 
,

DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc hit an error size=%d\n", 2 * ps);
+  "buddy_alloc hit an error size=%lu\n", 2 * ps);
  
  	drm_buddy_free_list(, );

KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
3 * ps, ps, 
,

DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc hit an error size=%d\n", 3 * ps);
+  "buddy_alloc hit an error size=%lu\n", 3 * ps);


There was also a fix for this in: 335126937753 ("drm/tests/drm_buddy: 
fix 32b build"), but there everything was made u32.


Reviewed-by: Matthew Auld 

  
  	total = 0;

list_for_each_entry(block, , link)
diff --git a/d

Re: [PATCH] drm/ttm: Fix an invalid freeing on already freed page in error path

2024-02-21 Thread Matthew Auld

On 21/02/2024 07:33, Thomas Hellström wrote:

If caching mode change fails due to, for example, OOM we
free the allocated pages in a two-step process. First the pages
for which the caching change has already succeeded. Secondly
the pages for which a caching change did not succeed.

However the second step was incorrectly freeing the pages already
freed in the first step.

Fix.

Signed-off-by: Thomas Hellström 
Fixes: 379989e7cbdc ("drm/ttm/pool: Fix ttm_pool_alloc error path")
Cc: Christian König 
Cc: Dave Airlie 
Cc: Christian Koenig 
Cc: Huang Rui 
Cc: dri-devel@lists.freedesktop.org
Cc:  # v6.4+

Reviewed-by: Matthew Auld 


Re: [PATCH 1/6] drm/tests/drm_buddy: fix 32b build

2024-02-19 Thread Matthew Auld

On 19/02/2024 10:48, Matthew Auld wrote:

On 19/02/2024 10:30, Christian König wrote:

Am 19.02.24 um 11:28 schrieb Matthew Auld:

On 19/02/2024 09:53, Christian König wrote:

Am 19.02.24 um 10:42 schrieb Matthew Auld:

On 15/02/2024 17:44, Matthew Auld wrote:

Doesn't seem to compile on 32b, presumably due to u64 mod/division.
Simplest is to just switch over to u32 here. Also make print 
modifiers

consistent with that.

Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous 
test")

Reported-by: Geert Uytterhoeven 
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Cc: Maxime Ripard 


Any chance someone can push just this single patch here, since it 
fixes 32b build? It already has an r-b from Arun.


Already working on this. Just give me a few more minutes.


Thanks.


No problem. I would have pushed this earlier, but my build server 
doesn't want to work any more. Looks like the SSD has passed its 
warranty :(


Should I push the other three patches to drm-misc-fixes as well? I 
currently can't even build test them.


Need to send a v2 for that. One minor change in the test just to be 
consistent with using u32. Thanks.


Sent v2. If you could push that when you get a chance. Thanks.

https://patchwork.freedesktop.org/series/130075/





Thanks,
Christian.





Thanks,
Christian.




---
  drivers/gpu/drm/tests/drm_buddy_test.c | 16 
  1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c

index fee6bec757d1..edacc1adb28f 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -21,7 +21,7 @@ static inline u64 get_size(int order, u64 
chunk_size)

    static void drm_test_buddy_alloc_contiguous(struct kunit *test)
  {
-    u64 mm_size, ps = SZ_4K, i, n_pages, total;
+    u32 mm_size, ps = SZ_4K, i, n_pages, total;
  struct drm_buddy_block *block;
  struct drm_buddy mm;
  LIST_HEAD(left);
@@ -56,30 +56,30 @@ static void 
drm_test_buddy_alloc_contiguous(struct kunit *test)

  KUNIT_ASSERT_FALSE_MSG(test,
 drm_buddy_alloc_blocks(, 0, mm_size,
    ps, ps, list, 0),
-   "buddy_alloc hit an error size=%d\n",
+   "buddy_alloc hit an error size=%u\n",
 ps);
  } while (++i < n_pages);
    KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%d\n", 3 * ps);
+   "buddy_alloc didn't error size=%u\n", 3 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   "buddy_alloc didn't error size=%u\n", 3 * ps);
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 2 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 2 * ps);
+   "buddy_alloc didn't error size=%u\n", 2 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   "buddy_alloc didn't error size=%u\n", 3 * ps);
  /*
   * At this point we should have enough contiguous space for 
2 blocks,
   * however they are never buddies (since we freed middle and 
right) so
@@ -88,13 +88,13 @@ static void 
drm_test_buddy_alloc_contiguous(struct kunit *test)
  KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

  2 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc hit an error size=%d\n", 2 * ps);
+   "buddy_alloc hit an error size=%u\n", 2 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

  3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc hit an error size=%d\n", 3 * ps);
+   "buddy_alloc hit an error size=%u\n", 3 * ps);
    total = 0;
  list_for_each_entry(block, , link)






[PATCH v2 3/3] drm/tests/drm_buddy: add alloc_range_bias test

2024-02-19 Thread Matthew Auld
Sanity check range bias with DRM_BUDDY_RANGE_ALLOCATION.

v2:
  - Be consistent with u32 here.

Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Reviewed-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 218 +
 1 file changed, 218 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index edacc1adb28f..1008d5b9d61e 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -14,11 +14,216 @@
 
 #include "../lib/drm_random.h"
 
+static unsigned int random_seed;
+
 static inline u64 get_size(int order, u64 chunk_size)
 {
return (1 << order) * chunk_size;
 }
 
+static void drm_test_buddy_alloc_range_bias(struct kunit *test)
+{
+   u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem;
+   DRM_RND_STATE(prng, random_seed);
+   unsigned int i, count, *order;
+   struct drm_buddy mm;
+   LIST_HEAD(allocated);
+
+   bias_size = SZ_1M;
+   ps = roundup_pow_of_two(prandom_u32_state() % bias_size);
+   ps = max(SZ_4K, ps);
+   mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */
+
+   kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps);
+
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(, mm_size, ps),
+  "buddy_init failed\n");
+
+   count = mm_size / bias_size;
+   order = drm_random_order(count, );
+   KUNIT_EXPECT_TRUE(test, order);
+
+   /*
+* Idea is to split the address space into uniform bias ranges, and then
+* in some random order allocate within each bias, using various
+* patterns within. This should detect if allocations leak out from a
+* given bias, for example.
+*/
+
+   for (i = 0; i < count; i++) {
+   LIST_HEAD(tmp);
+   u32 size;
+
+   bias_start = order[i] * bias_size;
+   bias_end = bias_start + bias_size;
+   bias_rem = bias_size;
+
+   /* internal round_up too big */
+   KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(, bias_start,
+bias_end, 
bias_size + ps, bias_size,
+,
+
DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc failed with bias(%x-%x), 
size=%u, ps=%u\n",
+ bias_start, bias_end, bias_size, 
bias_size);
+
+   /* size too big */
+   KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(, bias_start,
+bias_end, 
bias_size + ps, ps,
+,
+
DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc didn't fail with 
bias(%x-%x), size=%u, ps=%u\n",
+ bias_start, bias_end, bias_size + ps, ps);
+
+   /* bias range too small for size */
+   KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(, bias_start + 
ps,
+bias_end, 
bias_size, ps,
+,
+
DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc didn't fail with 
bias(%x-%x), size=%u, ps=%u\n",
+ bias_start + ps, bias_end, bias_size, ps);
+
+   /* bias misaligned */
+   KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(, bias_start + 
ps,
+bias_end - ps,
+bias_size >> 1, 
bias_size >> 1,
+,
+
DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc h didn't fail with 
bias(%x-%x), size=%u, ps=%u\n",
+ bias_start + ps, bias_end - ps, bias_size 
>> 1, bias_size >> 1);
+
+   /* single big page */
+   KUNIT_ASSERT_FALSE_MSG(test,
+  drm_buddy_alloc_blocks(, bias_start,
+ bias_end, 
bias_size, bias_size,
+ 

[PATCH v2 2/3] drm/buddy: check range allocation matches alignment

2024-02-19 Thread Matthew Auld
Likely not a big deal for real users, but for consistency we should
respect the min_page_size here. Main issue is that bias allocations
turns into normal range allocation if the range and size matches
exactly, and in the next patch we want to add some unit tests for this
part of the api.

Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Reviewed-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/drm_buddy.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f3a6ac908f81..5ebdd6f8f36e 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -771,8 +771,12 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
return -EINVAL;
 
/* Actual range allocation */
-   if (start + size == end)
+   if (start + size == end) {
+   if (!IS_ALIGNED(start | end, min_block_size))
+   return -EINVAL;
+
return __drm_buddy_alloc_range(mm, start, size, NULL, blocks);
+   }
 
original_size = size;
original_min_size = min_block_size;
-- 
2.43.0



[PATCH v2 1/3] drm/buddy: fix range bias

2024-02-19 Thread Matthew Auld
There is a corner case here where start/end is after/before the block
range we are currently checking. If so we need to be sure that splitting
the block will eventually give us the block size we need. To do that we
should adjust the block range to account for the start/end, and only
continue with the split if the size/alignment will fit the requested
size. Not doing so can result in leaving split blocks unmerged when it
eventually fails.

Fixes: afea229fe102 ("drm: improve drm_buddy_alloc function")
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Cc:  # v5.18+
Reviewed-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/drm_buddy.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c4222b886db7..f3a6ac908f81 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm,
 u64 start, u64 end,
 unsigned int order)
 {
+   u64 req_size = mm->chunk_size << order;
struct drm_buddy_block *block;
struct drm_buddy_block *buddy;
LIST_HEAD(dfs);
@@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm,
if (drm_buddy_block_is_allocated(block))
continue;
 
+   if (block_start < start || block_end > end) {
+   u64 adjusted_start = max(block_start, start);
+   u64 adjusted_end = min(block_end, end);
+
+   if (round_down(adjusted_end + 1, req_size) <=
+   round_up(adjusted_start, req_size))
+   continue;
+   }
+
if (contains(start, end, block_start, block_end) &&
order == drm_buddy_block_order(block)) {
/*
-- 
2.43.0



Re: [PATCH] drm/tests/drm_buddy: avoid 64-bit calculation

2024-02-19 Thread Matthew Auld

On 19/02/2024 11:41, Christian König wrote:

Am 19.02.24 um 12:29 schrieb Arnd Bergmann:

On Mon, Feb 19, 2024, at 12:22, Christian König wrote:

Am 17.02.24 um 02:31 schrieb Randy Dunlap:

On 2/16/24 12:24, Arnd Bergmann wrote:

From: Arnd Bergmann 

The newly added drm_test_buddy_alloc_contiguous() test fails to 
link on

32-bit targets because of inadvertent 64-bit calculations:

ERROR: modpost: "__aeabi_uldivmod" 
[drivers/gpu/drm/tests/drm_buddy_test.ko] undefined!
ERROR: modpost: "__aeabi_ldivmod" 
[drivers/gpu/drm/tests/drm_buddy_test.ko] undefined!


From what I can tell, the numbers cannot possibly overflow a 32-bit size,
so use different types for these.

I noticed that the function has another possible flaw in that it mixes
what it calls pages with 4KB units. This is a bit confusing at best,
or possibly broken when built on machines with larger pages.

Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous test")
Signed-off-by: Arnd Bergmann 

Tested-by: Randy Dunlap 

I've just pushed a similar patch Matthew came up with a bit earlier to
drm-misc-fixes.

Sorry for the noise, I have to catch up on picking up patches for
misc-fixes and misc-next.

Ok, thanks.

Have you looked at how this code works for larger values of PAGE_SIZE?
Is there any need to change other things or will this work with the
hardcoded 4KB chunks?


I haven't looked into the details, but I've pointed out before that 
using PAGE_SIZE in the buddy or its test cases would be incorrect.


Background is that the buddy allocator is for devices and those work 
independent of the CPU PAGE_SIZE. So it can be that on a CPU with 64k 
pages the buddy still needs to work with 4k.


Could be that this works, but could as well be that this is completely 
broken. Arun and Matthew need to answer this, I haven't tested it nor 
reviewed the code.


Yeah, we should not be using PAGE_SIZE or PAGE_SHIFT in drm_buddy.[ch] 
and tests/drm_buddy_test.c. The smallest default page size is SZ_4K for 
drm_buddy. A patch to fix that would be very welcome. If no takers I can 
send something.




Regards,
Christian.



  Arnd




Re: [PATCH 1/6] drm/tests/drm_buddy: fix 32b build

2024-02-19 Thread Matthew Auld

On 19/02/2024 10:30, Christian König wrote:

Am 19.02.24 um 11:28 schrieb Matthew Auld:

On 19/02/2024 09:53, Christian König wrote:

Am 19.02.24 um 10:42 schrieb Matthew Auld:

On 15/02/2024 17:44, Matthew Auld wrote:

Doesn't seem to compile on 32b, presumably due to u64 mod/division.
Simplest is to just switch over to u32 here. Also make print modifiers
consistent with that.

Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous test")
Reported-by: Geert Uytterhoeven 
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Cc: Maxime Ripard 


Any chance someone can push just this single patch here, since it 
fixes 32b build? It already has an r-b from Arun.


Already working on this. Just give me a few more minutes.


Thanks.


No problem. I would have pushed this earlier, but my build server 
doesn't want to work any more. Looks like the SSD has passed its 
warranty :(


Should I push the other three patches to drm-misc-fixes as well? I 
currently can't even build test them.


Need to send a v2 for that. One minor change in the test just to be 
consistent with using u32. Thanks.




Thanks,
Christian.





Thanks,
Christian.




---
  drivers/gpu/drm/tests/drm_buddy_test.c | 16 
  1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c

index fee6bec757d1..edacc1adb28f 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -21,7 +21,7 @@ static inline u64 get_size(int order, u64 
chunk_size)

    static void drm_test_buddy_alloc_contiguous(struct kunit *test)
  {
-    u64 mm_size, ps = SZ_4K, i, n_pages, total;
+    u32 mm_size, ps = SZ_4K, i, n_pages, total;
  struct drm_buddy_block *block;
  struct drm_buddy mm;
  LIST_HEAD(left);
@@ -56,30 +56,30 @@ static void 
drm_test_buddy_alloc_contiguous(struct kunit *test)

  KUNIT_ASSERT_FALSE_MSG(test,
 drm_buddy_alloc_blocks(, 0, mm_size,
    ps, ps, list, 0),
-   "buddy_alloc hit an error size=%d\n",
+   "buddy_alloc hit an error size=%u\n",
 ps);
  } while (++i < n_pages);
    KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%d\n", 3 * ps);
+   "buddy_alloc didn't error size=%u\n", 3 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   "buddy_alloc didn't error size=%u\n", 3 * ps);
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 2 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 2 * ps);
+   "buddy_alloc didn't error size=%u\n", 2 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   "buddy_alloc didn't error size=%u\n", 3 * ps);
  /*
   * At this point we should have enough contiguous space for 2 
blocks,
   * however they are never buddies (since we freed middle and 
right) so
@@ -88,13 +88,13 @@ static void 
drm_test_buddy_alloc_contiguous(struct kunit *test)
  KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

  2 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc hit an error size=%d\n", 2 * ps);
+   "buddy_alloc hit an error size=%u\n", 2 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

  3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc hit an error size=%d\n", 3 * ps);
+   "buddy_alloc hit an error size=%u\n", 3 * ps);
    total = 0;
  list_for_each_entry(block, , link)






Re: [PATCH 1/6] drm/tests/drm_buddy: fix 32b build

2024-02-19 Thread Matthew Auld

On 19/02/2024 09:53, Christian König wrote:

Am 19.02.24 um 10:42 schrieb Matthew Auld:

On 15/02/2024 17:44, Matthew Auld wrote:

Doesn't seem to compile on 32b, presumably due to u64 mod/division.
Simplest is to just switch over to u32 here. Also make print modifiers
consistent with that.

Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous test")
Reported-by: Geert Uytterhoeven 
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Cc: Maxime Ripard 


Any chance someone can push just this single patch here, since it 
fixes 32b build? It already has an r-b from Arun.


Already working on this. Just give me a few more minutes.


Thanks.



Thanks,
Christian.




---
  drivers/gpu/drm/tests/drm_buddy_test.c | 16 
  1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c

index fee6bec757d1..edacc1adb28f 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -21,7 +21,7 @@ static inline u64 get_size(int order, u64 chunk_size)
    static void drm_test_buddy_alloc_contiguous(struct kunit *test)
  {
-    u64 mm_size, ps = SZ_4K, i, n_pages, total;
+    u32 mm_size, ps = SZ_4K, i, n_pages, total;
  struct drm_buddy_block *block;
  struct drm_buddy mm;
  LIST_HEAD(left);
@@ -56,30 +56,30 @@ static void 
drm_test_buddy_alloc_contiguous(struct kunit *test)

  KUNIT_ASSERT_FALSE_MSG(test,
 drm_buddy_alloc_blocks(, 0, mm_size,
    ps, ps, list, 0),
-   "buddy_alloc hit an error size=%d\n",
+   "buddy_alloc hit an error size=%u\n",
 ps);
  } while (++i < n_pages);
    KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%d\n", 3 * ps);
+   "buddy_alloc didn't error size=%u\n", 3 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   "buddy_alloc didn't error size=%u\n", 3 * ps);
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 2 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 2 * ps);
+   "buddy_alloc didn't error size=%u\n", 2 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

 3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   "buddy_alloc didn't error size=%u\n", 3 * ps);
  /*
   * At this point we should have enough contiguous space for 2 
blocks,
   * however they are never buddies (since we freed middle and 
right) so
@@ -88,13 +88,13 @@ static void 
drm_test_buddy_alloc_contiguous(struct kunit *test)
  KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

  2 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc hit an error size=%d\n", 2 * ps);
+   "buddy_alloc hit an error size=%u\n", 2 * ps);
    drm_buddy_free_list(, );
  KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,

  3 * ps, ps, ,
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-   "buddy_alloc hit an error size=%d\n", 3 * ps);
+   "buddy_alloc hit an error size=%u\n", 3 * ps);
    total = 0;
  list_for_each_entry(block, , link)




Re: [PATCH 1/6] drm/tests/drm_buddy: fix 32b build

2024-02-19 Thread Matthew Auld

On 15/02/2024 17:44, Matthew Auld wrote:

Doesn't seem to compile on 32b, presumably due to u64 mod/division.
Simplest is to just switch over to u32 here. Also make print modifiers
consistent with that.

Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous test")
Reported-by: Geert Uytterhoeven 
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Cc: Maxime Ripard 


Any chance someone can push just this single patch here, since it fixes 
32b build? It already has an r-b from Arun.



---
  drivers/gpu/drm/tests/drm_buddy_test.c | 16 
  1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index fee6bec757d1..edacc1adb28f 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -21,7 +21,7 @@ static inline u64 get_size(int order, u64 chunk_size)
  
  static void drm_test_buddy_alloc_contiguous(struct kunit *test)

  {
-   u64 mm_size, ps = SZ_4K, i, n_pages, total;
+   u32 mm_size, ps = SZ_4K, i, n_pages, total;
struct drm_buddy_block *block;
struct drm_buddy mm;
LIST_HEAD(left);
@@ -56,30 +56,30 @@ static void drm_test_buddy_alloc_contiguous(struct kunit 
*test)
KUNIT_ASSERT_FALSE_MSG(test,
   drm_buddy_alloc_blocks(, 0, mm_size,
  ps, ps, list, 0),
-  "buddy_alloc hit an error size=%d\n",
+  "buddy_alloc hit an error size=%u\n",
   ps);
} while (++i < n_pages);
  
  	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,

   3 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%d\n", 3 * ps);
+  "buddy_alloc didn't error size=%u\n", 3 * ps);
  
  	drm_buddy_free_list(, );

KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
   3 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+  "buddy_alloc didn't error size=%u\n", 3 * ps);
KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
   2 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%llu\n", 2 * ps);
+  "buddy_alloc didn't error size=%u\n", 2 * ps);
  
  	drm_buddy_free_list(, );

KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
   3 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+  "buddy_alloc didn't error size=%u\n", 3 * ps);
/*
 * At this point we should have enough contiguous space for 2 blocks,
 * however they are never buddies (since we freed middle and right) so
@@ -88,13 +88,13 @@ static void drm_test_buddy_alloc_contiguous(struct kunit 
*test)
KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
2 * ps, ps, 
,

DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc hit an error size=%d\n", 2 * ps);
+  "buddy_alloc hit an error size=%u\n", 2 * ps);
  
  	drm_buddy_free_list(, );

KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
3 * ps, ps, 
,

DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc hit an error size=%d\n", 3 * ps);
+  "buddy_alloc hit an error size=%u\n", 3 * ps);
  
  	total = 0;

list_for_each_entry(block, , link)


Re: [PATCH v6 3/3] drm/buddy: Add defragmentation support

2024-02-16 Thread Matthew Auld

On 16/02/2024 14:02, Christian König wrote:

Am 16.02.24 um 14:21 schrieb Matthew Auld:

On 16/02/2024 12:33, Christian König wrote:

Am 16.02.24 um 13:23 schrieb Matthew Auld:

On 08/02/2024 15:50, Arunpravin Paneer Selvam wrote:

Add a function to support defragmentation.

v1: Defragment the memory beginning from min_order
 till the required memory space is available.

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/drm_buddy.c | 67 
+++--

  include/drm/drm_buddy.h |  3 ++


No users?


Other question is how can a buddy allocator fragment in the first place?


The fragmentation is due to pages now being tracked as dirty/clear. 
Should the allocator merge together a page that is dirty with a page 
that is cleared? When should it do that? User wants to be able to keep 
the two separate if possible. For example, freeing one single dirty 
page can dirty a huge swathe of your already cleared pages if they are 
merged together. Or do you have some other ideas here?


Sorry, that was not what I meant. I should probably have been clearer.

That dirty and clean pages are now kept separated is obvious, but why do 
you need to de-fragment them at some point?


Ah, right. At the very least we need to do something similar to this at 
fini(), just to ensure we properly merge everything back together so we 
can correctly tear down the mm. Outside of that the thinking was that it 
might be useful to call when allocating larger min page-sizes. You might 
now be failing the allocation due to fragmentation, and so in some cases 
might be better off running some kind of defrag step first, instead of 
failing the allocation and trying to evict stuff. Anyway, if that is not 
a concern for amdgpu, then we just need to handle the fini() case and 
can keep this internal.




Christian.





Christian.




  2 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 33ad0cfbd54c..fac423d2cb73 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -276,10 +276,12 @@ drm_get_buddy(struct drm_buddy_block *block)
  }
  EXPORT_SYMBOL(drm_get_buddy);
  -static void __drm_buddy_free(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+static unsigned int __drm_buddy_free(struct drm_buddy *mm,
+ struct drm_buddy_block *block,
+ bool defrag)
  {
  struct drm_buddy_block *parent;
+    unsigned int order;
    while ((parent = block->parent)) {
  struct drm_buddy_block *buddy;
@@ -289,12 +291,14 @@ static void __drm_buddy_free(struct drm_buddy 
*mm,

  if (!drm_buddy_block_is_free(buddy))
  break;
  -    if (drm_buddy_block_is_clear(block) !=
-    drm_buddy_block_is_clear(buddy))
-    break;
+    if (!defrag) {
+    if (drm_buddy_block_is_clear(block) !=
+    drm_buddy_block_is_clear(buddy))
+    break;
  -    if (drm_buddy_block_is_clear(block))
-    mark_cleared(parent);
+    if (drm_buddy_block_is_clear(block))
+    mark_cleared(parent);
+    }


Maybe check if the two blocks are incompatible and chuck a warn if 
they are not? Main thing is not to hide issues with split blocks 
that should have been merged before.



list_del(>link);
  @@ -304,8 +308,49 @@ static void __drm_buddy_free(struct 
drm_buddy *mm,

  block = parent;
  }
  +    order = drm_buddy_block_order(block);
  mark_free(mm, block);
+
+    return order;
+}
+
+/**
+ * drm_buddy_defrag - Defragmentation routine
+ *
+ * @mm: DRM buddy manager
+ * @min_order: minimum order in the freelist to begin
+ * the defragmentation process
+ *
+ * Driver calls the defragmentation function when the
+ * requested memory allocation returns -ENOSPC.
+ */
+void drm_buddy_defrag(struct drm_buddy *mm,
+  unsigned int min_order)


Just wondering if we need "full defrag" also? We would probably need 
to call this at fini() anyway.



+{
+    struct drm_buddy_block *block;
+    struct list_head *list;
+    unsigned int order;
+    int i;
+
+    if (min_order > mm->max_order)
+    return;
+
+    for (i = min_order - 1; i >= 0; i--) {


Need to be careful with min_order = 0 ?


+    list = >free_list[i];
+    if (list_empty(list))
+    continue;
+
+    list_for_each_entry_reverse(block, list, link) {


Don't we need the safe_reverse() variant here, since this is 
removing from the list?



+    if (!block->parent)
+    continue;
+
+    order = __drm_buddy_free(mm, block, 1);
+    if (order >= min_order)
+    return;
+    }
+    }
  }
+EXPORT_SYMBOL(drm_buddy_defrag);
    /**
   * drm_buddy_free_block - free a block
@@ -321,7 +366,7 @@ void drm_buddy_free_block(struct drm_buddy *mm,
  

Re: [PATCH v6 3/3] drm/buddy: Add defragmentation support

2024-02-16 Thread Matthew Auld

On 16/02/2024 12:33, Christian König wrote:

Am 16.02.24 um 13:23 schrieb Matthew Auld:

On 08/02/2024 15:50, Arunpravin Paneer Selvam wrote:

Add a function to support defragmentation.

v1: Defragment the memory beginning from min_order
 till the required memory space is available.

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/drm_buddy.c | 67 +++--
  include/drm/drm_buddy.h |  3 ++


No users?


Other question is how can a buddy allocator fragment in the first place?


The fragmentation is due to pages now being tracked as dirty/clear. 
Should the allocator merge together a page that is dirty with a page 
that is cleared? When should it do that? User wants to be able to keep 
the two separate if possible. For example, freeing one single dirty page 
can dirty a huge swathe of your already cleared pages if they are merged 
together. Or do you have some other ideas here?




Christian.




  2 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 33ad0cfbd54c..fac423d2cb73 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -276,10 +276,12 @@ drm_get_buddy(struct drm_buddy_block *block)
  }
  EXPORT_SYMBOL(drm_get_buddy);
  -static void __drm_buddy_free(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+static unsigned int __drm_buddy_free(struct drm_buddy *mm,
+ struct drm_buddy_block *block,
+ bool defrag)
  {
  struct drm_buddy_block *parent;
+    unsigned int order;
    while ((parent = block->parent)) {
  struct drm_buddy_block *buddy;
@@ -289,12 +291,14 @@ static void __drm_buddy_free(struct drm_buddy *mm,
  if (!drm_buddy_block_is_free(buddy))
  break;
  -    if (drm_buddy_block_is_clear(block) !=
-    drm_buddy_block_is_clear(buddy))
-    break;
+    if (!defrag) {
+    if (drm_buddy_block_is_clear(block) !=
+    drm_buddy_block_is_clear(buddy))
+    break;
  -    if (drm_buddy_block_is_clear(block))
-    mark_cleared(parent);
+    if (drm_buddy_block_is_clear(block))
+    mark_cleared(parent);
+    }


Maybe check if the two blocks are incompatible and chuck a warn if 
they are not? Main thing is not to hide issues with split blocks that 
should have been merged before.



    list_del(>link);
  @@ -304,8 +308,49 @@ static void __drm_buddy_free(struct drm_buddy 
*mm,

  block = parent;
  }
  +    order = drm_buddy_block_order(block);
  mark_free(mm, block);
+
+    return order;
+}
+
+/**
+ * drm_buddy_defrag - Defragmentation routine
+ *
+ * @mm: DRM buddy manager
+ * @min_order: minimum order in the freelist to begin
+ * the defragmentation process
+ *
+ * Driver calls the defragmentation function when the
+ * requested memory allocation returns -ENOSPC.
+ */
+void drm_buddy_defrag(struct drm_buddy *mm,
+  unsigned int min_order)


Just wondering if we need "full defrag" also? We would probably need to 
call this at fini() anyway.



+{
+    struct drm_buddy_block *block;
+    struct list_head *list;
+    unsigned int order;
+    int i;
+
+    if (min_order > mm->max_order)
+    return;
+
+    for (i = min_order - 1; i >= 0; i--) {


Need to be careful with min_order = 0 ?


+    list = >free_list[i];
+    if (list_empty(list))
+    continue;
+
+    list_for_each_entry_reverse(block, list, link) {


Don't we need the safe_reverse() variant here, since this is removing 
from the list?



+    if (!block->parent)
+    continue;
+
+    order = __drm_buddy_free(mm, block, 1);
+    if (order >= min_order)
+    return;
+    }
+    }
  }
+EXPORT_SYMBOL(drm_buddy_defrag);
    /**
   * drm_buddy_free_block - free a block
@@ -321,7 +366,7 @@ void drm_buddy_free_block(struct drm_buddy *mm,
  if (drm_buddy_block_is_clear(block))
  mm->clear_avail += drm_buddy_block_size(mm, block);
  -    __drm_buddy_free(mm, block);
+    __drm_buddy_free(mm, block, 0);
  }
  EXPORT_SYMBOL(drm_buddy_free_block);
  @@ -470,7 +515,7 @@ __alloc_range_bias(struct drm_buddy *mm,
  if (buddy &&
  (drm_buddy_block_is_free(block) &&
   drm_buddy_block_is_free(buddy)))
-    __drm_buddy_free(mm, block);
+    __drm_buddy_free(mm, block, 0);
  return ERR_PTR(err);
  }
  @@ -588,7 +633,7 @@ alloc_from_freelist(struct drm_buddy *mm,
    err_undo:
  if (tmp != order)
-    __drm_buddy_free(mm, block);
+    __drm_buddy_free(mm, block, 0);
  return ERR_PTR(err);
  }
  @@ -668,7 +713,7 @@ static int __alloc_range(struct drm_buddy *mm,
  if (buddy &&
  (drm_buddy_block_is_free(block) &&
  

Re: [PATCH v6 3/3] drm/buddy: Add defragmentation support

2024-02-16 Thread Matthew Auld

On 08/02/2024 15:50, Arunpravin Paneer Selvam wrote:

Add a function to support defragmentation.

v1: Defragment the memory beginning from min_order
 till the required memory space is available.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/drm_buddy.c | 67 +++--
  include/drm/drm_buddy.h |  3 ++


No users?


  2 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 33ad0cfbd54c..fac423d2cb73 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -276,10 +276,12 @@ drm_get_buddy(struct drm_buddy_block *block)
  }
  EXPORT_SYMBOL(drm_get_buddy);
  
-static void __drm_buddy_free(struct drm_buddy *mm,

-struct drm_buddy_block *block)
+static unsigned int __drm_buddy_free(struct drm_buddy *mm,
+struct drm_buddy_block *block,
+bool defrag)
  {
struct drm_buddy_block *parent;
+   unsigned int order;
  
  	while ((parent = block->parent)) {

struct drm_buddy_block *buddy;
@@ -289,12 +291,14 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
  
-		if (drm_buddy_block_is_clear(block) !=

-   drm_buddy_block_is_clear(buddy))
-   break;
+   if (!defrag) {
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
  
-		if (drm_buddy_block_is_clear(block))

-   mark_cleared(parent);
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+   }


Maybe check if the two blocks are incompatible and chuck a warn if they 
are not? Main thing is not to hide issues with split blocks that should 
have been merged before.


  
  		list_del(>link);
  
@@ -304,8 +308,49 @@ static void __drm_buddy_free(struct drm_buddy *mm,

block = parent;
}
  
+	order = drm_buddy_block_order(block);

mark_free(mm, block);
+
+   return order;
+}
+
+/**
+ * drm_buddy_defrag - Defragmentation routine
+ *
+ * @mm: DRM buddy manager
+ * @min_order: minimum order in the freelist to begin
+ * the defragmentation process
+ *
+ * Driver calls the defragmentation function when the
+ * requested memory allocation returns -ENOSPC.
+ */
+void drm_buddy_defrag(struct drm_buddy *mm,
+ unsigned int min_order)


Just wondering if we need "full defrag" also? We would probably need to 
call this at fini() anyway.



+{
+   struct drm_buddy_block *block;
+   struct list_head *list;
+   unsigned int order;
+   int i;
+
+   if (min_order > mm->max_order)
+   return;
+
+   for (i = min_order - 1; i >= 0; i--) {


Need to be careful with min_order = 0 ?


+   list = >free_list[i];
+   if (list_empty(list))
+   continue;
+
+   list_for_each_entry_reverse(block, list, link) {


Don't we need the safe_reverse() variant here, since this is removing 
from the list?



+   if (!block->parent)
+   continue;
+
+   order = __drm_buddy_free(mm, block, 1);
+   if (order >= min_order)
+   return;
+   }
+   }
  }
+EXPORT_SYMBOL(drm_buddy_defrag);
  
  /**

   * drm_buddy_free_block - free a block
@@ -321,7 +366,7 @@ void drm_buddy_free_block(struct drm_buddy *mm,
if (drm_buddy_block_is_clear(block))
mm->clear_avail += drm_buddy_block_size(mm, block);
  
-	__drm_buddy_free(mm, block);

+   __drm_buddy_free(mm, block, 0);
  }
  EXPORT_SYMBOL(drm_buddy_free_block);
  
@@ -470,7 +515,7 @@ __alloc_range_bias(struct drm_buddy *mm,

if (buddy &&
(drm_buddy_block_is_free(block) &&
 drm_buddy_block_is_free(buddy)))
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
return ERR_PTR(err);
  }
  
@@ -588,7 +633,7 @@ alloc_from_freelist(struct drm_buddy *mm,
  
  err_undo:

if (tmp != order)
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
return ERR_PTR(err);
  }
  
@@ -668,7 +713,7 @@ static int __alloc_range(struct drm_buddy *mm,

if (buddy &&
(drm_buddy_block_is_free(block) &&
 drm_buddy_block_is_free(buddy)))
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
  
  err_free:

if (err == -ENOSPC && total_allocated_on_err) {
diff --git a/include/drm/drm_buddy.h b/

Re: [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature

2024-02-16 Thread Matthew Auld

On 08/02/2024 15:49, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fall back to uncleared if we can't find the cleared blocks.
   When the driver requests uncleared memory we try to use uncleared but
   fall back to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in arguments.

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 


Probably needs a new unit test.

I think we are missing something to forcefully re-merge everything at 
fini()? In theory we can just call the defrag routine. Otherwise we 
might trigger various warnings since the root(s) might still be split.


Also one nit below. Otherwise I think looks good.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 192 ++
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c|  10 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  18 +-
  6 files changed, 187 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
  
  error_free_blocks:

-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
  error_fini:
ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
  
  	amdgpu_vram_mgr_do_reserve(man);
  
-	drm_buddy_free_list(mm, >blocks);

+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
  
  	atomic64_sub(vis_usage, >vis_usage);

@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
  
  	list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {

-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..33ad0cfbd54c 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
  }
  
+static void clear_reset(struct drm_buddy_block *block)

+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);
  
+	if (drm_buddy_block_is_clear(block)) {

+   mark_cleared(block->left);
+   mark_cleared(block->right);
+   clear_reset(block);
+   }
+
mark_split(block);
  
  	return 0;

@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
  
+		if (drm_buddy_block_is_clear(block) !=

+   drm_buddy_block_is_clear(buddy))
+   break;
+
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+
list_del(>link);
  
  		drm_block_free(mm, block);

@@ -295,26 +318,61 @@ void drm_buddy_free_block(struct drm_buddy *mm,
  {
BUG_ON(!drm_buddy_block_is_allocated(block));
mm-&g

Re: [PATCH] drm/buddy: Modify duplicate list_splice_tail call

2024-02-16 Thread Matthew Auld

On 16/02/2024 10:00, Arunpravin Paneer Selvam wrote:

Remove the duplicate list_splice_tail call when the
total_allocated < size condition is true.

Cc:  # 6.7+
Fixes: 8746c6c9dfa3 ("drm/buddy: Fix alloc_range() error handling code")
Reported-by: Bert Karwatzki 
Signed-off-by: Arunpravin Paneer Selvam 
---
  drivers/gpu/drm/drm_buddy.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c1a99bf4dffd..c4222b886db7 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -538,13 +538,13 @@ static int __alloc_range(struct drm_buddy *mm,
list_add(>left->tmp_link, dfs);
} while (1);
  
-	list_splice_tail(, blocks);

-
if (total_allocated < size) {
err = -ENOSPC;
goto err_free;
}
  
+	list_splice_tail(, blocks);


Sigh. Can we extend the unit test(s) to catch this?

Reviewed-by: Matthew Auld 


+
return 0;
  
  err_undo:


base-commit: a64056bb5a3215bd31c8ce17d609ba0f4d5c55ea


[PATCH 6/6] drm/xe/stolen: ignore first page for FBC

2024-02-15 Thread Matthew Auld
Seems like we can potentially hit underruns if the CFB offset is within
the first page of stolen. Just like i915, skip the first page.

BSpec: 50214
Reported-by: Matt Roper 
Signed-off-by: Matthew Auld 
---
 drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h 
b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
index bd233007c1b7..003474cfdf31 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
@@ -19,6 +19,9 @@ static inline int i915_gem_stolen_insert_node_in_range(struct 
xe_device *xe,
int err;
u32 flags = XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_STOLEN_BIT;
 
+   if (start < SZ_4K)
+   start = SZ_4K;
+
if (align)
size = ALIGN(size, align);
 
-- 
2.43.0



[PATCH 4/6] drm/tests/drm_buddy: add alloc_range_bias test

2024-02-15 Thread Matthew Auld
Sanity check range bias with DRM_BUDDY_RANGE_ALLOCATION.

Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 218 +
 1 file changed, 218 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index edacc1adb28f..3d4b29686132 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -14,11 +14,216 @@
 
 #include "../lib/drm_random.h"
 
+static unsigned int random_seed;
+
 static inline u64 get_size(int order, u64 chunk_size)
 {
return (1 << order) * chunk_size;
 }
 
+static void drm_test_buddy_alloc_range_bias(struct kunit *test)
+{
+   u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem;
+   DRM_RND_STATE(prng, random_seed);
+   unsigned int i, count, *order;
+   struct drm_buddy mm;
+   LIST_HEAD(allocated);
+
+   bias_size = SZ_1M;
+   ps = roundup_pow_of_two(prandom_u32_state() % bias_size);
+   ps = max(SZ_4K, ps);
+   mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */
+
+   kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps);
+
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(, mm_size, ps),
+  "buddy_init failed\n");
+
+   count = mm_size / bias_size;
+   order = drm_random_order(count, );
+   KUNIT_EXPECT_TRUE(test, order);
+
+   /*
+* Idea is to split the address space into uniform bias ranges, and then
+* in some random order allocate within each bias, using various
+* patterns within. This should detect if allocations leak out from a
+* given bias, for example.
+*/
+
+   for (i = 0; i < count; i++) {
+   LIST_HEAD(tmp);
+   u64 size;
+
+   bias_start = order[i] * bias_size;
+   bias_end = bias_start + bias_size;
+   bias_rem = bias_size;
+
+   /* internal round_up too big */
+   KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(, bias_start,
+bias_end, 
bias_size + ps, bias_size,
+,
+
DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc failed with bias(%x-%x), 
size=%u, ps=%u\n",
+ bias_start, bias_end, bias_size, 
bias_size);
+
+   /* size too big */
+   KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(, bias_start,
+bias_end, 
bias_size + ps, ps,
+,
+
DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc didn't fail with 
bias(%x-%x), size=%u, ps=%u\n",
+ bias_start, bias_end, bias_size + ps, ps);
+
+   /* bias range too small for size */
+   KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(, bias_start + 
ps,
+bias_end, 
bias_size, ps,
+,
+
DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc didn't fail with 
bias(%x-%x), size=%u, ps=%u\n",
+ bias_start + ps, bias_end, bias_size, ps);
+
+   /* bias misaligned */
+   KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(, bias_start + 
ps,
+bias_end - ps,
+bias_size >> 1, 
bias_size >> 1,
+,
+
DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc h didn't fail with 
bias(%x-%x), size=%u, ps=%u\n",
+ bias_start + ps, bias_end - ps, bias_size 
>> 1, bias_size >> 1);
+
+   /* single big page */
+   KUNIT_ASSERT_FALSE_MSG(test,
+  drm_buddy_alloc_blocks(, bias_start,
+ bias_end, 
bias_size, bias_size,
+ ,
+ 
DRM_BUDDY_RANGE_AL

[PATCH 5/6] drm/xe/stolen: lower the default alignment

2024-02-15 Thread Matthew Auld
No need to be so aggressive here. The upper layers will already apply
the needed alignment, plus some allocations might wish to skip it. Main
issue is that we might want to have start/end bias range which doesn't
match the default alignment which is rejected by the allocator.

Signed-off-by: Matthew Auld 
Cc: Matt Roper 
---
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c 
b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 662f1e9bfc65..2e94f90e1018 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -203,7 +203,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
 {
struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(>drm, sizeof(*mgr), 
GFP_KERNEL);
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
-   u64 stolen_size, io_size, pgsize;
+   u64 stolen_size, io_size;
int err;
 
if (IS_SRIOV_VF(xe))
@@ -220,10 +220,6 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
return;
}
 
-   pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
-   if (pgsize < PAGE_SIZE)
-   pgsize = PAGE_SIZE;
-
/*
 * We don't try to attempt partial visible support for stolen vram,
 * since stolen is always at the end of vram, and the BAR size is pretty
@@ -234,7 +230,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
io_size = stolen_size;
 
err = __xe_ttm_vram_mgr_init(xe, >base, XE_PL_STOLEN, stolen_size,
-io_size, pgsize);
+io_size, SZ_4K);
if (err) {
drm_dbg_kms(>drm, "Stolen mgr init failed: %i\n", err);
return;
-- 
2.43.0



[PATCH 3/6] drm/buddy: check range allocation matches alignment

2024-02-15 Thread Matthew Auld
Likely not a big deal for real users, but for consistency we should
respect the min_page_size here. Main issue is that bias allocations
turn into a normal range allocation if the range and size match
exactly, and in the next patch we want to add some unit tests for this
part of the api.

Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
---
 drivers/gpu/drm/drm_buddy.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index d09540d4065b..ee9913016626 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -771,8 +771,12 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
return -EINVAL;
 
/* Actual range allocation */
-   if (start + size == end)
+   if (start + size == end) {
+   if (!IS_ALIGNED(start | end, min_block_size))
+   return -EINVAL;
+
return __drm_buddy_alloc_range(mm, start, size, NULL, blocks);
+   }
 
original_size = size;
original_min_size = min_block_size;
-- 
2.43.0



[PATCH 1/6] drm/tests/drm_buddy: fix 32b build

2024-02-15 Thread Matthew Auld
Doesn't seem to compile on 32b, presumably due to u64 mod/division.
Simplest is to just switch over to u32 here. Also make print modifiers
consistent with that.

Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous test")
Reported-by: Geert Uytterhoeven 
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Cc: Maxime Ripard 
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index fee6bec757d1..edacc1adb28f 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -21,7 +21,7 @@ static inline u64 get_size(int order, u64 chunk_size)
 
 static void drm_test_buddy_alloc_contiguous(struct kunit *test)
 {
-   u64 mm_size, ps = SZ_4K, i, n_pages, total;
+   u32 mm_size, ps = SZ_4K, i, n_pages, total;
struct drm_buddy_block *block;
struct drm_buddy mm;
LIST_HEAD(left);
@@ -56,30 +56,30 @@ static void drm_test_buddy_alloc_contiguous(struct kunit 
*test)
KUNIT_ASSERT_FALSE_MSG(test,
   drm_buddy_alloc_blocks(, 0, mm_size,
  ps, ps, list, 0),
-  "buddy_alloc hit an error size=%d\n",
+  "buddy_alloc hit an error size=%u\n",
   ps);
} while (++i < n_pages);
 
KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
   3 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%d\n", 3 * ps);
+  "buddy_alloc didn't error size=%u\n", 3 * ps);
 
drm_buddy_free_list(, );
KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
   3 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+  "buddy_alloc didn't error size=%u\n", 3 * ps);
KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
   2 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%llu\n", 2 * ps);
+  "buddy_alloc didn't error size=%u\n", 2 * ps);
 
drm_buddy_free_list(, );
KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
   3 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+  "buddy_alloc didn't error size=%u\n", 3 * ps);
/*
 * At this point we should have enough contiguous space for 2 blocks,
 * however they are never buddies (since we freed middle and right) so
@@ -88,13 +88,13 @@ static void drm_test_buddy_alloc_contiguous(struct kunit 
*test)
KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
2 * ps, ps, 
,

DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc hit an error size=%d\n", 2 * ps);
+  "buddy_alloc hit an error size=%u\n", 2 * ps);
 
drm_buddy_free_list(, );
KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
3 * ps, ps, 
,

DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc hit an error size=%d\n", 3 * ps);
+  "buddy_alloc hit an error size=%u\n", 3 * ps);
 
total = 0;
list_for_each_entry(block, , link)
-- 
2.43.0



[PATCH 2/6] drm/buddy: fix range bias

2024-02-15 Thread Matthew Auld
There is a corner case here where start/end is after/before the block
range we are currently checking. If so we need to be sure that splitting
the block will eventually give us the block size we need. To do that we
should adjust the block range to account for the start/end, and only
continue with the split if the size/alignment will fit the requested
size. Not doing so can result in leaving split blocks unmerged when it
eventually fails.

Fixes: afea229fe102 ("drm: improve drm_buddy_alloc function")
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Christian König 
Cc:  # v5.18+
---
 drivers/gpu/drm/drm_buddy.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c1a99bf4dffd..d09540d4065b 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm,
 u64 start, u64 end,
 unsigned int order)
 {
+   u64 req_size = mm->chunk_size << order;
struct drm_buddy_block *block;
struct drm_buddy_block *buddy;
LIST_HEAD(dfs);
@@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm,
if (drm_buddy_block_is_allocated(block))
continue;
 
+   if (block_start < start || block_end > end) {
+   u64 adjusted_start = max(block_start, start);
+   u64 adjusted_end = min(block_end, end);
+
+   if (round_down(adjusted_end + 1, req_size) <=
+   round_up(adjusted_start, req_size))
+   continue;
+   }
+
if (contains(start, end, block_start, block_end) &&
order == drm_buddy_block_order(block)) {
/*
-- 
2.43.0



Re: [PATCH 2/2] drm/tests/drm_buddy: add alloc_contiguous test

2024-02-13 Thread Matthew Auld

On 13/02/2024 13:52, Arunpravin Paneer Selvam wrote:

Sanity check DRM_BUDDY_CONTIGUOUS_ALLOCATION.

References: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Signed-off-by: Matthew Auld 
Reviewed-by: Arunpravin Paneer Selvam 


It looks like you changed the patch authorship here.


Cc: Arunpravin Paneer Selvam 
Cc: Limonciello 
Cc: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
---
  drivers/gpu/drm/tests/drm_buddy_test.c | 89 ++
  1 file changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index ea2af6bd9abe..fee6bec757d1 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -8,6 +8,7 @@
  
  #include 

  #include 
+#include 
  
  #include 
  
@@ -18,6 +19,93 @@ static inline u64 get_size(int order, u64 chunk_size)

return (1 << order) * chunk_size;
  }
  
+static void drm_test_buddy_alloc_contiguous(struct kunit *test)

+{
+   u64 mm_size, ps = SZ_4K, i, n_pages, total;
+   struct drm_buddy_block *block;
+   struct drm_buddy mm;
+   LIST_HEAD(left);
+   LIST_HEAD(middle);
+   LIST_HEAD(right);
+   LIST_HEAD(allocated);
+
+   mm_size = 16 * 3 * SZ_4K;
+
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(, mm_size, ps));
+
+   /*
+* Idea is to fragment the address space by alternating block
+* allocations between three different lists; one for left, middle and
+* right. We can then free a list to simulate fragmentation. In
+* particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
+* including the try_harder path.
+*/
+
+   i = 0;
+   n_pages = mm_size / ps;
+   do {
+   struct list_head *list;
+   int slot = i % 3;
+
+   if (slot == 0)
+   list = 
+   else if (slot == 1)
+   list = 
+   else
+   list = 
+   KUNIT_ASSERT_FALSE_MSG(test,
+  drm_buddy_alloc_blocks(, 0, mm_size,
+ ps, ps, list, 0),
+  "buddy_alloc hit an error size=%d\n",
+  ps);
+   } while (++i < n_pages);
+
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%d\n", 3 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  2 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 2 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   /*
+* At this point we should have enough contiguous space for 2 blocks,
+* however they are never buddies (since we freed middle and right) so
+* will require the try_harder logic to find them.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   2 * ps, ps, 
,
+   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc hit an error size=%d\n", 2 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   3 * ps, ps, 
,
+   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc hit an error size=%d\n", 3 * ps);
+
+   total = 0;
+   list_for_each_entry(block, , link)
+   total += drm_buddy_block_size(, block);
+
+   KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
+
+   drm_buddy_free_list(, );
+   drm_buddy_fini();
+}
+
  static void drm_test_bu

Re: [PATCH] drm/tests/drm_buddy: add alloc_contiguous test

2024-02-12 Thread Matthew Auld

On 12/02/2024 08:23, Arunpravin Paneer Selvam wrote:

Hi Matthew,

Can I push this test case along with the bug fix patch.


Sure. Please go ahead.



Thanks,
Arun.

On 2/8/2024 8:06 PM, Matthew Auld wrote:

Sanity check DRM_BUDDY_CONTIGUOUS_ALLOCATION.

References: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Limonciello 
Cc: Christian König 
---
  drivers/gpu/drm/tests/drm_buddy_test.c | 89 ++
  1 file changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c

index ea2af6bd9abe..4215d8b5fcf0 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -8,6 +8,7 @@
  #include 
  #include 
+#include 
  #include 
@@ -18,6 +19,93 @@ static inline u64 get_size(int order, u64 chunk_size)
  return (1 << order) * chunk_size;
  }
+static void drm_test_buddy_alloc_contiguous(struct kunit *test)
+{
+    u64 mm_size, ps = SZ_4K, i, n_pages, total;
+    struct drm_buddy_block *block;
+    struct drm_buddy mm;
+    LIST_HEAD(left);
+    LIST_HEAD(middle);
+    LIST_HEAD(right);
+    LIST_HEAD(allocated);
+
+    mm_size = 16 * 3 * SZ_4K;
+
+    KUNIT_EXPECT_FALSE(test, drm_buddy_init(, mm_size, ps));
+
+    /*
+ * Idea is to fragment the address space by alternating block
+ * allocations between three different lists; one for left, 
middle and

+ * right. We can then free a list to simulate fragmentation. In
+ * particular we want to exercise the 
DRM_BUDDY_CONTIGUOUS_ALLOCATION,

+ * including the try_harder path.
+ */
+
+    i = 0;
+    n_pages = mm_size / ps;
+    do {
+    struct list_head *list;
+    int slot = i % 3;
+
+    if (slot == 0)
+    list = 
+    else if (slot == 1)
+    list = 
+    else
+    list = 
+    KUNIT_ASSERT_FALSE_MSG(test,
+   drm_buddy_alloc_blocks(, 0, mm_size,
+  ps, ps, list, 0),
+   "buddy_alloc hit an error size=%d\n",
+   ps);
+    } while (++i < n_pages);
+
+    KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   3 * ps, ps, ,
+   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+   "buddy_alloc didn't error size=%d\n", 3 * ps);
+
+    drm_buddy_free_list(, );
+    KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   3 * ps, ps, ,
+   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+   "buddy_alloc didn't error size=%llu\n", 3 * ps);
+    KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   2 * ps, ps, ,
+   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+   "buddy_alloc didn't error size=%llu\n", 2 * ps);
+
+    drm_buddy_free_list(, );
+    KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   3 * ps, ps, ,
+   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+   "buddy_alloc didn't error size=%llu\n", 3 * ps);
+    /*
+ * At this point we should have enough contiguous space for 2 
blocks,
+ * however they are never buddies (since we freed middle and 
right) so

+ * will require the try_harder logic to find them.
+ */
+    KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   2 * ps, ps, ,
+   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+   "buddy_alloc hit an error size=%d\n", 2 * ps);
+
+    drm_buddy_free_list(, );
+    KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   3 * ps, ps, ,
+   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+   "buddy_alloc hit an error size=%d\n", 3 * ps);
+
+    total = 0;
+    list_for_each_entry(block, , link)
+    total += drm_buddy_block_size(, block);
+
+    KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
+
+    drm_buddy_free_list(, );
+    drm_buddy_fini();
+}
+
  static void drm_test_buddy_alloc_pathological(struct kunit *test)
  {
  u64 mm_size, size, start = 0;
@@ -280,6 +368,7 @@ static struct kunit_case drm_buddy_tests[] = {
  KUNIT_CASE(drm_test_buddy_alloc_optimistic),
  KUNIT_CASE(drm_test_buddy_alloc_pessimistic),
  KUNIT_CASE(drm_test_buddy_alloc_pathological),
+    KUNIT_CASE(drm_test_buddy_alloc_contiguous),
  {}
  };




Re: [PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-08 Thread Matthew Auld

On 08/02/2024 14:17, Matthew Auld wrote:

On 08/02/2024 13:47, Arunpravin Paneer Selvam wrote:

Hi Matthew,

On 2/8/2024 7:00 PM, Matthew Auld wrote:

On 07/02/2024 17:44, Arunpravin Paneer Selvam wrote:

A few users have observed display corruption when they boot
the machine to KDE Plasma or play games. We have root-caused
the problem: whenever alloc_range() couldn't find the required
memory blocks, the function was returning SUCCESS in some of
the corner cases.


Can you please give an example here?

In the try-hard contiguous allocation, for example when the requested 
memory is 1024 pages, it might go and pick the highest and last block 
(of size 512 pages) in the freelist, where no more space exists in the 
total address range. In this kind of corner case, alloc_range was 
returning success even though the allocated size was less than the 
requested size. Hence in try_hard_contiguous_allocation, we do not 
proceed to the LHS allocation and we return with only the RHS 
allocation, which holds only 512 pages. This leads to display 
corruption in many use cases (I think mainly when a contiguous huge 
buffer is requested), mainly on APU platforms.


Ok, I guess other thing is doing:

lhs_offset = drm_buddy_block_offset(block) - lhs_size;

I presume it's possible for block_offset < lhs_size here, which might be 
funny?


I think would also be good to add some basic unit test here:
https://patchwork.freedesktop.org/patch/577497/?series=129671&rev=1

Test passes with your patch, and ofc fails without it.

Just the question of the lhs_offset above,
Reviewed-by: Matthew Auld 





Thanks,
Arun.


The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Gitlab ticket link - 
https://gitlab.freedesktop.org/drm/amd/-/issues/3097

Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Signed-off-by: Arunpravin Paneer Selvam 


Tested-by: Mario Limonciello 
---
  drivers/gpu/drm/drm_buddy.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
  } while (1);
    list_splice_tail(, blocks);
+
+    if (total_allocated < size) {
+    err = -ENOSPC;
+    goto err_free;
+    }
+
  return 0;
    err_undo:




[PATCH] drm/tests/drm_buddy: add alloc_contiguous test

2024-02-08 Thread Matthew Auld
Sanity check DRM_BUDDY_CONTIGUOUS_ALLOCATION.

References: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Limonciello 
Cc: Christian König 
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 89 ++
 1 file changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index ea2af6bd9abe..4215d8b5fcf0 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -18,6 +19,93 @@ static inline u64 get_size(int order, u64 chunk_size)
return (1 << order) * chunk_size;
 }
 
+static void drm_test_buddy_alloc_contiguous(struct kunit *test)
+{
+   u64 mm_size, ps = SZ_4K, i, n_pages, total;
+   struct drm_buddy_block *block;
+   struct drm_buddy mm;
+   LIST_HEAD(left);
+   LIST_HEAD(middle);
+   LIST_HEAD(right);
+   LIST_HEAD(allocated);
+
+   mm_size = 16 * 3 * SZ_4K;
+
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(, mm_size, ps));
+
+   /*
+* Idea is to fragment the address space by alternating block
+* allocations between three different lists; one for left, middle and
+* right. We can then free a list to simulate fragmentation. In
+* particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
+* including the try_harder path.
+*/
+
+   i = 0;
+   n_pages = mm_size / ps;
+   do {
+   struct list_head *list;
+   int slot = i % 3;
+
+   if (slot == 0)
+   list = 
+   else if (slot == 1)
+   list = 
+   else
+   list = 
+   KUNIT_ASSERT_FALSE_MSG(test,
+  drm_buddy_alloc_blocks(, 0, mm_size,
+ ps, ps, list, 0),
+  "buddy_alloc hit an error size=%d\n",
+  ps);
+   } while (++i < n_pages);
+
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%d\n", 3 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  2 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 2 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   /*
+* At this point we should have enough contiguous space for 2 blocks,
+* however they are never buddies (since we freed middle and right) so
+* will require the try_harder logic to find them.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  2 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc hit an error size=%d\n", 2 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc hit an error size=%d\n", 3 * ps);
+
+   total = 0;
+   list_for_each_entry(block, , link)
+   total += drm_buddy_block_size(, block);
+
+   KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
+
+   drm_buddy_free_list(, );
+   drm_buddy_fini();
+}
+
 static void drm_test_buddy_alloc_pathological(struct kunit *test)
 {
u64 mm_size, size, start = 0;
@@ -280,6 +368,7 @@ static struct kunit_case drm_buddy_tests[] = {
KUNIT_CASE(drm_test_buddy_alloc_optimi

Re: [PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-08 Thread Matthew Auld

On 08/02/2024 13:47, Arunpravin Paneer Selvam wrote:

Hi Matthew,

On 2/8/2024 7:00 PM, Matthew Auld wrote:

On 07/02/2024 17:44, Arunpravin Paneer Selvam wrote:

A few users have observed display corruption when they boot
the machine to KDE Plasma or play games. We have root-caused
the problem: whenever alloc_range() couldn't find the required
memory blocks, the function was returning SUCCESS in some of
the corner cases.


Can you please give an example here?

In the try-hard contiguous allocation, for example when the requested 
memory is 1024 pages, it might go and pick the highest and last block 
(of size 512 pages) in the freelist, where no more space exists in the 
total address range. In this kind of corner case, alloc_range was 
returning success even though the allocated size was less than the 
requested size. Hence in try_hard_contiguous_allocation, we do not 
proceed to the LHS allocation and we return with only the RHS 
allocation, which holds only 512 pages. This leads to display 
corruption in many use cases (I think mainly when a contiguous huge 
buffer is requested), mainly on APU platforms.


Ok, I guess other thing is doing:

lhs_offset = drm_buddy_block_offset(block) - lhs_size;

I presume it's possible for block_offset < lhs_size here, which might be 
funny?




Thanks,
Arun.


The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Gitlab ticket link - 
https://gitlab.freedesktop.org/drm/amd/-/issues/3097

Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Signed-off-by: Arunpravin Paneer Selvam 


Tested-by: Mario Limonciello 
---
  drivers/gpu/drm/drm_buddy.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
  } while (1);
    list_splice_tail(, blocks);
+
+    if (total_allocated < size) {
+    err = -ENOSPC;
+    goto err_free;
+    }
+
  return 0;
    err_undo:




Re: [PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-08 Thread Matthew Auld

On 07/02/2024 17:44, Arunpravin Paneer Selvam wrote:

A few users have observed display corruption when they boot
the machine to KDE Plasma or play games. We have root-caused
the problem: whenever alloc_range() couldn't find the required
memory blocks, the function was returning SUCCESS in some of
the corner cases.


Can you please give an example here?



The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Gitlab ticket link - https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Signed-off-by: Arunpravin Paneer Selvam 
Tested-by: Mario Limonciello 
---
  drivers/gpu/drm/drm_buddy.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
} while (1);
  
  	list_splice_tail(, blocks);

+
+   if (total_allocated < size) {
+   err = -ENOSPC;
+   goto err_free;
+   }
+
return 0;
  
  err_undo:


Re: [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature

2024-01-31 Thread Matthew Auld

On 30/01/2024 20:30, Arunpravin Paneer Selvam wrote:

Hi Matthew,

On 12/21/2023 12:51 AM, Matthew Auld wrote:

Hi,

On 14/12/2023 13:42, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.


I was not involved, but it looks like we have also tried enabling the 
clear-on-free idea for VRAM in i915 and then also tracking that in the 
allocator, however that work unfortunately is not upstream. The code 
is open source though: 
https://github.com/intel-gpu/intel-gpu-i915-backports/blob/backport/main/drivers/gpu/drm/i915/i915_buddy.c#L300


It looks like some of the design differences there are having two 
separate free lists, so mm->clean and mm->dirty (sounds reasonable to 
me). And also the inclusion of a de-fragmentation routine, since buddy 
blocks are now not always merged back, we might choose to run the 
defrag in some cases, which also sounds reasonable. IIRC in amdgpu 
userspace can control the page-size for an allocation, so perhaps you 
would want to run it first if the allocation fails, before trying to 
evict stuff?
I checked the clear-on-free idea implemented in i915. In the amdgpu version, 
we are clearing all the blocks in the amdgpu free routine and DRM buddy 
expects only the DRM_BUDDY_CLEARED flag. Basically, we are keeping the 
cleared blocks ready to be allocated when the user requests cleared 
memory. We observed that this improves the performance in games 
and resolves the stutter issues as well. I see the i915 active fences part 
does the same job for i915. Could we move this part into the i915 free 
routine and set the DRM_BUDDY_CLEARED flag?


On de-fragmentation, I have included a function which can be called at 
places where we get -ENOSPC. This routine will merge back the clear and 
dirty blocks together to form a larger block of requested size. I am 
wondering where we could use this routine as for the non-contiguous 
memory we have the fallback method and for the contiguous memory we have 
the try harder method which searches through the tree.


Don't you also want to call it from your vram manager when the requested 
page size is something large, before trying to evict stuff? That could 
now fail due to fragmentation IIUC. Or am I misreading amdgpu_vram_mgr_new()?




I agree we can have 2 lists (clear list and dirty list) and this would 
reduce the search iterations. But we need to handle the 2 lists design 
in all the functions which might require more time for testing on all 
platforms. Could we just go ahead with 1 list (free list) for now and I 
am going to take up this work as my next task.


Sounds good.



Thanks,
Arun.




v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 169 +++---
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  10 +-
  include/drm/drm_buddy.h   |  18 +-
  5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
  error_fini:
  ttm_resource_fini(man, >base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
    atomic64_sub(vis_usage, >vis_usage);
@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
*adev)

  kfree(rsv);
    list_for_each_entry_safe(rsv, temp, >reserved_pages, 
blocks

Re: [PATCH v5 1/3] drm/buddy: Implement tracking clear page feature

2024-01-31 Thread Matthew Auld

On 30/01/2024 19:48, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 169 +++---
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c|  10 +-
  include/drm/drm_buddy.h   |  18 +-
  5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
  
  error_free_blocks:

-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
  error_fini:
ttm_resource_fini(man, >base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
  
  	amdgpu_vram_mgr_do_reserve(man);
  
-	drm_buddy_free_list(mm, >blocks);

+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
  
  	atomic64_sub(vis_usage, >vis_usage);

@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
  
  	list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {

-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
  }
  
+static void clear_reset(struct drm_buddy_block *block)

+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);
  
+	if (drm_buddy_block_is_clear(block)) {

+   mark_cleared(block->left);
+   mark_cleared(block->right);
+   clear_reset(block);
+   }
+
mark_split(block);
  
  	return 0;

@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
  
+		if (drm_buddy_block_is_clear(block) !=

+   drm_buddy_block_is_clear(buddy))
+   break;
+
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+
list_del(>link);
  
  		drm_block_free(mm, block);

@@ -295,6 +318,9 @@ void drm_buddy_free_block(struct drm_buddy *mm,
  {
BUG_ON(!drm_buddy_block_is_allocated(block));
mm->avail += drm_buddy_block_size(mm, block);
+   if (drm_buddy_block_is_clear(block))
+   mm->clear_avail += drm_buddy_block_size(mm, block);
+
__drm_buddy_free(mm, block);
  }
  EXPORT_SYMBOL(drm_buddy_free_block);
@@ -305,10 +331,20 @@ EXPORT_SYMBOL(drm_buddy_free_block);
   * @mm: DRM buddy manager
   * @objects: input list head to free blocks
   */
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
+void drm_buddy_free_list(struct drm_buddy *mm,
+struct list_head *objects,
+unsigned long flags)
  {
struct drm_buddy_block *block, *on;
  
+	if (flags & DRM_BUDDY_CLEARED) {

+   list_for_each_entry(block, objects, link)
+

Re: [PATCH] drm/doc/rfc: Removing missing reference to xe.rst

2024-01-19 Thread Matthew Auld

On 19/01/2024 16:25, Rodrigo Vivi wrote:

On Tue, Jan 16, 2024 at 05:03:31PM -0500, Rodrigo Vivi wrote:

The file has already been deleted as the tasks were completed.
However the index reference was missed behind.


Gentle ping on this one.
I should have mentioned here that this fixes a doc build warning:

Documentation/gpu/rfc/index.rst:35: WARNING: toctree contains reference to 
nonexisting document 'gpu/rfc/xe'



Fixes: d11dc7aa98e5 ("drm/doc/rfc: Remove Xe's pre-merge plan")
Cc: Lucas De Marchi 
Signed-off-by: Rodrigo Vivi 

Reviewed-by: Matthew Auld 


Re: [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature

2023-12-20 Thread Matthew Auld

Hi,

On 14/12/2023 13:42, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.


I was not involved, but it looks like we have also tried enabling the 
clear-on-free idea for VRAM in i915 and then also tracking that in the 
allocator, however that work unfortunately is not upstream. The code is 
open source though: 
https://github.com/intel-gpu/intel-gpu-i915-backports/blob/backport/main/drivers/gpu/drm/i915/i915_buddy.c#L300


It looks like some of the design differences there are having two 
separate free lists, so mm->clean and mm->dirty (sounds reasonable to 
me). And also the inclusion of a de-fragmentation routine, since buddy 
blocks are now not always merged back, we might choose to run the defrag 
in some cases, which also sounds reasonable. IIRC in amdgpu userspace 
can control the page-size for an allocation, so perhaps you would want 
to run it first if the allocation fails, before trying to evict stuff?




v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 169 +++---
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c|  10 +-
  include/drm/drm_buddy.h   |  18 +-
  5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
  
  error_free_blocks:

-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
  error_fini:
ttm_resource_fini(man, >base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
  
  	amdgpu_vram_mgr_do_reserve(man);
  
-	drm_buddy_free_list(mm, >blocks);

+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
  
  	atomic64_sub(vis_usage, >vis_usage);

@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
  
  	list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {

-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
  }
  
+static void clear_reset(struct drm_buddy_block *block)

+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);
  
+	if (drm_buddy_block_is_clear(block)) {

+   mark_cleared(block->left);
+   mark_cleared(block->right);
+   clear_reset(block);
+   }
+
mark_split(block);
  
  	return 0;

@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
  
+		if (drm_buddy_block_is_clear(block) !=

+   drm_buddy_block_is_clear(buddy))
+   break;
+
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+
list_del(>link);
  
  		drm_block_free(mm, block);

@@ -295,6 +318,9 @@ void 

Re: [PATCH v2 1/3] drm/buddy: Improve contiguous memory allocation

2023-09-11 Thread Matthew Auld
t I guess in practice should be pretty meh, given 
that the extra rhs is hopefully not too big in the corner case where the 
alignment doesn't fit the min_block_size?


Anyway, for patches 1-3,
Reviewed-by: Matthew Auld 


+   } else if (err != -ENOSPC) {
+   drm_buddy_free_list(mm, blocks);
+   return err;
+   }
+   /* Free blocks for the next iteration */
+   drm_buddy_free_list(mm, blocks);
+   }
+
+   return -ENOSPC;
  }
  
  /**

@@ -626,7 +691,7 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
  
  	new_start = drm_buddy_block_offset(block);

list_add(>tmp_link, );
-   err =  __alloc_range(mm, , new_start, new_size, blocks);
+   err =  __alloc_range(mm, , new_start, new_size, blocks, NULL);
if (err) {
mark_allocated(block);
mm->avail -= drm_buddy_block_size(mm, block);
@@ -645,7 +710,7 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
   * @start: start of the allowed range for this block
   * @end: end of the allowed range for this block
   * @size: size of the allocation
- * @min_page_size: alignment of the allocation
+ * @min_block_size: alignment of the allocation
   * @blocks: output list head to add allocated blocks
   * @flags: DRM_BUDDY_*_ALLOCATION flags
   *
@@ -660,23 +725,24 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
   */
  int drm_buddy_alloc_blocks(struct drm_buddy *mm,
   u64 start, u64 end, u64 size,
-  u64 min_page_size,
+  u64 min_block_size,
   struct list_head *blocks,
   unsigned long flags)
  {
struct drm_buddy_block *block = NULL;
+   u64 original_size, original_min_size;
unsigned int min_order, order;
-   unsigned long pages;
LIST_HEAD(allocated);
+   unsigned long pages;
int err;
  
  	if (size < mm->chunk_size)

return -EINVAL;
  
-	if (min_page_size < mm->chunk_size)

+   if (min_block_size < mm->chunk_size)
return -EINVAL;
  
-	if (!is_power_of_2(min_page_size))

+   if (!is_power_of_2(min_block_size))
return -EINVAL;
  
  	if (!IS_ALIGNED(start | end | size, mm->chunk_size))

@@ -690,14 +756,23 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
  
  	/* Actual range allocation */

if (start + size == end)
-   return __drm_buddy_alloc_range(mm, start, size, blocks);
-
-   if (!IS_ALIGNED(size, min_page_size))
-   return -EINVAL;
+   return __drm_buddy_alloc_range(mm, start, size, NULL, blocks);
+
+   original_size = size;
+   original_min_size = min_block_size;
+
+   /* Roundup the size to power of 2 */
+   if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
+   size = roundup_pow_of_two(size);
+   min_block_size = size;
+   /* Align size value to min_block_size */
+   } else if (!IS_ALIGNED(size, min_block_size)) {
+   size = round_up(size, min_block_size);
+   }
  
  	pages = size >> ilog2(mm->chunk_size);

order = fls(pages) - 1;
-   min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
+   min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
  
  	do {

order = min(order, (unsigned int)fls(pages) - 1);
@@ -716,6 +791,16 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
break;
  
  			if (order-- == min_order) {

+   if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
+   !(flags & DRM_BUDDY_RANGE_ALLOCATION))
+   /*
+* Try contiguous block allocation 
through
+* try harder method
+*/
+   return __alloc_contig_try_harder(mm,
+
original_size,
+
original_min_size,
+
blocks);
err = -ENOSPC;
goto err_free;
}
@@ -732,6 +817,31 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
break;
} while (1);
  
+	/* Trim the allocated block to the required size */

+   if (original_size != size) {
+   struct list_head *trim_list;
+   LIST_HEAD(temp);
+   u64 trim_size;
+
+   trim_list = 
+   trim_size = original_size;
+
+   if (!list_is_singular()) {
+   block = list_last_entry(, typeof(*block), 
link);
+ 

Re: [PATCH 1/3] drm/buddy: Fix contiguous memory allocation issues

2023-08-21 Thread Matthew Auld

Hi,

On 21/08/2023 11:14, Arunpravin Paneer Selvam wrote:

Contiguous requests are currently implemented such that
the size is rounded up to a power of 2 and the corresponding order
block is picked from the freelist.

In addition to the older method, the new method rounds down
the size to a power of 2 and picks the corresponding order block
from the freelist. For the remaining size we traverse the
tree and try to allocate either from the freelist block's buddy
or from the peer block. If the remaining size is not free in the
peer/buddy block, we pick the next freelist block and repeat
the same method.

Moved contiguous/alignment size computation part and trim
function to the drm buddy manager.


I think we should also mention somewhere what issue this is trying to 
solve. IIUC the roundup_pow_of_two() might in some cases trigger 
-ENOSPC even though there might be enough free space, and so to help 
with that we introduce a try harder mechanism.




Signed-off-by: Arunpravin Paneer Selvam 
---
  drivers/gpu/drm/drm_buddy.c | 253 ++--
  include/drm/drm_buddy.h |   6 +-
  2 files changed, 248 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 7098f125b54a..220f60c08a03 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -569,6 +569,197 @@ static int __drm_buddy_alloc_range(struct drm_buddy *mm,
return __alloc_range(mm, , start, size, blocks);
  }
  
+static int __alloc_contiguous_block_from_buddy(struct drm_buddy *mm,

+  u64 size,
+  u64 min_block_size,
+  struct drm_buddy_block *block,
+  struct list_head *blocks)
+{
+   struct drm_buddy_block *buddy, *parent = NULL;
+   u64 start, offset = 0;
+   LIST_HEAD(dfs);
+   int err;
+
+   if (!block)
+   return -EINVAL;
+
+   buddy = __get_buddy(block);
+   if (!buddy)
+   return -ENOSPC;
+
+   if (drm_buddy_block_is_allocated(buddy))
+   return -ENOSPC;
+
+   parent = block->parent;
+   if (!parent)
+   return -ENOSPC;
+
+   if (block->parent->right == block) {
+   u64 remaining;
+
+   /* Compute the leftover size for allocation */
+   remaining = max((size - drm_buddy_block_size(mm, buddy)),
+   min_block_size);
+   if (!IS_ALIGNED(remaining, min_block_size))
+   remaining = round_up(remaining, min_block_size);
+
+   /* Check if remaining size is greater than buddy block size */
+   if (drm_buddy_block_size(mm, buddy) < remaining)
+   return -ENOSPC;
+
+   offset = drm_buddy_block_size(mm, buddy) - remaining;
+   }
+
+   list_add(>tmp_link, );
+   start = drm_buddy_block_offset(parent) + offset;
+
+   err = __alloc_range(mm, , start, size, blocks);
+   if (err)
+   return -ENOSPC;
+
+   return 0;
+}
+
+static int __alloc_contiguous_block_from_peer(struct drm_buddy *mm,
+ u64 size,
+ u64 min_block_size,
+ struct drm_buddy_block *block,
+ struct list_head *blocks)
+{
+   struct drm_buddy_block *first, *peer, *tmp;
+   struct drm_buddy_block *parent = NULL;
+   u64 start, offset = 0;
+   unsigned int order;
+   LIST_HEAD(dfs);
+   int err;
+
+   if (!block)
+   return -EINVAL;
+
+   order = drm_buddy_block_order(block);
+   /* Add freelist block to dfs list */
+   list_add(>tmp_link, );
+
+   tmp = block;
+   parent = block->parent;
+   while (parent) {
+   if (block->parent->left == block) {
+   if (parent->left != tmp) {
+   peer = parent->left;
+   break;
+   }
+   } else {
+   if (parent->right != tmp) {
+   peer = parent->right;
+   break;
+   }
+   }
+
+   tmp = parent;
+   parent = tmp->parent;
+   }
+
+   if (!parent)
+   return -ENOSPC;
+
+   do {
+   if (drm_buddy_block_is_allocated(peer))
+   return -ENOSPC;
+   /* Exit loop if peer block order is equal to block order */
+   if (drm_buddy_block_order(peer) == order)
+   break;
+
+   if (drm_buddy_block_is_split(peer)) {
+   /* Traverse down to the block order level */
+   if (block->parent->left == 

Re: [PATCH v2] drm/ttm: fix one use-after-free

2023-07-05 Thread Matthew Auld
On Wed, 5 Jul 2023 at 11:08, Lang Yu  wrote:
>
> bo->kref is increased once(kref_init()) in ttm_bo_release,
> but decreased twice(ttm_bo_put()) respectively in
> ttm_bo_delayed_delete and ttm_bo_cleanup_refs,
> which is unbalanced.
>
> Just clean up bo resource in one place for a delayed deleted bo.
>
> Fixes: 9bff18d13473 ("drm/ttm: use per BO cleanup workers")
>
> [   67.399887] refcount_t: underflow; use-after-free.
> [   67.399901] WARNING: CPU: 0 PID: 3172 at lib/refcount.c:28 
> refcount_warn_saturate+0xc2/0x110
> [   67.400124] RIP: 0010:refcount_warn_saturate+0xc2/0x110
> [   67.400173] Call Trace:
> [   67.400176]  
> [   67.400181]  ttm_mem_evict_first+0x4fe/0x5b0 [ttm]
> [   67.400216]  ttm_bo_mem_space+0x1e3/0x240 [ttm]
> [   67.400239]  ttm_bo_validate+0xc7/0x190 [ttm]
> [   67.400253]  ? ww_mutex_trylock+0x1b1/0x390
> [   67.400266]  ttm_bo_init_reserved+0x183/0x1c0 [ttm]
> [   67.400280]  ? __rwlock_init+0x3d/0x70
> [   67.400292]  amdgpu_bo_create+0x1cd/0x4f0 [amdgpu]
> [   67.400607]  ? __pfx_amdgpu_bo_user_destroy+0x10/0x10 [amdgpu]
> [   67.400980]  amdgpu_bo_create_user+0x38/0x70 [amdgpu]
> [   67.401291]  amdgpu_gem_object_create+0x77/0xb0 [amdgpu]
> [   67.401641]  ? __pfx_amdgpu_bo_user_destroy+0x10/0x10 [amdgpu]
> [   67.401958]  amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x228/0xa30 [amdgpu]
> [   67.402433]  kfd_ioctl_alloc_memory_of_gpu+0x14e/0x390 [amdgpu]
> [   67.402824]  ? lock_release+0x13f/0x290
> [   67.402838]  kfd_ioctl+0x1e0/0x640 [amdgpu]
> [   67.403205]  ? __pfx_kfd_ioctl_alloc_memory_of_gpu+0x10/0x10 [amdgpu]
> [   67.403579]  ? tomoyo_file_ioctl+0x19/0x20
> [   67.403590]  __x64_sys_ioctl+0x95/0xd0
> [   67.403601]  do_syscall_64+0x3b/0x90
> [   67.403609]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
>
> Signed-off-by: Lang Yu 
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 89 
>  1 file changed, 10 insertions(+), 79 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 326a3d13a829..1e073dfb1332 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -224,82 +224,6 @@ static void ttm_bo_flush_all_fences(struct 
> ttm_buffer_object *bo)
> dma_resv_iter_end();
>  }
>
> -/**
> - * ttm_bo_cleanup_refs
> - * If bo idle, remove from lru lists, and unref.
> - * If not idle, block if possible.
> - *
> - * Must be called with lru_lock and reservation held, this function
> - * will drop the lru lock and optionally the reservation lock before 
> returning.
> - *
> - * @bo:The buffer object to clean-up
> - * @interruptible: Any sleeps should occur interruptibly.
> - * @no_wait_gpu:   Never wait for gpu. Return -EBUSY instead.
> - * @unlock_resv:   Unlock the reservation lock as well.
> - */
> -
> -static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> -  bool interruptible, bool no_wait_gpu,
> -  bool unlock_resv)
> -{
> -   struct dma_resv *resv = >base._resv;
> -   int ret;
> -
> -   if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP))
> -   ret = 0;
> -   else
> -   ret = -EBUSY;
> -
> -   if (ret && !no_wait_gpu) {
> -   long lret;
> -
> -   if (unlock_resv)
> -   dma_resv_unlock(bo->base.resv);
> -   spin_unlock(>bdev->lru_lock);
> -
> -   lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
> -interruptible,
> -30 * HZ);
> -
> -   if (lret < 0)
> -   return lret;
> -   else if (lret == 0)
> -   return -EBUSY;
> -
> -   spin_lock(>bdev->lru_lock);
> -   if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
> -   /*
> -* We raced, and lost, someone else holds the 
> reservation now,
> -* and is probably busy in ttm_bo_cleanup_memtype_use.
> -*
> -* Even if it's not the case, because we finished 
> waiting any
> -* delayed destruction would succeed, so just return 
> success
> -* here.
> -*/
> -   spin_unlock(>bdev->lru_lock);
> -   return 0;
> -   }
> -   ret = 0;
> -   }
> -
> -   if (ret) {
> -   if (unlock_resv)
> -   dma_resv_unlock(bo->base.resv);
> -   spin_unlock(>bdev->lru_lock);
> -   return ret;
> -   }
> -
> -   spin_unlock(>bdev->lru_lock);
> -   ttm_bo_cleanup_memtype_use(bo);
> -
> -   if (unlock_resv)
> -   dma_resv_unlock(bo->base.resv);
> -
> -   ttm_bo_put(bo);

The put() here is indeed broken and leads to 

Re: [PATCH v2] drm: fix drmm_mutex_init()

2023-05-22 Thread Matthew Auld

On 22/05/2023 10:43, Thomas Zimmermann wrote:

Hi

Am 19.05.23 um 11:07 schrieb Matthew Auld:

In mutex_init() lockdep identifies a lock by defining a special static
key for each lock class. However if we wrap the macro in a function,
like in drmm_mutex_init(), we end up generating:

int drmm_mutex_init(struct drm_device *dev, struct mutex *lock)
{
   static struct lock_class_key __key;

   __mutex_init((lock), "lock", &__key);
   
}

The static __key here is what lockdep uses to identify the lock class,
however since this is just a normal function the key here will be
created once, where all callers then use the same key. In effect the
mutex->dep_map.key will be the same pointer for different
drmm_mutex_init() callers. This then results in impossible lockdep
splats since lockdep thinks completely unrelated locks are the same lock
class.

To fix this turn drmm_mutex_init() into a macro such that it generates a
different "static struct lock_class_key __key" for each invocation,
which looks to be in line with what mutex_init() wants.

v2:
   - Revamp the commit message with clearer explanation of the issue.
   - Rather export __drmm_mutex_release() than static inline.

Reported-by: Thomas Hellström 
Reported-by: Sarah Walker 
Fixes: e13f13e039dc ("drm: Add DRM-managed mutex_init()")
Cc: Stanislaw Gruszka 
Cc: Boris Brezillon 
Cc: Thomas Zimmermann 
Cc: Jocelyn Falempe 
Cc: Daniel Vetter 
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Matthew Auld 


Acked-by: Thomas Zimmermann 

Shall I add the patch to drm-misc-fixes?


Yes, please do. Thanks.



Best regards
Thomas


---
  drivers/gpu/drm/drm_managed.c | 22 ++
  include/drm/drm_managed.h | 18 +-
  2 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/drm_managed.c 
b/drivers/gpu/drm/drm_managed.c

index 4cf214de50c4..c21c3f623033 100644
--- a/drivers/gpu/drm/drm_managed.c
+++ b/drivers/gpu/drm/drm_managed.c
@@ -264,28 +264,10 @@ void drmm_kfree(struct drm_device *dev, void *data)
  }
  EXPORT_SYMBOL(drmm_kfree);
-static void drmm_mutex_release(struct drm_device *dev, void *res)
+void __drmm_mutex_release(struct drm_device *dev, void *res)
  {
  struct mutex *lock = res;
  mutex_destroy(lock);
  }
-
-/**
- * drmm_mutex_init - _device-managed mutex_init()
- * @dev: DRM device
- * @lock: lock to be initialized
- *
- * Returns:
- * 0 on success, or a negative errno code otherwise.
- *
- * This is a _device-managed version of mutex_init(). The 
initialized

- * lock is automatically destroyed on the final drm_dev_put().
- */
-int drmm_mutex_init(struct drm_device *dev, struct mutex *lock)
-{
-    mutex_init(lock);
-
-    return drmm_add_action_or_reset(dev, drmm_mutex_release, lock);
-}
-EXPORT_SYMBOL(drmm_mutex_init);
+EXPORT_SYMBOL(__drmm_mutex_release);
diff --git a/include/drm/drm_managed.h b/include/drm/drm_managed.h
index 359883942612..ad08f834af40 100644
--- a/include/drm/drm_managed.h
+++ b/include/drm/drm_managed.h
@@ -105,6 +105,22 @@ char *drmm_kstrdup(struct drm_device *dev, const 
char *s, gfp_t gfp);

  void drmm_kfree(struct drm_device *dev, void *data);
-int drmm_mutex_init(struct drm_device *dev, struct mutex *lock);
+void __drmm_mutex_release(struct drm_device *dev, void *res);
+
+/**
+ * drmm_mutex_init - _device-managed mutex_init()
+ * @dev: DRM device
+ * @lock: lock to be initialized
+ *
+ * Returns:
+ * 0 on success, or a negative errno code otherwise.
+ *
+ * This is a _device-managed version of mutex_init(). The 
initialized

+ * lock is automatically destroyed on the final drm_dev_put().
+ */
+#define drmm_mutex_init(dev, lock) ({ \
+    mutex_init(lock); \
+    drmm_add_action_or_reset(dev, __drmm_mutex_release, lock); \
+}) \
  #endif




[PATCH v2] drm: fix drmm_mutex_init()

2023-05-19 Thread Matthew Auld
In mutex_init() lockdep identifies a lock by defining a special static
key for each lock class. However if we wrap the macro in a function,
like in drmm_mutex_init(), we end up generating:

int drmm_mutex_init(struct drm_device *dev, struct mutex *lock)
{
  static struct lock_class_key __key;

  __mutex_init((lock), "lock", &__key);
  
}

The static __key here is what lockdep uses to identify the lock class,
however since this is just a normal function the key here will be
created once, where all callers then use the same key. In effect the
mutex->dep_map.key will be the same pointer for different
drmm_mutex_init() callers. This then results in impossible lockdep
splats since lockdep thinks completely unrelated locks are the same lock
class.

To fix this turn drmm_mutex_init() into a macro such that it generates a
different "static struct lock_class_key __key" for each invocation,
which looks to be in line with what mutex_init() wants.

v2:
  - Revamp the commit message with clearer explanation of the issue.
  - Rather export __drmm_mutex_release() than static inline.

Reported-by: Thomas Hellström 
Reported-by: Sarah Walker 
Fixes: e13f13e039dc ("drm: Add DRM-managed mutex_init()")
Cc: Stanislaw Gruszka 
Cc: Boris Brezillon 
Cc: Thomas Zimmermann 
Cc: Jocelyn Falempe 
Cc: Daniel Vetter 
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Matthew Auld 
---
 drivers/gpu/drm/drm_managed.c | 22 ++
 include/drm/drm_managed.h | 18 +-
 2 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/drm_managed.c b/drivers/gpu/drm/drm_managed.c
index 4cf214de50c4..c21c3f623033 100644
--- a/drivers/gpu/drm/drm_managed.c
+++ b/drivers/gpu/drm/drm_managed.c
@@ -264,28 +264,10 @@ void drmm_kfree(struct drm_device *dev, void *data)
 }
 EXPORT_SYMBOL(drmm_kfree);
 
-static void drmm_mutex_release(struct drm_device *dev, void *res)
+void __drmm_mutex_release(struct drm_device *dev, void *res)
 {
struct mutex *lock = res;
 
mutex_destroy(lock);
 }
-
-/**
- * drmm_mutex_init - _device-managed mutex_init()
- * @dev: DRM device
- * @lock: lock to be initialized
- *
- * Returns:
- * 0 on success, or a negative errno code otherwise.
- *
- * This is a _device-managed version of mutex_init(). The initialized
- * lock is automatically destroyed on the final drm_dev_put().
- */
-int drmm_mutex_init(struct drm_device *dev, struct mutex *lock)
-{
-   mutex_init(lock);
-
-   return drmm_add_action_or_reset(dev, drmm_mutex_release, lock);
-}
-EXPORT_SYMBOL(drmm_mutex_init);
+EXPORT_SYMBOL(__drmm_mutex_release);
diff --git a/include/drm/drm_managed.h b/include/drm/drm_managed.h
index 359883942612..ad08f834af40 100644
--- a/include/drm/drm_managed.h
+++ b/include/drm/drm_managed.h
@@ -105,6 +105,22 @@ char *drmm_kstrdup(struct drm_device *dev, const char *s, 
gfp_t gfp);
 
 void drmm_kfree(struct drm_device *dev, void *data);
 
-int drmm_mutex_init(struct drm_device *dev, struct mutex *lock);
+void __drmm_mutex_release(struct drm_device *dev, void *res);
+
+/**
+ * drmm_mutex_init - _device-managed mutex_init()
+ * @dev: DRM device
+ * @lock: lock to be initialized
+ *
+ * Returns:
+ * 0 on success, or a negative errno code otherwise.
+ *
+ * This is a _device-managed version of mutex_init(). The initialized
+ * lock is automatically destroyed on the final drm_dev_put().
+ */
+#define drmm_mutex_init(dev, lock) ({   \
+   mutex_init(lock);\
+   drmm_add_action_or_reset(dev, __drmm_mutex_release, lock);   \
+})  \
 
 #endif
-- 
2.40.1



Re: [PATCH] drm/managed: Define drmm_mutex_init() as a macro to fix lockdep

2023-05-19 Thread Matthew Auld
On Fri, 19 May 2023 at 09:55, Boris Brezillon
 wrote:
>
> drmm_mutex_init() needs to be defined as a macro if we want
> lockdep to classify locks properly. If we don't do that, all locks
> will be considered as belonging to the same lock class, leading to
> false positive deadlock reports.
>
> Signed-off-by: Boris Brezillon 
> Reported-by: Sarah Walker 

Yeah, we also encountered the same issue. Patch is here:
https://patchwork.freedesktop.org/patch/537605/?series=117891=2

> ---
>  drivers/gpu/drm/drm_managed.c | 26 --
>  include/drm/drm_managed.h | 30 +-
>  2 files changed, 29 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_managed.c b/drivers/gpu/drm/drm_managed.c
> index 4cf214de50c4..71c49819a7a2 100644
> --- a/drivers/gpu/drm/drm_managed.c
> +++ b/drivers/gpu/drm/drm_managed.c
> @@ -263,29 +263,3 @@ void drmm_kfree(struct drm_device *dev, void *data)
> free_dr(dr_match);
>  }
>  EXPORT_SYMBOL(drmm_kfree);
> -
> -static void drmm_mutex_release(struct drm_device *dev, void *res)
> -{
> -   struct mutex *lock = res;
> -
> -   mutex_destroy(lock);
> -}
> -
> -/**
> - * drmm_mutex_init - _device-managed mutex_init()
> - * @dev: DRM device
> - * @lock: lock to be initialized
> - *
> - * Returns:
> - * 0 on success, or a negative errno code otherwise.
> - *
> - * This is a _device-managed version of mutex_init(). The initialized
> - * lock is automatically destroyed on the final drm_dev_put().
> - */
> -int drmm_mutex_init(struct drm_device *dev, struct mutex *lock)
> -{
> -   mutex_init(lock);
> -
> -   return drmm_add_action_or_reset(dev, drmm_mutex_release, lock);
> -}
> -EXPORT_SYMBOL(drmm_mutex_init);
> diff --git a/include/drm/drm_managed.h b/include/drm/drm_managed.h
> index 359883942612..87ffb92a16ba 100644
> --- a/include/drm/drm_managed.h
> +++ b/include/drm/drm_managed.h
> @@ -105,6 +105,34 @@ char *drmm_kstrdup(struct drm_device *dev, const char 
> *s, gfp_t gfp);
>
>  void drmm_kfree(struct drm_device *dev, void *data);
>
> -int drmm_mutex_init(struct drm_device *dev, struct mutex *lock);
> +/* Private function, don't use. */
> +static inline void __drmm_mutex_release(struct drm_device *dev, void *res)
> +{
> +   struct mutex *lock = res;
> +
> +   mutex_destroy(lock);
> +}
> +
> +/**
> + * drmm_mutex_init - _device-managed mutex_init()
> + * @dev: DRM device
> + * @lock: lock to be initialized
> + *
> + * Returns:
> + * 0 on success, or a negative errno code otherwise.
> + *
> + * This is a _device-managed version of mutex_init(). The initialized
> + * lock is automatically destroyed on the final drm_dev_put().
> + *
> + * This needs to be defined as a macro to let lockdep classify locks
> + * properly. If we don't do that, all locks will be considered as
> + * belonging to the same lock class, leading to false positive lockdep
> + * reports.
> + */
> +#define drmm_mutex_init(dev, lock) \
> +   ({\
> +   mutex_init(lock); \
> +   drmm_add_action_or_reset(dev, __drmm_mutex_release, lock); \
> +   })
>
>  #endif
> --
> 2.40.1
>


Re: [PATCH v5 1/7] drm: fix drmm_mutex_init()

2023-05-17 Thread Matthew Auld

On 17/05/2023 17:21, Thomas Zimmermann wrote:

Hi

Am 17.05.23 um 17:22 schrieb Matthew Auld:

In mutex_init() lockdep seems to identify a lock by defining a static
key for each lock class. However if we wrap the whole thing in a
function the static key will be the same for everything calling that
function, which looks to be the case for drmm_mutex_init(). This then
results in impossible lockdep splats since lockdep thinks completely
unrelated locks are the same lock class. The other issue is that when
looking at splats we lose the actual lock name, where instead of seeing
something like xe->mem_access.lock for the name, we just see something
generic like lock#8.

Attempt to fix this by converting drmm_mutex_init() into a macro, which
should ensure that mutex_init() behaves as expected.


If that's what is required, then OK. But even with your commit message, I 
find it entirely non-obvious what the problem is. Isn't there a way to 
annotate drmm_mutex_init() so that lockdep treats it like a regular 
mutex_init()?


AFAICT the issue is that with the existing drmm_mutex_init() we 
basically end up generating:


int drmm_mutex_init(struct drm_device *dev, struct mutex *lock)
{
 static struct lock_class_key __key;

 __mutex_init((lock), "lock", &__key);
 
}

I think the special static __key is what lockdep uses to identify a lock 
class, so every time we call drmm_mutex_init() we should expect a 
different key. However since this is just a normal function the key will 
be created once and then all callers use the same key. For example, if 
you print mutex->dep_map.key you will get the same pointer underneath for 
different drmm_mutex_init callers. And then ofc lockdep gets really 
confused.


Turning it into a macro ensures that each drmm_mutex_init() generates a 
different "static struct lock_class_key __key" for each invocation, 
which looks to be in line with what mutex_init() wants.


I'm not sure if there is a better way to solve this...



Best regards
Thomas



Reported-by: Thomas Hellström 
Fixes: e13f13e039dc ("drm: Add DRM-managed mutex_init()")
Cc: Thomas Zimmermann 
Cc: Jocelyn Falempe 
Cc: Daniel Vetter 
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Matthew Auld 
---
  drivers/gpu/drm/drm_managed.c | 26 --
  include/drm/drm_managed.h | 23 ++-
  2 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/drm_managed.c 
b/drivers/gpu/drm/drm_managed.c

index 4cf214de50c4..71c49819a7a2 100644
--- a/drivers/gpu/drm/drm_managed.c
+++ b/drivers/gpu/drm/drm_managed.c
@@ -263,29 +263,3 @@ void drmm_kfree(struct drm_device *dev, void *data)
  free_dr(dr_match);
  }
  EXPORT_SYMBOL(drmm_kfree);
-
-static void drmm_mutex_release(struct drm_device *dev, void *res)
-{
-    struct mutex *lock = res;
-
-    mutex_destroy(lock);
-}
-
-/**
- * drmm_mutex_init - _device-managed mutex_init()
- * @dev: DRM device
- * @lock: lock to be initialized
- *
- * Returns:
- * 0 on success, or a negative errno code otherwise.
- *
- * This is a _device-managed version of mutex_init(). The 
initialized

- * lock is automatically destroyed on the final drm_dev_put().
- */
-int drmm_mutex_init(struct drm_device *dev, struct mutex *lock)
-{
-    mutex_init(lock);
-
-    return drmm_add_action_or_reset(dev, drmm_mutex_release, lock);
-}
-EXPORT_SYMBOL(drmm_mutex_init);
diff --git a/include/drm/drm_managed.h b/include/drm/drm_managed.h
index 359883942612..01f977e91933 100644
--- a/include/drm/drm_managed.h
+++ b/include/drm/drm_managed.h
@@ -105,6 +105,27 @@ char *drmm_kstrdup(struct drm_device *dev, const 
char *s, gfp_t gfp);

  void drmm_kfree(struct drm_device *dev, void *data);
-int drmm_mutex_init(struct drm_device *dev, struct mutex *lock);
+static inline void __drmm_mutex_release(struct drm_device *dev, void 
*res)

+{
+    struct mutex *lock = res;
+
+    mutex_destroy(lock);
+}
+
+/**
+ * drmm_mutex_init - _device-managed mutex_init()
+ * @dev: DRM device
+ * @lock: lock to be initialized
+ *
+ * Returns:
+ * 0 on success, or a negative errno code otherwise.
+ *
+ * This is a _device-managed version of mutex_init(). The 
initialized

+ * lock is automatically destroyed on the final drm_dev_put().
+ */
+#define drmm_mutex_init(dev, lock) ({ \
+    mutex_init(lock); \
+    drmm_add_action_or_reset(dev, __drmm_mutex_release, lock); \
+}) \
  #endif




Re: [PATCH v5 1/7] drm: fix drmm_mutex_init()

2023-05-17 Thread Matthew Auld

On 17/05/2023 17:05, Stanislaw Gruszka wrote:

On Wed, May 17, 2023 at 04:22:38PM +0100, Matthew Auld wrote:

In mutex_init() lockdep seems to identify a lock by defining a static
key for each lock class. However if we wrap the whole thing in a
function the static key will be the same for everything calling that
function, which looks to be the case for drmm_mutex_init(). This then
results in impossible lockdep splats since lockdep thinks completely
unrelated locks are the same lock class. The other issue is that when
looking at splats we lose the actual lock name, where instead of seeing
something like xe->mem_access.lock for the name, we just see something
generic like lock#8.

Attempt to fix this by converting drmm_mutex_init() into a macro, which
should ensure that mutex_init() behaves as expected.


Nice catch :-) we observed lockdep deadlock false alarms too, but I could
not spot the cause and we were adding lockdep_set_class(key) to avoid those.



+static inline void __drmm_mutex_release(struct drm_device *dev, void *res)


Can this be inline if used in drmm_add_action_or_reset() ?


I think so. Did I miss something here? It at least builds for me.





+{
+   struct mutex *lock = res;
+
+   mutex_destroy(lock);
+}
+
+/**
+ * drmm_mutex_init - _device-managed mutex_init()
+ * @dev: DRM device
+ * @lock: lock to be initialized
+ *
+ * Returns:
+ * 0 on success, or a negative errno code otherwise.
+ *
+ * This is a _device-managed version of mutex_init(). The initialized
+ * lock is automatically destroyed on the final drm_dev_put().
+ */
+#define drmm_mutex_init(dev, lock) ({   \
+   mutex_init(lock);\
+   drmm_add_action_or_reset(dev, __drmm_mutex_release, lock);   \
+})  \


Regards
Stanislaw




[PATCH v5 1/7] drm: fix drmm_mutex_init()

2023-05-17 Thread Matthew Auld
In mutex_init() lockdep seems to identify a lock by defining a static
key for each lock class. However if we wrap the whole thing in a
function the static key will be the same for everything calling that
function, which looks to be the case for drmm_mutex_init(). This then
results in impossible lockdep splats since lockdep thinks completely
unrelated locks are the same lock class. The other issue is that when
looking at splats we lose the actual lock name, where instead of seeing
something like xe->mem_access.lock for the name, we just see something
generic like lock#8.

Attempt to fix this by converting drmm_mutex_init() into a macro, which
should ensure that mutex_init() behaves as expected.

Reported-by: Thomas Hellström 
Fixes: e13f13e039dc ("drm: Add DRM-managed mutex_init()")
Cc: Thomas Zimmermann 
Cc: Jocelyn Falempe 
Cc: Daniel Vetter 
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Matthew Auld 
---
 drivers/gpu/drm/drm_managed.c | 26 --
 include/drm/drm_managed.h | 23 ++-
 2 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/drm_managed.c b/drivers/gpu/drm/drm_managed.c
index 4cf214de50c4..71c49819a7a2 100644
--- a/drivers/gpu/drm/drm_managed.c
+++ b/drivers/gpu/drm/drm_managed.c
@@ -263,29 +263,3 @@ void drmm_kfree(struct drm_device *dev, void *data)
free_dr(dr_match);
 }
 EXPORT_SYMBOL(drmm_kfree);
-
-static void drmm_mutex_release(struct drm_device *dev, void *res)
-{
-   struct mutex *lock = res;
-
-   mutex_destroy(lock);
-}
-
-/**
- * drmm_mutex_init - _device-managed mutex_init()
- * @dev: DRM device
- * @lock: lock to be initialized
- *
- * Returns:
- * 0 on success, or a negative errno code otherwise.
- *
- * This is a _device-managed version of mutex_init(). The initialized
- * lock is automatically destroyed on the final drm_dev_put().
- */
-int drmm_mutex_init(struct drm_device *dev, struct mutex *lock)
-{
-   mutex_init(lock);
-
-   return drmm_add_action_or_reset(dev, drmm_mutex_release, lock);
-}
-EXPORT_SYMBOL(drmm_mutex_init);
diff --git a/include/drm/drm_managed.h b/include/drm/drm_managed.h
index 359883942612..01f977e91933 100644
--- a/include/drm/drm_managed.h
+++ b/include/drm/drm_managed.h
@@ -105,6 +105,27 @@ char *drmm_kstrdup(struct drm_device *dev, const char *s, 
gfp_t gfp);
 
 void drmm_kfree(struct drm_device *dev, void *data);
 
-int drmm_mutex_init(struct drm_device *dev, struct mutex *lock);
+static inline void __drmm_mutex_release(struct drm_device *dev, void *res)
+{
+   struct mutex *lock = res;
+
+   mutex_destroy(lock);
+}
+
+/**
+ * drmm_mutex_init - _device-managed mutex_init()
+ * @dev: DRM device
+ * @lock: lock to be initialized
+ *
+ * Returns:
+ * 0 on success, or a negative errno code otherwise.
+ *
+ * This is a _device-managed version of mutex_init(). The initialized
+ * lock is automatically destroyed on the final drm_dev_put().
+ */
+#define drmm_mutex_init(dev, lock) ({   \
+   mutex_init(lock);\
+   drmm_add_action_or_reset(dev, __drmm_mutex_release, lock);   \
+})  \
 
 #endif
-- 
2.40.1



Re: [PATCH 7/7] drm/i915: Allow user to set cache at BO creation

2023-04-06 Thread Matthew Auld
On Sat, 1 Apr 2023 at 07:37,  wrote:
>
> From: Fei Yang 
>
> To comply with the design that buffer objects shall have immutable
> cache setting through out its life cycle, {set, get}_caching ioctl's
> are no longer supported from MTL onward. With that change caching
> policy can only be set at object creation time. The current code
> applies a default (platform dependent) cache setting for all objects.
> However this is not optimal for performance tuning. The patch extends
> the existing gem_create uAPI to let user set PAT index for the object
> at creation time.
> The new extension is platform independent, so UMD's can switch to using
> this extension for older platforms as well, while {set, get}_caching are
> still supported on these legacy platforms for compatibility reasons.

Do we forbid {set, get}_caching, when combined with this new extension
on the same BO? There is some documentation in @cache_dirty. The
concern is being able to subvert the flush-on-acquire for non-LLC.

>
> Cc: Chris Wilson 
> Cc: Matt Roper 
> Signed-off-by: Fei Yang 
> Reviewed-by: Andi Shyti 
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_create.c | 33 
>  include/uapi/drm/i915_drm.h| 36 ++
>  tools/include/uapi/drm/i915_drm.h  | 36 ++
>  3 files changed, 105 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> index e76c9703680e..1c6e2034d28e 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> @@ -244,6 +244,7 @@ struct create_ext {
> unsigned int n_placements;
> unsigned int placement_mask;
> unsigned long flags;
> +   unsigned int pat_index;
>  };
>
>  static void repr_placements(char *buf, size_t size,
> @@ -393,11 +394,39 @@ static int ext_set_protected(struct i915_user_extension 
> __user *base, void *data
> return 0;
>  }
>
> +static int ext_set_pat(struct i915_user_extension __user *base, void *data)
> +{
> +   struct create_ext *ext_data = data;
> +   struct drm_i915_private *i915 = ext_data->i915;
> +   struct drm_i915_gem_create_ext_set_pat ext;
> +   unsigned int max_pat_index;
> +
> +   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
> +offsetofend(struct drm_i915_gem_create_ext_set_pat, 
> rsvd));
> +
> +   if (copy_from_user(&ext, base, sizeof(ext)))
> +   return -EFAULT;
> +
> +   max_pat_index = INTEL_INFO(i915)->max_pat_index;
> +
> +   if (ext.pat_index > max_pat_index) {
> +   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
> +   ext.pat_index);
> +   return -EINVAL;
> +   }
> +
> +   ext_data->pat_index = ext.pat_index;
> +
> +   return 0;
> +}
> +
>  static const i915_user_extension_fn create_extensions[] = {
> [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
> [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
> +   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
>  };
>
> +#define PAT_INDEX_NOT_SET  0xffffffff
>  /**
>   * Creates a new mm object and returns a handle to it.
>   * @dev: drm device pointer
> @@ -417,6 +446,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
> *data,
> if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
> return -EINVAL;
>
> +   ext_data.pat_index = PAT_INDEX_NOT_SET;
> ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
>create_extensions,
>ARRAY_SIZE(create_extensions),
> @@ -453,5 +483,8 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
> *data,
> if (IS_ERR(obj))
> return PTR_ERR(obj);
>
> +   if (ext_data.pat_index != PAT_INDEX_NOT_SET)
> +   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
> +
> return i915_gem_publish(obj, file, &args->size, &args->handle);
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index dba7c5a5b25e..03c5c314846e 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext {
>  *
>  * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
>  * struct drm_i915_gem_create_ext_protected_content.
> +*
> +* For I915_GEM_CREATE_EXT_SET_PAT usage see
> +* struct drm_i915_gem_create_ext_set_pat.
>  */
>  #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
>  #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
> +#define I915_GEM_CREATE_EXT_SET_PAT 2
> __u64 extensions;
>  };
>
> @@ -3747,6 +3751,38 @@ struct drm_i915_gem_create_ext_protected_content {
> __u32 flags;
>  };
>
> +/**
> + * struct drm_i915_gem_create_ext_set_pat - The
> + * I915_GEM_CREATE_EXT_SET_PAT extension.
> + *
> + * If this 

Re: [PATCH] drm/i915: Fix context runtime accounting

2023-03-31 Thread Matthew Auld
On Mon, 20 Mar 2023 at 15:14, Tvrtko Ursulin
 wrote:
>
> From: Tvrtko Ursulin 
>
> When considering whether to mark one context as stopped and another as
> started we need to look at whether the previous and new _contexts_ are
> different and not just requests. Otherwise the software tracked context
> start time was incorrectly updated to the most recent lite-restore time-
> stamp, which was in some cases resulting in active time going backward,
> until the context switch (typically the heartbeat pulse) would synchronise
> with the hardware tracked context runtime. Easiest use case to observe
> this behaviour was with full screen clients with close to 100% engine
> load.
>
> Signed-off-by: Tvrtko Ursulin 
> Fixes: bb6287cb1886 ("drm/i915: Track context current active time")
> Cc:  # v5.19+

Seems reasonable to me, fwiw,
Reviewed-by: Matthew Auld 


Re: [PATCH] drm/ttm: drop extra ttm_bo_put in ttm_bo_cleanup_refs

2023-03-16 Thread Matthew Auld
On Thu, 16 Mar 2023 at 07:26, Christian König
 wrote:
>
> That was accidentally left over when we switched to the delayed delete
> worker.
>
> Suggested-by: Matthew Auld 
> Signed-off-by: Christian König 
> Fixes: 9bff18d13473 ("drm/ttm: use per BO cleanup workers")
> Reported-by: Steven Rostedt (Google) 
> Tested-by: Steven Rostedt (Google) 
Reviewed-by: Matthew Auld 


Re: [Intel-gfx] [BUG 6.3-rc1] Bad lock in ttm_bo_delayed_delete()

2023-03-15 Thread Matthew Auld
On Wed, 15 Mar 2023 at 18:41, Christian König
 wrote:
>
> Am 08.03.23 um 13:43 schrieb Steven Rostedt:
> > On Wed, 8 Mar 2023 07:17:38 +0100
> > Christian König  wrote:
> >
> >> What test case/environment do you run to trigger this?
> > I'm running a 32bit x86 qemu instance. Attached is the config.
> >
> > The libvirt xml file is here: https://rostedt.org/vm-images/tracetest-32.xml
> > and the VM image itself is here: 
> > https://rostedt.org/vm-images/tracetest-32.qcow2.bz2
>
> I've started to download that, but it will take about an hour. So I
> tried to avoid that for now.
>
> But looks like there isn't any other way to reproduce this, the code
> seems to work with both amdgpu and radeon.
>
> My suspicion is that we just have a reference count issue in qxl or ttm
> which was never noticed because it didn't cause any problems (except
> for a minor memory corruption).

Why does ttm_bo_cleanup_refs() do a bo_put() at the end? It doesn't
make sense to me. Say if the BO is in the process of being delay freed
(bo->deleted = true), and we just did the kref_init() in
ttm_bo_release(), it might drop that ref hitting ttm_bo_release() yet
again, this time doing the actual bo->destroy(), which frees the
object. The worker then fires at some later point calling
ttm_bo_delayed_delete(), but the BO has already been freed.

>
> Now you get a rain of warnings because we try to grab the lock in the
> delete worker.
>
> Christian.
>
> >
> > It happened again in another test (it's not 100% reproducible).
> >
> > [   23.234838] [ cut here ]
> > [   23.236391] DEBUG_LOCKS_WARN_ON(lock->magic != lock)
> > [   23.236429] WARNING: CPU: 0 PID: 61 at kernel/locking/mutex.c:582 
> > __ww_mutex_lock.constprop.0+0x566/0xfec
> > [   23.240990] Modules linked in:
> > [   23.242368] CPU: 0 PID: 61 Comm: kworker/0:1H Not tainted 
> > 6.3.0-rc1-test-1-ga98bd42762ed-dirty #972
> > [   23.245106] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 
> > 1.16.0-debian-1.16.0-5 04/01/2014
> > [   23.247900] Workqueue: ttm ttm_bo_delayed_delete
> > [   23.249642] EIP: __ww_mutex_lock.constprop.0+0x566/0xfec
> > [   23.251563] Code: e8 2b 5a 95 ff 85 c0 0f 84 25 fb ff ff 8b 0d 18 71 3b 
> > c8 85 c9 0f 85 17 fb ff ff 68 c0 58 07 c8 68 07 77 05 c8 e8 e6 0a 40 ff 
> > <0f> 0b 58 5a e9 ff fa ff ff e8 f8 59 95 ff 85 c0 74 0e 8b 0d 18 71
> > [   23.256901] EAX: 0028 EBX:  ECX: c1847dd8 EDX: 0002
> > [   23.258849] ESI:  EDI: c12958bc EBP: c1847f00 ESP: c1847eac
> > [   23.260786] DS: 007b ES: 007b FS: 00d8 GS:  SS: 0068 EFLAGS: 00010286
> > [   23.262840] CR0: 80050033 CR2: ffbff000 CR3: 0850e000 CR4: 00150ef0
> > [   23.264781] Call Trace:
> > [   23.265899]  ? lock_is_held_type+0xbe/0x10c
> > [   23.267434]  ? ttm_bo_delayed_delete+0x30/0x94
> > [   23.268971]  ww_mutex_lock+0x32/0x94
> > [   23.270327]  ttm_bo_delayed_delete+0x30/0x94
> > [   23.271818]  process_one_work+0x21a/0x538
> > [   23.273242]  worker_thread+0x146/0x398
> > [   23.274616]  kthread+0xea/0x10c
> > [   23.275859]  ? process_one_work+0x538/0x538
> > [   23.277312]  ? kthread_complete_and_exit+0x1c/0x1c
> > [   23.278899]  ret_from_fork+0x1c/0x28
> > [   23.280223] irq event stamp: 33
> > [   23.281440] hardirqs last  enabled at (33): [] 
> > _raw_spin_unlock_irqrestore+0x2d/0x58
> > [   23.283860] hardirqs last disabled at (32): [] 
> > kvfree_call_rcu+0x155/0x2ec
> > [   23.286066] softirqs last  enabled at (0): [] 
> > copy_process+0x989/0x2368
> > [   23.288220] softirqs last disabled at (0): [<>] 0x0
> > [   23.289952] ---[ end trace  ]---
> > [   23.291501] [ cut here ]
> > [   23.293027] refcount_t: underflow; use-after-free.
> > [   23.294644] WARNING: CPU: 0 PID: 61 at lib/refcount.c:28 
> > refcount_warn_saturate+0xb6/0xfc
> > [   23.296959] Modules linked in:
> > [   23.298168] CPU: 0 PID: 61 Comm: kworker/0:1H Tainted: GW
> >   6.3.0-rc1-test-1-ga98bd42762ed-dirty #972
> > [   23.301073] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 
> > 1.16.0-debian-1.16.0-5 04/01/2014
> > [   23.303642] Workqueue: ttm ttm_bo_delayed_delete
> > [   23.305190] EIP: refcount_warn_saturate+0xb6/0xfc
> > [   23.306767] Code: 68 70 e1 0c c8 e8 f6 d6 a9 ff 0f 0b 58 c9 c3 90 80 3d 
> > 8a 78 38 c8 00 75 8a c6 05 8a 78 38 c8 01 68 9c e1 0c c8 e8 d6 d6 a9 ff 
> > <0f> 0b 59 c9 c3 80 3d 88 78 38 c8 00 0f 85 67 ff ff ff c6 05 88 78
> > [   23.311935] EAX: 0026 EBX: c1295950 ECX: c1847e40 EDX: 0002
> > [   23.313884] ESI: c12958bc EDI: f7591100 EBP: c1847f18 ESP: c1847f14
> > [   23.315840] DS: 007b ES: 007b FS: 00d8 GS:  SS: 0068 EFLAGS: 00010246
> > [   23.317887] CR0: 80050033 CR2: ffbff000 CR3: 0850e000 CR4: 00150ef0
> > [   23.319859] Call Trace:
> > [   23.320978]  ttm_bo_delayed_delete+0x8c/0x94
> > [   23.322492]  process_one_work+0x21a/0x538
> > [   23.323959]  worker_thread+0x146/0x398
> > [   23.325353]  kthread+0xea/0x10c
> > [  

Re: [PATCH v4 5/5] drm/i915/gt: Make sure that errors are propagated through request chains

2023-03-10 Thread Matthew Auld

On 08/03/2023 09:41, Andi Shyti wrote:

Currently, when we perform operations such as clearing or copying
large blocks of memory, we generate multiple requests that are
executed in a chain.

However, if one of these requests fails, we may not realize it
unless it happens to be the last request in the chain. This is
because errors are not properly propagated.

For this we need to keep propagating the chain of fence
notification in order to always reach the final fence associated
to the final request.

To address this issue, we need to ensure that the chain of fence
notifications is always propagated so that we can reach the final
fence associated with the last request. By doing so, we will be
able to detect any memory operation failures and determine
whether the memory is still invalid.

On copy and clear migration signal fences upon completion.

On copy and clear migration, signal fences upon request
completion to ensure that we have a reliable perpetuation of the
operation outcome.

Fixes: cf586021642d80 ("drm/i915/gt: Pipelined page migration")
Reported-by: Matthew Auld 
Suggested-by: Chris Wilson 
Signed-off-by: Andi Shyti 
Cc: sta...@vger.kernel.org
Reviewed-by: Matthew Auld 
---
  drivers/gpu/drm/i915/gt/intel_migrate.c | 41 ++---
  1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c 
b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 3f638f1987968..0031e7b1b4704 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -742,13 +742,19 @@ intel_context_migrate_copy(struct intel_context *ce,
dst_offset = 2 * CHUNK_SZ;
}
  
+	/*

+* While building the chain of requests, we need to ensure
+* that no one can sneak into the timeline unnoticed.
+*/
+   mutex_lock(&ce->timeline->mutex);
+


Hmm, this looks different/new from the previous version. Why do we only 
do this for the copy and not the clear btw? Both should be conceptually 
the same. Sorry if I'm misunderstanding something here.



do {
int len;
  
-		rq = i915_request_create(ce);

+   rq = i915_request_create_locked(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
-   goto out_ce;
+   break;
}
  
  		if (deps) {

@@ -878,10 +884,14 @@ intel_context_migrate_copy(struct intel_context *ce,
  
  		/* Arbitration is re-enabled between requests. */

  out_rq:
-   if (*out)
+   i915_sw_fence_await(&rq->submit);
+   i915_request_get(rq);
+   i915_request_add_locked(rq);
+   if (*out) {
+   i915_sw_fence_complete(&(*out)->submit);
i915_request_put(*out);
-   *out = i915_request_get(rq);
-   i915_request_add(rq);
+   }
+   *out = rq;
  
  		if (err)

break;
@@ -905,7 +915,10 @@ intel_context_migrate_copy(struct intel_context *ce,
cond_resched();
} while (1);
  
-out_ce:

+   mutex_unlock(&ce->timeline->mutex);
+
+   if (*out)
+   i915_sw_fence_complete(&(*out)->submit);
return err;
  }
  
@@ -1005,7 +1018,7 @@ intel_context_migrate_clear(struct intel_context *ce,

rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
-   goto out_ce;
+   break;
}
  
  		if (deps) {

@@ -1056,17 +1069,23 @@ intel_context_migrate_clear(struct intel_context *ce,
  
  		/* Arbitration is re-enabled between requests. */

  out_rq:
-   if (*out)
-   i915_request_put(*out);
-   *out = i915_request_get(rq);
+   i915_sw_fence_await(&rq->submit);
+   i915_request_get(rq);
i915_request_add(rq);
+   if (*out) {
+   i915_sw_fence_complete(&(*out)->submit);
+   i915_request_put(*out);
+   }
+   *out = rq;
+
if (err || !it.sg || !sg_dma_len(it.sg))
break;
  
  		cond_resched();

} while (1);
  
-out_ce:

+   if (*out)
+   i915_sw_fence_complete(&(*out)->submit);
return err;
  }
  


Re: [Intel-gfx] [PATCH 1/3] drm/i915: Set I915_BO_ALLOC_USER for framebuffer

2023-03-06 Thread Matthew Auld

On 06/03/2023 13:31, Das, Nirmoy wrote:

Hi Matt,

On 3/6/2023 1:25 PM, Matthew Auld wrote:

On 06/03/2023 12:07, Nirmoy Das wrote:

Framebuffer is exposed to userspace so set I915_BO_ALLOC_USER
flag for it. This also makes sure that ttm allocates offset
for lmem objects.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/i915/display/intel_dpt.c   | 4 +++-
  drivers/gpu/drm/i915/display/intel_fbdev.c | 3 ++-
  drivers/gpu/drm/i915/display/intel_plane_initial.c | 3 ++-
  3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c

index ad1a37b515fb..2e6238881860 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -254,7 +254,9 @@ intel_dpt_create(struct intel_framebuffer *fb)
    size = round_up(size * sizeof(gen8_pte_t), I915_GTT_PAGE_SIZE);
  -    dpt_obj = i915_gem_object_create_lmem(i915, size, 
I915_BO_ALLOC_CONTIGUOUS);

+    dpt_obj = i915_gem_object_create_lmem(i915, size,
+  I915_BO_ALLOC_CONTIGUOUS |
+  I915_BO_ALLOC_USER);


AFAICT this is just some driver internal stuff for display page-table, 
which gets mapped through GGTT or something, and is not the actual fb. 
Is it really exposed to the user?



I misunderstood this for something else. I will remove this.




  if (IS_ERR(dpt_obj) && i915_ggtt_has_aperture(to_gt(i915)->ggtt))
  dpt_obj = i915_gem_object_create_stolen(i915, size);
  if (IS_ERR(dpt_obj) && !HAS_LMEM(i915)) {
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c 
b/drivers/gpu/drm/i915/display/intel_fbdev.c

index 3659350061a7..98ae3a3a986a 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -163,7 +163,8 @@ static int intelfb_alloc(struct drm_fb_helper 
*helper,

  obj = ERR_PTR(-ENODEV);
  if (HAS_LMEM(dev_priv)) {
  obj = i915_gem_object_create_lmem(dev_priv, size,
-  I915_BO_ALLOC_CONTIGUOUS);
+  I915_BO_ALLOC_CONTIGUOUS |
+  I915_BO_ALLOC_USER);
  } else {
  /*
   * If the FB is too big, just don't use it since fbdev is 
not very
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c 
b/drivers/gpu/drm/i915/display/intel_plane_initial.c

index bb6ea7de5c61..4a3680f6a3f5 100644
--- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
+++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
@@ -110,7 +110,8 @@ initial_plane_vma(struct drm_i915_private *i915,
  size * 2 > i915->dsm.usable_size)
  return NULL;
  -    obj = i915_gem_object_create_region_at(mem, phys_base, size, 0);
+    obj = i915_gem_object_create_region_at(mem, phys_base, size,
+   I915_BO_ALLOC_USER);


ALLOC_USER has the side effect of also zeroing the memory underneath, 
IIRC. However this here is the pre-allocated fb (will have some boot 
logo stuff), so we shouldn't ever clear it.



This was my concern.  I wonder if there is any other better way than to 
use a temp buffer to copy the pre-allocated content and put it back 
after getting i915_gem_object_create_region_at().


If we need ALLOC_USER for this buffer then maybe just a new flag like 
BO_PREALLOCATED which skips all the clearing?





Regards,

Nirmoy





  if (IS_ERR(obj))
  return NULL;


Re: [PATCH 1/3] drm/i915: Set I915_BO_ALLOC_USER for framebuffer

2023-03-06 Thread Matthew Auld

On 06/03/2023 12:07, Nirmoy Das wrote:

Framebuffer is exposed to userspace so set I915_BO_ALLOC_USER
flag for it. This also makes sure that ttm allocates offset
for lmem objects.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/i915/display/intel_dpt.c   | 4 +++-
  drivers/gpu/drm/i915/display/intel_fbdev.c | 3 ++-
  drivers/gpu/drm/i915/display/intel_plane_initial.c | 3 ++-
  3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index ad1a37b515fb..2e6238881860 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -254,7 +254,9 @@ intel_dpt_create(struct intel_framebuffer *fb)
  
  	size = round_up(size * sizeof(gen8_pte_t), I915_GTT_PAGE_SIZE);
  
-	dpt_obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_CONTIGUOUS);

+   dpt_obj = i915_gem_object_create_lmem(i915, size,
+ I915_BO_ALLOC_CONTIGUOUS |
+ I915_BO_ALLOC_USER);


AFAICT this is just some driver internal stuff for display page-table, 
which gets mapped through GGTT or something, and is not the actual fb. 
Is it really exposed to the user?



if (IS_ERR(dpt_obj) && i915_ggtt_has_aperture(to_gt(i915)->ggtt))
dpt_obj = i915_gem_object_create_stolen(i915, size);
if (IS_ERR(dpt_obj) && !HAS_LMEM(i915)) {
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c 
b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 3659350061a7..98ae3a3a986a 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -163,7 +163,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
obj = ERR_PTR(-ENODEV);
if (HAS_LMEM(dev_priv)) {
obj = i915_gem_object_create_lmem(dev_priv, size,
- I915_BO_ALLOC_CONTIGUOUS);
+ I915_BO_ALLOC_CONTIGUOUS |
+ I915_BO_ALLOC_USER);
} else {
/*
 * If the FB is too big, just don't use it since fbdev is not 
very
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c 
b/drivers/gpu/drm/i915/display/intel_plane_initial.c
index bb6ea7de5c61..4a3680f6a3f5 100644
--- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
+++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
@@ -110,7 +110,8 @@ initial_plane_vma(struct drm_i915_private *i915,
size * 2 > i915->dsm.usable_size)
return NULL;
  
-	obj = i915_gem_object_create_region_at(mem, phys_base, size, 0);

+   obj = i915_gem_object_create_region_at(mem, phys_base, size,
+  I915_BO_ALLOC_USER);


ALLOC_USER has the side effect of also zeroing the memory underneath, 
IIRC. However this here is the pre-allocated fb (will have some boot 
logo stuff), so we shouldn't ever clear it.



if (IS_ERR(obj))
return NULL;
  


Re: [Intel-gfx] [PATCH] drm/i915/gt: Make sure that errors are propagated through request chains

2023-02-24 Thread Matthew Auld
On Fri, 10 Feb 2023 at 14:06, Andi Shyti  wrote:
>
> Currently, for operations like memory clear or copy for big
> chunks of memory, we generate multiple requests executed in a
> chain.
>
> But if one of the requests generated fails we would not know it
> unless it happens to the last request, because errors are not
> properly propagated.
>
> For this we need to keep propagating the chain of fence
> notification in order to always reach the final fence associated
> to the final request.
>
> This way we would know that the memory operation has failed and
> whether the memory is still invalid.
>
> On copy and clear migration signal fences upon completion.
>
> Fixes: cf586021642d80 ("drm/i915/gt: Pipelined page migration")
> Reported-by: Matthew Auld 
> Suggested-by: Chris Wilson 
> Signed-off-by: Andi Shyti 
> Cc: sta...@vger.kernel.org
Reviewed-by: Matthew Auld 


Re: [Intel-gfx] [PATCH 7/7] drm/ttm: cleanup ttm_range_mgr_node

2023-02-21 Thread Matthew Auld
On Fri, 17 Feb 2023 at 12:23, Christian König
 wrote:
>
> We don't need multiple drm_mm nodes any more. Clean that up and remove
> the extra complexity.
>
> Signed-off-by: Christian König 
Reviewed-by: Matthew Auld 


Re: [PATCH 5/7] drm/gem: Remove BUG_ON in drm_gem_private_object_init

2023-02-21 Thread Matthew Auld
On Fri, 17 Feb 2023 at 12:23, Christian König
 wrote:
>
> From: Somalapuram Amaranath 
>
> ttm_resource can allocate size in bytes to support less than page size.
>
> Signed-off-by: Somalapuram Amaranath 
> Reviewed-by: Christian König 
> Signed-off-by: Christian König 
> Link: 
> https://patchwork.freedesktop.org/patch/msgid/20230208090106.9659-1-amaranath.somalapu...@amd.com
> ---
>  drivers/gpu/drm/drm_gem.c | 2 --
>  1 file changed, 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
> index aa15c52ae182..5a3ca3363f82 100644
> --- a/drivers/gpu/drm/drm_gem.c
> +++ b/drivers/gpu/drm/drm_gem.c
> @@ -152,8 +152,6 @@ EXPORT_SYMBOL(drm_gem_object_init);
>  void drm_gem_private_object_init(struct drm_device *dev,
>  struct drm_gem_object *obj, size_t size)
>  {
> -   BUG_ON((size & (PAGE_SIZE - 1)) != 0);
> -

There are also some comments in drm_gem_{get, put}_pages referring to
this exact BUG_ON(), which could do with updating now.

> obj->dev = dev;
> obj->filp = NULL;
>
> --
> 2.34.1
>


Re: [PATCH 1/4] drm/gem-vram: handle NULL bo->resource in move callback

2023-02-21 Thread Matthew Auld

On 21/02/2023 16:17, Christian König wrote:

Am 21.02.23 um 17:13 schrieb Matthew Auld:

On 10/02/2023 11:03, Christian König wrote:

Am 08.02.23 um 15:53 schrieb Matthew Auld:

The ttm BO now initially has NULL bo->resource, and leaves the driver
to handle that. However it looks like we forgot to handle that for
ttm_bo_move_memcpy() users, like with vram-gem, since it just silently
returns zero. This seems to then trigger warnings like:

WARNING: CPU: 0 PID: 1 at drivers/gpu/drm/drm_gem_vram_helper.c:255 
drm_gem_vram_offset (??:?)


Fix this by calling move_null() if the new resource is TTM_PL_SYSTEM,
otherwise do the multi-hop sequence to ensure we can safely call into
ttm_bo_move_memcpy(), since it might also need to clear the memory.
This should give the same behaviour as before.

While we are here let's also treat calling ttm_bo_move_memcpy() with
NULL bo->resource as programmer error, where expectation is that upper
layers should now handle it.

Fixes: 180253782038 ("drm/ttm: stop allocating dummy resources 
during BO creation")

Signed-off-by: Matthew Auld 
Cc: Christian König 


Oh, I wasn't aware that this broke at so many places. Especially 
radeon was tested earlier in the development of the patch set.


Thanks for looking into that, the radeon patch has my rb and the rest 
of the series is Acked-by: Christian König .


Should we go ahead and land this? (minus patch 3 since that is already 
fixed by vmware folks).


Yeah, sure go ahead.


I assume this has to go via some drm-misc type tree, for which I don't 
currently have commit rights. Can you help with merging this?




Thanks,
Christian.





Regards,
Christian.


---
  drivers/gpu/drm/drm_gem_vram_helper.c | 11 +++
  drivers/gpu/drm/ttm/ttm_bo_util.c |  4 ++--
  2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c 
b/drivers/gpu/drm/drm_gem_vram_helper.c

index d40b3edb52d0..0bea3df2a16d 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -916,6 +916,17 @@ static int bo_driver_move(struct 
ttm_buffer_object *bo,

  {
  struct drm_gem_vram_object *gbo;
+    if (!bo->resource) {
+    if (new_mem->mem_type != TTM_PL_SYSTEM) {
+    hop->mem_type = TTM_PL_SYSTEM;
+    hop->flags = TTM_PL_FLAG_TEMPORARY;
+    return -EMULTIHOP;
+    }
+
+    ttm_bo_move_null(bo, new_mem);
+    return 0;
+    }
+
  gbo = drm_gem_vram_of_bo(bo);
  return drm_gem_vram_bo_driver_move(gbo, evict, ctx, new_mem);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c

index d9d2b0903b22..fd9fd3d15101 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -157,8 +157,8 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object 
*bo,

  bool clear;
  int ret = 0;
-    if (!src_mem)
-    return 0;
+    if (WARN_ON(!src_mem))
+    return -EINVAL;
  src_man = ttm_manager_type(bdev, src_mem->mem_type);
  if (ttm && ((ttm->page_flags & TTM_TT_FLAG_SWAPPED) ||






Re: [PATCH 1/4] drm/gem-vram: handle NULL bo->resource in move callback

2023-02-21 Thread Matthew Auld

On 10/02/2023 11:03, Christian König wrote:

Am 08.02.23 um 15:53 schrieb Matthew Auld:

The ttm BO now initially has NULL bo->resource, and leaves the driver
to handle that. However it looks like we forgot to handle that for
ttm_bo_move_memcpy() users, like with vram-gem, since it just silently
returns zero. This seems to then trigger warnings like:

WARNING: CPU: 0 PID: 1 at drivers/gpu/drm/drm_gem_vram_helper.c:255 
drm_gem_vram_offset (??:?)


Fix this by calling move_null() if the new resource is TTM_PL_SYSTEM,
otherwise do the multi-hop sequence to ensure we can safely call into
ttm_bo_move_memcpy(), since it might also need to clear the memory.
This should give the same behaviour as before.

While we are here let's also treat calling ttm_bo_move_memcpy() with
NULL bo->resource as programmer error, where expectation is that upper
layers should now handle it.

Fixes: 180253782038 ("drm/ttm: stop allocating dummy resources during 
BO creation")

Signed-off-by: Matthew Auld 
Cc: Christian König 


Oh, I wasn't aware that this broke at so many places. Especially radeon 
was tested earlier in the development of the patch set.


Thanks for looking into that, the radeon patch has my rb and the rest of 
the series is Acked-by: Christian König .


Should we go ahead and land this? (minus patch 3 since that is already 
fixed by vmware folks).




Regards,
Christian.


---
  drivers/gpu/drm/drm_gem_vram_helper.c | 11 +++
  drivers/gpu/drm/ttm/ttm_bo_util.c |  4 ++--
  2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c 
b/drivers/gpu/drm/drm_gem_vram_helper.c

index d40b3edb52d0..0bea3df2a16d 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -916,6 +916,17 @@ static int bo_driver_move(struct 
ttm_buffer_object *bo,

  {
  struct drm_gem_vram_object *gbo;
+    if (!bo->resource) {
+    if (new_mem->mem_type != TTM_PL_SYSTEM) {
+    hop->mem_type = TTM_PL_SYSTEM;
+    hop->flags = TTM_PL_FLAG_TEMPORARY;
+    return -EMULTIHOP;
+    }
+
+    ttm_bo_move_null(bo, new_mem);
+    return 0;
+    }
+
  gbo = drm_gem_vram_of_bo(bo);
  return drm_gem_vram_bo_driver_move(gbo, evict, ctx, new_mem);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c

index d9d2b0903b22..fd9fd3d15101 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -157,8 +157,8 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
  bool clear;
  int ret = 0;
-    if (!src_mem)
-    return 0;
+    if (WARN_ON(!src_mem))
+    return -EINVAL;
  src_man = ttm_manager_type(bdev, src_mem->mem_type);
  if (ttm && ((ttm->page_flags & TTM_TT_FLAG_SWAPPED) ||




Re: [Intel-gfx] [PATCH 3/3] drm/ttm: Change the meaning of the fields in the drm_mm_nodes structure from pfn to bytes v2

2023-02-14 Thread Matthew Auld
On Tue, 14 Feb 2023 at 07:43, Christian König
 wrote:
>
> From: Somalapuram Amaranath 
>
> Change the ttm_range_man_alloc() allocation from pages to size in bytes.
> Fix the dependent drm_mm_nodes start and size from pages to bytes.
>
> v2 (chk): Change the drm_mm_node usage in amdgpu as well. re-order the
>   patch to be independent of the resource->start change.
>
> Signed-off-by: Somalapuram Amaranath 
> Reviewed-by: Christian König 
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c| 15 ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h |  8 
>  drivers/gpu/drm/i915/i915_scatterlist.c|  6 +++---
>  drivers/gpu/drm/ttm/ttm_range_manager.c| 17 -
>  4 files changed, 23 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> index 44367f03316f..c90423cd1292 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> @@ -116,7 +116,6 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager 
> *man,
>   struct ttm_resource **res)
>  {
> struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
> -   uint32_t num_pages = PFN_UP(tbo->base.size);
> struct ttm_range_mgr_node *node;
> int r;
>
> @@ -134,17 +133,19 @@ static int amdgpu_gtt_mgr_new(struct 
> ttm_resource_manager *man,
> if (place->lpfn) {
> spin_lock(>lock);
> r = drm_mm_insert_node_in_range(>mm, >mm_nodes[0],
> -   num_pages, 
> tbo->page_alignment,
> -   0, place->fpfn, place->lpfn,
> +   tbo->base.size,
> +   tbo->page_alignment << 
> PAGE_SHIFT, 0,
> +   place->fpfn << PAGE_SHIFT,
> +   place->lpfn << PAGE_SHIFT,
> DRM_MM_INSERT_BEST);
> spin_unlock(>lock);
> if (unlikely(r))
> goto err_free;
>
> -   node->base.start = node->mm_nodes[0].start;
> +   node->base.start = node->mm_nodes[0].start >> PAGE_SHIFT;
> } else {
> node->mm_nodes[0].start = 0;
> -   node->mm_nodes[0].size = PFN_UP(node->base.size);
> +   node->mm_nodes[0].size = node->base.size;
> node->base.start = AMDGPU_BO_INVALID_OFFSET;
> }
>
> @@ -285,8 +286,8 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, 
> uint64_t gtt_size)
>
> ttm_resource_manager_init(man, >mman.bdev, gtt_size);
>
> -   start = AMDGPU_GTT_MAX_TRANSFER_SIZE * 
> AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
> -   size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
> +   start = (AMDGPU_GTT_MAX_TRANSFER_SIZE * 
> AMDGPU_GTT_NUM_TRANSFER_WINDOWS) << PAGE_SHIFT;
> +   size = adev->gmc.gart_size - start;
> drm_mm_init(>mm, start, size);
> spin_lock_init(>lock);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> index 5c4f93ee0c57..5c78f0b09351 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> @@ -94,8 +94,8 @@ static inline void amdgpu_res_first(struct ttm_resource 
> *res,
> while (start >= node->size << PAGE_SHIFT)
> start -= node++->size << PAGE_SHIFT;
>
> -   cur->start = (node->start << PAGE_SHIFT) + start;
> -   cur->size = min((node->size << PAGE_SHIFT) - start, size);
> +   cur->start = node->start + start;
> +   cur->size = min(node->size - start, size);
> cur->remaining = size;
> cur->node = node;
> break;
> @@ -155,8 +155,8 @@ static inline void amdgpu_res_next(struct 
> amdgpu_res_cursor *cur, uint64_t size)
> node = cur->node;
>
> cur->node = ++node;
> -   cur->start = node->start << PAGE_SHIFT;
> -   cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
> +   cur->start = node->start;
> +   cur->size = min(node->size, cur->remaining);
> break;
> default:
> return;
> diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c 
> b/drivers/gpu/drm/i915/i915_scatterlist.c
> index 756289e43dff..7defda1219d0 100644
> --- a/drivers/gpu/drm/i915/i915_scatterlist.c
> +++ b/drivers/gpu/drm/i915/i915_scatterlist.c
> @@ -94,7 +94,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct 
> drm_mm_node *node,
> if (!rsgt)
> return ERR_PTR(-ENOMEM);
>
> -   i915_refct_sgt_init(rsgt, 

[PATCH 3/4] drm/vmwgfx: handle NULL bo->resource in move callback

2023-02-08 Thread Matthew Auld
The ttm bo now initially has NULL bo->resource, and leaves the driver
to handle that. However it looks like we forgot to handle that for
vmwgfx.  It looks like this will just null-ptr-deref in vmw_move(), if
bo->resource is NULL.

Fix this by calling move_null() if the new resource is TTM_PL_SYSTEM,
otherwise do the multi-hop sequence to ensure we can safely call into
ttm_bo_move_memcpy(), since it might also need to clear the memory.
This should give the same behaviour as before.

Fixes: 180253782038 ("drm/ttm: stop allocating dummy resources during BO 
creation")
Signed-off-by: Matthew Auld 
Cc: Christian König 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index 856a352a72a6..c598c5a9fe2c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -596,10 +596,23 @@ static int vmw_move(struct ttm_buffer_object *bo,
struct ttm_resource *new_mem,
struct ttm_place *hop)
 {
-   struct ttm_resource_manager *old_man = ttm_manager_type(bo->bdev, 
bo->resource->mem_type);
+   struct ttm_resource_manager *old_man;
struct ttm_resource_manager *new_man = ttm_manager_type(bo->bdev, 
new_mem->mem_type);
int ret;
 
+   if (!bo->resource) {
+   if (new_mem->mem_type != TTM_PL_SYSTEM) {
+   hop->mem_type = TTM_PL_SYSTEM;
+   hop->flags = TTM_PL_FLAG_TEMPORARY;
+   return -EMULTIHOP;
+   }
+
+   ttm_bo_move_null(bo, new_mem);
+   return 0;
+   }
+
+   old_man = ttm_manager_type(bo->bdev, bo->resource->mem_type);
+
if (new_man->use_tt && !vmw_memtype_is_system(new_mem->mem_type)) {
ret = vmw_ttm_bind(bo->bdev, bo->ttm, new_mem);
if (ret)
-- 
2.39.1



[PATCH 4/4] drm/radeon: handle NULL bo->resource in move callback

2023-02-08 Thread Matthew Auld
The ttm bo now initially has NULL bo->resource, and leaves the driver
to handle that. However it looks like we forgot to handle that for
radeon.  It looks like this will just null-ptr-deref in
radeon_bo_move(), if bo->resource is NULL.

Fix this by calling move_null().

Fixes: 180253782038 ("drm/ttm: stop allocating dummy resources during BO 
creation")
Signed-off-by: Matthew Auld 
Cc: Christian König 
---
 drivers/gpu/drm/radeon/radeon_ttm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 67075c85f847..2220cdf6a3f6 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -213,7 +213,8 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, 
bool evict,
 
rbo = container_of(bo, struct radeon_bo, tbo);
rdev = radeon_get_rdev(bo->bdev);
-   if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
+   if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
+bo->ttm == NULL)) {
ttm_bo_move_null(bo, new_mem);
goto out;
}
-- 
2.39.1



[PATCH 2/4] drm/qxl: handle NULL bo->resource in move callback

2023-02-08 Thread Matthew Auld
The ttm bo now initially has NULL bo->resource, and leaves the driver
to handle that. However it looks like we forgot to handle that for qxl.
It looks like this will just null-ptr-deref in qxl_bo_move(), if
bo->resource is NULL.

Fix this by calling move_null() if the new resource is TTM_PL_SYSTEM,
otherwise do the multi-hop sequence to ensure we can safely call into
ttm_bo_move_memcpy(), since it might also need to clear the memory.
This should give the same behaviour as before.

Fixes: 180253782038 ("drm/ttm: stop allocating dummy resources during BO 
creation")
Signed-off-by: Matthew Auld 
Cc: Christian König 
---
 drivers/gpu/drm/qxl/qxl_ttm.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index a92a5b0d4c25..1a82629bce3f 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -143,6 +143,17 @@ static int qxl_bo_move(struct ttm_buffer_object *bo, bool 
evict,
struct ttm_resource *old_mem = bo->resource;
int ret;
 
+   if (!old_mem) {
+   if (new_mem->mem_type != TTM_PL_SYSTEM) {
+   hop->mem_type = TTM_PL_SYSTEM;
+   hop->flags = TTM_PL_FLAG_TEMPORARY;
+   return -EMULTIHOP;
+   }
+
+   ttm_bo_move_null(bo, new_mem);
+   return 0;
+   }
+
qxl_bo_move_notify(bo, new_mem);
 
ret = ttm_bo_wait_ctx(bo, ctx);
-- 
2.39.1



[PATCH 1/4] drm/gem-vram: handle NULL bo->resource in move callback

2023-02-08 Thread Matthew Auld
The ttm BO now initially has NULL bo->resource, and leaves the driver
to handle that. However it looks like we forgot to handle that for
ttm_bo_move_memcpy() users, like with vram-gem, since it just silently
returns zero. This seems to then trigger warnings like:

WARNING: CPU: 0 PID: 1 at drivers/gpu/drm/drm_gem_vram_helper.c:255 
drm_gem_vram_offset (??:?)

Fix this by calling move_null() if the new resource is TTM_PL_SYSTEM,
otherwise do the multi-hop sequence to ensure we can safely call into
ttm_bo_move_memcpy(), since it might also need to clear the memory.
This should give the same behaviour as before.

While we are here let's also treat calling ttm_bo_move_memcpy() with
NULL bo->resource as programmer error, where expectation is that upper
layers should now handle it.

Fixes: 180253782038 ("drm/ttm: stop allocating dummy resources during BO 
creation")
Signed-off-by: Matthew Auld 
Cc: Christian König 
---
 drivers/gpu/drm/drm_gem_vram_helper.c | 11 +++
 drivers/gpu/drm/ttm/ttm_bo_util.c |  4 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c 
b/drivers/gpu/drm/drm_gem_vram_helper.c
index d40b3edb52d0..0bea3df2a16d 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -916,6 +916,17 @@ static int bo_driver_move(struct ttm_buffer_object *bo,
 {
struct drm_gem_vram_object *gbo;
 
+   if (!bo->resource) {
+   if (new_mem->mem_type != TTM_PL_SYSTEM) {
+   hop->mem_type = TTM_PL_SYSTEM;
+   hop->flags = TTM_PL_FLAG_TEMPORARY;
+   return -EMULTIHOP;
+   }
+
+   ttm_bo_move_null(bo, new_mem);
+   return 0;
+   }
+
gbo = drm_gem_vram_of_bo(bo);
 
return drm_gem_vram_bo_driver_move(gbo, evict, ctx, new_mem);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index d9d2b0903b22..fd9fd3d15101 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -157,8 +157,8 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
bool clear;
int ret = 0;
 
-   if (!src_mem)
-   return 0;
+   if (WARN_ON(!src_mem))
+   return -EINVAL;
 
src_man = ttm_manager_type(bdev, src_mem->mem_type);
if (ttm && ((ttm->page_flags & TTM_TT_FLAG_SWAPPED) ||
-- 
2.39.1



Re: [drm-misc:drm-misc-next] [drm/ttm] 1802537820: WARNING:at_drivers/gpu/drm/drm_gem_vram_helper.c:#drm_gem_vram_offset

2023-02-08 Thread Matthew Auld
On Wed, 8 Feb 2023 at 12:41, Christian König  wrote:
>
> Am 08.02.23 um 10:38 schrieb Matthew Auld:
> > On Wed, 8 Feb 2023 at 08:32, Christian König  
> > wrote:
> >> Hey guys,
> >>
> >> I'm pretty sure this is a bug in bochs which happens to surface because
> >> of a recent TTM change, we have seen similar problems in the past with
> >> this driver.
> >>
> >> What happens is that userspace tries to bind a BO to a CRTC before the
> >> BO has even a backing store.
> >>
> >> Any idea how to fix this? I can just remove the warning, but that's not
> >> really a good solution.
> > IIUC this driver is just using ttm_bo_move_memcpy() underneath for its
> > bo_move callback, which looks to be doing:
> >
> > if (!bo->resource)
> >  return 0;
> >
> > > Which doesn't make any sense to me. There should at least be a
> > move_null(), and maybe also a multi-hop to handle clearing. Otherwise
> > bo->resource is likely always NULL (and we hit the above warning),
> > even after the dummy move. What do you think?
>
> Oh, good point. That should indeed be move_null().
>
> Do you want to write a patch or should I take care of this?

I can try to type that.

>
> Thanks for pointing that out,
> Christian.
>
> >
> >> Regards,
> >> Christian.
> >>
> >> Am 08.02.23 um 05:32 schrieb kernel test robot:
> >>> Greeting,
> >>>
> >>> FYI, we noticed 
> >>> WARNING:at_drivers/gpu/drm/drm_gem_vram_helper.c:#drm_gem_vram_offset due 
> >>> to commit (built with gcc-11):
> >>>
> >>> commit: 1802537820389183dfcd814e0f6a60d1496a75ef ("drm/ttm: stop 
> >>> allocating dummy resources during BO creation")
> >>> git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
> >>>
> >>> in testcase: boot
> >>>
> >>> on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 
> >>> -m 16G
> >>>
> >>> caused below changes (please refer to attached dmesg/kmsg for entire 
> >>> log/backtrace):
> >>>
> >>>
> >>> If you fix the issue, kindly add following tag
> >>> | Reported-by: kernel test robot 
> >>> | Link: 
> >>> https://lore.kernel.org/oe-lkp/202302081038.984b8c1-oliver.s...@intel.com
> >>>
> >>>
> >>> [   25.994992][T1] [ cut here ]
> >>> [ 25.995050][ T1] WARNING: CPU: 0 PID: 1 at 
> >>> drivers/gpu/drm/drm_gem_vram_helper.c:255 drm_gem_vram_offset (??:?)
> >>> [   25.995080][T1] Modules linked in:
> >>> [   25.995100][T1] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G   
> >>>  T  6.2.0-rc6-01191-g180253782038 #1 
> >>> a8db67375c3ac749313dafaec43f39836e38fae9
> >>> [   25.995117][T1] Hardware name: QEMU Standard PC (i440FX + PIIX, 
> >>> 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014
> >>> [ 25.995128][ T1] RIP: 0010:drm_gem_vram_offset (??:?)
> >>> [ 25.995144][ T1] Code: 02 00 00 b8 ff ff 37 00 48 89 fa 48 c1 e0 2a 48 
> >>> c1 ea 03 80 3c 02 00 74 05 e8 7f 1f eb fe 48 8b 9b 20 02 00 00 48 85 db 
> >>> 75 06 <0f> 0b 31 c0 eb 4b 48 8d 7b 10 b8 ff ff 37 00 48 89 fa 48 c1 e0 2a
> >>> All code
> >>> 
> >>>  0:02 00   add(%rax),%al
> >>>  2:00 b8 ff ff 37 00   add%bh,0x37(%rax)
> >>>  8:48 89 famov%rdi,%rdx
> >>>  b:48 c1 e0 2a shl$0x2a,%rax
> >>>  f:48 c1 ea 03 shr$0x3,%rdx
> >>> 13:80 3c 02 00 cmpb   $0x0,(%rdx,%rax,1)
> >>> 17:74 05   je 0x1e
> >>> 19:e8 7f 1f eb fe  callq  0xfeeb1f9d
> >>> 1e:48 8b 9b 20 02 00 00mov0x220(%rbx),%rbx
> >>> 25:48 85 dbtest   %rbx,%rbx
> >>> 28:75 06   jne0x30
> >>> 2a:*   0f 0b   ud2 <-- trapping 
> >>> instruction
> >>> 2c:31 c0   xor%eax,%eax
> >>> 2e:eb 4b   jmp0x7b
> >>> 30:48 8d 7b 10 lea0x10(%rbx),%rdi
> >>> 34:b8 ff ff 37 00  mov$0x37,%eax
> >>>

Re: [drm-misc:drm-misc-next] [drm/ttm] 1802537820: WARNING:at_drivers/gpu/drm/drm_gem_vram_helper.c:#drm_gem_vram_offset

2023-02-08 Thread Matthew Auld
On Wed, 8 Feb 2023 at 08:32, Christian König  wrote:
>
> Hey guys,
>
> I'm pretty sure this is a bug in bochs which happens to surface because
> of a recent TTM change, we have seen similar problems in the past with
> this driver.
>
> What happens is that userspace tries to bind a BO to a CRTC before the
> BO has even a backing store.
>
> Any idea how to fix this? I can just remove the warning, but that's not
> really a good solution.

IIUC this driver is just using ttm_bo_move_memcpy() underneath for its
bo_move callback, which looks to be doing:

if (!bo->resource)
return 0;

Which doesn't make any sense to me. There should at least be a
move_null(), and maybe also a multi-hop to handle clearing. Otherwise
bo->resource is likely always NULL (and we hit the above warning),
even after the dummy move. What do you think?

>
> Regards,
> Christian.
>
> Am 08.02.23 um 05:32 schrieb kernel test robot:
> > Greeting,
> >
> > FYI, we noticed 
> > WARNING:at_drivers/gpu/drm/drm_gem_vram_helper.c:#drm_gem_vram_offset due 
> > to commit (built with gcc-11):
> >
> > commit: 1802537820389183dfcd814e0f6a60d1496a75ef ("drm/ttm: stop allocating 
> > dummy resources during BO creation")
> > git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
> >
> > in testcase: boot
> >
> > on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 
> > 16G
> >
> > caused below changes (please refer to attached dmesg/kmsg for entire 
> > log/backtrace):
> >
> >
> > If you fix the issue, kindly add following tag
> > | Reported-by: kernel test robot 
> > | Link: 
> > https://lore.kernel.org/oe-lkp/202302081038.984b8c1-oliver.s...@intel.com
> >
> >
> > [   25.994992][T1] [ cut here ]
> > [ 25.995050][ T1] WARNING: CPU: 0 PID: 1 at 
> > drivers/gpu/drm/drm_gem_vram_helper.c:255 drm_gem_vram_offset (??:?)
> > [   25.995080][T1] Modules linked in:
> > [   25.995100][T1] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G 
> >T  6.2.0-rc6-01191-g180253782038 #1 
> > a8db67375c3ac749313dafaec43f39836e38fae9
> > [   25.995117][T1] Hardware name: QEMU Standard PC (i440FX + PIIX, 
> > 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014
> > [ 25.995128][ T1] RIP: 0010:drm_gem_vram_offset (??:?)
> > [ 25.995144][ T1] Code: 02 00 00 b8 ff ff 37 00 48 89 fa 48 c1 e0 2a 48 c1 
> > ea 03 80 3c 02 00 74 05 e8 7f 1f eb fe 48 8b 9b 20 02 00 00 48 85 db 75 06 
> > <0f> 0b 31 c0 eb 4b 48 8d 7b 10 b8 ff ff 37 00 48 89 fa 48 c1 e0 2a
> > All code
> > 
> > 0:02 00   add(%rax),%al
> > 2:00 b8 ff ff 37 00   add%bh,0x37(%rax)
> > 8:48 89 famov%rdi,%rdx
> > b:48 c1 e0 2a shl$0x2a,%rax
> > f:48 c1 ea 03 shr$0x3,%rdx
> >13:80 3c 02 00 cmpb   $0x0,(%rdx,%rax,1)
> >17:74 05   je 0x1e
> >19:e8 7f 1f eb fe  callq  0xfeeb1f9d
> >1e:48 8b 9b 20 02 00 00mov0x220(%rbx),%rbx
> >25:48 85 dbtest   %rbx,%rbx
> >28:75 06   jne0x30
> >2a:*   0f 0b   ud2 <-- trapping 
> > instruction
> >2c:31 c0   xor%eax,%eax
> >2e:eb 4b   jmp0x7b
> >30:48 8d 7b 10 lea0x10(%rbx),%rdi
> >34:b8 ff ff 37 00  mov$0x37,%eax
> >39:48 89 famov%rdi,%rdx
> >3c:48 c1 e0 2a shl$0x2a,%rax
> >
> > Code starting with the faulting instruction
> > ===
> > 0:0f 0b   ud2
> > 2:31 c0   xor%eax,%eax
> > 4:eb 4b   jmp0x51
> > 6:48 8d 7b 10 lea0x10(%rbx),%rdi
> > a:b8 ff ff 37 00  mov$0x37,%eax
> > f:48 89 famov%rdi,%rdx
> >12:48 c1 e0 2a shl$0x2a,%rax
> > [   25.995156][T1] RSP: :c901f028 EFLAGS: 00210246
> > [   25.995174][T1] RAX: dc00 RBX:  RCX: 
> > 
> > [   25.995186][T1] RDX: 111026dee544 RSI: 8881372d4b10 RDI: 
> > 888136f72a20
> > [   25.995196][T1] RBP: c901f030 R08:  R09: 
> > 
> > [   25.995206][T1] R10:  R11:  R12: 
> > 8881372d4b00
> > [   25.995215][T1] R13: 888136e9ee00 R14: 888136f4a060 R15: 
> > 0500
> > [   25.995225][T1] FS:  () 
> > GS:8883aee0() knlGS:
> > [   25.995236][T1] CS:  0010 DS:  ES:  CR0: 80050033
> > [   25.995247][T1] CR2: f7fa1cd4 CR3: 06015000 CR4: 
> > 000406b0
> > [   

Re: [PATCH] drm/i915/hwmon: Enable PL1 power limit

2023-02-07 Thread Matthew Auld
On Tue, 7 Feb 2023 at 17:19, Dixit, Ashutosh  wrote:
>
> On Tue, 07 Feb 2023 08:12:25 -0800, Dixit, Ashutosh wrote:
> >
> > On Tue, 07 Feb 2023 01:32:44 -0800, Matthew Auld wrote:
> > >
> > > On Fri, 3 Feb 2023 at 15:54, Ashutosh Dixit  
> > > wrote:
> > > >
> > > > Previous documentation suggested that PL1 power limit is always
> > > > enabled. However we now find this not to be the case on some
> > > > platforms (such as ATSM). Therefore enable PL1 power limit during hwmon
> > > > initialization.
> > >
> > > For some reason it looks like this change is impacting the atsm in CI:
> > > https://intel-gfx-ci.01.org/tree/drm-tip/bat-atsm-1.html
> >
> > Hmm, the change was meant for ATSM. Anyway let me try to get hold of an
> > ATSM and see if I can figure out what might be going on with these
> > seemingly unrelated failures and if I can repro them locally. Thanks!
>
> Rodrigo/Matt,
>
> I am proposing we revert this now and remerge again after investigating,
> even getting ATSM systems to investigate is not easy so it might take a few
> days to investigate. What do you guys think?

Yeah, maybe just revert for now.

>
> Thanks.
> --
> Ashutosh
>
>
> >
> > >
> > > >
> > > > Bspec: 51864
> > > >
> > > > v2: Add Bspec reference (Gwan-gyeong)
> > > > v3: Add Fixes tag
> > > >
> > > > Fixes: 99f55efb79114 ("drm/i915/hwmon: Power PL1 limit and TDP setting")
> > > > Signed-off-by: Ashutosh Dixit 
> > > > Reviewed-by: Gwan-gyeong Mun 
> > > > ---
> > > >  drivers/gpu/drm/i915/i915_hwmon.c | 5 +
> > > >  1 file changed, 5 insertions(+)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
> > > > b/drivers/gpu/drm/i915/i915_hwmon.c
> > > > index 1225bc432f0d5..4683a5b96eff1 100644
> > > > --- a/drivers/gpu/drm/i915/i915_hwmon.c
> > > > +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> > > > @@ -687,6 +687,11 @@ hwm_get_preregistration_info(struct 
> > > > drm_i915_private *i915)
> > > > for_each_gt(gt, i915, i)
> > > > hwm_energy(>ddat_gt[i], );
> > > > }
> > > > +
> > > > +   /* Enable PL1 power limit */
> > > > +   if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
> > > > +   hwm_locked_with_pm_intel_uncore_rmw(ddat, 
> > > > hwmon->rg.pkg_rapl_limit,
> > > > +   PKG_PWR_LIM_1_EN, 
> > > > PKG_PWR_LIM_1_EN);
> > > >  }
> > > >
> > > >  void i915_hwmon_register(struct drm_i915_private *i915)
> > > > --
> > > > 2.38.0
> > > >


Re: [PATCH] drm/i915/hwmon: Enable PL1 power limit

2023-02-07 Thread Matthew Auld
On Fri, 3 Feb 2023 at 15:54, Ashutosh Dixit  wrote:
>
> Previous documentation suggested that PL1 power limit is always
> enabled. However we now find this not to be the case on some
> platforms (such as ATSM). Therefore enable PL1 power limit during hwmon
> initialization.

For some reason it looks like this change is impacting the atsm in CI:
https://intel-gfx-ci.01.org/tree/drm-tip/bat-atsm-1.html

>
> Bspec: 51864
>
> v2: Add Bspec reference (Gwan-gyeong)
> v3: Add Fixes tag
>
> Fixes: 99f55efb79114 ("drm/i915/hwmon: Power PL1 limit and TDP setting")
> Signed-off-by: Ashutosh Dixit 
> Reviewed-by: Gwan-gyeong Mun 
> ---
>  drivers/gpu/drm/i915/i915_hwmon.c | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
> b/drivers/gpu/drm/i915/i915_hwmon.c
> index 1225bc432f0d5..4683a5b96eff1 100644
> --- a/drivers/gpu/drm/i915/i915_hwmon.c
> +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> @@ -687,6 +687,11 @@ hwm_get_preregistration_info(struct drm_i915_private 
> *i915)
> for_each_gt(gt, i915, i)
> hwm_energy(>ddat_gt[i], );
> }
> +
> +   /* Enable PL1 power limit */
> +   if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
> +   hwm_locked_with_pm_intel_uncore_rmw(ddat, 
> hwmon->rg.pkg_rapl_limit,
> +   PKG_PWR_LIM_1_EN, 
> PKG_PWR_LIM_1_EN);
>  }
>
>  void i915_hwmon_register(struct drm_i915_private *i915)
> --
> 2.38.0
>


Re: [PATCH] drm/i915/gt: Avoid redundant pointer validity check

2023-02-06 Thread Matthew Auld

On 06/02/2023 09:45, Tvrtko Ursulin wrote:


Hi,

Adding Matt & Thomas as potential candidates to review.

Regards,

Tvrtko

On 03/02/2023 19:30, Deepak R Varma wrote:

The macro definition of gen6_for_all_pdes() expands to a for loop such
that it breaks when the page table is null. Hence there is no need to
again test validity of the page table entry pointers in the pde list.
This change is identified using itnull.cocci semantic patch.

Signed-off-by: Deepak R Varma 
---
Please note: Proposed change is compile tested only.

  drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 5 ++---
  1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c

index 5aaacc53fa4c..787b9e6d9f59 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -258,8 +258,7 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt 
*ppgtt)

  u32 pde;
  gen6_for_all_pdes(pt, pd, pde)
-    if (pt)
-    free_pt(>base.vm, pt);
+    free_pt(>base.vm, pt);
  }
  static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
@@ -304,7 +303,7 @@ static void pd_vma_unbind(struct 
i915_address_space *vm,

  /* Free all no longer used page tables */
  gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
-    if (!pt || atomic_read(>used))
+    if (atomic_read(>used))


Wow, I was really confused trying to remember how this all works.

The gen6_for_all_pdes() does:

(pt = i915_pt_entry(pd, iter), true)

So NULL pt is expected, and does not 'break' here, since 'true' is 
always the value that decides whether to terminate the loop. So this 
patch would lead to NULL ptr deref, AFAICT.





  continue;
  free_pt(>base.vm, pt);


Re: [Intel-gfx] [PATCH] Initialize the obj flags for shmem objects

2023-02-03 Thread Matthew Auld

On 03/02/2023 12:10, Tvrtko Ursulin wrote:



On 03/02/2023 11:57, Aravind Iddamsetty wrote:

Obj flags for shmem objects are not being set correctly.

Cc: Matthew Auld 
Signed-off-by: Aravind Iddamsetty 


Could even be:

Fixes: 13d29c823738 ("drm/i915/ehl: unconditionally flush the pages on 
acquire")

Cc:  # v5.15+


Yup, that's what I also got.



?

Regards,

Tvrtko


---
  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c

index 114443096841..37d1efcd3ca6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -596,7 +596,7 @@ static int shmem_object_init(struct 
intel_memory_region *mem,

  mapping_set_gfp_mask(mapping, mask);
  GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
-    i915_gem_object_init(obj, _gem_shmem_ops, _class, 0);
+    i915_gem_object_init(obj, _gem_shmem_ops, _class, flags);
  obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
  obj->write_domain = I915_GEM_DOMAIN_CPU;
  obj->read_domains = I915_GEM_DOMAIN_CPU;


Re: [PATCH] Initialize the obj flags for shmem objects

2023-02-03 Thread Matthew Auld

On 03/02/2023 11:57, Aravind Iddamsetty wrote:

Obj flags for shmem objects are not being set correctly.

Cc: Matthew Auld 
Signed-off-by: Aravind Iddamsetty 


Subject should have "drm/i915:" prefix.

This is also a bug fix due to not setting BO_ALLOC_USER (the other flags 
don't seem to matter for shmem), which is quite important, so we need to 
figure out the "Fixes" tag. Maybe mention in the commit message that 
this fixes setting ALLOC_USER which is needed even for shmem.


Looking at the git history, ALLOC_USER looks to be first introduced in 
213d50927763 ("drm/i915/ttm: Introduce a TTM i915 gem object backend"), 
but the users of ALLOC_USER at this stage are only interesting for the 
ttm backend, and that already passes the flags due to using its own 
object_init() vfunc for all normal object types.


So the first real user impacted by this bug appears to be in: 
13d29c823738 ("drm/i915/ehl: unconditionally flush the pages on acquire").


So I think this needs:

Fixes: 13d29c823738 ("drm/i915/ehl: unconditionally flush the pages on 
acquire")

Cc:  # v5.15+

With that,
Reviewed-by: Matthew Auld 



---
  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 114443096841..37d1efcd3ca6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -596,7 +596,7 @@ static int shmem_object_init(struct intel_memory_region 
*mem,
mapping_set_gfp_mask(mapping, mask);
GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
  
-	i915_gem_object_init(obj, _gem_shmem_ops, _class, 0);

+   i915_gem_object_init(obj, _gem_shmem_ops, _class, flags);
obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;


Re: [PATCH 2/2] drm/ttm: revert "stop allocating dummy resources during BO creation"

2023-01-31 Thread Matthew Auld
On Wed, 25 Jan 2023 at 16:24, Matthew Auld
 wrote:
>
> On Wed, 25 Jan 2023 at 16:15, Christian König  
> wrote:
> >
> > Am 25.01.23 um 17:13 schrieb Matthew Auld:
> > > On Wed, 25 Jan 2023 at 15:50, Christian König
> > >  wrote:
> > >> This reverts commit 00984ad39599bb2a1e6ec5d4e9c75a749f7f45c9.
> > >>
> > >> It seems to still break i915.
> > > We also need to revert the third patch:
> > >
> > > b49323aa35d5 drm/ttm: prevent moving of pinned BOs
> > >
> > > It introduces the side effect of no longer calling tt_create(true) in
> > > ttm_bo_validate(), and I'm 99% sure that will break object clearing.
> > > We rely on having a ttm_tt for the initial dummy placement, with
> > > FLAG_ZERO_ALLOC set if clear is needed. Also I'm not sure who even
> > > creates the ttm_tt now, if ttm_bo_validate() doesn't, and we don't
> > > have the dummy move, like with this patch.
> >
> > Oh, yes of course. Can I add your Acked-by to reverting all three?
>
> Yeah, feel free to add. I can then resend your series with the extra
> stuff we need for i915.

https://patchwork.freedesktop.org/series/113484/

CI appears to be happy now. Feel free to merge the series.

>
> >
> > Thanks,
> > Christian.
> >
> > >
> > >> Signed-off-by: Christian König 
> > >> ---
> > >>   drivers/gpu/drm/ttm/ttm_bo.c | 7 +++
> > >>   1 file changed, 7 insertions(+)
> > >>
> > >> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> > >> index 33471e363ff4..9baccb2f6e99 100644
> > >> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > >> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > >> @@ -957,6 +957,7 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, 
> > >> struct ttm_buffer_object *bo,
> > >>   struct sg_table *sg, struct dma_resv *resv,
> > >>   void (*destroy) (struct ttm_buffer_object *))
> > >>   {
> > >> +   static const struct ttm_place sys_mem = { .mem_type = 
> > >> TTM_PL_SYSTEM };
> > >>  int ret;
> > >>
> > >>  kref_init(>kref);
> > >> @@ -973,6 +974,12 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, 
> > >> struct ttm_buffer_object *bo,
> > >>  bo->base.resv = >base._resv;
> > >>  atomic_inc(_glob.bo_count);
> > >>
> > >> +   ret = ttm_resource_alloc(bo, _mem, >resource);
> > >> +   if (unlikely(ret)) {
> > >> +   ttm_bo_put(bo);
> > >> +   return ret;
> > >> +   }
> > >> +
> > >>  /*
> > >>   * For ttm_bo_type_device buffers, allocate
> > >>   * address space from the device.
> > >> --
> > >> 2.34.1
> > >>
> >


[PATCH v2 2/6] drm/i915/ttm: audit remaining bo->resource

2023-01-30 Thread Matthew Auld
In the near future TTM will have NULL bo->resource when the object is
initially created, plus after calling into pipeline-gutting. Try to
handle the remaining cases. In practice NULL bo->resource should be
taken to mean swapped-out or purged object.

v2 (Andrzej):
  - Rather make i915_ttm_cpu_maps_iomem() return false with NULL
resource.

References: 516198d317d8 ("drm/i915: audit bo->resource usage v3")
Signed-off-by: Matthew Auld 
Cc: Christian König 
Cc: Nirmoy Das 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  | 10 --
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h  |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c |  4 
 drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c   |  7 +--
 4 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4758f21c91e1..341b94672abc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -472,7 +472,7 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, 
unsigned int flags)
struct ttm_placement place = {};
int ret;
 
-   if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM)
+   if (!bo->ttm || i915_ttm_cpu_maps_iomem(bo->resource))
return 0;
 
GEM_BUG_ON(!i915_tt->is_shmem);
@@ -511,7 +511,13 @@ static void i915_ttm_delete_mem_notify(struct 
ttm_buffer_object *bo)
 {
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
 
-   if (bo->resource && !i915_ttm_is_ghost_object(bo)) {
+   /*
+* This gets called twice by ttm, so long as we have a ttm resource or
+* ttm_tt then we can still safely call this. Due to pipeline-gutting,
+* we may have NULL bo->resource, but in that case we should always
+* have a ttm alive (like if the pages are swapped out).
+*/
+   if ((bo->resource || bo->ttm) && !i915_ttm_is_ghost_object(bo)) {
__i915_gem_object_pages_fini(obj);
i915_ttm_free_cached_io_rsgt(obj);
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
index 2a94a99ef76b..f8f6bed1b297 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -98,7 +98,7 @@ static inline bool i915_ttm_gtt_binds_lmem(struct 
ttm_resource *mem)
 static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem)
 {
/* Once / if we support GGTT, this is also false for cached ttm_tts */
-   return mem->mem_type != I915_PL_SYSTEM;
+   return mem && mem->mem_type != I915_PL_SYSTEM;
 }
 
 bool i915_ttm_resource_mappable(struct ttm_resource *res);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 76dd9e5e1a8b..d030182ca176 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -711,6 +711,10 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 
assert_object_held(dst);
assert_object_held(src);
+
+   if (GEM_WARN_ON(!src_bo->resource || !dst_bo->resource))
+   return -EINVAL;
+
i915_deps_init(, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
 
ret = dma_resv_reserve_fences(src_bo->base.resv, 1);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
index 7e67742bc65e..dfe39c8e74d8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
@@ -53,7 +53,7 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region 
*apply,
unsigned int flags;
int err = 0;
 
-   if (bo->resource->mem_type == I915_PL_SYSTEM || obj->ttm.backup)
+   if (!i915_ttm_cpu_maps_iomem(bo->resource) || obj->ttm.backup)
return 0;
 
if (pm_apply->allow_gpu && i915_gem_object_evictable(obj))
@@ -187,7 +187,10 @@ static int i915_ttm_restore(struct 
i915_gem_apply_to_region *apply,
return err;
 
/* Content may have been swapped. */
-   err = ttm_tt_populate(backup_bo->bdev, backup_bo->ttm, );
+   if (!backup_bo->resource)
+   err = ttm_bo_validate(backup_bo, i915_ttm_sys_placement(), 
);
+   if (!err)
+   err = ttm_tt_populate(backup_bo->bdev, backup_bo->ttm, );
if (!err) {
err = i915_gem_obj_copy_ttm(obj, backup, pm_apply->allow_gpu,
false);
-- 
2.39.1



[PATCH v2 6/6] drm/ttm: prevent moving of pinned BOs

2023-01-30 Thread Matthew Auld
From: Christian König 

We have checks for this in the individual drivers' move callbacks, but
it's probably better to generally forbid that on a higher level.

Also stop exporting ttm_resource_compat() since that's no longer
necessary after removing the extra checks in vmwgfx.

Signed-off-by: Christian König 
Reviewed-by: Matthew Auld 
Signed-off-by: Matthew Auld 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  4 
 drivers/gpu/drm/nouveau/nouveau_bo.c|  3 ---
 drivers/gpu/drm/radeon/radeon_ttm.c |  4 
 drivers/gpu/drm/ttm/ttm_bo.c| 20 
 drivers/gpu/drm/ttm/ttm_resource.c  |  1 -
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c  | 19 ++-
 6 files changed, 14 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c5ef7f7bdc15..2cd081cbf706 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -466,11 +466,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, 
bool evict,
return r;
}
 
-   /* Can't move a pinned BO */
abo = ttm_to_amdgpu_bo(bo);
-   if (WARN_ON_ONCE(abo->tbo.pin_count > 0))
-   return -EINVAL;
-
adev = amdgpu_ttm_adev(bo->bdev);
 
if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 288eebc70a67..c2ec91cc845d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1015,9 +1015,6 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
if (ret)
goto out_ntfy;
 
-   if (nvbo->bo.pin_count)
-   NV_WARN(drm, "Moving pinned object %p!\n", nvbo);
-
if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
ret = nouveau_bo_vm_bind(bo, new_reg, _tile);
if (ret)
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 1e8e287e113c..67075c85f847 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -211,11 +211,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, 
bool evict,
if (r)
return r;
 
-   /* Can't move a pinned BO */
rbo = container_of(bo, struct radeon_bo, tbo);
-   if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
-   return -EINVAL;
-
rdev = radeon_get_rdev(bo->bdev);
if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
ttm_bo_move_null(bo, new_mem);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 169818b32be2..882c2fa346f3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -893,14 +893,18 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
if (!placement->num_placement && !placement->num_busy_placement)
return ttm_bo_pipeline_gutting(bo);
 
-   /*
-* Check whether we need to move buffer.
-*/
-   if (!bo->resource || !ttm_resource_compat(bo->resource, placement)) {
-   ret = ttm_bo_move_buffer(bo, placement, ctx);
-   if (ret)
-   return ret;
-   }
+   /* Check whether we need to move buffer. */
+   if (bo->resource && ttm_resource_compat(bo->resource, placement))
+   return 0;
+
+   /* Moving of pinned BOs is forbidden */
+   if (bo->pin_count)
+   return -EINVAL;
+
+   ret = ttm_bo_move_buffer(bo, placement, ctx);
+   if (ret)
+   return ret;
+
/*
 * We might need to add a TTM.
 */
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index b8a826a24fb2..7333f7a87a2f 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -361,7 +361,6 @@ bool ttm_resource_compat(struct ttm_resource *res,
 
return false;
 }
-EXPORT_SYMBOL(ttm_resource_compat);
 
 void ttm_resource_set_bo(struct ttm_resource *res,
 struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index aa1cd5126a32..9bf1f9d2f9b6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -87,12 +87,7 @@ int vmw_bo_pin_in_placement(struct vmw_private *dev_priv,
if (unlikely(ret != 0))
goto err;
 
-   if (buf->base.pin_count > 0)
-   ret = ttm_resource_compat(bo->resource, placement)
-   ? 0 : -EINVAL;
-   else
-   ret = ttm_bo_validate(bo, placement, );
-
+   ret = ttm_bo_validate(bo, placement, );
if (!ret)
vmw_bo_pin_reserved(buf, true);

[PATCH v2 4/6] drm/ttm: stop allocating dummy resources during BO creation

2023-01-30 Thread Matthew Auld
From: Christian König 

That should no longer be necessary now that drivers should at least be
able to handle the move without a resource.

Signed-off-by: Christian König 
Reviewed-by: Matthew Auld 
Signed-off-by: Matthew Auld 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 773080f48864..169818b32be2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -952,7 +952,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, struct 
ttm_buffer_object *bo,
 struct sg_table *sg, struct dma_resv *resv,
 void (*destroy) (struct ttm_buffer_object *))
 {
-   static const struct ttm_place sys_mem = { .mem_type = TTM_PL_SYSTEM };
int ret;
 
kref_init(>kref);
@@ -969,12 +968,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, struct 
ttm_buffer_object *bo,
bo->base.resv = >base._resv;
atomic_inc(_glob.bo_count);
 
-   ret = ttm_resource_alloc(bo, _mem, >resource);
-   if (unlikely(ret)) {
-   ttm_bo_put(bo);
-   return ret;
-   }
-
/*
 * For ttm_bo_type_device buffers, allocate
 * address space from the device.
-- 
2.39.1



  1   2   3   4   5   6   7   8   9   10   >